diff --git a/include/ctre/evaluation.hpp b/include/ctre/evaluation.hpp index 6701c6fa..3c608da8 100644 --- a/include/ctre/evaluation.hpp +++ b/include/ctre/evaluation.hpp @@ -115,11 +115,24 @@ template constexpr CTR return false; } +struct zero_terminated_string_end_iterator; template constexpr CTRE_FORCE_INLINE string_match_result evaluate_match_string(Iterator current, [[maybe_unused]] const EndIterator end, std::index_sequence) noexcept { - - bool same = (compare_character(String, current, end) && ... && true); - - return {current, same}; +#if __cpp_char8_t >= 201811 + if constexpr (sizeof...(String) && !std::is_same_v && is_random_accessible(typename std::iterator_traits::iterator_category{}) && !std::is_same_v) { +#else + if constexpr (sizeof...(String) && is_random_accessible(typename std::iterator_traits::iterator_category{}) && !std::is_same_v) { +#endif + /* Fast path for a non-empty pattern and iterators accepted by is_random_accessible: first requires at least sizeof...(String) elements between current and end (std::distance), then compares the pattern against *(current + Idx) for every index. On success the returned position is current advanced by sizeof...(String); on failure it is the unchanged current. NOTE(review): (size_t) is a C-style cast, and char_type is decltype(*current), which may be a reference type - the static_cast target is not visible in this view, confirm it is intended. */ using char_type = decltype(*current); + bool same = ((size_t)std::distance(current, end) >= sizeof...(String)) && ((static_cast(String) == *(current + Idx)) && ...); + if (same) { + return { current += sizeof...(String), same }; + } else { + return { current, same }; + } + } else { + /* Generic path: folds compare_character over every pattern character, exactly as the removed code did. */ bool same = (compare_character(String, current, end) && ... 
&& true); + return { current, same }; + } } template @@ -522,6 +535,167 @@ constexpr CTRE_FORCE_INLINE R evaluate(const Iterator begin, Iterator current, c } } +/* is_string: overload pair used as a compile-time predicate. The generic overload returns false; the overload taking a string returns true. */ template +constexpr bool is_string(T) noexcept { + return false; +} +template +constexpr bool is_string(string)noexcept { + return true; +} + +/* is_string_like: same shape as is_string, with one extra constrained overload for a CharacterLike argument that also returns true. The constraint itself is only partly visible in this view - TODO confirm against the header. */ template +constexpr bool is_string_like(T) noexcept { + return false; +} +template +constexpr bool is_string_like(string) noexcept { + return true; +} +template ::template value())>>> +constexpr bool is_string_like(CharacterLike) noexcept { + return true; +} + +/* extract_leading_string: overload set that rewrites a ctll::list or sequence purely through return types. The first two overloads are the terminating cases and return an empty value of their declared return type. The later overloads recurse, handling a following character, a following string, and nested sequences, as the two existing comments describe. Template argument lists are not visible in this view, so the exact matching rules should be confirmed against the header. */ template +constexpr auto extract_leading_string(ctll::list) noexcept -> ctll::list { + return {}; +} +template +constexpr auto extract_leading_string(sequence) noexcept -> sequence { + return {}; +} + +//concatenation +template +constexpr auto extract_leading_string(ctll::list, character, Content...>) noexcept { + return extract_leading_string(ctll::list, Content...>()); +} + +template +constexpr auto extract_leading_string(ctll::list, string, Content...>) noexcept { + return extract_leading_string(ctll::list, Content...>()); +} +//move things up out of sequences +template +constexpr auto extract_leading_string(ctll::list, Tail...>) noexcept { + return extract_leading_string(ctll::list()); +} + +template +constexpr auto extract_leading_string(ctll::list, Tail...>) noexcept { + return extract_leading_string(ctll::list()); +} + +/* make_into_sequence: maps a ctll::list or a sequence argument to a sequence return type; the body only value-initializes the result. */ template +constexpr auto make_into_sequence(ctll::list) noexcept -> sequence { + return{}; +} +template +constexpr auto make_into_sequence(sequence) noexcept -> sequence { + return{}; +} + +//boyer moore utils +/* is_prefix: returns true when the (wordlen - pos) elements starting at word[pos] equal the first (wordlen - pos) elements of word; an empty range yields true. NOTE(review): the loop index is int while the bound suffixlen is ptrdiff_t. */ template +constexpr bool is_prefix(Ty* word, size_t wordlen, ptrdiff_t pos) { + ptrdiff_t suffixlen = wordlen - pos; + for (int i = 0; i < suffixlen; i++) { + if (word[i] != word[pos + i]) { + return false; + } + } + return true; +} + +/* suffix_length: counts how many elements, walking backwards from word[pos], equal the elements walking backwards from word[wordlen - 1]; the count never exceeds pos. NOTE(review): the loop condition reads word[pos - i] before it tests i < pos, and it compares size_t i with ptrdiff_t pos (mixed signedness). */ template +constexpr size_t suffix_length(Ty* word, size_t wordlen, ptrdiff_t pos) { + size_t i = 0; + // increment suffix length i to the first 
mismatch or beginning of the word + for (; (word[pos - i] == word[wordlen - 1 - i]) && (i < pos); i++); + return i; +} +//MSVC workaround, array operator[] blows up in face if constexpr, use pointers instead +/* make_delta_2: builds a per-position shift table from the pattern characters; search_for_string stores the result under the name delta_2. The first loop walks p from patlen - 1 downwards and ends when the unsigned p wraps around (p < patlen becomes false); it fills every slot using is_prefix. The second loop overwrites selected slots using suffix_length. NOTE(review): table is declared without an initializer, and patlen - 1 would wrap for an empty pattern - the only visible caller requires sizeof...(String) > 2. */ template +constexpr auto make_delta_2(string) { + std::array chars{ String... }; + std::array table; + constexpr size_t patlen = sizeof...(String); + size_t p = 0; + size_t last_prefix_index = patlen - 1; + + for (p = patlen - 1; p < patlen; p--) { + if (is_prefix(chars.data(), patlen, p + 1)) { + last_prefix_index = p + 1; + } + table.data()[p] = last_prefix_index + (patlen - 1 - p); + } + + for (p = 0; p < patlen - 1; p++) { + size_t slen = suffix_length(chars.data(), patlen, p); + if (chars.data()[p - slen] != chars.data()[patlen - 1 - slen]) { + table.data()[patlen - 1 - slen] = patlen - 1 - p + slen; + } + } + + return table; +} + +/* string_search_result: value returned by search_for_string. On a hit, position is the first matched element, end_position is one past the last matched element and match is true. On a miss, both iterators are equal and match is false. */ template struct string_search_result { + Iterator position; + Iterator end_position; + bool match; +}; + +/* search_for_string: looks for the first occurrence of the pattern in [current, end). Three compile-time cases: (1) pattern longer than 2 with iterators accepted by is_random_accessible - a skip loop that compares the pattern back to front and advances by a computed shift; (2) any other non-empty pattern - scan for the first character, then verify with evaluate_match_string; (3) empty pattern - reports a match at current. */ template +constexpr CTRE_FORCE_INLINE string_search_result search_for_string(Iterator current, const EndIterator end, string) noexcept { +#if __cpp_char8_t >= 201811 + if constexpr (sizeof...(String) > 2 && !std::is_same_v && is_random_accessible(typename std::iterator_traits::iterator_category{})) { +#else + if constexpr (sizeof...(String) > 2 && is_random_accessible(typename std::iterator_traits::iterator_category{})) { +#endif + constexpr std::array::value_type, sizeof...(String)> chars{ String... 
}; + constexpr std::array delta_2 = make_delta_2::value_type>(string()); + + size_t str_size = std::distance(current, end); + if (str_size < sizeof...(String)) { //quick exit no way to match + return { current + str_size, current + str_size, false }; + } + + size_t i = sizeof...(String) - 1; //index over to the starting location + for (; i < str_size;) { + /* j indexes the pattern from its last character; m remembers one past the window end so it can be returned as end_position. i and j are decremented together, so i never drops below j. */ size_t j = sizeof...(String) - 1; + size_t m = i + 1; + for (; *(current + i) == *(chars.data() + j); --i, --j) { //match string in reverse + if (j == 0) { + return { current + i, current + m, true }; + } + } + /* Shift selection at the mismatch: when match_char accepts the mismatching input element the delta_2 entry for j is used, otherwise the full pattern length. presumably match_char tests membership in the pattern characters - its template arguments are not visible here, verify. */ size_t shift = enumeration::match_char(*(current + i)) ? static_cast(*(delta_2.data() + j)) : sizeof...(String); + i += shift; + } + + return { current + str_size, current + str_size, false }; + } else if constexpr (sizeof...(String)) { + //fallback to plain string matching + constexpr std::array::value_type, sizeof...(String)> chars{ String... }; + constexpr typename ::std::iterator_traits::value_type first_char = chars.data()[0]; + while (current != end) { + while (current != end && *current != first_char) { + current++; + } + /* NOTE(review): if the inner scan stopped because current == end, evaluate_match_string is still called here, and when it reports no match the ++current below runs with current == end. That steps past end before the outer current != end test is evaluated again. A guard on current == end before this call looks necessary - confirm. */ auto result = evaluate_match_string(current, end, std::make_index_sequence()); + if (result.match) { + return { current, result.position, result.match }; + } else { + ++current; + } + } + return { current, current, false }; + } else { + return { current, current, true }; + } +} } diff --git a/include/ctre/wrapper.hpp b/include/ctre/wrapper.hpp index e87b1be2..9475896d 100644 --- a/include/ctre/wrapper.hpp +++ b/include/ctre/wrapper.hpp @@ -62,19 +62,42 @@ struct match_method { struct search_method { template constexpr CTRE_FORCE_INLINE static auto exec(IteratorBegin orig_begin, IteratorBegin begin, IteratorEnd end, RE) noexcept { using result_iterator = std::conditional_t, IteratorBegin, ResultIterator>; - + using front_re = decltype(pop_and_get_front(extract_leading_string(ctll::list{}))); constexpr bool fixed = starts_with_anchor(Modifier{}, ctll::list{}); auto it = begin; - - 
for (; end != it && !fixed; ++it) { - if (auto out = evaluate(orig_begin, it, end, Modifier{}, return_type{}, ctll::list())) { - return out; + /* Three compile-time cases, chosen from front_re. (1) front is a string and size(front_re{}.list) is non-zero: locate each candidate with search_for_string, set the start mark at the hit, and call evaluate starting from the hit's end_position; after a failed attempt the result is reset with unmatch and the search resumes one element later. (2) front is a string and the list size is zero: the search result alone decides the match and supplies both marks. (3) otherwise: the previous position-by-position loop. NOTE(review): the string branches never consult fixed or it2.match; the loop in (1) relies on search_for_string returning position == end when nothing is found. */ if constexpr (is_string(front_re{}.front) && size(front_re{}.list)) { + auto it2 = search_for_string(it, end, front_re{}.front); + return_type result{}; + for (; end != it2.position;) { + result.set_start_mark(it2.position); + result = evaluate(orig_begin, it2.end_position, end, Modifier{}, result, ctll::list()); + if (result) { + return result; + } + result.unmatch(); + std::advance(it2.position, 1); + it2 = search_for_string(it2.position, end, front_re{}.front); } + result.set_start_mark(it2.position); + return result = evaluate(orig_begin, it2.end_position, end, Modifier{}, result, ctll::list()); + } else if constexpr (is_string(front_re{}.front)) { + auto it2 = search_for_string(it, end, front_re{}.front); + return_type result{}; + result.set_start_mark(it2.position); + result.set_end_mark(it2.end_position); + if (it2.match) + result.matched(); + return result; + } else { + for (; end != it && !fixed; ++it) { + if (auto out = evaluate(orig_begin, it, end, Modifier{}, return_type{}, ctll::list())) { + return out; + } + } + // in case the RE is empty or fixed + return evaluate(orig_begin, it, end, Modifier{}, return_type{}, ctll::list()); } - - // in case the RE is empty or fixed - return evaluate(orig_begin, it, end, Modifier{}, return_type{}, ctll::list()); } template constexpr CTRE_FORCE_INLINE static auto exec(IteratorBegin begin, IteratorEnd end, RE) noexcept {