From 9ec376135bba2584cec0c08e767055b256f6d7e3 Mon Sep 17 00:00:00 2001 From: Maxime Coste Date: Sun, 8 Oct 2017 09:22:24 +0800 Subject: [PATCH] Regex: Introduce RegexExecFlags::PrevAvailable Rework assertion code as well. --- src/regex.cc | 1 - src/regex.hh | 4 +++- src/regex_impl.cc | 11 +++++++++++ src/regex_impl.hh | 30 +++++++++++++++++++----------- 4 files changed, 33 insertions(+), 13 deletions(-) diff --git a/src/regex.cc b/src/regex.cc index 25ace466..a53dc498 100644 --- a/src/regex.cc +++ b/src/regex.cc @@ -31,7 +31,6 @@ void option_from_string(StringView str, Regex& re) re = Regex{str}; } - void regex_mismatch(const Regex& re) { write_to_debug_buffer(format("regex mismatch for '{}'", re.str())); diff --git a/src/regex.hh b/src/regex.hh index 64e24304..3d067b0b 100644 --- a/src/regex.hh +++ b/src/regex.hh @@ -133,6 +133,8 @@ inline RegexExecFlags convert_flags(RegexConstant::match_flag_type flags) res |= RegexExecFlags::NotInitialNull; if (flags & RegexConstant::match_any) res |= RegexExecFlags::AnyMatch; + if (flags & RegexConstant::match_prev_avail) + res |= RegexExecFlags::PrevAvailable; return res; } @@ -261,7 +263,7 @@ private: if (m_results.size() and m_results[0].first == m_results[0].second) additional_flags |= RegexConstant::match_not_initial_null; if (m_begin != m_next_begin) - additional_flags |= RegexConstant::match_not_bob; + additional_flags |= RegexConstant::match_not_bob | RegexConstant::match_prev_avail; if (not regex_search(m_next_begin, m_end, m_results, *m_regex, m_flags | additional_flags)) diff --git a/src/regex_impl.cc b/src/regex_impl.cc index e7991d14..98fa45f3 100644 --- a/src/regex_impl.cc +++ b/src/regex_impl.cc @@ -1041,6 +1041,11 @@ auto test_regex = UnitTest{[]{ kak_assert(StringView{vm.captures()[0], vm.captures()[1]} == "f"); } + { + TestVM<> vm{R"((? vm{R"((?!foo)...)"}; kak_assert(not vm.exec("foo")); @@ -1110,6 +1115,12 @@ auto test_regex = UnitTest{[]{ TestVM<> vm{R"(()*)"}; kak_assert(not vm.exec(" ")); } + + { + TestVM<> vm{R"(\b(?) { return true; } @@ -125,8 +126,11 @@ public: bool exec(Iterator begin, Iterator end, RegexExecFlags flags) { const bool forward = direction == MatchDirection::Forward; - m_begin = Utf8It{utf8::iterator{forward ? begin : end, begin, end}}; - m_end = Utf8It{utf8::iterator{forward ? end : begin, begin, end}}; + const bool prev_avail = flags & RegexExecFlags::PrevAvailable; + m_begin = Utf8It{utf8::iterator{forward ? begin : end, + prev_avail ? begin-1 : begin, end}}; + m_end = Utf8It{utf8::iterator{forward ? end : begin, + prev_avail ? begin-1 : begin, end}}; m_flags = flags; if (flags & RegexExecFlags::NotInitialNull and m_begin == m_end) @@ -314,8 +318,8 @@ private: case CompiledRegex::NegativeLookBehind: { auto ref = m_program.lookarounds.begin() + inst.param; - for (auto it = pos-1; *ref != -1 and it >= m_begin; --it, ++ref) - if (*it != *ref) + for (auto it = pos; *ref != -1 and it > m_begin; --it, ++ref) + if (*(it-1) != *ref) break; if ((inst.op == CompiledRegex::LookBehind and *ref != -1) or (inst.op == CompiledRegex::NegativeLookBehind and *ref == -1)) @@ -400,21 +404,25 @@ private: bool is_line_start(const Utf8It& pos) const { - return (pos == m_begin and not (m_flags & RegexExecFlags::NotBeginOfLine)) or - *(pos-1) == '\n'; + if (not (m_flags & RegexExecFlags::PrevAvailable) and pos == m_begin) + return not (m_flags & RegexExecFlags::NotBeginOfLine); + return *(pos-1) == '\n'; } bool is_line_end(const Utf8It& pos) const { - return (pos == m_end and not (m_flags & RegexExecFlags::NotEndOfLine)) or - *pos == '\n'; + if (pos == m_end) + return not (m_flags & RegexExecFlags::NotEndOfLine); + return *pos == '\n'; } bool is_word_boundary(const Utf8It& pos) const { - return (pos == m_begin and not (m_flags & RegexExecFlags::NotBeginOfWord)) or - (pos == m_end and not (m_flags & RegexExecFlags::NotEndOfWord)) or - is_word(*(pos-1)) != is_word(*pos); + if (not (m_flags & RegexExecFlags::PrevAvailable) and pos == m_begin) + return not (m_flags & RegexExecFlags::NotBeginOfWord); + if (pos == m_end) + return not (m_flags & RegexExecFlags::NotEndOfWord); + return is_word(*(pos-1)) != is_word(*pos); } static const Iterator& get_base(const utf8::iterator& it) { return it.base(); }