diff --git a/src/regex_impl.cc b/src/regex_impl.cc
index c32bbbf1..f02e6c2a 100644
--- a/src/regex_impl.cc
+++ b/src/regex_impl.cc
@@ -614,6 +614,7 @@ struct RegexCompiler
     RegexCompiler(const ParsedRegex& parsed_regex, RegexCompileFlags flags, MatchDirection direction)
         : m_parsed_regex{parsed_regex}, m_flags(flags), m_forward{direction == MatchDirection::Forward}
     {
+        write_search_prefix();
         compile_node(m_parsed_regex.ast);
         push_inst(CompiledRegex::Match);
         m_program.matchers = m_parsed_regex.matchers;
@@ -788,6 +789,18 @@ private:
         return pos;
     }
 
+    // Emit the instruction prefix used in the search use case. It must occupy the
+    // first CompiledRegex::search_prefix_size instructions of the program, as
+    // non-search executions skip it by starting at that instruction index.
+    void write_search_prefix()
+    {
+        kak_assert(m_program.instructions.empty());
+        push_inst(CompiledRegex::Split_PrioritizeChild, CompiledRegex::search_prefix_size);
+        push_inst(CompiledRegex::FindNextStart);
+        push_inst(CompiledRegex::Split_PrioritizeParent, 1);
+        kak_assert(m_program.instructions.size() == CompiledRegex::search_prefix_size);
+    }
+
     uint32_t push_inst(CompiledRegex::Op op, uint32_t param = 0)
     {
         constexpr auto max_instructions = std::numeric_limits::max();
@@ -1003,6 +1016,9 @@ void dump_regex(const CompiledRegex& program)
                 printf("%s (%s)\n", name, str.c_str());
                 break;
             }
+            case CompiledRegex::FindNextStart:
+                printf("find next start\n");
+                break;
             case CompiledRegex::Match:
                 printf("match\n");
         }
diff --git a/src/regex_impl.hh b/src/regex_impl.hh
index 108fe626..90411e94 100644
--- a/src/regex_impl.hh
+++ b/src/regex_impl.hh
@@ -28,6 +28,7 @@ struct CompiledRegex : RefCountable, UseMemoryDomain
     enum Op : char
     {
         Match,
+        FindNextStart,
         Literal,
         Literal_IgnoreCase,
         AnyChar,
@@ -62,6 +63,8 @@ struct CompiledRegex : RefCountable, UseMemoryDomain
     };
     static_assert(sizeof(Instruction) == 8, "");
 
+    static constexpr uint16_t search_prefix_size = 3;
+
     explicit operator bool() const { return not instructions.empty(); }
 
     Vector instructions;
@@ -151,30 +154,15 @@ public:
 
         const bool no_saves = (flags & RegexExecFlags::NoSaves);
 
-        Utf8It start{m_begin};
+        const bool search = (flags & RegexExecFlags::Search);
 
-        const CompiledRegex::StartChars* start_chars = m_program.start_chars.get();
-        if (flags & RegexExecFlags::Search)
-            to_next_start(start, m_end, start_chars);
+        Utf8It start{m_begin};
+        if (search)
+            to_next_start(start, m_end, m_program.start_chars.get());
 
         ExecState state;
-        if (exec_from(start, no_saves ? nullptr : new_saves(nullptr),
-                      state))
-            return true;
-
-        if (not (flags & RegexExecFlags::Search))
-            return false;
-
-        do
-        {
-            to_next_start(++start, m_end, start_chars);
-            if (exec_from(start, no_saves ? nullptr : new_saves(nullptr),
-                          state))
-                return true;
-        }
-        while (start != m_end);
-
-        return false;
+        return exec_program(start, search ? 0 : CompiledRegex::search_prefix_size,
+                            no_saves ? nullptr : new_saves(nullptr), state);
     }
 
     ArrayView captures() const
@@ -247,10 +235,10 @@ private:
         uint16_t step = -1;
     };
 
-    enum class StepResult { Consumed, Matched, Failed };
+    enum class StepResult { Consumed, Matched, Failed, FindNextStart };
 
     // Steps a thread until it consumes the current character, matches or fail
-    StepResult step(const Utf8It& pos, Thread& thread, ExecState& state)
+    StepResult step(Utf8It& pos, Thread& thread, ExecState& state)
     {
         while (true)
         {
@@ -354,6 +342,11 @@ private:
                         (inst.op == CompiledRegex::LookBehind_IgnoreCase))
                         return StepResult::Failed;
                     break;
+                case CompiledRegex::FindNextStart:
+                    kak_assert(state.current_threads.empty()); // search thread should by construction be the lower priority one
+                    if (state.next_threads.empty())
+                        return StepResult::FindNextStart;
+                    return StepResult::Consumed;
                 case CompiledRegex::Match:
                     return StepResult::Matched;
             }
@@ -361,9 +354,9 @@ private:
         return StepResult::Failed;
     }
 
-    bool exec_from(Utf8It pos, Saves* initial_saves, ExecState& state)
+    bool exec_program(Utf8It pos, uint16_t first_inst, Saves* initial_saves, ExecState& state)
     {
-        state.current_threads.push_back({0, initial_saves});
+        state.current_threads.push_back({first_inst, initial_saves});
         state.next_threads.clear();
 
         bool found_match = false;
@@ -377,6 +370,7 @@ private:
                 state.step = 1; // step 0 is never valid
             }
 
+            bool find_next_start = false;
             while (not state.current_threads.empty())
             {
                 auto thread = state.current_threads.back();
@@ -408,6 +402,10 @@ private:
                     m_program.instructions[thread.inst].scheduled = true;
                     state.next_threads.push_back(thread);
                     break;
+                case StepResult::FindNextStart:
+                    state.next_threads.push_back(thread);
+                    find_next_start = true;
+                    break;
                 }
             }
             for (auto& thread : state.next_threads)
@@ -420,6 +418,9 @@ private:
             std::swap(state.current_threads, state.next_threads);
             std::reverse(state.current_threads.begin(), state.current_threads.end());
             ++pos;
+
+            if (find_next_start)
+                to_next_start(pos, m_end, m_program.start_chars.get());
         }
     }
 