diff --git a/src/regex_impl.cc b/src/regex_impl.cc
index 28d201f5..8742e19c 100644
--- a/src/regex_impl.cc
+++ b/src/regex_impl.cc
@@ -684,6 +684,7 @@ struct RegexCompiler
         {
             m_program.forward_start_desc = compute_start_desc();
             compile_node(0);
+            peephole_optimize(0, m_program.instructions.size()); // post-process everything emitted so far, before Match is appended
             push_inst(CompiledRegex::Match);
         }
 
@@ -692,6 +693,7 @@ struct RegexCompiler
             m_program.first_backward_inst = m_program.instructions.size();
             m_program.backward_start_desc = compute_start_desc();
             compile_node(0);
+            peephole_optimize(m_program.first_backward_inst, m_program.instructions.size()); // same pass, limited to the instructions emitted since first_backward_inst
             push_inst(CompiledRegex::Match);
         }
         else
@@ -1019,6 +1021,21 @@ private:
         return std::make_unique(start_desc);
     }
 
+    void peephole_optimize(size_t begin, size_t end) // reorders m_program.instructions within [begin, end); does nothing unless RegexCompileFlags::Optimize is set
+    {
+        if (not (m_flags & RegexCompileFlags::Optimize))
+            return;
+
+        // Move saves after all assertions on the same character
+        auto is_assertion = [](CompiledRegex::Op op) { return op >= CompiledRegex::LineStart; }; // NOTE(review): assumes every op declared at or after LineStart is an assertion - confirm against the Op enum order
+        for (auto i = begin, j = begin + 1; j < end; ++i, ++j) // (i, j) is each adjacent pair; a swapped Save ends up at j, i.e. the next i, so it keeps moving past consecutive assertions
+        {
+            if (m_program.instructions[i].op == CompiledRegex::Save and
+                is_assertion(m_program.instructions[j].op))
+                std::swap(m_program.instructions[i], m_program.instructions[j]); // NOTE(review): swaps by position only; nothing here updates other instructions that may refer to index i or j - confirm that is safe
+        }
+    }
+
     const ParsedRegex::Node& get_node(ParsedRegex::NodeIndex index) const
     {
         return m_parsed_regex.nodes[index];
diff --git a/src/regex_impl.hh b/src/regex_impl.hh
index 9ab89c71..962fbf61 100644
--- a/src/regex_impl.hh
+++ b/src/regex_impl.hh
@@ -471,7 +471,8 @@ private:
         release_saves(m_captures);
         m_captures = -1;
         m_threads.grow_ifn();
-        const int16_t first_inst = &config.instructions[0] - &m_program.instructions[0];
+
+        const int16_t first_inst = forward ? 0 : m_program.first_backward_inst; // start index taken directly from the program: 0 when forward, first_backward_inst otherwise
         m_threads.push_current({first_inst, -1});
 
         const auto& start_desc = forward ? m_program.forward_start_desc : m_program.backward_start_desc;