From 4ff655cc09a572b8672f3d14210c245027de4f25 Mon Sep 17 00:00:00 2001 From: Maxime Coste Date: Sat, 7 Oct 2017 19:08:14 +0800 Subject: [PATCH] Regex: store the processed flag directly in CompiledRegex instructions --- src/regex_impl.cc | 2 +- src/regex_impl.hh | 31 ++++++++++++++++++------------- 2 files changed, 19 insertions(+), 14 deletions(-) diff --git a/src/regex_impl.cc b/src/regex_impl.cc index 85777f4b..f853665a 100644 --- a/src/regex_impl.cc +++ b/src/regex_impl.cc @@ -666,7 +666,7 @@ private: uint32_t push_inst(CompiledRegex::Op op, uint32_t param = 0) { uint32_t res = m_program.instructions.size(); - m_program.instructions.push_back({ op, param }); + m_program.instructions.push_back({ op, false, param }); return res; } diff --git a/src/regex_impl.hh b/src/regex_impl.hh index 59f50082..a3193ded 100644 --- a/src/regex_impl.hh +++ b/src/regex_impl.hh @@ -48,8 +48,10 @@ struct CompiledRegex : RefCountable struct Instruction { Op op; + mutable bool processed; uint32_t param; }; + static_assert(sizeof(Instruction) == 8, ""); explicit operator bool() const { return not instructions.empty(); } @@ -129,7 +131,6 @@ public: return false; Vector current_threads, next_threads; - std::unique_ptr processed_inst{new bool[m_program.instructions.size()]}; const bool no_saves = (m_flags & RegexExecFlags::NoSaves); Utf8It start{m_begin}; @@ -140,7 +141,7 @@ public: to_next_start(start, m_end, start_chars); if (exec_from(start, no_saves ? nullptr : new_saves(nullptr), - current_threads, next_threads, processed_inst.get())) + current_threads, next_threads)) return true; if (not (flags & RegexExecFlags::Search)) @@ -150,7 +151,7 @@ public: { to_next_start(++start, m_end, start_chars); if (exec_from(start, no_saves ? nullptr : new_saves(nullptr), - current_threads, next_threads, processed_inst.get())) + current_threads, next_threads)) return true; } while (start != m_end); @@ -215,15 +216,14 @@ private: enum class StepResult { Consumed, Matched, Failed }; // Steps a thread until it consumes the current character, matches or fail - StepResult step(const Utf8It& pos, Thread& thread, Vector& threads, bool* processed_inst) + StepResult step(const Utf8It& pos, Thread& thread, Vector& threads) { while (true) { - if (processed_inst[thread.inst]) - return StepResult::Failed; - processed_inst[thread.inst] = true; - auto& inst = m_program.instructions[thread.inst++]; + if (inst.processed) + return StepResult::Failed; + inst.processed = true; const Codepoint cp = pos == m_end ? 0 : *pos; switch (inst.op) @@ -326,20 +326,25 @@ private: return StepResult::Failed; } - bool exec_from(const Utf8It& start, Saves* initial_saves, Vector& current_threads, Vector& next_threads, bool* processed_inst) + bool exec_from(const Utf8It& start, Saves* initial_saves, Vector& current_threads, Vector& next_threads) { current_threads.push_back({0, initial_saves}); next_threads.clear(); + auto clear_processed = [this]() { + for (auto& inst : m_program.instructions) + inst.processed = false; + }; + bool found_match = false; for (Utf8It pos = start; pos != m_end; ++pos) { - memset(processed_inst, 0, sizeof(bool) * m_program.instructions.size()); + clear_processed(); while (not current_threads.empty()) { auto thread = current_threads.back(); current_threads.pop_back(); - switch (step(pos, thread, current_threads, processed_inst)) + switch (step(pos, thread, current_threads)) { case StepResult::Matched: if (not (m_flags & RegexExecFlags::Search) or // We are not at end, this is not a full match @@ -377,13 +382,13 @@ private: if (found_match) return true; - memset(processed_inst, 0, sizeof(bool) * m_program.instructions.size()); + clear_processed(); // Step remaining threads to see if they match without consuming anything else while (not current_threads.empty()) { auto thread = current_threads.back(); current_threads.pop_back(); - if (step(m_end, thread, current_threads, processed_inst) == StepResult::Matched) + if (step(m_end, thread, current_threads) == StepResult::Matched) { release_saves(m_captures); m_captures = thread.saves;