Regex: remove the need for a processed inst vector

Identify each step with a counter, and check if the instruction
was already processed this step. This makes the matching faster,
by removing the need to maintain a vector of instructions executed
this step.
This commit is contained in:
Maxime Coste 2017-10-14 12:58:42 +08:00
parent cfc52d7e6a
commit 621b0d3ab8

View File

@ -55,8 +55,8 @@ struct CompiledRegex : RefCountable
struct Instruction struct Instruction
{ {
Op op; Op op;
mutable bool processed;
mutable bool scheduled; mutable bool scheduled;
mutable uint16_t last_step;
uint32_t param; uint32_t param;
}; };
static_assert(sizeof(Instruction) == 8, ""); static_assert(sizeof(Instruction) == 8, "");
@ -242,7 +242,7 @@ private:
{ {
Vector<Thread> current_threads; Vector<Thread> current_threads;
Vector<Thread> next_threads; Vector<Thread> next_threads;
Vector<uint16_t> processed; uint16_t step = -1;
}; };
enum class StepResult { Consumed, Matched, Failed }; enum class StepResult { Consumed, Matched, Failed };
@ -252,11 +252,10 @@ private:
{ {
while (true) while (true)
{ {
auto& inst = m_program.instructions[thread.inst]; auto& inst = m_program.instructions[thread.inst++];
if (inst.processed) if (inst.last_step == state.step)
return StepResult::Failed; return StepResult::Failed;
inst.processed = true; inst.last_step = state.step;
state.processed.push_back(thread.inst++);
switch (inst.op) switch (inst.op)
{ {
@ -368,6 +367,14 @@ private:
bool found_match = false; bool found_match = false;
while (true) // Iterate on all codepoints and once at the end while (true) // Iterate on all codepoints and once at the end
{ {
if (++state.step == 0)
{
// We wrapped, avoid potential collision on inst.last_step by resetting them
for (auto& inst : m_program.instructions)
inst.last_step = 0;
state.step = 1; // step 0 is never valid
}
while (not state.current_threads.empty()) while (not state.current_threads.empty())
{ {
auto thread = state.current_threads.back(); auto thread = state.current_threads.back();
@ -403,9 +410,6 @@ private:
} }
for (auto& thread : state.next_threads) for (auto& thread : state.next_threads)
m_program.instructions[thread.inst].scheduled = false; m_program.instructions[thread.inst].scheduled = false;
for (auto inst : state.processed)
m_program.instructions[inst].processed = false;
state.processed.clear();
if (pos == m_end or state.next_threads.empty() or if (pos == m_end or state.next_threads.empty() or
(found_match and (m_flags & RegexExecFlags::AnyMatch))) (found_match and (m_flags & RegexExecFlags::AnyMatch)))