Regex: tag instructions as scheduled as well instead of searching

And a few more code cleanups in the ThreadedRegexVM
This commit is contained in:
Maxime Coste 2017-10-07 19:58:10 +08:00
parent 6bc5823745
commit f3736a4b48
2 changed files with 17 additions and 9 deletions

View File

@ -666,7 +666,7 @@ private:
uint32_t push_inst(CompiledRegex::Op op, uint32_t param = 0) uint32_t push_inst(CompiledRegex::Op op, uint32_t param = 0)
{ {
uint32_t res = m_program.instructions.size(); uint32_t res = m_program.instructions.size();
m_program.instructions.push_back({ op, false, param }); m_program.instructions.push_back({ op, false, false, param });
return res; return res;
} }

View File

@ -49,6 +49,7 @@ struct CompiledRegex : RefCountable
{ {
Op op; Op op;
mutable bool processed; mutable bool processed;
mutable bool scheduled;
uint32_t param; uint32_t param;
}; };
static_assert(sizeof(Instruction) == 8, ""); static_assert(sizeof(Instruction) == 8, "");
@ -225,15 +226,14 @@ private:
return StepResult::Failed; return StepResult::Failed;
inst.processed = true; inst.processed = true;
const Codepoint cp = pos == m_end ? 0 : *pos;
switch (inst.op) switch (inst.op)
{ {
case CompiledRegex::Literal: case CompiledRegex::Literal:
if (inst.param == cp) if (pos != m_end and inst.param == *pos)
return StepResult::Consumed; return StepResult::Consumed;
return StepResult::Failed; return StepResult::Failed;
case CompiledRegex::LiteralIgnoreCase: case CompiledRegex::LiteralIgnoreCase:
if (inst.param == to_lower(cp)) if (pos != m_end and inst.param == to_lower(*pos))
return StepResult::Consumed; return StepResult::Consumed;
return StepResult::Failed; return StepResult::Failed;
case CompiledRegex::AnyChar: case CompiledRegex::AnyChar:
@ -269,7 +269,9 @@ private:
break; break;
} }
case CompiledRegex::Matcher: case CompiledRegex::Matcher:
return m_program.matchers[inst.param](cp) ? if (pos == m_end)
return StepResult::Failed;
return m_program.matchers[inst.param](*pos) ?
StepResult::Consumed : StepResult::Failed; StepResult::Consumed : StepResult::Failed;
case CompiledRegex::LineStart: case CompiledRegex::LineStart:
if (not is_line_start(pos)) if (not is_line_start(pos))
@ -332,10 +334,13 @@ private:
next_threads.clear(); next_threads.clear();
bool found_match = false; bool found_match = false;
while (true) while (true) // Iterate on all codepoints and once at the end
{ {
for (auto& inst : m_program.instructions) for (auto& inst : m_program.instructions)
{
inst.processed = false; inst.processed = false;
inst.scheduled = false;
}
while (not current_threads.empty()) while (not current_threads.empty())
{ {
@ -363,10 +368,13 @@ private:
release_saves(thread.saves); release_saves(thread.saves);
break; break;
case StepResult::Consumed: case StepResult::Consumed:
if (contains_that(next_threads, [&](auto& t) { return t.inst == thread.inst; })) if (m_program.instructions[thread.inst].scheduled)
{
release_saves(thread.saves); release_saves(thread.saves);
else continue;
next_threads.push_back(thread); }
m_program.instructions[thread.inst].scheduled = true;
next_threads.push_back(thread);
break; break;
} }
} }