Refactor ThreadedRegexVM::exec_program to avoid branching
Moving the logic into step_thread, instead of returning an enum that selects what to run, avoids the switch in exec_program and improves run time.
This commit is contained in:
parent
7463a0d449
commit
7959c7f731
|
@ -302,12 +302,19 @@ private:
|
||||||
ConstArrayView<CompiledRegex::Instruction> instructions;
|
ConstArrayView<CompiledRegex::Instruction> instructions;
|
||||||
};
|
};
|
||||||
|
|
||||||
enum class StepResult { Consumed, Matched, Failed, FindNextStart };
|
|
||||||
|
|
||||||
// Steps a thread until it consumes the current character, matches or fails
|
// Steps a thread until it consumes the current character, matches or fails
|
||||||
StepResult step_thread(const Iterator& pos, uint16_t current_step, Thread& thread, const ExecConfig& config)
|
void step_thread(const Iterator& pos, uint16_t current_step, Thread thread, const ExecConfig& config)
|
||||||
{
|
{
|
||||||
const bool no_saves = (config.flags & RegexExecFlags::NoSaves);
|
auto failed = [this](const Thread& thread) {
|
||||||
|
release_saves(thread.saves);
|
||||||
|
};
|
||||||
|
auto consumed = [this](const Thread& thread) {
|
||||||
|
if (m_program.instructions[thread.inst].scheduled)
|
||||||
|
return release_saves(thread.saves);
|
||||||
|
m_program.instructions[thread.inst].scheduled = true;
|
||||||
|
m_threads.push_next(thread);
|
||||||
|
};
|
||||||
|
|
||||||
auto* instructions = m_program.instructions.data();
|
auto* instructions = m_program.instructions.data();
|
||||||
while (true)
|
while (true)
|
||||||
{
|
{
|
||||||
|
@ -315,25 +322,25 @@ private:
|
||||||
// if this instruction was already executed for this step in another thread,
|
// if this instruction was already executed for this step in another thread,
|
||||||
// then this thread is redundant and can be dropped
|
// then this thread is redundant and can be dropped
|
||||||
if (inst.last_step == current_step)
|
if (inst.last_step == current_step)
|
||||||
return StepResult::Failed;
|
return failed(thread);
|
||||||
inst.last_step = current_step;
|
inst.last_step = current_step;
|
||||||
|
|
||||||
switch (inst.op)
|
switch (inst.op)
|
||||||
{
|
{
|
||||||
case CompiledRegex::Literal:
|
case CompiledRegex::Literal:
|
||||||
if (pos != config.end and inst.param == codepoint(pos, config))
|
if (pos != config.end and inst.param == codepoint(pos, config))
|
||||||
return StepResult::Consumed;
|
return consumed(thread);
|
||||||
return StepResult::Failed;
|
return failed(thread);
|
||||||
case CompiledRegex::Literal_IgnoreCase:
|
case CompiledRegex::Literal_IgnoreCase:
|
||||||
if (pos != config.end and inst.param == to_lower(codepoint(pos, config)))
|
if (pos != config.end and inst.param == to_lower(codepoint(pos, config)))
|
||||||
return StepResult::Consumed;
|
return consumed(thread);
|
||||||
return StepResult::Failed;
|
return failed(thread);
|
||||||
case CompiledRegex::AnyChar:
|
case CompiledRegex::AnyChar:
|
||||||
return StepResult::Consumed;
|
return consumed(thread);
|
||||||
case CompiledRegex::AnyCharExceptNewLine:
|
case CompiledRegex::AnyCharExceptNewLine:
|
||||||
if (pos != config.end and codepoint(pos, config) != '\n')
|
if (pos != config.end and codepoint(pos, config) != '\n')
|
||||||
return StepResult::Consumed;
|
return consumed(thread);
|
||||||
return StepResult::Failed;
|
return failed(thread);
|
||||||
case CompiledRegex::Jump:
|
case CompiledRegex::Jump:
|
||||||
thread.inst = static_cast<int16_t>(inst.param);
|
thread.inst = static_cast<int16_t>(inst.param);
|
||||||
break;
|
break;
|
||||||
|
@ -354,7 +361,7 @@ private:
|
||||||
}
|
}
|
||||||
case CompiledRegex::Save:
|
case CompiledRegex::Save:
|
||||||
{
|
{
|
||||||
if (no_saves)
|
if (config.flags & RegexExecFlags::NoSaves)
|
||||||
break;
|
break;
|
||||||
if (thread.saves < 0)
|
if (thread.saves < 0)
|
||||||
thread.saves = new_saves<false>(nullptr);
|
thread.saves = new_saves<false>(nullptr);
|
||||||
|
@ -368,72 +375,86 @@ private:
|
||||||
}
|
}
|
||||||
case CompiledRegex::Class:
|
case CompiledRegex::Class:
|
||||||
if (pos == config.end)
|
if (pos == config.end)
|
||||||
return StepResult::Failed;
|
return failed(thread);
|
||||||
return is_character_class(m_program.character_classes[inst.param], codepoint(pos, config)) ?
|
return is_character_class(m_program.character_classes[inst.param], codepoint(pos, config)) ?
|
||||||
StepResult::Consumed : StepResult::Failed;
|
consumed(thread) : failed(thread);
|
||||||
case CompiledRegex::CharacterType:
|
case CompiledRegex::CharacterType:
|
||||||
if (pos == config.end)
|
if (pos == config.end)
|
||||||
return StepResult::Failed;
|
return failed(thread);
|
||||||
return is_ctype((CharacterType)inst.param, codepoint(pos, config)) ?
|
return is_ctype((CharacterType)inst.param, codepoint(pos, config)) ?
|
||||||
StepResult::Consumed : StepResult::Failed;
|
consumed(thread) : failed(thread);
|
||||||
case CompiledRegex::LineStart:
|
case CompiledRegex::LineStart:
|
||||||
if (not is_line_start(pos, config))
|
if (not is_line_start(pos, config))
|
||||||
return StepResult::Failed;
|
return failed(thread);
|
||||||
break;
|
break;
|
||||||
case CompiledRegex::LineEnd:
|
case CompiledRegex::LineEnd:
|
||||||
if (not is_line_end(pos, config))
|
if (not is_line_end(pos, config))
|
||||||
return StepResult::Failed;
|
return failed(thread);
|
||||||
break;
|
break;
|
||||||
case CompiledRegex::WordBoundary:
|
case CompiledRegex::WordBoundary:
|
||||||
if (not is_word_boundary(pos, config))
|
if (not is_word_boundary(pos, config))
|
||||||
return StepResult::Failed;
|
return failed(thread);
|
||||||
break;
|
break;
|
||||||
case CompiledRegex::NotWordBoundary:
|
case CompiledRegex::NotWordBoundary:
|
||||||
if (is_word_boundary(pos, config))
|
if (is_word_boundary(pos, config))
|
||||||
return StepResult::Failed;
|
return failed(thread);
|
||||||
break;
|
break;
|
||||||
case CompiledRegex::SubjectBegin:
|
case CompiledRegex::SubjectBegin:
|
||||||
if (pos != config.subject_begin)
|
if (pos != config.subject_begin)
|
||||||
return StepResult::Failed;
|
return failed(thread);
|
||||||
break;
|
break;
|
||||||
case CompiledRegex::SubjectEnd:
|
case CompiledRegex::SubjectEnd:
|
||||||
if (pos != config.subject_end)
|
if (pos != config.subject_end)
|
||||||
return StepResult::Failed;
|
return failed(thread);
|
||||||
break;
|
break;
|
||||||
case CompiledRegex::LookAhead:
|
case CompiledRegex::LookAhead:
|
||||||
case CompiledRegex::NegativeLookAhead:
|
case CompiledRegex::NegativeLookAhead:
|
||||||
if (lookaround<MatchDirection::Forward, false>(inst.param, pos, config) !=
|
if (lookaround<MatchDirection::Forward, false>(inst.param, pos, config) !=
|
||||||
(inst.op == CompiledRegex::LookAhead))
|
(inst.op == CompiledRegex::LookAhead))
|
||||||
return StepResult::Failed;
|
return failed(thread);
|
||||||
break;
|
break;
|
||||||
case CompiledRegex::LookAhead_IgnoreCase:
|
case CompiledRegex::LookAhead_IgnoreCase:
|
||||||
case CompiledRegex::NegativeLookAhead_IgnoreCase:
|
case CompiledRegex::NegativeLookAhead_IgnoreCase:
|
||||||
if (lookaround<MatchDirection::Forward, true>(inst.param, pos, config) !=
|
if (lookaround<MatchDirection::Forward, true>(inst.param, pos, config) !=
|
||||||
(inst.op == CompiledRegex::LookAhead_IgnoreCase))
|
(inst.op == CompiledRegex::LookAhead_IgnoreCase))
|
||||||
return StepResult::Failed;
|
return failed(thread);
|
||||||
break;
|
break;
|
||||||
case CompiledRegex::LookBehind:
|
case CompiledRegex::LookBehind:
|
||||||
case CompiledRegex::NegativeLookBehind:
|
case CompiledRegex::NegativeLookBehind:
|
||||||
if (lookaround<MatchDirection::Backward, false>(inst.param, pos, config) !=
|
if (lookaround<MatchDirection::Backward, false>(inst.param, pos, config) !=
|
||||||
(inst.op == CompiledRegex::LookBehind))
|
(inst.op == CompiledRegex::LookBehind))
|
||||||
return StepResult::Failed;
|
return failed(thread);
|
||||||
break;
|
break;
|
||||||
case CompiledRegex::LookBehind_IgnoreCase:
|
case CompiledRegex::LookBehind_IgnoreCase:
|
||||||
case CompiledRegex::NegativeLookBehind_IgnoreCase:
|
case CompiledRegex::NegativeLookBehind_IgnoreCase:
|
||||||
if (lookaround<MatchDirection::Backward, true>(inst.param, pos, config) !=
|
if (lookaround<MatchDirection::Backward, true>(inst.param, pos, config) !=
|
||||||
(inst.op == CompiledRegex::LookBehind_IgnoreCase))
|
(inst.op == CompiledRegex::LookBehind_IgnoreCase))
|
||||||
return StepResult::Failed;
|
return failed(thread);
|
||||||
break;
|
break;
|
||||||
case CompiledRegex::FindNextStart:
|
case CompiledRegex::FindNextStart:
|
||||||
kak_assert(m_threads.current_is_empty()); // search thread should by construction be the lower priority one
|
// search thread should by construction be the lowest priority thread
|
||||||
if (m_threads.next_is_empty())
|
kak_assert(m_threads.current_is_empty());
|
||||||
return StepResult::FindNextStart;
|
if (not m_threads.next_is_empty())
|
||||||
return StepResult::Consumed;
|
return consumed(thread);
|
||||||
|
m_threads.push_next(thread);
|
||||||
|
m_find_next_start = true;
|
||||||
|
return;
|
||||||
case CompiledRegex::Match:
|
case CompiledRegex::Match:
|
||||||
return StepResult::Matched;
|
if ((pos != config.end and not (config.flags & RegexExecFlags::Search)) or
|
||||||
|
(config.flags & RegexExecFlags::NotInitialNull and pos == config.begin))
|
||||||
|
return failed(thread);
|
||||||
|
|
||||||
|
release_saves(m_captures);
|
||||||
|
m_captures = thread.saves;
|
||||||
|
m_found_match = true;
|
||||||
|
|
||||||
|
// remove lower priority threads
|
||||||
|
while (not m_threads.current_is_empty())
|
||||||
|
release_saves(m_threads.pop_current().saves);
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return StepResult::Failed;
|
return failed(thread);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool exec_program(Iterator pos, const ExecConfig& config)
|
bool exec_program(Iterator pos, const ExecConfig& config)
|
||||||
|
@ -446,7 +467,7 @@ private:
|
||||||
const auto& start_desc = forward ? m_program.forward_start_desc : m_program.backward_start_desc;
|
const auto& start_desc = forward ? m_program.forward_start_desc : m_program.backward_start_desc;
|
||||||
|
|
||||||
uint16_t current_step = -1;
|
uint16_t current_step = -1;
|
||||||
bool found_match = false;
|
m_found_match = false;
|
||||||
while (true) // Iterate on all codepoints and once at the end
|
while (true) // Iterate on all codepoints and once at the end
|
||||||
{
|
{
|
||||||
if (++current_step == 0)
|
if (++current_step == 0)
|
||||||
|
@ -457,63 +478,27 @@ private:
|
||||||
current_step = 1; // step 0 is never valid
|
current_step = 1; // step 0 is never valid
|
||||||
}
|
}
|
||||||
|
|
||||||
bool find_next_start = false;
|
m_find_next_start = false;
|
||||||
while (not m_threads.current_is_empty())
|
while (not m_threads.current_is_empty())
|
||||||
{
|
step_thread(pos, current_step, m_threads.pop_current(), config);
|
||||||
auto thread = m_threads.pop_current();
|
|
||||||
switch (step_thread(pos, current_step, thread, config))
|
|
||||||
{
|
|
||||||
case StepResult::Matched:
|
|
||||||
if ((pos != config.end and not (config.flags & RegexExecFlags::Search)) or
|
|
||||||
(config.flags & RegexExecFlags::NotInitialNull and pos == config.begin))
|
|
||||||
{
|
|
||||||
release_saves(thread.saves);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
release_saves(m_captures);
|
|
||||||
m_captures = thread.saves;
|
|
||||||
found_match = true;
|
|
||||||
|
|
||||||
// remove this and lower priority threads
|
|
||||||
while (not m_threads.current_is_empty())
|
|
||||||
release_saves(m_threads.pop_current().saves);
|
|
||||||
break;
|
|
||||||
case StepResult::Failed:
|
|
||||||
release_saves(thread.saves);
|
|
||||||
break;
|
|
||||||
case StepResult::Consumed:
|
|
||||||
if (m_program.instructions[thread.inst].scheduled)
|
|
||||||
{
|
|
||||||
release_saves(thread.saves);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
m_program.instructions[thread.inst].scheduled = true;
|
|
||||||
m_threads.push_next(thread);
|
|
||||||
break;
|
|
||||||
case StepResult::FindNextStart:
|
|
||||||
m_threads.push_next(thread);
|
|
||||||
find_next_start = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for (auto& thread : m_threads.next_threads())
|
for (auto& thread : m_threads.next_threads())
|
||||||
m_program.instructions[thread.inst].scheduled = false;
|
m_program.instructions[thread.inst].scheduled = false;
|
||||||
|
|
||||||
if (pos == config.end or m_threads.next_is_empty() or
|
if (pos == config.end or m_threads.next_is_empty() or
|
||||||
(found_match and (config.flags & RegexExecFlags::AnyMatch)))
|
(m_found_match and (config.flags & RegexExecFlags::AnyMatch)))
|
||||||
{
|
{
|
||||||
for (auto& t : m_threads.next_threads())
|
for (auto& t : m_threads.next_threads())
|
||||||
release_saves(t.saves);
|
release_saves(t.saves);
|
||||||
m_threads.clear_next();
|
m_threads.clear_next();
|
||||||
return found_match;
|
return m_found_match;
|
||||||
}
|
}
|
||||||
|
|
||||||
m_threads.swap_next();
|
m_threads.swap_next();
|
||||||
forward ? utf8::to_next(pos, config.subject_end)
|
forward ? utf8::to_next(pos, config.subject_end)
|
||||||
: utf8::to_previous(pos, config.subject_begin);
|
: utf8::to_previous(pos, config.subject_begin);
|
||||||
|
|
||||||
if (find_next_start and start_desc)
|
if (m_find_next_start and start_desc)
|
||||||
to_next_start(pos, config, *start_desc);
|
to_next_start(pos, config, *start_desc);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -674,6 +659,8 @@ private:
|
||||||
Vector<Saves*, MemoryDomain::Regex> m_saves;
|
Vector<Saves*, MemoryDomain::Regex> m_saves;
|
||||||
int16_t m_first_free = -1;
|
int16_t m_first_free = -1;
|
||||||
int16_t m_captures = -1;
|
int16_t m_captures = -1;
|
||||||
|
bool m_found_match = false;
|
||||||
|
bool m_find_next_start = false;
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue
Block a user