Regex: remove the need for a processed inst vector
Identify each step with a counter, and check whether an instruction was already processed during the current step by comparing the step number stored on the instruction with that counter. This makes the matching faster, by removing the need to maintain a vector of the instructions executed during each step.
This commit is contained in:
parent
cfc52d7e6a
commit
621b0d3ab8
|
@ -55,8 +55,8 @@ struct CompiledRegex : RefCountable
|
||||||
struct Instruction
|
struct Instruction
|
||||||
{
|
{
|
||||||
Op op;
|
Op op;
|
||||||
mutable bool processed;
|
|
||||||
mutable bool scheduled;
|
mutable bool scheduled;
|
||||||
|
mutable uint16_t last_step;
|
||||||
uint32_t param;
|
uint32_t param;
|
||||||
};
|
};
|
||||||
static_assert(sizeof(Instruction) == 8, "");
|
static_assert(sizeof(Instruction) == 8, "");
|
||||||
|
@ -242,7 +242,7 @@ private:
|
||||||
{
|
{
|
||||||
Vector<Thread> current_threads;
|
Vector<Thread> current_threads;
|
||||||
Vector<Thread> next_threads;
|
Vector<Thread> next_threads;
|
||||||
Vector<uint16_t> processed;
|
uint16_t step = -1;
|
||||||
};
|
};
|
||||||
|
|
||||||
enum class StepResult { Consumed, Matched, Failed };
|
enum class StepResult { Consumed, Matched, Failed };
|
||||||
|
@ -252,11 +252,10 @@ private:
|
||||||
{
|
{
|
||||||
while (true)
|
while (true)
|
||||||
{
|
{
|
||||||
auto& inst = m_program.instructions[thread.inst];
|
auto& inst = m_program.instructions[thread.inst++];
|
||||||
if (inst.processed)
|
if (inst.last_step == state.step)
|
||||||
return StepResult::Failed;
|
return StepResult::Failed;
|
||||||
inst.processed = true;
|
inst.last_step = state.step;
|
||||||
state.processed.push_back(thread.inst++);
|
|
||||||
|
|
||||||
switch (inst.op)
|
switch (inst.op)
|
||||||
{
|
{
|
||||||
|
@ -368,6 +367,14 @@ private:
|
||||||
bool found_match = false;
|
bool found_match = false;
|
||||||
while (true) // Iterate on all codepoints and once at the end
|
while (true) // Iterate on all codepoints and once at the end
|
||||||
{
|
{
|
||||||
|
if (++state.step == 0)
|
||||||
|
{
|
||||||
|
// We wrapped, avoid potential collision on inst.last_step by resetting them
|
||||||
|
for (auto& inst : m_program.instructions)
|
||||||
|
inst.last_step = 0;
|
||||||
|
state.step = 1; // step 0 is never valid
|
||||||
|
}
|
||||||
|
|
||||||
while (not state.current_threads.empty())
|
while (not state.current_threads.empty())
|
||||||
{
|
{
|
||||||
auto thread = state.current_threads.back();
|
auto thread = state.current_threads.back();
|
||||||
|
@ -403,9 +410,6 @@ private:
|
||||||
}
|
}
|
||||||
for (auto& thread : state.next_threads)
|
for (auto& thread : state.next_threads)
|
||||||
m_program.instructions[thread.inst].scheduled = false;
|
m_program.instructions[thread.inst].scheduled = false;
|
||||||
for (auto inst : state.processed)
|
|
||||||
m_program.instructions[inst].processed = false;
|
|
||||||
state.processed.clear();
|
|
||||||
|
|
||||||
if (pos == m_end or state.next_threads.empty() or
|
if (pos == m_end or state.next_threads.empty() or
|
||||||
(found_match and (m_flags & RegexExecFlags::AnyMatch)))
|
(found_match and (m_flags & RegexExecFlags::AnyMatch)))
|
||||||
|
|
Loading…
Reference in New Issue
Block a user