Regex: Refactor thread handling in ThreadedRegexVM
This commit is contained in:
parent
589cde67f0
commit
9fbafba4cb
|
@ -100,13 +100,12 @@ struct ThreadedRegexVM
|
||||||
};
|
};
|
||||||
|
|
||||||
enum class StepResult { Consumed, Matched, Failed };
|
enum class StepResult { Consumed, Matched, Failed };
|
||||||
StepResult step(size_t thread_index)
|
StepResult step(Thread& thread)
|
||||||
{
|
{
|
||||||
const auto prog_start = m_program.bytecode.data();
|
const auto prog_start = m_program.bytecode.data();
|
||||||
const auto prog_end = prog_start + m_program.bytecode.size();
|
const auto prog_end = prog_start + m_program.bytecode.size();
|
||||||
while (true)
|
while (true)
|
||||||
{
|
{
|
||||||
auto& thread = m_threads[thread_index];
|
|
||||||
const Codepoint cp = m_pos == m_end ? 0 : *m_pos;
|
const Codepoint cp = m_pos == m_end ? 0 : *m_pos;
|
||||||
const CompiledRegex::Op op = (CompiledRegex::Op)*thread.inst++;
|
const CompiledRegex::Op op = (CompiledRegex::Op)*thread.inst++;
|
||||||
switch (op)
|
switch (op)
|
||||||
|
@ -130,7 +129,7 @@ struct ThreadedRegexVM
|
||||||
auto child = prog_start + *reinterpret_cast<const CompiledRegex::Offset*>(thread.inst);
|
auto child = prog_start + *reinterpret_cast<const CompiledRegex::Offset*>(thread.inst);
|
||||||
thread.inst = parent;
|
thread.inst = parent;
|
||||||
++thread.saves->refcount;
|
++thread.saves->refcount;
|
||||||
m_threads.insert(m_threads.begin() + thread_index + 1, {child, thread.saves});
|
m_current_threads.push_back({child, thread.saves});
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case CompiledRegex::Split_PrioritizeChild:
|
case CompiledRegex::Split_PrioritizeChild:
|
||||||
|
@ -139,7 +138,7 @@ struct ThreadedRegexVM
|
||||||
auto child = prog_start + *reinterpret_cast<const CompiledRegex::Offset*>(thread.inst);
|
auto child = prog_start + *reinterpret_cast<const CompiledRegex::Offset*>(thread.inst);
|
||||||
thread.inst = child;
|
thread.inst = child;
|
||||||
++thread.saves->refcount;
|
++thread.saves->refcount;
|
||||||
m_threads.insert(m_threads.begin() + thread_index + 1, {parent, thread.saves});
|
m_current_threads.push_back({parent, thread.saves});
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case CompiledRegex::Save:
|
case CompiledRegex::Save:
|
||||||
|
@ -219,10 +218,12 @@ struct ThreadedRegexVM
|
||||||
bool exec(Iterator begin, Iterator end, RegexExecFlags flags)
|
bool exec(Iterator begin, Iterator end, RegexExecFlags flags)
|
||||||
{
|
{
|
||||||
bool found_match = false;
|
bool found_match = false;
|
||||||
m_threads.clear();
|
m_current_threads.clear();
|
||||||
|
m_next_threads.clear();
|
||||||
|
|
||||||
const auto start_offset = (flags & RegexExecFlags::Search) ? 0 : CompiledRegex::search_prefix_size;
|
const auto start_offset = (flags & RegexExecFlags::Search) ? 0 : CompiledRegex::search_prefix_size;
|
||||||
m_saves.push_back(std::make_unique<Saves>(Saves{1, Vector<Iterator>(m_program.save_count, Iterator{})}));
|
m_saves.push_back(std::make_unique<Saves>(Saves{1, Vector<Iterator>(m_program.save_count, Iterator{})}));
|
||||||
m_threads.push_back({m_program.bytecode.data() + start_offset, m_saves.back().get()});
|
m_current_threads.push_back({m_program.bytecode.data() + start_offset, m_saves.back().get()});
|
||||||
|
|
||||||
m_begin = begin;
|
m_begin = begin;
|
||||||
m_end = end;
|
m_end = end;
|
||||||
|
@ -238,60 +239,55 @@ struct ThreadedRegexVM
|
||||||
|
|
||||||
for (m_pos = Utf8It{m_begin, m_begin, m_end}; m_pos != m_end; ++m_pos)
|
for (m_pos = Utf8It{m_begin, m_begin, m_end}; m_pos != m_end; ++m_pos)
|
||||||
{
|
{
|
||||||
for (int i = 0; i < m_threads.size(); ++i)
|
while (not m_current_threads.empty())
|
||||||
{
|
{
|
||||||
const auto res = step(i);
|
auto thread = m_current_threads.back();
|
||||||
if (res == StepResult::Matched)
|
m_current_threads.pop_back();
|
||||||
|
switch (step(thread))
|
||||||
{
|
{
|
||||||
|
case StepResult::Matched:
|
||||||
if (not (flags & RegexExecFlags::Search) or // We are not at end, this is not a full match
|
if (not (flags & RegexExecFlags::Search) or // We are not at end, this is not a full match
|
||||||
(flags & RegexExecFlags::NotInitialNull and m_pos == m_begin))
|
(flags & RegexExecFlags::NotInitialNull and m_pos == m_begin))
|
||||||
{
|
{
|
||||||
m_threads[i].inst = nullptr;
|
release_saves(thread.saves);
|
||||||
release_saves(m_threads[i].saves);
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
m_captures = std::move(m_threads[i].saves->pos);
|
m_captures = std::move(thread.saves->pos);
|
||||||
if (flags & RegexExecFlags::AnyMatch)
|
if (flags & RegexExecFlags::AnyMatch)
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
found_match = true;
|
found_match = true;
|
||||||
m_threads.resize(i); // remove this and lower priority threads
|
m_current_threads.clear(); // remove this and lower priority threads
|
||||||
}
|
break;
|
||||||
else if (res == StepResult::Failed)
|
case StepResult::Failed:
|
||||||
{
|
release_saves(thread.saves);
|
||||||
m_threads[i].inst = nullptr;
|
break;
|
||||||
release_saves(m_threads[i].saves);
|
case StepResult::Consumed:
|
||||||
}
|
if (contains_that(m_next_threads, [&](auto& t) { return t.inst == thread.inst; }))
|
||||||
|
release_saves(thread.saves);
|
||||||
else
|
else
|
||||||
{
|
m_next_threads.push_back(thread);
|
||||||
auto it = m_threads.begin() + i;
|
break;
|
||||||
if (std::find_if(m_threads.begin(), it, [inst = it->inst](auto& t)
|
|
||||||
{ return t.inst == inst; }) != it)
|
|
||||||
{
|
|
||||||
m_threads[i].inst = nullptr;
|
|
||||||
release_saves(m_threads[i].saves);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
if (m_next_threads.empty())
|
||||||
// Remove dead threads
|
|
||||||
m_threads.erase(std::remove_if(m_threads.begin(), m_threads.end(),
|
|
||||||
[](auto& t) { return t.inst == nullptr; }),
|
|
||||||
m_threads.end());
|
|
||||||
// we should never have more than one thread on the same instruction
|
|
||||||
kak_assert(m_threads.size() <= m_program.bytecode.size());
|
|
||||||
if (m_threads.empty())
|
|
||||||
return found_match;
|
return found_match;
|
||||||
|
|
||||||
|
std::swap(m_current_threads, m_next_threads);
|
||||||
|
std::reverse(m_current_threads.begin(), m_current_threads.end());
|
||||||
}
|
}
|
||||||
if (found_match)
|
if (found_match)
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
// Step remaining threads to see if they match without consuming anything else
|
// Step remaining threads to see if they match without consuming anything else
|
||||||
for (int i = 0; i < m_threads.size(); ++i)
|
while (not m_current_threads.empty())
|
||||||
{
|
{
|
||||||
if (step(i) == StepResult::Matched)
|
auto thread = m_current_threads.back();
|
||||||
|
m_current_threads.pop_back();
|
||||||
|
if (step(thread) == StepResult::Matched)
|
||||||
{
|
{
|
||||||
m_captures = std::move(m_threads[i].saves->pos);
|
m_captures = std::move(thread.saves->pos);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -318,7 +314,8 @@ struct ThreadedRegexVM
|
||||||
}
|
}
|
||||||
|
|
||||||
const CompiledRegex& m_program;
|
const CompiledRegex& m_program;
|
||||||
Vector<Thread> m_threads;
|
Vector<Thread> m_current_threads;
|
||||||
|
Vector<Thread> m_next_threads;
|
||||||
|
|
||||||
using Utf8It = utf8::iterator<Iterator>;
|
using Utf8It = utf8::iterator<Iterator>;
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user