Refactor regex find next start not to be an instruction anymore
The same logic can be hard coded, avoiding one thread and 3 instructions, improving the regex matching speed.
This commit is contained in:
parent
fd043435e5
commit
0364a99827
|
@ -676,14 +676,13 @@ struct RegexCompiler
|
||||||
{
|
{
|
||||||
kak_assert(not (flags & RegexCompileFlags::NoForward) or flags & RegexCompileFlags::Backward);
|
kak_assert(not (flags & RegexCompileFlags::NoForward) or flags & RegexCompileFlags::Backward);
|
||||||
// Approximation of the number of instructions generated
|
// Approximation of the number of instructions generated
|
||||||
m_program.instructions.reserve((CompiledRegex::search_prefix_size + parsed_regex.nodes.size() + 1)
|
m_program.instructions.reserve((parsed_regex.nodes.size() + 1)
|
||||||
* (((flags & RegexCompileFlags::Backward) and
|
* (((flags & RegexCompileFlags::Backward) and
|
||||||
not (flags & RegexCompileFlags::NoForward)) ? 2 : 1));
|
not (flags & RegexCompileFlags::NoForward)) ? 2 : 1));
|
||||||
|
|
||||||
if (not (flags & RegexCompileFlags::NoForward))
|
if (not (flags & RegexCompileFlags::NoForward))
|
||||||
{
|
{
|
||||||
m_program.forward_start_desc = compute_start_desc<RegexMode::Forward>();
|
m_program.forward_start_desc = compute_start_desc<RegexMode::Forward>();
|
||||||
write_search_prefix();
|
|
||||||
compile_node<RegexMode::Forward>(0);
|
compile_node<RegexMode::Forward>(0);
|
||||||
push_inst(CompiledRegex::Match);
|
push_inst(CompiledRegex::Match);
|
||||||
}
|
}
|
||||||
|
@ -692,7 +691,6 @@ struct RegexCompiler
|
||||||
{
|
{
|
||||||
m_program.first_backward_inst = m_program.instructions.size();
|
m_program.first_backward_inst = m_program.instructions.size();
|
||||||
m_program.backward_start_desc = compute_start_desc<RegexMode::Backward>();
|
m_program.backward_start_desc = compute_start_desc<RegexMode::Backward>();
|
||||||
write_search_prefix();
|
|
||||||
compile_node<RegexMode::Backward>(0);
|
compile_node<RegexMode::Backward>(0);
|
||||||
push_inst(CompiledRegex::Match);
|
push_inst(CompiledRegex::Match);
|
||||||
}
|
}
|
||||||
|
@ -866,16 +864,6 @@ private:
|
||||||
return start_pos;
|
return start_pos;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add a sequence of instructions that enable searching for a match instead of checking for it
|
|
||||||
void write_search_prefix()
|
|
||||||
{
|
|
||||||
const uint32_t first_inst = m_program.instructions.size();
|
|
||||||
push_inst(CompiledRegex::Split_PrioritizeChild, first_inst + CompiledRegex::search_prefix_size);
|
|
||||||
push_inst(CompiledRegex::FindNextStart);
|
|
||||||
push_inst(CompiledRegex::Split_PrioritizeParent, first_inst + 1);
|
|
||||||
kak_assert(m_program.instructions.size() == first_inst + CompiledRegex::search_prefix_size);
|
|
||||||
}
|
|
||||||
|
|
||||||
uint32_t push_inst(CompiledRegex::Op op, uint32_t param = 0)
|
uint32_t push_inst(CompiledRegex::Op op, uint32_t param = 0)
|
||||||
{
|
{
|
||||||
constexpr auto max_instructions = std::numeric_limits<int16_t>::max();
|
constexpr auto max_instructions = std::numeric_limits<int16_t>::max();
|
||||||
|
@ -1137,9 +1125,6 @@ String dump_regex(const CompiledRegex& program)
|
||||||
res += format("{} ({})\n", name, str);
|
res += format("{} ({})\n", name, str);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case CompiledRegex::FindNextStart:
|
|
||||||
res += "find next start\n";
|
|
||||||
break;
|
|
||||||
case CompiledRegex::Match:
|
case CompiledRegex::Match:
|
||||||
res += "match\n";
|
res += "match\n";
|
||||||
}
|
}
|
||||||
|
|
|
@ -49,7 +49,6 @@ struct CompiledRegex : RefCountable, UseMemoryDomain<MemoryDomain::Regex>
|
||||||
enum Op : char
|
enum Op : char
|
||||||
{
|
{
|
||||||
Match,
|
Match,
|
||||||
FindNextStart,
|
|
||||||
Literal,
|
Literal,
|
||||||
Literal_IgnoreCase,
|
Literal_IgnoreCase,
|
||||||
AnyChar,
|
AnyChar,
|
||||||
|
@ -97,8 +96,6 @@ struct CompiledRegex : RefCountable, UseMemoryDomain<MemoryDomain::Regex>
|
||||||
};
|
};
|
||||||
static_assert(sizeof(Instruction) == 8, "");
|
static_assert(sizeof(Instruction) == 8, "");
|
||||||
|
|
||||||
static constexpr uint16_t search_prefix_size = 3;
|
|
||||||
|
|
||||||
explicit operator bool() const { return not instructions.empty(); }
|
explicit operator bool() const { return not instructions.empty(); }
|
||||||
|
|
||||||
struct NamedCapture
|
struct NamedCapture
|
||||||
|
@ -217,8 +214,6 @@ public:
|
||||||
instructions = instructions.subrange(0, m_program.first_backward_inst);
|
instructions = instructions.subrange(0, m_program.first_backward_inst);
|
||||||
else
|
else
|
||||||
instructions = instructions.subrange(m_program.first_backward_inst);
|
instructions = instructions.subrange(m_program.first_backward_inst);
|
||||||
if (not search)
|
|
||||||
instructions = instructions.subrange(CompiledRegex::search_prefix_size);
|
|
||||||
|
|
||||||
const ExecConfig config{
|
const ExecConfig config{
|
||||||
Sentinel{forward ? begin : end},
|
Sentinel{forward ? begin : end},
|
||||||
|
@ -452,14 +447,6 @@ private:
|
||||||
(inst.op == CompiledRegex::LookBehind_IgnoreCase))
|
(inst.op == CompiledRegex::LookBehind_IgnoreCase))
|
||||||
return failed();
|
return failed();
|
||||||
break;
|
break;
|
||||||
case CompiledRegex::FindNextStart:
|
|
||||||
// search thread should by construction be the lowest priority thread
|
|
||||||
kak_assert(m_threads.current_is_empty());
|
|
||||||
if (not m_threads.next_is_empty())
|
|
||||||
return consumed();
|
|
||||||
m_threads.push_next(thread);
|
|
||||||
m_find_next_start = true;
|
|
||||||
return;
|
|
||||||
case CompiledRegex::Match:
|
case CompiledRegex::Match:
|
||||||
if ((pos != config.end and not (mode & RegexMode::Search)) or
|
if ((pos != config.end and not (mode & RegexMode::Search)) or
|
||||||
(config.flags & RegexExecFlags::NotInitialNull and pos == config.begin))
|
(config.flags & RegexExecFlags::NotInitialNull and pos == config.begin))
|
||||||
|
@ -484,10 +471,12 @@ private:
|
||||||
release_saves(m_captures);
|
release_saves(m_captures);
|
||||||
m_captures = -1;
|
m_captures = -1;
|
||||||
m_threads.grow_ifn();
|
m_threads.grow_ifn();
|
||||||
m_threads.push_current({static_cast<int16_t>(&config.instructions[0] - &m_program.instructions[0]), -1});
|
const int16_t first_inst = &config.instructions[0] - &m_program.instructions[0];
|
||||||
|
m_threads.push_current({first_inst, -1});
|
||||||
|
|
||||||
const auto& start_desc = forward ? m_program.forward_start_desc : m_program.backward_start_desc;
|
const auto& start_desc = forward ? m_program.forward_start_desc : m_program.backward_start_desc;
|
||||||
|
|
||||||
|
constexpr bool search = mode & RegexMode::Search;
|
||||||
constexpr bool any_match = mode & RegexMode::AnyMatch;
|
constexpr bool any_match = mode & RegexMode::AnyMatch;
|
||||||
uint16_t current_step = -1;
|
uint16_t current_step = -1;
|
||||||
m_found_match = false;
|
m_found_match = false;
|
||||||
|
@ -501,14 +490,15 @@ private:
|
||||||
current_step = 1; // step 0 is never valid
|
current_step = 1; // step 0 is never valid
|
||||||
}
|
}
|
||||||
|
|
||||||
m_find_next_start = false;
|
|
||||||
while (not m_threads.current_is_empty())
|
while (not m_threads.current_is_empty())
|
||||||
step_thread(pos, current_step, m_threads.pop_current(), config);
|
step_thread(pos, current_step, m_threads.pop_current(), config);
|
||||||
|
|
||||||
for (auto& thread : m_threads.next_threads())
|
for (auto& thread : m_threads.next_threads())
|
||||||
m_program.instructions[thread.inst].scheduled = false;
|
m_program.instructions[thread.inst].scheduled = false;
|
||||||
|
|
||||||
if (pos == config.end or m_threads.next_is_empty() or (m_found_match and any_match))
|
if (pos == config.end or
|
||||||
|
(m_threads.next_is_empty() and (not search or m_found_match)) or
|
||||||
|
(m_found_match and any_match))
|
||||||
{
|
{
|
||||||
for (auto& t : m_threads.next_threads())
|
for (auto& t : m_threads.next_threads())
|
||||||
release_saves(t.saves);
|
release_saves(t.saves);
|
||||||
|
@ -516,12 +506,17 @@ private:
|
||||||
return m_found_match;
|
return m_found_match;
|
||||||
}
|
}
|
||||||
|
|
||||||
m_threads.swap_next();
|
|
||||||
forward ? utf8::to_next(pos, config.subject_end)
|
forward ? utf8::to_next(pos, config.subject_end)
|
||||||
: utf8::to_previous(pos, config.subject_begin);
|
: utf8::to_previous(pos, config.subject_begin);
|
||||||
|
|
||||||
if (m_find_next_start and start_desc)
|
if (search)
|
||||||
to_next_start(pos, config, *start_desc);
|
{
|
||||||
|
if (start_desc and m_threads.next_is_empty())
|
||||||
|
to_next_start(pos, config, *start_desc);
|
||||||
|
m_threads.grow_ifn();
|
||||||
|
m_threads.push_next({first_inst, -1});
|
||||||
|
}
|
||||||
|
m_threads.swap_next();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -691,7 +686,6 @@ private:
|
||||||
int16_t m_first_free = -1;
|
int16_t m_first_free = -1;
|
||||||
int16_t m_captures = -1;
|
int16_t m_captures = -1;
|
||||||
bool m_found_match = false;
|
bool m_found_match = false;
|
||||||
bool m_find_next_start = false;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue
Block a user