Fix regex alternation execution priority
The ThreadedRegexVM implementation does not execute split opcodes as expected: on a split, the pending thread is pushed on top of the thread stack. This means that when multiple splits are executed in a row (such as with a disjunction with 3 or more branches), the last split target ends up on top of the thread stack and gets executed next, whereas the thread from the first split target would be the expected one. Fixing this in the ThreadedRegexVM would have a performance impact, as we would not be able to use a plain stack for current threads, so the best solution at the moment is to reverse the order of the splits generated by a disjunction. Fixes #4519.
This commit is contained in:
parent
0b29fcf32a
commit
33e81af0f3
|
@ -757,19 +757,19 @@ private:
|
||||||
}
|
}
|
||||||
case ParsedRegex::Alternation:
|
case ParsedRegex::Alternation:
|
||||||
{
|
{
|
||||||
auto split_pos = m_program.instructions.size();
|
|
||||||
for (auto child : Children<>{m_parsed_regex, index})
|
for (auto child : Children<>{m_parsed_regex, index})
|
||||||
{
|
{
|
||||||
if (child != index+1)
|
if (child != index+1)
|
||||||
push_inst(CompiledRegex::Split);
|
push_inst(CompiledRegex::Split);
|
||||||
}
|
}
|
||||||
|
auto split_pos = m_program.instructions.size();
|
||||||
|
|
||||||
const auto end = node.children_end;
|
const auto end = node.children_end;
|
||||||
for (auto child : Children<>{m_parsed_regex, index})
|
for (auto child : Children<>{m_parsed_regex, index})
|
||||||
{
|
{
|
||||||
auto node = compile_node<direction>(child);
|
auto node = compile_node<direction>(child);
|
||||||
if (child != index+1)
|
if (child != index+1)
|
||||||
m_program.instructions[split_pos++].param.split = CompiledRegex::Param::Split{.target = node, .prioritize_parent = true};
|
m_program.instructions[--split_pos].param.split = CompiledRegex::Param::Split{.target = node, .prioritize_parent = true};
|
||||||
if (get_node(child).children_end != end)
|
if (get_node(child).children_end != end)
|
||||||
{
|
{
|
||||||
auto jump = push_inst(CompiledRegex::Jump);
|
auto jump = push_inst(CompiledRegex::Jump);
|
||||||
|
@ -1350,7 +1350,11 @@ auto test_regex = UnitTest{[]{
|
||||||
kak_assert(StringView{vm.captures()[0], vm.captures()[1]} == "bar");
|
kak_assert(StringView{vm.captures()[0], vm.captures()[1]} == "bar");
|
||||||
kak_assert(not vm.exec("bar"));
|
kak_assert(not vm.exec("bar"));
|
||||||
}
|
}
|
||||||
|
{
|
||||||
|
TestVM<RegexMode::Forward | RegexMode::Search> vm{R"(foobaz|foo|foobar)"};
|
||||||
|
kak_assert(vm.exec("foobar"));
|
||||||
|
kak_assert(StringView{vm.captures()[0], vm.captures()[1]} == "foo");
|
||||||
|
}
|
||||||
{
|
{
|
||||||
TestVM<RegexMode::Forward> vm{R"((fo+?).*)"};
|
TestVM<RegexMode::Forward> vm{R"((fo+?).*)"};
|
||||||
kak_assert(vm.exec("foooo"));
|
kak_assert(vm.exec("foooo"));
|
||||||
|
|
|
@ -113,8 +113,6 @@ struct CompiledRegex : RefCountable, UseMemoryDomain<MemoryDomain::Regex>
|
||||||
};
|
};
|
||||||
static_assert(sizeof(Instruction) == 8);
|
static_assert(sizeof(Instruction) == 8);
|
||||||
|
|
||||||
static constexpr uint32_t prioritize_parent{1 << 16};
|
|
||||||
|
|
||||||
explicit operator bool() const { return not instructions.empty(); }
|
explicit operator bool() const { return not instructions.empty(); }
|
||||||
|
|
||||||
struct NamedCapture
|
struct NamedCapture
|
||||||
|
|
1
test/regression/4519-regex-alternation-priority/cmd
Normal file
1
test/regression/4519-regex-alternation-priority/cmd
Normal file
|
@ -0,0 +1 @@
|
||||||
|
%sfoobaz|foo|foobar<ret>
|
1
test/regression/4519-regex-alternation-priority/in
Normal file
1
test/regression/4519-regex-alternation-priority/in
Normal file
|
@ -0,0 +1 @@
|
||||||
|
foobar
|
|
@ -0,0 +1 @@
|
||||||
|
foo
|
Loading…
Reference in New Issue
Block a user