Fix regex alternation execution priority

The ThreadedRegexVM implementation does not execute split opcodes as
expected: on split the pending thread is pushed on top of the thread
stack, which means that when multiple splits are executed in a row
(such as with a disjunction with 3 or more branches) the last split
target gets on top of the thread stack and gets executed next (when the
thread from the first split target would be the expected one)

Fixing this in the ThreadedRegexVM would have a performance impact as
we would not be able to use a plain stack for current threads, so the
best solution at the moment is to reverse the order of splits generated
by a disjunction.

Fixes #4519
This commit is contained in:
Maxime Coste 2022-02-02 14:51:04 +11:00
parent 0b29fcf32a
commit 33e81af0f3
5 changed files with 11 additions and 6 deletions

View File

@ -757,19 +757,19 @@ private:
}
case ParsedRegex::Alternation:
{
auto split_pos = m_program.instructions.size();
for (auto child : Children<>{m_parsed_regex, index})
{
if (child != index+1)
push_inst(CompiledRegex::Split);
}
auto split_pos = m_program.instructions.size();
const auto end = node.children_end;
for (auto child : Children<>{m_parsed_regex, index})
{
auto node = compile_node<direction>(child);
if (child != index+1)
m_program.instructions[split_pos++].param.split = CompiledRegex::Param::Split{.target = node, .prioritize_parent = true};
m_program.instructions[--split_pos].param.split = CompiledRegex::Param::Split{.target = node, .prioritize_parent = true};
if (get_node(child).children_end != end)
{
auto jump = push_inst(CompiledRegex::Jump);
@ -1350,7 +1350,11 @@ auto test_regex = UnitTest{[]{
kak_assert(StringView{vm.captures()[0], vm.captures()[1]} == "bar");
kak_assert(not vm.exec("bar"));
}
{
TestVM<RegexMode::Forward | RegexMode::Search> vm{R"(foobaz|foo|foobar)"};
kak_assert(vm.exec("foobar"));
kak_assert(StringView{vm.captures()[0], vm.captures()[1]} == "foo");
}
{
TestVM<RegexMode::Forward> vm{R"((fo+?).*)"};
kak_assert(vm.exec("foooo"));

View File

@ -113,8 +113,6 @@ struct CompiledRegex : RefCountable, UseMemoryDomain<MemoryDomain::Regex>
};
static_assert(sizeof(Instruction) == 8);
static constexpr uint32_t prioritize_parent{1 << 16};
explicit operator bool() const { return not instructions.empty(); }
struct NamedCapture
@ -255,7 +253,7 @@ public:
else if (start != config.end)
{
const unsigned char c = forward ? *start : *utf8::previous(start, config.end);
if (not start_desc->map[(c < StartDesc::count) ? c : StartDesc::other])
if (not start_desc->map[(c < StartDesc::count) ? c : StartDesc::other])
return false;
}
}

View File

@ -0,0 +1 @@
%sfoobaz|foo|foobar<ret>

View File

@ -0,0 +1 @@
foobar

View File

@ -0,0 +1 @@
foo