From 33e81af0f3b6dff09903d2d3aaa36e7800941166 Mon Sep 17 00:00:00 2001 From: Maxime Coste Date: Wed, 2 Feb 2022 14:51:04 +1100 Subject: [PATCH] Fix regex alternation execution priority The ThreadedRegexVM implementation does not execute split opcodes as expected: on split the pending thread is pushed on top of the thread stack, which means that when multiple splits are executed in a row (such as with a disjunction with 3 or more branches) the last split target gets on top of the thread stack and gets executed next (when the thread from the first split target would be the expected one) Fixing this in the ThreadedRegexVM would have a performance impact as we would not be able to use a plain stack for current threads, so the best solution at the moment is to reverse the order of splits generated by a disjunction. Fixes #4519 --- src/regex_impl.cc | 10 +++++++--- src/regex_impl.hh | 4 +--- test/regression/4519-regex-alternation-priority/cmd | 1 + test/regression/4519-regex-alternation-priority/in | 1 + .../4519-regex-alternation-priority/kak_selections | 1 + 5 files changed, 11 insertions(+), 6 deletions(-) create mode 100644 test/regression/4519-regex-alternation-priority/cmd create mode 100644 test/regression/4519-regex-alternation-priority/in create mode 100644 test/regression/4519-regex-alternation-priority/kak_selections diff --git a/src/regex_impl.cc b/src/regex_impl.cc index 3dc8bd6b..256afcc1 100644 --- a/src/regex_impl.cc +++ b/src/regex_impl.cc @@ -757,19 +757,19 @@ private: } case ParsedRegex::Alternation: { - auto split_pos = m_program.instructions.size(); for (auto child : Children<>{m_parsed_regex, index}) { if (child != index+1) push_inst(CompiledRegex::Split); } + auto split_pos = m_program.instructions.size(); const auto end = node.children_end; for (auto child : Children<>{m_parsed_regex, index}) { auto node = compile_node(child); if (child != index+1) - m_program.instructions[split_pos++].param.split = CompiledRegex::Param::Split{.target = node, .prioritize_parent = true}; + m_program.instructions[--split_pos].param.split = CompiledRegex::Param::Split{.target = node, .prioritize_parent = true}; if (get_node(child).children_end != end) { auto jump = push_inst(CompiledRegex::Jump); @@ -1350,7 +1350,11 @@ auto test_regex = UnitTest{[]{ kak_assert(StringView{vm.captures()[0], vm.captures()[1]} == "bar"); kak_assert(not vm.exec("bar")); } - + { + TestVM vm{R"(foobaz|foo|foobar)"}; + kak_assert(vm.exec("foobar")); + kak_assert(StringView{vm.captures()[0], vm.captures()[1]} == "foo"); + } { TestVM vm{R"((fo+?).*)"}; kak_assert(vm.exec("foooo")); diff --git a/src/regex_impl.hh b/src/regex_impl.hh index 0565c49c..ca859592 100644 --- a/src/regex_impl.hh +++ b/src/regex_impl.hh @@ -113,8 +113,6 @@ struct CompiledRegex : RefCountable, UseMemoryDomain }; static_assert(sizeof(Instruction) == 8); - static constexpr uint32_t prioritize_parent{1 << 16}; - explicit operator bool() const { return not instructions.empty(); } struct NamedCapture @@ -255,7 +253,7 @@ public: else if (start != config.end) { const unsigned char c = forward ? *start : *utf8::previous(start, config.end); - if (not start_desc->map[(c < StartDesc::count) ? c : StartDesc::other]) + if (not start_desc->map[(c < StartDesc::count) ? c : StartDesc::other]) return false; } } diff --git a/test/regression/4519-regex-alternation-priority/cmd b/test/regression/4519-regex-alternation-priority/cmd new file mode 100644 index 00000000..c9327054 --- /dev/null +++ b/test/regression/4519-regex-alternation-priority/cmd @@ -0,0 +1 @@ +%sfoobaz|foo|foobar diff --git a/test/regression/4519-regex-alternation-priority/in b/test/regression/4519-regex-alternation-priority/in new file mode 100644 index 00000000..323fae03 --- /dev/null +++ b/test/regression/4519-regex-alternation-priority/in @@ -0,0 +1 @@ +foobar diff --git a/test/regression/4519-regex-alternation-priority/kak_selections b/test/regression/4519-regex-alternation-priority/kak_selections new file mode 100644 index 00000000..257cc564 --- /dev/null +++ b/test/regression/4519-regex-alternation-priority/kak_selections @@ -0,0 +1 @@ +foo