From 578640c8a482389beeaf6c3588663dfd5fa5461a Mon Sep 17 00:00:00 2001 From: Maxime Coste Date: Sat, 7 Oct 2017 21:24:05 +0800 Subject: [PATCH] Regex: Fix handling of control escapes inside character classes --- src/regex_impl.cc | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/src/regex_impl.cc b/src/regex_impl.cc index dcc968f6..659a84ec 100644 --- a/src/regex_impl.cc +++ b/src/regex_impl.cc @@ -278,9 +278,6 @@ private: } // CharacterEscape - struct { Codepoint name; Codepoint value; } control_escapes[] = { - { 'f', '\f' }, { 'n', '\n' }, { 'r', '\r' }, { 't', '\t' }, { 'v', '\v' } - }; for (auto& control : control_escapes) { if (control.name == cp) @@ -337,10 +334,15 @@ private: } else // its just an escaped character { - - if (++m_pos == m_regex.end()) - break; - cp = *m_pos; + cp = *m_pos++; + for (auto& control : control_escapes) + { + if (control.name == cp) + { + cp = control.value; + break; + } + } } } @@ -487,8 +489,10 @@ private: StringView additional_chars; bool neg; }; - static const CharacterClassEscape character_class_escapes[8]; + + struct ControlEscape { Codepoint name; Codepoint value; }; + static const ControlEscape control_escapes[5]; }; // For some reason Gcc fails to link if this is constexpr @@ -499,6 +503,11 @@ const RegexParser::CharacterClassEscape RegexParser::character_class_escapes[8] { 'h', nullptr, " \t", false }, }; + +const RegexParser::ControlEscape RegexParser::control_escapes[5] = { + { 'f', '\f' }, { 'n', '\n' }, { 'r', '\r' }, { 't', '\t' }, { 'v', '\v' } +}; + struct RegexCompiler { RegexCompiler(const ParsedRegex& parsed_regex, MatchDirection direction) @@ -1062,6 +1071,12 @@ auto test_regex = UnitTest{[]{ kak_assert(vm.exec("abc")); } + { + TestVM<> vm{R"([^:\n]+)"}; + kak_assert(not vm.exec("\nbc")); + kak_assert(vm.exec("abc")); + } + { TestVM<> vm{R"((?:foo)+)"}; kak_assert(vm.exec("foofoofoo"));