Regex: Fix handling of control escapes inside character classes

This commit is contained in:
Maxime Coste 2017-10-07 21:24:05 +08:00
parent f3736a4b48
commit 578640c8a4

View File

@ -278,9 +278,6 @@ private:
}
// CharacterEscape
struct { Codepoint name; Codepoint value; } control_escapes[] = {
{ 'f', '\f' }, { 'n', '\n' }, { 'r', '\r' }, { 't', '\t' }, { 'v', '\v' }
};
for (auto& control : control_escapes)
{
if (control.name == cp)
@ -337,10 +334,15 @@ private:
}
else // it's just an escaped character
{
if (++m_pos == m_regex.end())
break;
cp = *m_pos;
cp = *m_pos++;
for (auto& control : control_escapes)
{
if (control.name == cp)
{
cp = control.value;
break;
}
}
}
}
@ -487,8 +489,10 @@ private:
StringView additional_chars;
bool neg;
};
static const CharacterClassEscape character_class_escapes[8];
// Associates the letter used in a control escape with the codepoint it denotes.
struct ControlEscape
{
    Codepoint name;  // letter identifying the escape, e.g. 'n'
    Codepoint value; // control character it stands for, e.g. '\n'
};
// Lookup table of the five supported control escapes; defined out of class.
static const ControlEscape control_escapes[5];
};
// For some reason GCC fails to link if this is constexpr
@ -499,6 +503,11 @@ const RegexParser::CharacterClassEscape RegexParser::character_class_escapes[8]
{ 'h', nullptr, " \t", false },
};
// Control escapes known to the parser: each entry pairs the escape letter
// with the control character that replaces it.
const RegexParser::ControlEscape RegexParser::control_escapes[5] = {
    { 'f', '\f' },
    { 'n', '\n' },
    { 'r', '\r' },
    { 't', '\t' },
    { 'v', '\v' },
};
struct RegexCompiler
{
RegexCompiler(const ParsedRegex& parsed_regex, MatchDirection direction)
@ -1062,6 +1071,12 @@ auto test_regex = UnitTest{[]{
kak_assert(vm.exec("abc"));
}
{
TestVM<> vm{R"([^:\n]+)"};
kak_assert(not vm.exec("\nbc"));
kak_assert(vm.exec("abc"));
}
{
TestVM<> vm{R"((?:foo)+)"};
kak_assert(vm.exec("foofoofoo"));