Regex: Fix handling of all Unicode codepoints as start chars

This commit is contained in:
Maxime Coste 2017-10-20 18:26:33 +08:00
parent df2bf9601c
commit 60e32d73ff

View File

@@ -858,6 +858,8 @@ private:
else else
start_chars.map[node->value] = true; start_chars.map[node->value] = true;
} }
else
start_chars.accept_other = true;
return node->quantifier.allows_none(); return node->quantifier.allows_none();
case ParsedRegex::AnyChar: case ParsedRegex::AnyChar:
for (auto& b : start_chars.map) for (auto& b : start_chars.map)
@@ -1332,6 +1334,11 @@ auto test_regex = UnitTest{[]{
kak_assert(vm.exec("abcde")); kak_assert(vm.exec("abcde"));
} }
{
TestVM<> vm{R"(д)"};
kak_assert(vm.exec("д", RegexExecFlags::Search));
}
{ {
TestVM<> vm{R"(\0\x0A\u260e\u260F)"}; TestVM<> vm{R"(\0\x0A\u260e\u260F)"};
const char str[] = "\0\n☎☏"; // work around the null byte in the literal const char str[] = "\0\n☎☏"; // work around the null byte in the literal