Regex: Add support for \h and \H "horizontal blank" character classes
This commit is contained in:
parent
4ac0d35d1e
commit
e4004a7b7f
|
@ -222,9 +222,9 @@ private:
|
|||
{
|
||||
auto matcher_id = m_parsed_regex.matchers.size();
|
||||
m_parsed_regex.matchers.push_back(
|
||||
[ctype = wctype(character_class.ctype),
|
||||
[ctype = character_class.ctype ? wctype(character_class.ctype) : (wctype_t)0,
|
||||
chars = character_class.additional_chars] (Codepoint cp) {
|
||||
return iswctype(cp, ctype) or contains(chars, cp);
|
||||
return (ctype != 0 and iswctype(cp, ctype)) or contains(chars, cp);
|
||||
});
|
||||
return new_node(ParsedRegex::Matcher, matcher_id);
|
||||
}
|
||||
|
@ -255,6 +255,7 @@ private:
|
|||
|
||||
struct CharRange { Codepoint min, max; };
|
||||
Vector<CharRange> ranges;
|
||||
Vector<Codepoint> excluded;
|
||||
Vector<std::pair<wctype_t, bool>> ctypes;
|
||||
while (m_pos != m_regex.end() and *m_pos != ']')
|
||||
{
|
||||
|
@ -274,9 +275,15 @@ private:
|
|||
[cp = *m_pos](auto& t) { return t.cp == cp; });
|
||||
if (it != std::end(character_class_escapes))
|
||||
{
|
||||
ctypes.push_back({wctype(it->ctype), not it->neg});
|
||||
for (auto& c : it->additional_chars)
|
||||
ranges.push_back({(Codepoint)c, (Codepoint)c});
|
||||
if (it->ctype)
|
||||
ctypes.push_back({wctype(it->ctype), not it->neg});
|
||||
for (auto& c : it->additional_chars) // TODO: handle negative case
|
||||
{
|
||||
if (it->neg)
|
||||
excluded.push_back((Codepoint)c);
|
||||
else
|
||||
ranges.push_back({(Codepoint)c, (Codepoint)c});
|
||||
}
|
||||
++m_pos;
|
||||
continue;
|
||||
}
|
||||
|
@ -306,12 +313,13 @@ private:
|
|||
++m_pos;
|
||||
|
||||
auto matcher = [ranges = std::move(ranges),
|
||||
ctypes = std::move(ctypes), negative] (Codepoint cp) {
|
||||
ctypes = std::move(ctypes),
|
||||
excluded = std::move(excluded), negative] (Codepoint cp) {
|
||||
auto found = contains_that(ranges, [cp](auto& r) {
|
||||
return r.min <= cp and cp <= r.max;
|
||||
}) or contains_that(ctypes, [cp](auto& c) {
|
||||
return (bool)iswctype(cp, c.first) == c.second;
|
||||
});
|
||||
}) or (not excluded.empty() and not contains(excluded, cp));
|
||||
return negative ? not found : found;
|
||||
};
|
||||
|
||||
|
@ -390,17 +398,19 @@ private:
|
|||
bool neg;
|
||||
};
|
||||
|
||||
static const CharacterClassEscape character_class_escapes[6];
|
||||
static const CharacterClassEscape character_class_escapes[8];
|
||||
};
|
||||
|
||||
// For some reason Gcc fails to link if this is constexpr
|
||||
const RegexParser::CharacterClassEscape RegexParser::character_class_escapes[6] = {
|
||||
const RegexParser::CharacterClassEscape RegexParser::character_class_escapes[8] = {
|
||||
{ 'd', "digit", "", false },
|
||||
{ 'D', "digit", "", true },
|
||||
{ 'w', "alnum", "_", false },
|
||||
{ 'W', "alnum", "_", true },
|
||||
{ 's', "space", "", false },
|
||||
{ 's', "space", "", true },
|
||||
{ 'S', "space", "", true },
|
||||
{ 'h', nullptr, " \t", false },
|
||||
{ 'H', nullptr, " \t", true },
|
||||
};
|
||||
|
||||
struct CompiledRegex
|
||||
|
@ -982,6 +992,12 @@ auto test_regex = UnitTest{[]{
|
|||
kak_assert(not vm.exec("123_456"));
|
||||
}
|
||||
|
||||
{
|
||||
TestVM vm{R"([ \H]+)"};
|
||||
kak_assert(vm.exec("abc "));
|
||||
kak_assert(not vm.exec("a \t"));
|
||||
}
|
||||
|
||||
{
|
||||
TestVM vm{R"(\Q{}[]*+?\Ea+)"};
|
||||
kak_assert(vm.exec("{}[]*+?aa"));
|
||||
|
|
Loading…
Reference in New Issue
Block a user