Reuse existing character classes when possible in regex
This commit is contained in:
parent
26d14d52bb
commit
ca71d8997d
|
@ -119,6 +119,7 @@ struct Children
|
||||||
Index operator*() const { return m_pos; }
|
Index operator*() const { return m_pos; }
|
||||||
bool operator!=(Sentinel) const { return m_pos != m_end; }
|
bool operator!=(Sentinel) const { return m_pos != m_end; }
|
||||||
|
|
||||||
|
private:
|
||||||
Index find_prev(Index parent, Index pos) const
|
Index find_prev(Index parent, Index pos) const
|
||||||
{
|
{
|
||||||
Index child = parent+1;
|
Index child = parent+1;
|
||||||
|
@ -544,8 +545,10 @@ private:
|
||||||
character_class.ranges.empty())
|
character_class.ranges.empty())
|
||||||
return add_node(ParsedRegex::CharType, (Codepoint)character_class.ctypes);
|
return add_node(ParsedRegex::CharType, (Codepoint)character_class.ctypes);
|
||||||
|
|
||||||
auto class_id = m_parsed_regex.character_classes.size();
|
auto it = std::find(m_parsed_regex.character_classes.begin(), m_parsed_regex.character_classes.end(), character_class);
|
||||||
m_parsed_regex.character_classes.push_back(std::move(character_class));
|
auto class_id = it - m_parsed_regex.character_classes.begin();
|
||||||
|
if (it == m_parsed_regex.character_classes.end())
|
||||||
|
m_parsed_regex.character_classes.push_back(std::move(character_class));
|
||||||
|
|
||||||
return add_node(ParsedRegex::CharClass, class_id);
|
return add_node(ParsedRegex::CharClass, class_id);
|
||||||
}
|
}
|
||||||
|
@ -1536,6 +1539,12 @@ auto test_regex = UnitTest{[]{
|
||||||
kak_assert(vm.exec("\t\n\v\f\r"));
|
kak_assert(vm.exec("\t\n\v\f\r"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
TestVM<> vm{R"([\t-\r]\h+[\t-\r])"};
|
||||||
|
kak_assert(vm.character_classes.size() == 1);
|
||||||
|
kak_assert(vm.exec("\n \f"));
|
||||||
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
TestVM<> vm{R"([^\x00-\x7F]+)"};
|
TestVM<> vm{R"([^\x00-\x7F]+)"};
|
||||||
kak_assert(not vm.exec("ascii"));
|
kak_assert(not vm.exec("ascii"));
|
||||||
|
|
|
@ -33,12 +33,18 @@ constexpr bool with_bit_ops(Meta::Type<CharacterType>) { return true; }
|
||||||
|
|
||||||
struct CharacterClass
|
struct CharacterClass
|
||||||
{
|
{
|
||||||
struct Range { Codepoint min, max; };
|
struct Range
|
||||||
|
{
|
||||||
|
Codepoint min, max;
|
||||||
|
friend bool operator==(const Range&, const Range&) = default;
|
||||||
|
};
|
||||||
|
|
||||||
Vector<Range, MemoryDomain::Regex> ranges;
|
Vector<Range, MemoryDomain::Regex> ranges;
|
||||||
CharacterType ctypes = CharacterType::None;
|
CharacterType ctypes = CharacterType::None;
|
||||||
bool negative = false;
|
bool negative = false;
|
||||||
bool ignore_case = false;
|
bool ignore_case = false;
|
||||||
|
|
||||||
|
friend bool operator==(const CharacterClass&, const CharacterClass&) = default;
|
||||||
};
|
};
|
||||||
|
|
||||||
bool is_character_class(const CharacterClass& character_class, Codepoint cp);
|
bool is_character_class(const CharacterClass& character_class, Codepoint cp);
|
||||||
|
|
Loading…
Reference in New Issue
Block a user