diff --git a/src/regex_impl.cc b/src/regex_impl.cc index 23321af8..9ada6fd5 100644 --- a/src/regex_impl.cc +++ b/src/regex_impl.cc @@ -959,7 +959,7 @@ private: { for (Codepoint cp = 0; cp < CompiledRegex::StartDesc::count; ++cp) { - if (start_desc.map[cp] or is_character_class(character_class, cp)) + if (start_desc.map[cp] or character_class.matches(cp)) start_desc.map[cp] = true; } } @@ -1165,20 +1165,6 @@ CompiledRegex compile_regex(StringView re, RegexCompileFlags flags) return RegexCompiler{RegexParser::parse(re), flags}.get_compiled_regex(); } -bool is_character_class(const CharacterClass& character_class, Codepoint cp) -{ - if (character_class.ignore_case) - cp = to_lower(cp); - - auto it = std::find_if(character_class.ranges.begin(), - character_class.ranges.end(), - [cp](auto& range) { return range.min <= cp and cp <= range.max; }); - - bool found = it != character_class.ranges.end() or (character_class.ctypes != CharacterType::None and - is_ctype(character_class.ctypes, cp)); - return found != character_class.negative; -} - bool is_ctype(CharacterType ctype, Codepoint cp) { auto check = [&](CharacterType bit, CharacterType not_bit, auto&& func) { diff --git a/src/regex_impl.hh b/src/regex_impl.hh index fd99ea1e..48788094 100644 --- a/src/regex_impl.hh +++ b/src/regex_impl.hh @@ -31,6 +31,8 @@ enum class CharacterType : unsigned char }; constexpr bool with_bit_ops(Meta::Type) { return true; } +bool is_ctype(CharacterType ctype, Codepoint cp); /* declared ahead of CharacterClass because the inline CharacterClass::matches defined below calls it */ + struct CharacterClass { struct Range @@ -45,10 +47,24 @@ struct CharacterClass bool ignore_case = false; friend bool operator==(const CharacterClass&, const CharacterClass&) = default; -}; -bool is_character_class(const CharacterClass& character_class, Codepoint cp); -bool is_ctype(CharacterType ctype, Codepoint cp); + bool matches(Codepoint cp) const /* Returns true when cp is matched by this class: cp lies inside one of `ranges` or satisfies `ctypes`, with the outcome flipped when `negative` is set. */ + { + if (ignore_case) + cp = to_lower(cp); /* case-insensitive classes test the lowercased code point; NOTE(review): presumably their ranges are stored lowercased as well, confirm where the class is built */ + + for (auto& range : ranges) + { + if (cp < range.min) + break; /* stops at the first range that starts above cp; NOTE(review): equivalent to the removed find_if over every range only if `ranges` is sorted by ascending min, confirm that the code building the class guarantees this */ + else if (cp <= range.max) + return not negative; + } + 
return (ctypes != CharacterType::None and is_ctype(ctypes, cp)) != negative; /* no range contained cp: fall back to the character-type test (skipped when ctypes is None), then apply the negation */ + } + +}; struct CompiledRegex : RefCountable, UseMemoryDomain { @@ -418,7 +434,7 @@ private: case CompiledRegex::CharClass: if (pos == config.end) return failed(); - return is_character_class(m_program.character_classes[inst.param.character_class_index], codepoint(pos, config)) ? + return m_program.character_classes[inst.param.character_class_index].matches(codepoint(pos, config)) ? consumed() : failed(); case CompiledRegex::CharType: if (pos == config.end) @@ -552,7 +568,7 @@ private: else if (op >= Lookaround::CharacterClass and op < Lookaround::CharacterType) { auto index = to_underlying(op) - to_underlying(Lookaround::CharacterClass); - if (not is_character_class(m_program.character_classes[index], cp)) + if (not m_program.character_classes[index].matches(cp)) return false; } else if (op >= Lookaround::CharacterType and op < Lookaround::OpEnd)