Micro-optimize regex character class/type matching

Also force-inline step_thread as function call overhead has a
measurable impact.
This commit is contained in:
Maxime Coste 2020-07-30 19:51:25 +10:00
parent 8566ae14a0
commit ba379cba52
2 changed files with 13 additions and 15 deletions

View File

@ -1173,27 +1173,24 @@ bool is_character_class(const CharacterClass& character_class, Codepoint cp)
if (character_class.ignore_case)
cp = to_lower(cp);
auto it = std::lower_bound(character_class.ranges.begin(),
character_class.ranges.end(), cp,
[](auto& range, Codepoint cp)
{ return range.max < cp; });
auto found = (it != character_class.ranges.end() and it->min <= cp) or
is_ctype(character_class.ctypes, cp);
auto it = std::find_if(character_class.ranges.begin(),
character_class.ranges.end(),
[cp](auto& range) { return range.min <= cp and cp <= range.max; });
bool found = it != character_class.ranges.end() or (character_class.ctypes != CharacterType::None and
is_ctype(character_class.ctypes, cp));
return found != character_class.negative;
}
bool is_ctype(CharacterType ctype, Codepoint cp)
{
return ((ctype & CharacterType::Whitespace) and is_blank(cp)) or
((ctype & CharacterType::HorizontalWhitespace) and is_horizontal_blank(cp)) or
((ctype & CharacterType::Digit) and iswdigit(cp)) or
((ctype & CharacterType::Word) and is_word(cp)) or
((ctype & CharacterType::NotWhitespace) and not is_blank(cp)) or
((ctype & CharacterType::NotHorizontalWhitespace) and not is_horizontal_blank(cp)) or
((ctype & CharacterType::NotDigit) and not iswdigit(cp)) or
((ctype & CharacterType::NotWord) and not is_word(cp));
auto check = [&](CharacterType bit, CharacterType not_bit, auto&& func) {
return (ctype & (bit | not_bit)) and func(cp) == (bool)(ctype & bit);
};
return check(CharacterType::Word, CharacterType::NotWord, [](Codepoint cp) { return is_word(cp); }) or
check(CharacterType::Whitespace, CharacterType::NotWhitespace, is_blank) or
check(CharacterType::HorizontalWhitespace, CharacterType::NotHorizontalWhitespace, is_horizontal_blank) or
check(CharacterType::Digit, CharacterType::NotDigit, iswdigit);
}
namespace

View File

@ -338,6 +338,7 @@ private:
};
// Steps a thread until it consumes the current character, matches or fail
[[gnu::always_inline]]
void step_thread(const Iterator& pos, uint16_t current_step, Thread thread, const ExecConfig& config)
{
auto failed = [this, &thread]() {