Micro-optimize regex character class/type matching
Also force-inline step_thread as function call overhead has a measurable impact.
This commit is contained in:
parent
8566ae14a0
commit
ba379cba52
|
@ -1173,27 +1173,24 @@ bool is_character_class(const CharacterClass& character_class, Codepoint cp)
|
||||||
if (character_class.ignore_case)
|
if (character_class.ignore_case)
|
||||||
cp = to_lower(cp);
|
cp = to_lower(cp);
|
||||||
|
|
||||||
auto it = std::lower_bound(character_class.ranges.begin(),
|
auto it = std::find_if(character_class.ranges.begin(),
|
||||||
character_class.ranges.end(), cp,
|
character_class.ranges.end(),
|
||||||
[](auto& range, Codepoint cp)
|
[cp](auto& range) { return range.min <= cp and cp <= range.max; });
|
||||||
{ return range.max < cp; });
|
|
||||||
|
|
||||||
auto found = (it != character_class.ranges.end() and it->min <= cp) or
|
|
||||||
is_ctype(character_class.ctypes, cp);
|
|
||||||
|
|
||||||
|
bool found = it != character_class.ranges.end() or (character_class.ctypes != CharacterType::None and
|
||||||
|
is_ctype(character_class.ctypes, cp));
|
||||||
return found != character_class.negative;
|
return found != character_class.negative;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool is_ctype(CharacterType ctype, Codepoint cp)
|
bool is_ctype(CharacterType ctype, Codepoint cp)
|
||||||
{
|
{
|
||||||
return ((ctype & CharacterType::Whitespace) and is_blank(cp)) or
|
auto check = [&](CharacterType bit, CharacterType not_bit, auto&& func) {
|
||||||
((ctype & CharacterType::HorizontalWhitespace) and is_horizontal_blank(cp)) or
|
return (ctype & (bit | not_bit)) and func(cp) == (bool)(ctype & bit);
|
||||||
((ctype & CharacterType::Digit) and iswdigit(cp)) or
|
};
|
||||||
((ctype & CharacterType::Word) and is_word(cp)) or
|
return check(CharacterType::Word, CharacterType::NotWord, [](Codepoint cp) { return is_word(cp); }) or
|
||||||
((ctype & CharacterType::NotWhitespace) and not is_blank(cp)) or
|
check(CharacterType::Whitespace, CharacterType::NotWhitespace, is_blank) or
|
||||||
((ctype & CharacterType::NotHorizontalWhitespace) and not is_horizontal_blank(cp)) or
|
check(CharacterType::HorizontalWhitespace, CharacterType::NotHorizontalWhitespace, is_horizontal_blank) or
|
||||||
((ctype & CharacterType::NotDigit) and not iswdigit(cp)) or
|
check(CharacterType::Digit, CharacterType::NotDigit, iswdigit);
|
||||||
((ctype & CharacterType::NotWord) and not is_word(cp));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
namespace
|
namespace
|
||||||
|
|
|
@ -338,6 +338,7 @@ private:
|
||||||
};
|
};
|
||||||
|
|
||||||
// Steps a thread until it consumes the current character, matches or fail
|
// Steps a thread until it consumes the current character, matches or fail
|
||||||
|
[[gnu::always_inline]]
|
||||||
void step_thread(const Iterator& pos, uint16_t current_step, Thread thread, const ExecConfig& config)
|
void step_thread(const Iterator& pos, uint16_t current_step, Thread thread, const ExecConfig& config)
|
||||||
{
|
{
|
||||||
auto failed = [this, &thread]() {
|
auto failed = [this, &thread]() {
|
||||||
|
|
Loading…
Reference in New Issue
Block a user