From bd91a255e49c81e3d20c43359facac95191e041a Mon Sep 17 00:00:00 2001 From: Maxime Coste Date: Sun, 11 Feb 2024 12:17:21 +1100 Subject: [PATCH] Do not decode utf8 while looking for next regex match start candidate If the first byte in the multi-byte utf8 sequence does not match, it means the "other" character is not set, so none of the sequence's bytes will match (as they all have the MSB set). This tightens the critical loop which ends up running faster in most cases. --- src/regex_impl.hh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/regex_impl.hh b/src/regex_impl.hh index f59b2249..2fd2254a 100644 --- a/src/regex_impl.hh +++ b/src/regex_impl.hh @@ -530,7 +530,7 @@ private: const unsigned char c = *start; if (start_desc.map[(c < StartDesc::count) ? c : StartDesc::other]) return; - utf8::to_next(start, config.end); + ++start; } else {