Do not decode utf-8 when looking for regex next start

There is no need to decode, as we know any non-ASCII character will
be treated as Other in the StartDesc.
This commit is contained in:
Maxime Coste 2019-12-04 22:00:31 +11:00
parent ee2985739b
commit d539e8fb89

View File

@ -235,8 +235,8 @@ public:
} }
else if (start != config.end) else if (start != config.end)
{ {
const Codepoint cp = codepoint(start, config); const unsigned char c = forward ? *start : *utf8::previous(start, config.end);
if (not start_desc->map[cp < StartDesc::count ? cp : StartDesc::other]) if (not start_desc->map[(c < StartDesc::count) ? c : StartDesc::other])
return false; return false;
} }
} }
@ -525,12 +525,21 @@ private:
{ {
while (start != config.end) while (start != config.end)
{ {
const Codepoint cp = read_codepoint(start, config); static_assert(StartDesc::count <= 128, "start desc should be ascii only");
if (start_desc.map[(cp >= 0 and cp < StartDesc::count) ? cp : StartDesc::other]) if constexpr (forward)
{ {
forward ? utf8::to_previous(start, config.subject_begin) const unsigned char c = *start;
: utf8::to_next(start, config.subject_end); if (start_desc.map[(c < StartDesc::count) ? c : StartDesc::other])
return; return;
utf8::to_next(start, config.end);
}
else
{
auto prev = utf8::previous(start, config.end);
const unsigned char c = *prev;
if (start_desc.map[(c < StartDesc::count) ? c : StartDesc::other])
return;
start = prev;
} }
} }
} }
@ -612,17 +621,6 @@ private:
is_word(utf8::codepoint(pos, config.subject_end)); is_word(utf8::codepoint(pos, config.subject_end));
} }
// Returns one codepoint from the subject at `it`, honouring the matching
// direction selected by `forward`. `it` is taken by non-const reference
// and handed to the utf8 helpers, so it is presumably moved past the
// codepoint that was read (confirm against the utf8 helpers).
static Codepoint read_codepoint(Iterator& it, const ExecConfig& config)
{
if (forward)
// Forward direction: delegate to utf8::read_codepoint, bounded by subject_end.
return utf8::read_codepoint(it, config.subject_end);
else
{
// Backward direction: first step `it` back (bounded by subject_begin),
// then decode the codepoint found at the new position.
utf8::to_previous(it, config.subject_begin);
return utf8::codepoint(it, config.subject_end);
}
}
static Codepoint codepoint(const Iterator& it, const ExecConfig& config) static Codepoint codepoint(const Iterator& it, const ExecConfig& config)
{ {
return utf8::codepoint(forward ? it : utf8::previous(it, config.subject_begin), return utf8::codepoint(forward ? it : utf8::previous(it, config.subject_begin),