Do not decode utf-8 when looking for regex next start
There is no need to decode: every byte of a non-ASCII (multi-byte UTF-8) character is >= 0x80, and StartDesc only covers ASCII, so such bytes are always treated as Other in the StartDesc.
This commit is contained in:
parent
ee2985739b
commit
d539e8fb89
|
@ -235,8 +235,8 @@ public:
|
|||
}
|
||||
else if (start != config.end)
|
||||
{
|
||||
const Codepoint cp = codepoint(start, config);
|
||||
if (not start_desc->map[cp < StartDesc::count ? cp : StartDesc::other])
|
||||
const unsigned char c = forward ? *start : *utf8::previous(start, config.end);
|
||||
if (not start_desc->map[(c < StartDesc::count) ? c : StartDesc::other])
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
@ -525,12 +525,21 @@ private:
|
|||
{
|
||||
while (start != config.end)
|
||||
{
|
||||
const Codepoint cp = read_codepoint(start, config);
|
||||
if (start_desc.map[(cp >= 0 and cp < StartDesc::count) ? cp : StartDesc::other])
|
||||
static_assert(StartDesc::count <= 128, "start desc should be ascii only");
|
||||
if constexpr (forward)
|
||||
{
|
||||
forward ? utf8::to_previous(start, config.subject_begin)
|
||||
: utf8::to_next(start, config.subject_end);
|
||||
return;
|
||||
const unsigned char c = *start;
|
||||
if (start_desc.map[(c < StartDesc::count) ? c : StartDesc::other])
|
||||
return;
|
||||
utf8::to_next(start, config.end);
|
||||
}
|
||||
else
|
||||
{
|
||||
auto prev = utf8::previous(start, config.end);
|
||||
const unsigned char c = *prev;
|
||||
if (start_desc.map[(c < StartDesc::count) ? c : StartDesc::other])
|
||||
return;
|
||||
start = prev;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -612,17 +621,6 @@ private:
|
|||
is_word(utf8::codepoint(pos, config.subject_end));
|
||||
}
|
||||
|
||||
// Decodes one codepoint at `it` in the matching direction; `it` is taken by
// non-const reference and is repositioned as part of the read.
// - forward: delegates to utf8::read_codepoint(it, config.subject_end);
//   presumably this advances `it` past the decoded codepoint -- confirm
//   against the utf8 helpers.
// - backward: first steps `it` back with utf8::to_previous (bounded by
//   config.subject_begin), then decodes at that new position.
static Codepoint read_codepoint(Iterator& it, const ExecConfig& config)
|
||||
{
|
||||
if (forward)
|
||||
return utf8::read_codepoint(it, config.subject_end);
|
||||
else
|
||||
{
|
||||
// Move onto the previous codepoint before decoding it.
utf8::to_previous(it, config.subject_begin);
|
||||
return utf8::codepoint(it, config.subject_end);
|
||||
}
|
||||
}
|
||||
|
||||
static Codepoint codepoint(const Iterator& it, const ExecConfig& config)
|
||||
{
|
||||
return utf8::codepoint(forward ? it : utf8::previous(it, config.subject_begin),
|
||||
|
|
Loading…
Reference in New Issue
Block a user