Do not decode UTF-8 when looking for the next regex start
There is no need to decode, as we know any non-ASCII character will be treated as Other in the StartDesc.
This commit is contained in:
parent
ee2985739b
commit
d539e8fb89
|
@ -235,8 +235,8 @@ public:
|
||||||
}
|
}
|
||||||
else if (start != config.end)
|
else if (start != config.end)
|
||||||
{
|
{
|
||||||
const Codepoint cp = codepoint(start, config);
|
const unsigned char c = forward ? *start : *utf8::previous(start, config.end);
|
||||||
if (not start_desc->map[cp < StartDesc::count ? cp : StartDesc::other])
|
if (not start_desc->map[(c < StartDesc::count) ? c : StartDesc::other])
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -525,12 +525,21 @@ private:
|
||||||
{
|
{
|
||||||
while (start != config.end)
|
while (start != config.end)
|
||||||
{
|
{
|
||||||
const Codepoint cp = read_codepoint(start, config);
|
static_assert(StartDesc::count <= 128, "start desc should be ascii only");
|
||||||
if (start_desc.map[(cp >= 0 and cp < StartDesc::count) ? cp : StartDesc::other])
|
if constexpr (forward)
|
||||||
{
|
{
|
||||||
forward ? utf8::to_previous(start, config.subject_begin)
|
const unsigned char c = *start;
|
||||||
: utf8::to_next(start, config.subject_end);
|
if (start_desc.map[(c < StartDesc::count) ? c : StartDesc::other])
|
||||||
return;
|
return;
|
||||||
|
utf8::to_next(start, config.end);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
auto prev = utf8::previous(start, config.end);
|
||||||
|
const unsigned char c = *prev;
|
||||||
|
if (start_desc.map[(c < StartDesc::count) ? c : StartDesc::other])
|
||||||
|
return;
|
||||||
|
start = prev;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -612,17 +621,6 @@ private:
|
||||||
is_word(utf8::codepoint(pos, config.subject_end));
|
is_word(utf8::codepoint(pos, config.subject_end));
|
||||||
}
|
}
|
||||||
|
|
||||||
static Codepoint read_codepoint(Iterator& it, const ExecConfig& config)
|
|
||||||
{
|
|
||||||
if (forward)
|
|
||||||
return utf8::read_codepoint(it, config.subject_end);
|
|
||||||
else
|
|
||||||
{
|
|
||||||
utf8::to_previous(it, config.subject_begin);
|
|
||||||
return utf8::codepoint(it, config.subject_end);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static Codepoint codepoint(const Iterator& it, const ExecConfig& config)
|
static Codepoint codepoint(const Iterator& it, const ExecConfig& config)
|
||||||
{
|
{
|
||||||
return utf8::codepoint(forward ? it : utf8::previous(it, config.subject_begin),
|
return utf8::codepoint(forward ? it : utf8::previous(it, config.subject_begin),
|
||||||
|
|
Loading…
Reference in New Issue
Block a user