Simplify and accelerate start desc map
Store values for all possible bytes and fill the UTF-8 multi-byte start values when necessary.
This commit is contained in:
parent
c956413046
commit
c4df0fac52
|
@ -17,7 +17,6 @@
|
||||||
namespace Kakoune
|
namespace Kakoune
|
||||||
{
|
{
|
||||||
|
|
||||||
constexpr Codepoint CompiledRegex::StartDesc::other;
|
|
||||||
constexpr Codepoint CompiledRegex::StartDesc::count;
|
constexpr Codepoint CompiledRegex::StartDesc::count;
|
||||||
|
|
||||||
struct ParsedRegex
|
struct ParsedRegex
|
||||||
|
@ -893,11 +892,17 @@ private:
|
||||||
bool compute_start_desc(ParsedRegex::NodeIndex index,
|
bool compute_start_desc(ParsedRegex::NodeIndex index,
|
||||||
CompiledRegex::StartDesc& start_desc) const
|
CompiledRegex::StartDesc& start_desc) const
|
||||||
{
|
{
|
||||||
|
// fill all bytes that mark the start of a UTF-8 multi-byte sequence
|
||||||
|
auto add_multi_byte_utf8 = [&] {
|
||||||
|
std::fill(start_desc.map + 0b11000000, start_desc.map + 0b11111000, true);
|
||||||
|
};
|
||||||
|
static constexpr Codepoint single_byte_limit = 128;
|
||||||
|
|
||||||
auto& node = get_node(index);
|
auto& node = get_node(index);
|
||||||
switch (node.op)
|
switch (node.op)
|
||||||
{
|
{
|
||||||
case ParsedRegex::Literal:
|
case ParsedRegex::Literal:
|
||||||
if (node.value < CompiledRegex::StartDesc::count)
|
if (node.value < single_byte_limit)
|
||||||
{
|
{
|
||||||
if (node.ignore_case)
|
if (node.ignore_case)
|
||||||
{
|
{
|
||||||
|
@ -908,14 +913,14 @@ private:
|
||||||
start_desc.map[node.value] = true;
|
start_desc.map[node.value] = true;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
start_desc.map[CompiledRegex::StartDesc::other] = true;
|
add_multi_byte_utf8();
|
||||||
return node.quantifier.allows_none();
|
return node.quantifier.allows_none();
|
||||||
case ParsedRegex::AnyChar:
|
case ParsedRegex::AnyChar:
|
||||||
for (auto& b : start_desc.map)
|
for (auto& b : start_desc.map)
|
||||||
b = true;
|
b = true;
|
||||||
return node.quantifier.allows_none();
|
return node.quantifier.allows_none();
|
||||||
case ParsedRegex::AnyCharExceptNewLine:
|
case ParsedRegex::AnyCharExceptNewLine:
|
||||||
for (Codepoint cp = 0; cp < CompiledRegex::StartDesc::count; ++cp)
|
for (Codepoint cp = 0; cp < single_byte_limit; ++cp)
|
||||||
{
|
{
|
||||||
if (cp != '\n')
|
if (cp != '\n')
|
||||||
start_desc.map[cp] = true;
|
start_desc.map[cp] = true;
|
||||||
|
@ -930,33 +935,33 @@ private:
|
||||||
{
|
{
|
||||||
for (auto& range : character_class.ranges)
|
for (auto& range : character_class.ranges)
|
||||||
{
|
{
|
||||||
const auto clamp = [](Codepoint cp) { return std::min(CompiledRegex::StartDesc::count, cp); };
|
const auto clamp = [](Codepoint cp) { return std::min(single_byte_limit, cp); };
|
||||||
for (auto cp = clamp(range.min), end = clamp(range.max + 1); cp < end; ++cp)
|
for (auto cp = clamp(range.min), end = clamp(range.max + 1); cp < end; ++cp)
|
||||||
start_desc.map[cp] = true;
|
start_desc.map[cp] = true;
|
||||||
if (range.max >= CompiledRegex::StartDesc::count)
|
if (range.max >= single_byte_limit)
|
||||||
start_desc.map[CompiledRegex::StartDesc::other] = true;
|
add_multi_byte_utf8();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
for (Codepoint cp = 0; cp < CompiledRegex::StartDesc::count; ++cp)
|
for (Codepoint cp = 0; cp < single_byte_limit; ++cp)
|
||||||
{
|
{
|
||||||
if (start_desc.map[cp] or character_class.matches(cp))
|
if (start_desc.map[cp] or character_class.matches(cp))
|
||||||
start_desc.map[cp] = true;
|
start_desc.map[cp] = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
start_desc.map[CompiledRegex::StartDesc::other] = true;
|
add_multi_byte_utf8();
|
||||||
return node.quantifier.allows_none();
|
return node.quantifier.allows_none();
|
||||||
}
|
}
|
||||||
case ParsedRegex::CharType:
|
case ParsedRegex::CharType:
|
||||||
{
|
{
|
||||||
const CharacterType ctype = (CharacterType)node.value;
|
const CharacterType ctype = (CharacterType)node.value;
|
||||||
for (Codepoint cp = 0; cp < CompiledRegex::StartDesc::count; ++cp)
|
for (Codepoint cp = 0; cp < single_byte_limit; ++cp)
|
||||||
{
|
{
|
||||||
if (is_ctype(ctype, cp))
|
if (is_ctype(ctype, cp))
|
||||||
start_desc.map[cp] = true;
|
start_desc.map[cp] = true;
|
||||||
}
|
}
|
||||||
start_desc.map[CompiledRegex::StartDesc::other] = true;
|
add_multi_byte_utf8();
|
||||||
return node.quantifier.allows_none();
|
return node.quantifier.allows_none();
|
||||||
}
|
}
|
||||||
case ParsedRegex::Sequence:
|
case ParsedRegex::Sequence:
|
||||||
|
|
|
@ -152,8 +152,7 @@ struct CompiledRegex : RefCountable, UseMemoryDomain<MemoryDomain::Regex>
|
||||||
|
|
||||||
struct StartDesc : UseMemoryDomain<MemoryDomain::Regex>
|
struct StartDesc : UseMemoryDomain<MemoryDomain::Regex>
|
||||||
{
|
{
|
||||||
static constexpr Codepoint count = 128;
|
static constexpr Codepoint count = 256;
|
||||||
static constexpr Codepoint other = 0;
|
|
||||||
bool map[count];
|
bool map[count];
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -277,7 +276,7 @@ public:
|
||||||
else if (start != config.end)
|
else if (start != config.end)
|
||||||
{
|
{
|
||||||
const unsigned char c = forward ? *start : *utf8::previous(start, config.end);
|
const unsigned char c = forward ? *start : *utf8::previous(start, config.end);
|
||||||
if (not start_desc->map[(c < StartDesc::count) ? c : StartDesc::other])
|
if (not start_desc->map[c])
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -519,11 +518,11 @@ private:
|
||||||
{
|
{
|
||||||
while (start != config.end)
|
while (start != config.end)
|
||||||
{
|
{
|
||||||
static_assert(StartDesc::count <= 128, "start desc should be ascii only");
|
static_assert(StartDesc::count <= 256, "start desc should be ascii only");
|
||||||
if constexpr (forward)
|
if constexpr (forward)
|
||||||
{
|
{
|
||||||
const unsigned char c = *start;
|
const unsigned char c = *start;
|
||||||
if (start_desc.map[(c < StartDesc::count) ? c : StartDesc::other])
|
if (start_desc.map[c])
|
||||||
return;
|
return;
|
||||||
++start;
|
++start;
|
||||||
}
|
}
|
||||||
|
@ -531,7 +530,7 @@ private:
|
||||||
{
|
{
|
||||||
auto prev = utf8::previous(start, config.end);
|
auto prev = utf8::previous(start, config.end);
|
||||||
const unsigned char c = *prev;
|
const unsigned char c = *prev;
|
||||||
if (start_desc.map[(c < StartDesc::count) ? c : StartDesc::other])
|
if (start_desc.map[c])
|
||||||
return;
|
return;
|
||||||
start = prev;
|
start = prev;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue
Block a user