Regex: Use only 128 characters in start desc and encode others as 0
Using 257 entries consumed a lot of memory for no good reason, as codepoints > 127 are not common enough to be treated specially.
This commit is contained in:
parent
528ecb7417
commit
1e8026f143
|
@ -15,6 +15,7 @@ namespace Kakoune
|
|||
{
|
||||
|
||||
constexpr Codepoint CompiledRegex::StartDesc::other;
|
||||
constexpr Codepoint CompiledRegex::StartDesc::count;
|
||||
|
||||
struct ParsedRegex
|
||||
{
|
||||
|
@ -906,15 +907,16 @@ private:
|
|||
{
|
||||
for (auto& range : character_class.ranges)
|
||||
{
|
||||
auto min = std::min(CompiledRegex::StartDesc::other, range.min);
|
||||
auto max = std::min(CompiledRegex::StartDesc::other, range.max);
|
||||
for (Codepoint cp = min; cp <= max; ++cp)
|
||||
constexpr auto clamp = [](Codepoint cp) { return std::min(CompiledRegex::StartDesc::count, cp); };
|
||||
for (auto cp = clamp(range.min), end = clamp(range.max + 1); cp < end; ++cp)
|
||||
start_desc.map[cp] = true;
|
||||
if (range.max >= CompiledRegex::StartDesc::count)
|
||||
start_desc.map[CompiledRegex::StartDesc::other] = true;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (Codepoint cp = 0; cp < CompiledRegex::StartDesc::other; ++cp)
|
||||
for (Codepoint cp = 0; cp < CompiledRegex::StartDesc::count; ++cp)
|
||||
{
|
||||
if (start_desc.map[cp] or is_character_class(character_class, cp))
|
||||
start_desc.map[cp] = true;
|
||||
|
@ -926,7 +928,7 @@ private:
|
|||
case ParsedRegex::CharacterType:
|
||||
{
|
||||
const CharacterType ctype = (CharacterType)node.value;
|
||||
for (Codepoint cp = 0; cp < CompiledRegex::StartDesc::other; ++cp)
|
||||
for (Codepoint cp = 0; cp < CompiledRegex::StartDesc::count; ++cp)
|
||||
{
|
||||
if (is_ctype(ctype, cp))
|
||||
start_desc.map[cp] = true;
|
||||
|
|
|
@ -103,9 +103,9 @@ struct CompiledRegex : RefCountable, UseMemoryDomain<MemoryDomain::Regex>
|
|||
|
||||
struct StartDesc : UseMemoryDomain<MemoryDomain::Regex>
|
||||
{
|
||||
static constexpr size_t count = 256;
|
||||
static constexpr Codepoint other = 256;
|
||||
bool map[count+1];
|
||||
static constexpr Codepoint count = 128;
|
||||
static constexpr Codepoint other = 0;
|
||||
bool map[count];
|
||||
};
|
||||
|
||||
std::unique_ptr<StartDesc> forward_start_desc;
|
||||
|
@ -213,7 +213,8 @@ public:
|
|||
return false;
|
||||
}
|
||||
else if (start != config.end and
|
||||
not start_desc->map[std::min(*start, CompiledRegex::StartDesc::other)])
|
||||
not start_desc->map[*start < CompiledRegex::StartDesc::count ?
|
||||
*start : CompiledRegex::StartDesc::other])
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -509,8 +510,9 @@ private:
|
|||
void to_next_start(EffectiveIt& start, const EffectiveIt& end,
|
||||
const CompiledRegex::StartDesc& start_desc)
|
||||
{
|
||||
while (start != end and *start >= 0 and
|
||||
not start_desc.map[std::min(*start, CompiledRegex::StartDesc::other)])
|
||||
Codepoint cp;
|
||||
while (start != end and (cp = *start) >= 0 and
|
||||
not start_desc.map[cp < CompiledRegex::StartDesc::count ? cp : CompiledRegex::StartDesc::other])
|
||||
++start;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user