Regex: Use only 128 characters in start desc and encode others as 0
Using 257 map entries consumed a lot of memory for no good reason, as codepoints > 127 are not common enough to be treated specially.
This commit is contained in:
parent
528ecb7417
commit
1e8026f143
|
@ -15,6 +15,7 @@ namespace Kakoune
|
||||||
{
|
{
|
||||||
|
|
||||||
constexpr Codepoint CompiledRegex::StartDesc::other;
|
constexpr Codepoint CompiledRegex::StartDesc::other;
|
||||||
|
constexpr Codepoint CompiledRegex::StartDesc::count;
|
||||||
|
|
||||||
struct ParsedRegex
|
struct ParsedRegex
|
||||||
{
|
{
|
||||||
|
@ -906,15 +907,16 @@ private:
|
||||||
{
|
{
|
||||||
for (auto& range : character_class.ranges)
|
for (auto& range : character_class.ranges)
|
||||||
{
|
{
|
||||||
auto min = std::min(CompiledRegex::StartDesc::other, range.min);
|
constexpr auto clamp = [](Codepoint cp) { return std::min(CompiledRegex::StartDesc::count, cp); };
|
||||||
auto max = std::min(CompiledRegex::StartDesc::other, range.max);
|
for (auto cp = clamp(range.min), end = clamp(range.max + 1); cp < end; ++cp)
|
||||||
for (Codepoint cp = min; cp <= max; ++cp)
|
|
||||||
start_desc.map[cp] = true;
|
start_desc.map[cp] = true;
|
||||||
|
if (range.max >= CompiledRegex::StartDesc::count)
|
||||||
|
start_desc.map[CompiledRegex::StartDesc::other] = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
for (Codepoint cp = 0; cp < CompiledRegex::StartDesc::other; ++cp)
|
for (Codepoint cp = 0; cp < CompiledRegex::StartDesc::count; ++cp)
|
||||||
{
|
{
|
||||||
if (start_desc.map[cp] or is_character_class(character_class, cp))
|
if (start_desc.map[cp] or is_character_class(character_class, cp))
|
||||||
start_desc.map[cp] = true;
|
start_desc.map[cp] = true;
|
||||||
|
@ -926,7 +928,7 @@ private:
|
||||||
case ParsedRegex::CharacterType:
|
case ParsedRegex::CharacterType:
|
||||||
{
|
{
|
||||||
const CharacterType ctype = (CharacterType)node.value;
|
const CharacterType ctype = (CharacterType)node.value;
|
||||||
for (Codepoint cp = 0; cp < CompiledRegex::StartDesc::other; ++cp)
|
for (Codepoint cp = 0; cp < CompiledRegex::StartDesc::count; ++cp)
|
||||||
{
|
{
|
||||||
if (is_ctype(ctype, cp))
|
if (is_ctype(ctype, cp))
|
||||||
start_desc.map[cp] = true;
|
start_desc.map[cp] = true;
|
||||||
|
|
|
@ -103,9 +103,9 @@ struct CompiledRegex : RefCountable, UseMemoryDomain<MemoryDomain::Regex>
|
||||||
|
|
||||||
struct StartDesc : UseMemoryDomain<MemoryDomain::Regex>
|
struct StartDesc : UseMemoryDomain<MemoryDomain::Regex>
|
||||||
{
|
{
|
||||||
static constexpr size_t count = 256;
|
static constexpr Codepoint count = 128;
|
||||||
static constexpr Codepoint other = 256;
|
static constexpr Codepoint other = 0;
|
||||||
bool map[count+1];
|
bool map[count];
|
||||||
};
|
};
|
||||||
|
|
||||||
std::unique_ptr<StartDesc> forward_start_desc;
|
std::unique_ptr<StartDesc> forward_start_desc;
|
||||||
|
@ -213,7 +213,8 @@ public:
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
else if (start != config.end and
|
else if (start != config.end and
|
||||||
not start_desc->map[std::min(*start, CompiledRegex::StartDesc::other)])
|
not start_desc->map[*start < CompiledRegex::StartDesc::count ?
|
||||||
|
*start : CompiledRegex::StartDesc::other])
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -509,8 +510,9 @@ private:
|
||||||
void to_next_start(EffectiveIt& start, const EffectiveIt& end,
|
void to_next_start(EffectiveIt& start, const EffectiveIt& end,
|
||||||
const CompiledRegex::StartDesc& start_desc)
|
const CompiledRegex::StartDesc& start_desc)
|
||||||
{
|
{
|
||||||
while (start != end and *start >= 0 and
|
Codepoint cp;
|
||||||
not start_desc.map[std::min(*start, CompiledRegex::StartDesc::other)])
|
while (start != end and (cp = *start) >= 0 and
|
||||||
|
not start_desc.map[cp < CompiledRegex::StartDesc::count ? cp : CompiledRegex::StartDesc::other])
|
||||||
++start;
|
++start;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user