Regex: Use only 128 characters in start desc and encode others as 0

Using 257 map entries consumed a lot of memory for no good reason, as
codepoints > 127 are not common enough to be treated specially.
This commit is contained in:
Maxime Coste 2018-04-29 16:42:46 +10:00
parent 528ecb7417
commit 1e8026f143
2 changed files with 15 additions and 11 deletions

View File

@ -15,6 +15,7 @@ namespace Kakoune
{ {
constexpr Codepoint CompiledRegex::StartDesc::other; constexpr Codepoint CompiledRegex::StartDesc::other;
constexpr Codepoint CompiledRegex::StartDesc::count;
struct ParsedRegex struct ParsedRegex
{ {
@ -906,15 +907,16 @@ private:
{ {
for (auto& range : character_class.ranges) for (auto& range : character_class.ranges)
{ {
auto min = std::min(CompiledRegex::StartDesc::other, range.min); constexpr auto clamp = [](Codepoint cp) { return std::min(CompiledRegex::StartDesc::count, cp); };
auto max = std::min(CompiledRegex::StartDesc::other, range.max); for (auto cp = clamp(range.min), end = clamp(range.max + 1); cp < end; ++cp)
for (Codepoint cp = min; cp <= max; ++cp)
start_desc.map[cp] = true; start_desc.map[cp] = true;
if (range.max >= CompiledRegex::StartDesc::count)
start_desc.map[CompiledRegex::StartDesc::other] = true;
} }
} }
else else
{ {
for (Codepoint cp = 0; cp < CompiledRegex::StartDesc::other; ++cp) for (Codepoint cp = 0; cp < CompiledRegex::StartDesc::count; ++cp)
{ {
if (start_desc.map[cp] or is_character_class(character_class, cp)) if (start_desc.map[cp] or is_character_class(character_class, cp))
start_desc.map[cp] = true; start_desc.map[cp] = true;
@ -926,7 +928,7 @@ private:
case ParsedRegex::CharacterType: case ParsedRegex::CharacterType:
{ {
const CharacterType ctype = (CharacterType)node.value; const CharacterType ctype = (CharacterType)node.value;
for (Codepoint cp = 0; cp < CompiledRegex::StartDesc::other; ++cp) for (Codepoint cp = 0; cp < CompiledRegex::StartDesc::count; ++cp)
{ {
if (is_ctype(ctype, cp)) if (is_ctype(ctype, cp))
start_desc.map[cp] = true; start_desc.map[cp] = true;

View File

@ -103,9 +103,9 @@ struct CompiledRegex : RefCountable, UseMemoryDomain<MemoryDomain::Regex>
struct StartDesc : UseMemoryDomain<MemoryDomain::Regex> struct StartDesc : UseMemoryDomain<MemoryDomain::Regex>
{ {
static constexpr size_t count = 256; static constexpr Codepoint count = 128;
static constexpr Codepoint other = 256; static constexpr Codepoint other = 0;
bool map[count+1]; bool map[count];
}; };
std::unique_ptr<StartDesc> forward_start_desc; std::unique_ptr<StartDesc> forward_start_desc;
@ -213,7 +213,8 @@ public:
return false; return false;
} }
else if (start != config.end and else if (start != config.end and
not start_desc->map[std::min(*start, CompiledRegex::StartDesc::other)]) not start_desc->map[*start < CompiledRegex::StartDesc::count ?
*start : CompiledRegex::StartDesc::other])
return false; return false;
} }
@ -509,8 +510,9 @@ private:
void to_next_start(EffectiveIt& start, const EffectiveIt& end, void to_next_start(EffectiveIt& start, const EffectiveIt& end,
const CompiledRegex::StartDesc& start_desc) const CompiledRegex::StartDesc& start_desc)
{ {
while (start != end and *start >= 0 and Codepoint cp;
not start_desc.map[std::min(*start, CompiledRegex::StartDesc::other)]) while (start != end and (cp = *start) >= 0 and
not start_desc.map[cp < CompiledRegex::StartDesc::count ? cp : CompiledRegex::StartDesc::other])
++start; ++start;
} }