From 1e8026f143f96467ca15b45101adab2e212d35a2 Mon Sep 17 00:00:00 2001 From: Maxime Coste Date: Sun, 29 Apr 2018 16:42:46 +1000 Subject: [PATCH] Regex: Use only 128 characters in start desc and encode others as 0 Using 257 entries was using lots of memory for no good reason, as codepoints > 127 are not common enough to be treated specially. --- src/regex_impl.cc | 12 +++++++----- src/regex_impl.hh | 14 ++++++++------ 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/src/regex_impl.cc b/src/regex_impl.cc index 38a2dfc6..e074f68b 100644 --- a/src/regex_impl.cc +++ b/src/regex_impl.cc @@ -15,6 +15,7 @@ namespace Kakoune { constexpr Codepoint CompiledRegex::StartDesc::other; +constexpr Codepoint CompiledRegex::StartDesc::count; struct ParsedRegex { @@ -906,15 +907,16 @@ private: { for (auto& range : character_class.ranges) { - auto min = std::min(CompiledRegex::StartDesc::other, range.min); - auto max = std::min(CompiledRegex::StartDesc::other, range.max); - for (Codepoint cp = min; cp <= max; ++cp) + constexpr auto clamp = [](Codepoint cp) { return std::min(CompiledRegex::StartDesc::count, cp); }; + for (auto cp = clamp(range.min), end = clamp(range.max + 1); cp < end; ++cp) start_desc.map[cp] = true; + if (range.max >= CompiledRegex::StartDesc::count) + start_desc.map[CompiledRegex::StartDesc::other] = true; } } else { - for (Codepoint cp = 0; cp < CompiledRegex::StartDesc::other; ++cp) + for (Codepoint cp = 0; cp < CompiledRegex::StartDesc::count; ++cp) { if (start_desc.map[cp] or is_character_class(character_class, cp)) start_desc.map[cp] = true; @@ -926,7 +928,7 @@ private: case ParsedRegex::CharacterType: { const CharacterType ctype = (CharacterType)node.value; - for (Codepoint cp = 0; cp < CompiledRegex::StartDesc::other; ++cp) + for (Codepoint cp = 0; cp < CompiledRegex::StartDesc::count; ++cp) { if (is_ctype(ctype, cp)) start_desc.map[cp] = true; diff --git a/src/regex_impl.hh b/src/regex_impl.hh index 90e276d0..379e5539 100644 --- a/src/regex_impl.hh +++ 
b/src/regex_impl.hh @@ -103,9 +103,9 @@ struct CompiledRegex : RefCountable, UseMemoryDomain struct StartDesc : UseMemoryDomain { - static constexpr size_t count = 256; - static constexpr Codepoint other = 256; - bool map[count+1]; + static constexpr Codepoint count = 128; + static constexpr Codepoint other = 0; + bool map[count]; }; std::unique_ptr forward_start_desc; @@ -213,7 +213,8 @@ public: return false; } else if (start != config.end and - not start_desc->map[std::min(*start, CompiledRegex::StartDesc::other)]) + not start_desc->map[*start < CompiledRegex::StartDesc::count ? + *start : CompiledRegex::StartDesc::other]) return false; } @@ -509,8 +510,9 @@ private: void to_next_start(EffectiveIt& start, const EffectiveIt& end, const CompiledRegex::StartDesc& start_desc) { - while (start != end and *start >= 0 and - not start_desc.map[std::min(*start, CompiledRegex::StartDesc::other)]) + Codepoint cp; + while (start != end and (cp = *start) >= 0 and + not start_desc.map[cp < CompiledRegex::StartDesc::count ? cp : CompiledRegex::StartDesc::other]) ++start; }