diff --git a/src/regex_impl.cc b/src/regex_impl.cc index 77f1bd2e..19516f63 100644 --- a/src/regex_impl.cc +++ b/src/regex_impl.cc @@ -14,7 +14,7 @@ namespace Kakoune { -constexpr Codepoint CompiledRegex::StartChars::other; +constexpr Codepoint CompiledRegex::StartDesc::other; struct ParsedRegex { @@ -623,7 +623,7 @@ struct RegexCompiler { // Approximation of the number of instructions generated m_program.instructions.reserve(CompiledRegex::search_prefix_size + parsed_regex.nodes.size() + 1); - m_program.start_chars = compute_start_chars(); + m_program.start_desc = compute_start_desc(); write_search_prefix(); compile_node(0); @@ -861,28 +861,28 @@ private: // Fills accepted and rejected according to which chars can start the given node, // returns true if the node did not consume the char, hence a following node in // sequence would be still relevant for the parent node start chars computation. - bool compute_start_chars(ParsedRegex::NodeIndex index, - CompiledRegex::StartChars& start_chars) const + bool compute_start_desc(ParsedRegex::NodeIndex index, + CompiledRegex::StartDesc& start_desc) const { auto& node = get_node(index); switch (node.op) { case ParsedRegex::Literal: - if (node.value < CompiledRegex::StartChars::count) + if (node.value < CompiledRegex::StartDesc::count) { if (node.ignore_case) { - start_chars.map[to_lower(node.value)] = true; - start_chars.map[to_upper(node.value)] = true; + start_desc.map[to_lower(node.value)] = true; + start_desc.map[to_upper(node.value)] = true; } else - start_chars.map[node.value] = true; + start_desc.map[node.value] = true; } else - start_chars.map[CompiledRegex::StartChars::other] = true; + start_desc.map[CompiledRegex::StartDesc::other] = true; return node.quantifier.allows_none(); case ParsedRegex::AnyChar: - for (auto& b : start_chars.map) + for (auto& b : start_desc.map) b = true; return node.quantifier.allows_none(); case ParsedRegex::Class: @@ -892,39 +892,39 @@ private: { for (auto& range : character_class.ranges) { - auto min = std::min(CompiledRegex::StartChars::other, range.min); - auto max = std::min(CompiledRegex::StartChars::other, range.max); + auto min = std::min(CompiledRegex::StartDesc::other, range.min); + auto max = std::min(CompiledRegex::StartDesc::other, range.max); for (Codepoint cp = min; cp <= max; ++cp) - start_chars.map[cp] = true; + start_desc.map[cp] = true; } } else { - for (Codepoint cp = 0; cp < CompiledRegex::StartChars::other; ++cp) + for (Codepoint cp = 0; cp < CompiledRegex::StartDesc::other; ++cp) { - if (start_chars.map[cp] or is_character_class(character_class, cp)) - start_chars.map[cp] = true; + if (start_desc.map[cp] or is_character_class(character_class, cp)) + start_desc.map[cp] = true; } } - start_chars.map[CompiledRegex::StartChars::other] = true; + start_desc.map[CompiledRegex::StartDesc::other] = true; return node.quantifier.allows_none(); } case ParsedRegex::CharacterType: { const CharacterType ctype = (CharacterType)node.value; - for (Codepoint cp = 0; cp < CompiledRegex::StartChars::other; ++cp) + for (Codepoint cp = 0; cp < CompiledRegex::StartDesc::other; ++cp) { if (is_ctype(ctype, cp)) - start_chars.map[cp] = true; + start_desc.map[cp] = true; } - start_chars.map[CompiledRegex::StartChars::other] = true; + start_desc.map[CompiledRegex::StartDesc::other] = true; return node.quantifier.allows_none(); } case ParsedRegex::Sequence: { bool did_not_consume = false; auto does_not_consume = [&, this](auto child) { - return this->compute_start_chars(child, start_chars); + return this->compute_start_desc(child, start_desc); }; if (m_forward) did_not_consume = for_each_child(m_parsed_regex, index, does_not_consume); @@ -937,7 +937,7 @@ private: { bool all_consumed = not node.quantifier.allows_none(); for_each_child(m_parsed_regex, index, [&](ParsedRegex::NodeIndex child) { - if (compute_start_chars(child, start_chars)) + if (compute_start_desc(child, start_desc)) all_consumed = false; return true; }); @@ -960,14 +960,14 @@ private: } [[gnu::noinline]] - std::unique_ptr compute_start_chars() const + std::unique_ptr compute_start_desc() const { - CompiledRegex::StartChars start_chars{}; - if (compute_start_chars(0, start_chars) or - not contains(start_chars.map, false)) + CompiledRegex::StartDesc start_desc{}; + if (compute_start_desc(0, start_desc) or + not contains(start_desc.map, false)) return nullptr; - return std::make_unique(start_chars); + return std::make_unique(start_desc); } const ParsedRegex::Node& get_node(ParsedRegex::NodeIndex index) const diff --git a/src/regex_impl.hh b/src/regex_impl.hh index a2575ca6..11dea98d 100644 --- a/src/regex_impl.hh +++ b/src/regex_impl.hh @@ -101,14 +101,14 @@ struct CompiledRegex : RefCountable, UseMemoryDomain MatchDirection direction; size_t save_count; - struct StartChars + struct StartDesc { static constexpr size_t count = 256; static constexpr Codepoint other = 256; bool map[count+1]; }; - std::unique_ptr start_chars; + std::unique_ptr start_desc; }; enum class RegexCompileFlags @@ -183,8 +183,8 @@ public: const bool search = (flags & RegexExecFlags::Search); Utf8It start{m_begin}; - if (search) - to_next_start(start, m_end, m_program.start_chars.get()); + if (search and m_program.start_desc) + to_next_start(start, m_end, *m_program.start_desc); return exec_program(start, Thread{&m_program.instructions[search ? 0 : CompiledRegex::search_prefix_size], nullptr}); } @@ -460,19 +460,16 @@ private: std::reverse(state.current_threads.begin(), state.current_threads.end()); ++pos; - if (find_next_start) - to_next_start(pos, m_end, m_program.start_chars.get()); + if (find_next_start and m_program.start_desc) + to_next_start(pos, m_end, *m_program.start_desc); } } void to_next_start(Utf8It& start, const Utf8It& end, - const CompiledRegex::StartChars* start_chars) + const CompiledRegex::StartDesc& start_desc) { - if (not start_chars) - return; - while (start != end and *start >= 0 and - not start_chars->map[std::min(*start, CompiledRegex::StartChars::other)]) + not start_desc.map[std::min(*start, CompiledRegex::StartDesc::other)]) ++start; }