Regex: rename StartChars to StartDesc

It only contains chars for now, but it's still more generally describing where matches can start.
2017-12-01 14:46:18 +08:00 · 2017-12-01 14:46:18 +08:00 · 65b057f261
commit 65b057f261
parent b91f43b031
2 changed files with 35 additions and 38 deletions
--- a/src/regex_impl.cc
+++ b/src/regex_impl.cc
@ -14,7 +14,7 @@
 namespace Kakoune
 {
-constexpr Codepoint CompiledRegex::StartChars::other;
+constexpr Codepoint CompiledRegex::StartDesc::other;
 struct ParsedRegex
 {
@ -623,7 +623,7 @@ struct RegexCompiler
    {
        // Approximation of the number of instructions generated
        m_program.instructions.reserve(CompiledRegex::search_prefix_size + parsed_regex.nodes.size() + 1);
-        m_program.start_chars = compute_start_chars();
+        m_program.start_desc = compute_start_desc();
        write_search_prefix();
        compile_node(0);
@ -861,28 +861,28 @@ private:
    // Fills accepted and rejected according to which chars can start the given node,
    // returns true if the node did not consume the char, hence a following node in
    // sequence would be still relevant for the parent node start chars computation.
-    bool compute_start_chars(ParsedRegex::NodeIndex index,
+    bool compute_start_desc(ParsedRegex::NodeIndex index,
-                             CompiledRegex::StartChars& start_chars) const
+                             CompiledRegex::StartDesc& start_desc) const
    {
        auto& node = get_node(index);
        switch (node.op)
        {
            case ParsedRegex::Literal:
-                if (node.value < CompiledRegex::StartChars::count)
+                if (node.value < CompiledRegex::StartDesc::count)
                {
                    if (node.ignore_case)
                    {
-                        start_chars.map[to_lower(node.value)] = true;
+                        start_desc.map[to_lower(node.value)] = true;
-                        start_chars.map[to_upper(node.value)] = true;
+                        start_desc.map[to_upper(node.value)] = true;
                    }
                    else
-                        start_chars.map[node.value] = true;
+                        start_desc.map[node.value] = true;
                }
                else
-                    start_chars.map[CompiledRegex::StartChars::other] = true;
+                    start_desc.map[CompiledRegex::StartDesc::other] = true;
                return node.quantifier.allows_none();
            case ParsedRegex::AnyChar:
-                for (auto& b : start_chars.map)
+                for (auto& b : start_desc.map)
                    b = true;
               return node.quantifier.allows_none();
            case ParsedRegex::Class:
@ -892,39 +892,39 @@ private:
                {
                    for (auto& range : character_class.ranges)
                    {
-                        auto min = std::min(CompiledRegex::StartChars::other, range.min);
+                        auto min = std::min(CompiledRegex::StartDesc::other, range.min);
-                        auto max = std::min(CompiledRegex::StartChars::other, range.max);
+                        auto max = std::min(CompiledRegex::StartDesc::other, range.max);
                        for (Codepoint cp = min; cp <= max; ++cp)
-                            start_chars.map[cp] = true;
+                            start_desc.map[cp] = true;
                    }
                }
                else
                {
-                    for (Codepoint cp = 0; cp < CompiledRegex::StartChars::other; ++cp)
+                    for (Codepoint cp = 0; cp < CompiledRegex::StartDesc::other; ++cp)
                    {
-                        if (start_chars.map[cp] or is_character_class(character_class, cp))
+                        if (start_desc.map[cp] or is_character_class(character_class, cp))
-                            start_chars.map[cp] = true;
+                            start_desc.map[cp] = true;
                    }
                }
-                start_chars.map[CompiledRegex::StartChars::other] = true;
+                start_desc.map[CompiledRegex::StartDesc::other] = true;
                return node.quantifier.allows_none();
            }
            case ParsedRegex::CharacterType:
            {
                const CharacterType ctype = (CharacterType)node.value;
-                for (Codepoint cp = 0; cp < CompiledRegex::StartChars::other; ++cp)
+                for (Codepoint cp = 0; cp < CompiledRegex::StartDesc::other; ++cp)
                {
                    if (is_ctype(ctype, cp))
-                        start_chars.map[cp] = true;
+                        start_desc.map[cp] = true;
                }
-                start_chars.map[CompiledRegex::StartChars::other] = true;
+                start_desc.map[CompiledRegex::StartDesc::other] = true;
                return node.quantifier.allows_none();
            }
            case ParsedRegex::Sequence:
            {
                bool did_not_consume = false;
                auto does_not_consume = [&, this](auto child) {
-                    return this->compute_start_chars(child, start_chars);
+                    return this->compute_start_desc(child, start_desc);
                };
                if (m_forward)
                    did_not_consume = for_each_child(m_parsed_regex, index, does_not_consume);
@ -937,7 +937,7 @@ private:
            {
                bool all_consumed = not node.quantifier.allows_none();
                for_each_child(m_parsed_regex, index, [&](ParsedRegex::NodeIndex  child) {
-                    if (compute_start_chars(child, start_chars))
+                    if (compute_start_desc(child, start_desc))
                        all_consumed = false;
                    return true;
                });
@ -960,14 +960,14 @@ private:
    }
    [[gnu::noinline]]
-    std::unique_ptr<CompiledRegex::StartChars> compute_start_chars() const
+    std::unique_ptr<CompiledRegex::StartDesc> compute_start_desc() const
    {
-        CompiledRegex::StartChars start_chars{};
+        CompiledRegex::StartDesc start_desc{};
-        if (compute_start_chars(0, start_chars) or
+        if (compute_start_desc(0, start_desc) or
-            not contains(start_chars.map, false))
+            not contains(start_desc.map, false))
            return nullptr;
-        return std::make_unique<CompiledRegex::StartChars>(start_chars);
+        return std::make_unique<CompiledRegex::StartDesc>(start_desc);
    }
    const ParsedRegex::Node& get_node(ParsedRegex::NodeIndex index) const
--- a/src/regex_impl.hh
+++ b/src/regex_impl.hh
@ -101,14 +101,14 @@ struct CompiledRegex : RefCountable, UseMemoryDomain<MemoryDomain::Regex>
    MatchDirection direction;
    size_t save_count;
-    struct StartChars
+    struct StartDesc
    {
        static constexpr size_t count = 256;
        static constexpr Codepoint other = 256;
        bool map[count+1];
    };
-    std::unique_ptr<StartChars> start_chars;
+    std::unique_ptr<StartDesc> start_desc;
 };
 enum class RegexCompileFlags
@ -183,8 +183,8 @@ public:
        const bool search = (flags & RegexExecFlags::Search);
        Utf8It start{m_begin};
-        if (search)
+        if (search and m_program.start_desc)
-            to_next_start(start, m_end, m_program.start_chars.get());
+            to_next_start(start, m_end, *m_program.start_desc);
        return exec_program(start, Thread{&m_program.instructions[search ? 0 : CompiledRegex::search_prefix_size], nullptr});
    }
@ -460,19 +460,16 @@ private:
            std::reverse(state.current_threads.begin(), state.current_threads.end());
            ++pos;
-            if (find_next_start)
+            if (find_next_start and m_program.start_desc)
-                to_next_start(pos, m_end, m_program.start_chars.get());
+                to_next_start(pos, m_end, *m_program.start_desc);
        }
    }
    void to_next_start(Utf8It& start, const Utf8It& end,
-                       const CompiledRegex::StartChars* start_chars)
+                       const CompiledRegex::StartDesc& start_desc)
    {
-        if (not start_chars)
-            return;
        while (start != end and *start >= 0 and
-               not start_chars->map[std::min(*start, CompiledRegex::StartChars::other)])
+               not start_desc.map[std::min(*start, CompiledRegex::StartDesc::other)])
            ++start;
    }