Do not decode UTF-8 when looking for the next regex match start

There is no need to decode: the StartDesc only covers ASCII, and every byte of a non-ASCII character is >= 0x80 in UTF-8, so we know any non-ASCII characters will be treated as Other in the StartDesc.
2019-12-04 22:00:31 +11:00 · 2019-12-04 22:00:31 +11:00 · d539e8fb89
commit d539e8fb89
parent ee2985739b
1 changed files with 16 additions and 18 deletions
--- a/src/regex_impl.hh
+++ b/src/regex_impl.hh
@@ -235,8 +235,8 @@ public:
            }
            else if (start != config.end)
            {
-                const Codepoint cp = codepoint(start, config);
-                 if (not start_desc->map[cp < StartDesc::count ? cp : StartDesc::other])
+                const unsigned char c = forward ? *start : *utf8::previous(start, config.end);
+                 if (not start_desc->map[(c < StartDesc::count) ? c : StartDesc::other])
                    return false;
            }
        }
@@ -525,12 +525,21 @@ private:
    {
        while (start != config.end)
        {
-            const Codepoint cp = read_codepoint(start, config);
-            if (start_desc.map[(cp >= 0 and cp < StartDesc::count) ? cp : StartDesc::other])
+            static_assert(StartDesc::count <= 128, "start desc should be ascii only");
+            if constexpr (forward)
            {
-                forward ? utf8::to_previous(start, config.subject_begin)
-                        : utf8::to_next(start, config.subject_end);
-                return;
+                const unsigned char c = *start;
+                if (start_desc.map[(c < StartDesc::count) ? c : StartDesc::other])
+                    return;
+                utf8::to_next(start, config.end);
+            }
+            else
+            {
+                auto prev = utf8::previous(start, config.end);
+                const unsigned char c = *prev;
+                if (start_desc.map[(c < StartDesc::count) ? c : StartDesc::other])
+                    return;
+                start = prev;
            }
        }
    }
@@ -612,17 +621,6 @@ private:
               is_word(utf8::codepoint(pos, config.subject_end));
    }

-    static Codepoint read_codepoint(Iterator& it, const ExecConfig& config)
-    {
-        if (forward)
-            return utf8::read_codepoint(it, config.subject_end);
-        else
-        {
-            utf8::to_previous(it, config.subject_begin);
-            return utf8::codepoint(it, config.subject_end);
-        }
-    }
-
    static Codepoint codepoint(const Iterator& it, const ExecConfig& config)
    {
        return utf8::codepoint(forward ? it : utf8::previous(it, config.subject_begin),