From 4cd7583bbcd85671eea51c9d7b0d1c2fbcc65b6d Mon Sep 17 00:00:00 2001 From: Maxime Coste Date: Wed, 31 Oct 2018 21:13:14 +1100 Subject: [PATCH] Improve regex vm to next start performance by avoiding iterator copies --- src/regex_impl.hh | 16 ++++++++++++---- src/utf8_iterator.hh | 5 +++++ 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/src/regex_impl.hh b/src/regex_impl.hh index b0d7feb5..4ccbb617 100644 --- a/src/regex_impl.hh +++ b/src/regex_impl.hh @@ -525,10 +525,15 @@ private: void to_next_start(EffectiveIt& start, const EffectiveIt& end, const StartDesc& start_desc) { - Codepoint cp; - while (start != end and (cp = *start) >= 0 and - not start_desc.map[cp < StartDesc::count ? cp : StartDesc::other]) - ++start; + while (start != end) + { + const Codepoint cp = read(start); + if (start_desc.map[(cp >= 0 and cp < StartDesc::count) ? cp : StartDesc::other]) + { + --start; + return; + } + } } template @@ -596,6 +601,9 @@ private: return is_word(*(pos-1)) != is_word(*pos); } + static Codepoint read(Utf8It& it) { return it.read(); } + static Codepoint read(std::reverse_iterator& it) { Codepoint cp = *it; ++it; return cp; } + static const Iterator& get_base(const Utf8It& it) { return it.base(); } static Iterator get_base(const std::reverse_iterator& it) { return it.base().base(); } diff --git a/src/utf8_iterator.hh b/src/utf8_iterator.hh index c145f900..9d10df2a 100644 --- a/src/utf8_iterator.hh +++ b/src/utf8_iterator.hh @@ -130,6 +130,11 @@ public: return get_value(); } + CodepointType read() noexcept(noexcept_policy) + { + return (CodepointType)utf8::read_codepoint(m_it, m_end); + } + const BaseIt& base() const noexcept(noexcept_policy) { return m_it; } private: