Improve regex VM to_next_start performance by avoiding iterator copies

This commit is contained in:
Maxime Coste 2018-10-31 21:13:14 +11:00
parent 4cfb46ff2e
commit 4cd7583bbc
2 changed files with 17 additions and 4 deletions

View File

@ -525,10 +525,15 @@ private:
// Advance `start` to the first position in [start, end) whose codepoint is
// flagged in start_desc.map; `start` is left equal to `end` when none is.
// Codepoints outside [0, StartDesc::count) share the StartDesc::other slot.
void to_next_start(EffectiveIt& start, const EffectiveIt& end, const StartDesc& start_desc)
{
    while (start != end)
    {
        // read() yields the codepoint and moves `start` past it in one step.
        const Codepoint cp = read(start);
        if (start_desc.map[(cp >= 0 and cp < StartDesc::count) ? cp : StartDesc::other])
        {
            --start; // step back so `start` designates the candidate codepoint
            return;
        }
    }
}
template<MatchDirection look_direction, bool ignore_case>
@ -596,6 +601,9 @@ private:
return is_word(*(pos-1)) != is_word(*pos);
}
// Forward-iterator overload: hand the work to the iterator's own read().
static Codepoint read(Utf8It& it)
{
    return it.read();
}
// Reverse-iterator overload: fetch the current codepoint, then step the
// iterator in place (pre-increment, so no iterator copy is made).
static Codepoint read(std::reverse_iterator<Utf8It>& it)
{
    Codepoint current = *it;
    ++it;
    return current;
}
// Forward-iterator overload: expose the iterator that `it` wraps, by reference.
static const Iterator& get_base(const Utf8It& it)
{
    return it.base();
}
// Reverse-iterator overload: unwrap twice, first the reverse adaptor, then
// the Utf8It it holds. Returned by value.
static Iterator get_base(const std::reverse_iterator<Utf8It>& it)
{
    const Utf8It& forward_it = it.base();
    return forward_it.base();
}

View File

@ -130,6 +130,11 @@ public:
return get_value();
}
// Decode one codepoint through utf8::read_codepoint, handing it m_it and
// m_end, and return the result converted to CodepointType.
// NOTE(review): presumably read_codepoint advances m_it past the decoded
// sequence; confirm against its declaration.
CodepointType read() noexcept(noexcept_policy)
{
    const auto decoded = utf8::read_codepoint<InvalidPolicy>(m_it, m_end);
    return (CodepointType)decoded;
}
// Read-only access to the wrapped iterator m_it.
const BaseIt& base() const noexcept(noexcept_policy)
{
    return m_it;
}
private: