From f52f5f7c53e8a4f0137d5ce66526e1ded33c235c Mon Sep 17 00:00:00 2001
From: Maxime Coste <frrrwww@gmail.com>
Date: Tue, 30 Aug 2016 00:30:52 +0100
Subject: [PATCH] Only decode utf8 when strictly necessary in
 RankedMatch::operator<

---
 src/ranked_match.cc | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/src/ranked_match.cc b/src/ranked_match.cc
index a96925f4..4cebb574 100644
--- a/src/ranked_match.cc
+++ b/src/ranked_match.cc
@@ -146,18 +146,28 @@ bool RankedMatch::operator<(const RankedMatch& other) const
     if (m_max_index != other.m_max_index)
         return m_max_index < other.m_max_index;
 
-    Utf8It it1{m_candidate.begin(), m_candidate}, it2{other.m_candidate.begin(), other.m_candidate};
-    for (; it1 != m_candidate.end() and it2 != other.m_candidate.end(); ++it1, ++it2)
+    auto it1 = m_candidate.begin(), it2 = other.m_candidate.begin();
+    const auto end1 = m_candidate.end(), end2 = other.m_candidate.end();
+    while (true)
     {
-        const auto cp1 = *it1, cp2 = *it2;
+        // advance both iterators past their common prefix without decoding utf8
+        while (it1 != end1 and it2 != end2 and *it1 == *it2)
+            ++it1, ++it2;
+
+        if (it1 == end1 or it2 == end2)
+            return it1 == end1 and it2 != end2;
+
+        // realign both iterators with utf8::character_start, then compare the decoded codepoints
+        it1 = utf8::character_start(it1, m_candidate.begin());
+        it2 = utf8::character_start(it2, other.m_candidate.begin());
+        const auto cp1 = utf8::read_codepoint(it1, end1);
+        const auto cp2 = utf8::read_codepoint(it2, end2);;
         if (cp1 != cp2)
         {
             const bool low1 = islower(cp1), low2 = islower(cp2);
             return low1 == low2 ? cp1 < cp2 : low1;
         }
     }
-
-    return it1 == m_candidate.end() and it2 != other.m_candidate.end();
 }
 
 UnitTest test_ranked_match{[] {