From dfafcdb6e6b52ebdd5b664b7561e682c32762663 Mon Sep 17 00:00:00 2001
From: Maxime Coste <frrrwww@gmail.com>
Date: Sat, 13 Oct 2012 18:31:29 +0200
Subject: [PATCH] utf8::codepoint: configurable invalid byte policy

---
 src/editor.cc        |  2 +-
 src/ncurses.cc       | 12 ++++++------
 src/selectors.cc     |  2 +-
 src/utf8.hh          | 20 ++++++++++++++++++--
 src/utf8_iterator.hh |  5 +++--
 5 files changed, 29 insertions(+), 12 deletions(-)
diff --git a/src/editor.cc b/src/editor.cc
index 1a4cfdb7..cee14eda 100644
--- a/src/editor.cc
+++ b/src/editor.cc
@@ -335,7 +335,7 @@ void Editor::end_edition()
     --m_edition_level;
 }
 
-using utf8_it = utf8::utf8_iterator<BufferIterator>;
+using utf8_it = utf8::utf8_iterator<BufferIterator, utf8::InvalidBytePolicy::Pass>; // Pass: undecodable bytes yield a codepoint instead of throwing
 
 IncrementalInserter::IncrementalInserter(Editor& editor, InsertMode mode)
     : m_editor(editor), m_edition(editor), m_mode(mode)
diff --git a/src/ncurses.cc b/src/ncurses.cc
index 2f9ab649..6418c26b 100644
--- a/src/ncurses.cc
+++ b/src/ncurses.cc
@@ -105,9 +105,9 @@ static void redraw(WINDOW* menu_win)
     }
     doupdate();
 }
-
-using utf8_it = utf8::utf8_iterator<String::iterator>;
-void addutf8str(utf8_it begin, utf8_it end)
+using Utf8Policy = utf8::InvalidBytePolicy::Pass; // undecodable bytes yield a codepoint instead of throwing
+using Utf8Iterator = utf8::utf8_iterator<String::iterator, Utf8Policy>; // codepoint-wise iteration over a String using that policy
+void addutf8str(Utf8Iterator begin, Utf8Iterator end)
 {
     while (begin != end)
         addch(*begin++);
@@ -142,12 +142,12 @@ void NCursesUI::draw_window(Window& window)
             getyx(stdscr, y,x);
             if (content[content.length()-1] == '\n' and content.length() - 1 < max_x - x)
             {
-                addutf8str(utf8_it(content.begin()), utf8_it(content.end())-1);
+                addutf8str(Utf8Iterator(content.begin()), Utf8Iterator(content.end())-1);
                 addch(' ');
             }
             else
             {
-                utf8_it begin(content.begin()), end(content.end());
+                Utf8Iterator begin(content.begin()), end(content.end());
                 if (end - begin > max_x - x)
                     end = begin + (max_x - x);
                 addutf8str(begin, end);
@@ -234,7 +234,7 @@ void NCursesUI::print_status(const String& status, CharCount cursor_pos)
         auto end = status.end();
         addutf8str(status.begin(), cursor_it);
         set_attribute(A_REVERSE, 1);
-        addch((cursor_it == end) ? ' ' : utf8::codepoint(cursor_it));
+        addch((cursor_it == end) ? ' ' : utf8::codepoint<Utf8Policy>(cursor_it));
         set_attribute(A_REVERSE, 0);
         if (cursor_it != end)
             addutf8str(utf8::next(cursor_it), end);
diff --git a/src/selectors.cc b/src/selectors.cc
index 24e552c9..65df890b 100644
--- a/src/selectors.cc
+++ b/src/selectors.cc
@@ -9,7 +9,7 @@
 namespace Kakoune
 {
 
-using Utf8Iterator = utf8::utf8_iterator<BufferIterator>;
+using Utf8Iterator = utf8::utf8_iterator<BufferIterator, utf8::InvalidBytePolicy::Pass>; // Pass: undecodable bytes yield a codepoint instead of throwing
 
 namespace
 {
diff --git a/src/utf8.hh b/src/utf8.hh
index 0f136c68..53737414 100644
--- a/src/utf8.hh
+++ b/src/utf8.hh
@@ -81,9 +81,25 @@ bool is_character_start(Iterator it)
 
 struct invalid_utf8_sequence{};
 
+namespace InvalidBytePolicy // functors utf8::codepoint applies to a leading byte it cannot decode
+{
+
+struct Throw // rejects the byte: always throws invalid_utf8_sequence, never returns
+{
+    Codepoint operator()(char) const { throw invalid_utf8_sequence{}; }
+};
+
+struct Pass // accepts the byte: returns its value zero-extended (0..255), never sign-extended
+{
+    Codepoint operator()(char byte) const { return static_cast<unsigned char>(byte); }
+};
+
+}
+
 // returns the codepoint of the character whose first byte
 // is pointed by it
-template<typename Iterator>
+template<typename InvalidPolicy = InvalidBytePolicy::Throw,
+         typename Iterator>
 Codepoint codepoint(Iterator it)
 {
     // According to rfc3629, UTF-8 allows only up to 4 bytes.
@@ -108,7 +124,7 @@ Codepoint codepoint(Iterator it)
         cp |= (*it & 0x3F);
     }
     else
-        throw invalid_utf8_sequence{};
+        cp = InvalidPolicy{}(byte);
     return cp;
 }
 
diff --git a/src/utf8_iterator.hh b/src/utf8_iterator.hh
index e782e297..b3077dc8 100644
--- a/src/utf8_iterator.hh
+++ b/src/utf8_iterator.hh
@@ -11,7 +11,8 @@ namespace utf8
 
 // adapter for an iterator on bytes which permits to iterate
 // on unicode codepoints instead.
-template<typename Iterator>
+template<typename Iterator,
+         typename InvalidPolicy = InvalidBytePolicy::Throw>
 class utf8_iterator
 {
 public:
@@ -125,7 +126,7 @@ private:
     Codepoint get_value() const
     {
         if (m_value == -1)
-            m_value = utf8::codepoint(m_it);
+            m_value = utf8::codepoint<InvalidPolicy>(m_it);
         return m_value;
     }