utf8::codepoint: configurable invalid byte policy

2012-10-13 18:31:29 +02:00 · 2012-10-13 18:31:29 +02:00 · dfafcdb6e6
commit dfafcdb6e6
parent 4f1ab5b749
5 changed files with 29 additions and 12 deletions
--- a/src/editor.cc
+++ b/src/editor.cc
@ -335,7 +335,7 @@ void Editor::end_edition()
    --m_edition_level;
 }
-using utf8_it = utf8::utf8_iterator<BufferIterator>;
+using utf8_it = utf8::utf8_iterator<BufferIterator, utf8::InvalidBytePolicy::Pass>;
 IncrementalInserter::IncrementalInserter(Editor& editor, InsertMode mode)
    : m_editor(editor), m_edition(editor), m_mode(mode)
--- a/src/ncurses.cc
+++ b/src/ncurses.cc
@ -105,9 +105,9 @@ static void redraw(WINDOW* menu_win)
    }
    doupdate();
 }
-
+using Utf8Policy = utf8::InvalidBytePolicy::Pass;
-using utf8_it = utf8::utf8_iterator<String::iterator>;
+using Utf8Iterator = utf8::utf8_iterator<String::iterator, Utf8Policy>;
-void addutf8str(utf8_it begin, utf8_it end)
+void addutf8str(Utf8Iterator begin, Utf8Iterator end)
 {
    while (begin != end)
        addch(*begin++);
@ -142,12 +142,12 @@ void NCursesUI::draw_window(Window& window)
            getyx(stdscr, y,x);
            if (content[content.length()-1] == '\n' and content.length() - 1 < max_x - x)
            {
-                addutf8str(utf8_it(content.begin()), utf8_it(content.end())-1);
+                addutf8str(Utf8Iterator(content.begin()), Utf8Iterator(content.end())-1);
                addch(' ');
            }
            else
            {
-                utf8_it begin(content.begin()), end(content.end());
+                Utf8Iterator begin(content.begin()), end(content.end());
                if (end - begin > max_x - x)
                    end = begin + (max_x - x);
                addutf8str(begin, end);
@ -234,7 +234,7 @@ void NCursesUI::print_status(const String& status, CharCount cursor_pos)
        auto end = status.end();
        addutf8str(status.begin(), cursor_it);
        set_attribute(A_REVERSE, 1);
-        addch((cursor_it == end) ? ' ' : utf8::codepoint(cursor_it));
+        addch((cursor_it == end) ? ' ' : utf8::codepoint<Utf8Policy>(cursor_it));
        set_attribute(A_REVERSE, 0);
        if (cursor_it != end)
            addutf8str(utf8::next(cursor_it), end);
--- a/src/selectors.cc
+++ b/src/selectors.cc
@ -9,7 +9,7 @@
 namespace Kakoune
 {
-using Utf8Iterator = utf8::utf8_iterator<BufferIterator>;
+using Utf8Iterator = utf8::utf8_iterator<BufferIterator, utf8::InvalidBytePolicy::Pass>;
 namespace
 {
--- a/src/utf8.hh
+++ b/src/utf8.hh
@ -81,9 +81,25 @@ bool is_character_start(Iterator it)
 // exception type thrown by InvalidBytePolicy::Throw
 struct invalid_utf8_sequence{};
 // Policies selecting what utf8::codepoint yields for an invalid byte.
 // A policy is a default-constructible functor that is called with the
 // offending byte and returns the Codepoint to use in its place.
 namespace InvalidBytePolicy
 {
 // Rejects the input: always throws invalid_utf8_sequence.
 struct Throw
 {
    Codepoint operator()(char /*byte*/) const { throw invalid_utf8_sequence{}; }
 };
 // Passes the raw byte value through as the codepoint.
 struct Pass
 {
    // Convert through unsigned char first: where plain char is signed,
    // a byte >= 0x80 would otherwise be sign-extended instead of
    // mapping to 0x80..0xFF.
    Codepoint operator()(char byte) const
    {
        return static_cast<Codepoint>(static_cast<unsigned char>(byte));
    }
 };
 }
 // returns the codepoint of the character whose first byte
 // is pointed to by it
-template<typename Iterator>
+template<typename InvalidPolicy = InvalidBytePolicy::Throw,
         typename Iterator>
 Codepoint codepoint(Iterator it)
 {
    // According to rfc3629, UTF-8 allows only up to 4 bytes.
@ -108,7 +124,7 @@ Codepoint codepoint(Iterator it)
        cp |= (*it & 0x3F);
    }
    else
-        throw invalid_utf8_sequence{};
+        cp = InvalidPolicy{}(byte);
    return cp;
 }
--- a/src/utf8_iterator.hh
+++ b/src/utf8_iterator.hh
@ -11,7 +11,8 @@ namespace utf8
 // adapter for an iterator on bytes which allows iterating
 // over unicode codepoints instead.
-template<typename Iterator>
+template<typename Iterator,
         typename InvalidPolicy = InvalidBytePolicy::Throw>
 class utf8_iterator
 {
 public:
@ -125,7 +126,7 @@ private:
    Codepoint get_value() const
    {
        if (m_value == -1)
-            m_value = utf8::codepoint(m_it);
+            m_value = utf8::codepoint<InvalidPolicy>(m_it);
        return m_value;
    }