From dfafcdb6e6b52ebdd5b664b7561e682c32762663 Mon Sep 17 00:00:00 2001
From: Maxime Coste
Date: Sat, 13 Oct 2012 18:31:29 +0200
Subject: [PATCH] utf8::codepoint: configurable invalid byte policy

---
 src/editor.cc        |  2 +-
 src/ncurses.cc       | 12 ++++++------
 src/selectors.cc     |  2 +-
 src/utf8.hh          | 20 ++++++++++++++++++--
 src/utf8_iterator.hh |  5 +++--
 5 files changed, 29 insertions(+), 12 deletions(-)

diff --git a/src/editor.cc b/src/editor.cc
index 1a4cfdb7..cee14eda 100644
--- a/src/editor.cc
+++ b/src/editor.cc
@@ -335,7 +335,7 @@ void Editor::end_edition()
     --m_edition_level;
 }
 
-using utf8_it = utf8::utf8_iterator<BufferIterator>;
+using utf8_it = utf8::utf8_iterator<BufferIterator, utf8::InvalidBytePolicy::Pass>;
 
 IncrementalInserter::IncrementalInserter(Editor& editor, InsertMode mode)
     : m_editor(editor), m_edition(editor), m_mode(mode)
diff --git a/src/ncurses.cc b/src/ncurses.cc
index 2f9ab649..6418c26b 100644
--- a/src/ncurses.cc
+++ b/src/ncurses.cc
@@ -105,9 +105,9 @@ static void redraw(WINDOW* menu_win)
     }
     doupdate();
 }
-
-using utf8_it = utf8::utf8_iterator<String::iterator>;
-void addutf8str(utf8_it begin, utf8_it end)
+using Utf8Policy = utf8::InvalidBytePolicy::Pass;
+using Utf8Iterator = utf8::utf8_iterator<String::iterator, Utf8Policy>;
+void addutf8str(Utf8Iterator begin, Utf8Iterator end)
 {
     while (begin != end)
         addch(*begin++);
@@ -142,12 +142,12 @@ void NCursesUI::draw_window(Window& window)
             getyx(stdscr, y,x);
             if (content[content.length()-1] == '\n' and content.length() - 1 < max_x - x)
             {
-                addutf8str(utf8_it(content.begin()), utf8_it(content.end())-1);
+                addutf8str(Utf8Iterator(content.begin()), Utf8Iterator(content.end())-1);
                 addch(' ');
             }
             else
             {
-                utf8_it begin(content.begin()), end(content.end());
+                Utf8Iterator begin(content.begin()), end(content.end());
                 if (end - begin > max_x - x)
                     end = begin + (max_x - x);
                 addutf8str(begin, end);
@@ -234,7 +234,7 @@ void NCursesUI::print_status(const String& status, CharCount cursor_pos)
         auto end = status.end();
         addutf8str(status.begin(), cursor_it);
         set_attribute(A_REVERSE, 1);
-        addch((cursor_it == end) ? ' ' : utf8::codepoint(cursor_it));
+        addch((cursor_it == end) ? ' ' : utf8::codepoint<Utf8Policy>(cursor_it));
         set_attribute(A_REVERSE, 0);
         if (cursor_it != end)
             addutf8str(utf8::next(cursor_it), end);
diff --git a/src/selectors.cc b/src/selectors.cc
index 24e552c9..65df890b 100644
--- a/src/selectors.cc
+++ b/src/selectors.cc
@@ -9,7 +9,7 @@
 namespace Kakoune
 {
 
-using Utf8Iterator = utf8::utf8_iterator<BufferIterator>;
+using Utf8Iterator = utf8::utf8_iterator<BufferIterator, utf8::InvalidBytePolicy::Pass>;
 
 namespace
 {
diff --git a/src/utf8.hh b/src/utf8.hh
index 0f136c68..53737414 100644
--- a/src/utf8.hh
+++ b/src/utf8.hh
@@ -81,9 +81,25 @@ bool is_character_start(Iterator it)
 
 struct invalid_utf8_sequence{};
 
+namespace InvalidBytePolicy
+{
+
+struct Throw
+{
+    Codepoint operator()(char byte) const { throw invalid_utf8_sequence{}; }
+};
+
+struct Pass
+{
+    Codepoint operator()(char byte) const { return byte; }
+};
+
+}
+
 // returns the codepoint of the character whose first byte
 // is pointed by it
-template<typename Iterator>
+template<typename InvalidPolicy = InvalidBytePolicy::Throw,
+         typename Iterator>
 Codepoint codepoint(Iterator it)
 {
     // According to rfc3629, UTF-8 allows only up to 4 bytes.
@@ -108,7 +124,7 @@ Codepoint codepoint(Iterator it)
         cp |= (*it & 0x3F);
     }
     else
-        throw invalid_utf8_sequence{};
+        cp = InvalidPolicy{}(byte);
     return cp;
 }
 
diff --git a/src/utf8_iterator.hh b/src/utf8_iterator.hh
index e782e297..b3077dc8 100644
--- a/src/utf8_iterator.hh
+++ b/src/utf8_iterator.hh
@@ -11,7 +11,8 @@ namespace utf8
 
 // adapter for an iterator on bytes which permits to iterate
 // on unicode codepoints instead.
-template<typename Iterator>
+template<typename Iterator,
+         typename InvalidPolicy = InvalidBytePolicy::Throw>
 class utf8_iterator
 {
 public:
@@ -125,7 +126,7 @@ private:
     Codepoint get_value() const
     {
         if (m_value == -1)
-            m_value = utf8::codepoint(m_it);
+            m_value = utf8::codepoint<InvalidPolicy>(m_it);
         return m_value;
     }
 