utf8::codepoint: configurable invalid byte policy

This commit is contained in:
Maxime Coste 2012-10-13 18:31:29 +02:00
parent 4f1ab5b749
commit dfafcdb6e6
5 changed files with 29 additions and 12 deletions

View File

@ -335,7 +335,7 @@ void Editor::end_edition()
--m_edition_level; --m_edition_level;
} }
using utf8_it = utf8::utf8_iterator<BufferIterator>; using utf8_it = utf8::utf8_iterator<BufferIterator, utf8::InvalidBytePolicy::Pass>;
IncrementalInserter::IncrementalInserter(Editor& editor, InsertMode mode) IncrementalInserter::IncrementalInserter(Editor& editor, InsertMode mode)
: m_editor(editor), m_edition(editor), m_mode(mode) : m_editor(editor), m_edition(editor), m_mode(mode)

View File

@ -105,9 +105,9 @@ static void redraw(WINDOW* menu_win)
} }
doupdate(); doupdate();
} }
using Utf8Policy = utf8::InvalidBytePolicy::Pass;
using utf8_it = utf8::utf8_iterator<String::iterator>; using Utf8Iterator = utf8::utf8_iterator<String::iterator, Utf8Policy>;
void addutf8str(utf8_it begin, utf8_it end) void addutf8str(Utf8Iterator begin, Utf8Iterator end)
{ {
while (begin != end) while (begin != end)
addch(*begin++); addch(*begin++);
@ -142,12 +142,12 @@ void NCursesUI::draw_window(Window& window)
getyx(stdscr, y,x); getyx(stdscr, y,x);
if (content[content.length()-1] == '\n' and content.length() - 1 < max_x - x) if (content[content.length()-1] == '\n' and content.length() - 1 < max_x - x)
{ {
addutf8str(utf8_it(content.begin()), utf8_it(content.end())-1); addutf8str(Utf8Iterator(content.begin()), Utf8Iterator(content.end())-1);
addch(' '); addch(' ');
} }
else else
{ {
utf8_it begin(content.begin()), end(content.end()); Utf8Iterator begin(content.begin()), end(content.end());
if (end - begin > max_x - x) if (end - begin > max_x - x)
end = begin + (max_x - x); end = begin + (max_x - x);
addutf8str(begin, end); addutf8str(begin, end);
@ -234,7 +234,7 @@ void NCursesUI::print_status(const String& status, CharCount cursor_pos)
auto end = status.end(); auto end = status.end();
addutf8str(status.begin(), cursor_it); addutf8str(status.begin(), cursor_it);
set_attribute(A_REVERSE, 1); set_attribute(A_REVERSE, 1);
addch((cursor_it == end) ? ' ' : utf8::codepoint(cursor_it)); addch((cursor_it == end) ? ' ' : utf8::codepoint<Utf8Policy>(cursor_it));
set_attribute(A_REVERSE, 0); set_attribute(A_REVERSE, 0);
if (cursor_it != end) if (cursor_it != end)
addutf8str(utf8::next(cursor_it), end); addutf8str(utf8::next(cursor_it), end);

View File

@ -9,7 +9,7 @@
namespace Kakoune namespace Kakoune
{ {
using Utf8Iterator = utf8::utf8_iterator<BufferIterator>; using Utf8Iterator = utf8::utf8_iterator<BufferIterator, utf8::InvalidBytePolicy::Pass>;
namespace namespace
{ {

View File

@ -81,9 +81,25 @@ bool is_character_start(Iterator it)
struct invalid_utf8_sequence{}; struct invalid_utf8_sequence{};
// Policies selecting what utf8::codepoint does when the byte it is asked to
// decode is not a lead byte it recognises. Each policy is a stateless
// functor: it receives the offending byte and either yields the codepoint
// to use in its place or refuses by throwing.
namespace InvalidBytePolicy
{

// Strict policy: never returns, always raises invalid_utf8_sequence.
struct Throw
{
    Codepoint operator()(char) const { throw invalid_utf8_sequence{}; }
};

// Lenient policy: the offending byte itself is used as the codepoint.
struct Pass
{
    Codepoint operator()(char byte) const
    {
        // Convert through unsigned char: where plain char is signed, a byte
        // >= 0x80 would otherwise be sign-extended into a negative (or very
        // large) codepoint instead of mapping to 0x80..0xFF.
        return static_cast<unsigned char>(byte);
    }
};

}
// returns the codepoint of the character whose first byte // returns the codepoint of the character whose first byte
// is pointed by it // is pointed by it
template<typename Iterator> template<typename InvalidPolicy = InvalidBytePolicy::Throw,
typename Iterator>
Codepoint codepoint(Iterator it) Codepoint codepoint(Iterator it)
{ {
// According to rfc3629, UTF-8 allows only up to 4 bytes. // According to rfc3629, UTF-8 allows only up to 4 bytes.
@ -108,7 +124,7 @@ Codepoint codepoint(Iterator it)
cp |= (*it & 0x3F); cp |= (*it & 0x3F);
} }
else else
throw invalid_utf8_sequence{}; cp = InvalidPolicy{}(byte);
return cp; return cp;
} }

View File

@ -11,7 +11,8 @@ namespace utf8
// adapter for an iterator on bytes which permits to iterate // adapter for an iterator on bytes which permits to iterate
// on unicode codepoints instead. // on unicode codepoints instead.
template<typename Iterator> template<typename Iterator,
typename InvalidPolicy = InvalidBytePolicy::Throw>
class utf8_iterator class utf8_iterator
{ {
public: public:
@ -125,7 +126,7 @@ private:
// Returns the codepoint at m_it. Decoding happens only while m_value still
// holds -1 (used here as the "not decoded yet" marker); the result is stored
// in m_value so later calls return it without decoding again.
Codepoint get_value() const Codepoint get_value() const
{ {
if (m_value == -1) if (m_value == -1)
m_value = utf8::codepoint(m_it); m_value = utf8::codepoint<InvalidPolicy>(m_it);
return m_value; return m_value;
}