utf8::codepoint: configurable invalid byte policy

This commit is contained in:
Maxime Coste 2012-10-13 18:31:29 +02:00
parent 4f1ab5b749
commit dfafcdb6e6
5 changed files with 29 additions and 12 deletions

View File

@ -335,7 +335,7 @@ void Editor::end_edition()
--m_edition_level;
}
using utf8_it = utf8::utf8_iterator<BufferIterator>;
using utf8_it = utf8::utf8_iterator<BufferIterator, utf8::InvalidBytePolicy::Pass>;
IncrementalInserter::IncrementalInserter(Editor& editor, InsertMode mode)
: m_editor(editor), m_edition(editor), m_mode(mode)

View File

@ -105,9 +105,9 @@ static void redraw(WINDOW* menu_win)
}
doupdate();
}
using utf8_it = utf8::utf8_iterator<String::iterator>;
void addutf8str(utf8_it begin, utf8_it end)
using Utf8Policy = utf8::InvalidBytePolicy::Pass;
using Utf8Iterator = utf8::utf8_iterator<String::iterator, Utf8Policy>;
void addutf8str(Utf8Iterator begin, Utf8Iterator end)
{
while (begin != end)
addch(*begin++);
@ -142,12 +142,12 @@ void NCursesUI::draw_window(Window& window)
getyx(stdscr, y,x);
if (content[content.length()-1] == '\n' and content.length() - 1 < max_x - x)
{
addutf8str(utf8_it(content.begin()), utf8_it(content.end())-1);
addutf8str(Utf8Iterator(content.begin()), Utf8Iterator(content.end())-1);
addch(' ');
}
else
{
utf8_it begin(content.begin()), end(content.end());
Utf8Iterator begin(content.begin()), end(content.end());
if (end - begin > max_x - x)
end = begin + (max_x - x);
addutf8str(begin, end);
@ -234,7 +234,7 @@ void NCursesUI::print_status(const String& status, CharCount cursor_pos)
auto end = status.end();
addutf8str(status.begin(), cursor_it);
set_attribute(A_REVERSE, 1);
addch((cursor_it == end) ? ' ' : utf8::codepoint(cursor_it));
addch((cursor_it == end) ? ' ' : utf8::codepoint<Utf8Policy>(cursor_it));
set_attribute(A_REVERSE, 0);
if (cursor_it != end)
addutf8str(utf8::next(cursor_it), end);

View File

@ -9,7 +9,7 @@
namespace Kakoune
{
using Utf8Iterator = utf8::utf8_iterator<BufferIterator>;
using Utf8Iterator = utf8::utf8_iterator<BufferIterator, utf8::InvalidBytePolicy::Pass>;
namespace
{

View File

@ -81,9 +81,25 @@ bool is_character_start(Iterator it)
struct invalid_utf8_sequence{};
// Policies selecting what utf8::codepoint does with a byte that does not
// start a valid utf8 sequence. A policy is a default constructible functor
// which gets called with the offending byte; whatever it returns is used
// as the decoded codepoint.
namespace InvalidBytePolicy
{

// Rejects the byte: decoding fails by throwing invalid_utf8_sequence.
struct Throw
{
    // the byte itself is not needed, so the parameter is left unnamed
    Codepoint operator()(char) const { throw invalid_utf8_sequence{}; }
};

// Accepts the byte: it is passed through as the codepoint having the
// same value (0x00 to 0xFF).
struct Pass
{
    Codepoint operator()(char byte) const
    {
        // Invalid bytes always have their high bit set. Where char is
        // signed, converting it directly would sign extend the value into
        // a negative (or huge, if Codepoint is unsigned) codepoint, so
        // convert through unsigned char to keep the raw byte value.
        return static_cast<unsigned char>(byte);
    }
};

}
// returns the codepoint of the character whose first byte
// is pointed to by it
template<typename Iterator>
template<typename InvalidPolicy = InvalidBytePolicy::Throw,
typename Iterator>
Codepoint codepoint(Iterator it)
{
// According to rfc3629, UTF-8 allows only up to 4 bytes.
@ -108,7 +124,7 @@ Codepoint codepoint(Iterator it)
cp |= (*it & 0x3F);
}
else
throw invalid_utf8_sequence{};
cp = InvalidPolicy{}(byte);
return cp;
}

View File

@ -11,7 +11,8 @@ namespace utf8
// adapter for an iterator on bytes which allows iterating
// over unicode codepoints instead.
template<typename Iterator>
template<typename Iterator,
typename InvalidPolicy = InvalidBytePolicy::Throw>
class utf8_iterator
{
public:
@ -125,7 +126,7 @@ private:
// Returns the codepoint found at m_it. The codepoint is decoded only while
// m_value still holds the -1 "not decoded yet" marker, and the decoded
// value is stored in m_value so it can be returned directly afterwards.
// NOTE(review): the two assignments below look like the removed and the
// added line of this diff hunk (plain decode vs. decode using the
// iterator's InvalidPolicy); presumably only one of them exists in any
// given version of the file, confirm against the real header.
Codepoint get_value() const
{
if (m_value == -1)
m_value = utf8::codepoint(m_it);
m_value = utf8::codepoint<InvalidPolicy>(m_it);
return m_value;
}