utf8::codepoint: configurable invalid byte policy
This commit is contained in:
parent
4f1ab5b749
commit
dfafcdb6e6
|
@ -335,7 +335,7 @@ void Editor::end_edition()
|
||||||
--m_edition_level;
|
--m_edition_level;
|
||||||
}
|
}
|
||||||
|
|
||||||
using utf8_it = utf8::utf8_iterator<BufferIterator>;
|
using utf8_it = utf8::utf8_iterator<BufferIterator, utf8::InvalidBytePolicy::Pass>;
|
||||||
|
|
||||||
IncrementalInserter::IncrementalInserter(Editor& editor, InsertMode mode)
|
IncrementalInserter::IncrementalInserter(Editor& editor, InsertMode mode)
|
||||||
: m_editor(editor), m_edition(editor), m_mode(mode)
|
: m_editor(editor), m_edition(editor), m_mode(mode)
|
||||||
|
|
|
@ -105,9 +105,9 @@ static void redraw(WINDOW* menu_win)
|
||||||
}
|
}
|
||||||
doupdate();
|
doupdate();
|
||||||
}
|
}
|
||||||
|
using Utf8Policy = utf8::InvalidBytePolicy::Pass;
|
||||||
using utf8_it = utf8::utf8_iterator<String::iterator>;
|
using Utf8Iterator = utf8::utf8_iterator<String::iterator, Utf8Policy>;
|
||||||
void addutf8str(utf8_it begin, utf8_it end)
|
void addutf8str(Utf8Iterator begin, Utf8Iterator end)
|
||||||
{
|
{
|
||||||
while (begin != end)
|
while (begin != end)
|
||||||
addch(*begin++);
|
addch(*begin++);
|
||||||
|
@ -142,12 +142,12 @@ void NCursesUI::draw_window(Window& window)
|
||||||
getyx(stdscr, y,x);
|
getyx(stdscr, y,x);
|
||||||
if (content[content.length()-1] == '\n' and content.length() - 1 < max_x - x)
|
if (content[content.length()-1] == '\n' and content.length() - 1 < max_x - x)
|
||||||
{
|
{
|
||||||
addutf8str(utf8_it(content.begin()), utf8_it(content.end())-1);
|
addutf8str(Utf8Iterator(content.begin()), Utf8Iterator(content.end())-1);
|
||||||
addch(' ');
|
addch(' ');
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
utf8_it begin(content.begin()), end(content.end());
|
Utf8Iterator begin(content.begin()), end(content.end());
|
||||||
if (end - begin > max_x - x)
|
if (end - begin > max_x - x)
|
||||||
end = begin + (max_x - x);
|
end = begin + (max_x - x);
|
||||||
addutf8str(begin, end);
|
addutf8str(begin, end);
|
||||||
|
@ -234,7 +234,7 @@ void NCursesUI::print_status(const String& status, CharCount cursor_pos)
|
||||||
auto end = status.end();
|
auto end = status.end();
|
||||||
addutf8str(status.begin(), cursor_it);
|
addutf8str(status.begin(), cursor_it);
|
||||||
set_attribute(A_REVERSE, 1);
|
set_attribute(A_REVERSE, 1);
|
||||||
addch((cursor_it == end) ? ' ' : utf8::codepoint(cursor_it));
|
addch((cursor_it == end) ? ' ' : utf8::codepoint<Utf8Policy>(cursor_it));
|
||||||
set_attribute(A_REVERSE, 0);
|
set_attribute(A_REVERSE, 0);
|
||||||
if (cursor_it != end)
|
if (cursor_it != end)
|
||||||
addutf8str(utf8::next(cursor_it), end);
|
addutf8str(utf8::next(cursor_it), end);
|
||||||
|
|
|
@ -9,7 +9,7 @@
|
||||||
namespace Kakoune
|
namespace Kakoune
|
||||||
{
|
{
|
||||||
|
|
||||||
using Utf8Iterator = utf8::utf8_iterator<BufferIterator>;
|
using Utf8Iterator = utf8::utf8_iterator<BufferIterator, utf8::InvalidBytePolicy::Pass>;
|
||||||
|
|
||||||
namespace
|
namespace
|
||||||
{
|
{
|
||||||
|
|
20
src/utf8.hh
20
src/utf8.hh
|
@ -81,9 +81,25 @@ bool is_character_start(Iterator it)
|
||||||
|
|
||||||
struct invalid_utf8_sequence{};
|
struct invalid_utf8_sequence{};
|
||||||
|
|
||||||
|
namespace InvalidBytePolicy
|
||||||
|
{
|
||||||
|
|
||||||
|
struct Throw
|
||||||
|
{
|
||||||
|
Codepoint operator()(char byte) const { throw invalid_utf8_sequence{}; }
|
||||||
|
};
|
||||||
|
|
||||||
|
struct Pass
|
||||||
|
{
|
||||||
|
Codepoint operator()(char byte) const { return byte; }
|
||||||
|
};
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
// returns the codepoint of the character whose first byte
|
// returns the codepoint of the character whose first byte
|
||||||
// is pointed by it
|
// is pointed by it
|
||||||
template<typename Iterator>
|
template<typename InvalidPolicy = InvalidBytePolicy::Throw,
|
||||||
|
typename Iterator>
|
||||||
Codepoint codepoint(Iterator it)
|
Codepoint codepoint(Iterator it)
|
||||||
{
|
{
|
||||||
// According to rfc3629, UTF-8 allows only up to 4 bytes.
|
// According to rfc3629, UTF-8 allows only up to 4 bytes.
|
||||||
|
@ -108,7 +124,7 @@ Codepoint codepoint(Iterator it)
|
||||||
cp |= (*it & 0x3F);
|
cp |= (*it & 0x3F);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
throw invalid_utf8_sequence{};
|
cp = InvalidPolicy{}(byte);
|
||||||
return cp;
|
return cp;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -11,7 +11,8 @@ namespace utf8
|
||||||
|
|
||||||
// adapter for an iterator on bytes which permits to iterate
|
// adapter for an iterator on bytes which permits to iterate
|
||||||
// on unicode codepoints instead.
|
// on unicode codepoints instead.
|
||||||
template<typename Iterator>
|
template<typename Iterator,
|
||||||
|
typename InvalidPolicy = InvalidBytePolicy::Throw>
|
||||||
class utf8_iterator
|
class utf8_iterator
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
|
@ -125,7 +126,7 @@ private:
|
||||||
Codepoint get_value() const
|
Codepoint get_value() const
|
||||||
{
|
{
|
||||||
if (m_value == -1)
|
if (m_value == -1)
|
||||||
m_value = utf8::codepoint(m_it);
|
m_value = utf8::codepoint<InvalidPolicy>(m_it);
|
||||||
return m_value;
|
return m_value;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user