utf8: use end of sequence iterators for more security
This commit is contained in:
parent
3f70d91f8c
commit
ed68d1ff28
|
@ -452,7 +452,7 @@ ByteCoord Buffer::char_next(ByteCoord coord) const
|
||||||
if (coord.column < m_lines[coord.line].length() - 1)
|
if (coord.column < m_lines[coord.line].length() - 1)
|
||||||
{
|
{
|
||||||
auto& line = m_lines[coord.line];
|
auto& line = m_lines[coord.line];
|
||||||
coord.column += utf8::codepoint_size(line.begin() + (int)coord.column);
|
coord.column += utf8::codepoint_size(line[(int)coord.column]);
|
||||||
// Handle invalid utf-8
|
// Handle invalid utf-8
|
||||||
if (coord.column >= line.length())
|
if (coord.column >= line.length())
|
||||||
{
|
{
|
||||||
|
@ -483,7 +483,7 @@ ByteCoord Buffer::char_prev(ByteCoord coord) const
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
auto& line = m_lines[coord.line];
|
auto& line = m_lines[coord.line];
|
||||||
coord.column = (int)(utf8::character_start(line.begin() + (int)coord.column - 1) - line.begin());
|
coord.column = (int)(utf8::character_start(line.begin() + (int)coord.column - 1, line.begin()) - line.begin());
|
||||||
}
|
}
|
||||||
return coord;
|
return coord;
|
||||||
}
|
}
|
||||||
|
|
|
@ -14,7 +14,7 @@ CharCount get_column(const Buffer& buffer,
|
||||||
auto col = 0_char;
|
auto col = 0_char;
|
||||||
for (auto it = line.begin();
|
for (auto it = line.begin();
|
||||||
it != line.end() and coord.column > (int)(it - line.begin());
|
it != line.end() and coord.column > (int)(it - line.begin());
|
||||||
it = utf8::next(it))
|
it = utf8::next(it, line.end()))
|
||||||
{
|
{
|
||||||
if (*it == '\t')
|
if (*it == '\t')
|
||||||
col = (col / tabstop + 1) * tabstop;
|
col = (col / tabstop + 1) * tabstop;
|
||||||
|
|
|
@ -15,13 +15,13 @@ inline String content(const Buffer& buffer, const Selection& range)
|
||||||
inline BufferIterator erase(Buffer& buffer, const Selection& range)
|
inline BufferIterator erase(Buffer& buffer, const Selection& range)
|
||||||
{
|
{
|
||||||
return buffer.erase(buffer.iterator_at(range.min()),
|
return buffer.erase(buffer.iterator_at(range.min()),
|
||||||
utf8::next(buffer.iterator_at(range.max())));
|
utf8::next(buffer.iterator_at(range.max()), buffer.end()));
|
||||||
}
|
}
|
||||||
|
|
||||||
inline CharCount char_length(const Buffer& buffer, const Selection& range)
|
inline CharCount char_length(const Buffer& buffer, const Selection& range)
|
||||||
{
|
{
|
||||||
return utf8::distance(buffer.iterator_at(range.min()),
|
return utf8::distance(buffer.iterator_at(range.min()),
|
||||||
utf8::next(buffer.iterator_at(range.max())));
|
utf8::next(buffer.iterator_at(range.max()), buffer.end()));
|
||||||
}
|
}
|
||||||
|
|
||||||
CharCount get_column(const Buffer& buffer,
|
CharCount get_column(const Buffer& buffer,
|
||||||
|
|
|
@ -602,8 +602,8 @@ void expand_unprintable(const Context& context, HighlightFlags flags, DisplayBuf
|
||||||
for (auto it = buffer.iterator_at(atom_it->begin()),
|
for (auto it = buffer.iterator_at(atom_it->begin()),
|
||||||
end = buffer.iterator_at(atom_it->end()); it < end;)
|
end = buffer.iterator_at(atom_it->end()); it < end;)
|
||||||
{
|
{
|
||||||
Codepoint cp = utf8::codepoint<utf8::InvalidBytePolicy::Pass>(it);
|
Codepoint cp = utf8::codepoint<utf8::InvalidPolicy::Pass>(it, end);
|
||||||
auto next = utf8::next(it);
|
auto next = utf8::next(it, end);
|
||||||
if (cp != '\n' and not iswprint(cp))
|
if (cp != '\n' and not iswprint(cp))
|
||||||
{
|
{
|
||||||
std::ostringstream oss;
|
std::ostringstream oss;
|
||||||
|
|
|
@ -32,7 +32,7 @@ template<bool other_buffers>
|
||||||
InsertCompletion complete_word(const Buffer& buffer, ByteCoord cursor_pos)
|
InsertCompletion complete_word(const Buffer& buffer, ByteCoord cursor_pos)
|
||||||
{
|
{
|
||||||
auto pos = buffer.iterator_at(cursor_pos);
|
auto pos = buffer.iterator_at(cursor_pos);
|
||||||
if (pos == buffer.begin() or not is_word(*utf8::previous(pos)))
|
if (pos == buffer.begin() or not is_word(*utf8::previous(pos, buffer.begin())))
|
||||||
return {};
|
return {};
|
||||||
|
|
||||||
auto end = buffer.iterator_at(cursor_pos);
|
auto end = buffer.iterator_at(cursor_pos);
|
||||||
|
|
|
@ -41,7 +41,7 @@ static const KeyAndName keynamemap[] = {
|
||||||
KeyList parse_keys(StringView str)
|
KeyList parse_keys(StringView str)
|
||||||
{
|
{
|
||||||
KeyList result;
|
KeyList result;
|
||||||
using PassPolicy = utf8::InvalidBytePolicy::Pass;
|
using PassPolicy = utf8::InvalidPolicy::Pass;
|
||||||
using Utf8It = utf8::iterator<const char*, PassPolicy>;
|
using Utf8It = utf8::iterator<const char*, PassPolicy>;
|
||||||
for (Utf8It it = str.begin(), str_end = str.end(); it < str_end; ++it)
|
for (Utf8It it = str.begin(), str_end = str.end(); it < str_end; ++it)
|
||||||
{
|
{
|
||||||
|
@ -71,7 +71,7 @@ KeyList parse_keys(StringView str)
|
||||||
}
|
}
|
||||||
if (keyname.char_length() == 1)
|
if (keyname.char_length() == 1)
|
||||||
{
|
{
|
||||||
result.push_back(Key{ modifier, utf8::codepoint<PassPolicy>(keyname.begin()) });
|
result.push_back(Key{ modifier, utf8::codepoint<PassPolicy>(keyname.begin(),keyname.end()) });
|
||||||
it = end_it;
|
it = end_it;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
|
@ -207,7 +207,7 @@ void NCursesUI::refresh()
|
||||||
m_dirty = false;
|
m_dirty = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
using Utf8Policy = utf8::InvalidBytePolicy::Pass;
|
using Utf8Policy = utf8::InvalidPolicy::Pass;
|
||||||
using Utf8Iterator = utf8::iterator<const char*, Utf8Policy>;
|
using Utf8Iterator = utf8::iterator<const char*, Utf8Policy>;
|
||||||
void addutf8str(WINDOW* win, Utf8Iterator begin, Utf8Iterator end)
|
void addutf8str(WINDOW* win, Utf8Iterator begin, Utf8Iterator end)
|
||||||
{
|
{
|
||||||
|
@ -408,8 +408,9 @@ Key NCursesUI::get_key()
|
||||||
int operator*() { return getch(); }
|
int operator*() { return getch(); }
|
||||||
getch_iterator& operator++() { return *this; }
|
getch_iterator& operator++() { return *this; }
|
||||||
getch_iterator& operator++(int) { return *this; }
|
getch_iterator& operator++(int) { return *this; }
|
||||||
|
bool operator== (const getch_iterator&) const { return false; }
|
||||||
};
|
};
|
||||||
return utf8::codepoint(getch_iterator{});
|
return utf8::codepoint(getch_iterator{}, getch_iterator{});
|
||||||
}
|
}
|
||||||
return Key::Invalid;
|
return Key::Invalid;
|
||||||
}
|
}
|
||||||
|
|
|
@ -719,7 +719,7 @@ void keep(Context& context, int)
|
||||||
for (auto& sel : context.selections())
|
for (auto& sel : context.selections())
|
||||||
{
|
{
|
||||||
if (boost::regex_search(buffer.iterator_at(sel.min()),
|
if (boost::regex_search(buffer.iterator_at(sel.min()),
|
||||||
utf8::next(buffer.iterator_at(sel.max())), ex) == matching)
|
utf8::next(buffer.iterator_at(sel.max()), buffer.end()), ex) == matching)
|
||||||
keep.push_back(sel);
|
keep.push_back(sel);
|
||||||
}
|
}
|
||||||
if (keep.empty())
|
if (keep.empty())
|
||||||
|
|
|
@ -446,7 +446,7 @@ BufferIterator prepare_insert(Buffer& buffer, const Selection& sel, InsertMode m
|
||||||
{
|
{
|
||||||
// special case for end of lines, append to current line instead
|
// special case for end of lines, append to current line instead
|
||||||
auto pos = buffer.iterator_at(sel.max());
|
auto pos = buffer.iterator_at(sel.max());
|
||||||
return *pos == '\n' ? pos : utf8::next(pos);
|
return *pos == '\n' ? pos : utf8::next(pos, buffer.end());
|
||||||
}
|
}
|
||||||
case InsertMode::InsertAtLineBegin:
|
case InsertMode::InsertAtLineBegin:
|
||||||
return buffer.iterator_at(sel.min().line);
|
return buffer.iterator_at(sel.min().line);
|
||||||
|
|
|
@ -494,7 +494,7 @@ void select_all_matches(SelectionList& selections, const Regex& regex)
|
||||||
auto& buffer = selections.buffer();
|
auto& buffer = selections.buffer();
|
||||||
for (auto& sel : selections)
|
for (auto& sel : selections)
|
||||||
{
|
{
|
||||||
auto sel_end = utf8::next(buffer.iterator_at(sel.max()));
|
auto sel_end = utf8::next(buffer.iterator_at(sel.max()), buffer.end());
|
||||||
RegexIterator re_it(buffer.iterator_at(sel.min()), sel_end, regex);
|
RegexIterator re_it(buffer.iterator_at(sel.min()), sel_end, regex);
|
||||||
RegexIterator re_end;
|
RegexIterator re_end;
|
||||||
|
|
||||||
|
@ -511,7 +511,7 @@ void select_all_matches(SelectionList& selections, const Regex& regex)
|
||||||
captures.emplace_back(match.first, match.second);
|
captures.emplace_back(match.first, match.second);
|
||||||
|
|
||||||
result.push_back({ begin.coord(),
|
result.push_back({ begin.coord(),
|
||||||
(begin == end ? end : utf8::previous(end)).coord(),
|
(begin == end ? end : utf8::previous(end, begin)).coord(),
|
||||||
std::move(captures) });
|
std::move(captures) });
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -527,7 +527,7 @@ void split_selections(SelectionList& selections, const Regex& regex)
|
||||||
for (auto& sel : selections)
|
for (auto& sel : selections)
|
||||||
{
|
{
|
||||||
auto begin = buffer.iterator_at(sel.min());
|
auto begin = buffer.iterator_at(sel.min());
|
||||||
auto sel_end = utf8::next(buffer.iterator_at(sel.max()));
|
auto sel_end = utf8::next(buffer.iterator_at(sel.max()), buffer.end());
|
||||||
RegexIterator re_it(begin, sel_end, regex,
|
RegexIterator re_it(begin, sel_end, regex,
|
||||||
boost::regex_constants::match_nosubs);
|
boost::regex_constants::match_nosubs);
|
||||||
RegexIterator re_end;
|
RegexIterator re_end;
|
||||||
|
@ -536,7 +536,7 @@ void split_selections(SelectionList& selections, const Regex& regex)
|
||||||
{
|
{
|
||||||
BufferIterator end = (*re_it)[0].first;
|
BufferIterator end = (*re_it)[0].first;
|
||||||
|
|
||||||
result.push_back({ begin.coord(), (begin == end) ? end.coord() : utf8::previous(end).coord() });
|
result.push_back({ begin.coord(), (begin == end) ? end.coord() : utf8::previous(end, begin).coord() });
|
||||||
begin = (*re_it)[0].second;
|
begin = (*re_it)[0].second;
|
||||||
}
|
}
|
||||||
if (begin.coord() <= sel.max())
|
if (begin.coord() <= sel.max())
|
||||||
|
|
|
@ -50,7 +50,7 @@ inline void remove_selection(SelectionList& selections, int index)
|
||||||
selections.check_invariant();
|
selections.check_invariant();
|
||||||
}
|
}
|
||||||
|
|
||||||
using Utf8Iterator = utf8::iterator<BufferIterator, utf8::InvalidBytePolicy::Pass>;
|
using Utf8Iterator = utf8::iterator<BufferIterator, utf8::InvalidPolicy::Pass>;
|
||||||
|
|
||||||
inline Selection utf8_range(const Utf8Iterator& first, const Utf8Iterator& last)
|
inline Selection utf8_range(const Utf8Iterator& first, const Utf8Iterator& last)
|
||||||
{
|
{
|
||||||
|
@ -265,7 +265,7 @@ Selection find_next_match(const Buffer& buffer, const Selection& sel, const Rege
|
||||||
CaptureList captures;
|
CaptureList captures;
|
||||||
MatchResults matches;
|
MatchResults matches;
|
||||||
bool found = false;
|
bool found = false;
|
||||||
if ((found = find_match_in_buffer<direction>(buffer, utf8::next(begin), matches, regex)))
|
if ((found = find_match_in_buffer<direction>(buffer, utf8::next(begin, buffer.end()), matches, regex)))
|
||||||
{
|
{
|
||||||
begin = matches[0].first;
|
begin = matches[0].first;
|
||||||
end = matches[0].second;
|
end = matches[0].second;
|
||||||
|
@ -275,7 +275,7 @@ Selection find_next_match(const Buffer& buffer, const Selection& sel, const Rege
|
||||||
if (not found or begin == buffer.end())
|
if (not found or begin == buffer.end())
|
||||||
throw runtime_error("'" + regex.str() + "': no matches found");
|
throw runtime_error("'" + regex.str() + "': no matches found");
|
||||||
|
|
||||||
end = (begin == end) ? end : utf8::previous(end);
|
end = (begin == end) ? end : utf8::previous(end, begin);
|
||||||
if (direction == Backward)
|
if (direction == Backward)
|
||||||
std::swap(begin, end);
|
std::swap(begin, end);
|
||||||
|
|
||||||
|
|
|
@ -99,7 +99,7 @@ void test_utf8()
|
||||||
{
|
{
|
||||||
String str = "maïs mélange bientôt";
|
String str = "maïs mélange bientôt";
|
||||||
kak_assert(utf8::distance(str.begin(), str.end()) == 20);
|
kak_assert(utf8::distance(str.begin(), str.end()) == 20);
|
||||||
kak_assert(utf8::codepoint(str.begin() + 2) == 0x00EF);
|
kak_assert(utf8::codepoint(str.begin() + 2, str.end()) == 0x00EF);
|
||||||
}
|
}
|
||||||
|
|
||||||
void test_string()
|
void test_string()
|
||||||
|
|
77
src/utf8.hh
77
src/utf8.hh
|
@ -15,10 +15,10 @@ namespace utf8
|
||||||
|
|
||||||
// returns an iterator to the next character's first byte
|
// returns an iterator to the next character's first byte
|
||||||
template<typename Iterator>
|
template<typename Iterator>
|
||||||
Iterator next(Iterator it)
|
Iterator next(Iterator it, Iterator end)
|
||||||
{
|
{
|
||||||
if (*it++ & 0x80)
|
if (it != end and *it++ & 0x80)
|
||||||
while ((*(it) & 0xC0) == 0x80)
|
while (it != end and (*(it) & 0xC0) == 0x80)
|
||||||
++it;
|
++it;
|
||||||
return it;
|
return it;
|
||||||
}
|
}
|
||||||
|
@ -26,18 +26,18 @@ Iterator next(Iterator it)
|
||||||
// returns its parameter if it points to a character first byte,
|
// returns its parameter if it points to a character first byte,
|
||||||
// or else returns next character first byte
|
// or else returns next character first byte
|
||||||
template<typename Iterator>
|
template<typename Iterator>
|
||||||
Iterator finish(Iterator it)
|
Iterator finish(Iterator it, Iterator end)
|
||||||
{
|
{
|
||||||
while ((*(it) & 0xC0) == 0x80)
|
while (it != end and (*(it) & 0xC0) == 0x80)
|
||||||
++it;
|
++it;
|
||||||
return it;
|
return it;
|
||||||
}
|
}
|
||||||
|
|
||||||
// returns an iterator to the previous character first byte
|
// returns an iterator to the previous character first byte
|
||||||
template<typename Iterator>
|
template<typename Iterator>
|
||||||
Iterator previous(Iterator it)
|
Iterator previous(Iterator it, Iterator begin)
|
||||||
{
|
{
|
||||||
while ((*(--it) & 0xC0) == 0x80)
|
while (it != begin and (*(--it) & 0xC0) == 0x80)
|
||||||
;
|
;
|
||||||
return it;
|
return it;
|
||||||
}
|
}
|
||||||
|
@ -51,12 +51,12 @@ Iterator advance(Iterator it, Iterator end, CharCount d)
|
||||||
if (d < 0)
|
if (d < 0)
|
||||||
{
|
{
|
||||||
while (it != end and d++)
|
while (it != end and d++)
|
||||||
it = utf8::previous(it);
|
it = utf8::previous(it, end);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
while (it != end and d--)
|
while (it != end and d--)
|
||||||
it = utf8::next(it);
|
it = utf8::next(it, end);
|
||||||
}
|
}
|
||||||
return it;
|
return it;
|
||||||
}
|
}
|
||||||
|
@ -83,65 +83,72 @@ inline bool is_character_start(char c)
|
||||||
|
|
||||||
// returns an iterator to the first byte of the character it points into
|
// returns an iterator to the first byte of the character it points into
|
||||||
template<typename Iterator>
|
template<typename Iterator>
|
||||||
Iterator character_start(Iterator it)
|
Iterator character_start(Iterator it, Iterator begin)
|
||||||
{
|
{
|
||||||
while (not is_character_start(*it))
|
while (it != begin and not is_character_start(*it))
|
||||||
--it;
|
--it;
|
||||||
return it;
|
return it;
|
||||||
}
|
}
|
||||||
|
|
||||||
namespace InvalidBytePolicy
|
namespace InvalidPolicy
|
||||||
{
|
{
|
||||||
|
|
||||||
struct Assert
|
struct Assert
|
||||||
{
|
{
|
||||||
Codepoint operator()(unsigned char byte) const { kak_assert(false); return byte; }
|
Codepoint operator()(Codepoint cp) const { kak_assert(false); return cp; }
|
||||||
};
|
};
|
||||||
|
|
||||||
struct Pass
|
struct Pass
|
||||||
{
|
{
|
||||||
Codepoint operator()(unsigned char byte) const { return byte; }
|
Codepoint operator()(Codepoint cp) const { return cp; }
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// returns the codepoint of the character whose first byte
|
// returns the codepoint of the character whose first byte
|
||||||
// is pointed to by it
|
// is pointed to by it
|
||||||
template<typename InvalidPolicy = InvalidBytePolicy::Assert,
|
template<typename InvalidPolicy = utf8::InvalidPolicy::Assert,
|
||||||
typename Iterator>
|
typename Iterator>
|
||||||
Codepoint codepoint(Iterator it)
|
Codepoint codepoint(Iterator it, Iterator end)
|
||||||
{
|
{
|
||||||
|
if (it == end)
|
||||||
|
return InvalidPolicy{}(-1);
|
||||||
// According to rfc3629, UTF-8 allows only up to 4 bytes.
|
// According to rfc3629, UTF-8 allows only up to 4 bytes.
|
||||||
// (21 bits codepoint)
|
// (21 bits codepoint)
|
||||||
Codepoint cp;
|
|
||||||
unsigned char byte = *it++;
|
unsigned char byte = *it++;
|
||||||
if (not (byte & 0x80)) // 0xxxxxxx
|
if (not (byte & 0x80)) // 0xxxxxxx
|
||||||
cp = byte;
|
return byte;
|
||||||
else if ((byte & 0xE0) == 0xC0) // 110xxxxx
|
|
||||||
|
if (it == end)
|
||||||
|
return InvalidPolicy{}(byte);
|
||||||
|
|
||||||
|
if ((byte & 0xE0) == 0xC0) // 110xxxxx
|
||||||
|
return ((byte & 0x1F) << 6) | (*it & 0x3F);
|
||||||
|
|
||||||
|
if ((byte & 0xF0) == 0xE0) // 1110xxxx
|
||||||
{
|
{
|
||||||
cp = ((byte & 0x1F) << 6) | (*it & 0x3F);
|
Codepoint cp = ((byte & 0x0F) << 12) | ((*it++ & 0x3F) << 6);
|
||||||
|
if (it == end)
|
||||||
|
return InvalidPolicy{}(cp);
|
||||||
|
return cp | (*it & 0x3F);
|
||||||
}
|
}
|
||||||
else if ((byte & 0xF0) == 0xE0) // 1110xxxx
|
|
||||||
|
if ((byte & 0xF8) == 0xF0) // 11110xxx
|
||||||
{
|
{
|
||||||
cp = ((byte & 0x0F) << 12) | ((*it++ & 0x3F) << 6);
|
Codepoint cp = ((byte & 0x0F) << 18) | ((*it++ & 0x3F) << 12);
|
||||||
cp |= (*it & 0x3F);
|
if (it == end)
|
||||||
}
|
return InvalidPolicy{}(cp);
|
||||||
else if ((byte & 0xF8) == 0xF0) // 11110xxx
|
|
||||||
{
|
|
||||||
cp = ((byte & 0x0F) << 18) | ((*it++ & 0x3F) << 12);
|
|
||||||
cp |= (*it++ & 0x3F) << 6;
|
cp |= (*it++ & 0x3F) << 6;
|
||||||
cp |= (*it & 0x3F);
|
if (it == end)
|
||||||
|
return InvalidPolicy{}(cp);
|
||||||
|
return cp | (*it & 0x3F);
|
||||||
}
|
}
|
||||||
else
|
return InvalidPolicy{}(byte);
|
||||||
cp = InvalidPolicy{}(byte);
|
|
||||||
return cp;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename InvalidPolicy = InvalidBytePolicy::Assert,
|
template<typename InvalidPolicy = utf8::InvalidPolicy::Assert>
|
||||||
typename Iterator>
|
ByteCount codepoint_size(char byte)
|
||||||
ByteCount codepoint_size(Iterator it)
|
|
||||||
{
|
{
|
||||||
unsigned char byte = *it;
|
|
||||||
if (not (byte & 0x80)) // 0xxxxxxx
|
if (not (byte & 0x80)) // 0xxxxxxx
|
||||||
return 1;
|
return 1;
|
||||||
else if ((byte & 0xE0) == 0xC0) // 110xxxxx
|
else if ((byte & 0xE0) == 0xC0) // 110xxxxx
|
||||||
|
|
|
@ -12,7 +12,7 @@ namespace utf8
|
||||||
// adapter for an iterator on bytes which allows iterating
|
// adapter for an iterator on bytes which allows iterating
|
||||||
// over unicode codepoints instead.
|
// over unicode codepoints instead.
|
||||||
template<typename Iterator,
|
template<typename Iterator,
|
||||||
typename InvalidPolicy = InvalidBytePolicy::Assert>
|
typename InvalidPolicy = utf8::InvalidPolicy::Assert>
|
||||||
class iterator
|
class iterator
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
|
@ -21,7 +21,7 @@ public:
|
||||||
|
|
||||||
iterator& operator++()
|
iterator& operator++()
|
||||||
{
|
{
|
||||||
m_it = utf8::next(m_it);
|
m_it = utf8::next(m_it, Iterator{});
|
||||||
invalidate_value();
|
invalidate_value();
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
@ -41,7 +41,7 @@ public:
|
||||||
|
|
||||||
iterator& operator--()
|
iterator& operator--()
|
||||||
{
|
{
|
||||||
m_it = utf8::previous(m_it);
|
m_it = utf8::previous(m_it, Iterator{});
|
||||||
invalidate_value();
|
invalidate_value();
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
@ -132,7 +132,7 @@ private:
|
||||||
Codepoint get_value() const
|
Codepoint get_value() const
|
||||||
{
|
{
|
||||||
if (m_value == -1)
|
if (m_value == -1)
|
||||||
m_value = utf8::codepoint<InvalidPolicy>(m_it);
|
m_value = utf8::codepoint<InvalidPolicy>(m_it, Iterator{});
|
||||||
return m_value;
|
return m_value;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -140,7 +140,7 @@ private:
|
||||||
mutable Codepoint m_value = -1;
|
mutable Codepoint m_value = -1;
|
||||||
};
|
};
|
||||||
|
|
||||||
template<typename InvalidPolicy = InvalidBytePolicy::Assert, typename Iterator>
|
template<typename InvalidPolicy = utf8::InvalidPolicy::Assert, typename Iterator>
|
||||||
iterator<Iterator, InvalidPolicy> make_iterator(Iterator it)
|
iterator<Iterator, InvalidPolicy> make_iterator(Iterator it)
|
||||||
{
|
{
|
||||||
return iterator<Iterator, InvalidPolicy>{std::move(it)};
|
return iterator<Iterator, InvalidPolicy>{std::move(it)};
|
||||||
|
|
|
@ -10,7 +10,7 @@ namespace Kakoune
|
||||||
static std::vector<String> get_words(StringView content)
|
static std::vector<String> get_words(StringView content)
|
||||||
{
|
{
|
||||||
std::vector<String> res;
|
std::vector<String> res;
|
||||||
using Iterator = utf8::iterator<const char*, utf8::InvalidBytePolicy::Pass>;
|
using Iterator = utf8::iterator<const char*, utf8::InvalidPolicy::Pass>;
|
||||||
const char* word_start = content.begin();
|
const char* word_start = content.begin();
|
||||||
bool in_word = false;
|
bool in_word = false;
|
||||||
for (Iterator it{word_start}, end{content.end()}; it != end; ++it)
|
for (Iterator it{word_start}, end{content.end()}; it != end; ++it)
|
||||||
|
|
Loading…
Reference in New Issue
Block a user