Refactor utf8::iterator to be on the safe side
utf8::iterator now knows the valid range of its underlying iterator, and passes it to the utf8 functions.
This commit is contained in:
parent
b5972cd647
commit
27571a7716
|
@ -58,7 +58,7 @@ KeyList parse_keys(StringView str)
|
||||||
{
|
{
|
||||||
KeyList result;
|
KeyList result;
|
||||||
using Utf8It = utf8::iterator<const char*>;
|
using Utf8It = utf8::iterator<const char*>;
|
||||||
for (Utf8It it = str.begin(), str_end = str.end(); it < str_end; ++it)
|
for (Utf8It it{str.begin(), str}, str_end{str.end(), str}; it < str_end; ++it)
|
||||||
{
|
{
|
||||||
if (*it != '<')
|
if (*it != '<')
|
||||||
{
|
{
|
||||||
|
|
|
@ -637,10 +637,13 @@ void use_selection_as_search_pattern(Context& context, NormalParams)
|
||||||
Vector<String> patterns;
|
Vector<String> patterns;
|
||||||
auto& sels = context.selections();
|
auto& sels = context.selections();
|
||||||
const auto& buffer = context.buffer();
|
const auto& buffer = context.buffer();
|
||||||
|
using Utf8It = utf8::iterator<BufferIterator, utf8::InvalidPolicy::Pass>;
|
||||||
for (auto& sel : sels)
|
for (auto& sel : sels)
|
||||||
{
|
{
|
||||||
auto begin = utf8::make_iterator(buffer.iterator_at(sel.min()));
|
Utf8It begin{buffer.iterator_at(sel.min()), buffer};
|
||||||
auto end = utf8::make_iterator(buffer.iterator_at(sel.max()))+1;
|
Utf8It end{buffer.iterator_at(sel.max()), buffer};
|
||||||
|
++end;
|
||||||
|
|
||||||
auto content = "\\Q" + buffer.string(begin.base().coord(), end.base().coord()) + "\\E";
|
auto content = "\\Q" + buffer.string(begin.base().coord(), end.base().coord()) + "\\E";
|
||||||
if (smart)
|
if (smart)
|
||||||
{
|
{
|
||||||
|
|
|
@ -394,8 +394,6 @@ void SelectionList::check_invariant() const
|
||||||
kak_assert(buffer.is_valid(sel.cursor()));
|
kak_assert(buffer.is_valid(sel.cursor()));
|
||||||
kak_assert(not buffer.is_end(sel.anchor()));
|
kak_assert(not buffer.is_end(sel.anchor()));
|
||||||
kak_assert(not buffer.is_end(sel.cursor()));
|
kak_assert(not buffer.is_end(sel.cursor()));
|
||||||
kak_assert(utf8::is_character_start(buffer.byte_at(sel.anchor())));
|
|
||||||
kak_assert(utf8::is_character_start(buffer.byte_at(sel.cursor())));
|
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
|
@ -10,7 +10,7 @@ namespace Kakoune
|
||||||
|
|
||||||
Selection select_line(const Buffer& buffer, const Selection& selection)
|
Selection select_line(const Buffer& buffer, const Selection& selection)
|
||||||
{
|
{
|
||||||
Utf8Iterator first = buffer.iterator_at(selection.cursor());
|
Utf8Iterator first{buffer.iterator_at(selection.cursor()), buffer};
|
||||||
if (*first == '\n' and first + 1 != buffer.end())
|
if (*first == '\n' and first + 1 != buffer.end())
|
||||||
++first;
|
++first;
|
||||||
|
|
||||||
|
@ -26,7 +26,7 @@ Selection select_line(const Buffer& buffer, const Selection& selection)
|
||||||
Selection select_matching(const Buffer& buffer, const Selection& selection)
|
Selection select_matching(const Buffer& buffer, const Selection& selection)
|
||||||
{
|
{
|
||||||
Vector<Codepoint> matching_pairs = { '(', ')', '{', '}', '[', ']', '<', '>' };
|
Vector<Codepoint> matching_pairs = { '(', ')', '{', '}', '[', ']', '<', '>' };
|
||||||
Utf8Iterator it = buffer.iterator_at(selection.cursor());
|
Utf8Iterator it{buffer.iterator_at(selection.cursor()), buffer};
|
||||||
Vector<Codepoint>::iterator match = matching_pairs.end();
|
Vector<Codepoint>::iterator match = matching_pairs.end();
|
||||||
while (not is_eol(*it))
|
while (not is_eol(*it))
|
||||||
{
|
{
|
||||||
|
@ -82,7 +82,7 @@ static Optional<Selection> find_surrounding(const Buffer& buffer,
|
||||||
const bool to_end = flags & ObjectFlags::ToEnd;
|
const bool to_end = flags & ObjectFlags::ToEnd;
|
||||||
const bool nestable = matching.opening != matching.closing;
|
const bool nestable = matching.opening != matching.closing;
|
||||||
auto pos = buffer.iterator_at(coord);
|
auto pos = buffer.iterator_at(coord);
|
||||||
Utf8Iterator first = pos;
|
Utf8Iterator first{pos, buffer};
|
||||||
if (to_begin)
|
if (to_begin)
|
||||||
{
|
{
|
||||||
int level = nestable ? init_level : 0;
|
int level = nestable ? init_level : 0;
|
||||||
|
@ -103,7 +103,7 @@ static Optional<Selection> find_surrounding(const Buffer& buffer,
|
||||||
return Optional<Selection>{};
|
return Optional<Selection>{};
|
||||||
}
|
}
|
||||||
|
|
||||||
Utf8Iterator last = pos;
|
Utf8Iterator last{pos, buffer};
|
||||||
if (to_end)
|
if (to_end)
|
||||||
{
|
{
|
||||||
int level = nestable ? init_level : 0;
|
int level = nestable ? init_level : 0;
|
||||||
|
@ -168,7 +168,7 @@ Selection select_surrounding(const Buffer& buffer, const Selection& selection,
|
||||||
Selection select_to(const Buffer& buffer, const Selection& selection,
|
Selection select_to(const Buffer& buffer, const Selection& selection,
|
||||||
Codepoint c, int count, bool inclusive)
|
Codepoint c, int count, bool inclusive)
|
||||||
{
|
{
|
||||||
Utf8Iterator begin = buffer.iterator_at(selection.cursor());
|
Utf8Iterator begin{buffer.iterator_at(selection.cursor()), buffer};
|
||||||
Utf8Iterator end = begin;
|
Utf8Iterator end = begin;
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
|
@ -185,7 +185,7 @@ Selection select_to(const Buffer& buffer, const Selection& selection,
|
||||||
Selection select_to_reverse(const Buffer& buffer, const Selection& selection,
|
Selection select_to_reverse(const Buffer& buffer, const Selection& selection,
|
||||||
Codepoint c, int count, bool inclusive)
|
Codepoint c, int count, bool inclusive)
|
||||||
{
|
{
|
||||||
Utf8Iterator begin = buffer.iterator_at(selection.cursor());
|
Utf8Iterator begin{buffer.iterator_at(selection.cursor()), buffer};
|
||||||
Utf8Iterator end = begin;
|
Utf8Iterator end = begin;
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
|
@ -367,8 +367,8 @@ Selection select_whitespaces(const Buffer& buffer, const Selection& selection, O
|
||||||
--last;
|
--last;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return (flags & ObjectFlags::ToEnd) ? utf8_range(first, last)
|
return (flags & ObjectFlags::ToEnd) ? Selection{first.coord(), last.coord()}
|
||||||
: utf8_range(last, first);
|
: Selection{last.coord(), first.coord()};
|
||||||
}
|
}
|
||||||
|
|
||||||
Selection select_indent(const Buffer& buffer, const Selection& selection, ObjectFlags flags)
|
Selection select_indent(const Buffer& buffer, const Selection& selection, ObjectFlags flags)
|
||||||
|
@ -511,8 +511,8 @@ Selection select_argument(const Buffer& buffer, const Selection& selection,
|
||||||
--end;
|
--end;
|
||||||
|
|
||||||
if (flags & ObjectFlags::ToBegin and not (flags & ObjectFlags::ToEnd))
|
if (flags & ObjectFlags::ToBegin and not (flags & ObjectFlags::ToEnd))
|
||||||
return utf8_range(pos, begin);
|
return {pos.coord(), begin.coord()};
|
||||||
return utf8_range(flags & ObjectFlags::ToBegin ? begin : pos, end);
|
return {(flags & ObjectFlags::ToBegin ? begin : pos).coord(), end.coord()};
|
||||||
}
|
}
|
||||||
|
|
||||||
Selection select_lines(const Buffer& buffer, const Selection& selection)
|
Selection select_lines(const Buffer& buffer, const Selection& selection)
|
||||||
|
|
|
@ -48,7 +48,7 @@ inline Selection utf8_range(const Utf8Iterator& first, const Utf8Iterator& last)
|
||||||
template<WordType word_type>
|
template<WordType word_type>
|
||||||
Selection select_to_next_word(const Buffer& buffer, const Selection& selection)
|
Selection select_to_next_word(const Buffer& buffer, const Selection& selection)
|
||||||
{
|
{
|
||||||
Utf8Iterator begin = buffer.iterator_at(selection.cursor());
|
Utf8Iterator begin{buffer.iterator_at(selection.cursor()), buffer};
|
||||||
if (begin+1 == buffer.end())
|
if (begin+1 == buffer.end())
|
||||||
return selection;
|
return selection;
|
||||||
if (categorize<word_type>(*begin) != categorize<word_type>(*(begin+1)))
|
if (categorize<word_type>(*begin) != categorize<word_type>(*(begin+1)))
|
||||||
|
@ -72,7 +72,7 @@ Selection select_to_next_word(const Buffer& buffer, const Selection& selection)
|
||||||
template<WordType word_type>
|
template<WordType word_type>
|
||||||
Selection select_to_next_word_end(const Buffer& buffer, const Selection& selection)
|
Selection select_to_next_word_end(const Buffer& buffer, const Selection& selection)
|
||||||
{
|
{
|
||||||
Utf8Iterator begin = buffer.iterator_at(selection.cursor());
|
Utf8Iterator begin{buffer.iterator_at(selection.cursor()), buffer};
|
||||||
if (begin+1 == buffer.end())
|
if (begin+1 == buffer.end())
|
||||||
return selection;
|
return selection;
|
||||||
if (categorize<word_type>(*begin) != categorize<word_type>(*(begin+1)))
|
if (categorize<word_type>(*begin) != categorize<word_type>(*(begin+1)))
|
||||||
|
@ -95,7 +95,7 @@ Selection select_to_next_word_end(const Buffer& buffer, const Selection& selecti
|
||||||
template<WordType word_type>
|
template<WordType word_type>
|
||||||
Selection select_to_previous_word(const Buffer& buffer, const Selection& selection)
|
Selection select_to_previous_word(const Buffer& buffer, const Selection& selection)
|
||||||
{
|
{
|
||||||
Utf8Iterator begin = buffer.iterator_at(selection.cursor());
|
Utf8Iterator begin{buffer.iterator_at(selection.cursor()), buffer};
|
||||||
if (begin == buffer.begin())
|
if (begin == buffer.begin())
|
||||||
return selection;
|
return selection;
|
||||||
if (categorize<word_type>(*begin) != categorize<word_type>(*(begin-1)))
|
if (categorize<word_type>(*begin) != categorize<word_type>(*(begin-1)))
|
||||||
|
@ -160,7 +160,7 @@ Selection select_word(const Buffer& buffer,
|
||||||
const Selection& selection,
|
const Selection& selection,
|
||||||
ObjectFlags flags)
|
ObjectFlags flags)
|
||||||
{
|
{
|
||||||
Utf8Iterator first = buffer.iterator_at(selection.cursor());
|
Utf8Iterator first{buffer.iterator_at(selection.cursor()), buffer};
|
||||||
Utf8Iterator last = first;
|
Utf8Iterator last = first;
|
||||||
if (is_word<word_type>(*first))
|
if (is_word<word_type>(*first))
|
||||||
{
|
{
|
||||||
|
|
|
@ -225,12 +225,12 @@ Vector<StringView> wrap_lines(StringView text, CharCount max_width)
|
||||||
throw runtime_error("Invalid max width");
|
throw runtime_error("Invalid max width");
|
||||||
|
|
||||||
using Utf8It = utf8::iterator<const char*>;
|
using Utf8It = utf8::iterator<const char*>;
|
||||||
Utf8It word_begin{text.begin()};
|
Utf8It word_begin{text.begin(), text};
|
||||||
Utf8It word_end{word_begin};
|
Utf8It word_end{word_begin};
|
||||||
Utf8It end{text.end()};
|
Utf8It end{text.end(), text};
|
||||||
CharCount col = 0;
|
CharCount col = 0;
|
||||||
Vector<StringView> lines;
|
Vector<StringView> lines;
|
||||||
Utf8It line_begin = text.begin();
|
Utf8It line_begin{text.begin(), text};
|
||||||
Utf8It line_end = line_begin;
|
Utf8It line_end = line_begin;
|
||||||
while (word_begin != end)
|
while (word_begin != end)
|
||||||
{
|
{
|
||||||
|
|
|
@ -19,12 +19,18 @@ class iterator : public std::iterator<std::forward_iterator_tag,
|
||||||
Codepoint, CharCount>
|
Codepoint, CharCount>
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
iterator() = default;
|
iterator(Iterator it, Iterator begin, Iterator end)
|
||||||
iterator(Iterator it) : m_it(std::move(it)) {}
|
: m_it{std::move(it)}, m_begin{std::move(begin)}, m_end{std::move(end)}
|
||||||
|
{}
|
||||||
|
|
||||||
|
template<typename Container>
|
||||||
|
iterator(Iterator it, const Container& c)
|
||||||
|
: m_it{std::move(it)}, m_begin{begin(c)}, m_end{end(c)}
|
||||||
|
{}
|
||||||
|
|
||||||
iterator& operator++()
|
iterator& operator++()
|
||||||
{
|
{
|
||||||
m_it = utf8::next(m_it, Iterator{});
|
m_it = utf8::next(m_it, m_end);
|
||||||
invalidate_value();
|
invalidate_value();
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
@ -38,7 +44,7 @@ public:
|
||||||
|
|
||||||
iterator& operator--()
|
iterator& operator--()
|
||||||
{
|
{
|
||||||
m_it = utf8::previous(m_it, Iterator{});
|
m_it = utf8::previous(m_it, m_begin);
|
||||||
invalidate_value();
|
invalidate_value();
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
@ -75,25 +81,20 @@ public:
|
||||||
bool operator==(const iterator& other) { return m_it == other.m_it; }
|
bool operator==(const iterator& other) { return m_it == other.m_it; }
|
||||||
bool operator!=(const iterator& other) { return m_it != other.m_it; }
|
bool operator!=(const iterator& other) { return m_it != other.m_it; }
|
||||||
|
|
||||||
bool operator< (const iterator& other) const
|
bool operator< (const iterator& other) const { return m_it < other.m_it; }
|
||||||
{
|
bool operator<= (const iterator& other) const { return m_it <= other.m_it; }
|
||||||
return m_it < other.m_it;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool operator<= (const iterator& other) const
|
bool operator> (const iterator& other) const { return m_it > other.m_it; }
|
||||||
{
|
bool operator>= (const iterator& other) const { return m_it >= other.m_it; }
|
||||||
return m_it <= other.m_it;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool operator> (const iterator& other) const
|
bool operator==(const Iterator& other) { return m_it == other; }
|
||||||
{
|
bool operator!=(const Iterator& other) { return m_it != other; }
|
||||||
return m_it > other.m_it;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool operator>= (const iterator& other) const
|
bool operator< (const Iterator& other) const { return m_it < other; }
|
||||||
{
|
bool operator<= (const Iterator& other) const { return m_it <= other; }
|
||||||
return m_it >= other.m_it;
|
|
||||||
}
|
bool operator> (const Iterator& other) const { return m_it > other; }
|
||||||
|
bool operator>= (const Iterator& other) const { return m_it >= other; }
|
||||||
|
|
||||||
CharCount operator-(const iterator& other) const
|
CharCount operator-(const iterator& other) const
|
||||||
{
|
{
|
||||||
|
@ -108,32 +109,21 @@ public:
|
||||||
const Iterator& base() const { return m_it; }
|
const Iterator& base() const { return m_it; }
|
||||||
Iterator& base() { return m_it; }
|
Iterator& base() { return m_it; }
|
||||||
|
|
||||||
protected:
|
|
||||||
void check_invariant() const
|
|
||||||
{
|
|
||||||
// always point to a character first byte;
|
|
||||||
// kak_assert(is_character_start(it));
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
void invalidate_value() { m_value = -1; }
|
void invalidate_value() { m_value = -1; }
|
||||||
Codepoint get_value() const
|
Codepoint get_value() const
|
||||||
{
|
{
|
||||||
if (m_value == -1)
|
if (m_value == -1)
|
||||||
m_value = utf8::codepoint<InvalidPolicy>(m_it, Iterator{});
|
m_value = utf8::codepoint<InvalidPolicy>(m_it, m_end);
|
||||||
return m_value;
|
return m_value;
|
||||||
}
|
}
|
||||||
|
|
||||||
Iterator m_it;
|
Iterator m_it;
|
||||||
|
Iterator m_begin;
|
||||||
|
Iterator m_end;
|
||||||
mutable Codepoint m_value = -1;
|
mutable Codepoint m_value = -1;
|
||||||
};
|
};
|
||||||
|
|
||||||
template<typename InvalidPolicy = utf8::InvalidPolicy::Pass, typename Iterator>
|
|
||||||
iterator<Iterator, InvalidPolicy> make_iterator(Iterator it)
|
|
||||||
{
|
|
||||||
return iterator<Iterator, InvalidPolicy>{std::move(it)};
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -30,10 +30,10 @@ UsedLetters used_letters(StringView str)
|
||||||
static WordDB::WordList get_words(const SharedString& content)
|
static WordDB::WordList get_words(const SharedString& content)
|
||||||
{
|
{
|
||||||
WordDB::WordList res;
|
WordDB::WordList res;
|
||||||
using Iterator = utf8::iterator<const char*, utf8::InvalidPolicy::Pass>;
|
using Utf8It = utf8::iterator<const char*, utf8::InvalidPolicy::Pass>;
|
||||||
const char* word_start = content.begin();
|
const char* word_start = content.begin();
|
||||||
bool in_word = false;
|
bool in_word = false;
|
||||||
for (Iterator it{word_start}, end{content.end()}; it != end; ++it)
|
for (Utf8It it{word_start, content}, end{content.end(), content}; it != end; ++it)
|
||||||
{
|
{
|
||||||
Codepoint c = *it;
|
Codepoint c = *it;
|
||||||
const bool word = is_word(c);
|
const bool word = is_word(c);
|
||||||
|
|
Loading…
Reference in New Issue
Block a user