From 27571a77165f5cba2f7d0a7e335a4241dda91df7 Mon Sep 17 00:00:00 2001 From: Maxime Coste Date: Wed, 23 Sep 2015 19:39:21 +0100 Subject: [PATCH] Refactor utf8::iterator to be on the safe side utf8::iterator now knows the iterator's valid range, and passes it to utf8 functions. --- src/keys.cc | 2 +- src/normal.cc | 7 ++++-- src/selection.cc | 2 -- src/selectors.cc | 20 +++++++-------- src/selectors.hh | 8 +++--- src/string.cc | 6 ++--- src/utf8_iterator.hh | 58 ++++++++++++++++++-------------------------- src/word_db.cc | 4 +-- 8 files changed, 49 insertions(+), 58 deletions(-) diff --git a/src/keys.cc b/src/keys.cc index 349cbd53..4b9a83ec 100644 --- a/src/keys.cc +++ b/src/keys.cc @@ -58,7 +58,7 @@ KeyList parse_keys(StringView str) { KeyList result; using Utf8It = utf8::iterator; - for (Utf8It it = str.begin(), str_end = str.end(); it < str_end; ++it) + for (Utf8It it{str.begin(), str}, str_end{str.end(), str}; it < str_end; ++it) { if (*it != '<') { diff --git a/src/normal.cc b/src/normal.cc index 539bf76f..e0648abd 100644 --- a/src/normal.cc +++ b/src/normal.cc @@ -637,10 +637,13 @@ void use_selection_as_search_pattern(Context& context, NormalParams) Vector patterns; auto& sels = context.selections(); const auto& buffer = context.buffer(); + using Utf8It = utf8::iterator; for (auto& sel : sels) { - auto begin = utf8::make_iterator(buffer.iterator_at(sel.min())); - auto end = utf8::make_iterator(buffer.iterator_at(sel.max()))+1; + Utf8It begin{buffer.iterator_at(sel.min()), buffer}; + Utf8It end{buffer.iterator_at(sel.max()), buffer}; + ++end; + auto content = "\\Q" + buffer.string(begin.base().coord(), end.base().coord()) + "\\E"; if (smart) { diff --git a/src/selection.cc b/src/selection.cc index 69cce1b8..b2432d6e 100644 --- a/src/selection.cc +++ b/src/selection.cc @@ -394,8 +394,6 @@ void SelectionList::check_invariant() const kak_assert(buffer.is_valid(sel.cursor())); kak_assert(not buffer.is_end(sel.anchor())); kak_assert(not buffer.is_end(sel.cursor())); - 
kak_assert(utf8::is_character_start(buffer.byte_at(sel.anchor()))); - kak_assert(utf8::is_character_start(buffer.byte_at(sel.cursor()))); } #endif } diff --git a/src/selectors.cc b/src/selectors.cc index 8965fc51..99f4f95a 100644 --- a/src/selectors.cc +++ b/src/selectors.cc @@ -10,7 +10,7 @@ namespace Kakoune Selection select_line(const Buffer& buffer, const Selection& selection) { - Utf8Iterator first = buffer.iterator_at(selection.cursor()); + Utf8Iterator first{buffer.iterator_at(selection.cursor()), buffer}; if (*first == '\n' and first + 1 != buffer.end()) ++first; @@ -26,7 +26,7 @@ Selection select_line(const Buffer& buffer, const Selection& selection) Selection select_matching(const Buffer& buffer, const Selection& selection) { Vector matching_pairs = { '(', ')', '{', '}', '[', ']', '<', '>' }; - Utf8Iterator it = buffer.iterator_at(selection.cursor()); + Utf8Iterator it{buffer.iterator_at(selection.cursor()), buffer}; Vector::iterator match = matching_pairs.end(); while (not is_eol(*it)) { @@ -82,7 +82,7 @@ static Optional find_surrounding(const Buffer& buffer, const bool to_end = flags & ObjectFlags::ToEnd; const bool nestable = matching.opening != matching.closing; auto pos = buffer.iterator_at(coord); - Utf8Iterator first = pos; + Utf8Iterator first{pos, buffer}; if (to_begin) { int level = nestable ? init_level : 0; @@ -103,7 +103,7 @@ static Optional find_surrounding(const Buffer& buffer, return Optional{}; } - Utf8Iterator last = pos; + Utf8Iterator last{pos, buffer}; if (to_end) { int level = nestable ? 
init_level : 0; @@ -168,7 +168,7 @@ Selection select_surrounding(const Buffer& buffer, const Selection& selection, Selection select_to(const Buffer& buffer, const Selection& selection, Codepoint c, int count, bool inclusive) { - Utf8Iterator begin = buffer.iterator_at(selection.cursor()); + Utf8Iterator begin{buffer.iterator_at(selection.cursor()), buffer}; Utf8Iterator end = begin; do { @@ -185,7 +185,7 @@ Selection select_to(const Buffer& buffer, const Selection& selection, Selection select_to_reverse(const Buffer& buffer, const Selection& selection, Codepoint c, int count, bool inclusive) { - Utf8Iterator begin = buffer.iterator_at(selection.cursor()); + Utf8Iterator begin{buffer.iterator_at(selection.cursor()), buffer}; Utf8Iterator end = begin; do { @@ -367,8 +367,8 @@ Selection select_whitespaces(const Buffer& buffer, const Selection& selection, O --last; } } - return (flags & ObjectFlags::ToEnd) ? utf8_range(first, last) - : utf8_range(last, first); + return (flags & ObjectFlags::ToEnd) ? Selection{first.coord(), last.coord()} + : Selection{last.coord(), first.coord()}; } Selection select_indent(const Buffer& buffer, const Selection& selection, ObjectFlags flags) @@ -511,8 +511,8 @@ Selection select_argument(const Buffer& buffer, const Selection& selection, --end; if (flags & ObjectFlags::ToBegin and not (flags & ObjectFlags::ToEnd)) - return utf8_range(pos, begin); - return utf8_range(flags & ObjectFlags::ToBegin ? begin : pos, end); + return {pos.coord(), begin.coord()}; + return {(flags & ObjectFlags::ToBegin ? 
begin : pos).coord(), end.coord()}; } Selection select_lines(const Buffer& buffer, const Selection& selection) diff --git a/src/selectors.hh b/src/selectors.hh index 2f638216..7809344f 100644 --- a/src/selectors.hh +++ b/src/selectors.hh @@ -48,7 +48,7 @@ inline Selection utf8_range(const Utf8Iterator& first, const Utf8Iterator& last) template Selection select_to_next_word(const Buffer& buffer, const Selection& selection) { - Utf8Iterator begin = buffer.iterator_at(selection.cursor()); + Utf8Iterator begin{buffer.iterator_at(selection.cursor()), buffer}; if (begin+1 == buffer.end()) return selection; if (categorize(*begin) != categorize(*(begin+1))) @@ -72,7 +72,7 @@ Selection select_to_next_word(const Buffer& buffer, const Selection& selection) template Selection select_to_next_word_end(const Buffer& buffer, const Selection& selection) { - Utf8Iterator begin = buffer.iterator_at(selection.cursor()); + Utf8Iterator begin{buffer.iterator_at(selection.cursor()), buffer}; if (begin+1 == buffer.end()) return selection; if (categorize(*begin) != categorize(*(begin+1))) @@ -95,7 +95,7 @@ Selection select_to_next_word_end(const Buffer& buffer, const Selection& selecti template Selection select_to_previous_word(const Buffer& buffer, const Selection& selection) { - Utf8Iterator begin = buffer.iterator_at(selection.cursor()); + Utf8Iterator begin{buffer.iterator_at(selection.cursor()), buffer}; if (begin == buffer.begin()) return selection; if (categorize(*begin) != categorize(*(begin-1))) @@ -160,7 +160,7 @@ Selection select_word(const Buffer& buffer, const Selection& selection, ObjectFlags flags) { - Utf8Iterator first = buffer.iterator_at(selection.cursor()); + Utf8Iterator first{buffer.iterator_at(selection.cursor()), buffer}; Utf8Iterator last = first; if (is_word(*first)) { diff --git a/src/string.cc b/src/string.cc index b6d0f3de..e0f3525d 100644 --- a/src/string.cc +++ b/src/string.cc @@ -225,12 +225,12 @@ Vector wrap_lines(StringView text, CharCount max_width) throw 
runtime_error("Invalid max width"); using Utf8It = utf8::iterator; - Utf8It word_begin{text.begin()}; + Utf8It word_begin{text.begin(), text}; Utf8It word_end{word_begin}; - Utf8It end{text.end()}; + Utf8It end{text.end(), text}; CharCount col = 0; Vector lines; - Utf8It line_begin = text.begin(); + Utf8It line_begin{text.begin(), text}; Utf8It line_end = line_begin; while (word_begin != end) { diff --git a/src/utf8_iterator.hh b/src/utf8_iterator.hh index 43242a73..b8ba5764 100644 --- a/src/utf8_iterator.hh +++ b/src/utf8_iterator.hh @@ -19,12 +19,18 @@ class iterator : public std::iterator { public: - iterator() = default; - iterator(Iterator it) : m_it(std::move(it)) {} + iterator(Iterator it, Iterator begin, Iterator end) + : m_it{std::move(it)}, m_begin{std::move(begin)}, m_end{std::move(end)} + {} + + template + iterator(Iterator it, const Container& c) + : m_it{std::move(it)}, m_begin{begin(c)}, m_end{end(c)} + {} iterator& operator++() { - m_it = utf8::next(m_it, Iterator{}); + m_it = utf8::next(m_it, m_end); invalidate_value(); return *this; } @@ -38,7 +44,7 @@ public: iterator& operator--() { - m_it = utf8::previous(m_it, Iterator{}); + m_it = utf8::previous(m_it, m_begin); invalidate_value(); return *this; } @@ -75,25 +81,20 @@ public: bool operator==(const iterator& other) { return m_it == other.m_it; } bool operator!=(const iterator& other) { return m_it != other.m_it; } - bool operator< (const iterator& other) const - { - return m_it < other.m_it; - } + bool operator< (const iterator& other) const { return m_it < other.m_it; } + bool operator<= (const iterator& other) const { return m_it <= other.m_it; } - bool operator<= (const iterator& other) const - { - return m_it <= other.m_it; - } + bool operator> (const iterator& other) const { return m_it > other.m_it; } + bool operator>= (const iterator& other) const { return m_it >= other.m_it; } - bool operator> (const iterator& other) const - { - return m_it > other.m_it; - } + bool operator==(const 
Iterator& other) { return m_it == other; } + bool operator!=(const Iterator& other) { return m_it != other; } - bool operator>= (const iterator& other) const - { - return m_it >= other.m_it; - } + bool operator< (const Iterator& other) const { return m_it < other; } + bool operator<= (const Iterator& other) const { return m_it <= other; } + + bool operator> (const Iterator& other) const { return m_it > other; } + bool operator>= (const Iterator& other) const { return m_it >= other; } CharCount operator-(const iterator& other) const { @@ -108,32 +109,21 @@ public: const Iterator& base() const { return m_it; } Iterator& base() { return m_it; } -protected: - void check_invariant() const - { - // always point to a character first byte; - // kak_assert(is_character_start(it)); - } - private: void invalidate_value() { m_value = -1; } Codepoint get_value() const { if (m_value == -1) - m_value = utf8::codepoint(m_it, Iterator{}); + m_value = utf8::codepoint(m_it, m_end); return m_value; } Iterator m_it; + Iterator m_begin; + Iterator m_end; mutable Codepoint m_value = -1; }; -template -iterator make_iterator(Iterator it) -{ - return iterator{std::move(it)}; -} - } } diff --git a/src/word_db.cc b/src/word_db.cc index 8ac7c8d3..4ef5a912 100644 --- a/src/word_db.cc +++ b/src/word_db.cc @@ -30,10 +30,10 @@ UsedLetters used_letters(StringView str) static WordDB::WordList get_words(const SharedString& content) { WordDB::WordList res; - using Iterator = utf8::iterator; + using Utf8It = utf8::iterator; const char* word_start = content.begin(); bool in_word = false; - for (Iterator it{word_start}, end{content.end()}; it != end; ++it) + for (Utf8It it{word_start, content}, end{content.end(), content}; it != end; ++it) { Codepoint c = *it; const bool word = is_word(c);