diff --git a/src/selectors.cc b/src/selectors.cc index dee4d1ef..d0135f1d 100644 --- a/src/selectors.cc +++ b/src/selectors.cc @@ -1,7 +1,6 @@ #include "selectors.hh" #include "string.hh" -#include "utf8_iterator.hh" #include @@ -10,153 +9,6 @@ namespace Kakoune { -using Utf8Iterator = utf8::utf8_iterator; - -namespace -{ - -template -bool is_word(Codepoint c) -{ - return Kakoune::is_word(c); -} - -template<> -bool is_word(Codepoint c) -{ - return !is_blank(c) and !is_eol(c); -} - -static bool is_punctuation(Codepoint c) -{ - return not (is_word(c) or is_blank(c) or is_eol(c)); -} - -enum class CharCategories -{ - Blank, - EndOfLine, - Word, - Punctuation, -}; - -template -CharCategories categorize(Codepoint c) -{ - if (is_word(c)) - return CharCategories::Word; - if (is_eol(c)) - return CharCategories::EndOfLine; - if (is_blank(c)) - return CharCategories::Blank; - return word_type == WORD ? CharCategories::Word - : CharCategories::Punctuation; -} - -template -void skip_while(Iterator& it, const EndIterator& end, T condition) -{ - while (it != end and condition(*it)) - ++it; -} - -template -void skip_while_reverse(Iterator& it, const BeginIterator& begin, T condition) -{ - while (it != begin and condition(*it)) - --it; -} - -Range utf8_range(const Utf8Iterator& first, const Utf8Iterator& last) -{ - return {first.base().coord(), last.base().coord()}; -} - -} - -typedef boost::regex_iterator RegexIterator; - -template -Selection select_to_next_word(const Buffer& buffer, const Selection& selection) -{ - Utf8Iterator begin = buffer.iterator_at(selection.last()); - if (begin+1 == buffer.end()) - return selection; - if (categorize(*begin) != categorize(*(begin+1))) - ++begin; - - skip_while(begin, buffer.end(), is_eol); - if (begin == buffer.end()) - return selection; - Utf8Iterator end = begin+1; - - if (word_type == Word and is_punctuation(*begin)) - skip_while(end, buffer.end(), is_punctuation); - else if (is_word(*begin)) - skip_while(end, buffer.end(), is_word); - - skip_while(end, buffer.end(), is_blank); - - return utf8_range(begin, end-1); -} -template Selection select_to_next_word(const Buffer&, const Selection&); -template Selection select_to_next_word(const Buffer&, const Selection&); - -template -Selection select_to_next_word_end(const Buffer& buffer, const Selection& selection) -{ - Utf8Iterator begin = buffer.iterator_at(selection.last()); - if (begin+1 == buffer.end()) - return selection; - if (categorize(*begin) != categorize(*(begin+1))) - ++begin; - - skip_while(begin, buffer.end(), is_eol); - if (begin == buffer.end()) - return selection; - Utf8Iterator end = begin; - skip_while(end, buffer.end(), is_blank); - - if (word_type == Word and is_punctuation(*end)) - skip_while(end, buffer.end(), is_punctuation); - else if (is_word(*end)) - skip_while(end, buffer.end(), is_word); - - return utf8_range(begin, end-1); -} -template Selection select_to_next_word_end(const Buffer&, const Selection&); -template Selection select_to_next_word_end(const Buffer&, const Selection&); - -template -Selection select_to_previous_word(const Buffer& buffer, const Selection& selection) -{ - Utf8Iterator begin = buffer.iterator_at(selection.last()); - if (begin == buffer.begin()) - return selection; - if (categorize(*begin) != categorize(*(begin-1))) - --begin; - - skip_while_reverse(begin, buffer.begin(), is_eol); - Utf8Iterator end = begin; - skip_while_reverse(end, buffer.begin(), is_blank); - - bool with_end = false; - if (word_type == Word and is_punctuation(*end)) - { - skip_while_reverse(end, buffer.begin(), is_punctuation); - with_end = is_punctuation(*end); - } - else if (is_word(*end)) - { - skip_while_reverse(end, buffer.begin(), is_word); - with_end = is_word(*end); - } - - return utf8_range(begin, with_end ? end : end+1); -} -template Selection select_to_previous_word(const Buffer&, const Selection&); -template Selection select_to_previous_word(const Buffer&, const Selection&); - Selection select_line(const Buffer& buffer, const Selection& selection) { Utf8Iterator first = buffer.iterator_at(selection.last()); @@ -366,50 +218,6 @@ Selection select_to_eol_reverse(const Buffer& buffer, const Selection& selection return utf8_range(begin, end == buffer.begin() ? end : end+1); } -template -Selection select_whole_word(const Buffer& buffer, const Selection& selection, ObjectFlags flags) -{ - Utf8Iterator first = buffer.iterator_at(selection.last()); - Utf8Iterator last = first; - if (is_word(*first)) - { - if (flags & ObjectFlags::ToBegin) - { - skip_while_reverse(first, buffer.begin(), is_word); - if (not is_word(*first)) - ++first; - } - if (flags & ObjectFlags::ToEnd) - { - skip_while(last, buffer.end(), is_word); - if (not (flags & ObjectFlags::Inner)) - skip_while(last, buffer.end(), is_blank); - --last; - } - } - else if (not (flags & ObjectFlags::Inner)) - { - if (flags & ObjectFlags::ToBegin) - { - skip_while_reverse(first, buffer.begin(), is_blank); - if (not is_word(*first)) - return selection; - skip_while_reverse(first, buffer.begin(), is_word); - if (not is_word(*first)) - ++first; - } - if (flags & ObjectFlags::ToEnd) - { - skip_while(last, buffer.end(), is_blank); - --last; - } - } - return (flags & ObjectFlags::ToEnd) ? utf8_range(first, last) - : utf8_range(last, first); -} -template Selection select_whole_word(const Buffer&, const Selection&, ObjectFlags); -template Selection select_whole_word(const Buffer&, const Selection&, ObjectFlags); - Selection select_whole_sentence(const Buffer& buffer, const Selection& selection, ObjectFlags flags) { BufferIterator first = buffer.iterator_at(selection.last()); diff --git a/src/selectors.hh b/src/selectors.hh index 64887e31..a44cf830 100644 --- a/src/selectors.hh +++ b/src/selectors.hh @@ -4,6 +4,7 @@ #include "selection.hh" #include "unicode.hh" #include "editor.hh" +#include "utf8_iterator.hh" namespace Kakoune { @@ -49,17 +50,89 @@ inline void remove_selection(const Buffer&, SelectionList& selections, int index selections.check_invariant(); } -enum WordType { Word, WORD }; +using Utf8Iterator = utf8::utf8_iterator; + +inline Range utf8_range(const Utf8Iterator& first, const Utf8Iterator& last) +{ + return {first.base().coord(), last.base().coord()}; +} + +typedef boost::regex_iterator RegexIterator; template -Selection select_to_next_word(const Buffer& buffer, - const Selection& selection); +Selection select_to_next_word(const Buffer& buffer, const Selection& selection) +{ + Utf8Iterator begin = buffer.iterator_at(selection.last()); + if (begin+1 == buffer.end()) + return selection; + if (categorize(*begin) != categorize(*(begin+1))) + ++begin; + + skip_while(begin, buffer.end(), is_eol); + if (begin == buffer.end()) + return selection; + Utf8Iterator end = begin+1; + + if (word_type == Word and is_punctuation(*begin)) + skip_while(end, buffer.end(), is_punctuation); + else if (is_word(*begin)) + skip_while(end, buffer.end(), is_word); + + skip_while(end, buffer.end(), is_blank); + + return utf8_range(begin, end-1); +} + template -Selection select_to_next_word_end(const Buffer& buffer, - const Selection& selection); +Selection select_to_next_word_end(const Buffer& buffer, const Selection& selection) +{ + Utf8Iterator begin = buffer.iterator_at(selection.last()); + if (begin+1 == buffer.end()) + return selection; + if (categorize(*begin) != categorize(*(begin+1))) + ++begin; + + skip_while(begin, buffer.end(), is_eol); + if (begin == buffer.end()) + return selection; + Utf8Iterator end = begin; + skip_while(end, buffer.end(), is_blank); + + if (word_type == Word and is_punctuation(*end)) + skip_while(end, buffer.end(), is_punctuation); + else if (is_word(*end)) + skip_while(end, buffer.end(), is_word); + + return utf8_range(begin, end-1); +} + template -Selection select_to_previous_word(const Buffer& buffer, -const Selection& selection); +Selection select_to_previous_word(const Buffer& buffer, const Selection& selection) +{ + Utf8Iterator begin = buffer.iterator_at(selection.last()); + if (begin == buffer.begin()) + return selection; + if (categorize(*begin) != categorize(*(begin-1))) + --begin; + + skip_while_reverse(begin, buffer.begin(), is_eol); + Utf8Iterator end = begin; + skip_while_reverse(end, buffer.begin(), is_blank); + + bool with_end = false; + if (word_type == Word and is_punctuation(*end)) + { + skip_while_reverse(end, buffer.begin(), is_punctuation); + with_end = is_punctuation(*end); + } + else if (is_word(*end)) + { + skip_while_reverse(end, buffer.begin(), is_word); + with_end = is_word(*end); + } + + return utf8_range(begin, with_end ? end : end+1); +} Selection select_line(const Buffer& buffer, const Selection& selection); @@ -86,8 +159,47 @@ constexpr ObjectFlags operator|(ObjectFlags lhs, ObjectFlags rhs) { return (ObjectFlags)((int)lhs | (int) rhs); } template -Selection select_whole_word(const Buffer& buffer, const Selection& selection, - ObjectFlags flags); +Selection select_whole_word(const Buffer& buffer, const Selection& selection, ObjectFlags flags) +{ + Utf8Iterator first = buffer.iterator_at(selection.last()); + Utf8Iterator last = first; + if (is_word(*first)) + { + if (flags & ObjectFlags::ToBegin) + { + skip_while_reverse(first, buffer.begin(), is_word); + if (not is_word(*first)) + ++first; + } + if (flags & ObjectFlags::ToEnd) + { + skip_while(last, buffer.end(), is_word); + if (not (flags & ObjectFlags::Inner)) + skip_while(last, buffer.end(), is_blank); + --last; + } + } + else if (not (flags & ObjectFlags::Inner)) + { + if (flags & ObjectFlags::ToBegin) + { + skip_while_reverse(first, buffer.begin(), is_blank); + if (not is_word(*first)) + return selection; + skip_while_reverse(first, buffer.begin(), is_word); + if (not is_word(*first)) + ++first; + } + if (flags & ObjectFlags::ToEnd) + { + skip_while(last, buffer.end(), is_blank); + --last; + } + } + return (flags & ObjectFlags::ToEnd) ? utf8_range(first, last) + : utf8_range(last, first); +} + Selection select_whole_sentence(const Buffer& buffer, const Selection& selection, ObjectFlags flags); Selection select_whole_paragraph(const Buffer& buffer, const Selection& selection, diff --git a/src/unicode.hh b/src/unicode.hh index ce7e9be2..fb235fad 100644 --- a/src/unicode.hh +++ b/src/unicode.hh @@ -9,11 +9,6 @@ namespace Kakoune using Codepoint = uint32_t; -inline bool is_word(Codepoint c) -{ - return c == '_' or isalnum(c); -} - inline bool is_eol(Codepoint c) { return c == '\n'; @@ -29,6 +24,46 @@ inline bool is_horizontal_blank(Codepoint c) return c == ' ' or c == '\t'; } +enum WordType { Word, WORD }; + +template +inline bool is_word(Codepoint c) +{ + return c == '_' or isalnum(c); +} + +template<> +inline bool is_word(Codepoint c) +{ + return !is_blank(c) and !is_eol(c); +} + +inline bool is_punctuation(Codepoint c) +{ + return not (is_word(c) or is_blank(c) or is_eol(c)); +} + +enum class CharCategories +{ + Blank, + EndOfLine, + Word, + Punctuation, +}; + +template +inline CharCategories categorize(Codepoint c) +{ + if (is_word(c)) + return CharCategories::Word; + if (is_eol(c)) + return CharCategories::EndOfLine; + if (is_blank(c)) + return CharCategories::Blank; + return word_type == WORD ? CharCategories::Word + : CharCategories::Punctuation; +} + } #endif // unicode_hh_INCLUDED diff --git a/src/utils.hh b/src/utils.hh index ef0cf3d5..a08e7707 100644 --- a/src/utils.hh +++ b/src/utils.hh @@ -189,6 +189,20 @@ bool contains(const std::unordered_set& container, const T2& value) return container.find(value) != container.end(); } +template +void skip_while(Iterator& it, const EndIterator& end, T condition) +{ + while (it != end and condition(*it)) + ++it; +} + +template +void skip_while_reverse(Iterator& it, const BeginIterator& begin, T condition) +{ + while (it != begin and condition(*it)) + --it; +} + // *** On scope end *** // // on_scope_end provides a way to register some code to be