From 1553d91d278ff943c3545e3cb0728acbf4cf433d Mon Sep 17 00:00:00 2001
From: Maxime Coste <mawww@kakoune.org>
Date: Tue, 27 Nov 2018 18:13:29 +1100
Subject: [PATCH] Make '_' the default extra_word_chars, and remove built-in
 support

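is_word() no longer hard-codes '_' as a word character. '_' is now the
default value of the extra_word_chars option, so emptying that option
makes '_' behave as punctuation for word selections. is_punctuation()
takes the extra word characters as well, keeping it consistent with
is_word().

Fixes #2599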
---
 src/main.cc                                        | 4 ++--
 src/normal.cc                                      | 2 +-
 src/selectors.cc                                   | 3 +++
 src/string_utils.cc                                | 4 ++--
 src/unicode.hh                                     | 8 ++++----
 src/word_db.cc                                     | 9 +++------
 test/normal/previous-word-no-underscore/cmd        | 1 +
 test/normal/previous-word-no-underscore/in         | 1 +
 test/normal/previous-word-no-underscore/rc         | 1 +
 test/normal/previous-word-no-underscore/selections | 1 +
 10 files changed, 19 insertions(+), 15 deletions(-)
 create mode 100644 test/normal/previous-word-no-underscore/cmd
 create mode 100644 test/normal/previous-word-no-underscore/in
 create mode 100644 test/normal/previous-word-no-underscore/rc
 create mode 100644 test/normal/previous-word-no-underscore/selections
diff --git a/src/main.cc b/src/main.cc
index f9242708..7ae2098f 100644
--- a/src/main.cc
+++ b/src/main.cc
@@ -347,7 +347,7 @@ static void check_matching_pairs(const Vector<Codepoint, MemoryDomain::Options>&
 {
     if ((pairs.size() % 2) != 0)
         throw runtime_error{"matching pairs should have a pair number of element"};
-    if (not all_of(pairs, is_punctuation))
+    if (not all_of(pairs, [](Codepoint cp) { return is_punctuation(cp); }))
         throw runtime_error{"matching pairs can only be punctuation"};
 }
 
@@ -422,7 +422,7 @@ void register_options()
     reg.declare_option<Vector<Codepoint, MemoryDomain::Options>, check_extra_word_chars>(
         "extra_word_chars",
         "Additional characters to be considered as words for insert completion",
-        {});
+        { '_' });
     reg.declare_option<Vector<Codepoint, MemoryDomain::Options>, check_matching_pairs>(
         "matching_pairs",
         "set of pair of characters to be considered as matching pairs",
diff --git a/src/normal.cc b/src/normal.cc
index 9300bda5..40200029 100644
--- a/src/normal.cc
+++ b/src/normal.cc
@@ -1327,7 +1327,7 @@ void select_object(Context& context, NormalParams params)
             return;
 
         const Codepoint cp = *key.codepoint();
-        if (is_punctuation(cp) or cp == '_')
+        if (is_punctuation(cp, {}))
         {
             auto re = Regex{"\\Q" + to_string(cp), RegexCompileFlags::Backward};
             return select_and_set_last<mode>(
diff --git a/src/selectors.cc b/src/selectors.cc
index 8320c0b0..47aef03c 100644
--- a/src/selectors.cc
+++ b/src/selectors.cc
@@ -63,6 +63,7 @@ select_to_next_word(const Context& context, const Selection& selection)
     Utf8Iterator end = begin+1;
 
     auto is_word = [&](Codepoint c) { return Kakoune::is_word<word_type>(c, extra_word_chars); };
+    auto is_punctuation = [&](Codepoint c) { return Kakoune::is_punctuation(c, extra_word_chars); };
 
     if (is_word(*begin))
         skip_while(end, buffer.end(), is_word);
@@ -96,6 +97,7 @@ select_to_next_word_end(const Context& context, const Selection& selection)
     skip_while(end, buffer.end(), is_horizontal_blank);
 
     auto is_word = [&](Codepoint c) { return Kakoune::is_word<word_type>(c, extra_word_chars); };
+    auto is_punctuation = [&](Codepoint c) { return Kakoune::is_punctuation(c, extra_word_chars); };
 
     if (is_word(*end))
         skip_while(end, buffer.end(), is_word);
@@ -124,6 +126,7 @@ select_to_previous_word(const Context& context, const Selection& selection)
     Utf8Iterator end = begin;
 
     auto is_word = [&](Codepoint c) { return Kakoune::is_word<word_type>(c, extra_word_chars); };
+    auto is_punctuation = [&](Codepoint c) { return Kakoune::is_punctuation(c, extra_word_chars); };
 
     bool with_end = skip_while_reverse(end, buffer.begin(), is_horizontal_blank);
     if (is_word(*end))
diff --git a/src/string_utils.cc b/src/string_utils.cc
index 0a420fec..e478fd96 100644
--- a/src/string_utils.cc
+++ b/src/string_utils.cc
@@ -233,7 +233,7 @@ Vector<StringView> wrap_lines(StringView text, ColumnCount max_width)
     Vector<StringView> lines;
     while (it != end)
     {
-        const CharCategories cat = categorize(*it, {});
+        const CharCategories cat = categorize(*it, {'_'});
         if (cat == CharCategories::EndOfLine)
         {
             lines.emplace_back(line_begin.base(), it.base());
@@ -242,7 +242,7 @@ Vector<StringView> wrap_lines(StringView text, ColumnCount max_width)
         }
 
         Utf8It word_end = it+1;
-        while (word_end != end and categorize(*word_end, {}) == cat)
+        while (word_end != end and categorize(*word_end, {'_'}) == cat)
             ++word_end;
 
         while (word_end > line_begin and
diff --git a/src/unicode.hh b/src/unicode.hh
index 1dcc836b..d94fff8d 100644
--- a/src/unicode.hh
+++ b/src/unicode.hh
@@ -32,9 +32,9 @@ inline bool is_blank(Codepoint c) noexcept
 enum WordType { Word, WORD };
 
 template<WordType word_type = Word>
-inline bool is_word(Codepoint c, ConstArrayView<Codepoint> extra_word_chars = {}) noexcept
+inline bool is_word(Codepoint c, ConstArrayView<Codepoint> extra_word_chars = {'_'}) noexcept
 {
-    return c == '_' or iswalnum((wchar_t)c) or contains(extra_word_chars, c);
+    return iswalnum((wchar_t)c) or contains(extra_word_chars, c);
 }
 
 template<>
@@ -43,9 +43,9 @@ inline bool is_word<WORD>(Codepoint c, ConstArrayView<Codepoint>) noexcept
     return not is_blank(c);
 }
 
-inline bool is_punctuation(Codepoint c) noexcept
+inline bool is_punctuation(Codepoint c, ConstArrayView<Codepoint> extra_word_chars = {'_'}) noexcept
 {
-    return not (is_word(c) or is_blank(c));
+    return not (is_word(c, extra_word_chars) or is_blank(c));
 }
 
 inline bool is_basic_alpha(Codepoint c) noexcept
diff --git a/src/word_db.cc b/src/word_db.cc
index 463b9e28..6d847952 100644
--- a/src/word_db.cc
+++ b/src/word_db.cc
@@ -31,16 +31,13 @@ struct WordSplitter
         Iterator& operator++()
         {
             const auto* end = m_splitter->m_content.end();
-            auto is_word = [&](const char* ptr) {
-                const Codepoint c = utf8::codepoint(ptr, end);
-                return Kakoune::is_word(c) or contains(m_splitter->m_extra_word_chars, c);
-            };
+            auto extra_chars = m_splitter->m_extra_word_chars;
 
             m_word_begin = m_word_end;
-            while (m_word_begin != end and not is_word(m_word_begin))
+            while (m_word_begin != end and not is_word(utf8::codepoint(m_word_begin, end), extra_chars))
                 utf8::to_next(m_word_begin, end);
             m_word_end = m_word_begin;
-            while (m_word_end != end and is_word(m_word_end))
+            while (m_word_end != end and is_word(utf8::codepoint(m_word_end, end), extra_chars))
                 utf8::to_next(m_word_end, end);
             return *this;
         }
diff --git a/test/normal/previous-word-no-underscore/cmd b/test/normal/previous-word-no-underscore/cmd
new file mode 100644
index 00000000..61780798
--- /dev/null
+++ b/test/normal/previous-word-no-underscore/cmd
@@ -0,0 +1 @@
+b
diff --git a/test/normal/previous-word-no-underscore/in b/test/normal/previous-word-no-underscore/in
new file mode 100644
index 00000000..017e5893
--- /dev/null
+++ b/test/normal/previous-word-no-underscore/in
@@ -0,0 +1 @@
+foo_%(b)ar
diff --git a/test/normal/previous-word-no-underscore/rc b/test/normal/previous-word-no-underscore/rc
new file mode 100644
index 00000000..9aaca091
--- /dev/null
+++ b/test/normal/previous-word-no-underscore/rc
@@ -0,0 +1 @@
+set-option global extra_word_chars
diff --git a/test/normal/previous-word-no-underscore/selections b/test/normal/previous-word-no-underscore/selections
new file mode 100644
index 00000000..69ad7b36
--- /dev/null
+++ b/test/normal/previous-word-no-underscore/selections
@@ -0,0 +1 @@
+'_'