Refactor get_words to be simpler and faster

This commit is contained in:
Maxime Coste 2017-02-23 00:51:54 +00:00
parent 73bb260e6c
commit 55f6ca238f

View File

@@ -13,24 +13,18 @@ using WordList = Vector<StringView>;
static WordList get_words(StringView content, ConstArrayView<Codepoint> extra_word_chars) static WordList get_words(StringView content, ConstArrayView<Codepoint> extra_word_chars)
{ {
WordList res; WordList res;
using Utf8It = utf8::iterator<const char*>; auto is_word = [&](Codepoint c) {
const char* word_start = content.begin(); return Kakoune::is_word(c) or contains(extra_word_chars, c);
bool in_word = false; };
for (Utf8It it{word_start, content}; it != content.end(); ++it) for (utf8::iterator<const char*> it{content.begin(), content};
it != content.end(); ++it)
{ {
Codepoint c = *it; if (is_word(*it))
const bool word = is_word(c) or contains(extra_word_chars, c);
if (not in_word and word)
{ {
word_start = it.base(); const char* word = it.base();
in_word = true; while (++it != content.end() and is_word(*it))
} {}
else if (in_word and not word) res.emplace_back(word, it.base());
{
const ByteCount start = word_start - content.begin();
const ByteCount length = it.base() - word_start;
res.push_back(content.substr(start, length));
in_word = false;
} }
} }
return res; return res;