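Refactor get_words to be simpler and faster (note: a word that runs up to the end of content, with no non-word character after it, is now included in the result; the previous loop only emitted a word when it encountered a following non-word character)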
This commit is contained in:
parent
73bb260e6c
commit
55f6ca238f
|
@ -13,24 +13,18 @@ using WordList = Vector<StringView>;
|
||||||
static WordList get_words(StringView content, ConstArrayView<Codepoint> extra_word_chars)
|
static WordList get_words(StringView content, ConstArrayView<Codepoint> extra_word_chars)
|
||||||
{
|
{
|
||||||
WordList res;
|
WordList res;
|
||||||
using Utf8It = utf8::iterator<const char*>;
|
auto is_word = [&](Codepoint c) {
|
||||||
const char* word_start = content.begin();
|
return Kakoune::is_word(c) or contains(extra_word_chars, c);
|
||||||
bool in_word = false;
|
};
|
||||||
for (Utf8It it{word_start, content}; it != content.end(); ++it)
|
for (utf8::iterator<const char*> it{content.begin(), content};
|
||||||
|
it != content.end(); ++it)
|
||||||
{
|
{
|
||||||
Codepoint c = *it;
|
if (is_word(*it))
|
||||||
const bool word = is_word(c) or contains(extra_word_chars, c);
|
|
||||||
if (not in_word and word)
|
|
||||||
{
|
{
|
||||||
word_start = it.base();
|
const char* word = it.base();
|
||||||
in_word = true;
|
while (++it != content.end() and is_word(*it))
|
||||||
}
|
{}
|
||||||
else if (in_word and not word)
|
res.emplace_back(word, it.base());
|
||||||
{
|
|
||||||
const ByteCount start = word_start - content.begin();
|
|
||||||
const ByteCount length = it.base() - word_start;
|
|
||||||
res.push_back(content.substr(start, length));
|
|
||||||
in_word = false;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return res;
|
return res;
|
||||||
|
|
Loading…
Reference in New Issue
Block a user