Improve WordDB performance by precomputing hashes

Avoid multiple computation of string hashes by making it possible
to pre-compute and pass hashes to interned strings and hash maps.
This commit is contained in:
Maxime Coste 2023-11-17 17:01:51 +11:00
parent b10a935b8c
commit 296ab1a1ff
4 changed files with 28 additions and 16 deletions

View File

@ -194,9 +194,9 @@ struct HashMap
insert(*begin++);
}
constexpr EffectiveValue& insert(Item item)
constexpr EffectiveValue& insert(Item item, size_t hash)
{
const auto hash = hash_value(item_key(item));
kak_assert(hash == hash_value(item_key(item)));
if constexpr (not multi_key)
{
if (auto index = find_index(item_key(item), hash); index >= 0)
@ -212,6 +212,11 @@ struct HashMap
return item_value(m_items.back());
}
constexpr EffectiveValue& insert(Item item)
{
return insert(std::move(item), hash_value(item_key(item)));
}
template<typename KeyType> requires IsHashCompatible<Key, KeyType>
constexpr int find_index(const KeyType& key, size_t hash) const
{
@ -313,6 +318,7 @@ struct HashMap
constexpr const_iterator begin() const { return m_items.begin(); }
constexpr const_iterator end() const { return m_items.end(); }
Item& item(size_t index) { return m_items[index]; }
const Item& item(size_t index) const { return m_items[index]; }
template<typename KeyType> requires IsHashCompatible<Key, KeyType>

View File

@ -25,18 +25,24 @@ StringDataPtr StringData::create(ArrayView<const StringView> strs)
return RefPtr<StringData, PtrPolicy>{res};
}
StringDataPtr StringData::Registry::intern(StringView str)
StringDataPtr StringData::Registry::intern(StringView str, size_t hash)
{
auto it = m_strings.find(str);
if (it != m_strings.end())
return StringDataPtr{it->value};
kak_assert(hash_value(str) == hash);
auto index = m_strings.find_index(str, hash);
if (index >= 0)
return StringDataPtr{m_strings.item(index).value};
auto data = StringData::create(str);
data->refcount |= interned_flag;
m_strings.insert({data->strview(), data.get()});
m_strings.insert({data->strview(), data.get()}, hash);
return data;
}
StringDataPtr StringData::Registry::intern(StringView str)
{
return intern(str, hash_value(str));
}
void StringData::Registry::remove(StringView str)
{
kak_assert(m_strings.contains(str));

View File

@ -50,6 +50,7 @@ public:
public:
void debug_stats() const;
Ptr intern(StringView str);
Ptr intern(StringView str, size_t hash);
void remove(StringView str);
private:
@ -62,10 +63,8 @@ public:
using StringDataPtr = StringData::Ptr;
using StringRegistry = StringData::Registry;
inline StringDataPtr intern(StringView str)
{
return StringRegistry::instance().intern(str);
}
inline StringDataPtr intern(StringView str) { return StringRegistry::instance().intern(str); }
inline StringDataPtr intern(StringView str, size_t hash) { return StringRegistry::instance().intern(str, hash); }
}

View File

@ -71,14 +71,15 @@ void WordDB::add_words(StringView line, ConstArrayView<Codepoint> extra_word_cha
{
for (auto&& w : WordSplitter{line, extra_word_chars})
{
auto it = m_words.find(w);
if (it != m_words.end())
++it->value.refcount;
auto hash = hash_value(w);
auto index = m_words.find_index(w, hash);
if (index >= 0)
++m_words.item(index).value.refcount;
else
{
auto word = intern(w);
auto word = intern(w, hash);
auto view = word->strview();
m_words.insert({view, {std::move(word), used_letters(view), 1}});
m_words.insert({view, {std::move(word), used_letters(view), 1}}, hash);
}
}
}