Improve WordDB performance by precomputing hashes

Avoid computing string hashes multiple times by making it possible
to precompute hashes and pass them to interned strings and hash maps.
This commit is contained in:
Maxime Coste 2023-11-17 17:01:51 +11:00
parent b10a935b8c
commit 296ab1a1ff
4 changed files with 28 additions and 16 deletions

View File

@ -194,9 +194,9 @@ struct HashMap
insert(*begin++); insert(*begin++);
} }
constexpr EffectiveValue& insert(Item item) constexpr EffectiveValue& insert(Item item, size_t hash)
{ {
const auto hash = hash_value(item_key(item)); kak_assert(hash == hash_value(item_key(item)));
if constexpr (not multi_key) if constexpr (not multi_key)
{ {
if (auto index = find_index(item_key(item), hash); index >= 0) if (auto index = find_index(item_key(item), hash); index >= 0)
@ -212,6 +212,11 @@ struct HashMap
return item_value(m_items.back()); return item_value(m_items.back());
} }
// Convenience overload: computes the hash of the item's key, then delegates
// to insert(Item, size_t).
constexpr EffectiveValue& insert(Item item)
{
    // The hash must be computed into a local before the call. The order in
    // which function arguments are evaluated is unspecified, so writing
    // insert(std::move(item), hash_value(item_key(item))) would allow the
    // callee's by-value parameter to be move-constructed from `item` before
    // item_key(item) reads it, hashing a moved-from key.
    const auto hash = hash_value(item_key(item));
    return insert(std::move(item), hash);
}
template<typename KeyType> requires IsHashCompatible<Key, KeyType> template<typename KeyType> requires IsHashCompatible<Key, KeyType>
constexpr int find_index(const KeyType& key, size_t hash) const constexpr int find_index(const KeyType& key, size_t hash) const
{ {
@ -313,6 +318,7 @@ struct HashMap
constexpr const_iterator begin() const { return m_items.begin(); } constexpr const_iterator begin() const { return m_items.begin(); }
constexpr const_iterator end() const { return m_items.end(); } constexpr const_iterator end() const { return m_items.end(); }
// Mutable access to the item stored at position `index` in m_items.
Item& item(size_t index)
{
    return m_items[index];
}
const Item& item(size_t index) const { return m_items[index]; } const Item& item(size_t index) const { return m_items[index]; }
template<typename KeyType> requires IsHashCompatible<Key, KeyType> template<typename KeyType> requires IsHashCompatible<Key, KeyType>

View File

@ -25,18 +25,24 @@ StringDataPtr StringData::create(ArrayView<const StringView> strs)
return RefPtr<StringData, PtrPolicy>{res}; return RefPtr<StringData, PtrPolicy>{res};
} }
StringDataPtr StringData::Registry::intern(StringView str) StringDataPtr StringData::Registry::intern(StringView str, size_t hash)
{ {
auto it = m_strings.find(str); kak_assert(hash_value(str) == hash);
if (it != m_strings.end()) auto index = m_strings.find_index(str, hash);
return StringDataPtr{it->value}; if (index >= 0)
return StringDataPtr{m_strings.item(index).value};
auto data = StringData::create(str); auto data = StringData::create(str);
data->refcount |= interned_flag; data->refcount |= interned_flag;
m_strings.insert({data->strview(), data.get()}); m_strings.insert({data->strview(), data.get()}, hash);
return data; return data;
} }
// Intern `str` without a caller-supplied hash: the hash is computed here and
// forwarded to the overload that accepts a precomputed one.
StringDataPtr StringData::Registry::intern(StringView str)
{
    const auto hash = hash_value(str);
    return intern(str, hash);
}
void StringData::Registry::remove(StringView str) void StringData::Registry::remove(StringView str)
{ {
kak_assert(m_strings.contains(str)); kak_assert(m_strings.contains(str));

View File

@ -50,6 +50,7 @@ public:
public: public:
void debug_stats() const; void debug_stats() const;
Ptr intern(StringView str); Ptr intern(StringView str);
// Same as intern(str), but takes a precomputed hash;
// `hash` must equal hash_value(str) (checked with kak_assert).
Ptr intern(StringView str, size_t hash);
void remove(StringView str); void remove(StringView str);
private: private:
@ -62,10 +63,8 @@ public:
using StringDataPtr = StringData::Ptr; using StringDataPtr = StringData::Ptr;
using StringRegistry = StringData::Registry; using StringRegistry = StringData::Registry;
inline StringDataPtr intern(StringView str) inline StringDataPtr intern(StringView str) { return StringRegistry::instance().intern(str); }
{ inline StringDataPtr intern(StringView str, size_t hash) { return StringRegistry::instance().intern(str, hash); }
return StringRegistry::instance().intern(str);
}
} }

View File

@ -71,14 +71,15 @@ void WordDB::add_words(StringView line, ConstArrayView<Codepoint> extra_word_cha
{ {
for (auto&& w : WordSplitter{line, extra_word_chars}) for (auto&& w : WordSplitter{line, extra_word_chars})
{ {
auto it = m_words.find(w); auto hash = hash_value(w);
if (it != m_words.end()) auto index = m_words.find_index(w, hash);
++it->value.refcount; if (index >= 0)
++m_words.item(index).value.refcount;
else else
{ {
auto word = intern(w); auto word = intern(w, hash);
auto view = word->strview(); auto view = word->strview();
m_words.insert({view, {std::move(word), used_letters(view), 1}}); m_words.insert({view, {std::move(word), used_letters(view), 1}}, hash);
} }
} }
} }