Improve WordDB performance by precomputing hashes
Avoid multiple computation of string hashes by making it possible to pre-compute and pass hashes to interned strings and hash maps.
This commit is contained in:
parent
b10a935b8c
commit
296ab1a1ff
|
@ -194,9 +194,9 @@ struct HashMap
|
|||
insert(*begin++);
|
||||
}
|
||||
|
||||
constexpr EffectiveValue& insert(Item item)
|
||||
constexpr EffectiveValue& insert(Item item, size_t hash)
|
||||
{
|
||||
const auto hash = hash_value(item_key(item));
|
||||
kak_assert(hash == hash_value(item_key(item)));
|
||||
if constexpr (not multi_key)
|
||||
{
|
||||
if (auto index = find_index(item_key(item), hash); index >= 0)
|
||||
|
@ -212,6 +212,11 @@ struct HashMap
|
|||
return item_value(m_items.back());
|
||||
}
|
||||
|
||||
constexpr EffectiveValue& insert(Item item)
|
||||
{
|
||||
return insert(std::move(item), hash_value(item_key(item)));
|
||||
}
|
||||
|
||||
template<typename KeyType> requires IsHashCompatible<Key, KeyType>
|
||||
constexpr int find_index(const KeyType& key, size_t hash) const
|
||||
{
|
||||
|
@ -313,6 +318,7 @@ struct HashMap
|
|||
constexpr const_iterator begin() const { return m_items.begin(); }
|
||||
constexpr const_iterator end() const { return m_items.end(); }
|
||||
|
||||
Item& item(size_t index) { return m_items[index]; }
|
||||
const Item& item(size_t index) const { return m_items[index]; }
|
||||
|
||||
template<typename KeyType> requires IsHashCompatible<Key, KeyType>
|
||||
|
|
|
@ -25,18 +25,24 @@ StringDataPtr StringData::create(ArrayView<const StringView> strs)
|
|||
return RefPtr<StringData, PtrPolicy>{res};
|
||||
}
|
||||
|
||||
StringDataPtr StringData::Registry::intern(StringView str)
|
||||
StringDataPtr StringData::Registry::intern(StringView str, size_t hash)
|
||||
{
|
||||
auto it = m_strings.find(str);
|
||||
if (it != m_strings.end())
|
||||
return StringDataPtr{it->value};
|
||||
kak_assert(hash_value(str) == hash);
|
||||
auto index = m_strings.find_index(str, hash);
|
||||
if (index >= 0)
|
||||
return StringDataPtr{m_strings.item(index).value};
|
||||
|
||||
auto data = StringData::create(str);
|
||||
data->refcount |= interned_flag;
|
||||
m_strings.insert({data->strview(), data.get()});
|
||||
m_strings.insert({data->strview(), data.get()}, hash);
|
||||
return data;
|
||||
}
|
||||
|
||||
StringDataPtr StringData::Registry::intern(StringView str)
|
||||
{
|
||||
return intern(str, hash_value(str));
|
||||
}
|
||||
|
||||
void StringData::Registry::remove(StringView str)
|
||||
{
|
||||
kak_assert(m_strings.contains(str));
|
||||
|
|
|
@ -50,6 +50,7 @@ public:
|
|||
public:
|
||||
void debug_stats() const;
|
||||
Ptr intern(StringView str);
|
||||
Ptr intern(StringView str, size_t hash);
|
||||
void remove(StringView str);
|
||||
|
||||
private:
|
||||
|
@ -62,10 +63,8 @@ public:
|
|||
using StringDataPtr = StringData::Ptr;
|
||||
using StringRegistry = StringData::Registry;
|
||||
|
||||
inline StringDataPtr intern(StringView str)
|
||||
{
|
||||
return StringRegistry::instance().intern(str);
|
||||
}
|
||||
inline StringDataPtr intern(StringView str) { return StringRegistry::instance().intern(str); }
|
||||
inline StringDataPtr intern(StringView str, size_t hash) { return StringRegistry::instance().intern(str, hash); }
|
||||
|
||||
}
|
||||
|
||||
|
|
|
@ -71,14 +71,15 @@ void WordDB::add_words(StringView line, ConstArrayView<Codepoint> extra_word_cha
|
|||
{
|
||||
for (auto&& w : WordSplitter{line, extra_word_chars})
|
||||
{
|
||||
auto it = m_words.find(w);
|
||||
if (it != m_words.end())
|
||||
++it->value.refcount;
|
||||
auto hash = hash_value(w);
|
||||
auto index = m_words.find_index(w, hash);
|
||||
if (index >= 0)
|
||||
++m_words.item(index).value.refcount;
|
||||
else
|
||||
{
|
||||
auto word = intern(w);
|
||||
auto word = intern(w, hash);
|
||||
auto view = word->strview();
|
||||
m_words.insert({view, {std::move(word), used_letters(view), 1}});
|
||||
m_words.insert({view, {std::move(word), used_letters(view), 1}}, hash);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue
Block a user