2017-03-06 20:47:26 +01:00
|
|
|
#ifndef hash_map_hh_INCLUDED
|
|
|
|
#define hash_map_hh_INCLUDED
|
|
|
|
|
|
|
|
#include "hash.hh"
|
|
|
|
#include "memory.hh"
|
|
|
|
#include "vector.hh"
|
|
|
|
|
|
|
|
namespace Kakoune
|
|
|
|
{
|
|
|
|
|
2017-10-20 06:16:58 +02:00
|
|
|
// Exchanges the values of `lhs` and `rhs` through three moves.
// Usable in constant expressions, which is why the containers in this
// header call it instead of a library swap.
template<typename T>
constexpr void constexpr_swap(T& lhs, T& rhs)
{
    T stash = std::move(lhs); // park the left value
    lhs = std::move(rhs);     // right value takes its place
    rhs = std::move(stash);   // parked value lands on the right
}
|
|
|
|
|
|
|
|
template<MemoryDomain domain,
|
|
|
|
template<typename, MemoryDomain> class Container>
|
2017-03-06 20:47:26 +01:00
|
|
|
struct HashIndex
|
|
|
|
{
|
|
|
|
struct Entry
|
|
|
|
{
|
2017-10-20 06:16:58 +02:00
|
|
|
size_t hash = 0;
|
|
|
|
int index = -1;
|
2017-03-06 20:47:26 +01:00
|
|
|
};
|
|
|
|
|
2017-10-20 06:16:58 +02:00
|
|
|
static constexpr float max_fill_rate = 0.5f;
|
|
|
|
|
|
|
|
constexpr HashIndex() = default;
|
|
|
|
constexpr HashIndex(size_t count)
|
|
|
|
{
|
|
|
|
const size_t min_size = (size_t)(count / max_fill_rate) + 1;
|
|
|
|
size_t new_size = 4;
|
|
|
|
while (new_size < min_size)
|
|
|
|
new_size *= 2;
|
2017-10-20 13:00:06 +02:00
|
|
|
m_entries.resize(new_size);
|
2017-10-20 06:16:58 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
using ContainerType = Container<Entry, domain>;
|
|
|
|
|
|
|
|
constexpr void resize(size_t new_size)
|
2017-03-06 20:47:26 +01:00
|
|
|
{
|
2017-03-07 16:48:04 +01:00
|
|
|
kak_assert(new_size > m_entries.size());
|
2017-10-20 06:16:58 +02:00
|
|
|
ContainerType old_entries = std::move(m_entries);
|
2017-10-20 13:00:06 +02:00
|
|
|
m_entries.resize(new_size);
|
2017-03-06 20:47:26 +01:00
|
|
|
for (auto& entry : old_entries)
|
|
|
|
{
|
|
|
|
if (entry.index >= 0)
|
|
|
|
add(entry.hash, entry.index);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-10-20 06:16:58 +02:00
|
|
|
constexpr void reserve(size_t count)
|
2017-03-06 20:47:26 +01:00
|
|
|
{
|
2017-10-27 05:02:09 +02:00
|
|
|
if (count == 0)
|
|
|
|
return;
|
|
|
|
|
2017-03-07 16:48:04 +01:00
|
|
|
const size_t min_size = (size_t)(count / max_fill_rate) + 1;
|
|
|
|
size_t new_size = m_entries.empty() ? 4 : m_entries.size();
|
|
|
|
while (new_size < min_size)
|
|
|
|
new_size *= 2;
|
2017-10-20 06:16:58 +02:00
|
|
|
|
2017-03-07 16:48:04 +01:00
|
|
|
if (new_size > m_entries.size())
|
|
|
|
resize(new_size);
|
|
|
|
}
|
2017-03-06 20:47:26 +01:00
|
|
|
|
2017-10-20 06:16:58 +02:00
|
|
|
constexpr void add(size_t hash, int index)
|
2017-03-07 16:48:04 +01:00
|
|
|
{
|
2017-03-06 20:47:26 +01:00
|
|
|
Entry entry{hash, index};
|
|
|
|
while (true)
|
|
|
|
{
|
|
|
|
auto target_slot = compute_slot(entry.hash);
|
|
|
|
for (auto slot = target_slot; slot < m_entries.size(); ++slot)
|
|
|
|
{
|
|
|
|
if (m_entries[slot].index == -1)
|
|
|
|
{
|
|
|
|
m_entries[slot] = entry;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Robin hood hashing
|
|
|
|
auto candidate_slot = compute_slot(m_entries[slot].hash);
|
|
|
|
if (target_slot < candidate_slot)
|
|
|
|
{
|
2017-10-20 06:16:58 +02:00
|
|
|
constexpr_swap(m_entries[slot], entry);
|
2017-03-06 20:47:26 +01:00
|
|
|
target_slot = candidate_slot;
|
|
|
|
}
|
|
|
|
}
|
2017-03-07 16:48:04 +01:00
|
|
|
// no free entries found, resize, try again
|
|
|
|
resize(m_entries.size() * 2);
|
2017-03-06 20:47:26 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-10-20 06:16:58 +02:00
|
|
|
constexpr void remove(size_t hash, int index)
|
2017-03-06 20:47:26 +01:00
|
|
|
{
|
|
|
|
for (auto slot = compute_slot(hash); slot < m_entries.size(); ++slot)
|
|
|
|
{
|
|
|
|
kak_assert(m_entries[slot].index >= 0);
|
|
|
|
if (m_entries[slot].index == index)
|
|
|
|
{
|
|
|
|
m_entries[slot].index = -1;
|
|
|
|
// Recompact following entries
|
|
|
|
for (auto next = slot+1; next < m_entries.size(); ++next)
|
|
|
|
{
|
|
|
|
if (m_entries[next].index == -1 or
|
|
|
|
compute_slot(m_entries[next].hash) == next)
|
|
|
|
break;
|
|
|
|
kak_assert(compute_slot(m_entries[next].hash) < next);
|
2017-10-20 06:16:58 +02:00
|
|
|
constexpr_swap(m_entries[next-1], m_entries[next]);
|
2017-03-06 20:47:26 +01:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-10-20 06:16:58 +02:00
|
|
|
constexpr void ordered_fix_entries(int index)
|
2017-03-06 20:47:26 +01:00
|
|
|
{
|
|
|
|
for (auto& entry : m_entries)
|
|
|
|
{
|
|
|
|
if (entry.index >= index)
|
|
|
|
--entry.index;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-10-20 06:16:58 +02:00
|
|
|
constexpr void unordered_fix_entries(size_t hash, int old_index, int new_index)
|
2017-03-06 20:47:26 +01:00
|
|
|
{
|
|
|
|
for (auto slot = compute_slot(hash); slot < m_entries.size(); ++slot)
|
|
|
|
{
|
|
|
|
if (m_entries[slot].index == old_index)
|
|
|
|
{
|
|
|
|
m_entries[slot].index = new_index;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
kak_assert(false); // entry not found ?!
|
|
|
|
}
|
|
|
|
|
2017-10-20 06:16:58 +02:00
|
|
|
constexpr const Entry& operator[](size_t index) const { return m_entries[index]; }
|
|
|
|
constexpr size_t size() const { return m_entries.size(); }
|
|
|
|
constexpr size_t compute_slot(size_t hash) const
|
2017-03-06 20:47:26 +01:00
|
|
|
{
|
|
|
|
// We assume entries.size() is power of 2
|
2017-03-07 16:48:04 +01:00
|
|
|
return hash & (m_entries.size()-1);
|
2017-03-06 20:47:26 +01:00
|
|
|
}
|
|
|
|
|
2017-10-20 06:16:58 +02:00
|
|
|
constexpr void clear() { m_entries.clear(); }
|
2017-03-06 20:47:26 +01:00
|
|
|
|
|
|
|
private:
|
2017-10-20 06:16:58 +02:00
|
|
|
ContainerType m_entries;
|
2017-03-06 20:47:26 +01:00
|
|
|
};
|
|
|
|
|
2017-03-07 01:30:54 +01:00
|
|
|
template<typename Key, typename Value>
|
|
|
|
struct HashItem
|
2022-08-05 11:35:27 +02:00
|
|
|
{
|
|
|
|
Key key{};
|
|
|
|
Value value{};
|
|
|
|
|
|
|
|
friend bool operator==(const HashItem&, const HashItem&) = default;
|
|
|
|
};
|
|
|
|
|
|
|
|
template<typename Key>
|
|
|
|
struct HashItem<Key, void>
|
2017-03-07 01:30:54 +01:00
|
|
|
{
|
|
|
|
Key key;
|
2022-08-05 11:35:27 +02:00
|
|
|
|
|
|
|
friend bool operator==(const HashItem&, const HashItem&) = default;
|
2017-03-07 01:30:54 +01:00
|
|
|
};
|
|
|
|
|
2017-10-20 06:16:58 +02:00
|
|
|
template<typename Key, typename Value,
|
|
|
|
MemoryDomain domain = MemoryDomain::Undefined,
|
2022-08-05 11:20:00 +02:00
|
|
|
template<typename, MemoryDomain> class Container = Vector,
|
|
|
|
bool multi_key = false>
|
2017-03-06 20:47:26 +01:00
|
|
|
struct HashMap
|
|
|
|
{
|
2022-08-05 11:35:27 +02:00
|
|
|
static constexpr bool has_value = not std::is_void_v<Value>;
|
|
|
|
using Item = std::conditional_t<has_value, HashItem<Key, Value>, Key>;
|
|
|
|
using EffectiveValue = std::conditional_t<has_value, Value, const Key>;
|
2017-10-20 06:16:58 +02:00
|
|
|
using ContainerType = Container<Item, domain>;
|
2017-03-06 20:47:26 +01:00
|
|
|
|
2017-10-20 06:16:58 +02:00
|
|
|
constexpr HashMap() = default;
|
2017-03-06 20:47:26 +01:00
|
|
|
|
2017-10-20 06:16:58 +02:00
|
|
|
constexpr HashMap(std::initializer_list<Item> val) : m_items(val), m_index(val.size())
|
2017-03-06 20:47:26 +01:00
|
|
|
{
|
|
|
|
for (int i = 0; i < m_items.size(); ++i)
|
|
|
|
m_index.add(hash_value(m_items[i].key), i);
|
|
|
|
}
|
|
|
|
|
2022-08-05 11:35:27 +02:00
|
|
|
template<typename Iterator>
|
|
|
|
constexpr HashMap(Iterator begin, Iterator end)
|
2017-03-06 20:47:26 +01:00
|
|
|
{
|
2022-08-05 11:35:27 +02:00
|
|
|
while (begin != end)
|
|
|
|
insert(*begin++);
|
|
|
|
}
|
|
|
|
|
2023-11-17 07:01:51 +01:00
|
|
|
constexpr EffectiveValue& insert(Item item, size_t hash)
|
2022-08-05 11:35:27 +02:00
|
|
|
{
|
2023-11-17 07:01:51 +01:00
|
|
|
kak_assert(hash == hash_value(item_key(item)));
|
2022-08-05 11:20:00 +02:00
|
|
|
if constexpr (not multi_key)
|
|
|
|
{
|
2022-08-05 11:35:27 +02:00
|
|
|
if (auto index = find_index(item_key(item), hash); index >= 0)
|
2022-08-05 11:20:00 +02:00
|
|
|
{
|
|
|
|
m_items[index] = std::move(item);
|
2022-08-05 11:35:27 +02:00
|
|
|
return item_value(m_items[index]);
|
2022-08-05 11:20:00 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-03-07 16:48:04 +01:00
|
|
|
m_index.reserve(m_items.size()+1);
|
2022-08-05 11:20:00 +02:00
|
|
|
m_index.add(hash, (int)m_items.size());
|
2017-03-06 20:47:26 +01:00
|
|
|
m_items.push_back(std::move(item));
|
2022-08-05 11:35:27 +02:00
|
|
|
return item_value(m_items.back());
|
2017-03-06 20:47:26 +01:00
|
|
|
}
|
|
|
|
|
2023-11-17 07:01:51 +01:00
|
|
|
constexpr EffectiveValue& insert(Item item)
|
|
|
|
{
|
|
|
|
return insert(std::move(item), hash_value(item_key(item)));
|
|
|
|
}
|
|
|
|
|
2020-11-11 11:43:27 +01:00
|
|
|
template<typename KeyType> requires IsHashCompatible<Key, KeyType>
|
2017-10-20 06:16:58 +02:00
|
|
|
constexpr int find_index(const KeyType& key, size_t hash) const
|
2017-03-06 20:47:26 +01:00
|
|
|
{
|
|
|
|
for (auto slot = m_index.compute_slot(hash); slot < m_index.size(); ++slot)
|
|
|
|
{
|
|
|
|
auto& entry = m_index[slot];
|
|
|
|
if (entry.index == -1)
|
|
|
|
return -1;
|
2022-08-05 11:35:27 +02:00
|
|
|
if (entry.hash == hash and item_key(m_items[entry.index]) == key)
|
2017-03-06 20:47:26 +01:00
|
|
|
return entry.index;
|
|
|
|
}
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2020-11-11 11:43:27 +01:00
|
|
|
template<typename KeyType> requires IsHashCompatible<Key, KeyType>
|
2017-10-20 06:16:58 +02:00
|
|
|
constexpr int find_index(const KeyType& key) const { return find_index(key, hash_value(key)); }
|
2017-03-06 20:47:26 +01:00
|
|
|
|
2020-11-11 11:43:27 +01:00
|
|
|
template<typename KeyType> requires IsHashCompatible<Key, KeyType>
|
2017-10-20 06:16:58 +02:00
|
|
|
constexpr bool contains(const KeyType& key) const { return find_index(key) >= 0; }
|
2017-03-06 20:47:26 +01:00
|
|
|
|
2020-11-11 11:43:27 +01:00
|
|
|
template<typename KeyType> requires IsHashCompatible<Key, std::remove_cvref_t<KeyType>>
|
2022-08-05 11:35:27 +02:00
|
|
|
constexpr EffectiveValue& operator[](KeyType&& key)
|
2017-03-06 20:47:26 +01:00
|
|
|
{
|
|
|
|
const auto hash = hash_value(key);
|
|
|
|
auto index = find_index(key, hash);
|
|
|
|
if (index >= 0)
|
2022-08-05 11:35:27 +02:00
|
|
|
return item_value(m_items[index]);
|
2017-03-06 20:47:26 +01:00
|
|
|
|
2017-03-07 16:48:04 +01:00
|
|
|
m_index.reserve(m_items.size()+1);
|
2017-03-06 20:47:26 +01:00
|
|
|
m_index.add(hash, (int)m_items.size());
|
2022-08-05 11:35:27 +02:00
|
|
|
m_items.push_back({Key(std::forward<KeyType>(key))});
|
|
|
|
return item_value(m_items.back());
|
2017-03-06 20:47:26 +01:00
|
|
|
}
|
|
|
|
|
2022-08-10 21:58:31 +02:00
|
|
|
template<typename KeyType> requires IsHashCompatible<Key, std::remove_cvref_t<KeyType>>
|
|
|
|
constexpr const EffectiveValue& get(KeyType&& key) const
|
|
|
|
{
|
|
|
|
return const_cast<HashMap&>(*this).get(key);
|
|
|
|
}
|
|
|
|
|
|
|
|
template<typename KeyType> requires IsHashCompatible<Key, std::remove_cvref_t<KeyType>>
|
|
|
|
constexpr EffectiveValue& get(KeyType&& key)
|
|
|
|
{
|
|
|
|
const auto hash = hash_value(key);
|
|
|
|
auto index = find_index(key, hash);
|
|
|
|
kak_assert(index >= 0);
|
|
|
|
return item_value(m_items[index]);
|
|
|
|
}
|
|
|
|
|
2020-11-11 11:43:27 +01:00
|
|
|
template<typename KeyType> requires IsHashCompatible<Key, KeyType>
|
2017-10-20 06:16:58 +02:00
|
|
|
constexpr void remove(const KeyType& key)
|
2017-03-06 20:47:26 +01:00
|
|
|
{
|
|
|
|
const auto hash = hash_value(key);
|
|
|
|
int index = find_index(key, hash);
|
|
|
|
if (index >= 0)
|
|
|
|
{
|
|
|
|
m_items.erase(m_items.begin() + index);
|
|
|
|
m_index.remove(hash, index);
|
|
|
|
m_index.ordered_fix_entries(index);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-11-11 11:43:27 +01:00
|
|
|
template<typename KeyType> requires IsHashCompatible<Key, KeyType>
|
2017-10-20 06:16:58 +02:00
|
|
|
constexpr void unordered_remove(const KeyType& key)
|
2017-03-06 20:47:26 +01:00
|
|
|
{
|
|
|
|
const auto hash = hash_value(key);
|
|
|
|
int index = find_index(key, hash);
|
|
|
|
if (index >= 0)
|
|
|
|
{
|
2017-10-20 06:16:58 +02:00
|
|
|
constexpr_swap(m_items[index], m_items.back());
|
2017-03-06 20:47:26 +01:00
|
|
|
m_items.pop_back();
|
|
|
|
m_index.remove(hash, index);
|
|
|
|
if (index != m_items.size())
|
2022-08-05 11:35:27 +02:00
|
|
|
m_index.unordered_fix_entries(hash_value(item_key(m_items[index])), m_items.size(), index);
|
2017-03-06 20:47:26 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-11-11 11:43:27 +01:00
|
|
|
template<typename KeyType> requires IsHashCompatible<Key, KeyType>
|
2020-07-18 06:29:44 +02:00
|
|
|
constexpr void erase(const KeyType& key) { unordered_remove(key); }
|
2017-03-06 20:47:26 +01:00
|
|
|
|
2020-11-11 11:43:27 +01:00
|
|
|
template<typename KeyType> requires IsHashCompatible<Key, KeyType>
|
2017-10-20 06:16:58 +02:00
|
|
|
constexpr void remove_all(const KeyType& key)
|
2017-03-06 20:47:26 +01:00
|
|
|
{
|
|
|
|
const auto hash = hash_value(key);
|
|
|
|
for (int index = find_index(key, hash); index >= 0;
|
|
|
|
index = find_index(key, hash))
|
|
|
|
{
|
|
|
|
m_items.erase(m_items.begin() + index);
|
|
|
|
m_index.remove(hash, index);
|
|
|
|
m_index.ordered_fix_entries(index);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-10-20 06:16:58 +02:00
|
|
|
using iterator = typename ContainerType::iterator;
|
|
|
|
constexpr iterator begin() { return m_items.begin(); }
|
|
|
|
constexpr iterator end() { return m_items.end(); }
|
2017-03-06 20:47:26 +01:00
|
|
|
|
2017-10-20 06:16:58 +02:00
|
|
|
using const_iterator = typename ContainerType::const_iterator;
|
|
|
|
constexpr const_iterator begin() const { return m_items.begin(); }
|
|
|
|
constexpr const_iterator end() const { return m_items.end(); }
|
2017-03-06 20:47:26 +01:00
|
|
|
|
2023-11-17 07:01:51 +01:00
|
|
|
Item& item(size_t index) { return m_items[index]; }
|
2018-06-28 11:49:45 +02:00
|
|
|
const Item& item(size_t index) const { return m_items[index]; }
|
|
|
|
|
2020-11-11 11:43:27 +01:00
|
|
|
template<typename KeyType> requires IsHashCompatible<Key, KeyType>
|
2017-10-20 06:16:58 +02:00
|
|
|
constexpr iterator find(const KeyType& key)
|
2017-03-06 20:47:26 +01:00
|
|
|
{
|
|
|
|
auto index = find_index(key);
|
|
|
|
return index >= 0 ? begin() + index : end();
|
|
|
|
}
|
|
|
|
|
2020-11-11 11:43:27 +01:00
|
|
|
template<typename KeyType> requires IsHashCompatible<Key, KeyType>
|
2017-10-20 06:16:58 +02:00
|
|
|
constexpr const_iterator find(const KeyType& key) const
|
2017-03-06 20:47:26 +01:00
|
|
|
{
|
|
|
|
return const_cast<HashMap*>(this)->find(key);
|
|
|
|
}
|
|
|
|
|
2023-05-29 12:08:02 +02:00
|
|
|
constexpr void remove(const const_iterator& it)
|
|
|
|
{
|
|
|
|
auto index = it - m_items.begin();
|
|
|
|
const auto hash = hash_value(it->key);
|
|
|
|
m_index.remove(hash, index);
|
|
|
|
m_items.erase(it);
|
|
|
|
m_index.ordered_fix_entries(index);
|
|
|
|
}
|
|
|
|
|
2017-10-20 06:16:58 +02:00
|
|
|
constexpr void clear() { m_items.clear(); m_index.clear(); }
|
2017-03-06 20:47:26 +01:00
|
|
|
|
2017-10-20 06:16:58 +02:00
|
|
|
constexpr size_t size() const { return m_items.size(); }
|
|
|
|
constexpr bool empty() const { return m_items.empty(); }
|
|
|
|
constexpr void reserve(size_t size)
|
2017-03-06 20:47:26 +01:00
|
|
|
{
|
2018-04-05 00:52:33 +02:00
|
|
|
m_items.reserve(size);
|
2017-03-07 16:48:04 +01:00
|
|
|
m_index.reserve(size);
|
2017-03-06 20:47:26 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
// Equality is taking the order of insertion into account
|
|
|
|
template<MemoryDomain otherDomain>
|
2017-10-20 06:16:58 +02:00
|
|
|
constexpr bool operator==(const HashMap<Key, Value, otherDomain, Container>& other) const
|
2017-03-06 20:47:26 +01:00
|
|
|
{
|
2022-08-05 11:35:27 +02:00
|
|
|
return size() == other.size() and std::equal(begin(), end(), other.begin());
|
2017-03-06 20:47:26 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
private:
|
2022-08-10 21:58:31 +02:00
|
|
|
static auto& item_value(auto& item)
|
2022-08-05 11:35:27 +02:00
|
|
|
{
|
|
|
|
if constexpr (has_value) { return item.value; } else { return item; }
|
|
|
|
}
|
|
|
|
|
|
|
|
static const Key& item_key(const Item& item)
|
|
|
|
{
|
|
|
|
if constexpr (has_value) { return item.key; } else { return item; }
|
|
|
|
}
|
|
|
|
|
2017-10-20 06:16:58 +02:00
|
|
|
ContainerType m_items;
|
|
|
|
HashIndex<domain, Container> m_index;
|
2017-03-06 20:47:26 +01:00
|
|
|
};
|
|
|
|
|
2022-08-05 11:20:00 +02:00
|
|
|
template<typename Key, typename Value,
|
|
|
|
MemoryDomain domain = MemoryDomain::Undefined,
|
|
|
|
template<typename, MemoryDomain> class Container = Vector>
|
|
|
|
using MultiHashMap = HashMap<Key, Value, domain, Container, true>;
|
|
|
|
|
2022-08-05 11:35:27 +02:00
|
|
|
template<typename Value,
|
|
|
|
MemoryDomain domain = MemoryDomain::Undefined,
|
|
|
|
template<typename, MemoryDomain> class Container = Vector>
|
|
|
|
using HashSet = HashMap<Value, void, domain, Container>;
|
|
|
|
|
|
|
|
template<typename Value,
|
|
|
|
MemoryDomain domain = MemoryDomain::Undefined,
|
|
|
|
template<typename, MemoryDomain> class Container = Vector>
|
|
|
|
using MultiHashSet = HashMap<Value, void, domain, Container, true>;
|
|
|
|
|
2017-03-06 20:47:26 +01:00
|
|
|
// Declaration only; the definition is not in this header.
// NOTE(review): presumably a profiling/benchmark entry point for the hash
// maps above, judging by the name — confirm against the definition.
void profile_hash_maps();
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
#endif // hash_map_hh_INCLUDED
|