Add support for interned strings
Use interned strings for Modification contents and word database. Interned strings are guaranteed not to move in memory and are reference counted.
This commit is contained in:
parent
d9e462851c
commit
d55d041c6a
|
@ -2,11 +2,12 @@
|
|||
|
||||
#include "assert.hh"
|
||||
#include "buffer_manager.hh"
|
||||
#include "client.hh"
|
||||
#include "context.hh"
|
||||
#include "file.hh"
|
||||
#include "interned_string.hh"
|
||||
#include "utils.hh"
|
||||
#include "window.hh"
|
||||
#include "client.hh"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
|
@ -170,9 +171,9 @@ struct Buffer::Modification
|
|||
|
||||
Type type;
|
||||
ByteCoord coord;
|
||||
String content;
|
||||
InternedString content;
|
||||
|
||||
Modification(Type type, ByteCoord coord, String content)
|
||||
Modification(Type type, ByteCoord coord, InternedString content)
|
||||
: type(type), coord(coord), content(std::move(content)) {}
|
||||
|
||||
Modification inverse() const
|
||||
|
@ -240,7 +241,7 @@ void Buffer::check_invariant() const
|
|||
#endif
|
||||
}
|
||||
|
||||
ByteCoord Buffer::do_insert(ByteCoord pos, const String& content)
|
||||
ByteCoord Buffer::do_insert(ByteCoord pos, StringView content)
|
||||
{
|
||||
kak_assert(is_valid(pos));
|
||||
|
||||
|
@ -341,7 +342,7 @@ ByteCoord Buffer::do_erase(ByteCoord begin, ByteCoord end)
|
|||
|
||||
void Buffer::apply_modification(const Modification& modification)
|
||||
{
|
||||
const String& content = modification.content;
|
||||
StringView content = modification.content;
|
||||
ByteCoord coord = modification.coord;
|
||||
|
||||
kak_assert(is_valid(coord));
|
||||
|
|
|
@ -190,7 +190,7 @@ private:
|
|||
};
|
||||
LineList m_lines;
|
||||
|
||||
ByteCoord do_insert(ByteCoord pos, const String& content);
|
||||
ByteCoord do_insert(ByteCoord pos, StringView content);
|
||||
ByteCoord do_erase(ByteCoord begin, ByteCoord end);
|
||||
|
||||
String m_name;
|
||||
|
|
50
src/interned_string.cc
Normal file
50
src/interned_string.cc
Normal file
|
@ -0,0 +1,50 @@
|
|||
#include "interned_string.hh"
|
||||
|
||||
namespace Kakoune
|
||||
{
|
||||
|
||||
// Take one reference on the interned copy of `str`, creating that copy if
// the registry does not hold it yet, and return a handle that owns the
// reference.
InternedString StringRegistry::acquire(StringView str)
{
    size_t slot;
    const auto found = m_slot_map.find(str);
    const bool already_interned = found != m_slot_map.end();

    if (already_interned)
    {
        // Known string: just account for the additional owner.
        slot = found->second;
        m_storage[slot].second++;
    }
    else if (m_free_slots.empty())
    {
        // No released slot to recycle, grow the storage.
        slot = m_storage.size();
        m_storage.push_back(DataAndRefCount({str.begin(), str.end()}, 1));
    }
    else
    {
        // Recycle the most recently released slot.
        slot = m_free_slots.back();
        m_free_slots.pop_back();
        m_storage[slot] = DataAndRefCount({str.begin(), str.end()}, 1);
    }

    // View on the registry owned characters (not on the caller's `str`).
    const auto& chars = m_storage[slot].first;
    StringView storage_view{chars.data(), (int)chars.size()};

    // The map key must point to the storage data as well, so a new entry is
    // only registered once that view exists.
    if (not already_interned)
        m_slot_map[storage_view] = slot;

    return InternedString{storage_view, InternedString::AlreadyAcquired{}};
}
|
||||
|
||||
// Drop one reference on the interned string whose content is `str`.
// When the last reference goes away, the slot is unregistered and made
// available for reuse by a later acquire().
void StringRegistry::release(StringView str)
{
    auto entry = m_slot_map.find(str);
    kak_assert(entry != m_slot_map.end());

    const size_t slot = entry->second;
    auto& data_and_refcount = m_storage[slot];
    if (--data_and_refcount.second != 0)
        return;

    m_free_slots.push_back(slot);
    // The map key is a view on the stored characters: remove the map entry
    // before clearing them.
    m_slot_map.erase(entry);
    data_and_refcount.first.clear();
}
|
||||
|
||||
}
|
111
src/interned_string.hh
Normal file
111
src/interned_string.hh
Normal file
|
@ -0,0 +1,111 @@
|
|||
#ifndef interned_string_hh_INCLUDED
|
||||
#define interned_string_hh_INCLUDED
|
||||
|
||||
#include "string.hh"
|
||||
#include "utils.hh"
|
||||
|
||||
#include <unordered_map>
|
||||
|
||||
namespace Kakoune
|
||||
{
|
||||
|
||||
class InternedString;
|
||||
|
||||
// Stores one copy of every interned string along with a reference count.
// All members are private: references are only taken and dropped through
// InternedString.
class StringRegistry : public Singleton<StringRegistry>
{
    friend class InternedString;

    // Characters of an interned string, paired with its reference count.
    using DataAndRefCount = std::pair<std::vector<char>, int>;

    // Take a reference on the interned copy of str, creating it if needed.
    InternedString acquire(StringView str);
    // Drop a reference on the interned copy of str.
    void release(StringView str);

    // Maps the content of each live interned string to its index in m_storage.
    std::unordered_map<StringView, size_t> m_slot_map;
    // Indices of m_storage entries that were released and can be reused.
    std::vector<size_t> m_free_slots;
    std::vector<DataAndRefCount> m_storage;
};
|
||||
|
||||
class InternedString : public StringView
|
||||
{
|
||||
public:
|
||||
InternedString() = default;
|
||||
|
||||
InternedString(const InternedString& str) { acquire_ifn(str); }
|
||||
|
||||
InternedString(InternedString&& str) : StringView(str)
|
||||
{
|
||||
static_cast<StringView&>(str) = StringView{};
|
||||
}
|
||||
|
||||
InternedString(const char* str) : StringView() { acquire_ifn(str); }
|
||||
InternedString(StringView str) : StringView() { acquire_ifn(str); }
|
||||
InternedString(const String& str) : StringView() { acquire_ifn(str); }
|
||||
|
||||
InternedString& operator=(const InternedString& str)
|
||||
{
|
||||
if (str.data() == data() && str.length() == length())
|
||||
return *this;
|
||||
release_ifn();
|
||||
acquire_ifn(str);
|
||||
return *this;
|
||||
}
|
||||
|
||||
InternedString& operator=(InternedString&& str)
|
||||
{
|
||||
static_cast<StringView&>(*this) = str;
|
||||
static_cast<StringView&>(str) = StringView{};
|
||||
return *this;
|
||||
}
|
||||
|
||||
~InternedString()
|
||||
{
|
||||
release_ifn();
|
||||
}
|
||||
|
||||
bool operator==(const InternedString& str) const
|
||||
{ return data() == str.data() && length() == str.length(); }
|
||||
bool operator!=(const InternedString& str) const
|
||||
{ return !(*this == str); }
|
||||
|
||||
using StringView::operator==;
|
||||
using StringView::operator!=;
|
||||
|
||||
private:
|
||||
friend class StringRegistry;
|
||||
|
||||
struct AlreadyAcquired{};
|
||||
InternedString(StringView str, AlreadyAcquired)
|
||||
: StringView(str) {}
|
||||
|
||||
void acquire_ifn(StringView str)
|
||||
{
|
||||
if (str.empty())
|
||||
static_cast<StringView&>(*this) = StringView{};
|
||||
else
|
||||
*this = StringRegistry::instance().acquire(str);
|
||||
}
|
||||
|
||||
void release_ifn()
|
||||
{
|
||||
if (!empty())
|
||||
StringRegistry::instance().release(*this);
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
namespace std
|
||||
{
|
||||
template<>
|
||||
struct hash<Kakoune::InternedString>
|
||||
{
|
||||
size_t operator()(const Kakoune::InternedString& str) const
|
||||
{
|
||||
return hash<const char*>{}(str.data()) ^
|
||||
hash<int>{}((int)str.length());
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
#endif // interned_string_hh_INCLUDED
|
||||
|
|
@ -20,6 +20,7 @@
|
|||
#include "remote.hh"
|
||||
#include "shell_manager.hh"
|
||||
#include "string.hh"
|
||||
#include "interned_string.hh"
|
||||
#include "window.hh"
|
||||
|
||||
#if defined(__APPLE__)
|
||||
|
@ -303,6 +304,7 @@ int run_server(StringView session, StringView init_command,
|
|||
signal(SIGTERM, [](int) { terminate = true; });
|
||||
}
|
||||
|
||||
StringRegistry string_registry;
|
||||
EventManager event_manager;
|
||||
GlobalOptions global_options;
|
||||
GlobalHooks global_hooks;
|
||||
|
|
|
@ -7,6 +7,14 @@
|
|||
namespace Kakoune
|
||||
{
|
||||
|
||||
// Order two string views: compare their common prefix with strncmp, and
// when it compares equal, the shorter view sorts first.
// Note that strncmp stops comparing at the first NUL character.
bool operator<(StringView lhs, StringView rhs)
{
    const auto common_length = std::min(lhs.length(), rhs.length());
    const int order = strncmp(lhs.data(), rhs.data(), (int)common_length);
    return order != 0 ? order < 0
                      : lhs.length() < rhs.length();
}
|
||||
|
||||
std::vector<String> split(StringView str, char separator, char escape)
|
||||
{
|
||||
std::vector<String> res;
|
||||
|
@ -139,4 +147,64 @@ String expand_tabs(StringView line, CharCount tabstop, CharCount col)
|
|||
return res;
|
||||
}
|
||||
|
||||
// Rotate the 32 bit value x left by r bits. r must be in [1, 31]: with
// r == 0 the right shift would be by the full width, which is undefined.
[[gnu::always_inline]]
static inline uint32_t rotl(uint32_t x, int8_t r)
{
    return (x << r) | (x >> (32 - r));
}

// murmur3 finalization mix
[[gnu::always_inline]]
static inline uint32_t fmix(uint32_t h)
{
    h ^= h >> 16;
    h *= 0x85ebca6b;
    h ^= h >> 13;
    h *= 0xc2b2ae35;
    h ^= h >> 16;

    return h;
}

// murmur3 hash, based on https://github.com/PeterScott/murmur3
//
// Hash the len bytes starting at input. input does not need to be aligned
// nor NUL terminated, and bytes past len are never read.
size_t hash_data(const char* input, size_t len)
{
    const uint8_t* data = reinterpret_cast<const uint8_t*>(input);
    uint32_t hash = 0x1235678;
    constexpr uint32_t c1 = 0xcc9e2d51;
    constexpr uint32_t c2 = 0x1b873593;

    // body: mix the input four bytes at a time, first block first
    const size_t nblocks = len / 4;
    for (size_t i = 0; i < nblocks; ++i)
    {
        // String data has no alignment guarantee, so the block is loaded
        // with memcpy instead of dereferencing a casted uint32_t pointer.
        uint32_t key;
        memcpy(&key, data + i * 4, sizeof(key));

        key *= c1;
        key = rotl(key, 15);
        key *= c2;

        hash ^= key;
        hash = rotl(hash, 13);
        hash = hash * 5 + 0xe6546b64;
    }

    // tail: the 0 to 3 bytes that do not fill a whole block
    const uint8_t* tail = data + nblocks * 4;
    uint32_t key = 0;
    switch (len & 3)
    {
        case 3: key ^= tail[2] << 16;
                // fallthrough
        case 2: key ^= tail[1] << 8;
                // fallthrough
        case 1: key ^= tail[0];
                key *= c1;
                key = rotl(key, 15);
                key *= c2;
                hash ^= key;
    }

    // finalization: only the low 32 bits of len take part in the hash
    hash ^= static_cast<uint32_t>(len);
    hash = fmix(hash);

    return hash;
}
|
||||
|
||||
}
|
||||
|
|
|
@ -140,6 +140,8 @@ inline bool StringView::operator!=(StringView other) const
|
|||
return !this->operator==(other);
|
||||
}
|
||||
|
||||
bool operator<(StringView lhs, StringView rhs);
|
||||
|
||||
inline bool operator==(const char* lhs, StringView rhs)
|
||||
{
|
||||
return StringView{lhs} == rhs;
|
||||
|
@ -304,6 +306,8 @@ bool subsequence_match(StringView str, StringView subseq);
|
|||
|
||||
String expand_tabs(StringView line, CharCount tabstop, CharCount col = 0);
|
||||
|
||||
size_t hash_data(const char* data, size_t len);
|
||||
|
||||
}
|
||||
|
||||
namespace std
|
||||
|
@ -316,6 +320,15 @@ namespace std
|
|||
return hash<std::string>::operator()(str);
|
||||
}
|
||||
};
|
||||
|
||||
template<>
|
||||
struct hash<Kakoune::StringView>
|
||||
{
|
||||
size_t operator()(Kakoune::StringView str) const
|
||||
{
|
||||
return Kakoune::hash_data(str.data(), (int)str.length());
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
#endif // string_hh_INCLUDED
|
||||
|
|
|
@ -82,17 +82,17 @@ void test_word_db()
|
|||
WordDB word_db(buffer);
|
||||
auto res = word_db.find_prefix("");
|
||||
std::sort(res.begin(), res.end());
|
||||
kak_assert(res == std::vector<String>{ "allo" COMMA "kanaky" COMMA "mutch" COMMA "tchaa" COMMA "tchou" });
|
||||
kak_assert(res == std::vector<InternedString>{ "allo" COMMA "kanaky" COMMA "mutch" COMMA "tchaa" COMMA "tchou" });
|
||||
kak_assert(word_db.get_word_occurences("tchou") == 3);
|
||||
kak_assert(word_db.get_word_occurences("allo") == 1);
|
||||
buffer.erase(buffer.iterator_at({1, 6}), buffer.iterator_at({4, 0}));
|
||||
res = word_db.find_prefix("");
|
||||
std::sort(res.begin(), res.end());
|
||||
kak_assert(res == std::vector<String>{ "allo" COMMA "mutch" COMMA "tchou" });
|
||||
kak_assert(res == std::vector<InternedString>{ "allo" COMMA "mutch" COMMA "tchou" });
|
||||
buffer.insert(buffer.iterator_at({1, 0}), "re");
|
||||
res = word_db.find_prefix("");
|
||||
std::sort(res.begin(), res.end());
|
||||
kak_assert(res == std::vector<String>{ "allo" COMMA "mutch" COMMA "retchou" COMMA "tchou" });
|
||||
kak_assert(res == std::vector<InternedString>{ "allo" COMMA "mutch" COMMA "retchou" COMMA "tchou" });
|
||||
}
|
||||
|
||||
void test_utf8()
|
||||
|
|
|
@ -7,9 +7,9 @@
|
|||
namespace Kakoune
|
||||
{
|
||||
|
||||
static std::vector<String> get_words(StringView content)
|
||||
static std::vector<InternedString> get_words(StringView content)
|
||||
{
|
||||
std::vector<String> res;
|
||||
std::vector<InternedString> res;
|
||||
using Iterator = utf8::iterator<const char*, utf8::InvalidPolicy::Pass>;
|
||||
const char* word_start = content.begin();
|
||||
bool in_word = false;
|
||||
|
@ -24,20 +24,20 @@ static std::vector<String> get_words(StringView content)
|
|||
}
|
||||
else if (in_word and not word)
|
||||
{
|
||||
res.push_back({word_start, it.base()});
|
||||
res.push_back(StringView{word_start, it.base()});
|
||||
in_word = false;
|
||||
}
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
static void add_words(WordDB::WordList& wl, const std::vector<String>& words)
|
||||
// Count one more occurrence in wl for each entry of words; a word that is
// not in wl yet gets inserted by the lookup.
static void add_words(WordDB::WordList& wl, const std::vector<InternedString>& words)
{
    for (const auto& word : words)
        wl[word] += 1;
}
|
||||
|
||||
static void remove_words(WordDB::WordList& wl, const std::vector<String>& words)
|
||||
static void remove_words(WordDB::WordList& wl, const std::vector<InternedString>& words)
|
||||
{
|
||||
for (auto& w : words)
|
||||
{
|
||||
|
@ -104,11 +104,11 @@ void WordDB::update_db()
|
|||
m_line_to_words = std::move(new_lines);
|
||||
}
|
||||
|
||||
std::vector<String> WordDB::find_prefix(const String& prefix)
|
||||
std::vector<InternedString> WordDB::find_prefix(StringView prefix)
|
||||
{
|
||||
update_db();
|
||||
|
||||
std::vector<String> res;
|
||||
std::vector<InternedString> res;
|
||||
for (auto it = m_words.lower_bound(prefix); it != m_words.end(); ++it)
|
||||
{
|
||||
if (not prefix_match(it->first, prefix))
|
||||
|
@ -118,11 +118,11 @@ std::vector<String> WordDB::find_prefix(const String& prefix)
|
|||
return res;
|
||||
}
|
||||
|
||||
std::vector<String> WordDB::find_subsequence(const String& subsequence)
|
||||
std::vector<InternedString> WordDB::find_subsequence(StringView subsequence)
|
||||
{
|
||||
update_db();
|
||||
|
||||
std::vector<String> res;
|
||||
std::vector<InternedString> res;
|
||||
for (auto it = m_words.begin(); it != m_words.end(); ++it)
|
||||
{
|
||||
if (subsequence_match(it->first, subsequence))
|
||||
|
@ -131,7 +131,7 @@ std::vector<String> WordDB::find_subsequence(const String& subsequence)
|
|||
return res;
|
||||
}
|
||||
|
||||
int WordDB::get_word_occurences(const String& word) const
|
||||
int WordDB::get_word_occurences(StringView word) const
|
||||
{
|
||||
auto it = m_words.find(word);
|
||||
if (it != m_words.end())
|
||||
|
|
|
@ -2,27 +2,26 @@
|
|||
#define word_db_hh_INCLUDED
|
||||
|
||||
#include "buffer.hh"
|
||||
#include "interned_string.hh"
|
||||
|
||||
#include <map>
|
||||
|
||||
namespace Kakoune
|
||||
{
|
||||
|
||||
class String;
|
||||
|
||||
// maintain a database of words available in a buffer
|
||||
class WordDB
|
||||
{
|
||||
public:
|
||||
WordDB(const Buffer& buffer);
|
||||
|
||||
std::vector<String> find_prefix(const String& prefix);
|
||||
std::vector<String> find_subsequence(const String& subsequence);
|
||||
int get_word_occurences(const String& word) const;
|
||||
std::vector<InternedString> find_prefix(StringView prefix);
|
||||
std::vector<InternedString> find_subsequence(StringView subsequence);
|
||||
int get_word_occurences(StringView word) const;
|
||||
|
||||
using WordList = std::map<String, int>;
|
||||
using WordList = std::map<InternedString, int>;
|
||||
private:
|
||||
using LineToWords = std::vector<std::vector<String>>;
|
||||
using LineToWords = std::vector<std::vector<InternedString>>;
|
||||
|
||||
void update_db();
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user