Use different hash algorithms for strings and file hashing
For hash map, using fnv1a is faster as it is a much simpler algorithm we can afford to inline. For files murmur3 should win as it processes bytes 4 by 4.
This commit is contained in:
parent
53d9b9b676
commit
04a96b059f
|
@ -140,7 +140,7 @@ decltype(auto) parse_file(StringView filename, Func&& func)
|
||||||
const bool crlf = has_crlf and not has_lf;
|
const bool crlf = has_crlf and not has_lf;
|
||||||
auto eolformat = crlf ? EolFormat::Crlf : EolFormat::Lf;
|
auto eolformat = crlf ? EolFormat::Crlf : EolFormat::Lf;
|
||||||
|
|
||||||
FsStatus fs_status{file.st.st_mtim, file.st.st_size, hash_data(file.data, file.st.st_size)};
|
FsStatus fs_status{file.st.st_mtim, file.st.st_size, murmur3(file.data, file.st.st_size)};
|
||||||
return func(parse_lines(pos, end, eolformat), bom, eolformat, fs_status);
|
return func(parse_lines(pos, end, eolformat), bom, eolformat, fs_status);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -394,7 +394,7 @@ void Client::check_if_buffer_needs_reloading()
|
||||||
return;
|
return;
|
||||||
|
|
||||||
if (MappedFile fd{filename};
|
if (MappedFile fd{filename};
|
||||||
fd.st.st_size == status.file_size and hash_data(fd.data, fd.st.st_size) == status.hash)
|
fd.st.st_size == status.file_size and murmur3(fd.data, fd.st.st_size) == status.hash)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
if (reload == Autoreload::Ask)
|
if (reload == Autoreload::Ask)
|
||||||
|
|
|
@ -626,7 +626,7 @@ FsStatus get_fs_status(StringView filename)
|
||||||
{
|
{
|
||||||
MappedFile fd{filename};
|
MappedFile fd{filename};
|
||||||
|
|
||||||
return {fd.st.st_mtim, fd.st.st_size, hash_data(fd.data, fd.st.st_size)};
|
return {fd.st.st_mtim, fd.st.st_size, murmur3(fd.data, fd.st.st_size)};
|
||||||
}
|
}
|
||||||
|
|
||||||
String get_kak_binary_path()
|
String get_kak_binary_path()
|
||||||
|
|
10
src/hash.cc
10
src/hash.cc
|
@ -27,8 +27,8 @@ static inline uint32_t fmix(uint32_t h)
|
||||||
return h;
|
return h;
|
||||||
}
|
}
|
||||||
|
|
||||||
// murmur3 hash, based on https://github.com/PeterScott/murmur3
|
// based on https://github.com/PeterScott/murmur3
|
||||||
size_t hash_data(const char* input, size_t len)
|
size_t murmur3(const char* input, size_t len)
|
||||||
{
|
{
|
||||||
const uint8_t* data = reinterpret_cast<const uint8_t*>(input);
|
const uint8_t* data = reinterpret_cast<const uint8_t*>(input);
|
||||||
uint32_t hash = 0x1235678;
|
uint32_t hash = 0x1235678;
|
||||||
|
@ -73,13 +73,13 @@ size_t hash_data(const char* input, size_t len)
|
||||||
UnitTest test_murmur_hash{[] {
|
UnitTest test_murmur_hash{[] {
|
||||||
{
|
{
|
||||||
constexpr char data[] = "Hello, World!";
|
constexpr char data[] = "Hello, World!";
|
||||||
kak_assert(hash_data(data, strlen(data)) == 0xf816f95b);
|
kak_assert(murmur3(data, strlen(data)) == 0xf816f95b);
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
constexpr char data[] = "xxxxxxxxxxxxxxxxxxxxxxxxxxxx";
|
constexpr char data[] = "xxxxxxxxxxxxxxxxxxxxxxxxxxxx";
|
||||||
kak_assert(hash_data(data, strlen(data)) == 3551113186);
|
kak_assert(murmur3(data, strlen(data)) == 3551113186);
|
||||||
}
|
}
|
||||||
kak_assert(hash_data("", 0) == 2572747774);
|
kak_assert(murmur3("", 0) == 2572747774);
|
||||||
}};
|
}};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
14
src/hash.hh
14
src/hash.hh
|
@ -5,11 +5,23 @@
|
||||||
#include <utility>
|
#include <utility>
|
||||||
|
|
||||||
#include <cstddef>
|
#include <cstddef>
|
||||||
|
#include <cstdint>
|
||||||
|
|
||||||
namespace Kakoune
|
namespace Kakoune
|
||||||
{
|
{
|
||||||
|
|
||||||
size_t hash_data(const char* data, size_t len);
|
inline size_t fnv1a(const char* data, size_t len)
|
||||||
|
{
|
||||||
|
constexpr uint32_t FNV_prime_32 = 16777619;
|
||||||
|
constexpr uint32_t offset_basis_32 = 2166136261;
|
||||||
|
|
||||||
|
uint32_t hash_value = offset_basis_32;
|
||||||
|
for (size_t i = 0; i < len; ++i)
|
||||||
|
hash_value = (hash_value ^ data[i]) * FNV_prime_32;
|
||||||
|
return hash_value;
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t murmur3(const char* input, size_t len);
|
||||||
|
|
||||||
template<typename Type> requires std::is_integral_v<Type>
|
template<typename Type> requires std::is_integral_v<Type>
|
||||||
constexpr size_t hash_value(const Type& val)
|
constexpr size_t hash_value(const Type& val)
|
||||||
|
|
|
@ -21,7 +21,7 @@ public:
|
||||||
|
|
||||||
friend constexpr size_t hash_value(const Type& str)
|
friend constexpr size_t hash_value(const Type& str)
|
||||||
{
|
{
|
||||||
return hash_data(str.data(), (int)str.length());
|
return fnv1a(str.data(), (int)str.length());
|
||||||
}
|
}
|
||||||
|
|
||||||
using iterator = CharType*;
|
using iterator = CharType*;
|
||||||
|
|
Loading…
Reference in New Issue
Block a user