kakoune/src/hash.cc
Maxime Coste e4a23a64fa Support opening files bigger than 2 GiB
The real technical limits are lines bigger than 2 GiB and buffers
with more than 2 Gi lines. Refactor buffer loading to make it possible
to load files bigger than 2 GiB that stay within those limits.

Fix an overflow with the hash_data function at the same time
2021-05-28 17:03:41 +10:00

86 lines
1.8 KiB
C++

#include "hash.hh"
#include <cstdint>
#include <cstring>
#include "unit_tests.hh"
#include "assert.hh"
namespace Kakoune
{
// Rotate the 32-bit value x left by r bits.
// The rotation count is reduced modulo 32, so r == 0 (or any multiple of 32)
// returns x unchanged instead of performing an undefined 32-bit shift by 32.
[[gnu::always_inline]]
static inline uint32_t rotl(uint32_t x, int8_t r)
{
    // Mask both shift counts into [0, 31]: shifting a 32-bit value by 32 or
    // by a negative amount is undefined behaviour.
    const uint32_t left = static_cast<uint32_t>(r) & 31u;
    const uint32_t right = (32u - left) & 31u;
    return (x << left) | (x >> right);
}
// Finalization mix used at the end of the hash: alternates xor-shifts and
// multiplications so that every input bit influences the whole 32-bit result.
[[gnu::always_inline]]
static inline uint32_t fmix(uint32_t h)
{
    constexpr uint32_t first_multiplier = 0x85ebca6b;
    constexpr uint32_t second_multiplier = 0xc2b2ae35;

    h = (h ^ (h >> 16)) * first_multiplier;
    h = (h ^ (h >> 13)) * second_multiplier;
    return h ^ (h >> 16);
}
// 32-bit murmur3 hash, based on https://github.com/PeterScott/murmur3
//
// input: pointer to the first byte to hash
// len:   number of bytes to hash
// Returns the 32-bit hash value, widened to size_t.
size_t hash_data(const char* input, size_t len)
{
    constexpr uint32_t c1 = 0xcc9e2d51;
    constexpr uint32_t c2 = 0x1b873593;

    const uint8_t* bytes = reinterpret_cast<const uint8_t*>(input);
    const size_t block_count = len / 4;
    uint32_t h = 0x1235678; // seed

    // Body: mix in every complete 4-byte block, front to back.
    for (size_t block = 0; block < block_count; ++block)
    {
        // memcpy loads the block in native byte order without assuming
        // that the input is 4-byte aligned.
        uint32_t k;
        memcpy(&k, bytes + block * 4, sizeof(k));

        k *= c1;
        k = rotl(k, 15);
        k *= c2;

        h ^= k;
        h = rotl(h, 13);
        h = h * 5 + 0xe6546b64;
    }

    // Tail: fold the remaining 1 to 3 bytes (if any) into one last key,
    // byte i landing in bits [8*i, 8*i + 8).
    const uint8_t* rest = bytes + block_count * 4;
    const size_t rest_len = len % 4;
    if (rest_len != 0)
    {
        uint32_t k = 0;
        for (size_t i = rest_len; i-- > 0;)
            k ^= static_cast<uint32_t>(rest[i]) << (8 * i);

        k *= c1;
        k = rotl(k, 15);
        k *= c2;
        h ^= k;
    }

    // Finalization: mix in the length (truncated to 32 bits), then avalanche.
    h ^= len;
    return fmix(h);
}
// Checks hash_data against fixed expected values for three inputs:
// a short string with a 1-byte tail, a 28-byte string made only of whole
// 4-byte blocks, and the empty string.
UnitTest test_murmur_hash{[] {
    // Hash a NUL-terminated string, excluding the terminator.
    const auto hash_of = [](const char* text) {
        return hash_data(text, strlen(text));
    };

    kak_assert(hash_of("Hello, World!") == 0xf816f95b);
    kak_assert(hash_of("xxxxxxxxxxxxxxxxxxxxxxxxxxxx") == 3551113186);
    kak_assert(hash_of("") == 2572747774);
}};
}