From 085973a486d2a394385e5c3575ca8c2f9c3cc99b Mon Sep 17 00:00:00 2001
From: Peter Pentchev
Date: Fri, 24 Jun 2022 20:01:25 +0300
Subject: [PATCH 1/2] Fix murmurhash for big-endian architectures.

The murmurhash implementation tries to read a sequence of four bytes
as a single little-endian uint32 value. This does not work on e.g.
Linux/s390x;
https://buildd.debian.org/status/fetch.php?pkg=kakoune&arch=s390x&ver=2021.11.08-1&stamp=1645975425&raw=0
---
 src/hash.cc | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/hash.cc b/src/hash.cc
index ecfd6e46..a603e48b 100644
--- a/src/hash.cc
+++ b/src/hash.cc
@@ -41,7 +41,11 @@ size_t hash_data(const char* input, size_t len)
     for (ptrdiff_t i = -nblocks; i; ++i)
     {
         uint32_t key;
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
         memcpy(&key, blocks + 4*i, 4);
+#else
+        key = blocks[4*i] + (blocks[4*i + 1] << 8) + (blocks[4*i + 2] << 16) + (blocks[4*i + 3] << 24);
+#endif
         key *= c1;
         key = rotl(key, 15);
         key *= c2;

From ded97628f79aa3517329f8a0831cca780c3391f5 Mon Sep 17 00:00:00 2001
From: Peter Pentchev
Date: Thu, 7 Jul 2022 08:53:57 +0300
Subject: [PATCH 2/2] murmurhash: always load byte by byte

Also reverse the order of bytes, loading the most significant parts
first, and use bitwise "or" instead of addition.
---
 src/hash.cc | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/src/hash.cc b/src/hash.cc
index a603e48b..3fad8f42 100644
--- a/src/hash.cc
+++ b/src/hash.cc
@@ -41,11 +41,7 @@ size_t hash_data(const char* input, size_t len)
     for (ptrdiff_t i = -nblocks; i; ++i)
     {
         uint32_t key;
-#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
-        memcpy(&key, blocks + 4*i, 4);
-#else
-        key = blocks[4*i] + (blocks[4*i + 1] << 8) + (blocks[4*i + 2] << 16) + (blocks[4*i + 3] << 24);
-#endif
+        key = (blocks[4*i + 3] << 24) | (blocks[4*i + 2] << 16) | (blocks[4*i + 1] << 8) + blocks[4*i];
         key *= c1;
         key = rotl(key, 15);
         key *= c2;