ref: 8bf655dcb8c3e88224162024b1f29f1862ffc043
parent: dcefc02be84f75919752787fe81b66c9dbe982fc
author: Sigrid Solveig Haflínudóttir <[email protected]>
date: Wed Nov 6 20:46:15 EST 2024
replace lookup3 with SpookyHash
--- a/3rd/lookup3.c
+++ /dev/null
@@ -1,319 +1,0 @@
-/*
--------------------------------------------------------------------------------
-lookup3.c, by Bob Jenkins, May 2006, Public Domain.
-
-These are functions for producing 32-bit hashes for hash table lookup.
-hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final()
-are externally useful functions. You can use this free for any purpose.
-It's in the public domain. It has no warranty.
-
-If you want to find a hash of, say, exactly 7 integers, do
- a = i1; b = i2; c = i3;
- mix(a,b,c);
- a += i4; b += i5; c += i6;
- mix(a,b,c);
- a += i7;
- final(a,b,c);
-then use c as the hash value. If you have a variable length array of
-4-byte integers to hash, use hashword(). If you have a byte array (like
-a character string), use hashlittle(). If you have several byte arrays, or
-a mix of things, see the comments above hashlittle().
-
-Why is this so big? I read 12 bytes at a time into 3 4-byte integers,
-then mix those integers. This is fast (you can do a lot more thorough
-mixing with 12*3 instructions on 3 integers than you can with 3 instructions
-on 1 byte), but shoehorning those bytes into integers efficiently is messy.
--------------------------------------------------------------------------------
-*/
-
-/*
- * My best guess at if you are big-endian or little-endian. This may
- * need adjustment.
- */
-#if defined(BYTE_ORDER) && defined(LITTLE_ENDIAN) && BYTE_ORDER == LITTLE_ENDIAN
-#define HASH_LITTLE_ENDIAN 1
-#define HASH_BIG_ENDIAN 0
-#elif defined(BYTE_ORDER) && defined(BIG_ENDIAN) && BYTE_ORDER == BIG_ENDIAN
-#define HASH_LITTLE_ENDIAN 0
-#define HASH_BIG_ENDIAN 1
-#else
-#error endianess unknown
-#endif
-
-#define hashsize(n) ((uint32_t)1<<(n))
-#define hashmask(n) (hashsize(n)-1)
-#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
-
-/*
--------------------------------------------------------------------------------
-mix -- mix 3 32-bit values reversibly.
-
-This is reversible, so any information in (a,b,c) before mix() is
-still in (a,b,c) after mix().
-
-If four pairs of (a,b,c) inputs are run through mix(), or through
-mix() in reverse, there are at least 32 bits of the output that
-are sometimes the same for one pair and different for another pair.
-This was tested for:
-* pairs that differed by one bit, by two bits, in any combination
- of top bits of (a,b,c), or in any combination of bottom bits of
- (a,b,c).
-* "differ" is defined as +, -, ^, or ~^. For + and -, I transformed
- the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
- is commonly produced by subtraction) look like a single 1-bit
- difference.
-* the base values were pseudorandom, all zero but one bit set, or
- all zero plus a counter that starts at zero.
-
-Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that
-satisfy this are
- 4 6 8 16 19 4
- 9 15 3 18 27 15
- 14 9 3 7 17 3
-Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing
-for "differ" defined as + with a one-bit base and a two-bit delta. I
-used http://burtleburtle.net/bob/hash/avalanche.html to choose
-the operations, constants, and arrangements of the variables.
-
-This does not achieve avalanche. There are input bits of (a,b,c)
-that fail to affect some output bits of (a,b,c), especially of a. The
-most thoroughly mixed value is c, but it doesn't really even achieve
-avalanche in c.
-
-This allows some parallelism. Read-after-writes are good at doubling
-the number of bits affected, so the goal of mixing pulls in the opposite
-direction as the goal of parallelism. I did what I could. Rotates
-seem to cost as much as shifts on every machine I could lay my hands
-on, and rotates are much kinder to the top and bottom bits, so I used
-rotates.
--------------------------------------------------------------------------------
-*/
-#define mix(a,b,c) \
-{ \
- a -= c; a ^= rot(c, 4); c += b; \
- b -= a; b ^= rot(a, 6); a += c; \
- c -= b; c ^= rot(b, 8); b += a; \
- a -= c; a ^= rot(c,16); c += b; \
- b -= a; b ^= rot(a,19); a += c; \
- c -= b; c ^= rot(b, 4); b += a; \
-}
-
-/*
--------------------------------------------------------------------------------
-final -- final mixing of 3 32-bit values (a,b,c) into c
-
-Pairs of (a,b,c) values differing in only a few bits will usually
-produce values of c that look totally different. This was tested for
-* pairs that differed by one bit, by two bits, in any combination
- of top bits of (a,b,c), or in any combination of bottom bits of
- (a,b,c).
-* "differ" is defined as +, -, ^, or ~^. For + and -, I transformed
- the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
- is commonly produced by subtraction) look like a single 1-bit
- difference.
-* the base values were pseudorandom, all zero but one bit set, or
- all zero plus a counter that starts at zero.
-
-These constants passed:
- 14 11 25 16 4 14 24
- 12 14 25 16 4 14 24
-and these came close:
- 4 8 15 26 3 22 24
- 10 8 15 26 3 22 24
- 11 8 15 26 3 22 24
--------------------------------------------------------------------------------
-*/
-#define final(a,b,c) \
-{ \
- c ^= b; c -= rot(b,14); \
- a ^= c; a -= rot(c,11); \
- b ^= a; b -= rot(a,25); \
- c ^= b; c -= rot(b,16); \
- a ^= c; a -= rot(c,4); \
- b ^= a; b -= rot(a,14); \
- c ^= b; c -= rot(b,24); \
-}
-
-/*
- * hashlittle2: return 2 32-bit hash values
- *
- * This is identical to hashlittle(), except it returns two 32-bit hash
- * values instead of just one. This is good enough for hash table
- * lookup with 2^^64 buckets, or if you want a second hash if you're not
- * happy with the first, or if you want a probably-unique 64-bit ID for
- * the key. *pc is better mixed than *pb, so use *pc first. If you want
- * a 64-bit value do something like "*pc + (((uint64_t)*pb)<<32)".
- */
-static void hashlittle2(
- const void *key, /* the key to hash */
- size_t length, /* length of the key */
- uint32_t *pc, /* IN: primary initval, OUT: primary hash */
- uint32_t *pb) /* IN: secondary initval, OUT: secondary hash */
-{
- uint32_t a,b,c; /* internal state */
- union { const void *ptr; size_t i; } u; /* needed for Mac Powerbook G4 */
-
- /* Set up the internal state */
- a = b = c = 0xdeadbeef + ((uint32_t)length) + *pc;
- c += *pb;
-
- u.ptr = key;
- if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
- const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */
- const uint8_t *k8;
-
- /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
- while (length > 12)
- {
- a += k[0];
- b += k[1];
- c += k[2];
- mix(a,b,c);
- length -= 12;
- k += 3;
- }
-
- /*----------------------------- handle the last (probably partial) block */
- /*
- * "k[2]&0xffffff" actually reads beyond the end of the string, but
- * then masks off the part it's not allowed to read. Because the
- * string is aligned, the masked-off tail is in the same word as the
- * rest of the string. Every machine with memory protection I've seen
- * does it on word boundaries, so is OK with this. But VALGRIND will
- * still catch it and complain. The masking trick does make the hash
- * noticably faster for short strings (like English words).
- */
-#ifndef VALGRIND
- (void)k8;
- switch(length)
- {
- case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
- case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
- case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
- case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
- case 8 : b+=k[1]; a+=k[0]; break;
- case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
- case 6 : b+=k[1]&0xffff; a+=k[0]; break;
- case 5 : b+=k[1]&0xff; a+=k[0]; break;
- case 4 : a+=k[0]; break;
- case 3 : a+=k[0]&0xffffff; break;
- case 2 : a+=k[0]&0xffff; break;
- case 1 : a+=k[0]&0xff; break;
- case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */
- }
-
-#else /* make valgrind happy */
-
- k8 = (const uint8_t *)k;
- switch(length)
- {
- case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
- case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
- case 10: c+=((uint32_t)k8[9])<<8; /* fall through */
- case 9 : c+=k8[8]; /* fall through */
- case 8 : b+=k[1]; a+=k[0]; break;
- case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
- case 6 : b+=((uint32_t)k8[5])<<8; /* fall through */
- case 5 : b+=k8[4]; /* fall through */
- case 4 : a+=k[0]; break;
- case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
- case 2 : a+=((uint32_t)k8[1])<<8; /* fall through */
- case 1 : a+=k8[0]; break;
- case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */
- }
-
-#endif /* !valgrind */
-
- } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
- const uint16_t *k = (const uint16_t *)key; /* read 16-bit chunks */
- const uint8_t *k8;
-
- /*--------------- all but last block: aligned reads and different mixing */
- while (length > 12)
- {
- a += k[0] + (((uint32_t)k[1])<<16);
- b += k[2] + (((uint32_t)k[3])<<16);
- c += k[4] + (((uint32_t)k[5])<<16);
- mix(a,b,c);
- length -= 12;
- k += 6;
- }
-
- /*----------------------------- handle the last (probably partial) block */
- k8 = (const uint8_t *)k;
- switch(length)
- {
- case 12: c+=k[4]+(((uint32_t)k[5])<<16);
- b+=k[2]+(((uint32_t)k[3])<<16);
- a+=k[0]+(((uint32_t)k[1])<<16);
- break;
- case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
- case 10: c+=k[4];
- b+=k[2]+(((uint32_t)k[3])<<16);
- a+=k[0]+(((uint32_t)k[1])<<16);
- break;
- case 9 : c+=k8[8]; /* fall through */
- case 8 : b+=k[2]+(((uint32_t)k[3])<<16);
- a+=k[0]+(((uint32_t)k[1])<<16);
- break;
- case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
- case 6 : b+=k[2];
- a+=k[0]+(((uint32_t)k[1])<<16);
- break;
- case 5 : b+=k8[4]; /* fall through */
- case 4 : a+=k[0]+(((uint32_t)k[1])<<16);
- break;
- case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
- case 2 : a+=k[0];
- break;
- case 1 : a+=k8[0];
- break;
- case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */
- }
-
- } else { /* need to read the key one byte at a time */
- const uint8_t *k = (const uint8_t *)key;
-
- /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
- while (length > 12)
- {
- a += k[0];
- a += ((uint32_t)k[1])<<8;
- a += ((uint32_t)k[2])<<16;
- a += ((uint32_t)k[3])<<24;
- b += k[4];
- b += ((uint32_t)k[5])<<8;
- b += ((uint32_t)k[6])<<16;
- b += ((uint32_t)k[7])<<24;
- c += k[8];
- c += ((uint32_t)k[9])<<8;
- c += ((uint32_t)k[10])<<16;
- c += ((uint32_t)k[11])<<24;
- mix(a,b,c);
- length -= 12;
- k += 12;
- }
-
- /*-------------------------------- last block: affect all 32 bits of (c) */
- switch(length) /* all the case statements fall through */
- {
- case 12: c+=((uint32_t)k[11])<<24; // fallthrough
- case 11: c+=((uint32_t)k[10])<<16; // fallthrough
- case 10: c+=((uint32_t)k[9])<<8; // fallthrough
- case 9 : c+=k[8]; // fallthrough
- case 8 : b+=((uint32_t)k[7])<<24; // fallthrough
- case 7 : b+=((uint32_t)k[6])<<16; // fallthrough
- case 6 : b+=((uint32_t)k[5])<<8; // fallthrough
- case 5 : b+=k[4]; // fallthrough
- case 4 : a+=((uint32_t)k[3])<<24; // fallthrough
- case 3 : a+=((uint32_t)k[2])<<16; // fallthrough
- case 2 : a+=((uint32_t)k[1])<<8; // fallthrough
- case 1 : a+=k[0];
- break;
- case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */
- }
- }
-
- final(a,b,c);
- *pc=c; *pb=b;
-}
--- /dev/null
+++ b/3rd/spooky.c
@@ -1,0 +1,526 @@
+//
+// SpookyHash - 128-bit noncryptographic hash function
+//
+// Written in 2012 by Bob Jenkins
+//
+// Converted to C in 2015 by Joergen Ibsen
+//
+// To the extent possible under law, the author(s) have dedicated all
+// copyright and related and neighboring rights to this software to the
+// public domain worldwide. This software is distributed without any
+// warranty. <http://creativecommons.org/publicdomain/zero/1.0/>
+//
+// Original comment from SpookyV2.cpp by Bob Jenkins:
+//
+// Spooky Hash
+// A 128-bit noncryptographic hash, for checksums and table lookup
+// By Bob Jenkins. Public domain.
+// Oct 31 2010: published framework, disclaimer ShortHash isn't right
+// Nov 7 2010: disabled ShortHash
+// Oct 31 2011: replace End, ShortMix, ShortEnd, enable ShortHash again
+// April 10 2012: buffer overflow on platforms without unaligned reads
+// July 12 2012: was passing out variables in final to in/out in short
+// July 30 2012: I reintroduced the buffer overflow
+// August 5 2012: SpookyV2: d = should be d += in short hash, and remove extra mix from long hash
+
+#include "platform.h"
+#include "spooky.h"
+
+#define ALLOW_UNALIGNED_READS 1
+
+//
+// SC_CONST: a constant which:
+// - is not zero
+// - is odd
+// - is a not-very-regular mix of 1's and 0's
+// - does not need any other special mathematical properties
+//
+#define SC_CONST 0xDEADBEEFDEADBEEFULL
+
+#define ROTL64(x, k) (((x) << (k)) | ((x) >> (64 - (k))))
+
+#ifdef _MSC_VER
+# define restrict __restrict
+# define inline __forceinline
+#endif
+
+static bool
+spooky_is_aligned(const void *p, size_t size)
+{
+ return (uintptr_t) p % size == 0;
+}
+
+static bool
+spooky_is_little_endian(void)
+{
+ const union {
+ uint32_t i;
+ uint8_t c[sizeof(uint32_t)];
+ } x = { 1 };
+
+ return x.c[0];
+}
+
+//
+// Read uint64_t in little-endian order.
+//
+static inline uint64_t
+spooky_read_le64(const uint64_t *s)
+{
+ if (spooky_is_little_endian()) {
+ uint64_t v;
+ memcpy(&v, s, sizeof(v));
+ return v;
+ }
+ else {
+ const uint8_t *p = (const uint8_t *) s;
+ return (uint64_t) p[0]
+ | ((uint64_t) p[1] << 8)
+ | ((uint64_t) p[2] << 16)
+ | ((uint64_t) p[3] << 24)
+ | ((uint64_t) p[4] << 32)
+ | ((uint64_t) p[5] << 40)
+ | ((uint64_t) p[6] << 48)
+ | ((uint64_t) p[7] << 56);
+ }
+}
+
+//
+// This is used if the input is 96 bytes long or longer.
+//
+// The internal state is fully overwritten every 96 bytes.
+// Every input bit appears to cause at least 128 bits of entropy
+// before 96 other bytes are combined, when run forward or backward
+// For every input bit,
+// Two inputs differing in just that input bit
+// Where "differ" means xor or subtraction
+// And the base value is random
+// When run forward or backwards one Mix
+// I tried 3 pairs of each; they all differed by at least 212 bits.
+//
+static inline void
+spooky_mix(const uint64_t *restrict data, uint64_t *restrict s)
+{
+ s[0] += spooky_read_le64(&data[0]); s[2] ^= s[10];
+ s[11] ^= s[0]; s[0] = ROTL64(s[0], 11); s[11] += s[1];
+ s[1] += spooky_read_le64(&data[1]); s[3] ^= s[11];
+ s[0] ^= s[1]; s[1] = ROTL64(s[1], 32); s[0] += s[2];
+ s[2] += spooky_read_le64(&data[2]); s[4] ^= s[0];
+ s[1] ^= s[2]; s[2] = ROTL64(s[2], 43); s[1] += s[3];
+ s[3] += spooky_read_le64(&data[3]); s[5] ^= s[1];
+ s[2] ^= s[3]; s[3] = ROTL64(s[3], 31); s[2] += s[4];
+ s[4] += spooky_read_le64(&data[4]); s[6] ^= s[2];
+ s[3] ^= s[4]; s[4] = ROTL64(s[4], 17); s[3] += s[5];
+ s[5] += spooky_read_le64(&data[5]); s[7] ^= s[3];
+ s[4] ^= s[5]; s[5] = ROTL64(s[5], 28); s[4] += s[6];
+ s[6] += spooky_read_le64(&data[6]); s[8] ^= s[4];
+ s[5] ^= s[6]; s[6] = ROTL64(s[6], 39); s[5] += s[7];
+ s[7] += spooky_read_le64(&data[7]); s[9] ^= s[5];
+ s[6] ^= s[7]; s[7] = ROTL64(s[7], 57); s[6] += s[8];
+ s[8] += spooky_read_le64(&data[8]); s[10] ^= s[6];
+ s[7] ^= s[8]; s[8] = ROTL64(s[8], 55); s[7] += s[9];
+ s[9] += spooky_read_le64(&data[9]); s[11] ^= s[7];
+ s[8] ^= s[9]; s[9] = ROTL64(s[9], 54); s[8] += s[10];
+ s[10] += spooky_read_le64(&data[10]); s[0] ^= s[8];
+ s[9] ^= s[10]; s[10] = ROTL64(s[10], 22); s[9] += s[11];
+ s[11] += spooky_read_le64(&data[11]); s[1] ^= s[9];
+ s[10] ^= s[11]; s[11] = ROTL64(s[11], 46); s[10] += s[0];
+}
+
+//
+// Mix all 12 inputs together so that h0, h1 are a hash of them all.
+//
+// For two inputs differing in just the input bits
+// Where "differ" means xor or subtraction
+// And the base value is random, or a counting value starting at that bit
+// The final result will have each bit of h0, h1 flip
+// For every input bit,
+// with probability 50 +- .3%
+// For every pair of input bits,
+// with probability 50 +- 3%
+//
+// This does not rely on the last Mix() call having already mixed some.
+// Two iterations was almost good enough for a 64-bit result, but a
+// 128-bit result is reported, so End() does three iterations.
+//
+static inline void
+spooky_end_partial(uint64_t *h)
+{
+ h[11] += h[1]; h[2] ^= h[11]; h[1] = ROTL64(h[1], 44);
+ h[0] += h[2]; h[3] ^= h[0]; h[2] = ROTL64(h[2], 15);
+ h[1] += h[3]; h[4] ^= h[1]; h[3] = ROTL64(h[3], 34);
+ h[2] += h[4]; h[5] ^= h[2]; h[4] = ROTL64(h[4], 21);
+ h[3] += h[5]; h[6] ^= h[3]; h[5] = ROTL64(h[5], 38);
+ h[4] += h[6]; h[7] ^= h[4]; h[6] = ROTL64(h[6], 33);
+ h[5] += h[7]; h[8] ^= h[5]; h[7] = ROTL64(h[7], 10);
+ h[6] += h[8]; h[9] ^= h[6]; h[8] = ROTL64(h[8], 13);
+ h[7] += h[9]; h[10] ^= h[7]; h[9] = ROTL64(h[9], 38);
+ h[8] += h[10]; h[11] ^= h[8]; h[10] = ROTL64(h[10], 53);
+ h[9] += h[11]; h[0] ^= h[9]; h[11] = ROTL64(h[11], 42);
+ h[10] += h[0]; h[1] ^= h[10]; h[0] = ROTL64(h[0], 54);
+}
+
+static inline void
+spooky_end(const uint64_t *restrict data, uint64_t *restrict h)
+{
+ h[0] += spooky_read_le64(&data[0]);
+ h[1] += spooky_read_le64(&data[1]);
+ h[2] += spooky_read_le64(&data[2]);
+ h[3] += spooky_read_le64(&data[3]);
+ h[4] += spooky_read_le64(&data[4]);
+ h[5] += spooky_read_le64(&data[5]);
+ h[6] += spooky_read_le64(&data[6]);
+ h[7] += spooky_read_le64(&data[7]);
+ h[8] += spooky_read_le64(&data[8]);
+ h[9] += spooky_read_le64(&data[9]);
+ h[10] += spooky_read_le64(&data[10]);
+ h[11] += spooky_read_le64(&data[11]);
+ spooky_end_partial(h);
+ spooky_end_partial(h);
+ spooky_end_partial(h);
+}
+
+//
+// The goal is for each bit of the input to expand into 128 bits of
+// apparent entropy before it is fully overwritten.
+// n trials both set and cleared at least m bits of h0 h1 h2 h3
+// n: 2 m: 29
+// n: 3 m: 46
+// n: 4 m: 57
+// n: 5 m: 107
+// n: 6 m: 146
+// n: 7 m: 152
+// when run forwards or backwards
+// for all 1-bit and 2-bit diffs
+// with diffs defined by either xor or subtraction
+// with a base of all zeros plus a counter, or plus another bit, or random
+//
+static inline void
+spooky_short_mix(uint64_t *h)
+{
+ h[2] = ROTL64(h[2], 50); h[2] += h[3]; h[0] ^= h[2];
+ h[3] = ROTL64(h[3], 52); h[3] += h[0]; h[1] ^= h[3];
+ h[0] = ROTL64(h[0], 30); h[0] += h[1]; h[2] ^= h[0];
+ h[1] = ROTL64(h[1], 41); h[1] += h[2]; h[3] ^= h[1];
+ h[2] = ROTL64(h[2], 54); h[2] += h[3]; h[0] ^= h[2];
+ h[3] = ROTL64(h[3], 48); h[3] += h[0]; h[1] ^= h[3];
+ h[0] = ROTL64(h[0], 38); h[0] += h[1]; h[2] ^= h[0];
+ h[1] = ROTL64(h[1], 37); h[1] += h[2]; h[3] ^= h[1];
+ h[2] = ROTL64(h[2], 62); h[2] += h[3]; h[0] ^= h[2];
+ h[3] = ROTL64(h[3], 34); h[3] += h[0]; h[1] ^= h[3];
+ h[0] = ROTL64(h[0], 5); h[0] += h[1]; h[2] ^= h[0];
+ h[1] = ROTL64(h[1], 36); h[1] += h[2]; h[3] ^= h[1];
+}
+
+//
+// Mix all 4 inputs together so that h0, h1 are a hash of them all.
+//
+// For two inputs differing in just the input bits
+// Where "differ" means xor or subtraction
+// And the base value is random, or a counting value starting at that bit
+// The final result will have each bit of h0, h1 flip
+// For every input bit,
+// with probability 50 +- .3% (it is probably better than that)
+// For every pair of input bits,
+// with probability 50 +- .75% (the worst case is approximately that)
+//
+static inline void
+spooky_short_end(uint64_t *h)
+{
+ h[3] ^= h[2]; h[2] = ROTL64(h[2], 15); h[3] += h[2];
+ h[0] ^= h[3]; h[3] = ROTL64(h[3], 52); h[0] += h[3];
+ h[1] ^= h[0]; h[0] = ROTL64(h[0], 26); h[1] += h[0];
+ h[2] ^= h[1]; h[1] = ROTL64(h[1], 51); h[2] += h[1];
+ h[3] ^= h[2]; h[2] = ROTL64(h[2], 28); h[3] += h[2];
+ h[0] ^= h[3]; h[3] = ROTL64(h[3], 9); h[0] += h[3];
+ h[1] ^= h[0]; h[0] = ROTL64(h[0], 47); h[1] += h[0];
+ h[2] ^= h[1]; h[1] = ROTL64(h[1], 54); h[2] += h[1];
+ h[3] ^= h[2]; h[2] = ROTL64(h[2], 32); h[3] += h[2];
+ h[0] ^= h[3]; h[3] = ROTL64(h[3], 25); h[0] += h[3];
+ h[1] ^= h[0]; h[0] = ROTL64(h[0], 63); h[1] += h[0];
+}
+
+//
+// short hash ... it could be used on any message,
+// but it's used by Spooky just for short messages.
+//
+static void
+spooky_short(const void *restrict message, size_t length,
+ uint64_t *restrict hash1, uint64_t *restrict hash2)
+{
+ uint64_t buf[2 * SC_NUMVARS];
+ union {
+ const uint8_t *p8;
+ uint64_t *p64;
+ } u;
+
+ u.p8 = (const uint8_t *) message;
+
+ if (ALLOW_UNALIGNED_READS == 0 && !spooky_is_aligned(u.p8, 8)) {
+ memcpy(buf, message, length);
+ u.p64 = buf;
+ }
+
+ size_t left = length % 32;
+ uint64_t h[4];
+ h[0] = *hash1;
+ h[1] = *hash2;
+ h[2] = SC_CONST;
+ h[3] = SC_CONST;
+
+ if (length > 15) {
+ const uint64_t *end = u.p64 + (length / 32) * 4;
+
+ // handle all complete sets of 32 bytes
+ for (; u.p64 < end; u.p64 += 4) {
+ h[2] += spooky_read_le64(&u.p64[0]);
+ h[3] += spooky_read_le64(&u.p64[1]);
+ spooky_short_mix(h);
+ h[0] += spooky_read_le64(&u.p64[2]);
+ h[1] += spooky_read_le64(&u.p64[3]);
+ }
+
+		// Handle the case of 16+ remaining bytes.
+ if (left >= 16) {
+ h[2] += spooky_read_le64(&u.p64[0]);
+ h[3] += spooky_read_le64(&u.p64[1]);
+ spooky_short_mix(h);
+ u.p64 += 2;
+ left -= 16;
+ }
+ }
+
+ // Handle the last 0..15 bytes, and its length
+ h[3] += ((uint64_t) length) << 56;
+ switch (left) {
+ case 15:
+ h[3] += ((uint64_t) u.p8[14]) << 48; // fallthrough
+ case 14:
+ h[3] += ((uint64_t) u.p8[13]) << 40; // fallthrough
+ case 13:
+ h[3] += ((uint64_t) u.p8[12]) << 32; // fallthrough
+ case 12:
+ h[3] += ((uint64_t) u.p8[11]) << 24; // fallthrough
+ case 11:
+ h[3] += ((uint64_t) u.p8[10]) << 16; // fallthrough
+ case 10:
+ h[3] += ((uint64_t) u.p8[9]) << 8; // fallthrough
+ case 9:
+ h[3] += (uint64_t) u.p8[8]; // fallthrough
+ case 8:
+ h[2] += spooky_read_le64(&u.p64[0]);
+ break;
+ case 7:
+ h[2] += ((uint64_t) u.p8[6]) << 48; // fallthrough
+ case 6:
+ h[2] += ((uint64_t) u.p8[5]) << 40; // fallthrough
+ case 5:
+ h[2] += ((uint64_t) u.p8[4]) << 32; // fallthrough
+ case 4:
+ h[2] += ((uint64_t) u.p8[3]) << 24; // fallthrough
+ case 3:
+ h[2] += ((uint64_t) u.p8[2]) << 16; // fallthrough
+ case 2:
+ h[2] += ((uint64_t) u.p8[1]) << 8; // fallthrough
+ case 1:
+ h[2] += (uint64_t) u.p8[0];
+ break;
+ case 0:
+ h[2] += SC_CONST;
+ h[3] += SC_CONST;
+ }
+ spooky_short_end(h);
+ *hash1 = h[0];
+ *hash2 = h[1];
+}
+
+uint64_t
+spooky_hash64(const void *message, size_t length, uint64_t seed)
+{
+ uint64_t hash1 = seed;
+ spooky_hash128(message, length, &hash1, &seed);
+ return hash1;
+}
+
+uint32_t
+spooky_hash32(const void *message, size_t length, uint32_t seed)
+{
+ uint64_t hash1 = seed, hash2 = seed;
+ spooky_hash128(message, length, &hash1, &hash2);
+ return (uint32_t) hash1;
+}
+
+// do the whole hash in one call
+void
+spooky_hash128(const void *restrict message, size_t length,
+ uint64_t *restrict hash1, uint64_t *restrict hash2)
+{
+ if (length < SC_BUFSIZE) {
+ spooky_short(message, length, hash1, hash2);
+ return;
+ }
+
+ uint64_t h[SC_NUMVARS];
+ uint64_t buf[SC_NUMVARS];
+ uint64_t *end;
+ union {
+ const uint8_t *p8;
+ uint64_t *p64;
+ } u;
+ size_t left;
+
+ h[0] = h[3] = h[6] = h[9] = *hash1;
+ h[1] = h[4] = h[7] = h[10] = *hash2;
+ h[2] = h[5] = h[8] = h[11] = SC_CONST;
+
+ u.p8 = (const uint8_t *) message;
+ end = u.p64 + (length / SC_BLOCKSIZE) * SC_NUMVARS;
+
+ // handle all whole SC_BLOCKSIZE blocks of bytes
+ if (ALLOW_UNALIGNED_READS || spooky_is_aligned(u.p8, 8)) {
+ do {
+ spooky_mix(u.p64, h);
+ u.p64 += SC_NUMVARS;
+ } while (u.p64 < end);
+ }
+ else {
+ do {
+ memcpy(buf, u.p64, SC_BLOCKSIZE);
+ spooky_mix(buf, h);
+ u.p64 += SC_NUMVARS;
+ } while (u.p64 < end);
+ }
+
+ // handle the last partial block of SC_BLOCKSIZE bytes
+ left = length - ((const uint8_t *) end - (const uint8_t *) message);
+ memcpy(buf, end, left);
+ memset(((uint8_t *) buf) + left, 0, SC_BLOCKSIZE - left);
+ ((uint8_t *) buf)[SC_BLOCKSIZE - 1] = (uint8_t) left;
+
+ // do some final mixing
+ spooky_end(buf, h);
+ *hash1 = h[0];
+ *hash2 = h[1];
+}
+
+/*
+// init spooky state
+void
+spooky_init(struct spooky_state *state, uint64_t seed1, uint64_t seed2)
+{
+ state->length = 0;
+ state->left = 0;
+ state->state[0] = seed1;
+ state->state[1] = seed2;
+}
+
+// add a message fragment to the state
+void
+spooky_update(struct spooky_state *restrict state,
+ const void *restrict message, size_t length)
+{
+ uint64_t h[SC_NUMVARS];
+ size_t newLength = length + state->left;
+ uint8_t left;
+ union {
+ const uint8_t *p8;
+ uint64_t *p64;
+ } u;
+ const uint64_t *end;
+
+ // Is this message fragment too short? If it is, stuff it away.
+ if (newLength < SC_BUFSIZE) {
+ memcpy(&((uint8_t *) state->data)[state->left], message, length);
+ state->length = length + state->length;
+ state->left = (uint8_t) newLength;
+ return;
+ }
+
+ // init the variables
+ if (state->length < SC_BUFSIZE) {
+ h[0] = h[3] = h[6] = h[9] = state->state[0];
+ h[1] = h[4] = h[7] = h[10] = state->state[1];
+ h[2] = h[5] = h[8] = h[11] = SC_CONST;
+ }
+ else {
+ memcpy(h, state->state, sizeof(state->state));
+ }
+ state->length = length + state->length;
+
+ // if we've got anything stuffed away, use it now
+ if (state->left) {
+ uint8_t prefix = SC_BUFSIZE - state->left;
+ memcpy(&(((uint8_t *) state->data)[state->left]), message, prefix);
+ u.p64 = state->data;
+ spooky_mix(u.p64, h);
+ spooky_mix(&u.p64[SC_NUMVARS], h);
+ u.p8 = ((const uint8_t *) message) + prefix;
+ length -= prefix;
+ }
+ else {
+ u.p8 = (const uint8_t *) message;
+ }
+
+ // handle all whole blocks of SC_BLOCKSIZE bytes
+ end = u.p64 + (length / SC_BLOCKSIZE) * SC_NUMVARS;
+ left = (uint8_t) (length - ((const uint8_t *) end - u.p8));
+ if (ALLOW_UNALIGNED_READS || spooky_is_aligned(u.p8, 8)) {
+ while (u.p64 < end) {
+ spooky_mix(u.p64, h);
+ u.p64 += SC_NUMVARS;
+ }
+ }
+ else {
+ while (u.p64 < end) {
+ memcpy(state->data, u.p8, SC_BLOCKSIZE);
+ spooky_mix(state->data, h);
+ u.p64 += SC_NUMVARS;
+ }
+ }
+
+ // stuff away the last few bytes
+ state->left = left;
+ memcpy(state->data, end, left);
+
+ // stuff away the variables
+ memcpy(state->state, h, sizeof(state->state));
+}
+
+// report the hash for the concatenation of all message fragments so far
+void
+spooky_final(struct spooky_state *restrict state,
+ uint64_t *restrict hash1, uint64_t *restrict hash2)
+{
+ // init the variables
+ if (state->length < SC_BUFSIZE) {
+ *hash1 = state->state[0];
+ *hash2 = state->state[1];
+ spooky_short(state->data, state->length, hash1, hash2);
+ return;
+ }
+
+ const uint64_t *data = (const uint64_t *) state->data;
+ uint8_t left = state->left;
+
+ uint64_t h[SC_NUMVARS];
+ memcpy(h, state->state, sizeof(state->state));
+
+ if (left >= SC_BLOCKSIZE) {
+ // m_data can contain two blocks; handle any whole first block
+ spooky_mix(data, h);
+ data += SC_NUMVARS;
+ left -= SC_BLOCKSIZE;
+ }
+
+ // mix in the last partial block, and the length mod SC_BLOCKSIZE
+ memset(&((uint8_t *) data)[left], 0, (SC_BLOCKSIZE - left));
+
+ ((uint8_t *) data)[SC_BLOCKSIZE - 1] = left;
+
+ // do some final mixing
+ spooky_end(data, h);
+
+ *hash1 = h[0];
+ *hash2 = h[1];
+}
+*/
--- /dev/null
+++ b/3rd/spooky.h
@@ -1,0 +1,65 @@
+/*
+ * SpookyHash - 128-bit noncryptographic hash function
+ *
+ * Written in 2012 by Bob Jenkins
+ *
+ * Converted to C in 2015 by Joergen Ibsen
+ *
+ * To the extent possible under law, the author(s) have dedicated all
+ * copyright and related and neighboring rights to this software to the
+ * public domain worldwide. This software is distributed without any
+ * warranty. <http://creativecommons.org/publicdomain/zero/1.0/>
+ *
+ * Original comment from SpookyV2.h by Bob Jenkins:
+ *
+ * SpookyHash: a 128-bit noncryptographic hash function
+ * By Bob Jenkins, public domain
+ * Oct 31 2010: alpha, framework + SpookyHash::Mix appears right
+ * Oct 31 2011: alpha again, Mix only good to 2^^69 but rest appears right
+ * Dec 31 2011: beta, improved Mix, tested it for 2-bit deltas
+ * Feb 2 2012: production, same bits as beta
+ * Feb 5 2012: adjusted definitions of uint* to be more portable
+ * Mar 30 2012: 3 bytes/cycle, not 4. Alpha was 4 but wasn't thorough enough.
+ * August 5 2012: SpookyV2 (different results)
+ *
+ * Up to 3 bytes/cycle for long messages. Reasonably fast for short messages.
+ * All 1 or 2 bit deltas achieve avalanche within 1% bias per output bit.
+ *
+ * This was developed for and tested on 64-bit x86-compatible processors.
+ * It assumes the processor is little-endian. There is a macro
+ * controlling whether unaligned reads are allowed (by default they are).
+ * This should be an equally good hash on big-endian machines, but it will
+ * compute different results on them than on little-endian machines.
+ *
+ * Google's CityHash has similar specs to SpookyHash, and CityHash is faster
+ * on new Intel boxes. MD4 and MD5 also have similar specs, but they are orders
+ * of magnitude slower. CRCs are two or more times slower, but unlike
+ * SpookyHash, they have nice math for combining the CRCs of pieces to form
+ * the CRCs of wholes. There are also cryptographic hashes, but those are even
+ * slower than MD5.
+ */
+
+#pragma once
+
+// number of uint64_t's in internal state
+#define SC_NUMVARS 12U
+
+// size of the internal state
+#define SC_BLOCKSIZE (SC_NUMVARS * 8U)
+
+// size of buffer of unhashed data, in bytes
+#define SC_BUFSIZE (2U * SC_BLOCKSIZE)
+
+struct spooky_state {
+ uint64_t data[2 * SC_NUMVARS]; // unhashed data, for partial messages
+ uint64_t state[SC_NUMVARS]; // internal state of the hash
+ size_t length; // total length of the input so far
+ uint8_t left; // length of unhashed data stashed in data
+};
+
+void spooky_hash128(const void *message, size_t length, uint64_t *hash1, uint64_t *hash2);
+uint64_t spooky_hash64(const void *message, size_t length, uint64_t seed);
+uint32_t spooky_hash32(const void *message, size_t length, uint32_t seed);
+//void spooky_init(struct spooky_state *state, uint64_t seed1, uint64_t seed2);
+//void spooky_update(struct spooky_state *state, const void *message, size_t length);
+//void spooky_final(struct spooky_state *state, uint64_t *hash1, uint64_t *hash2);
--- a/hashing.c
+++ b/hashing.c
@@ -1,5 +1,6 @@
#include "llt.h"
#include "hashing.h"
+#include "spooky.h"
lltuint_t
nextipow2(lltuint_t i)
@@ -55,22 +56,14 @@
return (uint32_t)key;
}
-#include "lookup3.c"
-
uint64_t
-memhash(const char* buf, size_t n)
+memhash(const char *buf, size_t n)
{
- uint32_t c = 0xcafe8881, b = 0x4d6a087c;
-
- hashlittle2(buf, n, &c, &b);
- return (uint64_t)c | (((uint64_t)b)<<32);
+ return spooky_hash64(buf, n, 0xcafe8881);
}
uint32_t
-memhash32(const char* buf, size_t n)
+memhash32(const char *buf, size_t n)
{
- uint32_t c = 0xcafe8881, b = 0x4d6a087c;
-
- hashlittle2(buf, n, &c, &b);
- return c;
+ return spooky_hash32(buf, n, 0xcafe8881);
}
--- a/meson.build
+++ b/meson.build
@@ -52,6 +52,7 @@
'3rd/mp/u16.c',
'3rd/mp/u32.c',
'3rd/mp/u64.c',
+ '3rd/spooky.c',
'3rd/utf/rune.c',
'3rd/utf/runeistype.c',
'3rd/utf/runetotype.c',
--- a/mkfile
+++ b/mkfile
@@ -12,6 +12,7 @@
OFILES=\
3rd/mt19937-64.$O\
3rd/wcwidth.$O\
+ 3rd/spooky.$O\
bitreverse.$O\
bitvector-ops.$O\
bitvector.$O\