stb_ds: major string hash fix, minor other changes

- arena and strdup string hashes were badly broken due to not setting up default slot correctly
- tweak use of seed in 4-byte and 8-byte hash functions to hopefully be slightly stronger
- a few internal #ifdefs for performance tuning
This commit is contained in:
Sean Barrett 2019-03-03 21:36:15 -08:00
parent 72990efc3e
commit b8960f32b8
1 changed file with 29 additions and 19 deletions

View File

@ -274,10 +274,10 @@ NOTES - HASH MAP
* For compilers other than GCC and clang (e.g. Visual Studio), for hmput/hmget/hmdel
and variants, the key must be an lvalue (so the macro can take the address of it).
For GCC and clang, extensions are used that eliminate this requirement if you're
using C99 and later or using C++.
Extensions are used that eliminate this requirement if you're using C99 and later
in GCC or clang, or if you're using C++ in GCC.
* To test for presence of a key in a hashmap, just do 'hmget(foo,key) >= 0'.
* To test for presence of a key in a hashmap, just do 'hmgeti(foo,key) >= 0'.
* The iteration order of your data in the hashmap is determined solely by the
order of insertions and deletions. In particular, if you never delete, new
@ -417,7 +417,7 @@ extern void * stbds_shmode_func(size_t elemsize, int mode);
#if __clang__
#define STBDS_ADDRESSOF(typevar, value) ((__typeof__(typevar)[1]){value}) // literal array decays to pointer to value
#else
#define STBDS_ADDRESSOF(typevar, value) ((typeof(typevar)[]){value}) // literal array decays to pointer to value
#define STBDS_ADDRESSOF(typevar, value) ((typeof(typevar)[1]){value}) // literal array decays to pointer to value
#endif
#else
#define STBDS_ADDRESSOF(typevar, value) &(value)
@ -648,10 +648,15 @@ void *stbds_arrgrowf(void *a, size_t elemsize, size_t addlen, size_t min_cap)
// stbds_hm hash table implementation
//
#define STBDS_CACHE_LINE_SIZE 64
#ifdef STBDS_INTERNAL_SMALL_BUCKET
#define STBDS_BUCKET_LENGTH 4
#else
#define STBDS_BUCKET_LENGTH 8
#define STBDS_BUCKET_SHIFT 3
#endif
#define STBDS_BUCKET_SHIFT (STBDS_BUCKET_LENGTH == 8 ? 3 : 2)
#define STBDS_BUCKET_MASK (STBDS_BUCKET_LENGTH-1)
#define STBDS_CACHE_LINE_SIZE 64
#define STBDS_ALIGN_FWD(n,a) (((n) + (a) - 1) & ~((a)-1))
@ -698,13 +703,12 @@ void stbds_rand_seed(size_t seed)
static size_t stbds_probe_position(size_t hash, size_t slot_count, size_t slot_log2)
{
#if 1
size_t pos = (hash >> (STBDS_SIZE_T_BITS-slot_log2));
STBDS_ASSERT(pos < slot_count);
return pos;
#else
return hash & (slot_count-1);
size_t pos;
pos = hash & (slot_count-1);
#ifdef STBDS_INTERNAL_BUCKET_START
pos &= ~STBDS_BUCKET_MASK;
#endif
return pos;
}
static size_t stbds_log2(size_t slot_count)
@ -812,7 +816,6 @@ static stbds_hash_index *stbds_make_hash_index(size_t slot_count, stbds_hash_ind
for (;;) {
size_t limit,z;
stbds_hash_bucket *bucket;
pos &= (t->slot_count-1);
bucket = &t->storage[pos >> STBDS_BUCKET_SHIFT];
STBDS_STATS(++stbds_rehash_probes);
@ -835,6 +838,7 @@ static stbds_hash_index *stbds_make_hash_index(size_t slot_count, stbds_hash_ind
pos += step; // quadratic probing
step += STBDS_BUCKET_LENGTH;
pos &= (t->slot_count-1);
}
}
done:
@ -939,7 +943,7 @@ static size_t stbds_siphash_bytes(void *p, size_t len, size_t seed)
#ifdef STBDS_SIPHASH_2_4
return v0^v1^v2^v3;
#else
return v1^v2^v3; // slightly stronger since v0^v3 in above cancels out final round operation
return v1^v2^v3; // slightly stronger since v0^v3 in above cancels out final round operation? I tweeted at the authors of SipHash about this but they didn't reply
#endif
}
@ -954,10 +958,11 @@ size_t stbds_hash_bytes(void *p, size_t len, size_t seed)
unsigned int hash = d[0] | (d[1] << 8) | (d[2] << 16) | (d[3] << 24);
#if 0
// HASH32-A Bob Jenkin's hash function w/o large constants
hash ^= seed ^ len;
hash ^= seed;
hash -= (hash<<6);
hash ^= (hash>>17);
hash -= (hash<<9);
hash ^= seed;
hash ^= (hash<<4);
hash -= (hash<<3);
hash ^= (hash<<10);
@ -966,22 +971,24 @@ size_t stbds_hash_bytes(void *p, size_t len, size_t seed)
// HASH32-BB Bob Jenkin's presumably-accidental version of Thomas Wang hash with rotates turned into shifts.
// Note that converting these back to rotates makes it run a lot slower, presumably due to collisions, so I'm
// not really sure what's going on.
hash ^= seed ^ len;
hash ^= seed;
hash = (hash ^ 61) ^ (hash >> 16);
hash = hash + (hash << 3);
hash = hash ^ (hash >> 4);
hash = hash * 0x27d4eb2d;
hash ^= seed;
hash = hash ^ (hash >> 15);
#else // HASH32-C - Murmur3
hash ^= seed;
hash *= 0xcc9e2d51;
hash = (hash << 17) | (hash >> 15);
hash *= 0x1b873593;
hash ^= seed;
hash = (hash << 19) | (hash >> 13);
hash = hash*5 + 0xe6546b64;
hash ^= len;
hash ^= hash >> 16;
hash *= 0x85ebca6b;
hash ^= seed;
hash ^= hash >> 13;
hash *= 0xc2b2ae35;
hash ^= hash >> 16;
@ -1006,16 +1013,17 @@ size_t stbds_hash_bytes(void *p, size_t len, size_t seed)
} else if (len == 8 && sizeof(size_t) == 8) {
size_t hash = d[0] | (d[1] << 8) | (d[2] << 16) | (d[3] << 24);
hash |= (size_t) (d[4] | (d[5] << 8) | (d[6] << 16) | (d[7] << 24)) << 16 << 16; // avoid warning if size_t == 4
hash ^= seed ^ len;
hash ^= seed;
hash = (~hash) + (hash << 21);
hash ^= STBDS_ROTATE_RIGHT(hash,24);
hash *= 265;
hash ^= STBDS_ROTATE_RIGHT(hash,14);
hash ^= seed;
hash *= 21;
hash ^= STBDS_ROTATE_RIGHT(hash,28);
hash += (hash << 31);
hash = (~hash) + (hash << 18);
return hash^seed;
return hash;
} else {
return stbds_siphash_bytes(p,len,seed);
}
@ -1272,6 +1280,8 @@ void * stbds_shmode_func(size_t elemsize, int mode)
{
void *a = stbds_arrgrowf(0, elemsize, 0, 1);
stbds_hash_index *h;
memset(a, 0, elemsize);
stbds_header(a)->length = 1;
stbds_header(a)->hash_table = h = (stbds_hash_index *) stbds_make_hash_index(STBDS_BUCKET_LENGTH, NULL);
h->string.mode = mode;
return STBDS_ARR_TO_HASH(a,elemsize);