mirror of
https://github.com/nothings/stb
synced 2024-12-15 20:32:33 +03:00
stb_ds: major string hash fix, minor other changes
- arena and strdup string hashes were badly broken due to not setting up default slot correctly
- tweak use of seed in 4-byte and 8-byte hash functions to hopefully be slightly stronger
- a few internal #ifdefs for performance tuning
This commit is contained in:
parent
72990efc3e
commit
b8960f32b8
48
stb_ds.h
48
stb_ds.h
@ -274,10 +274,10 @@ NOTES - HASH MAP
|
|||||||
|
|
||||||
* For compilers other than GCC and clang (e.g. Visual Studio), for hmput/hmget/hmdel
|
* For compilers other than GCC and clang (e.g. Visual Studio), for hmput/hmget/hmdel
|
||||||
and variants, the key must be an lvalue (so the macro can take the address of it).
|
and variants, the key must be an lvalue (so the macro can take the address of it).
|
||||||
For GCC and clang, extensions are used that eliminate this requirement if you're
|
Extensions are used that eliminate this requirement if you're using C99 and later
|
||||||
using C99 and later or using C++.
|
in GCC or clang, or if you're using C++ in GCC.
|
||||||
|
|
||||||
* To test for presence of a key in a hashmap, just do 'hmget(foo,key) >= 0'.
|
* To test for presence of a key in a hashmap, just do 'hmgeti(foo,key) >= 0'.
|
||||||
|
|
||||||
* The iteration order of your data in the hashmap is determined solely by the
|
* The iteration order of your data in the hashmap is determined solely by the
|
||||||
order of insertions and deletions. In particular, if you never delete, new
|
order of insertions and deletions. In particular, if you never delete, new
|
||||||
@ -417,7 +417,7 @@ extern void * stbds_shmode_func(size_t elemsize, int mode);
|
|||||||
#if __clang__
|
#if __clang__
|
||||||
#define STBDS_ADDRESSOF(typevar, value) ((__typeof__(typevar)[1]){value}) // literal array decays to pointer to value
|
#define STBDS_ADDRESSOF(typevar, value) ((__typeof__(typevar)[1]){value}) // literal array decays to pointer to value
|
||||||
#else
|
#else
|
||||||
#define STBDS_ADDRESSOF(typevar, value) ((typeof(typevar)[]){value}) // literal array decays to pointer to value
|
#define STBDS_ADDRESSOF(typevar, value) ((typeof(typevar)[1]){value}) // literal array decays to pointer to value
|
||||||
#endif
|
#endif
|
||||||
#else
|
#else
|
||||||
#define STBDS_ADDRESSOF(typevar, value) &(value)
|
#define STBDS_ADDRESSOF(typevar, value) &(value)
|
||||||
@ -648,10 +648,15 @@ void *stbds_arrgrowf(void *a, size_t elemsize, size_t addlen, size_t min_cap)
|
|||||||
// stbds_hm hash table implementation
|
// stbds_hm hash table implementation
|
||||||
//
|
//
|
||||||
|
|
||||||
#define STBDS_CACHE_LINE_SIZE 64
|
#ifdef STBDS_INTERNAL_SMALL_BUCKET
|
||||||
|
#define STBDS_BUCKET_LENGTH 4
|
||||||
|
#else
|
||||||
#define STBDS_BUCKET_LENGTH 8
|
#define STBDS_BUCKET_LENGTH 8
|
||||||
#define STBDS_BUCKET_SHIFT 3
|
#endif
|
||||||
|
|
||||||
|
#define STBDS_BUCKET_SHIFT (STBDS_BUCKET_LENGTH == 8 ? 3 : 2)
|
||||||
#define STBDS_BUCKET_MASK (STBDS_BUCKET_LENGTH-1)
|
#define STBDS_BUCKET_MASK (STBDS_BUCKET_LENGTH-1)
|
||||||
|
#define STBDS_CACHE_LINE_SIZE 64
|
||||||
|
|
||||||
#define STBDS_ALIGN_FWD(n,a) (((n) + (a) - 1) & ~((a)-1))
|
#define STBDS_ALIGN_FWD(n,a) (((n) + (a) - 1) & ~((a)-1))
|
||||||
|
|
||||||
@ -698,13 +703,12 @@ void stbds_rand_seed(size_t seed)
|
|||||||
|
|
||||||
static size_t stbds_probe_position(size_t hash, size_t slot_count, size_t slot_log2)
|
static size_t stbds_probe_position(size_t hash, size_t slot_count, size_t slot_log2)
|
||||||
{
|
{
|
||||||
#if 1
|
size_t pos;
|
||||||
size_t pos = (hash >> (STBDS_SIZE_T_BITS-slot_log2));
|
pos = hash & (slot_count-1);
|
||||||
STBDS_ASSERT(pos < slot_count);
|
#ifdef STBDS_INTERNAL_BUCKET_START
|
||||||
return pos;
|
pos &= ~STBDS_BUCKET_MASK;
|
||||||
#else
|
|
||||||
return hash & (slot_count-1);
|
|
||||||
#endif
|
#endif
|
||||||
|
return pos;
|
||||||
}
|
}
|
||||||
|
|
||||||
static size_t stbds_log2(size_t slot_count)
|
static size_t stbds_log2(size_t slot_count)
|
||||||
@ -812,7 +816,6 @@ static stbds_hash_index *stbds_make_hash_index(size_t slot_count, stbds_hash_ind
|
|||||||
for (;;) {
|
for (;;) {
|
||||||
size_t limit,z;
|
size_t limit,z;
|
||||||
stbds_hash_bucket *bucket;
|
stbds_hash_bucket *bucket;
|
||||||
pos &= (t->slot_count-1);
|
|
||||||
bucket = &t->storage[pos >> STBDS_BUCKET_SHIFT];
|
bucket = &t->storage[pos >> STBDS_BUCKET_SHIFT];
|
||||||
STBDS_STATS(++stbds_rehash_probes);
|
STBDS_STATS(++stbds_rehash_probes);
|
||||||
|
|
||||||
@ -835,6 +838,7 @@ static stbds_hash_index *stbds_make_hash_index(size_t slot_count, stbds_hash_ind
|
|||||||
|
|
||||||
pos += step; // quadratic probing
|
pos += step; // quadratic probing
|
||||||
step += STBDS_BUCKET_LENGTH;
|
step += STBDS_BUCKET_LENGTH;
|
||||||
|
pos &= (t->slot_count-1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
done:
|
done:
|
||||||
@ -939,7 +943,7 @@ static size_t stbds_siphash_bytes(void *p, size_t len, size_t seed)
|
|||||||
#ifdef STBDS_SIPHASH_2_4
|
#ifdef STBDS_SIPHASH_2_4
|
||||||
return v0^v1^v2^v3;
|
return v0^v1^v2^v3;
|
||||||
#else
|
#else
|
||||||
return v1^v2^v3; // slightly stronger since v0^v3 in above cancels out final round operation
|
return v1^v2^v3; // slightly stronger since v0^v3 in above cancels out final round operation? I tweeted at the authors of SipHash about this but they didn't reply
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -954,10 +958,11 @@ size_t stbds_hash_bytes(void *p, size_t len, size_t seed)
|
|||||||
unsigned int hash = d[0] | (d[1] << 8) | (d[2] << 16) | (d[3] << 24);
|
unsigned int hash = d[0] | (d[1] << 8) | (d[2] << 16) | (d[3] << 24);
|
||||||
#if 0
|
#if 0
|
||||||
// HASH32-A Bob Jenkin's hash function w/o large constants
|
// HASH32-A Bob Jenkin's hash function w/o large constants
|
||||||
hash ^= seed ^ len;
|
hash ^= seed;
|
||||||
hash -= (hash<<6);
|
hash -= (hash<<6);
|
||||||
hash ^= (hash>>17);
|
hash ^= (hash>>17);
|
||||||
hash -= (hash<<9);
|
hash -= (hash<<9);
|
||||||
|
hash ^= seed;
|
||||||
hash ^= (hash<<4);
|
hash ^= (hash<<4);
|
||||||
hash -= (hash<<3);
|
hash -= (hash<<3);
|
||||||
hash ^= (hash<<10);
|
hash ^= (hash<<10);
|
||||||
@ -966,22 +971,24 @@ size_t stbds_hash_bytes(void *p, size_t len, size_t seed)
|
|||||||
// HASH32-BB Bob Jenkin's presumably-accidental version of Thomas Wang hash with rotates turned into shifts.
|
// HASH32-BB Bob Jenkin's presumably-accidental version of Thomas Wang hash with rotates turned into shifts.
|
||||||
// Note that converting these back to rotates makes it run a lot slower, presumably due to collisions, so I'm
|
// Note that converting these back to rotates makes it run a lot slower, presumably due to collisions, so I'm
|
||||||
// not really sure what's going on.
|
// not really sure what's going on.
|
||||||
hash ^= seed ^ len;
|
hash ^= seed;
|
||||||
hash = (hash ^ 61) ^ (hash >> 16);
|
hash = (hash ^ 61) ^ (hash >> 16);
|
||||||
hash = hash + (hash << 3);
|
hash = hash + (hash << 3);
|
||||||
hash = hash ^ (hash >> 4);
|
hash = hash ^ (hash >> 4);
|
||||||
hash = hash * 0x27d4eb2d;
|
hash = hash * 0x27d4eb2d;
|
||||||
|
hash ^= seed;
|
||||||
hash = hash ^ (hash >> 15);
|
hash = hash ^ (hash >> 15);
|
||||||
#else // HASH32-C - Murmur3
|
#else // HASH32-C - Murmur3
|
||||||
|
hash ^= seed;
|
||||||
hash *= 0xcc9e2d51;
|
hash *= 0xcc9e2d51;
|
||||||
hash = (hash << 17) | (hash >> 15);
|
hash = (hash << 17) | (hash >> 15);
|
||||||
hash *= 0x1b873593;
|
hash *= 0x1b873593;
|
||||||
hash ^= seed;
|
hash ^= seed;
|
||||||
hash = (hash << 19) | (hash >> 13);
|
hash = (hash << 19) | (hash >> 13);
|
||||||
hash = hash*5 + 0xe6546b64;
|
hash = hash*5 + 0xe6546b64;
|
||||||
hash ^= len;
|
|
||||||
hash ^= hash >> 16;
|
hash ^= hash >> 16;
|
||||||
hash *= 0x85ebca6b;
|
hash *= 0x85ebca6b;
|
||||||
|
hash ^= seed;
|
||||||
hash ^= hash >> 13;
|
hash ^= hash >> 13;
|
||||||
hash *= 0xc2b2ae35;
|
hash *= 0xc2b2ae35;
|
||||||
hash ^= hash >> 16;
|
hash ^= hash >> 16;
|
||||||
@ -1006,16 +1013,17 @@ size_t stbds_hash_bytes(void *p, size_t len, size_t seed)
|
|||||||
} else if (len == 8 && sizeof(size_t) == 8) {
|
} else if (len == 8 && sizeof(size_t) == 8) {
|
||||||
size_t hash = d[0] | (d[1] << 8) | (d[2] << 16) | (d[3] << 24);
|
size_t hash = d[0] | (d[1] << 8) | (d[2] << 16) | (d[3] << 24);
|
||||||
hash |= (size_t) (d[4] | (d[5] << 8) | (d[6] << 16) | (d[7] << 24)) << 16 << 16; // avoid warning if size_t == 4
|
hash |= (size_t) (d[4] | (d[5] << 8) | (d[6] << 16) | (d[7] << 24)) << 16 << 16; // avoid warning if size_t == 4
|
||||||
hash ^= seed ^ len;
|
hash ^= seed;
|
||||||
hash = (~hash) + (hash << 21);
|
hash = (~hash) + (hash << 21);
|
||||||
hash ^= STBDS_ROTATE_RIGHT(hash,24);
|
hash ^= STBDS_ROTATE_RIGHT(hash,24);
|
||||||
hash *= 265;
|
hash *= 265;
|
||||||
hash ^= STBDS_ROTATE_RIGHT(hash,14);
|
hash ^= STBDS_ROTATE_RIGHT(hash,14);
|
||||||
|
hash ^= seed;
|
||||||
hash *= 21;
|
hash *= 21;
|
||||||
hash ^= STBDS_ROTATE_RIGHT(hash,28);
|
hash ^= STBDS_ROTATE_RIGHT(hash,28);
|
||||||
hash += (hash << 31);
|
hash += (hash << 31);
|
||||||
hash = (~hash) + (hash << 18);
|
hash = (~hash) + (hash << 18);
|
||||||
return hash^seed;
|
return hash;
|
||||||
} else {
|
} else {
|
||||||
return stbds_siphash_bytes(p,len,seed);
|
return stbds_siphash_bytes(p,len,seed);
|
||||||
}
|
}
|
||||||
@ -1272,6 +1280,8 @@ void * stbds_shmode_func(size_t elemsize, int mode)
|
|||||||
{
|
{
|
||||||
void *a = stbds_arrgrowf(0, elemsize, 0, 1);
|
void *a = stbds_arrgrowf(0, elemsize, 0, 1);
|
||||||
stbds_hash_index *h;
|
stbds_hash_index *h;
|
||||||
|
memset(a, 0, elemsize);
|
||||||
|
stbds_header(a)->length = 1;
|
||||||
stbds_header(a)->hash_table = h = (stbds_hash_index *) stbds_make_hash_index(STBDS_BUCKET_LENGTH, NULL);
|
stbds_header(a)->hash_table = h = (stbds_hash_index *) stbds_make_hash_index(STBDS_BUCKET_LENGTH, NULL);
|
||||||
h->string.mode = mode;
|
h->string.mode = mode;
|
||||||
return STBDS_ARR_TO_HASH(a,elemsize);
|
return STBDS_ARR_TO_HASH(a,elemsize);
|
||||||
|
Loading…
Reference in New Issue
Block a user