added the C++ version

This commit is contained in:
Attractive Chaos 2019-12-25 18:52:20 -05:00
parent cab74a24c5
commit 2d766f4d12
2 changed files with 135 additions and 10 deletions

130
cpp/khashl.hpp Normal file
View File

@ -0,0 +1,130 @@
#ifndef __AC_KHASHL_HPP
#define __AC_KHASHL_HPP
#include <functional>
#include <cstdlib>
#include <cstring>
#include <stdint.h>
namespace klib {
/**
 * Open-addressing hash set using linear probing over a power-of-two number
 * of buckets.
 *
 * Template parameters:
 *   T       - key type. Keys live in memory obtained from std::realloc() and
 *             are only ever assigned to, never constructed or destroyed, so T
 *             is expected to be a trivially copyable type.
 *   Hash    - functor; Hash()(key) must return a uint32_t or uint64_t value.
 *   Eq      - functor comparing two keys for equality.
 *   khint_t - unsigned integer type used for bucket indices and counts.
 *
 * Positions returned by get()/put() are bucket indices; end() is the
 * "not found" sentinel.
 *
 * NOTE: the class frees its buffers in the destructor but defines no copy
 * operations; an implicitly generated copy would share (and later double
 * free) the buffers. Do not copy instances.
 */
template<class T, class Hash, class Eq = std::equal_to<T>, typename khint_t = uint32_t>
class KHashL {
	khint_t bits, count; /* log2 of the bucket count; number of stored keys */
	uint32_t *used;      /* occupancy bitmap, one bit per bucket */
	T *keys;             /* bucket array; slot i is meaningful only if its bit in `used` is set */

	/* Bitmap helpers. The names deliberately avoid the reserved `__kh_*`
	 * spelling, which is also used by function-like macros in the C header. */
	static inline uint32_t is_used(const uint32_t *flag, khint_t i) { return flag[i>>5] >> (i&0x1fU) & 1U; }
	static inline void set_used(uint32_t *flag, khint_t i) { flag[i>>5] |= 1U<<(i&0x1fU); }
	static inline void set_unused(uint32_t *flag, khint_t i) { flag[i>>5] &= ~(1U<<(i&0x1fU)); }
	/* Number of 32-bit words needed for a bitmap covering m buckets. */
	static inline khint_t flag_words(khint_t m) { return m<32? 1 : m>>5; }
	/* Multiplicative (Fibonacci) hashing: multiply by ~2^w/phi and keep the
	 * top `bits` bits. `bits` must be non-zero. */
	static inline uint32_t hash_to_bucket(uint32_t hash, khint_t bits) { return hash * 2654435769U >> (32 - bits); }
	static inline uint64_t hash_to_bucket(uint64_t hash, khint_t bits) { return hash * 11400714819323198485ULL >> (64 - bits); }
public:
	/** Creates an empty table; nothing is allocated until the first put()/resize(). */
	KHashL() : bits(0), count(0), used(0), keys(0) {}
	~KHashL() { std::free(used); std::free(keys); }

	/** Number of buckets currently allocated (0 for a never-resized table). */
	inline khint_t n_buckets() const { return used? khint_t(1) << bits : 0; }
	/** Sentinel position returned by get() when a key is absent. */
	inline khint_t end() const { return n_buckets(); }
	/** Number of keys stored. */
	inline khint_t size() const { return count; }
	/** Key stored in bucket x; only meaningful when exist(x) is true. */
	inline T &at(khint_t x) { return keys[x]; }
	inline const T &at(khint_t x) const { return keys[x]; }
	/** Whether bucket x holds a key. Requires an allocated table and x < n_buckets(). */
	inline bool exist(khint_t x) const { return (is_used(used, x) != 0); }

	/** Removes all keys while keeping the allocated buckets. */
	void clear(void) {
		if (!used) return;
		std::memset(used, 0, flag_words(n_buckets()) * sizeof(uint32_t));
		count = 0;
	}

	/**
	 * Looks up a key.
	 * @return the bucket index holding `key`, or end() if it is absent.
	 */
	khint_t get(const T &key) const {
		khint_t i, last, mask, nb;
		if (keys == 0) return 0; /* never allocated: end() is 0 as well */
		nb = n_buckets();
		mask = nb - khint_t(1);
		i = last = hash_to_bucket(Hash()(key), bits);
		while (is_used(used, i) && !Eq()(keys[i], key)) {
			i = (i + khint_t(1)) & mask;
			if (i == last) return nb; /* probed every bucket */
		}
		return !is_used(used, i)? nb : i;
	}

	/**
	 * Rehashes the table in place into at least `new_nb` buckets (rounded up
	 * to a power of two, minimum 4). Works for both growing and shrinking.
	 * @return 0 on success, or when the request is too small for the current
	 *         number of keys (the table is then left untouched); -1 if memory
	 *         allocation failed (the table is left untouched as well).
	 */
	int resize(khint_t new_nb) {
		uint32_t *new_used = 0;
		khint_t j = 0, x = new_nb, nb, new_bits, new_mask;
		while ((x >>= khint_t(1)) != 0) ++j;   /* j = floor(log2(new_nb)) */
		if (new_nb & (new_nb - 1)) ++j;        /* round up if not a power of two */
		new_bits = j > 2? j : 2;
		new_nb = khint_t(1) << new_bits;
		if (count > (new_nb>>1) + (new_nb>>2)) return 0; /* requested size is too small */
		/* calloc() gives a zeroed bitmap and lets us test for failure before touching it */
		new_used = static_cast<uint32_t*>(std::calloc(flag_words(new_nb), sizeof(uint32_t)));
		if (!new_used) return -1; /* not enough memory */
		nb = n_buckets();
		if (nb < new_nb) { /* expand */
			T *new_keys = static_cast<T*>(std::realloc(keys, new_nb * sizeof(T)));
			if (!new_keys) { std::free(new_used); return -1; }
			keys = new_keys;
		} /* otherwise shrink */
		new_mask = new_nb - 1;
		for (j = 0; j != nb; ++j) {
			if (!is_used(used, j)) continue;
			T key = keys[j];
			set_unused(used, j);
			while (1) { /* kick-out process; sort of like in Cuckoo hashing */
				khint_t i;
				i = hash_to_bucket(Hash()(key), new_bits);
				while (is_used(new_used, i)) i = (i + khint_t(1)) & new_mask;
				set_used(new_used, i);
				if (i < nb && is_used(used, i)) { /* kick out the existing element */
					{ T tmp = keys[i]; keys[i] = key; key = tmp; }
					set_unused(used, i); /* mark it as deleted in the old hash table */
				} else { /* write the element and jump out of the loop */
					keys[i] = key;
					break;
				}
			}
		}
		if (nb > new_nb) { /* shrink the hash table */
			T *shrunk = static_cast<T*>(std::realloc(keys, new_nb * sizeof(T)));
			if (shrunk) keys = shrunk; /* on failure the old, larger buffer is still valid */
		}
		std::free(used); /* free the working space */
		used = new_used, bits = new_bits;
		return 0;
	}

	/**
	 * Inserts `key` if it is not already present.
	 * @param absent set to 1 if the key was inserted, 0 if it was already
	 *               present (the stored key is left untouched), or -1 if the
	 *               table had to grow and memory allocation failed.
	 * @return the bucket index of the key; on allocation failure, the bucket
	 *         count before the attempted growth (i.e. end()).
	 */
	khint_t put(const T &key, int *absent) {
		khint_t nb, i, last, mask;
		nb = n_buckets();
		*absent = -1;
		if (count >= (nb>>1) + (nb>>2)) { /* load factor reached 0.75: rehash */
			if (resize(nb + khint_t(1)) < 0)
				return nb;
			nb = n_buckets();
		} /* TODO: implement automatic shrinking; resize() already supports shrinking */
		mask = nb - 1;
		i = last = hash_to_bucket(Hash()(key), bits);
		while (is_used(used, i) && !Eq()(keys[i], key)) {
			i = (i + khint_t(1)) & mask;
			if (i == last) break;
		}
		if (!is_used(used, i)) { /* not present at all */
			keys[i] = key;
			set_used(used, i);
			++count;
			*absent = 1;
		} else *absent = 0; /* Don't touch keys[i] if present */
		return i;
	}

	/**
	 * Deletes the key stored in bucket `i` and closes the gap by shifting
	 * later elements of the same probe run backwards (no tombstones).
	 * @return 1 if a key was removed; 0 if the table is empty or `i` does not
	 *         refer to an occupied bucket (e.g. i == end()).
	 */
	int del(khint_t i) {
		khint_t j = i, k, mask;
		if (keys == 0) return 0;
		if (i >= n_buckets() || !is_used(used, i)) return 0; /* nothing stored at i */
		mask = n_buckets() - khint_t(1);
		while (1) {
			j = (j + khint_t(1)) & mask;
			if (j == i || !is_used(used, j)) break; /* j==i only when the table is completely full */
			k = hash_to_bucket(Hash()(keys[j]), bits);
			/* Move keys[j] into the hole at i only if its home bucket k is not
			 * cyclically inside (i, j]; the test differs once j has wrapped
			 * around the end of the table (j < i). */
			if ((j > i && (k <= i || k > j)) || (j < i && (k <= i && k > j)))
				keys[i] = keys[j], i = j;
		}
		set_unused(used, i);
		--count;
		return 1;
	}
};
}
#endif /* __AC_KHASHL_HPP */

View File

@ -89,17 +89,13 @@ typedef khint32_t khint_t;
* Simple private functions *
****************************/
#ifndef kroundup32
#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
#endif
#define __kh_used(flag, i) (flag[i>>5] >> (i&0x1fU) & 1U)
#define __kh_set_used(flag, i) (flag[i>>5] |= 1U<<(i&0x1fU))
#define __kh_set_unused(flag, i) (flag[i>>5] &= ~(1U<<(i&0x1fU)))
#define __kh_fsize(m) ((m) < 32? 1 : (m)>>5)
static kh_inline khint_t __kh_h2b(uint32_t hash, uint32_t bits) { return hash * 2654435769U >> (32 - bits); }
static kh_inline khint_t __kh_h2b(khint_t hash, khint_t bits) { return hash * 2654435769U >> (32 - bits); }
/*******************
* Hash table base *
@ -107,7 +103,7 @@ static kh_inline khint_t __kh_h2b(uint32_t hash, uint32_t bits) { return hash *
#define __KHASHL_TYPE(HType, khkey_t) \
typedef struct { \
khint32_t bits, count; \
khint_t bits, count; \
khint32_t *used; \
khkey_t *keys; \
} HType;
@ -156,10 +152,9 @@ static kh_inline khint_t __kh_h2b(uint32_t hash, uint32_t bits) { return hash *
#define __KHASHL_IMPL_RESIZE(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \
SCOPE int prefix##_resize(HType *h, khint_t new_n_buckets) { \
khint32_t *new_used = 0; \
khint_t j, n_buckets, new_bits, new_mask; \
kroundup32(new_n_buckets); \
for (j = 0; j < 32; ++j) \
if (new_n_buckets>>j&1) break; \
khint_t j = 0, x = new_n_buckets, n_buckets, new_bits, new_mask; \
while ((x >>= 1) != 0) ++j; \
if (new_n_buckets & (new_n_buckets - 1)) ++j; \
new_bits = j > 2? j : 2; \
new_n_buckets = 1U << new_bits; \
if (h->count > (new_n_buckets>>1) + (new_n_buckets>>2)) return 0; /* requested size is too small */ \