mirror of https://github.com/rui314/chibicc
Add string hashmap
This commit is contained in:
parent
f0c98e0d59
commit
0aad326f35
24
chibicc.h
24
chibicc.h
|
@ -409,6 +409,30 @@ bool is_ident1(uint32_t c);
|
||||||
bool is_ident2(uint32_t c);
|
bool is_ident2(uint32_t c);
|
||||||
int display_width(char *p, int len);
|
int display_width(char *p, int len);
|
||||||
|
|
||||||
|
//
|
||||||
|
// hashmap.c
|
||||||
|
//
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
char *key;
|
||||||
|
int keylen;
|
||||||
|
void *val;
|
||||||
|
} HashEntry;
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
HashEntry *buckets;
|
||||||
|
int capacity;
|
||||||
|
int used;
|
||||||
|
} HashMap;
|
||||||
|
|
||||||
|
void *hashmap_get(HashMap *map, char *key);
|
||||||
|
void *hashmap_get2(HashMap *map, char *key, int keylen);
|
||||||
|
void hashmap_put(HashMap *map, char *key, void *val);
|
||||||
|
void hashmap_put2(HashMap *map, char *key, int keylen, void *val);
|
||||||
|
void hashmap_delete(HashMap *map, char *key);
|
||||||
|
void hashmap_delete2(HashMap *map, char *key, int keylen);
|
||||||
|
void hashmap_test(void);
|
||||||
|
|
||||||
//
|
//
|
||||||
// main.c
|
// main.c
|
||||||
//
|
//
|
||||||
|
|
|
@ -0,0 +1,165 @@
|
||||||
|
// This is an implementation of the open-addressing hash table.
|
||||||
|
|
||||||
|
#include "chibicc.h"
|
||||||
|
|
||||||
|
// Initial hash bucket size
|
||||||
|
#define INIT_SIZE 16
|
||||||
|
|
||||||
|
// Rehash if the usage exceeds 70%.
|
||||||
|
#define HIGH_WATERMARK 70
|
||||||
|
|
||||||
|
// We'll keep the usage below 50% after rehashing.
|
||||||
|
#define LOW_WATERMARK 50
|
||||||
|
|
||||||
|
// Represents a deleted hash entry
|
||||||
|
#define TOMBSTONE ((void *)-1)
|
||||||
|
|
||||||
|
static uint64_t fnv_hash(char *s, int len) {
|
||||||
|
uint64_t hash = 0xcbf29ce484222325;
|
||||||
|
for (int i = 0; i < len; i++) {
|
||||||
|
hash *= 0x100000001b3;
|
||||||
|
hash ^= (unsigned char)s[i];
|
||||||
|
}
|
||||||
|
return hash;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Make room for new entires in a given hashmap by removing
|
||||||
|
// tombstones and possibly extending the bucket size.
|
||||||
|
static void rehash(HashMap *map) {
|
||||||
|
// Compute the size of the new hashmap.
|
||||||
|
int nkeys = 0;
|
||||||
|
for (int i = 0; i < map->capacity; i++)
|
||||||
|
if (map->buckets[i].key && map->buckets[i].key != TOMBSTONE)
|
||||||
|
nkeys++;
|
||||||
|
|
||||||
|
int cap = map->capacity;
|
||||||
|
while ((nkeys * 100) / cap >= LOW_WATERMARK)
|
||||||
|
cap = cap * 2;
|
||||||
|
assert(cap > 0);
|
||||||
|
|
||||||
|
// Create a new hashmap and copy all key-values.
|
||||||
|
HashMap map2 = {};
|
||||||
|
map2.buckets = calloc(cap, sizeof(HashEntry));
|
||||||
|
map2.capacity = cap;
|
||||||
|
|
||||||
|
for (int i = 0; i < map->capacity; i++) {
|
||||||
|
HashEntry *ent = &map->buckets[i];
|
||||||
|
if (ent->key && ent->key != TOMBSTONE)
|
||||||
|
hashmap_put2(&map2, ent->key, ent->keylen, ent->val);
|
||||||
|
}
|
||||||
|
|
||||||
|
assert(map2.used == nkeys);
|
||||||
|
*map = map2;
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool match(HashEntry *ent, char *key, int keylen) {
|
||||||
|
return ent->key && ent->key != TOMBSTONE &&
|
||||||
|
ent->keylen == keylen && memcmp(ent->key, key, keylen) == 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static HashEntry *get_entry(HashMap *map, char *key, int keylen) {
|
||||||
|
if (!map->buckets)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
uint64_t hash = fnv_hash(key, keylen);
|
||||||
|
|
||||||
|
for (int i = 0; i < map->capacity; i++) {
|
||||||
|
HashEntry *ent = &map->buckets[(hash + i) % map->capacity];
|
||||||
|
if (match(ent, key, keylen))
|
||||||
|
return ent;
|
||||||
|
if (ent->key == NULL)
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
unreachable();
|
||||||
|
}
|
||||||
|
|
||||||
|
static HashEntry *get_or_insert_entry(HashMap *map, char *key, int keylen) {
|
||||||
|
if (!map->buckets) {
|
||||||
|
map->buckets = calloc(INIT_SIZE, sizeof(HashEntry));
|
||||||
|
map->capacity = INIT_SIZE;
|
||||||
|
} else if ((map->used * 100) / map->capacity >= HIGH_WATERMARK) {
|
||||||
|
rehash(map);
|
||||||
|
}
|
||||||
|
|
||||||
|
uint64_t hash = fnv_hash(key, keylen);
|
||||||
|
|
||||||
|
for (int i = 0; i < map->capacity; i++) {
|
||||||
|
HashEntry *ent = &map->buckets[(hash + i) % map->capacity];
|
||||||
|
|
||||||
|
if (match(ent, key, keylen))
|
||||||
|
return ent;
|
||||||
|
|
||||||
|
if (ent->key == TOMBSTONE) {
|
||||||
|
ent->key = key;
|
||||||
|
ent->keylen = keylen;
|
||||||
|
return ent;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ent->key == NULL) {
|
||||||
|
ent->key = key;
|
||||||
|
ent->keylen = keylen;
|
||||||
|
map->used++;
|
||||||
|
return ent;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
unreachable();
|
||||||
|
}
|
||||||
|
|
||||||
|
void *hashmap_get(HashMap *map, char *key) {
|
||||||
|
return hashmap_get2(map, key, strlen(key));
|
||||||
|
}
|
||||||
|
|
||||||
|
void *hashmap_get2(HashMap *map, char *key, int keylen) {
|
||||||
|
HashEntry *ent = get_entry(map, key, keylen);
|
||||||
|
return ent ? ent->val : NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
void hashmap_put(HashMap *map, char *key, void *val) {
|
||||||
|
hashmap_put2(map, key, strlen(key), val);
|
||||||
|
}
|
||||||
|
|
||||||
|
void hashmap_put2(HashMap *map, char *key, int keylen, void *val) {
|
||||||
|
HashEntry *ent = get_or_insert_entry(map, key, keylen);
|
||||||
|
ent->val = val;
|
||||||
|
}
|
||||||
|
|
||||||
|
void hashmap_delete(HashMap *map, char *key) {
|
||||||
|
hashmap_delete2(map, key, strlen(key));
|
||||||
|
}
|
||||||
|
|
||||||
|
void hashmap_delete2(HashMap *map, char *key, int keylen) {
|
||||||
|
HashEntry *ent = get_entry(map, key, keylen);
|
||||||
|
if (ent)
|
||||||
|
ent->key = TOMBSTONE;
|
||||||
|
}
|
||||||
|
|
||||||
|
void hashmap_test(void) {
|
||||||
|
HashMap *map = calloc(1, sizeof(HashMap));
|
||||||
|
|
||||||
|
for (int i = 0; i < 5000; i++)
|
||||||
|
hashmap_put(map, format("key %d", i), (void *)(size_t)i);
|
||||||
|
for (int i = 1000; i < 2000; i++)
|
||||||
|
hashmap_delete(map, format("key %d", i));
|
||||||
|
for (int i = 1500; i < 1600; i++)
|
||||||
|
hashmap_put(map, format("key %d", i), (void *)(size_t)i);
|
||||||
|
for (int i = 6000; i < 7000; i++)
|
||||||
|
hashmap_put(map, format("key %d", i), (void *)(size_t)i);
|
||||||
|
|
||||||
|
for (int i = 0; i < 1000; i++)
|
||||||
|
assert((size_t)hashmap_get(map, format("key %d", i)) == i);
|
||||||
|
for (int i = 1000; i < 1500; i++)
|
||||||
|
assert(hashmap_get(map, "no such key") == NULL);
|
||||||
|
for (int i = 1500; i < 1600; i++)
|
||||||
|
assert((size_t)hashmap_get(map, format("key %d", i)) == i);
|
||||||
|
for (int i = 1600; i < 2000; i++)
|
||||||
|
assert(hashmap_get(map, "no such key") == NULL);
|
||||||
|
for (int i = 2000; i < 5000; i++)
|
||||||
|
assert((size_t)hashmap_get(map, format("key %d", i)) == i);
|
||||||
|
for (int i = 5000; i < 6000; i++)
|
||||||
|
assert(hashmap_get(map, "no such key") == NULL);
|
||||||
|
for (int i = 6000; i < 7000; i++)
|
||||||
|
hashmap_put(map, format("key %d", i), (void *)(size_t)i);
|
||||||
|
|
||||||
|
assert(hashmap_get(map, "no such key") == NULL);
|
||||||
|
printf("OK\n");
|
||||||
|
}
|
5
main.c
5
main.c
|
@ -191,6 +191,11 @@ static void parse_args(int argc, char **argv) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!strcmp(argv[i], "-hashmap-test")) {
|
||||||
|
hashmap_test();
|
||||||
|
exit(0);
|
||||||
|
}
|
||||||
|
|
||||||
// These options are ignored for now.
|
// These options are ignored for now.
|
||||||
if (!strncmp(argv[i], "-O", 2) ||
|
if (!strncmp(argv[i], "-O", 2) ||
|
||||||
!strncmp(argv[i], "-W", 2) ||
|
!strncmp(argv[i], "-W", 2) ||
|
||||||
|
|
|
@ -218,4 +218,7 @@ echo 'void foo(); void bar(); int main() { foo(); bar(); }' > $tmp/main.c
|
||||||
$chibicc -o $tmp/foo $tmp/main.c $tmp/foo.so
|
$chibicc -o $tmp/foo $tmp/main.c $tmp/foo.so
|
||||||
check '.so'
|
check '.so'
|
||||||
|
|
||||||
|
$chibicc -hashmap-test
|
||||||
|
check 'hashmap'
|
||||||
|
|
||||||
echo OK
|
echo OK
|
||||||
|
|
Loading…
Reference in New Issue