hash tables (which should work with any object as keys?) and string interning from ch 20

This commit is contained in:
K. Lange 2020-12-26 14:33:34 +09:00
parent 14aeea5f5b
commit 417637ef21
9 changed files with 194 additions and 10 deletions

View File

@ -8,7 +8,7 @@
int main(int argc, char * argv[]) {
krk_initVM();
krk_interpret("\"hello\" + \"hellf\" + 1.4");
krk_interpret("(\"hello\" + \"hellf\" + 1.4) == \"hellohellf1.4\"");
#if 0
KrkChunk chunk;

View File

@ -6,6 +6,7 @@
#include "object.h"
#include "value.h"
#include "vm.h"
#include "table.h"
/* Allocate storage for an object of C type `type` by calling
 * allocateObject() with sizeof(type) and the `objectType` tag, and cast
 * the returned pointer to `type*`. */
#define ALLOCATE_OBJECT(type, objectType) \
(type*)allocateObject(sizeof(type), objectType)
@ -18,22 +19,43 @@ static KrkObj * allocateObject(size_t size, ObjType type) {
return object;
}
/*
 * Wrap an existing character buffer in a new KrkString object and register
 * the object in the VM-wide intern table (vm.strings).
 *
 *   chars  - buffer the new string will point at (stored as-is, not copied)
 *   length - value stored in the string's length field
 *   hash   - precomputed hash of the contents, stored in the string
 *
 * Returns the newly allocated string. Callers are expected to have checked
 * the intern table first; this function always creates a new object.
 */
static KrkString * allocateString(char * chars, size_t length, uint32_t hash) {
    KrkString * string = ALLOCATE_OBJECT(KrkString, OBJ_STRING);
    string->length = length;
    string->chars = chars;
    string->hash = hash;
    /* The intern table is used as a set: only the key is meaningful. */
    krk_tableSet(&vm.strings, OBJECT_VAL(string), NONE_VAL());
    return string;
}
/*
 * Hash `length` bytes starting at `key`.
 *
 * This is the so-called "sdbm" hash. It comes from a piece of public domain
 * code from a clone of ndbm. Each step computes hash * 65599 + byte, written
 * with shifts.
 *
 * Bytes are read as unsigned char so that the result does not depend on
 * whether plain `char` is signed on the target platform (a signed char
 * would sign-extend bytes >= 0x80 and produce a different hash).
 *
 * Returns 0 for an empty input.
 */
static uint32_t hashString(const char * key, size_t length) {
    uint32_t hash = 0;
    for (size_t i = 0; i < length; ++i) {
        hash = (unsigned char)key[i] + (hash << 6) + (hash << 16) - hash;
    }
    return hash;
}
/*
 * Produce the interned KrkString for the contents of `chars`, taking
 * ownership of the buffer.
 *
 * If an equal string is already in vm.strings, the passed buffer is released
 * (as an array of length + 1 chars) and the existing string is returned.
 * Otherwise the buffer is handed to a newly allocated string, which is
 * added to the intern table.
 *
 * Either way the caller must not use or free `chars` afterwards.
 */
KrkString * takeString(char * chars, size_t length) {
    uint32_t hash = hashString(chars, length);
    KrkString * interned = tableFindString(&vm.strings, chars, length, hash);
    if (interned != NULL) {
        /* Duplicate contents: drop our copy and reuse the canonical one. */
        FREE_ARRAY(char, chars, length + 1);
        return interned;
    }
    return allocateString(chars, length, hash);
}
/*
 * Produce the interned KrkString for the `length` bytes at `chars` without
 * taking ownership of the caller's buffer.
 *
 * If an equal string is already in vm.strings it is returned directly.
 * Otherwise the bytes are copied into a fresh length + 1 byte allocation,
 * NUL-terminated, and wrapped in a new string that is added to the intern
 * table.
 */
KrkString * copyString(const char * chars, size_t length) {
    uint32_t hash = hashString(chars, length);
    KrkString * interned = tableFindString(&vm.strings, chars, length, hash);
    if (interned != NULL) return interned;
    char * heapChars = ALLOCATE(char, length + 1);
    memcpy(heapChars, chars, length);
    heapChars[length] = '\0';
    return allocateString(heapChars, length, hash);
}
void krk_printObject(FILE * f, KrkValue value) {

View File

@ -23,6 +23,7 @@ struct ObjString {
KrkObj obj;
size_t length;
char * chars;
uint32_t hash;
};
static inline int isObjType(KrkValue value, ObjType type) {

127
table.c Normal file
View File

@ -0,0 +1,127 @@
#include <stdio.h>
#include <string.h>
#include "kuroko.h"
#include "object.h"
#include "value.h"
#include "memory.h"
#include "table.h"
/* Load-factor limit: krk_tableSet grows the table before inserting when
 * count + 1 would exceed capacity * TABLE_MAX_LOAD. */
#define TABLE_MAX_LOAD 0.75
/*
 * Put `table` into the empty state: no entries array, zero capacity and a
 * zero count. Nothing is freed here, so any array the table previously
 * pointed at is simply forgotten.
 */
void krk_initTable(KrkTable * table) {
    table->entries = NULL;
    table->capacity = 0;
    table->count = 0;
}
/*
 * Release the entries array held by `table` and return the table to the
 * empty state so it can be reused. Only the slot array is released here;
 * the keys and values stored in it are not touched.
 */
void krk_freeTable(KrkTable * table) {
    FREE_ARRAY(KrkTableEntry, table->entries, table->capacity);
    /* Clear the now-dangling pointer and the counters. */
    krk_initTable(table);
}
/*
 * Compute a 32-bit hash for a table key.
 *
 *   strings  - the hash cached in the string object
 *   integers - the value converted to uint32_t (wraps modulo 2^32)
 *   floats   - a hash of the value's object representation (see below)
 *   booleans - 0 or 1
 *   None and anything else - 0
 *
 * Values that compare equal with == must hash equally, which is why both
 * zeroes are special-cased for floats.
 */
static uint32_t hashValue(KrkValue value) {
    if (IS_STRING(value)) return (AS_STRING(value))->hash;
    if (IS_INTEGER(value)) return (uint32_t)(AS_INTEGER(value));
    if (IS_FLOATING(value)) {
        /* Converting a floating value to an unsigned integer type is
         * undefined behaviour when the truncated value is out of range
         * (any value <= -1, or anything too large), so hash the bytes of
         * the value instead of casting it.
         * NOTE(review): assumes AS_FLOATING yields float or double. */
        double number = AS_FLOATING(value);
        if (number == 0) return 0; /* +0.0 and -0.0 compare equal */
        unsigned char bytes[sizeof number];
        memcpy(bytes, &number, sizeof number);
        uint32_t hash = 0;
        for (size_t i = 0; i < sizeof number; ++i) {
            hash = bytes[i] + (hash << 6) + (hash << 16) - hash;
        }
        return hash;
    }
    if (IS_BOOLEAN(value)) return (uint32_t)(AS_BOOLEAN(value));
    if (IS_NONE(value)) return 0;
    /* Warn? Non-string objects have no hash yet and all land in bucket 0. */
    return 0;
}
/*
 * Locate the slot for `key` in an entries array of `capacity` slots, probing
 * linearly from hashValue(key) % capacity and wrapping at the end.
 *
 * Slot states, as encoded in the entries:
 *   key is not None               - occupied
 *   key is None, value is None    - never used (empty)
 *   key is None, value not None   - tombstone left by a deletion
 *
 * Returns the occupied slot whose key equals `key` if there is one.
 * Otherwise returns the first tombstone passed on the way, or, if none was
 * seen, the empty slot that ended the probe.
 *
 * `capacity` must be non-zero and the array must contain at least one empty
 * slot, otherwise the probe never terminates.
 */
static KrkTableEntry * findEntry(KrkTableEntry * entries, size_t capacity, KrkValue key) {
    KrkTableEntry * firstTombstone = NULL;
    uint32_t slot = hashValue(key) % capacity;
    while (1) {
        KrkTableEntry * candidate = &entries[slot];
        if (candidate->key.type != VAL_NONE) {
            /* Occupied: either this is our key or we keep probing. */
            if (krk_valuesEqual(candidate->key, key)) return candidate;
        } else if (!IS_NONE(candidate->value)) {
            /* Tombstone: remember the first one so it can be reused. */
            if (firstTombstone == NULL) firstTombstone = candidate;
        } else {
            /* Truly empty: the key is not in the table. */
            return firstTombstone != NULL ? firstTombstone : candidate;
        }
        slot = (slot + 1) % capacity;
    }
}
/*
 * Replace the table's entries array with a new one of `capacity` slots and
 * re-insert every occupied entry into it.
 *
 * Tombstones are not carried over, so table->count is recomputed as the
 * number of occupied entries. The old array is released afterwards.
 */
static void adjustCapacity(KrkTable * table, size_t capacity) {
    KrkTableEntry * fresh = ALLOCATE(KrkTableEntry, capacity);
    for (size_t slot = 0; slot < capacity; ++slot) {
        /* None key + None value marks a never-used slot. */
        fresh[slot].key = NONE_VAL();
        fresh[slot].value = NONE_VAL();
    }

    size_t live = 0;
    for (size_t slot = 0; slot < table->capacity; ++slot) {
        KrkTableEntry * old = &table->entries[slot];
        if (old->key.type != VAL_NONE) {
            KrkTableEntry * target = findEntry(fresh, capacity, old->key);
            target->key = old->key;
            target->value = old->value;
            live++;
        }
    }

    FREE_ARRAY(KrkTableEntry, table->entries, table->capacity);
    table->entries = fresh;
    table->capacity = capacity;
    table->count = live;
}
/*
 * Store `value` under `key`, replacing any value already stored for an
 * equal key. The table is grown first if one more entry would push it past
 * TABLE_MAX_LOAD.
 *
 * Returns 1 if `key` was not present before the call, 0 if an existing
 * entry was overwritten.
 */
int krk_tableSet(KrkTable * table, KrkValue key, KrkValue value) {
    if (table->count + 1 > table->capacity * TABLE_MAX_LOAD) {
        adjustCapacity(table, GROW_CAPACITY(table->capacity));
    }

    KrkTableEntry * slot = findEntry(table->entries, table->capacity, key);
    int wasUnoccupied = (slot->key.type == VAL_NONE);
    /* Reusing a tombstone does not change count: the tombstone was already
     * included in it. Only a never-used slot adds to the load. */
    if (wasUnoccupied && IS_NONE(slot->value)) {
        table->count++;
    }

    slot->key = key;
    slot->value = value;
    return wasUnoccupied;
}
/*
 * Copy every occupied entry of `from` into `to` using krk_tableSet, so keys
 * already present in `to` have their values overwritten. Empty slots and
 * tombstones in `from` are skipped.
 */
void krk_tableAddAll(KrkTable * from, KrkTable * to) {
    for (size_t i = 0; i < from->capacity; ++i) {
        KrkTableEntry * source = &from->entries[i];
        if (source->key.type == VAL_NONE) continue;
        krk_tableSet(to, source->key, source->value);
    }
}
/*
 * Look up `key` in `table`.
 *
 * On a hit the stored value is written to *value and 1 is returned.
 * On a miss 0 is returned and *value is left untouched.
 */
int krk_tableGet(KrkTable * table, KrkValue key, KrkValue * value) {
    /* A zero count also covers the no-storage case, where probing would
     * divide by a zero capacity. */
    if (table->count != 0) {
        KrkTableEntry * found = findEntry(table->entries, table->capacity, key);
        if (found->key.type != VAL_NONE) {
            *value = found->value;
            return 1;
        }
    }
    return 0;
}
/*
 * Remove `key` from `table`.
 *
 * Returns 1 if an entry was removed, 0 if the key was not present.
 * The vacated slot becomes a tombstone (None key, boolean value) so that
 * probe sequences passing through it keep working; table->count is
 * deliberately left unchanged.
 */
int krk_tableDelete(KrkTable * table, KrkValue key) {
    if (table->count != 0) {
        KrkTableEntry * victim = findEntry(table->entries, table->capacity, key);
        if (victim->key.type != VAL_NONE) {
            victim->key = NONE_VAL();
            victim->value = BOOLEAN_VAL(1);
            return 1;
        }
    }
    return 0;
}
/*
 * Find a string key in `table` by its raw contents rather than by object
 * identity. This is what makes interning possible: it is the one place
 * where string contents are actually compared.
 *
 *   chars, length - the bytes to look for
 *   hash          - hash of those bytes (must match what the stored strings
 *                   were hashed with)
 *
 * Returns the stored KrkString with identical length, hash and bytes, or
 * NULL if there is none.
 *
 * Occupied slots whose key is not a string are skipped, so calling this on
 * a table with mixed key types is safe.
 */
KrkString * tableFindString(KrkTable * table, const char * chars, size_t length, uint32_t hash) {
    if (table->count == 0) return NULL;
    uint32_t index = hash % table->capacity;
    for (;;) {
        KrkTableEntry * entry = &table->entries[index];
        if (entry->key.type == VAL_NONE) {
            /* Empty slot ends the probe; a tombstone does not. */
            if (IS_NONE(entry->value)) return NULL;
        } else if (IS_STRING(entry->key)) {
            KrkString * candidate = AS_STRING(entry->key);
            if (candidate->length == length &&
                candidate->hash == hash &&
                memcmp(candidate->chars, chars, length) == 0) {
                return candidate;
            }
        }
        index = (index + 1) % table->capacity;
    }
}

32
table.h Normal file
View File

@ -0,0 +1,32 @@
#pragma once
/*
* I was going to just use the ToaruOS hashmap library, but to make following
* the book easier, let's just start from their Table implementation; it has
* an advantage of using stored entries and fixed arrays, so it has some nice
* properties despite being chained internally...
*/
#include <stdlib.h>
#include "kuroko.h"
#include "value.h"
#include "object.h"
/*
 * One slot of a table's entries array.
 *
 * The slot state is encoded in the pair itself:
 *   key not None               - occupied
 *   key None, value None       - never used
 *   key None, value not None   - tombstone left by krk_tableDelete
 *
 * NOTE(review): because a None key marks an unoccupied slot, None itself
 * does not appear to be usable as a key - confirm whether that is intended.
 */
typedef struct {
KrkValue key;
KrkValue value;
} KrkTableEntry;
/*
 * Open-addressed hash table mapping KrkValue keys to KrkValue values.
 */
typedef struct {
/* Slots counted against the load factor: occupied entries plus any
 * tombstones (deleting does not decrement it; growing recomputes it). */
size_t count;
/* Number of slots in `entries`; 0 when no array has been allocated. */
size_t capacity;
/* Slot array of `capacity` elements, or NULL for an empty table. */
KrkTableEntry * entries;
} KrkTable;
/* Reset `table` to the empty state (no entries array, capacity 0, count 0). */
extern void krk_initTable(KrkTable * table);
/* Release the entries array of `table` and reset it to the empty state. */
extern void krk_freeTable(KrkTable * table);
/* Copy every occupied entry of `from` into `to`, overwriting equal keys. */
extern void krk_tableAddAll(KrkTable * from, KrkTable * to);
/* Look up a string key by contents (bytes, length and hash) instead of by
 * identity; returns the stored string or NULL if none matches. */
extern KrkString * tableFindString(KrkTable * table, const char * chars, size_t length, uint32_t hash);
/* Store `value` under `key`; returns 1 if the key was newly added, 0 if an
 * existing entry was overwritten. */
extern int krk_tableSet(KrkTable * table, KrkValue key, KrkValue value);
/* Fetch the value for `key` into *value; returns 1 on a hit, 0 on a miss
 * (in which case *value is not written). */
extern int krk_tableGet(KrkTable * table, KrkValue key, KrkValue * value);
/* Remove `key`; returns 1 if it was present, 0 otherwise. */
extern int krk_tableDelete(KrkTable * table, KrkValue key);

View File

@ -48,10 +48,8 @@ int krk_valuesEqual(KrkValue a, KrkValue b) {
case VAL_INTEGER: return AS_INTEGER(a) == AS_INTEGER(b);
case VAL_FLOATING: return AS_FLOATING(a) == AS_FLOATING(b);
case VAL_OBJECT: {
if (IS_STRING(a) && IS_STRING(b)) {
return (AS_STRING(a)->length == AS_STRING(b)->length) &&
memcmp(AS_STRING(a)->chars, AS_STRING(b)->chars, AS_STRING(a)->length) == 0;
}
if (IS_STRING(a) && IS_STRING(b)) return AS_OBJECT(a) == AS_OBJECT(b);
/* otherwise we need to do... fun stuff (push, call compare, etc.)*/
return 0;
}
default: return 0;

View File

@ -7,8 +7,8 @@ typedef struct Obj KrkObj;
typedef struct ObjString KrkString;
typedef enum {
VAL_BOOLEAN,
VAL_NONE,
VAL_BOOLEAN,
VAL_INTEGER,
VAL_FLOATING,
VAL_OBJECT,

4
vm.c
View File

@ -5,6 +5,7 @@
#include "memory.h"
#include "compiler.h"
#include "object.h"
#include "table.h"
/* Why is this static... why do we do this to ourselves... */
KrkVM vm;
@ -52,10 +53,11 @@ KrkValue krk_peep(int distance) {
/*
 * Bring the global `vm` into its initial state: reset the value stack,
 * clear the list of allocated objects and start with an empty string
 * intern table.
 *
 * Defined with a (void) parameter list so the definition is a real
 * prototype and calls with arguments are rejected by the compiler.
 */
void krk_initVM(void) {
    resetStack();
    vm.objects = NULL;
    krk_initTable(&vm.strings);
}
void krk_freeVM() {
/* todo */
krk_freeTable(&vm.strings);
krk_freeObjects();
FREE_ARRAY(size_t, vm.stack, vm.stackSize);
}

2
vm.h
View File

@ -3,6 +3,7 @@
#include "kuroko.h"
#include "chunk.h"
#include "value.h"
#include "table.h"
typedef struct {
KrkChunk * chunk;
@ -10,6 +11,7 @@ typedef struct {
size_t stackSize;
KrkValue * stack;
KrkValue * stackTop;
KrkTable strings;
KrkObj * objects;
} KrkVM;