From 82beca0432545857578537a206e0a6ccca7de252 Mon Sep 17 00:00:00 2001 From: Vincent Sanders Date: Sun, 12 Jul 2015 17:28:03 +0100 Subject: [PATCH] Complete hash table tests and clean up ineterface. --- monkey/filetype.c | 26 ------- test/bloom.c | 7 ++ test/hashtable.c | 173 +++++++++++++++++++++++++++++++++++++++++++ utils/hashtable.c | 185 ++++------------------------------------------ utils/hashtable.h | 46 +++++++++++- 5 files changed, 237 insertions(+), 200 deletions(-) diff --git a/monkey/filetype.c b/monkey/filetype.c index 891dcd30b..2f9566c09 100644 --- a/monkey/filetype.c +++ b/monkey/filetype.c @@ -212,29 +212,3 @@ const char *monkey_fetch_filetype(const char *unix_path) return type != NULL ? type : "text/plain"; } - -#ifdef TEST_RIG - -int main(int argc, char *argv[]) -{ - unsigned int c1, *c2; - const char *key; - - gtk_fetch_filetype_init("./mime.types"); - - c1 = 0; c2 = 0; - - while ( (key = hash_iterate(mime_hash, &c1, &c2)) != NULL) { - printf("%s ", key); - } - - printf("\n"); - - if (argc > 1) { - printf("%s maps to %s\n", argv[1], fetch_filetype(argv[1])); - } - - gtk_fetch_filetype_fin(); -} - -#endif diff --git a/test/bloom.c b/test/bloom.c index 8d8c21c24..71a0b013a 100644 --- a/test/bloom.c +++ b/test/bloom.c @@ -73,6 +73,13 @@ static void dict_bloom_teardown(void) /* Tests */ +/** + * Test blooom filter creation + * + * Create a bloom filter, add a single entry and test for presece and + * absence of that entry (single entry cannot have false positives). + * + */ START_TEST(bloom_create_test) { struct bloom_filter *b; diff --git a/test/hashtable.c b/test/hashtable.c index 2939f4f85..d69b10870 100644 --- a/test/hashtable.c +++ b/test/hashtable.c @@ -32,6 +32,96 @@ #include "utils/hashtable.h" +#define NELEMS(x) (sizeof(x) / sizeof((x)[0])) + +struct test_pairs { + const char* test; + const char* res; +}; + +static struct hash_table *match_hash_a; +static struct hash_table *match_hash_b; + +static struct hash_table *dict_hash; + +static const struct test_pairs match_tests[] = { + { "cow", "moo" }, + { "pig", "oink" }, + { "chicken", "cluck" }, + { "dog", "woof" }, + { "sheep", "baaa" }, +}; + +/* Fixtures */ + +static void match_hashtable_create(void) +{ + int idx; + + match_hash_a = hash_create(79); + ck_assert(match_hash_a != NULL); + + match_hash_b = hash_create(103); + ck_assert(match_hash_b != NULL); + + for (idx = 0; idx < NELEMS(match_tests); idx++) { + hash_add(match_hash_a, + match_tests[idx].test, + match_tests[idx].res); + hash_add(match_hash_b, + match_tests[idx].res, + match_tests[idx].test); + } +} + +static void match_hashtable_teardown(void) +{ + hash_destroy(match_hash_a); + hash_destroy(match_hash_b); +} + + +/** + * create dictionary hashtable + * + * hashtable constructed from the odd/even rows of the + * dictionary + */ +static void dict_hashtable_create(int dict_hash_size) +{ + FILE *dictf; + char keybuf[BUFSIZ], valbuf[BUFSIZ]; + + dictf = fopen("/usr/share/dict/words", "r"); + ck_assert(dictf != NULL); + + dict_hash = hash_create(dict_hash_size); + ck_assert(dict_hash != NULL); + + while (!feof(dictf)) { + fscanf(dictf, "%s", keybuf); + fscanf(dictf, "%s", valbuf); + hash_add(dict_hash, keybuf, valbuf); + } + + fclose(dictf); +} + +static void dicts_hashtable_create(void) +{ + dict_hashtable_create(1031); +} + +static void dictl_hashtable_create(void) +{ + dict_hashtable_create(7919); +} + +static void dict_hashtable_teardown(void) +{ + hash_destroy(dict_hash); +} + /* Tests */ /** @@ -109,12 +199,60 @@ START_TEST(hashtable_positive_test) } END_TEST + +START_TEST(hashtable_matcha_test) +{ + const char *res; + + res = hash_get(match_hash_a, match_tests[_i].test); + ck_assert(res != NULL); + ck_assert_str_eq(res, match_tests[_i].res); +} +END_TEST + + +START_TEST(hashtable_matchb_test) +{ + const char *res; + + res = hash_get(match_hash_b, match_tests[_i].res); + ck_assert(res != NULL); + ck_assert_str_eq(res, match_tests[_i].test); +} +END_TEST + + +START_TEST(hashtable_dict_test) +{ + FILE *dictf; + char keybuf[BUFSIZ], valbuf[BUFSIZ]; + const char *res; + + dictf = fopen("/usr/share/dict/words", "r"); + ck_assert(dictf != NULL); + + while (!feof(dictf)) { + fscanf(dictf, "%s", keybuf); + fscanf(dictf, "%s", valbuf); + + res = hash_get(dict_hash, keybuf); + ck_assert(res != NULL); + ck_assert_str_eq(res, valbuf); + } + + fclose(dictf); +} +END_TEST + /* Suite */ Suite *hashtable_suite(void) { Suite *s; TCase *tc_create; + TCase *tc_match; + TCase *tc_dict_s; + TCase *tc_dict_l; s = suite_create("hash table filter"); @@ -127,7 +265,42 @@ Suite *hashtable_suite(void) suite_add_tcase(s, tc_create); + /* Matching entry tests */ + tc_match = tcase_create("Match"); + tcase_add_checked_fixture(tc_match, + match_hashtable_create, + match_hashtable_teardown); + + tcase_add_loop_test(tc_match, + hashtable_matcha_test, + 0, NELEMS(match_tests)); + tcase_add_loop_test(tc_match, + hashtable_matchb_test, + 0, NELEMS(match_tests)); + + suite_add_tcase(s, tc_match); + + /* small table dictionary test */ + tc_dict_s = tcase_create("small table dictionary"); + tcase_add_checked_fixture(tc_dict_s, + dicts_hashtable_create, + dict_hashtable_teardown); + + tcase_add_test(tc_dict_s, hashtable_dict_test); + + suite_add_tcase(s, tc_dict_s); + + + /* large table dictionary test */ + tc_dict_l = tcase_create("large table dictionary"); + tcase_add_checked_fixture(tc_dict_l, + dictl_hashtable_create, + dict_hashtable_teardown); + + tcase_add_test(tc_dict_l, hashtable_dict_test); + + suite_add_tcase(s, tc_dict_l); return s; } diff --git a/utils/hashtable.c b/utils/hashtable.c index d807904a2..af100dfc9 100644 --- a/utils/hashtable.c +++ b/utils/hashtable.c @@ -17,16 +17,20 @@ * along with this program. If not, see . */ -/** \file - * Write-Once hash table for string to string mappings */ +/** + * \file + * Write-Once hash table for string to string mappings. + * + * This implementation is unit tested, if you make changes please + * ensure the tests continute to pass and if possible, through + * valgrind to make sure there are no memory leaks or invalid memory + * accesses. If you add new functionality, please include a test for + * it that has good coverage along side the other tests. + */ #include #include #include -#ifdef TEST_RIG -#include -#include -#endif #include "utils/hashtable.h" #include "utils/log.h" @@ -42,6 +46,7 @@ struct hash_table { struct hash_entry **chain; }; + /** * Hash a string, returning a 32bit value. The hash algorithm used is * Fowler Noll Vo - a very fast and simple hash, ideal for short strings. @@ -51,7 +56,6 @@ struct hash_table { * \param len Pointer to unsigned integer to record datum's length in. * \return The calculated hash value for the datum. */ - static inline unsigned int hash_string_fnv(const char *datum, unsigned int *len) { unsigned int z = 0x811c9dc5; @@ -71,17 +75,7 @@ static inline unsigned int hash_string_fnv(const char *datum, unsigned int *len) } -/** - * Create a new hash table, and return a context for it. The memory consumption - * of a hash table is approximately 8 + (nchains * 12) bytes if it is empty. - * - * \param chains Number of chains/buckets this hash table will have. This - * should be a prime number, and ideally a prime number just - * over a power of two, for best performance and distribution. - * \return struct hash_table containing the context of this hash table or NULL - * if there is insufficent memory to create it and its chains. - */ - +/* exported interface documented in utils/hashtable.h */ struct hash_table *hash_create(unsigned int chains) { struct hash_table *r = malloc(sizeof(struct hash_table)); @@ -103,13 +97,8 @@ struct hash_table *hash_create(unsigned int chains) return r; } -/** - * Destroys a hash table, freeing all memory associated with it. - * - * \param ht Hash table to destroy. After the function returns, this - * will nolonger be valid. - */ +/* exported interface documented in utils/hashtable.h */ void hash_destroy(struct hash_table *ht) { unsigned int i; @@ -133,19 +122,8 @@ void hash_destroy(struct hash_table *ht) free(ht); } -/** - * Adds a key/value pair to a hash table. If the key you're adding is already - * in the hash table, it does not replace it, but it does take precedent over - * it. The old key/value pair will be inaccessable but still in memory until - * hash_destroy() is called on the hash table. - * - * \param ht The hash table context to add the key/value pair to. - * \param key The key to associate the value with. A copy is made. - * \param value The value to associate the key with. A copy is made. - * \return true if the add succeeded, false otherwise. (Failure most likely - * indicates insufficent memory to make copies of the key and value. - */ +/* exported interface documented in utils/hashtable.h */ bool hash_add(struct hash_table *ht, const char *key, const char *value) { unsigned int h, c, v; @@ -179,14 +157,8 @@ bool hash_add(struct hash_table *ht, const char *key, const char *value) return true; } -/** - * Looks up a the value associated with with a key from a specific hash table. - * - * \param ht The hash table context to look up the key in. - * \param key The key to search for. - * \return The value associated with the key, or NULL if it was not found. - */ +/* exported interface documented in utils/hashtable.h */ const char *hash_get(struct hash_table *ht, const char *key) { unsigned int h, c, key_length; @@ -205,130 +177,3 @@ const char *hash_get(struct hash_table *ht, const char *key) return NULL; } - -/** - * Iterate through all available hash keys. - * - * \param ht The hash table context to iterate. - * \param c1 Pointer to first context - * \param c2 Pointer to second context (set to 0 on first call) - * \return The next hash key, or NULL for no more keys - */ - -const char *hash_iterate(struct hash_table *ht, unsigned int *c1, unsigned int **c2) { - struct hash_entry **he = (struct hash_entry **)c2; - - if (ht == NULL) - return NULL; - - if (!*he) - *c1 = -1; - else - *he = (*he)->next; - - if (*he) - return (*he)->pairing; - - while (!*he) { - (*c1)++; - if (*c1 >= ht->nchains) - return NULL; - *he = ht->chain[*c1]; - } - return (*he)->pairing; -} - -/* A simple test rig. To compile, use: - * gcc -o hashtest -I../ -DTEST_RIG utils/hashtable.c - * - * If you make changes to this hash table implementation, please rerun this - * test, and if possible, through valgrind to make sure there are no memory - * leaks or invalid memory accesses. If you add new functionality, please - * include a test for it that has good coverage along side the other tests. - */ - -#ifdef TEST_RIG - -int main(int argc, char *argv[]) -{ - struct hash_table *a, *b; - FILE *dict; - char keybuf[BUFSIZ], valbuf[BUFSIZ]; - int i; - - a = hash_create(79); - assert(a != NULL); - - b = hash_create(103); - assert(b != NULL); - - hash_add(a, "cow", "moo"); - hash_add(b, "moo", "cow"); - - hash_add(a, "pig", "oink"); - hash_add(b, "oink", "pig"); - - hash_add(a, "chicken", "cluck"); - hash_add(b, "cluck", "chicken"); - - hash_add(a, "dog", "woof"); - hash_add(b, "woof", "dog"); - - hash_add(a, "cat", "meow"); - hash_add(b, "meow", "cat"); - -#define MATCH(x,y) assert(!strcmp(hash_get(a, x), y)); assert(!strcmp(hash_get(b, y), x)) - MATCH("cow", "moo"); - MATCH("pig", "oink"); - MATCH("chicken", "cluck"); - MATCH("dog", "woof"); - MATCH("cat", "meow"); - - hash_destroy(a); - hash_destroy(b); - - /* this test requires /usr/share/dict/words - a large list of English - * words. We load the entire file - odd lines are used as keys, and - * even lines are used as the values for the previous line. we then - * work through it again making sure everything matches. - * - * We do this twice - once in a hash table with many chains, and once - * with a hash table with fewer chains. - */ - - a = hash_create(1031); - b = hash_create(7919); - - dict = fopen("/usr/share/dict/words", "r"); - if (dict == NULL) { - fprintf(stderr, "Unable to open /usr/share/dict/words - extensive testing skipped.\n"); - exit(0); - } - - while (!feof(dict)) { - fscanf(dict, "%s", keybuf); - fscanf(dict, "%s", valbuf); - hash_add(a, keybuf, valbuf); - hash_add(b, keybuf, valbuf); - } - - for (i = 0; i < 5; i++) { - fseek(dict, 0, SEEK_SET); - - while (!feof(dict)) { - fscanf(dict, "%s", keybuf); - fscanf(dict, "%s", valbuf); - assert(strcmp(hash_get(a, keybuf), valbuf) == 0); - assert(strcmp(hash_get(b, keybuf), valbuf) == 0); - } - } - - hash_destroy(a); - hash_destroy(b); - - fclose(dict); - - return 0; -} - -#endif diff --git a/utils/hashtable.h b/utils/hashtable.h index 432ccfe2a..b0e7392c6 100644 --- a/utils/hashtable.h +++ b/utils/hashtable.h @@ -16,8 +16,10 @@ * along with this program. If not, see . */ -/** \file - * Write-Once hash table for string to string mappings */ +/** + * \file + * Interface to Write-Once hash table for string to string mapping + */ #ifndef _NETSURF_UTILS_HASHTABLE_H_ #define _NETSURF_UTILS_HASHTABLE_H_ @@ -26,11 +28,47 @@ struct hash_table; +/** + * Create a new hash table, and return a context for it. The memory consumption + * of a hash table is approximately 8 + (nchains * 12) bytes if it is empty. + * + * \param chains Number of chains/buckets this hash table will have. This + * should be a prime number, and ideally a prime number just + * over a power of two, for best performance and distribution. + * \return struct hash_table containing the context of this hash table or NULL + * if there is insufficent memory to create it and its chains. + */ struct hash_table *hash_create(unsigned int chains); + +/** + * Destroys a hash table, freeing all memory associated with it. + * + * \param ht Hash table to destroy. After the function returns, this + * will nolonger be valid. + */ void hash_destroy(struct hash_table *ht); + +/** + * Adds a key/value pair to a hash table. If the key you're adding is already + * in the hash table, it does not replace it, but it does take precedent over + * it. The old key/value pair will be inaccessable but still in memory until + * hash_destroy() is called on the hash table. + * + * \param ht The hash table context to add the key/value pair to. + * \param key The key to associate the value with. A copy is made. + * \param value The value to associate the key with. A copy is made. + * \return true if the add succeeded, false otherwise. (Failure most likely + * indicates insufficent memory to make copies of the key and value. + */ bool hash_add(struct hash_table *ht, const char *key, const char *value); + +/** + * Looks up a the value associated with with a key from a specific hash table. + * + * \param ht The hash table context to look up the key in. + * \param key The key to search for. + * \return The value associated with the key, or NULL if it was not found. + */ const char *hash_get(struct hash_table *ht, const char *key); -const char *hash_iterate(struct hash_table *ht, unsigned int *c1, - unsigned int **c2); #endif