Complete hash table tests and clean up ineterface.

2024-12-15 16:52:40 +03:00 · 2015-07-12 17:28:03 +01:00 · 2015-07-12 17:28:03 +01:00 · 82beca0432
commit 82beca0432
parent 3862549ed9
5 changed files with 237 additions and 200 deletions
--- a/monkey/filetype.c
+++ b/monkey/filetype.c
@ -212,29 +212,3 @@ const char *monkey_fetch_filetype(const char *unix_path)

 	return type != NULL ? type : "text/plain";
 }
-
-#ifdef TEST_RIG
-
-int main(int argc, char *argv[])
-{
-	unsigned int c1, *c2;
-	const char *key;
-
-	gtk_fetch_filetype_init("./mime.types");
-
-	c1 = 0; c2 = 0;
-
-	while ( (key = hash_iterate(mime_hash, &c1, &c2)) != NULL) {
-		printf("%s ", key);
-	}
-
-	printf("\n");
-
-	if (argc > 1) {
-		printf("%s maps to %s\n", argv[1], fetch_filetype(argv[1]));
-	}
-
-	gtk_fetch_filetype_fin();
-}
-
-#endif
--- a/test/bloom.c
+++ b/test/bloom.c
@ -73,6 +73,13 @@ static void dict_bloom_teardown(void)

 /* Tests */

+/**
+ * Test blooom filter creation
+ *
+ * Create a bloom filter, add a single entry and test for presece and
+ * absence of that entry (single entry cannot have false positives).
+ *
+ */
 START_TEST(bloom_create_test)
 {
 	struct bloom_filter *b;
--- a/test/hashtable.c
+++ b/test/hashtable.c
@ -32,6 +32,96 @@

 #include "utils/hashtable.h"

+#define NELEMS(x)  (sizeof(x) / sizeof((x)[0]))
+
+struct test_pairs {
+	const char* test;
+	const char* res;
+};
+
+static struct hash_table *match_hash_a;
+static struct hash_table *match_hash_b;
+
+static struct hash_table *dict_hash;
+
+static const struct test_pairs match_tests[] = {
+	{ "cow", "moo" },
+	{ "pig", "oink" },
+	{ "chicken", "cluck" },
+	{ "dog", "woof" },
+	{ "sheep", "baaa" },
+};
+
+/* Fixtures */
+
+static void match_hashtable_create(void)
+{
+	int idx;
+
+	match_hash_a = hash_create(79);
+	ck_assert(match_hash_a != NULL);
+
+	match_hash_b = hash_create(103);
+	ck_assert(match_hash_b != NULL);
+
+	for (idx = 0; idx < NELEMS(match_tests); idx++) {
+		hash_add(match_hash_a,
+			 match_tests[idx].test,
+			 match_tests[idx].res);
+		hash_add(match_hash_b,
+			 match_tests[idx].res,
+			 match_tests[idx].test);
+	}
+}
+
+static void match_hashtable_teardown(void)
+{
+	hash_destroy(match_hash_a);
+	hash_destroy(match_hash_b);
+}
+
+
+/**
+ * create dictionary hashtable
+ *
+ * hashtable constructed from the odd/even rows of the
+ * dictionary
+ */
+static void dict_hashtable_create(int dict_hash_size)
+{
+	FILE *dictf;
+	char keybuf[BUFSIZ], valbuf[BUFSIZ];
+
+	dictf = fopen("/usr/share/dict/words", "r");
+	ck_assert(dictf != NULL);
+
+	dict_hash = hash_create(dict_hash_size);
+	ck_assert(dict_hash != NULL);
+
+	while (!feof(dictf)) {
+		fscanf(dictf, "%s", keybuf);
+		fscanf(dictf, "%s", valbuf);
+		hash_add(dict_hash, keybuf, valbuf);
+	}
+
+	fclose(dictf);
+}
+
+static void dicts_hashtable_create(void)
+{
+	dict_hashtable_create(1031);
+}
+
+static void dictl_hashtable_create(void)
+{
+	dict_hashtable_create(7919);
+}
+
+static void dict_hashtable_teardown(void)
+{
+	hash_destroy(dict_hash);
+}
+
 /* Tests */

 /**
@ -109,12 +199,60 @@ START_TEST(hashtable_positive_test)
 }
 END_TEST

+
+START_TEST(hashtable_matcha_test)
+{
+	const char *res;
+
+	res = hash_get(match_hash_a, match_tests[_i].test);
+	ck_assert(res != NULL);
+	ck_assert_str_eq(res, match_tests[_i].res);
+}
+END_TEST
+
+
+START_TEST(hashtable_matchb_test)
+{
+	const char *res;
+
+	res = hash_get(match_hash_b, match_tests[_i].res);
+	ck_assert(res != NULL);
+	ck_assert_str_eq(res, match_tests[_i].test);
+}
+END_TEST
+
+
+START_TEST(hashtable_dict_test)
+{
+	FILE *dictf;
+	char keybuf[BUFSIZ], valbuf[BUFSIZ];
+	const char *res;
+
+	dictf = fopen("/usr/share/dict/words", "r");
+	ck_assert(dictf != NULL);
+
+	while (!feof(dictf)) {
+		fscanf(dictf, "%s", keybuf);
+		fscanf(dictf, "%s", valbuf);
+
+		res = hash_get(dict_hash, keybuf);
+		ck_assert(res != NULL);
+		ck_assert_str_eq(res, valbuf);
+	}
+
+	fclose(dictf);
+}
+END_TEST
+
 /* Suite */

 Suite *hashtable_suite(void)
 {
 	Suite *s;
 	TCase *tc_create;
+	TCase *tc_match;
+	TCase *tc_dict_s;
+	TCase *tc_dict_l;

 	s = suite_create("hash table filter");

@ -127,7 +265,42 @@ Suite *hashtable_suite(void)

 	suite_add_tcase(s, tc_create);

+	/* Matching entry tests */
+	tc_match = tcase_create("Match");

+	tcase_add_checked_fixture(tc_match,
+				  match_hashtable_create,
+				  match_hashtable_teardown);
+
+	tcase_add_loop_test(tc_match,
+			    hashtable_matcha_test,
+			    0, NELEMS(match_tests));
+	tcase_add_loop_test(tc_match,
+			    hashtable_matchb_test,
+			    0, NELEMS(match_tests));
+
+	suite_add_tcase(s, tc_match);
+
+	/* small table dictionary test */
+	tc_dict_s = tcase_create("small table dictionary");
+	tcase_add_checked_fixture(tc_dict_s,
+				  dicts_hashtable_create,
+				  dict_hashtable_teardown);
+
+	tcase_add_test(tc_dict_s, hashtable_dict_test);
+
+	suite_add_tcase(s, tc_dict_s);
+
+
+	/* large table dictionary test */
+	tc_dict_l = tcase_create("large table dictionary");
+	tcase_add_checked_fixture(tc_dict_l,
+				  dictl_hashtable_create,
+				  dict_hashtable_teardown);
+
+	tcase_add_test(tc_dict_l, hashtable_dict_test);
+
+	suite_add_tcase(s, tc_dict_l);

 	return s;
 }
--- a/utils/hashtable.c
+++ b/utils/hashtable.c
@ -17,16 +17,20 @@
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

-/** \file
- * Write-Once hash table for string to string mappings */
+/**
+ * \file
+ * Write-Once hash table for string to string mappings.
+ *
+ * This implementation is unit tested, if you make changes please
+ * ensure the tests continute to pass and if possible, through
+ * valgrind to make sure there are no memory leaks or invalid memory
+ * accesses.  If you add new functionality, please include a test for
+ * it that has good coverage along side the other tests.
+ */

 #include <stdlib.h>
 #include <string.h>
 #include <stdbool.h>
-#ifdef TEST_RIG
-#include <assert.h>
-#include <stdio.h>
-#endif
 #include "utils/hashtable.h"
 #include "utils/log.h"

@ -42,6 +46,7 @@ struct hash_table {
 	struct hash_entry **chain;
 };

+
 /**
 * Hash a string, returning a 32bit value.  The hash algorithm used is
 * Fowler Noll Vo - a very fast and simple hash, ideal for short strings.
@ -51,7 +56,6 @@ struct hash_table {
 * \param  len	   Pointer to unsigned integer to record datum's length in.
 * \return The calculated hash value for the datum.
 */
-
 static inline unsigned int hash_string_fnv(const char *datum, unsigned int *len)
 {
 	unsigned int z = 0x811c9dc5;
@ -71,17 +75,7 @@ static inline unsigned int hash_string_fnv(const char *datum, unsigned int *len)
 }


-/**
- * Create a new hash table, and return a context for it.  The memory consumption
- * of a hash table is approximately 8 + (nchains * 12) bytes if it is empty.
- *
- * \param  chains Number of chains/buckets this hash table will have.  This
- *		  should be a prime number, and ideally a prime number just
- *		  over a power of two, for best performance and distribution.
- * \return struct hash_table containing the context of this hash table or NULL
- *	   if there is insufficent memory to create it and its chains.
- */
-
+/* exported interface documented in utils/hashtable.h */
 struct hash_table *hash_create(unsigned int chains)
 {
 	struct hash_table *r = malloc(sizeof(struct hash_table));
@ -103,13 +97,8 @@ struct hash_table *hash_create(unsigned int chains)
 	return r;
 }

-/**
- * Destroys a hash table, freeing all memory associated with it.
- *
- * \param  ht	 Hash table to destroy.  After the function returns, this
- *		 will nolonger be valid.
- */

+/* exported interface documented in utils/hashtable.h */
 void hash_destroy(struct hash_table *ht)
 {
 	unsigned int i;
@ -133,19 +122,8 @@ void hash_destroy(struct hash_table *ht)
 	free(ht);
 }

-/**
- * Adds a key/value pair to a hash table.  If the key you're adding is already
- * in the hash table, it does not replace it, but it does take precedent over
- * it.  The old key/value pair will be inaccessable but still in memory until
- * hash_destroy() is called on the hash table.
- *
- * \param  ht	  The hash table context to add the key/value pair to.
- * \param  key	  The key to associate the value with.  A copy is made.
- * \param  value  The value to associate the key with.  A copy is made.
- * \return true if the add succeeded, false otherwise.  (Failure most likely
- *	   indicates insufficent memory to make copies of the key and value.
- */

+/* exported interface documented in utils/hashtable.h */
 bool hash_add(struct hash_table *ht, const char *key, const char *value)
 {
 	unsigned int h, c, v;
@ -179,14 +157,8 @@ bool hash_add(struct hash_table *ht, const char *key, const char *value)
 	return true;
 }

-/**
- * Looks up a the value associated with with a key from a specific hash table.
- *
- * \param  ht	  The hash table context to look up the key in.
- * \param  key	  The key to search for.
- * \return The value associated with the key, or NULL if it was not found.
- */

+/* exported interface documented in utils/hashtable.h */
 const char *hash_get(struct hash_table *ht, const char *key)
 {
 	unsigned int h, c, key_length;
@ -205,130 +177,3 @@ const char *hash_get(struct hash_table *ht, const char *key)

 	return NULL;
 }
-
-/**
- * Iterate through all available hash keys.
- *
- * \param  ht	The hash table context to iterate.
- * \param  c1	Pointer to first context
- * \param  c2	Pointer to second context (set to 0 on first call)
- * \return The next hash key, or NULL for no more keys
- */
-
-const char *hash_iterate(struct hash_table *ht, unsigned int *c1, unsigned int **c2) {
-	struct hash_entry **he = (struct hash_entry **)c2;
-
-	if (ht == NULL)
-		return NULL;
-
-	if (!*he)
-		*c1 = -1;
-	else
-		*he = (*he)->next;
-
-	if (*he)
-		return (*he)->pairing;
-
-	while (!*he) {
-		(*c1)++;
-		if (*c1 >= ht->nchains)
-			return NULL;
-		*he = ht->chain[*c1];
-	}
-	return (*he)->pairing;
-}
-
-/* A simple test rig.  To compile, use:
- * gcc -o hashtest -I../ -DTEST_RIG utils/hashtable.c
- *
- * If you make changes to this hash table implementation, please rerun this
- * test, and if possible, through valgrind to make sure there are no memory
- * leaks or invalid memory accesses.  If you add new functionality, please
- * include a test for it that has good coverage along side the other tests.
- */
-
-#ifdef TEST_RIG
-
-int main(int argc, char *argv[])
-{
-	struct hash_table *a, *b;
-	FILE *dict;
-	char keybuf[BUFSIZ], valbuf[BUFSIZ];
-	int i;
-
-	a = hash_create(79);
-	assert(a != NULL);
-
-	b = hash_create(103);
-	assert(b != NULL);
-
-	hash_add(a, "cow", "moo");
-	hash_add(b, "moo", "cow");
-
-	hash_add(a, "pig", "oink");
-	hash_add(b, "oink", "pig");
-
-	hash_add(a, "chicken", "cluck");
-	hash_add(b, "cluck", "chicken");
-
-	hash_add(a, "dog", "woof");
-	hash_add(b, "woof", "dog");
-
-	hash_add(a, "cat", "meow");
-	hash_add(b, "meow", "cat");
-
-#define MATCH(x,y) assert(!strcmp(hash_get(a, x), y)); assert(!strcmp(hash_get(b, y), x))
-	MATCH("cow", "moo");
-	MATCH("pig", "oink");
-	MATCH("chicken", "cluck");
-	MATCH("dog", "woof");
-	MATCH("cat", "meow");
-
-	hash_destroy(a);
-	hash_destroy(b);
-
-	/* this test requires /usr/share/dict/words - a large list of English
-	 * words.  We load the entire file - odd lines are used as keys, and
-	 * even lines are used as the values for the previous line.  we then
-	 * work through it again making sure everything matches.
-	 *
-	 * We do this twice - once in a hash table with many chains, and once
-	 * with a hash table with fewer chains.
-	 */
-
-	a = hash_create(1031);
-	b = hash_create(7919);
-
-	dict = fopen("/usr/share/dict/words", "r");
-	if (dict == NULL) {
-		fprintf(stderr, "Unable to open /usr/share/dict/words - extensive testing skipped.\n");
-		exit(0);
-	}
-
-	while (!feof(dict)) {
-		fscanf(dict, "%s", keybuf);
-		fscanf(dict, "%s", valbuf);
-		hash_add(a, keybuf, valbuf);
-		hash_add(b, keybuf, valbuf);
-	}
-
-	for (i = 0; i < 5; i++) {
-		fseek(dict, 0, SEEK_SET);
-
-		while (!feof(dict)) {
-			fscanf(dict, "%s", keybuf);
-			fscanf(dict, "%s", valbuf);
-			assert(strcmp(hash_get(a, keybuf), valbuf) == 0);
-			assert(strcmp(hash_get(b, keybuf), valbuf) == 0);
-		}
-	}
-
-	hash_destroy(a);
-	hash_destroy(b);
-
-	fclose(dict);
-
-	return 0;
-}
-
-#endif
--- a/utils/hashtable.h
+++ b/utils/hashtable.h
@ -16,8 +16,10 @@
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

-/** \file
- * Write-Once hash table for string to string mappings */
+/**
+ * \file
+ * Interface to Write-Once hash table for string to string mapping
+ */

 #ifndef _NETSURF_UTILS_HASHTABLE_H_
 #define _NETSURF_UTILS_HASHTABLE_H_
@ -26,11 +28,47 @@

 struct hash_table;

+/**
+ * Create a new hash table, and return a context for it.  The memory consumption
+ * of a hash table is approximately 8 + (nchains * 12) bytes if it is empty.
+ *
+ * \param chains Number of chains/buckets this hash table will have.  This
+ *		  should be a prime number, and ideally a prime number just
+ *		  over a power of two, for best performance and distribution.
+ * \return struct hash_table containing the context of this hash table or NULL
+ *	   if there is insufficent memory to create it and its chains.
+ */
 struct hash_table *hash_create(unsigned int chains);
+
+/**
+ * Destroys a hash table, freeing all memory associated with it.
+ *
+ * \param ht Hash table to destroy. After the function returns, this
+ *            will nolonger be valid.
+ */
 void hash_destroy(struct hash_table *ht);
+
+/**
+ * Adds a key/value pair to a hash table.  If the key you're adding is already
+ * in the hash table, it does not replace it, but it does take precedent over
+ * it.  The old key/value pair will be inaccessable but still in memory until
+ * hash_destroy() is called on the hash table.
+ *
+ * \param  ht	  The hash table context to add the key/value pair to.
+ * \param  key	  The key to associate the value with.  A copy is made.
+ * \param  value  The value to associate the key with.  A copy is made.
+ * \return true if the add succeeded, false otherwise.  (Failure most likely
+ *	   indicates insufficent memory to make copies of the key and value.
+ */
 bool hash_add(struct hash_table *ht, const char *key, const char *value);
+
+/**
+ * Looks up a the value associated with with a key from a specific hash table.
+ *
+ * \param  ht The hash table context to look up the key in.
+ * \param  key The key to search for.
+ * \return The value associated with the key, or NULL if it was not found.
+ */
 const char *hash_get(struct hash_table *ht, const char *key);
-const char *hash_iterate(struct hash_table *ht, unsigned int *c1,
-		unsigned int **c2);

 #endif