From b94b96e272140f17a82ce0847e1634d081b5dc6c Mon Sep 17 00:00:00 2001 From: Vincent Sanders Date: Sun, 22 Apr 2018 12:03:49 +0100 Subject: [PATCH] add hash table population from file or memory --- utils/hashtable.c | 171 +++++++++++++++++++++++++++++++++++++++++++++- utils/hashtable.h | 53 +++++++++++--- utils/messages.c | 156 +++--------------------------------------- 3 files changed, 224 insertions(+), 156 deletions(-) diff --git a/utils/hashtable.c b/utils/hashtable.c index 3a1711da0..0de6f83d2 100644 --- a/utils/hashtable.c +++ b/utils/hashtable.c @@ -28,11 +28,15 @@ * it that has good coverage along side the other tests. */ +#include +#include #include #include -#include -#include "utils/hashtable.h" +#include +#include + #include "utils/log.h" +#include "utils/hashtable.h" struct hash_entry { @@ -46,6 +50,8 @@ struct hash_table { struct hash_entry **chain; }; +/** maximum length of line for file or inline add */ +#define LINE_BUFFER_SIZE 512 /** * Hash a string, returning a 32bit value. The hash algorithm used is @@ -179,3 +185,164 @@ const char *hash_get(struct hash_table *ht, const char *key) return NULL; } + + +/** + * process a line of input. + * + * \param hash The hash table to add the line to + * \param ln The line to process + * \param lnlen The length of \ln + * \return NSERROR_OK on success else NSERROR_INVALID + */ +static nserror +process_line(struct hash_table *hash, uint8_t *ln, int lnlen) +{ + uint8_t *key; + uint8_t *value; + uint8_t *colon; + + key = ln; /* set key to start of line */ + value = ln + lnlen; /* set value to end of line */ + + /* skip leading whitespace */ + while ((key < value) && + ((*key == ' ') || (*key == '\t'))) { + key++; + } + + /* empty or comment lines */ + if ((*key == 0) || (*key == '#')) { + return NSERROR_OK; + } + + /* find first colon as key/value separator */ + for (colon = key; colon < value; colon++) { + if (*colon == ':') { + break; + } + } + if (colon == value) { + /* no colon found */ + return NSERROR_INVALID; + } + + *colon = 0; /* terminate key */ + value = colon + 1; + + if (hash_add(hash, (char *)key, (char *)value) == false) { + NSLOG(netsurf, INFO, + "Unable to add %s:%s to hash table", ln, value); + return NSERROR_INVALID; + } + return NSERROR_OK; +} + + +/* exported interface documented in utils/hashtable.h */ +nserror hash_add_file(struct hash_table *ht, const char *path) +{ + nserror res = NSERROR_OK; + char s[LINE_BUFFER_SIZE]; /* line buffer */ + gzFile fp; /* compressed file handle */ + + if (path == NULL) { + return NSERROR_BAD_PARAMETER; + } + + fp = gzopen(path, "r"); + if (!fp) { + NSLOG(netsurf, INFO, + "Unable to open file \"%.100s\": %s", path, + strerror(errno)); + + return NSERROR_NOT_FOUND; + } + + while (gzgets(fp, s, sizeof s)) { + int slen = strlen(s); + s[--slen] = 0; /* remove \n at end */ + + res = process_line(ht, (uint8_t *)s, slen); + if (res != NSERROR_OK) { + break; + } + } + + gzclose(fp); + + return res; +} + +/* exported interface documented in utils/hashtable.h */ +nserror hash_add_inline(struct hash_table *ht, const uint8_t *data, size_t size) +{ + nserror res; + int ret; /* zlib return value */ + z_stream strm; + uint8_t s[LINE_BUFFER_SIZE]; /* line buffer */ + size_t used = 0; /* number of bytes in buffer in use */ + uint8_t *nl; + + strm.zalloc = Z_NULL; + strm.zfree = Z_NULL; + strm.opaque = Z_NULL; + + strm.next_in = (uint8_t *)data; + strm.avail_in = size; + + ret = inflateInit2(&strm, 32 + MAX_WBITS); + if (ret != Z_OK) { + NSLOG(netsurf, INFO, "inflateInit returned %d", ret); + return NSERROR_INVALID; + } + + do { + strm.next_out = s + used; + strm.avail_out = sizeof(s) - used; + + ret = inflate(&strm, Z_NO_FLUSH); + if ((ret != Z_OK) && (ret != Z_STREAM_END)) { + break; + } + + used = sizeof(s) - strm.avail_out; + while (used > 0) { + /* find nl */ + for (nl = &s[0]; nl < &s[used]; nl++) { + if (*nl == '\n') { + break; + } + } + if (nl == &s[used]) { + /* no nl found */ + break; + } + /* found newline */ + *nl = 0; /* null terminate line */ + res = process_line(ht, &s[0], nl - &s[0]); + if (res != NSERROR_OK) { + inflateEnd(&strm); + return res; + } + + /* move data down */ + memmove(&s[0], nl + 1, used - ((nl + 1) - &s[0]) ); + used -= ((nl +1) - &s[0]); + } + if (used == sizeof(s)) { + /* entire buffer used and no newline */ + NSLOG(netsurf, INFO, "Overlength line"); + used = 0; + } + } while (ret != Z_STREAM_END); + + inflateEnd(&strm); + + if (ret != Z_STREAM_END) { + NSLOG(netsurf, INFO, "inflate returned %d", ret); + return NSERROR_INVALID; + } + return NSERROR_OK; + +} diff --git a/utils/hashtable.h b/utils/hashtable.h index b0e7392c6..b1c0d5c41 100644 --- a/utils/hashtable.h +++ b/utils/hashtable.h @@ -29,8 +29,11 @@ struct hash_table; /** - * Create a new hash table, and return a context for it. The memory consumption - * of a hash table is approximately 8 + (nchains * 12) bytes if it is empty. + * Create a new hash table + * + * Allocate a new hash table and return a context for it. The memory + * consumption of a hash table is approximately 8 + (nchains * 12) + * bytes if it is empty. * * \param chains Number of chains/buckets this hash table will have. This * should be a prime number, and ideally a prime number just @@ -41,18 +44,22 @@ struct hash_table; struct hash_table *hash_create(unsigned int chains); /** - * Destroys a hash table, freeing all memory associated with it. + * Destroys a hash table + * + * Destroy a hash table freeing all memory associated with it. * * \param ht Hash table to destroy. After the function returns, this - * will nolonger be valid. + * will no longer be valid. */ void hash_destroy(struct hash_table *ht); /** - * Adds a key/value pair to a hash table. If the key you're adding is already - * in the hash table, it does not replace it, but it does take precedent over - * it. The old key/value pair will be inaccessable but still in memory until - * hash_destroy() is called on the hash table. + * Adds a key/value pair to a hash table. + * + * If the key you're adding is already in the hash table, it does not + * replace it, but it does take precedent over it. The old key/value + * pair will be inaccessable but still in memory until hash_destroy() + * is called on the hash table. * * \param ht The hash table context to add the key/value pair to. * \param key The key to associate the value with. A copy is made. @@ -71,4 +78,34 @@ bool hash_add(struct hash_table *ht, const char *key, const char *value); */ const char *hash_get(struct hash_table *ht, const char *key); +/** + * Add key/value pairs to a hash table with data from a file + * + * The file should be formatted as a series of lines terminated with + * newline character. Each line should contain a key/value pair + * separated by a colon. If a line is empty or starts with a # + * character it will be ignored. + * + * The file may be optionally gzip compressed. + * + * \param ht The hash table context to add the key/value pairs to. + * \param path Path to file with key/value pairs in. + * \return NSERROR_OK on success else error code + */ +nserror hash_add_file(struct hash_table *ht, const char *path); + +/** + * Add key/value pairs to a hash table with data from a memory buffer + * + * The data format is the same as in hash_add_file() but held in memory + * + * The data may optionally be gzip compressed. + * + * \param ht The hash table context to add the key/value pairs to. + * \param data Source of key/value pairs + * \param size length of \a data + * \return NSERROR_OK on success else error code + */ +nserror hash_add_inline(struct hash_table *ht, const uint8_t *data, size_t size); + #endif diff --git a/utils/messages.c b/utils/messages.c index e2d45e9da..e1e61201f 100644 --- a/utils/messages.c +++ b/utils/messages.c @@ -45,66 +45,19 @@ /** The hash table used to store the standard Messages file for the old API */ static struct hash_table *messages_hash = NULL; -/** - * process a line of input. - */ -static nserror -message_process_line(struct hash_table *hash, uint8_t *ln, int lnlen) -{ - uint8_t *value; - uint8_t *colon; - - /* empty or comment lines */ - if (ln[0] == 0 || ln[0] == '#') { - return NSERROR_OK; - } - - /* find first colon as key/value separator */ - for (colon = ln; colon < (ln + lnlen); colon++) { - if (*colon == ':') { - break; - } - } - if (colon == (ln + lnlen)) { - /* no colon found */ - return NSERROR_INVALID; - } - - *colon = 0; /* terminate key */ - value = colon + 1; - - if (hash_add(hash, (char *)ln, (char *)value) == false) { - NSLOG(netsurf, INFO, "Unable to add %s:%s to hash table", ln, - value); - return NSERROR_INVALID; - } - return NSERROR_OK; -} /** * Read keys and values from messages file. * * \param path pathname of messages file - * \param ctx reference of hash table to merge with. + * \param ctx reference of hash table to merge with or NULL to create one. * \return NSERROR_OK on sucess and ctx updated or error code on faliure. */ static nserror messages_load_ctx(const char *path, struct hash_table **ctx) { - char s[400]; /* line buffer */ - gzFile fp; /* compressed file handle */ struct hash_table *nctx; /* new context */ - - assert(path != NULL); - - fp = gzopen(path, "r"); - if (!fp) { - NSLOG(netsurf, INFO, - "Unable to open messages file \"%.100s\": %s", path, - strerror(errno)); - - return NSERROR_NOT_FOUND; - } - + nserror res; + if (*ctx == NULL) { nctx = hash_create(HASH_SIZE); } else { @@ -118,40 +71,16 @@ static nserror messages_load_ctx(const char *path, struct hash_table **ctx) NSLOG(netsurf, INFO, "Unable to create hash table for messages file %s", path); - gzclose(fp); return NSERROR_NOMEM; } - while (gzgets(fp, s, sizeof s)) { - char *colon, *value; - if (s[0] == 0 || s[0] == '#') - continue; - - s[strlen(s) - 1] = 0; /* remove \n at end */ - colon = strchr(s, ':'); - if (!colon) - continue; - *colon = 0; /* terminate key */ - value = colon + 1; - - if (hash_add(nctx, s, value) == false) { - NSLOG(netsurf, INFO, - "Unable to add %s:%s to hash table of %s", s, - value, path); - gzclose(fp); - if (*ctx == NULL) { - hash_destroy(nctx); - } - return NSERROR_INVALID; - } + res = hash_add_file(nctx, path); + if (res == NSERROR_OK) { + *ctx = nctx; } - gzclose(fp); - - *ctx = nctx; - - return NSERROR_OK; + return res; } @@ -203,30 +132,19 @@ static void messages_destroy_ctx(struct hash_table *ctx) /* exported interface documented in messages.h */ nserror messages_add_from_file(const char *path) { - nserror err; - if (path == NULL) { return NSERROR_BAD_PARAMETER; } NSLOG(netsurf, INFO, "Loading Messages from '%s'", path); - err = messages_load_ctx(path, &messages_hash); - - - return err; + return messages_load_ctx(path, &messages_hash); } /* exported interface documented in messages.h */ -nserror messages_add_from_inline(const uint8_t *data, size_t data_size) +nserror messages_add_from_inline(const uint8_t *data, size_t size) { - z_stream strm; - int ret; - uint8_t s[512]; /* line buffer */ - size_t used = 0; /* number of bytes in buffer in use */ - uint8_t *nl; - /* ensure the hash table is initialised */ if (messages_hash == NULL) { messages_hash = hash_create(HASH_SIZE); @@ -235,61 +153,7 @@ nserror messages_add_from_inline(const uint8_t *data, size_t data_size) NSLOG(netsurf, INFO, "Unable to create hash table"); return NSERROR_NOMEM; } - - strm.zalloc = Z_NULL; - strm.zfree = Z_NULL; - strm.opaque = Z_NULL; - - strm.next_in = (uint8_t *)data; - strm.avail_in = data_size; - - ret = inflateInit2(&strm, 32 + MAX_WBITS); - if (ret != Z_OK) { - NSLOG(netsurf, INFO, "inflateInit returned %d", ret); - return NSERROR_INVALID; - } - - do { - strm.next_out = s + used; - strm.avail_out = sizeof(s) - used; - - ret = inflate(&strm, Z_NO_FLUSH); - if ((ret != Z_OK) && (ret != Z_STREAM_END)) { - break; - } - - used = sizeof(s) - strm.avail_out; - while (used > 0) { - /* find nl */ - for (nl = &s[0]; nl < &s[used]; nl++) { - if (*nl == '\n') { - break; - } - } - if (nl == &s[used]) { - /* no nl found */ - break; - } - /* found newline */ - *nl = 0; /* null terminate line */ - message_process_line(messages_hash, &s[0], nl - &s[0]); - memmove(&s[0], nl + 1, used - ((nl + 1) - &s[0]) ); - used -= ((nl +1) - &s[0]); - } - if (used == sizeof(s)) { - /* entire buffer used and no newline */ - NSLOG(netsurf, INFO, "Overlength line"); - used = 0; - } - } while (ret != Z_STREAM_END); - - inflateEnd(&strm); - - if (ret != Z_STREAM_END) { - NSLOG(netsurf, INFO, "inflate returned %d", ret); - return NSERROR_INVALID; - } - return NSERROR_OK; + return hash_add_inline(messages_hash, data, size); } /* exported interface documented in messages.h */