mirror of
https://github.com/netsurf-browser/netsurf
synced 2025-01-03 01:34:25 +03:00
add hash table population from file or memory
This commit is contained in:
parent
cfa5856eea
commit
b94b96e272
@ -28,11 +28,15 @@
|
|||||||
* it that has good coverage along side the other tests.
|
* it that has good coverage along side the other tests.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stdbool.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <stdbool.h>
|
#include <zlib.h>
|
||||||
#include "utils/hashtable.h"
|
#include <errno.h>
|
||||||
|
|
||||||
#include "utils/log.h"
|
#include "utils/log.h"
|
||||||
|
#include "utils/hashtable.h"
|
||||||
|
|
||||||
|
|
||||||
struct hash_entry {
|
struct hash_entry {
|
||||||
@ -46,6 +50,8 @@ struct hash_table {
|
|||||||
struct hash_entry **chain;
|
struct hash_entry **chain;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/** maximum length of line for file or inline add */
|
||||||
|
#define LINE_BUFFER_SIZE 512
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Hash a string, returning a 32bit value. The hash algorithm used is
|
* Hash a string, returning a 32bit value. The hash algorithm used is
|
||||||
@ -179,3 +185,164 @@ const char *hash_get(struct hash_table *ht, const char *key)
|
|||||||
|
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* process a line of input.
|
||||||
|
*
|
||||||
|
* \param hash The hash table to add the line to
|
||||||
|
* \param ln The line to process
|
||||||
|
* \param lnlen The length of \ln
|
||||||
|
* \return NSERROR_OK on success else NSERROR_INVALID
|
||||||
|
*/
|
||||||
|
static nserror
|
||||||
|
process_line(struct hash_table *hash, uint8_t *ln, int lnlen)
|
||||||
|
{
|
||||||
|
uint8_t *key;
|
||||||
|
uint8_t *value;
|
||||||
|
uint8_t *colon;
|
||||||
|
|
||||||
|
key = ln; /* set key to start of line */
|
||||||
|
value = ln + lnlen; /* set value to end of line */
|
||||||
|
|
||||||
|
/* skip leading whitespace */
|
||||||
|
while ((key < value) &&
|
||||||
|
((*key == ' ') || (*key == '\t'))) {
|
||||||
|
key++;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* empty or comment lines */
|
||||||
|
if ((*key == 0) || (*key == '#')) {
|
||||||
|
return NSERROR_OK;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* find first colon as key/value separator */
|
||||||
|
for (colon = key; colon < value; colon++) {
|
||||||
|
if (*colon == ':') {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (colon == value) {
|
||||||
|
/* no colon found */
|
||||||
|
return NSERROR_INVALID;
|
||||||
|
}
|
||||||
|
|
||||||
|
*colon = 0; /* terminate key */
|
||||||
|
value = colon + 1;
|
||||||
|
|
||||||
|
if (hash_add(hash, (char *)key, (char *)value) == false) {
|
||||||
|
NSLOG(netsurf, INFO,
|
||||||
|
"Unable to add %s:%s to hash table", ln, value);
|
||||||
|
return NSERROR_INVALID;
|
||||||
|
}
|
||||||
|
return NSERROR_OK;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* exported interface documented in utils/hashtable.h */
|
||||||
|
nserror hash_add_file(struct hash_table *ht, const char *path)
|
||||||
|
{
|
||||||
|
nserror res = NSERROR_OK;
|
||||||
|
char s[LINE_BUFFER_SIZE]; /* line buffer */
|
||||||
|
gzFile fp; /* compressed file handle */
|
||||||
|
|
||||||
|
if (path == NULL) {
|
||||||
|
return NSERROR_BAD_PARAMETER;
|
||||||
|
}
|
||||||
|
|
||||||
|
fp = gzopen(path, "r");
|
||||||
|
if (!fp) {
|
||||||
|
NSLOG(netsurf, INFO,
|
||||||
|
"Unable to open file \"%.100s\": %s", path,
|
||||||
|
strerror(errno));
|
||||||
|
|
||||||
|
return NSERROR_NOT_FOUND;
|
||||||
|
}
|
||||||
|
|
||||||
|
while (gzgets(fp, s, sizeof s)) {
|
||||||
|
int slen = strlen(s);
|
||||||
|
s[--slen] = 0; /* remove \n at end */
|
||||||
|
|
||||||
|
res = process_line(ht, (uint8_t *)s, slen);
|
||||||
|
if (res != NSERROR_OK) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
gzclose(fp);
|
||||||
|
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* exported interface documented in utils/hashtable.h */
|
||||||
|
nserror hash_add_inline(struct hash_table *ht, const uint8_t *data, size_t size)
|
||||||
|
{
|
||||||
|
nserror res;
|
||||||
|
int ret; /* zlib return value */
|
||||||
|
z_stream strm;
|
||||||
|
uint8_t s[LINE_BUFFER_SIZE]; /* line buffer */
|
||||||
|
size_t used = 0; /* number of bytes in buffer in use */
|
||||||
|
uint8_t *nl;
|
||||||
|
|
||||||
|
strm.zalloc = Z_NULL;
|
||||||
|
strm.zfree = Z_NULL;
|
||||||
|
strm.opaque = Z_NULL;
|
||||||
|
|
||||||
|
strm.next_in = (uint8_t *)data;
|
||||||
|
strm.avail_in = size;
|
||||||
|
|
||||||
|
ret = inflateInit2(&strm, 32 + MAX_WBITS);
|
||||||
|
if (ret != Z_OK) {
|
||||||
|
NSLOG(netsurf, INFO, "inflateInit returned %d", ret);
|
||||||
|
return NSERROR_INVALID;
|
||||||
|
}
|
||||||
|
|
||||||
|
do {
|
||||||
|
strm.next_out = s + used;
|
||||||
|
strm.avail_out = sizeof(s) - used;
|
||||||
|
|
||||||
|
ret = inflate(&strm, Z_NO_FLUSH);
|
||||||
|
if ((ret != Z_OK) && (ret != Z_STREAM_END)) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
used = sizeof(s) - strm.avail_out;
|
||||||
|
while (used > 0) {
|
||||||
|
/* find nl */
|
||||||
|
for (nl = &s[0]; nl < &s[used]; nl++) {
|
||||||
|
if (*nl == '\n') {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (nl == &s[used]) {
|
||||||
|
/* no nl found */
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
/* found newline */
|
||||||
|
*nl = 0; /* null terminate line */
|
||||||
|
res = process_line(ht, &s[0], nl - &s[0]);
|
||||||
|
if (res != NSERROR_OK) {
|
||||||
|
inflateEnd(&strm);
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* move data down */
|
||||||
|
memmove(&s[0], nl + 1, used - ((nl + 1) - &s[0]) );
|
||||||
|
used -= ((nl +1) - &s[0]);
|
||||||
|
}
|
||||||
|
if (used == sizeof(s)) {
|
||||||
|
/* entire buffer used and no newline */
|
||||||
|
NSLOG(netsurf, INFO, "Overlength line");
|
||||||
|
used = 0;
|
||||||
|
}
|
||||||
|
} while (ret != Z_STREAM_END);
|
||||||
|
|
||||||
|
inflateEnd(&strm);
|
||||||
|
|
||||||
|
if (ret != Z_STREAM_END) {
|
||||||
|
NSLOG(netsurf, INFO, "inflate returned %d", ret);
|
||||||
|
return NSERROR_INVALID;
|
||||||
|
}
|
||||||
|
return NSERROR_OK;
|
||||||
|
|
||||||
|
}
|
||||||
|
@ -29,8 +29,11 @@
|
|||||||
struct hash_table;
|
struct hash_table;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a new hash table, and return a context for it. The memory consumption
|
* Create a new hash table
|
||||||
* of a hash table is approximately 8 + (nchains * 12) bytes if it is empty.
|
*
|
||||||
|
* Allocate a new hash table and return a context for it. The memory
|
||||||
|
* consumption of a hash table is approximately 8 + (nchains * 12)
|
||||||
|
* bytes if it is empty.
|
||||||
*
|
*
|
||||||
* \param chains Number of chains/buckets this hash table will have. This
|
* \param chains Number of chains/buckets this hash table will have. This
|
||||||
* should be a prime number, and ideally a prime number just
|
* should be a prime number, and ideally a prime number just
|
||||||
@ -41,18 +44,22 @@ struct hash_table;
|
|||||||
struct hash_table *hash_create(unsigned int chains);
|
struct hash_table *hash_create(unsigned int chains);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Destroys a hash table, freeing all memory associated with it.
|
* Destroys a hash table
|
||||||
|
*
|
||||||
|
* Destroy a hash table freeing all memory associated with it.
|
||||||
*
|
*
|
||||||
* \param ht Hash table to destroy. After the function returns, this
|
* \param ht Hash table to destroy. After the function returns, this
|
||||||
* will nolonger be valid.
|
* will no longer be valid.
|
||||||
*/
|
*/
|
||||||
void hash_destroy(struct hash_table *ht);
|
void hash_destroy(struct hash_table *ht);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Adds a key/value pair to a hash table. If the key you're adding is already
|
* Adds a key/value pair to a hash table.
|
||||||
* in the hash table, it does not replace it, but it does take precedent over
|
*
|
||||||
* it. The old key/value pair will be inaccessable but still in memory until
|
* If the key you're adding is already in the hash table, it does not
|
||||||
* hash_destroy() is called on the hash table.
|
* replace it, but it does take precedence over it. The old key/value
|
||||||
|
* pair will be inaccessible but still in memory until hash_destroy()
|
||||||
|
* is called on the hash table.
|
||||||
*
|
*
|
||||||
* \param ht The hash table context to add the key/value pair to.
|
* \param ht The hash table context to add the key/value pair to.
|
||||||
* \param key The key to associate the value with. A copy is made.
|
* \param key The key to associate the value with. A copy is made.
|
||||||
@ -71,4 +78,34 @@ bool hash_add(struct hash_table *ht, const char *key, const char *value);
|
|||||||
*/
|
*/
|
||||||
const char *hash_get(struct hash_table *ht, const char *key);
|
const char *hash_get(struct hash_table *ht, const char *key);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Add key/value pairs to a hash table with data from a file
|
||||||
|
*
|
||||||
|
* The file should be formatted as a series of lines terminated with
|
||||||
|
* newline character. Each line should contain a key/value pair
|
||||||
|
* separated by a colon. If a line is empty or starts with a #
|
||||||
|
* character it will be ignored.
|
||||||
|
*
|
||||||
|
* The file may be optionally gzip compressed.
|
||||||
|
*
|
||||||
|
* \param ht The hash table context to add the key/value pairs to.
|
||||||
|
* \param path Path to file with key/value pairs in.
|
||||||
|
* \return NSERROR_OK on success else error code
|
||||||
|
*/
|
||||||
|
nserror hash_add_file(struct hash_table *ht, const char *path);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Add key/value pairs to a hash table with data from a memory buffer
|
||||||
|
*
|
||||||
|
* The data format is the same as in hash_add_file() but held in memory
|
||||||
|
*
|
||||||
|
* The data may optionally be gzip compressed.
|
||||||
|
*
|
||||||
|
* \param ht The hash table context to add the key/value pairs to.
|
||||||
|
* \param data Source of key/value pairs
|
||||||
|
* \param size length of \a data
|
||||||
|
* \return NSERROR_OK on success else error code
|
||||||
|
*/
|
||||||
|
nserror hash_add_inline(struct hash_table *ht, const uint8_t *data, size_t size);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
156
utils/messages.c
156
utils/messages.c
@ -45,66 +45,19 @@
|
|||||||
/** The hash table used to store the standard Messages file for the old API */
|
/** The hash table used to store the standard Messages file for the old API */
|
||||||
static struct hash_table *messages_hash = NULL;
|
static struct hash_table *messages_hash = NULL;
|
||||||
|
|
||||||
/**
|
|
||||||
* process a line of input.
|
|
||||||
*/
|
|
||||||
static nserror
|
|
||||||
message_process_line(struct hash_table *hash, uint8_t *ln, int lnlen)
|
|
||||||
{
|
|
||||||
uint8_t *value;
|
|
||||||
uint8_t *colon;
|
|
||||||
|
|
||||||
/* empty or comment lines */
|
|
||||||
if (ln[0] == 0 || ln[0] == '#') {
|
|
||||||
return NSERROR_OK;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* find first colon as key/value separator */
|
|
||||||
for (colon = ln; colon < (ln + lnlen); colon++) {
|
|
||||||
if (*colon == ':') {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (colon == (ln + lnlen)) {
|
|
||||||
/* no colon found */
|
|
||||||
return NSERROR_INVALID;
|
|
||||||
}
|
|
||||||
|
|
||||||
*colon = 0; /* terminate key */
|
|
||||||
value = colon + 1;
|
|
||||||
|
|
||||||
if (hash_add(hash, (char *)ln, (char *)value) == false) {
|
|
||||||
NSLOG(netsurf, INFO, "Unable to add %s:%s to hash table", ln,
|
|
||||||
value);
|
|
||||||
return NSERROR_INVALID;
|
|
||||||
}
|
|
||||||
return NSERROR_OK;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Read keys and values from messages file.
|
* Read keys and values from messages file.
|
||||||
*
|
*
|
||||||
* \param path pathname of messages file
|
* \param path pathname of messages file
|
||||||
* \param ctx reference of hash table to merge with.
|
* \param ctx reference of hash table to merge with or NULL to create one.
|
||||||
* \return NSERROR_OK on sucess and ctx updated or error code on faliure.
|
* \return NSERROR_OK on success and ctx updated or error code on failure.
|
||||||
*/
|
*/
|
||||||
static nserror messages_load_ctx(const char *path, struct hash_table **ctx)
|
static nserror messages_load_ctx(const char *path, struct hash_table **ctx)
|
||||||
{
|
{
|
||||||
char s[400]; /* line buffer */
|
|
||||||
gzFile fp; /* compressed file handle */
|
|
||||||
struct hash_table *nctx; /* new context */
|
struct hash_table *nctx; /* new context */
|
||||||
|
nserror res;
|
||||||
assert(path != NULL);
|
|
||||||
|
|
||||||
fp = gzopen(path, "r");
|
|
||||||
if (!fp) {
|
|
||||||
NSLOG(netsurf, INFO,
|
|
||||||
"Unable to open messages file \"%.100s\": %s", path,
|
|
||||||
strerror(errno));
|
|
||||||
|
|
||||||
return NSERROR_NOT_FOUND;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (*ctx == NULL) {
|
if (*ctx == NULL) {
|
||||||
nctx = hash_create(HASH_SIZE);
|
nctx = hash_create(HASH_SIZE);
|
||||||
} else {
|
} else {
|
||||||
@ -118,40 +71,16 @@ static nserror messages_load_ctx(const char *path, struct hash_table **ctx)
|
|||||||
NSLOG(netsurf, INFO,
|
NSLOG(netsurf, INFO,
|
||||||
"Unable to create hash table for messages file %s",
|
"Unable to create hash table for messages file %s",
|
||||||
path);
|
path);
|
||||||
gzclose(fp);
|
|
||||||
return NSERROR_NOMEM;
|
return NSERROR_NOMEM;
|
||||||
}
|
}
|
||||||
|
|
||||||
while (gzgets(fp, s, sizeof s)) {
|
|
||||||
char *colon, *value;
|
|
||||||
|
|
||||||
if (s[0] == 0 || s[0] == '#')
|
res = hash_add_file(nctx, path);
|
||||||
continue;
|
if (res == NSERROR_OK) {
|
||||||
|
*ctx = nctx;
|
||||||
s[strlen(s) - 1] = 0; /* remove \n at end */
|
|
||||||
colon = strchr(s, ':');
|
|
||||||
if (!colon)
|
|
||||||
continue;
|
|
||||||
*colon = 0; /* terminate key */
|
|
||||||
value = colon + 1;
|
|
||||||
|
|
||||||
if (hash_add(nctx, s, value) == false) {
|
|
||||||
NSLOG(netsurf, INFO,
|
|
||||||
"Unable to add %s:%s to hash table of %s", s,
|
|
||||||
value, path);
|
|
||||||
gzclose(fp);
|
|
||||||
if (*ctx == NULL) {
|
|
||||||
hash_destroy(nctx);
|
|
||||||
}
|
|
||||||
return NSERROR_INVALID;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
gzclose(fp);
|
return res;
|
||||||
|
|
||||||
*ctx = nctx;
|
|
||||||
|
|
||||||
return NSERROR_OK;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -203,30 +132,19 @@ static void messages_destroy_ctx(struct hash_table *ctx)
|
|||||||
/* exported interface documented in messages.h */
|
/* exported interface documented in messages.h */
|
||||||
nserror messages_add_from_file(const char *path)
|
nserror messages_add_from_file(const char *path)
|
||||||
{
|
{
|
||||||
nserror err;
|
|
||||||
|
|
||||||
if (path == NULL) {
|
if (path == NULL) {
|
||||||
return NSERROR_BAD_PARAMETER;
|
return NSERROR_BAD_PARAMETER;
|
||||||
}
|
}
|
||||||
|
|
||||||
NSLOG(netsurf, INFO, "Loading Messages from '%s'", path);
|
NSLOG(netsurf, INFO, "Loading Messages from '%s'", path);
|
||||||
|
|
||||||
err = messages_load_ctx(path, &messages_hash);
|
return messages_load_ctx(path, &messages_hash);
|
||||||
|
|
||||||
|
|
||||||
return err;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/* exported interface documented in messages.h */
|
/* exported interface documented in messages.h */
|
||||||
nserror messages_add_from_inline(const uint8_t *data, size_t data_size)
|
nserror messages_add_from_inline(const uint8_t *data, size_t size)
|
||||||
{
|
{
|
||||||
z_stream strm;
|
|
||||||
int ret;
|
|
||||||
uint8_t s[512]; /* line buffer */
|
|
||||||
size_t used = 0; /* number of bytes in buffer in use */
|
|
||||||
uint8_t *nl;
|
|
||||||
|
|
||||||
/* ensure the hash table is initialised */
|
/* ensure the hash table is initialised */
|
||||||
if (messages_hash == NULL) {
|
if (messages_hash == NULL) {
|
||||||
messages_hash = hash_create(HASH_SIZE);
|
messages_hash = hash_create(HASH_SIZE);
|
||||||
@ -235,61 +153,7 @@ nserror messages_add_from_inline(const uint8_t *data, size_t data_size)
|
|||||||
NSLOG(netsurf, INFO, "Unable to create hash table");
|
NSLOG(netsurf, INFO, "Unable to create hash table");
|
||||||
return NSERROR_NOMEM;
|
return NSERROR_NOMEM;
|
||||||
}
|
}
|
||||||
|
return hash_add_inline(messages_hash, data, size);
|
||||||
strm.zalloc = Z_NULL;
|
|
||||||
strm.zfree = Z_NULL;
|
|
||||||
strm.opaque = Z_NULL;
|
|
||||||
|
|
||||||
strm.next_in = (uint8_t *)data;
|
|
||||||
strm.avail_in = data_size;
|
|
||||||
|
|
||||||
ret = inflateInit2(&strm, 32 + MAX_WBITS);
|
|
||||||
if (ret != Z_OK) {
|
|
||||||
NSLOG(netsurf, INFO, "inflateInit returned %d", ret);
|
|
||||||
return NSERROR_INVALID;
|
|
||||||
}
|
|
||||||
|
|
||||||
do {
|
|
||||||
strm.next_out = s + used;
|
|
||||||
strm.avail_out = sizeof(s) - used;
|
|
||||||
|
|
||||||
ret = inflate(&strm, Z_NO_FLUSH);
|
|
||||||
if ((ret != Z_OK) && (ret != Z_STREAM_END)) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
used = sizeof(s) - strm.avail_out;
|
|
||||||
while (used > 0) {
|
|
||||||
/* find nl */
|
|
||||||
for (nl = &s[0]; nl < &s[used]; nl++) {
|
|
||||||
if (*nl == '\n') {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (nl == &s[used]) {
|
|
||||||
/* no nl found */
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
/* found newline */
|
|
||||||
*nl = 0; /* null terminate line */
|
|
||||||
message_process_line(messages_hash, &s[0], nl - &s[0]);
|
|
||||||
memmove(&s[0], nl + 1, used - ((nl + 1) - &s[0]) );
|
|
||||||
used -= ((nl +1) - &s[0]);
|
|
||||||
}
|
|
||||||
if (used == sizeof(s)) {
|
|
||||||
/* entire buffer used and no newline */
|
|
||||||
NSLOG(netsurf, INFO, "Overlength line");
|
|
||||||
used = 0;
|
|
||||||
}
|
|
||||||
} while (ret != Z_STREAM_END);
|
|
||||||
|
|
||||||
inflateEnd(&strm);
|
|
||||||
|
|
||||||
if (ret != Z_STREAM_END) {
|
|
||||||
NSLOG(netsurf, INFO, "inflate returned %d", ret);
|
|
||||||
return NSERROR_INVALID;
|
|
||||||
}
|
|
||||||
return NSERROR_OK;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* exported interface documented in messages.h */
|
/* exported interface documented in messages.h */
|
||||||
|
Loading…
Reference in New Issue
Block a user