From 6e628c49cacedd19ade6410551e64cf731728061 Mon Sep 17 00:00:00 2001 From: Damien George Date: Tue, 25 Mar 2014 15:27:15 +0000 Subject: [PATCH] py: Replace naive and terrible hash function with djb2. --- py/makeqstrdata.py | 4 ++-- py/qstr.c | 8 +++++--- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/py/makeqstrdata.py b/py/makeqstrdata.py index 934bc43beb..7413365712 100644 --- a/py/makeqstrdata.py +++ b/py/makeqstrdata.py @@ -18,9 +18,9 @@ codepoint2name[ord('/')] = 'slash' # this must match the equivalent function in qstr.c def compute_hash(qstr): - hash = 0 + hash = 5381 for char in qstr: - hash += ord(char) + hash = (hash * 33) ^ ord(char) return hash & 0xffff def do_work(infiles): diff --git a/py/qstr.c b/py/qstr.c index aebc2921cf..e4b5c111b5 100644 --- a/py/qstr.c +++ b/py/qstr.c @@ -18,7 +18,7 @@ // A qstr is an index into the qstr pool. // The data for a qstr contains (hash, length, data). // For now we use very simple encoding, just to get the framework correct: -// - hash is 2 bytes (simply the sum of data bytes) +// - hash is 2 bytes (see function below) // - length is 2 bytes // - data follows // - \0 terminated (for now, so they can be printed using printf) @@ -28,10 +28,12 @@ #define Q_GET_LENGTH(q) ((q)[2] | ((q)[3] << 8)) #define Q_GET_DATA(q) ((q) + 4) +// this must match the equivalent function in makeqstrdata.py machine_uint_t qstr_compute_hash(const byte *data, uint len) { - machine_uint_t hash = 0; + // djb2 algorithm; see http://www.cse.yorku.ca/~oz/hash.html + machine_uint_t hash = 5381; for (const byte *top = data + len; data < top; data++) { - hash += *data; + hash = ((hash << 5) + hash) ^ (*data); // hash * 33 ^ data } return hash & 0xffff; }