py: Replace naive and terrible hash function with djb2.

This commit is contained in:
Damien George 2014-03-25 15:27:15 +00:00
parent ffb5cfc8d8
commit 6e628c49ca
2 changed files with 7 additions and 5 deletions

View File

@ -18,9 +18,9 @@ codepoint2name[ord('/')] = 'slash'
# this must match the equivalent function in qstr.c # this must match the equivalent function in qstr.c
def compute_hash(qstr): def compute_hash(qstr):
hash = 0 hash = 5381
for char in qstr: for char in qstr:
hash += ord(char) hash = (hash * 33) ^ ord(char)
return hash & 0xffff return hash & 0xffff
def do_work(infiles): def do_work(infiles):

View File

@ -18,7 +18,7 @@
// A qstr is an index into the qstr pool. // A qstr is an index into the qstr pool.
// The data for a qstr contains (hash, length, data). // The data for a qstr contains (hash, length, data).
// For now we use very simple encoding, just to get the framework correct: // For now we use very simple encoding, just to get the framework correct:
// - hash is 2 bytes (simply the sum of data bytes) // - hash is 2 bytes (see function below)
// - length is 2 bytes // - length is 2 bytes
// - data follows // - data follows
// - \0 terminated (for now, so they can be printed using printf) // - \0 terminated (for now, so they can be printed using printf)
@ -28,10 +28,12 @@
#define Q_GET_LENGTH(q) ((q)[2] | ((q)[3] << 8)) #define Q_GET_LENGTH(q) ((q)[2] | ((q)[3] << 8))
#define Q_GET_DATA(q) ((q) + 4) #define Q_GET_DATA(q) ((q) + 4)
// this must match the equivalent function in makeqstrdata.py
machine_uint_t qstr_compute_hash(const byte *data, uint len) { machine_uint_t qstr_compute_hash(const byte *data, uint len) {
machine_uint_t hash = 0; // djb2 algorithm; see http://www.cse.yorku.ca/~oz/hash.html
machine_uint_t hash = 5381;
for (const byte *top = data + len; data < top; data++) { for (const byte *top = data + len; data < top; data++) {
hash += *data; hash = ((hash << 5) + hash) ^ (*data); // hash * 33 ^ data
} }
return hash & 0xffff; return hash & 0xffff;
} }