From 6e628c49cacedd19ade6410551e64cf731728061 Mon Sep 17 00:00:00 2001
From: Damien George <damien.p.george@gmail.com>
Date: Tue, 25 Mar 2014 15:27:15 +0000
Subject: [PATCH] py: Replace naive and terrible hash function with djb2.

---
 py/makeqstrdata.py | 4 ++--
 py/qstr.c          | 8 +++++---
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/py/makeqstrdata.py b/py/makeqstrdata.py
index 934bc43beb..7413365712 100644
--- a/py/makeqstrdata.py
+++ b/py/makeqstrdata.py
@@ -18,9 +18,9 @@ codepoint2name[ord('/')] = 'slash'
 
 # this must match the equivalent function in qstr.c
 def compute_hash(qstr):
-    hash = 0
+    hash = 5381
     for char in qstr:
-        hash += ord(char)
+        hash = (hash * 33) ^ ord(char)
     return hash & 0xffff
 
 def do_work(infiles):
diff --git a/py/qstr.c b/py/qstr.c
index aebc2921cf..e4b5c111b5 100644
--- a/py/qstr.c
+++ b/py/qstr.c
@@ -18,7 +18,7 @@
 // A qstr is an index into the qstr pool.
 // The data for a qstr contains (hash, length, data).
 // For now we use very simple encoding, just to get the framework correct:
-//  - hash is 2 bytes (simply the sum of data bytes)
+//  - hash is 2 bytes (see function below)
 //  - length is 2 bytes
 //  - data follows
 //  - \0 terminated (for now, so they can be printed using printf)
@@ -28,10 +28,12 @@
 #define Q_GET_LENGTH(q) ((q)[2] | ((q)[3] << 8))
 #define Q_GET_DATA(q)   ((q) + 4)
 
+// this must match the equivalent function in makeqstrdata.py
 machine_uint_t qstr_compute_hash(const byte *data, uint len) {
-    machine_uint_t hash = 0;
+    // djb2 algorithm; see http://www.cse.yorku.ca/~oz/hash.html
+    machine_uint_t hash = 5381;
     for (const byte *top = data + len; data < top; data++) {
-        hash += *data;
+        hash = ((hash << 5) + hash) ^ (*data); // hash * 33 ^ data
     }
     return hash & 0xffff;
 }