From 8043ca2183f83a68006d9c28f2f6894a1b01bb12 Mon Sep 17 00:00:00 2001 From: "K. Lange" Date: Tue, 13 Sep 2022 19:46:17 +0900 Subject: [PATCH] Pre-hash strings from str(),hex(),oct(),bin() conversions --- src/obj_bytes.c | 4 +++- src/obj_long.c | 15 ++++++++++----- src/obj_str.c | 4 ++-- src/object.c | 4 +++- src/private.h | 5 +++++ 5 files changed, 23 insertions(+), 9 deletions(-) diff --git a/src/obj_bytes.c b/src/obj_bytes.c index b4cd0be..13a5469 100644 --- a/src/obj_bytes.c +++ b/src/obj_bytes.c @@ -4,6 +4,8 @@ #include #include +#include "private.h" + struct ByteArray { KrkInstance inst; KrkValue actual; @@ -61,7 +63,7 @@ KRK_Method(bytes,__hash__) { /* This is the so-called "sdbm" hash. It comes from a piece of * public domain code from a clone of ndbm. */ for (size_t i = 0; i < self->length; ++i) { - hash = (int)self->bytes[i] + (hash << 6) + (hash << 16) - hash; + krk_hash_advance(hash,self->bytes[i]); } return INTEGER_VAL(hash); } diff --git a/src/obj_long.c b/src/obj_long.c index 19e49cf..d522e11 100644 --- a/src/obj_long.c +++ b/src/obj_long.c @@ -20,6 +20,7 @@ #include #include #include +#include "private.h" #define DIGIT_SHIFT 31 #define DIGIT_MAX 0x7FFFFFFF @@ -1006,7 +1007,7 @@ static char * _fast_conversion(const KrkLong * abs, unsigned int bits, char * wr /** * @brief Convert a long to a string in a given base. */ -static char * krk_long_to_str(const KrkLong * n, int _base, const char * prefix, size_t *size) { +static char * krk_long_to_str(const KrkLong * n, int _base, const char * prefix, size_t *size, uint32_t *_hash) { KrkLong abs; krk_long_init_si(&abs, 0); @@ -1039,11 +1040,14 @@ static char * krk_long_to_str(const KrkLong * n, int _base, const char * prefix, char * rev = malloc(len); char * out = rev; + uint32_t hash = 0; while (writer != tmp) { - writer--; - *out++ = *writer; + *out = *--writer; + krk_hash_advance(hash,*out); + out++; } *out = '\0'; + *_hash = hash; free(tmp); @@ -1377,8 +1381,9 @@ KRK_Method(long,__rtruediv__) { #define PRINTER(name,base,prefix) \ KRK_Method(long,__ ## name ## __) { \ size_t size; \ - char * rev = krk_long_to_str(self->value, base, prefix, &size); \ - return OBJECT_VAL(krk_takeString(rev,size)); \ + uint32_t hash; \ + char * rev = krk_long_to_str(self->value, base, prefix, &size, &hash); \ + return OBJECT_VAL(krk_takeStringVetted(rev,size,size,KRK_OBJ_FLAGS_STRING_ASCII,hash)); \ } PRINTER(str,10,"") diff --git a/src/obj_str.c b/src/obj_str.c index 040a66b..9db5f8c 100644 --- a/src/obj_str.c +++ b/src/obj_str.c @@ -71,7 +71,7 @@ KRK_Method(str,__add__) { /* Hashes can be extended, which saves us calculating the whole thing */ uint32_t hash = self->obj.hash; for (size_t i = 0; i < bl; ++i) { - hash = (int)b[i] + (hash << 6) + (hash << 16) - hash; + krk_hash_advance(hash,b[i]); } KrkString * result = krk_takeStringVetted(chars, length, cpLength, type, hash); @@ -390,7 +390,7 @@ KRK_Method(str,__mul__) { for (krk_integer_type i = 0; i < howMany; ++i) { for (size_t j = 0; j < self->length; ++j) { *c = self->chars[j]; - hash = (int)*c + (hash << 6) + (hash << 16) - hash; + krk_hash_advance(hash, *c); c++; } } diff --git a/src/object.c b/src/object.c index 292fb1f..aedd961 100644 --- a/src/object.c +++ b/src/object.c @@ -8,6 +8,8 @@ #include #include +#include "private.h" + #define ALLOCATE_OBJECT(type, objectType) \ (type*)allocateObject(sizeof(type), objectType) @@ -195,7 +197,7 @@ static uint32_t hashString(const char * key, size_t length) { /* This is the so-called "sdbm" hash. It comes from a piece of * public domain code from a clone of ndbm. */ for (size_t i = 0; i < length; ++i) { - hash = (int)key[i] + (hash << 6) + (hash << 16) - hash; + krk_hash_advance(hash,key[i]); } return hash; } diff --git a/src/private.h b/src/private.h index ee08f27..4b2917d 100644 --- a/src/private.h +++ b/src/private.h @@ -62,3 +62,8 @@ struct ParsedFormatSpec { int hasPrecision; int fillSize; }; + +/* We inline hashing in a few places, so it's nice to have this in one place. + * This is the "sdbm" hash. I've been using it in various places for many years, + * and this specific version apparently traces to gawk. */ +#define krk_hash_advance(hash,c) do { hash = (int)(c) + (hash << 6) + (hash << 16) - hash; } while (0)