Add optimized C string hashing

Given an already-initialized hash state and a NUL-terminated string,
accumulate the hash of the string into the hash state and return the
length for the caller to (optionally) save for the finalizer. This
avoids a strlen call.

If the string pointer is aligned, we can use a word-at-a-time
algorithm for NUL lookahead. The aligned case is only used on 64-bit
platforms, since it's not worth the extra complexity for 32-bit.

Handling the tail of the string after finishing the word-wise loop
was inspired by NetBSD's strlen(), but no code was taken since that
is written in assembly language.

As demonstration, use this in the search path cache. This brings the
general case performance closer to the special case optimization done
in commit a86c61c9ee. There are other places that could benefit, but
that is left for future work.

Jeff Davis and John Naylor
Reviewed by Heikki Linnakangas, Jian He, Junwang Zhao

Discussion: https://postgr.es/m/3820f030fd008ff14134b3e9ce5cc6dd623ed479.camel%40j-davis.com
Discussion: https://postgr.es/m/b40292c99e623defe5eadedab1d438cf51a4107c.camel%40j-davis.com
This commit is contained in:
John Naylor 2024-01-16 16:32:48 +07:00
parent e97b672c88
commit 0aba255440
2 changed files with 145 additions and 5 deletions

View File

@ -41,7 +41,7 @@
#include "catalog/pg_ts_template.h"
#include "catalog/pg_type.h"
#include "commands/dbcommands.h"
#include "common/hashfn.h"
#include "common/hashfn_unstable.h"
#include "funcapi.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
@ -253,11 +253,21 @@ static bool MatchNamedCall(HeapTuple proctup, int nargs, List *argnames,
/*
 * Hash a SearchPathCacheKey, covering both key.roleid and key.searchPath.
 *
 * NOTE(review): this listing looks like a rendered diff whose +/- markers
 * were stripped, so the superseded strlen()/hash_bytes()/hash_combine()
 * lines appear interleaved with their fasthash-based replacement. Confirm
 * against the committed file before reading this as one function body.
 */
static inline uint32
spcachekey_hash(SearchPathCacheKey key)
{
const unsigned char *bytes = (const unsigned char *) key.searchPath;
int blen = strlen(key.searchPath);
fasthash_state hs;
int sp_len;
return hash_combine(hash_bytes(bytes, blen),
hash_uint32(key.roleid));
/*
 * The string length is not known yet, so FH_UNKNOWN_LENGTH is passed here
 * and the real length is handed to the finalizer below.
 */
fasthash_init(&hs, FH_UNKNOWN_LENGTH, 0);
/* Mix roleid into the state by loading it into the accumulator. */
hs.accum = key.roleid;
fasthash_combine(&hs);
/*
 * Combine search path into the hash and save the length for tweaking the
 * final mix.
 */
sp_len = fasthash_accum_cstring(&hs, key.searchPath);
return fasthash_final32(&hs, sp_len);
}
static inline bool

View File

@ -58,6 +58,24 @@
* 2) Incremental interface. This can be used for incorporating multiple
* inputs. The standalone functions use this internally, so see fasthash64()
* for an example of how this works.
*
* The incremental interface is especially useful if any of the inputs
* are NUL-terminated C strings, since the length is not needed ahead
* of time. This avoids needing to call strlen(). This case is optimized
* in fasthash_accum_cstring() :
*
* fasthash_state hs;
* fasthash_init(&hs, FH_UNKNOWN_LENGTH, 0);
* len = fasthash_accum_cstring(&hs, str);
* ...
* return fasthash_final32(&hs, len);
*
* Here we pass FH_UNKNOWN_LENGTH as a convention, since passing zero
* would zero out the internal seed as well. fasthash_accum_cstring()
* returns the length of the string, which is computed on-the-fly while
* mixing the string into the hash. Experimentation has found that
* SMHasher fails unless we incorporate the length, so it is passed to
* the finalizer as a tweak.
*/
@ -151,6 +169,118 @@ fasthash_accum(fasthash_state *hs, const char *k, int len)
fasthash_combine(hs);
}
/*
 * Zero-byte detector for a 64-bit word, from:
 * https://graphics.stanford.edu/~seander/bithacks.html#ZeroInWord
 *
 * The result is nonzero when 'v' contains a zero byte. The high bit (0x80)
 * is set in the result byte matching the lowest-order zero byte of 'v';
 * result bytes above that one are not reliable. Note that 'v' is evaluated
 * twice.
 */
#define haszero64(v) \
	(~(v) & ((v) - 0x0101010101010101) & 0x8080808080808080)
/*
 * General-purpose worker for fasthash_accum_cstring; places no alignment
 * requirement on 'str'.
 *
 * Walks the string in pieces of at most FH_SIZEOF_ACCUM bytes, stopping each
 * piece early at the NUL terminator, and feeds every piece to
 * fasthash_accum(). Returns the number of bytes that precede the terminator.
 */
static inline int
fasthash_accum_cstring_unaligned(fasthash_state *hs, const char *str)
{
	const char *cursor = str;

	while (*cursor != '\0')
	{
		int			nbytes;

		/* measure the next piece without stepping past the terminator */
		for (nbytes = 0; nbytes < FH_SIZEOF_ACCUM; nbytes++)
		{
			if (cursor[nbytes] == '\0')
				break;
		}

		fasthash_accum(hs, cursor, nbytes);
		cursor += nbytes;
	}

	return cursor - str;
}
/*
 * specialized workhorse for fasthash_accum_cstring
 *
 * With an aligned pointer, we consume the string a word at a time.
 * Loading the word containing the NUL terminator cannot segfault since
 * allocation boundaries are suitably aligned.
 *
 * 'str' must be aligned for uint64 (checked by assertion). Every full word
 * that contains no NUL byte is mixed in directly; the bytes of the final
 * word that precede the NUL are passed to fasthash_accum(). Returns the
 * number of bytes that precede the terminator.
 */
static inline int
fasthash_accum_cstring_aligned(fasthash_state *hs, const char *str)
{
	const char *const start = str;
	int			remainder;
	uint64		zero_bytes_le;

	Assert(PointerIsAligned(start, uint64));
	for (;;)
	{
		/* read through a const-qualified pointer; the string is not modified */
		uint64		chunk = *(const uint64 *) str;

		/*
		 * With little-endian representation, haszero64() sets the high bit
		 * of the result byte that corresponds to the first zero byte in the
		 * original word. The remaining result bytes are indeterminate, so
		 * on big-endian machines the word must be byte-swapped first (or
		 * checked bytewise). The 64-bit swap is pg_bswap64(); there is no
		 * width-less pg_bswap().
		 */
#ifdef WORDS_BIGENDIAN
		zero_bytes_le = haszero64(pg_bswap64(chunk));
#else
		zero_bytes_le = haszero64(chunk);
#endif
		if (zero_bytes_le)
			break;

		/* no NUL in this word: mix in all of its bytes at once */
		hs->accum = chunk;
		fasthash_combine(hs);
		str += FH_SIZEOF_ACCUM;
	}

	/*
	 * For the last word, only use bytes up to the NUL for the hash. Bytes
	 * with set bits will be 0x80, so calculate the first occurrence of a zero
	 * byte within the input word by counting the number of trailing (because
	 * little-endian) zeros and dividing the result by 8.
	 */
	remainder = pg_rightmost_one_pos64(zero_bytes_le) / BITS_PER_BYTE;
	fasthash_accum(hs, str, remainder);
	str += remainder;

	return str - start;
}
/*
 * Mix the NUL-terminated string 'str' into the hash state 'hs' and return
 * the length of the string.
 *
 * When pointers are at least 8 bytes wide and 'str' is aligned for uint64,
 * the word-at-a-time worker is used; in every other case the bytewise
 * worker handles the string. In assert-enabled builds the bytewise worker
 * is also run on a copy of the state so the two results can be compared.
 */
static inline int
fasthash_accum_cstring(fasthash_state *hs, const char *str)
{
#if SIZEOF_VOID_P >= 8
	int			nbytes;
#ifdef USE_ASSERT_CHECKING
	/* reference result, computed on a scratch copy of the state */
	fasthash_state shadow_hs = *hs;
	int			shadow_nbytes = fasthash_accum_cstring_unaligned(&shadow_hs, str);
#endif

	if (!PointerIsAligned(str, uint64))
		return fasthash_accum_cstring_unaligned(hs, str);

	nbytes = fasthash_accum_cstring_aligned(hs, str);
	Assert(shadow_hs.hash == hs->hash && shadow_nbytes == nbytes);
	return nbytes;
#else

	/*
	 * It's not worth it to try to make the word-at-a-time optimization work
	 * on 32-bit platforms.
	 */
	return fasthash_accum_cstring_unaligned(hs, str);
#endif							/* SIZEOF_VOID_P */
}
/*
* The finalizer
*