Add general purpose hasing functions to pgbench.
Hashing function is useful for simulating real-world workload in test like WEB workload, as an example - YCSB benchmarks. Author: Ildar Musin with minor editorization by me Reviewed by: Fabien Coelho, me Discussion: https://www.postgresql.org/message-id/flat/0e8bd39e-dfcd-2879-f88f-272799ad7ef2@postgrespro.ru
This commit is contained in:
parent
8bb3c7d347
commit
e51a04840a
@ -874,13 +874,18 @@ pgbench <optional> <replaceable>options</replaceable> </optional> <replaceable>d
|
||||
|
||||
<tbody>
|
||||
<row>
|
||||
<entry> <literal>scale</literal> </entry>
|
||||
<entry>current scale factor</entry>
|
||||
<entry> <literal>client_id</literal> </entry>
|
||||
<entry>unique number identifying the client session (starts from zero)</entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry> <literal>client_id</literal> </entry>
|
||||
<entry>unique number identifying the client session (starts from zero)</entry>
|
||||
<entry> <literal>default_seed</literal> </entry>
|
||||
<entry>seed used in hash functions by default</entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry> <literal>scale</literal> </entry>
|
||||
<entry>current scale factor</entry>
|
||||
</row>
|
||||
</tbody>
|
||||
</tgroup>
|
||||
@ -1245,6 +1250,27 @@ pgbench <optional> <replaceable>options</replaceable> </optional> <replaceable>d
|
||||
<entry><literal>greatest(5, 4, 3, 2)</literal></entry>
|
||||
<entry><literal>5</literal></entry>
|
||||
</row>
|
||||
<row>
|
||||
<entry><literal><function>hash(<replaceable>a</replaceable> [, <replaceable>seed</replaceable> ] )</function></literal></entry>
|
||||
<entry>integer</entry>
|
||||
<entry>alias for <literal>hash_murmur2()</literal></entry>
|
||||
<entry><literal>hash(10, 5432)</literal></entry>
|
||||
<entry><literal>-5817877081768721676</literal></entry>
|
||||
</row>
|
||||
<row>
|
||||
<entry><literal><function>hash_fnv1a(<replaceable>a</replaceable> [, <replaceable>seed</replaceable> ] )</function></literal></entry>
|
||||
<entry>integer</entry>
|
||||
<entry><ulink url="https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function">FNV-1a hash</ulink></entry>
|
||||
<entry><literal>hash_fnv1a(10, 5432)</literal></entry>
|
||||
<entry><literal>-7793829335365542153</literal></entry>
|
||||
</row>
|
||||
<row>
|
||||
<entry><literal><function>hash_murmur2(<replaceable>a</replaceable> [, <replaceable>seed</replaceable> ] )</function></literal></entry>
|
||||
<entry>integer</entry>
|
||||
<entry><ulink url="https://en.wikipedia.org/wiki/MurmurHash">MurmurHash2 hash</ulink></entry>
|
||||
<entry><literal>hash_murmur2(10, 5432)</literal></entry>
|
||||
<entry><literal>-5817877081768721676</literal></entry>
|
||||
</row>
|
||||
<row>
|
||||
<entry><literal><function>int(<replaceable>x</replaceable>)</function></literal></entry>
|
||||
<entry>integer</entry>
|
||||
@ -1423,6 +1449,31 @@ f(x) = PHI(2.0 * parameter * (x - mu) / (max - min + 1)) /
|
||||
</listitem>
|
||||
</itemizedlist>
|
||||
|
||||
<para>
|
||||
Hash functions <literal>hash</literal>, <literal>hash_murmur2</literal> and
|
||||
<literal>hash_fnv1a</literal> accept an input value and an optional seed parameter.
|
||||
In case the seed isn't provided the value of <literal>:default_seed</literal>
|
||||
is used, which is initialized randomly unless set by the command-line
|
||||
<literal>-D</literal> option. Hash functions can be used to scatter the
|
||||
distribution of random functions such as <literal>random_zipfian</literal> or
|
||||
<literal>random_exponential</literal>. For instance, the following pgbench
|
||||
script simulates possible real world workload typical for social media and
|
||||
blogging platforms where few accounts generate excessive load:
|
||||
|
||||
<programlisting>
|
||||
\set r random_zipfian(0, 100000000, 1.07)
|
||||
\set k abs(hash(:r)) % 1000000
|
||||
</programlisting>
|
||||
|
||||
In some cases several distinct distributions are needed which don't correlate
|
||||
with each other and this is when implicit seed parameter comes in handy:
|
||||
|
||||
<programlisting>
|
||||
\set k1 abs(hash(:r), :default_seed + 123) % 1000000
|
||||
\set k2 abs(hash(:r), :default_seed + 321) % 1000000
|
||||
</programlisting>
|
||||
</para>
|
||||
|
||||
<para>
|
||||
As an example, the full definition of the built-in TPC-B-like
|
||||
transaction is:
|
||||
|
@ -16,6 +16,10 @@
|
||||
|
||||
#include "pgbench.h"
|
||||
|
||||
#define PGBENCH_NARGS_VARIABLE (-1)
|
||||
#define PGBENCH_NARGS_CASE (-2)
|
||||
#define PGBENCH_NARGS_HASH (-3)
|
||||
|
||||
PgBenchExpr *expr_parse_result;
|
||||
|
||||
static PgBenchExprList *make_elist(PgBenchExpr *exp, PgBenchExprList *list);
|
||||
@ -226,9 +230,13 @@ make_uop(yyscan_t yyscanner, const char *operator, PgBenchExpr *expr)
|
||||
/*
|
||||
* List of available functions:
|
||||
* - fname: function name, "!..." for special internal functions
|
||||
* - nargs: number of arguments
|
||||
* -1 is a special value for least & greatest meaning #args >= 1
|
||||
* -2 is for the "CASE WHEN ..." function, which has #args >= 3 and odd
|
||||
* - nargs: number of arguments. Special cases:
|
||||
* - PGBENCH_NARGS_VARIABLE is a special value for least & greatest
|
||||
* meaning #args >= 1;
|
||||
* - PGBENCH_NARGS_CASE is for the "CASE WHEN ..." function, which
|
||||
* has #args >= 3 and odd;
|
||||
* - PGBENCH_NARGS_HASH is for hash functions, which have one required
|
||||
* and one optional argument;
|
||||
* - tag: function identifier from PgBenchFunction enum
|
||||
*/
|
||||
static const struct
|
||||
@ -259,10 +267,10 @@ static const struct
|
||||
"abs", 1, PGBENCH_ABS
|
||||
},
|
||||
{
|
||||
"least", -1, PGBENCH_LEAST
|
||||
"least", PGBENCH_NARGS_VARIABLE, PGBENCH_LEAST
|
||||
},
|
||||
{
|
||||
"greatest", -1, PGBENCH_GREATEST
|
||||
"greatest", PGBENCH_NARGS_VARIABLE, PGBENCH_GREATEST
|
||||
},
|
||||
{
|
||||
"debug", 1, PGBENCH_DEBUG
|
||||
@ -347,7 +355,25 @@ static const struct
|
||||
},
|
||||
/* "case when ... then ... else ... end" construction */
|
||||
{
|
||||
"!case_end", -2, PGBENCH_CASE
|
||||
"!case_end", PGBENCH_NARGS_CASE, PGBENCH_CASE
|
||||
},
|
||||
{
|
||||
"hash", PGBENCH_NARGS_HASH, PGBENCH_HASH_MURMUR2
|
||||
},
|
||||
{
|
||||
"hash_murmur2", PGBENCH_NARGS_HASH, PGBENCH_HASH_MURMUR2
|
||||
},
|
||||
{
|
||||
"hash_fnv1a", PGBENCH_NARGS_HASH, PGBENCH_HASH_FNV1A
|
||||
},
|
||||
{
|
||||
"hash", PGBENCH_NARGS_HASH, PGBENCH_HASH_MURMUR2
|
||||
},
|
||||
{
|
||||
"hash_murmur2", PGBENCH_NARGS_HASH, PGBENCH_HASH_MURMUR2
|
||||
},
|
||||
{
|
||||
"hash_fnv1a", PGBENCH_NARGS_HASH, PGBENCH_HASH_FNV1A
|
||||
},
|
||||
/* keep as last array element */
|
||||
{
|
||||
@ -423,29 +449,51 @@ elist_length(PgBenchExprList *list)
|
||||
static PgBenchExpr *
|
||||
make_func(yyscan_t yyscanner, int fnumber, PgBenchExprList *args)
|
||||
{
|
||||
int len = elist_length(args);
|
||||
|
||||
PgBenchExpr *expr = pg_malloc(sizeof(PgBenchExpr));
|
||||
|
||||
Assert(fnumber >= 0);
|
||||
|
||||
if (PGBENCH_FUNCTIONS[fnumber].nargs >= 0 &&
|
||||
PGBENCH_FUNCTIONS[fnumber].nargs != elist_length(args))
|
||||
expr_yyerror_more(yyscanner, "unexpected number of arguments",
|
||||
PGBENCH_FUNCTIONS[fnumber].fname);
|
||||
|
||||
/* check at least one arg for least & greatest */
|
||||
if (PGBENCH_FUNCTIONS[fnumber].nargs == -1 &&
|
||||
elist_length(args) == 0)
|
||||
expr_yyerror_more(yyscanner, "at least one argument expected",
|
||||
PGBENCH_FUNCTIONS[fnumber].fname);
|
||||
/* special case: case (when ... then ...)+ (else ...)? end */
|
||||
if (PGBENCH_FUNCTIONS[fnumber].nargs == -2)
|
||||
/* validate arguments number including few special cases */
|
||||
switch (PGBENCH_FUNCTIONS[fnumber].nargs)
|
||||
{
|
||||
int len = elist_length(args);
|
||||
/* check at least one arg for least & greatest */
|
||||
case PGBENCH_NARGS_VARIABLE:
|
||||
if (len == 0)
|
||||
expr_yyerror_more(yyscanner, "at least one argument expected",
|
||||
PGBENCH_FUNCTIONS[fnumber].fname);
|
||||
break;
|
||||
|
||||
/* 'else' branch is always present, but could be a NULL-constant */
|
||||
if (len < 3 || len % 2 != 1)
|
||||
expr_yyerror_more(yyscanner, "odd and >= 3 number of arguments expected",
|
||||
"case control structure");
|
||||
/* case (when ... then ...)+ (else ...)? end */
|
||||
case PGBENCH_NARGS_CASE:
|
||||
/* 'else' branch is always present, but could be a NULL-constant */
|
||||
if (len < 3 || len % 2 != 1)
|
||||
expr_yyerror_more(yyscanner,
|
||||
"odd and >= 3 number of arguments expected",
|
||||
"case control structure");
|
||||
break;
|
||||
|
||||
/* hash functions with optional seed argument */
|
||||
case PGBENCH_NARGS_HASH:
|
||||
if (len > 2)
|
||||
expr_yyerror_more(yyscanner, "unexpected number of arguments",
|
||||
PGBENCH_FUNCTIONS[fnumber].fname);
|
||||
|
||||
if (len == 1)
|
||||
{
|
||||
PgBenchExpr *var = make_variable("default_seed");
|
||||
args = make_elist(var, args);
|
||||
}
|
||||
break;
|
||||
|
||||
/* common case: positive arguments number */
|
||||
default:
|
||||
Assert(PGBENCH_FUNCTIONS[fnumber].nargs >= 0);
|
||||
|
||||
if (PGBENCH_FUNCTIONS[fnumber].nargs != len)
|
||||
expr_yyerror_more(yyscanner, "unexpected number of arguments",
|
||||
PGBENCH_FUNCTIONS[fnumber].fname);
|
||||
}
|
||||
|
||||
expr->etype = ENODE_FUNCTION;
|
||||
|
@ -60,6 +60,14 @@
|
||||
|
||||
#define ERRCODE_UNDEFINED_TABLE "42P01"
|
||||
|
||||
/*
|
||||
* Hashing constants
|
||||
*/
|
||||
#define FNV_PRIME 0x100000001b3
|
||||
#define FNV_OFFSET_BASIS 0xcbf29ce484222325
|
||||
#define MM2_MUL 0xc6a4a7935bd1e995
|
||||
#define MM2_ROT 47
|
||||
|
||||
/*
|
||||
* Multi-platform pthread implementations
|
||||
*/
|
||||
@ -915,6 +923,54 @@ getZipfianRand(TState *thread, int64 min, int64 max, double s)
|
||||
: computeHarmonicZipfian(thread, n, s));
|
||||
}
|
||||
|
||||
/*
|
||||
* FNV-1a hash function
|
||||
*/
|
||||
static int64
|
||||
getHashFnv1a(int64 val, uint64 seed)
|
||||
{
|
||||
int64 result;
|
||||
int i;
|
||||
|
||||
result = FNV_OFFSET_BASIS ^ seed;
|
||||
for (i = 0; i < 8; ++i)
|
||||
{
|
||||
int32 octet = val & 0xff;
|
||||
|
||||
val = val >> 8;
|
||||
result = result ^ octet;
|
||||
result = result * FNV_PRIME;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/*
|
||||
* Murmur2 hash function
|
||||
*
|
||||
* Based on original work of Austin Appleby
|
||||
* https://github.com/aappleby/smhasher/blob/master/src/MurmurHash2.cpp
|
||||
*/
|
||||
static int64
|
||||
getHashMurmur2(int64 val, uint64 seed)
|
||||
{
|
||||
uint64 result = seed ^ (sizeof(int64) * MM2_MUL);
|
||||
uint64 k = (uint64) val;
|
||||
|
||||
k *= MM2_MUL;
|
||||
k ^= k >> MM2_ROT;
|
||||
k *= MM2_MUL;
|
||||
|
||||
result ^= k;
|
||||
result *= MM2_MUL;
|
||||
|
||||
result ^= result >> MM2_ROT;
|
||||
result *= MM2_MUL;
|
||||
result ^= result >> MM2_ROT;
|
||||
|
||||
return (int64) result;
|
||||
}
|
||||
|
||||
/*
|
||||
* Initialize the given SimpleStats struct to all zeroes
|
||||
*/
|
||||
@ -2211,6 +2267,30 @@ evalStandardFunc(TState *thread, CState *st,
|
||||
return true;
|
||||
}
|
||||
|
||||
/* hashing */
|
||||
case PGBENCH_HASH_FNV1A:
|
||||
case PGBENCH_HASH_MURMUR2:
|
||||
{
|
||||
int64 val,
|
||||
seed;
|
||||
|
||||
Assert(nargs == 2);
|
||||
|
||||
if (!coerceToInt(&vargs[0], &val) ||
|
||||
!coerceToInt(&vargs[1], &seed))
|
||||
return false;
|
||||
|
||||
if (func == PGBENCH_HASH_MURMUR2)
|
||||
setIntValue(retval, getHashMurmur2(val, seed));
|
||||
else if (func == PGBENCH_HASH_FNV1A)
|
||||
setIntValue(retval, getHashFnv1a(val, seed));
|
||||
else
|
||||
/* cannot get here */
|
||||
Assert(0);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
default:
|
||||
/* cannot get here */
|
||||
Assert(0);
|
||||
@ -4963,6 +5043,10 @@ main(int argc, char **argv)
|
||||
exit(1);
|
||||
}
|
||||
|
||||
/* set random seed */
|
||||
INSTR_TIME_SET_CURRENT(start_time);
|
||||
srandom((unsigned int) INSTR_TIME_GET_MICROSEC(start_time));
|
||||
|
||||
if (internal_script_used)
|
||||
{
|
||||
/*
|
||||
@ -5024,6 +5108,19 @@ main(int argc, char **argv)
|
||||
}
|
||||
}
|
||||
|
||||
/* set default seed for hash functions */
|
||||
if (lookupVariable(&state[0], "default_seed") == NULL)
|
||||
{
|
||||
uint64 seed = ((uint64) (random() & 0xFFFF) << 48) |
|
||||
((uint64) (random() & 0xFFFF) << 32) |
|
||||
((uint64) (random() & 0xFFFF) << 16) |
|
||||
(uint64) (random() & 0xFFFF);
|
||||
|
||||
for (i = 0; i < nclients; i++)
|
||||
if (!putVariableInt(&state[i], "startup", "default_seed", (int64) seed))
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (!is_no_vacuum)
|
||||
{
|
||||
fprintf(stderr, "starting vacuum...");
|
||||
@ -5041,10 +5138,6 @@ main(int argc, char **argv)
|
||||
}
|
||||
PQfinish(con);
|
||||
|
||||
/* set random seed */
|
||||
INSTR_TIME_SET_CURRENT(start_time);
|
||||
srandom((unsigned int) INSTR_TIME_GET_MICROSEC(start_time));
|
||||
|
||||
/* set up thread data structures */
|
||||
threads = (TState *) pg_malloc(sizeof(TState) * nthreads);
|
||||
nclients_dealt = 0;
|
||||
|
@ -97,7 +97,9 @@ typedef enum PgBenchFunction
|
||||
PGBENCH_LE,
|
||||
PGBENCH_LT,
|
||||
PGBENCH_IS,
|
||||
PGBENCH_CASE
|
||||
PGBENCH_CASE,
|
||||
PGBENCH_HASH_FNV1A,
|
||||
PGBENCH_HASH_MURMUR2
|
||||
} PgBenchFunction;
|
||||
|
||||
typedef struct PgBenchExpr PgBenchExpr;
|
||||
|
@ -259,6 +259,11 @@ pgbench(
|
||||
qr{command=46.: int 46\b},
|
||||
qr{command=47.: boolean true\b},
|
||||
qr{command=48.: boolean true\b},
|
||||
qr{command=49.: int -5817877081768721676\b},
|
||||
qr{command=50.: boolean true\b},
|
||||
qr{command=51.: int -7793829335365542153\b},
|
||||
qr{command=52.: int -?\d+\b},
|
||||
qr{command=53.: boolean true\b},
|
||||
],
|
||||
'pgbench expressions',
|
||||
{ '001_pgbench_expressions' => q{-- integer functions
|
||||
@ -327,6 +332,12 @@ pgbench(
|
||||
\set n6 debug(:n IS NULL AND NOT :f AND :t)
|
||||
-- conditional truth
|
||||
\set cs debug(CASE WHEN 1 THEN TRUE END AND CASE WHEN 1.0 THEN TRUE END AND CASE WHEN :n THEN NULL ELSE TRUE END)
|
||||
-- hash functions
|
||||
\set h0 debug(hash(10, 5432))
|
||||
\set h1 debug(:h0 = hash_murmur2(10, 5432))
|
||||
\set h3 debug(hash_fnv1a(10, 5432))
|
||||
\set h4 debug(hash(10))
|
||||
\set h5 debug(hash(10) = hash(10, :default_seed))
|
||||
-- lazy evaluation
|
||||
\set zy 0
|
||||
\set yz debug(case when :zy = 0 then -1 else (1 / :zy) end)
|
||||
|
Loading…
x
Reference in New Issue
Block a user