diff --git a/doc/src/sgml/ref/pgbench.sgml b/doc/src/sgml/ref/pgbench.sgml
index 50cf22ba6b..8eb4f538d5 100644
--- a/doc/src/sgml/ref/pgbench.sgml
+++ b/doc/src/sgml/ref/pgbench.sgml
@@ -1057,7 +1057,7 @@ pgbench options d
default_seed
- seed used in hash functions by default
+ seed used in hash and pseudorandom permutation functions by default
@@ -1864,6 +1864,24 @@ SELECT 4 AS four \; SELECT 5 AS five \aset
+
+
+ permute ( i, size [, seed ] )
+ integer
+
+
+ Permuted value of i, in the range
+ [0, size). This is the new position of
+ i (modulo size) in a
+ pseudorandom permutation of the integers 0...size-1,
+ parameterized by seed; see below.
+
+
+ permute(0, 4)
+ an integer between 0 and 3
+
+
+
pi ()
@@ -2071,29 +2089,70 @@ f(x) = PHI(2.0 * parameter * (x - mu) / (max - min + 1)) /
+
+
+ When designing a benchmark which selects rows non-uniformly, be aware
+ that the rows chosen may be correlated with other data such as IDs from
+ a sequence or the physical row ordering, which may skew performance
+ measurements.
+
+
+ To avoid this, you may wish to use the permute
+ function, or some other additional step with similar effect, to shuffle
+ the selected rows and remove such correlations.
+
+
+
Hash functions hash, hash_murmur2 and
hash_fnv1a accept an input value and an optional seed parameter.
In case the seed isn't provided the value of :default_seed
is used, which is initialized randomly unless set by the command-line
- -D option. Hash functions can be used to scatter the
- distribution of random functions such as random_zipfian or
- random_exponential. For instance, the following pgbench
- script simulates possible real world workload typical for social media and
- blogging platforms where few accounts generate excessive load:
+ -D option.
+
+
+
+ permute accepts an input value, a size, and an optional
+ seed parameter. It generates a pseudorandom permutation of integers in
+ the range [0, size), and returns the index of the input
+ value in the permuted values. The permutation chosen is parameterized by
+ the seed, which defaults to :default_seed if not
+ specified. Unlike the hash functions, permute ensures
+ that there are no collisions or holes in the output values. Input values
+ outside the interval are interpreted modulo the size. The function raises
+ an error if the size is not positive. permute can be
+ used to scatter the distribution of non-uniform random functions such as
+ random_zipfian or random_exponential
+ so that values drawn more often are not trivially correlated. For
+ instance, the following pgbench script
+ simulates a possible real world workload typical for social media and
+ blogging platforms where a few accounts generate excessive load:
-\set r random_zipfian(0, 100000000, 1.07)
-\set k abs(hash(:r)) % 1000000
+\set size 1000000
+\set r random_zipfian(1, :size, 1.07)
+\set k 1 + permute(:r, :size)
In some cases several distinct distributions are needed which don't correlate
- with each other and this is when implicit seed parameter comes in handy:
+ with each other and this is when the optional seed parameter comes in handy:
-\set k1 abs(hash(:r, :default_seed + 123)) % 1000000
-\set k2 abs(hash(:r, :default_seed + 321)) % 1000000
+\set k1 1 + permute(:r, :size, :default_seed + 123)
+\set k2 1 + permute(:r, :size, :default_seed + 321)
+
+ A similar behavior can also be approximated with hash:
+
+
+\set size 1000000
+\set r random_zipfian(1, 100 * :size, 1.07)
+\set k 1 + abs(hash(:r)) % :size
+
+
+ However, since hash generates collisions, some values
+ will not be reachable and others will be more frequent than expected from
+ the original distribution.
diff --git a/src/bin/pgbench/exprparse.y b/src/bin/pgbench/exprparse.y
index 4d529ea550..56f75ccd25 100644
--- a/src/bin/pgbench/exprparse.y
+++ b/src/bin/pgbench/exprparse.y
@@ -19,6 +19,7 @@
#define PGBENCH_NARGS_VARIABLE (-1)
#define PGBENCH_NARGS_CASE (-2)
#define PGBENCH_NARGS_HASH (-3)
+#define PGBENCH_NARGS_PERMUTE (-4)
PgBenchExpr *expr_parse_result;
@@ -370,6 +371,9 @@ static const struct
{
"hash_fnv1a", PGBENCH_NARGS_HASH, PGBENCH_HASH_FNV1A
},
+ {
+ "permute", PGBENCH_NARGS_PERMUTE, PGBENCH_PERMUTE
+ },
/* keep as last array element */
{
NULL, 0, 0
@@ -482,6 +486,19 @@ make_func(yyscan_t yyscanner, int fnumber, PgBenchExprList *args)
}
break;
+ /* pseudorandom permutation function with optional seed argument */
+ case PGBENCH_NARGS_PERMUTE:
+ if (len < 2 || len > 3)
+ expr_yyerror_more(yyscanner, "unexpected number of arguments",
+ PGBENCH_FUNCTIONS[fnumber].fname);
+
+ if (len == 2)
+ {
+ PgBenchExpr *var = make_variable("default_seed");
+ args = make_elist(var, args);
+ }
+ break;
+
/* common case: positive arguments number */
default:
Assert(PGBENCH_FUNCTIONS[fnumber].nargs >= 0);
diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c
index 48ce1712cc..da1d9ec535 100644
--- a/src/bin/pgbench/pgbench.c
+++ b/src/bin/pgbench/pgbench.c
@@ -66,6 +66,7 @@
#include "getopt_long.h"
#include "libpq-fe.h"
#include "pgbench.h"
+#include "port/pg_bitutils.h"
#include "portability/instr_time.h"
#ifndef M_PI
@@ -1127,6 +1128,113 @@ getHashMurmur2(int64 val, uint64 seed)
return (int64) result;
}
+/*
+ * Pseudorandom permutation function
+ *
+ * For small sizes, this generates each of the (size!) possible permutations
+ * of integers in the range [0, size) with roughly equal probability. Once
+ * the size is larger than 20, the number of possible permutations exceeds the
+ * number of distinct states of the internal pseudorandom number generators,
+ * and so not all possible permutations can be generated, but the permutations
+ * chosen should continue to give the appearance of being random.
+ *
+ * THIS FUNCTION IS NOT CRYPTOGRAPHICALLY SECURE.
+ * DO NOT USE FOR SUCH PURPOSE.
+ */
+static int64
+permute(const int64 val, const int64 isize, const int64 seed)
+{
+ RandomState random_state1;
+ RandomState random_state2;
+ uint64 size;
+ uint64 v;
+ int masklen;
+ uint64 mask;
+ int i;
+
+ if (isize < 2)
+ return 0; /* fewer than two values: nothing to permute */
+
+ /* Seed two PRNG states from different 16-bit slices of the 64-bit seed */
+ random_state1.xseed[0] = seed & 0xFFFF;
+ random_state1.xseed[1] = (seed >> 16) & 0xFFFF;
+ random_state1.xseed[2] = (seed >> 32) & 0xFFFF;
+
+ random_state2.xseed[0] = (((uint64) seed) >> 48) & 0xFFFF;
+ random_state2.xseed[1] = seed & 0xFFFF;
+ random_state2.xseed[2] = (seed >> 16) & 0xFFFF;
+
+ /* Work on unsigned values; val is cast to uint64 before the modulo by size */
+ size = (uint64) isize;
+ v = (uint64) val % size;
+
+ /* Mask to work modulo largest power of 2 less than or equal to size */
+ masklen = pg_leftmost_one_pos64(size);
+ mask = (((uint64) 1) << masklen) - 1;
+
+ /*
+ * Permute the input value by applying several rounds of pseudorandom
+ * bijective transformations. The intention here is to distribute each
+ * input uniformly randomly across the range, and separate adjacent inputs
+ * approximately uniformly randomly from each other, leading to a fairly
+ * random overall choice of permutation.
+ *
+ * To separate adjacent inputs, we multiply by a random number modulo
+ * (mask + 1), which is a power of 2. For this to be a bijection, the
+ * multiplier must be odd. Since this is known to lead to less randomness
+ * in the lower bits, we also apply a rotation that shifts the topmost bit
+ * into the least significant bit. In the special cases where size <= 3,
+ * mask = 1 and each of these operations is actually a no-op, so we also
+ * XOR the value with a different random number to inject additional
+ * randomness. Since the size is generally not a power of 2, we apply
+ * this bijection on overlapping upper and lower halves of the input.
+ *
+ * To distribute the inputs uniformly across the range, we then also apply
+ * a random offset modulo the full range.
+ *
+ * Taken together, these operations resemble a modified linear
+ * congruential generator, as is commonly used in pseudorandom number
+ * generators. The number of rounds is fairly arbitrary, but six has been
+ * found empirically to give a fairly good tradeoff between performance
+ * and uniform randomness. For small sizes it selects each of the (size!)
+ * possible permutations with roughly equal probability. For larger
+ * sizes, not all permutations can be generated, but the intended random
+ * spread is still produced.
+ */
+ for (i = 0; i < 6; i++)
+ {
+ uint64 m,
+ r,
+ t;
+
+ /* Odd multiply, XOR and rotate of lower half; m, r drawn regardless of v */
+ m = (uint64) getrand(&random_state1, 0, mask) | 1;
+ r = (uint64) getrand(&random_state2, 0, mask);
+ if (v <= mask)
+ {
+ v = ((v * m) ^ r) & mask;
+ v = ((v << 1) & mask) | (v >> (masklen - 1));
+ }
+
+ /* Same steps on the upper half, mirrored through t = size - 1 - v */
+ m = (uint64) getrand(&random_state1, 0, mask) | 1;
+ r = (uint64) getrand(&random_state2, 0, mask);
+ t = size - 1 - v;
+ if (t <= mask)
+ {
+ t = ((t * m) ^ r) & mask;
+ t = ((t << 1) & mask) | (t >> (masklen - 1));
+ v = size - 1 - t;
+ }
+
+ /* Random offset, reduced modulo the full range */
+ r = (uint64) getrand(&random_state2, 0, size - 1);
+ v = (v + r) % size;
+ }
+
+ return (int64) v;
+}
+
/*
* Initialize the given SimpleStats struct to all zeroes
*/
@@ -2475,6 +2583,29 @@ evalStandardFunc(CState *st,
return true;
}
+ case PGBENCH_PERMUTE:
+ {
+ int64 val,
+ size,
+ seed;
+
+ Assert(nargs == 3);
+
+ if (!coerceToInt(&vargs[0], &val) ||
+ !coerceToInt(&vargs[1], &size) ||
+ !coerceToInt(&vargs[2], &seed))
+ return false;
+
+ if (size <= 0)
+ {
+ pg_log_error("permute size parameter must be greater than zero");
+ return false;
+ }
+
+ setIntValue(retval, permute(val, size, seed));
+ return true;
+ }
+
default:
/* cannot get here */
Assert(0);
diff --git a/src/bin/pgbench/pgbench.h b/src/bin/pgbench/pgbench.h
index 3a9d89e6f1..6ce1c98649 100644
--- a/src/bin/pgbench/pgbench.h
+++ b/src/bin/pgbench/pgbench.h
@@ -99,7 +99,8 @@ typedef enum PgBenchFunction
PGBENCH_IS,
PGBENCH_CASE,
PGBENCH_HASH_FNV1A,
- PGBENCH_HASH_MURMUR2
+ PGBENCH_HASH_MURMUR2,
+ PGBENCH_PERMUTE
} PgBenchFunction;
typedef struct PgBenchExpr PgBenchExpr;
diff --git a/src/bin/pgbench/t/001_pgbench_with_server.pl b/src/bin/pgbench/t/001_pgbench_with_server.pl
index 82a46c72b6..c2482dea17 100644
--- a/src/bin/pgbench/t/001_pgbench_with_server.pl
+++ b/src/bin/pgbench/t/001_pgbench_with_server.pl
@@ -4,6 +4,7 @@ use warnings;
use PostgresNode;
use TestLib;
use Test::More;
+use Config;
# start a pgbench specific server
my $node = get_new_node('main');
@@ -483,6 +484,17 @@ pgbench(
qr{command=98.: int 5432\b}, # :random_seed
qr{command=99.: int -9223372036854775808\b}, # min int
qr{command=100.: int 9223372036854775807\b}, # max int
+ # pseudorandom permutation tests
+ qr{command=101.: boolean true\b},
+ qr{command=102.: boolean true\b},
+ qr{command=103.: boolean true\b},
+ qr{command=104.: boolean true\b},
+ qr{command=105.: boolean true\b},
+ qr{command=109.: boolean true\b},
+ qr{command=110.: boolean true\b},
+ qr{command=111.: boolean true\b},
+ qr{command=112.: int 9223372036854775797\b},
+ qr{command=113.: boolean true\b},
],
'pgbench expressions',
{
@@ -610,6 +622,33 @@ SELECT :v0, :v1, :v2, :v3;
-- minint constant parsing
\set min debug(-9223372036854775808)
\set max debug(-(:min + 1))
+-- parametric pseudorandom permutation function
+\set t debug(permute(0, 2) + permute(1, 2) = 1)
+\set t debug(permute(0, 3) + permute(1, 3) + permute(2, 3) = 3)
+\set t debug(permute(0, 4) + permute(1, 4) + permute(2, 4) + permute(3, 4) = 6)
+\set t debug(permute(0, 5) + permute(1, 5) + permute(2, 5) + permute(3, 5) + permute(4, 5) = 10)
+\set t debug(permute(0, 16) + permute(1, 16) + permute(2, 16) + permute(3, 16) + \
+ permute(4, 16) + permute(5, 16) + permute(6, 16) + permute(7, 16) + \
+ permute(8, 16) + permute(9, 16) + permute(10, 16) + permute(11, 16) + \
+ permute(12, 16) + permute(13, 16) + permute(14, 16) + permute(15, 16) = 120)
+-- random sanity checks
+\set size random(2, 1000)
+\set v random(0, :size - 1)
+\set p permute(:v, :size)
+\set t debug(0 <= :p and :p < :size and :p = permute(:v + :size, :size) and :p <> permute(:v + 1, :size))
+-- actual values
+\set t debug(permute(:v, 1) = 0)
+\set t debug(permute(0, 2, 5432) = 0 and permute(1, 2, 5432) = 1 and \
+ permute(0, 2, 5435) = 1 and permute(1, 2, 5435) = 0)
+-- 63 bits tests
+\set size debug(:max - 10)
+\set t debug(permute(:size-1, :size, 5432) = 5301702756001087507 and \
+ permute(:size-2, :size, 5432) = 8968485976055840695 and \
+ permute(:size-3, :size, 5432) = 6708495591295582115 and \
+ permute(:size-4, :size, 5432) = 2801794404574855121 and \
+ permute(:size-5, :size, 5432) = 1489011409218895840 and \
+ permute(:size-6, :size, 5432) = 2267749475878240183 and \
+ permute(:size-7, :size, 5432) = 1300324176838786780)
}
});
@@ -1048,6 +1087,10 @@ SELECT LEAST(} . join(', ', (':i') x 256) . q{)}
'bad boolean', 2,
[qr{malformed variable.*trueXXX}], q{\set b :badtrue or true}
],
+ [
+ 'invalid permute size', 2,
+ [qr{permute size parameter must be greater than zero}], q{\set i permute(0, 0)}
+ ],
# GSET
[
diff --git a/src/bin/pgbench/t/002_pgbench_no_server.pl b/src/bin/pgbench/t/002_pgbench_no_server.pl
index e38c7d77d1..4027e68dfa 100644
--- a/src/bin/pgbench/t/002_pgbench_no_server.pl
+++ b/src/bin/pgbench/t/002_pgbench_no_server.pl
@@ -341,6 +341,16 @@ my @script_tests = (
'set i',
[ qr{set i 1 }, qr{\^ error found here} ],
{ 'set_i_op' => "\\set i 1 +\n" }
+ ],
+ [
+ 'not enough arguments to permute',
+ [qr{unexpected number of arguments \(permute\)}],
+ { 'bad-permute-1.sql' => "\\set i permute(1)\n" }
+ ],
+ [
+ 'too many arguments to permute',
+ [qr{unexpected number of arguments \(permute\)}],
+ { 'bad-permute-2.sql' => "\\set i permute(1, 2, 3, 4)\n" }
],);
for my $t (@script_tests)