diff --git a/doc/src/sgml/ref/pgbench.sgml b/doc/src/sgml/ref/pgbench.sgml index 88cf8b3933..301bcfa1ef 100644 --- a/doc/src/sgml/ref/pgbench.sgml +++ b/doc/src/sgml/ref/pgbench.sgml @@ -1493,15 +1493,24 @@ f(x) = PHI(2.0 * parameter * (x - mu) / (max - min + 1)) / in (1, 1000), a rejection method is used, based on "Non-Uniform Random Variate Generation", Luc Devroye, p. 550-551, Springer 1986. The distribution is not defined when the parameter's - value is 1.0. The drawing performance is poor for parameter values + value is 1.0. The function's performance is poor for parameter values close and above 1.0 and on a small range. - parameter - defines how skewed the distribution is. The larger the parameter, the more - frequently values to the beginning of the interval are drawn. + parameter defines how skewed the distribution + is. The larger the parameter, the more + frequently values closer to the beginning of the interval are drawn. The closer to 0 parameter is, - the flatter (more uniform) the access distribution. + the flatter (more uniform) the output distribution. + The distribution is such that, assuming the range starts from 1, + the ratio of the probability of drawing k + versus drawing k+1 is + ((k+1)/k)**parameter. + For example, random_zipfian(1, ..., 2.5) produces + the value 1 about (2/1)**2.5 = + 5.66 times more frequently than 2, which + itself is produced (3/2)**2.5 = 2.76 times more + frequently than 3, and so on. diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c index 5e2de16cdd..9eaa192239 100644 --- a/src/bin/pgbench/pgbench.c +++ b/src/bin/pgbench/pgbench.c @@ -156,7 +156,7 @@ int64 latency_limit = 0; char *tablespace = NULL; char *index_tablespace = NULL; -/* random seed used when calling srandom() */ +/* random seed used to initialize base_random_sequence */ int64 random_seed = -1; /* @@ -250,6 +250,9 @@ typedef struct StatsData SimpleStats lag; } StatsData; +/* Various random sequences are initialized from this one. 
*/ +static unsigned short base_random_sequence[3]; + /* * Connection state machine states. */ @@ -692,7 +695,14 @@ gotdigits: return ((sign < 0) ? -result : result); } -/* random number generator: uniform distribution from min to max inclusive */ +/* + * Random number generator: uniform distribution from min to max inclusive. + * + * Although the limits are expressed as int64, you can't generate the full + * int64 range in one call, because the difference of the limits mustn't + * overflow int64. In practice it's unwise to ask for more than an int32 + * range, because of the limited precision of pg_erand48(). + */ static int64 getrand(TState *thread, int64 min, int64 max) { @@ -4700,12 +4710,14 @@ printResults(TState *threads, StatsData *total, instr_time total_time, } } -/* call srandom based on some seed. NULL triggers the default behavior. */ +/* + * Set up a random seed according to seed parameter (NULL means default), + * and initialize base_random_sequence for use in initializing other sequences. 
+ */ static bool set_random_seed(const char *seed) { - /* srandom expects an unsigned int */ - unsigned int iseed; + uint64 iseed; if (seed == NULL || strcmp(seed, "time") == 0) { @@ -4713,7 +4725,7 @@ set_random_seed(const char *seed) instr_time now; INSTR_TIME_SET_CURRENT(now); - iseed = (unsigned int) INSTR_TIME_GET_MICROSEC(now); + iseed = (uint64) INSTR_TIME_GET_MICROSEC(now); } else if (strcmp(seed, "rand") == 0) { @@ -4733,7 +4745,7 @@ set_random_seed(const char *seed) /* parse seed unsigned int value */ char garbage; - if (sscanf(seed, "%u%c", &iseed, &garbage) != 1) + if (sscanf(seed, UINT64_FORMAT "%c", &iseed, &garbage) != 1) { fprintf(stderr, "unrecognized random seed option \"%s\": expecting an unsigned integer, \"time\" or \"rand\"\n", @@ -4743,10 +4755,14 @@ set_random_seed(const char *seed) } if (seed != NULL) - fprintf(stderr, "setting random seed to %u\n", iseed); - srandom(iseed); - /* no precision loss: 32 bit unsigned int cast to 64 bit int */ + fprintf(stderr, "setting random seed to " UINT64_FORMAT "\n", iseed); random_seed = iseed; + + /* Fill base_random_sequence with low-order bits of seed */ + base_random_sequence[0] = iseed & 0xFFFF; + base_random_sequence[1] = (iseed >> 16) & 0xFFFF; + base_random_sequence[2] = (iseed >> 32) & 0xFFFF; + return true; } @@ -5444,10 +5460,9 @@ main(int argc, char **argv) /* set default seed for hash functions */ if (lookupVariable(&state[0], "default_seed") == NULL) { - uint64 seed = ((uint64) (random() & 0xFFFF) << 48) | - ((uint64) (random() & 0xFFFF) << 32) | - ((uint64) (random() & 0xFFFF) << 16) | - (uint64) (random() & 0xFFFF); + uint64 seed = + ((uint64) pg_jrand48(base_random_sequence) & 0xFFFFFFFF) | + (((uint64) pg_jrand48(base_random_sequence) & 0xFFFFFFFF) << 32); for (i = 0; i < nclients; i++) if (!putVariableInt(&state[i], "startup", "default_seed", (int64) seed)) @@ -5491,9 +5506,12 @@ main(int argc, char **argv) thread->state = &state[nclients_dealt]; thread->nstate = (nclients - 
nclients_dealt + nthreads - i - 1) / (nthreads - i); - thread->random_state[0] = random(); - thread->random_state[1] = random(); - thread->random_state[2] = random(); + thread->random_state[0] = (unsigned short) + (pg_jrand48(base_random_sequence) & 0xFFFF); + thread->random_state[1] = (unsigned short) + (pg_jrand48(base_random_sequence) & 0xFFFF); + thread->random_state[2] = (unsigned short) + (pg_jrand48(base_random_sequence) & 0xFFFF); thread->logfile = NULL; /* filled in later */ thread->latency_late = 0; thread->zipf_cache.nb_cells = 0; diff --git a/src/bin/pgbench/t/001_pgbench_with_server.pl b/src/bin/pgbench/t/001_pgbench_with_server.pl index 6b3bcef25a..1a6e96a454 100644 --- a/src/bin/pgbench/t/001_pgbench_with_server.pl +++ b/src/bin/pgbench/t/001_pgbench_with_server.pl @@ -259,11 +259,11 @@ pgbench( [ qr{setting random seed to 5432\b}, - # After explicit seeding, the four * random checks (1-3,20) should be - # deterministic, but not necessarily portable. - qr{command=1.: int 1\d\b}, # uniform random: 12 on linux - qr{command=2.: int 1\d\d\b}, # exponential random: 106 on linux - qr{command=3.: int 1\d\d\d\b}, # gaussian random: 1462 on linux + # After explicit seeding, the four random checks (1-3,20) are + # deterministic + qr{command=1.: int 18\b}, # uniform random + qr{command=2.: int 101\b}, # exponential random + qr{command=3.: int 1415\b}, # gaussian random qr{command=4.: int 4\b}, qr{command=5.: int 5\b}, qr{command=6.: int 6\b}, @@ -277,7 +277,7 @@ pgbench( qr{command=16.: double 16\b}, qr{command=17.: double 17\b}, qr{command=18.: int 9223372036854775807\b}, - qr{command=20.: int \d\b}, # zipfian random: 1 on linux + qr{command=20.: int 2\b}, # zipfian random qr{command=21.: double -27\b}, qr{command=22.: double 1024\b}, qr{command=23.: double 1\b}, @@ -468,7 +468,7 @@ for my $i (1, 2) \set ur random(1000, 1999) \set er random_exponential(2000, 2999, 2.0) \set gr random_gaussian(3000, 3999, 3.0) -\set zr random_zipfian(4000, 4999, 2.5) +\set zr 
random_zipfian(4000, 4999, 1.5) INSERT INTO seeded_random(seed, rand, val) VALUES (:random_seed, 'uniform', :ur), (:random_seed, 'exponential', :er),