diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index 8ecc02f2b9..93b0bc2bc6 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -1862,6 +1862,39 @@ SELECT NOT(ROW(table.*) IS NOT NULL) FROM TABLE; -- detect at least one null in + + + + random + + random ( min integer, max integer ) + integer + + + random ( min bigint, max bigint ) + bigint + + + random ( min numeric, max numeric ) + numeric + + + Returns a random value in the range + min <= x <= max. + For type numeric, the result will have the same number of + fractional decimal digits as min or + max, whichever has more. + + + random(1, 10) + 7 + + + random(-0.499, 0.499) + 0.347 + + + @@ -1906,19 +1939,19 @@ SELECT NOT(ROW(table.*) IS NOT NULL) FROM TABLE; -- detect at least one null in - The random() function uses a deterministic - pseudo-random number generator. + The random() and random_normal() + functions listed in use a + deterministic pseudo-random number generator. It is fast but not suitable for cryptographic applications; see the module for a more secure alternative. If setseed() is called, the series of results of - subsequent random() calls in the current session + subsequent calls to these functions in the current session can be repeated by re-issuing setseed() with the same argument. Without any prior setseed() call in the same - session, the first random() call obtains a seed + session, the first call to any of these functions obtains a seed from a platform-dependent source of random bits. - These remarks hold equally for random_normal(). diff --git a/src/backend/utils/adt/Makefile b/src/backend/utils/adt/Makefile index 199eae525d..610ccf2f79 100644 --- a/src/backend/utils/adt/Makefile +++ b/src/backend/utils/adt/Makefile @@ -82,6 +82,7 @@ OBJS = \ pg_lsn.o \ pg_upgrade_support.o \ pgstatfuncs.o \ + pseudorandomfuncs.o \ pseudotypes.o \ quote.o \ rangetypes.o \ diff --git a/src/backend/utils/adt/float.c b/src/backend/utils/adt/float.c index 901edcc896..cbbb8aecaf 100644 --- a/src/backend/utils/adt/float.c +++ b/src/backend/utils/adt/float.c @@ -21,10 +21,8 @@ #include "catalog/pg_type.h" #include "common/int.h" -#include "common/pg_prng.h" #include "common/shortest_dec.h" #include "libpq/pqformat.h" -#include "miscadmin.h" #include "utils/array.h" #include "utils/float.h" #include "utils/fmgrprotos.h" @@ -64,10 +62,6 @@ float8 degree_c_sixty = 60.0; float8 degree_c_one_half = 0.5; float8 degree_c_one = 1.0; -/* State for drandom() and setseed() */ -static bool drandom_seed_set = false; -static pg_prng_state drandom_seed; - /* Local function prototypes */ static double sind_q1(double x); static double cosd_q1(double x); @@ -2785,95 +2779,6 @@ derfc(PG_FUNCTION_ARGS) } -/* ========== RANDOM FUNCTIONS ========== */ - - -/* - * initialize_drandom_seed - initialize drandom_seed if not yet done - */ -static void -initialize_drandom_seed(void) -{ - /* Initialize random seed, if not done yet in this process */ - if (unlikely(!drandom_seed_set)) - { - /* - * If possible, initialize the seed using high-quality random bits. - * Should that fail for some reason, we fall back on a lower-quality - * seed based on current time and PID. - */ - if (unlikely(!pg_prng_strong_seed(&drandom_seed))) - { - TimestampTz now = GetCurrentTimestamp(); - uint64 iseed; - - /* Mix the PID with the most predictable bits of the timestamp */ - iseed = (uint64) now ^ ((uint64) MyProcPid << 32); - pg_prng_seed(&drandom_seed, iseed); - } - drandom_seed_set = true; - } -} - -/* - * drandom - returns a random number - */ -Datum -drandom(PG_FUNCTION_ARGS) -{ - float8 result; - - initialize_drandom_seed(); - - /* pg_prng_double produces desired result range [0.0 - 1.0) */ - result = pg_prng_double(&drandom_seed); - - PG_RETURN_FLOAT8(result); -} - -/* - * drandom_normal - returns a random number from a normal distribution - */ -Datum -drandom_normal(PG_FUNCTION_ARGS) -{ - float8 mean = PG_GETARG_FLOAT8(0); - float8 stddev = PG_GETARG_FLOAT8(1); - float8 result, - z; - - initialize_drandom_seed(); - - /* Get random value from standard normal(mean = 0.0, stddev = 1.0) */ - z = pg_prng_double_normal(&drandom_seed); - /* Transform the normal standard variable (z) */ - /* using the target normal distribution parameters */ - result = (stddev * z) + mean; - - PG_RETURN_FLOAT8(result); -} - -/* - * setseed - set seed for the random number generator - */ -Datum -setseed(PG_FUNCTION_ARGS) -{ - float8 seed = PG_GETARG_FLOAT8(0); - - if (seed < -1 || seed > 1 || isnan(seed)) - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("setseed parameter %g is out of allowed range [-1,1]", - seed))); - - pg_prng_fseed(&drandom_seed, seed); - drandom_seed_set = true; - - PG_RETURN_VOID(); -} - - /* * ========================= diff --git a/src/backend/utils/adt/meson.build b/src/backend/utils/adt/meson.build index f3dfb52204..48dbcf59a5 100644 --- a/src/backend/utils/adt/meson.build +++ b/src/backend/utils/adt/meson.build @@ -69,6 +69,7 @@ backend_sources += files( 'pg_lsn.c', 'pg_upgrade_support.c', 'pgstatfuncs.c', + 'pseudorandomfuncs.c', 'pseudotypes.c', 'quote.c', 'rangetypes.c', diff --git a/src/backend/utils/adt/numeric.c b/src/backend/utils/adt/numeric.c index b818189d86..5510a203b0 100644 --- a/src/backend/utils/adt/numeric.c +++ b/src/backend/utils/adt/numeric.c @@ -583,6 +583,8 @@ static void power_var(const NumericVar *base, const NumericVar *exp, static void power_var_int(const NumericVar *base, int exp, int exp_dscale, NumericVar *result); static void power_ten_int(int exp, NumericVar *result); +static void random_var(pg_prng_state *state, const NumericVar *rmin, + const NumericVar *rmax, NumericVar *result); static int cmp_abs(const NumericVar *var1, const NumericVar *var2); static int cmp_abs_common(const NumericDigit *var1digits, int var1ndigits, @@ -4219,6 +4221,56 @@ numeric_trim_scale(PG_FUNCTION_ARGS) PG_RETURN_NUMERIC(res); } +/* + * Return a random numeric value in the range [rmin, rmax]. + */ +Numeric +random_numeric(pg_prng_state *state, Numeric rmin, Numeric rmax) +{ + NumericVar rmin_var; + NumericVar rmax_var; + NumericVar result; + Numeric res; + + /* Range bounds must not be NaN/infinity */ + if (NUMERIC_IS_SPECIAL(rmin)) + { + if (NUMERIC_IS_NAN(rmin)) + ereport(ERROR, + errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("lower bound cannot be NaN")); + else + ereport(ERROR, + errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("lower bound cannot be infinity")); + } + if (NUMERIC_IS_SPECIAL(rmax)) + { + if (NUMERIC_IS_NAN(rmax)) + ereport(ERROR, + errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("upper bound cannot be NaN")); + else + ereport(ERROR, + errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("upper bound cannot be infinity")); + } + + /* Return a random value in the range [rmin, rmax] */ + init_var_from_num(rmin, &rmin_var); + init_var_from_num(rmax, &rmax_var); + + init_var(&result); + + random_var(state, &rmin_var, &rmax_var, &result); + + res = make_result(&result); + + free_var(&result); + + return res; +} + /* ---------------------------------------------------------------------- * @@ -11262,6 +11314,173 @@ power_ten_int(int exp, NumericVar *result) result->digits[0] *= 10; } +/* + * random_var() - return a random value in the range [rmin, rmax]. + */ +static void +random_var(pg_prng_state *state, const NumericVar *rmin, + const NumericVar *rmax, NumericVar *result) +{ + int rscale; + NumericVar rlen; + int res_ndigits; + int n; + int pow10; + int i; + uint64 rlen64; + int rlen64_ndigits; + + rscale = Max(rmin->dscale, rmax->dscale); + + /* Compute rlen = rmax - rmin and check the range bounds */ + init_var(&rlen); + sub_var(rmax, rmin, &rlen); + + if (rlen.sign == NUMERIC_NEG) + ereport(ERROR, + errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("lower bound must be less than or equal to upper bound")); + + /* Special case for an empty range */ + if (rlen.ndigits == 0) + { + set_var_from_var(rmin, result); + result->dscale = rscale; + free_var(&rlen); + return; + } + + /* + * Otherwise, select a random value in the range [0, rlen = rmax - rmin], + * and shift it to the required range by adding rmin. + */ + + /* Required result digits */ + res_ndigits = rlen.weight + 1 + (rscale + DEC_DIGITS - 1) / DEC_DIGITS; + + /* + * To get the required rscale, the final result digit must be a multiple + * of pow10 = 10^n, where n = (-rscale) mod DEC_DIGITS. + */ + n = ((rscale + DEC_DIGITS - 1) / DEC_DIGITS) * DEC_DIGITS - rscale; + pow10 = 1; + for (i = 0; i < n; i++) + pow10 *= 10; + + /* + * To choose a random value uniformly from the range [0, rlen], we choose + * from the slightly larger range [0, rlen2], where rlen2 is formed from + * rlen by copying the first 4 NBASE digits, and setting all remaining + * decimal digits to "9". + * + * Without loss of generality, we can ignore the weight of rlen2 and treat + * it as a pure integer for the purposes of this discussion. The process + * above gives rlen2 + 1 = rlen64 * 10^N, for some integer N, where rlen64 + * is a 64-bit integer formed from the first 4 NBASE digits copied from + * rlen. Since this trivially factors into smaller pieces that fit in + * 64-bit integers, the task of choosing a random value uniformly from the + * rlen2 + 1 possible values in [0, rlen2] is much simpler. + * + * If the random value selected is too large, it is rejected, and we try + * again until we get a result <= rlen, ensuring that the overall result + * is uniform (no particular value is any more likely than any other). + * + * Since rlen64 holds 4 NBASE digits from rlen, it contains at least + * DEC_DIGITS * 3 + 1 decimal digits (i.e., at least 13 decimal digits, + * when DEC_DIGITS is 4). Therefore the probability of needing to reject + * the value chosen and retry is less than 1e-13. + */ + rlen64 = (uint64) rlen.digits[0]; + rlen64_ndigits = 1; + while (rlen64_ndigits < res_ndigits && rlen64_ndigits < 4) + { + rlen64 *= NBASE; + if (rlen64_ndigits < rlen.ndigits) + rlen64 += rlen.digits[rlen64_ndigits]; + rlen64_ndigits++; + } + + /* Loop until we get a result <= rlen */ + do + { + NumericDigit *res_digits; + uint64 rand; + int whole_ndigits; + + alloc_var(result, res_ndigits); + result->sign = NUMERIC_POS; + result->weight = rlen.weight; + result->dscale = rscale; + res_digits = result->digits; + + /* + * Set the first rlen64_ndigits using a random value in [0, rlen64]. + * + * If this is the whole result, and rscale is not a multiple of + * DEC_DIGITS (pow10 from above is not 1), then we need this to be a + * multiple of pow10. + */ + if (rlen64_ndigits == res_ndigits && pow10 != 1) + rand = pg_prng_uint64_range(state, 0, rlen64 / pow10) * pow10; + else + rand = pg_prng_uint64_range(state, 0, rlen64); + + for (i = rlen64_ndigits - 1; i >= 0; i--) + { + res_digits[i] = (NumericDigit) (rand % NBASE); + rand = rand / NBASE; + } + + /* + * Set the remaining digits to random values in range [0, NBASE), + * noting that the last digit needs to be a multiple of pow10. + */ + whole_ndigits = res_ndigits; + if (pow10 != 1) + whole_ndigits--; + + /* Set whole digits in groups of 4 for best performance */ + i = rlen64_ndigits; + while (i < whole_ndigits - 3) + { + rand = pg_prng_uint64_range(state, 0, + (uint64) NBASE * NBASE * NBASE * NBASE - 1); + res_digits[i++] = (NumericDigit) (rand % NBASE); + rand = rand / NBASE; + res_digits[i++] = (NumericDigit) (rand % NBASE); + rand = rand / NBASE; + res_digits[i++] = (NumericDigit) (rand % NBASE); + rand = rand / NBASE; + res_digits[i++] = (NumericDigit) rand; + } + + /* Remaining whole digits */ + while (i < whole_ndigits) + { + rand = pg_prng_uint64_range(state, 0, NBASE - 1); + res_digits[i++] = (NumericDigit) rand; + } + + /* Final partial digit (multiple of pow10) */ + if (i < res_ndigits) + { + rand = pg_prng_uint64_range(state, 0, NBASE / pow10 - 1) * pow10; + res_digits[i] = (NumericDigit) rand; + } + + /* Remove leading/trailing zeroes */ + strip_var(result); + + /* If result > rlen, try again */ + + } while (cmp_var(result, &rlen) > 0); + + /* Offset the result to the required range */ + add_var(result, rmin, result); + + free_var(&rlen); +} + /* ---------------------------------------------------------------------- * diff --git a/src/backend/utils/adt/pseudorandomfuncs.c b/src/backend/utils/adt/pseudorandomfuncs.c new file mode 100644 index 0000000000..8e82c7078c --- /dev/null +++ b/src/backend/utils/adt/pseudorandomfuncs.c @@ -0,0 +1,185 @@ +/*------------------------------------------------------------------------- + * + * pseudorandomfuncs.c + * Functions giving SQL access to a pseudorandom number generator. + * + * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/utils/adt/pseudorandomfuncs.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include + +#include "common/pg_prng.h" +#include "miscadmin.h" +#include "utils/fmgrprotos.h" +#include "utils/numeric.h" +#include "utils/timestamp.h" + +/* Shared PRNG state used by all the random functions */ +static pg_prng_state prng_state; +static bool prng_seed_set = false; + +/* + * initialize_prng() - + * + * Initialize (seed) the PRNG, if not done yet in this process. + */ +static void +initialize_prng(void) +{ + if (unlikely(!prng_seed_set)) + { + /* + * If possible, seed the PRNG using high-quality random bits. Should + * that fail for some reason, we fall back on a lower-quality seed + * based on current time and PID. + */ + if (unlikely(!pg_prng_strong_seed(&prng_state))) + { + TimestampTz now = GetCurrentTimestamp(); + uint64 iseed; + + /* Mix the PID with the most predictable bits of the timestamp */ + iseed = (uint64) now ^ ((uint64) MyProcPid << 32); + pg_prng_seed(&prng_state, iseed); + } + prng_seed_set = true; + } +} + +/* + * setseed() - + * + * Seed the PRNG from a specified value in the range [-1.0, 1.0]. + */ +Datum +setseed(PG_FUNCTION_ARGS) +{ + float8 seed = PG_GETARG_FLOAT8(0); + + if (seed < -1 || seed > 1 || isnan(seed)) + ereport(ERROR, + errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("setseed parameter %g is out of allowed range [-1,1]", + seed)); + + pg_prng_fseed(&prng_state, seed); + prng_seed_set = true; + + PG_RETURN_VOID(); +} + +/* + * drandom() - + * + * Returns a random number chosen uniformly in the range [0.0, 1.0). + */ +Datum +drandom(PG_FUNCTION_ARGS) +{ + float8 result; + + initialize_prng(); + + /* pg_prng_double produces desired result range [0.0, 1.0) */ + result = pg_prng_double(&prng_state); + + PG_RETURN_FLOAT8(result); +} + +/* + * drandom_normal() - + * + * Returns a random number from a normal distribution. + */ +Datum +drandom_normal(PG_FUNCTION_ARGS) +{ + float8 mean = PG_GETARG_FLOAT8(0); + float8 stddev = PG_GETARG_FLOAT8(1); + float8 result, + z; + + initialize_prng(); + + /* Get random value from standard normal(mean = 0.0, stddev = 1.0) */ + z = pg_prng_double_normal(&prng_state); + /* Transform the normal standard variable (z) */ + /* using the target normal distribution parameters */ + result = (stddev * z) + mean; + + PG_RETURN_FLOAT8(result); +} + +/* + * int4random() - + * + * Returns a random 32-bit integer chosen uniformly in the specified range. + */ +Datum +int4random(PG_FUNCTION_ARGS) +{ + int32 rmin = PG_GETARG_INT32(0); + int32 rmax = PG_GETARG_INT32(1); + int32 result; + + if (rmin > rmax) + ereport(ERROR, + errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("lower bound must be less than or equal to upper bound")); + + initialize_prng(); + + result = (int32) pg_prng_int64_range(&prng_state, rmin, rmax); + + PG_RETURN_INT32(result); +} + +/* + * int8random() - + * + * Returns a random 64-bit integer chosen uniformly in the specified range. + */ +Datum +int8random(PG_FUNCTION_ARGS) +{ + int64 rmin = PG_GETARG_INT64(0); + int64 rmax = PG_GETARG_INT64(1); + int64 result; + + if (rmin > rmax) + ereport(ERROR, + errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("lower bound must be less than or equal to upper bound")); + + initialize_prng(); + + result = pg_prng_int64_range(&prng_state, rmin, rmax); + + PG_RETURN_INT64(result); +} + +/* + * numeric_random() - + * + * Returns a random numeric value chosen uniformly in the specified range. + */ +Datum +numeric_random(PG_FUNCTION_ARGS) +{ + Numeric rmin = PG_GETARG_NUMERIC(0); + Numeric rmax = PG_GETARG_NUMERIC(1); + Numeric result; + + initialize_prng(); + + result = random_numeric(&prng_state, rmin, rmax); + + PG_RETURN_NUMERIC(result); +} diff --git a/src/common/pg_prng.c b/src/common/pg_prng.c index c1714a02bd..15b39411a9 100644 --- a/src/common/pg_prng.c +++ b/src/common/pg_prng.c @@ -184,6 +184,42 @@ pg_prng_int64p(pg_prng_state *state) return (int64) (xoroshiro128ss(state) & UINT64CONST(0x7FFFFFFFFFFFFFFF)); } +/* + * Select a random int64 uniformly from the range [rmin, rmax]. + * If the range is empty, rmin is always produced. + */ +int64 +pg_prng_int64_range(pg_prng_state *state, int64 rmin, int64 rmax) +{ + int64 val; + + if (likely(rmax > rmin)) + { + uint64 uval; + + /* + * Use pg_prng_uint64_range(). Can't simply pass it rmin and rmax, + * since (uint64) rmin will be larger than (uint64) rmax if rmin < 0. + */ + uval = (uint64) rmin + + pg_prng_uint64_range(state, 0, (uint64) rmax - (uint64) rmin); + + /* + * Safely convert back to int64, avoiding implementation-defined + * behavior for values larger than PG_INT64_MAX. Modern compilers + * will reduce this to a simple assignment. + */ + if (uval > PG_INT64_MAX) + val = (int64) (uval - PG_INT64_MIN) + PG_INT64_MIN; + else + val = (int64) uval; + } + else + val = rmin; + + return val; +} + /* * Select a random uint32 uniformly from the range [0, PG_UINT32_MAX]. */ diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index a00ed3075c..fd4a482d90 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -57,6 +57,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 202403271 +#define CATALOG_VERSION_NO 202403272 #endif diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index 2f7cfc02c6..07023ee61d 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -3381,6 +3381,18 @@ proname => 'random_normal', provolatile => 'v', proparallel => 'r', prorettype => 'float8', proargtypes => 'float8 float8', prosrc => 'drandom_normal' }, +{ oid => '9719', descr => 'random integer in range', + proname => 'random', provolatile => 'v', proparallel => 'r', + prorettype => 'int4', proargtypes => 'int4 int4', + proargnames => '{min,max}', prosrc => 'int4random' }, +{ oid => '9720', descr => 'random bigint in range', + proname => 'random', provolatile => 'v', proparallel => 'r', + prorettype => 'int8', proargtypes => 'int8 int8', + proargnames => '{min,max}', prosrc => 'int8random' }, +{ oid => '9721', descr => 'random numeric in range', + proname => 'random', provolatile => 'v', proparallel => 'r', + prorettype => 'numeric', proargtypes => 'numeric numeric', + proargnames => '{min,max}', prosrc => 'numeric_random' }, { oid => '1599', descr => 'set random seed', proname => 'setseed', provolatile => 'v', proparallel => 'r', prorettype => 'void', proargtypes => 'float8', prosrc => 'setseed' }, diff --git a/src/include/common/pg_prng.h b/src/include/common/pg_prng.h index e201b95686..c114c6419d 100644 --- a/src/include/common/pg_prng.h +++ b/src/include/common/pg_prng.h @@ -51,6 +51,7 @@ extern uint64 pg_prng_uint64(pg_prng_state *state); extern uint64 pg_prng_uint64_range(pg_prng_state *state, uint64 rmin, uint64 rmax); extern int64 pg_prng_int64(pg_prng_state *state); extern int64 pg_prng_int64p(pg_prng_state *state); +extern int64 pg_prng_int64_range(pg_prng_state *state, int64 rmin, int64 rmax); extern uint32 pg_prng_uint32(pg_prng_state *state); extern int32 pg_prng_int32(pg_prng_state *state); extern int32 pg_prng_int32p(pg_prng_state *state); diff --git a/src/include/utils/numeric.h b/src/include/utils/numeric.h index 2f7184e299..43c75c436f 100644 --- a/src/include/utils/numeric.h +++ b/src/include/utils/numeric.h @@ -14,6 +14,7 @@ #ifndef _PG_NUMERIC_H_ #define _PG_NUMERIC_H_ +#include "common/pg_prng.h" #include "fmgr.h" /* @@ -103,4 +104,7 @@ extern Numeric numeric_mod_opt_error(Numeric num1, Numeric num2, extern int32 numeric_int4_opt_error(Numeric num, bool *have_error); extern int64 numeric_int8_opt_error(Numeric num, bool *have_error); +extern Numeric random_numeric(pg_prng_state *state, + Numeric rmin, Numeric rmax); + #endif /* _PG_NUMERIC_H_ */ diff --git a/src/test/regress/expected/random.out b/src/test/regress/expected/random.out index 223590720c..43cf88a363 100644 --- a/src/test/regress/expected/random.out +++ b/src/test/regress/expected/random.out @@ -120,6 +120,229 @@ SELECT ks_test_normal_random() OR t (1 row) +-- Test random(min, max) +-- invalid range bounds +SELECT random(1, 0); +ERROR: lower bound must be less than or equal to upper bound +SELECT random(1000000000001, 1000000000000); +ERROR: lower bound must be less than or equal to upper bound +SELECT random(-2.0, -3.0); +ERROR: lower bound must be less than or equal to upper bound +SELECT random('NaN'::numeric, 10); +ERROR: lower bound cannot be NaN +SELECT random('-Inf'::numeric, 0); +ERROR: lower bound cannot be infinity +SELECT random(0, 'NaN'::numeric); +ERROR: upper bound cannot be NaN +SELECT random(0, 'Inf'::numeric); +ERROR: upper bound cannot be infinity +-- empty range is OK +SELECT random(101, 101); + random +-------- + 101 +(1 row) + +SELECT random(1000000000001, 1000000000001); + random +--------------- + 1000000000001 +(1 row) + +SELECT random(3.14, 3.14); + random +-------- + 3.14 +(1 row) + +-- There should be no triple duplicates in 1000 full-range 32-bit random() +-- values. (Each of the C(1000, 3) choices of triplets from the 1000 values +-- has a probability of 1/(2^32)^2 of being a triple duplicate, so the +-- average number of triple duplicates is 1000 * 999 * 998 / 6 / 2^64, which +-- is roughly 9e-12.) +SELECT r, count(*) +FROM (SELECT random(-2147483648, 2147483647) r + FROM generate_series(1, 1000)) ss +GROUP BY r HAVING count(*) > 2; + r | count +---+------- +(0 rows) + +-- There should be no duplicates in 1000 full-range 64-bit random() values. +SELECT r, count(*) +FROM (SELECT random_normal(-9223372036854775808, 9223372036854775807) r + FROM generate_series(1, 1000)) ss +GROUP BY r HAVING count(*) > 1; + r | count +---+------- +(0 rows) + +-- There should be no duplicates in 1000 15-digit random() numeric values. +SELECT r, count(*) +FROM (SELECT random_normal(0, 1 - 1e-15) r + FROM generate_series(1, 1000)) ss +GROUP BY r HAVING count(*) > 1; + r | count +---+------- +(0 rows) + +-- Expect at least one out of 2000 random values to be in the lowest and +-- highest 1% of the range. +SELECT (count(*) FILTER (WHERE r < -2104533975)) > 0 AS has_small, + (count(*) FILTER (WHERE r > 2104533974)) > 0 AS has_large +FROM (SELECT random(-2147483648, 2147483647) r FROM generate_series(1, 2000)) ss; + has_small | has_large +-----------+----------- + t | t +(1 row) + +SELECT count(*) FILTER (WHERE r < -1500000000 OR r > 1500000000) AS out_of_range, + (count(*) FILTER (WHERE r < -1470000000)) > 0 AS has_small, + (count(*) FILTER (WHERE r > 1470000000)) > 0 AS has_large +FROM (SELECT random(-1500000000, 1500000000) r FROM generate_series(1, 2000)) ss; + out_of_range | has_small | has_large +--------------+-----------+----------- + 0 | t | t +(1 row) + +SELECT (count(*) FILTER (WHERE r < -9038904596117680292)) > 0 AS has_small, + (count(*) FILTER (WHERE r > 9038904596117680291)) > 0 AS has_large +FROM (SELECT random(-9223372036854775808, 9223372036854775807) r + FROM generate_series(1, 2000)) ss; + has_small | has_large +-----------+----------- + t | t +(1 row) + +SELECT count(*) FILTER (WHERE r < -1500000000000000 OR r > 1500000000000000) AS out_of_range, + (count(*) FILTER (WHERE r < -1470000000000000)) > 0 AS has_small, + (count(*) FILTER (WHERE r > 1470000000000000)) > 0 AS has_large +FROM (SELECT random(-1500000000000000, 1500000000000000) r + FROM generate_series(1, 2000)) ss; + out_of_range | has_small | has_large +--------------+-----------+----------- + 0 | t | t +(1 row) + +SELECT count(*) FILTER (WHERE r < -1.5 OR r > 1.5) AS out_of_range, + (count(*) FILTER (WHERE r < -1.47)) > 0 AS has_small, + (count(*) FILTER (WHERE r > 1.47)) > 0 AS has_large +FROM (SELECT random(-1.500000000000000, 1.500000000000000) r + FROM generate_series(1, 2000)) ss; + out_of_range | has_small | has_large +--------------+-----------+----------- + 0 | t | t +(1 row) + +-- Every possible value should occur at least once in 2500 random() values +-- chosen from a range with 100 distinct values. +SELECT min(r), max(r), count(r) FROM ( + SELECT DISTINCT random(-50, 49) r FROM generate_series(1, 2500)); + min | max | count +-----+-----+------- + -50 | 49 | 100 +(1 row) + +SELECT min(r), max(r), count(r) FROM ( + SELECT DISTINCT random(123000000000, 123000000099) r + FROM generate_series(1, 2500)); + min | max | count +--------------+--------------+------- + 123000000000 | 123000000099 | 100 +(1 row) + +SELECT min(r), max(r), count(r) FROM ( + SELECT DISTINCT random(-0.5, 0.49) r FROM generate_series(1, 2500)); + min | max | count +-------+------+------- + -0.50 | 0.49 | 100 +(1 row) + +-- Check for uniform distribution using the Kolmogorov-Smirnov test. +CREATE FUNCTION ks_test_uniform_random_int_in_range() +RETURNS boolean AS +$$ +DECLARE + n int := 1000; -- Number of samples + c float8 := 1.94947; -- Critical value for 99.9% confidence + ok boolean; +BEGIN + ok := ( + WITH samples AS ( + SELECT random(0, 999999) / 1000000.0 r FROM generate_series(1, n) ORDER BY 1 + ), indexed_samples AS ( + SELECT (row_number() OVER())-1.0 i, r FROM samples + ) + SELECT max(abs(i/n-r)) < c / sqrt(n) FROM indexed_samples + ); + RETURN ok; +END +$$ +LANGUAGE plpgsql; +SELECT ks_test_uniform_random_int_in_range() OR + ks_test_uniform_random_int_in_range() OR + ks_test_uniform_random_int_in_range() AS uniform_int; + uniform_int +------------- + t +(1 row) + +CREATE FUNCTION ks_test_uniform_random_bigint_in_range() +RETURNS boolean AS +$$ +DECLARE + n int := 1000; -- Number of samples + c float8 := 1.94947; -- Critical value for 99.9% confidence + ok boolean; +BEGIN + ok := ( + WITH samples AS ( + SELECT random(0, 999999999999) / 1000000000000.0 r FROM generate_series(1, n) ORDER BY 1 + ), indexed_samples AS ( + SELECT (row_number() OVER())-1.0 i, r FROM samples + ) + SELECT max(abs(i/n-r)) < c / sqrt(n) FROM indexed_samples + ); + RETURN ok; +END +$$ +LANGUAGE plpgsql; +SELECT ks_test_uniform_random_bigint_in_range() OR + ks_test_uniform_random_bigint_in_range() OR + ks_test_uniform_random_bigint_in_range() AS uniform_bigint; + uniform_bigint +---------------- + t +(1 row) + +CREATE FUNCTION ks_test_uniform_random_numeric_in_range() +RETURNS boolean AS +$$ +DECLARE + n int := 1000; -- Number of samples + c float8 := 1.94947; -- Critical value for 99.9% confidence + ok boolean; +BEGIN + ok := ( + WITH samples AS ( + SELECT random(0, 0.999999) r FROM generate_series(1, n) ORDER BY 1 + ), indexed_samples AS ( + SELECT (row_number() OVER())-1.0 i, r FROM samples + ) + SELECT max(abs(i/n-r)) < c / sqrt(n) FROM indexed_samples + ); + RETURN ok; +END +$$ +LANGUAGE plpgsql; +SELECT ks_test_uniform_random_numeric_in_range() OR + ks_test_uniform_random_numeric_in_range() OR + ks_test_uniform_random_numeric_in_range() AS uniform_numeric; + uniform_numeric +----------------- + t +(1 row) + -- setseed() should produce a reproducible series of random() values. SELECT setseed(0.5); setseed @@ -176,3 +399,140 @@ SELECT random_normal(mean => 1, stddev => 0.1) r FROM generate_series(1, 10); 0.96403105557543 (10 rows) +-- Reproducible random(min, max) values. +SELECT random(1, 6) FROM generate_series(1, 10); + random +-------- + 5 + 4 + 5 + 1 + 6 + 1 + 1 + 3 + 6 + 5 +(10 rows) + +SELECT random(-2147483648, 2147483647) FROM generate_series(1, 10); + random +------------- + -84380014 + 1287883594 + -1927252904 + 13516867 + -1902961616 + -1824286201 + -871264469 + -1225880415 + 229836730 + -116039023 +(10 rows) + +SELECT random(-9223372036854775808, 9223372036854775807) FROM generate_series(1, 10); + random +---------------------- + -6205280962992680052 + -3583519428011353337 + 511801786318122700 + 4672737727839409655 + -6674868801536280768 + -7816052100626646489 + -4340613370136007199 + -5873174504107419786 + -2249910101649817824 + -4493828993910792325 +(10 rows) + +SELECT random(-1e30, 1e30) FROM generate_series(1, 10); + random +--------------------------------- + -732116469803315942112255539315 + 794641423514877972798449289857 + -576932746026123093304638334719 + 420625067723533225139761854757 + -339227806779403187811001078919 + -77667951539418104959241732636 + 239810941795708162629328071599 + 820784371155896967052141946697 + -377084684544126871150439048352 + -979773225250716295007225086726 +(10 rows) + +SELECT random(-0.4, 0.4) FROM generate_series(1, 10); + random +-------- + 0.1 + 0.0 + 0.4 + -0.2 + 0.1 + 0.2 + 0.3 + 0.0 + -0.2 + 0.2 +(10 rows) + +SELECT random(0, 1 - 1e-30) FROM generate_series(1, 10); + random +---------------------------------- + 0.676442053784930109917469287265 + 0.221310454098356723569995592911 + 0.060101338174419259555193956224 + 0.509960354695248239243002172364 + 0.248680813394555793693952296993 + 0.353262552880008646603494668901 + 0.760692600450339509843044233719 + 0.554987655310094483449494782510 + 0.330890988458592995280347745733 + 0.665435298280470361228607881507 +(10 rows) + +SELECT n, random(0, trim_scale(abs(1 - 10.0^(-n)))) FROM generate_series(-20, 20) n; + n | random +-----+------------------------ + -20 | 94174615760837282445 + -19 | 6692559888531296894 + -18 | 801114552709125931 + -17 | 44091460959939971 + -16 | 2956109297383113 + -15 | 783332278684523 + -14 | 81534303241440 + -13 | 2892623140500 + -12 | 269397605141 + -11 | 13027512296 + -10 | 9178377775 + -9 | 323534150 + -8 | 91897803 + -7 | 6091383 + -6 | 13174 + -5 | 92714 + -4 | 8079 + -3 | 429 + -2 | 30 + -1 | 3 + 0 | 0 + 1 | 0.1 + 2 | 0.69 + 3 | 0.492 + 4 | 0.7380 + 5 | 0.77078 + 6 | 0.738142 + 7 | 0.1808815 + 8 | 0.14908933 + 9 | 0.222654042 + 10 | 0.2281295170 + 11 | 0.73655782966 + 12 | 0.056357256884 + 13 | 0.8998407524375 + 14 | 0.28198400530206 + 15 | 0.713478222805230 + 16 | 0.0415046850936909 + 17 | 0.45946350291315119 + 18 | 0.310966980367873753 + 19 | 0.4967623661709676512 + 20 | 0.60795101234744211935 +(41 rows) + diff --git a/src/test/regress/sql/random.sql b/src/test/regress/sql/random.sql index 14cc76bc3c..ebfa7539ed 100644 --- a/src/test/regress/sql/random.sql +++ b/src/test/regress/sql/random.sql @@ -100,6 +100,161 @@ SELECT ks_test_normal_random() OR ks_test_normal_random() OR ks_test_normal_random() AS standard_normal; +-- Test random(min, max) + +-- invalid range bounds +SELECT random(1, 0); +SELECT random(1000000000001, 1000000000000); +SELECT random(-2.0, -3.0); +SELECT random('NaN'::numeric, 10); +SELECT random('-Inf'::numeric, 0); +SELECT random(0, 'NaN'::numeric); +SELECT random(0, 'Inf'::numeric); + +-- empty range is OK +SELECT random(101, 101); +SELECT random(1000000000001, 1000000000001); +SELECT random(3.14, 3.14); + +-- There should be no triple duplicates in 1000 full-range 32-bit random() +-- values. (Each of the C(1000, 3) choices of triplets from the 1000 values +-- has a probability of 1/(2^32)^2 of being a triple duplicate, so the +-- average number of triple duplicates is 1000 * 999 * 998 / 6 / 2^64, which +-- is roughly 9e-12.) +SELECT r, count(*) +FROM (SELECT random(-2147483648, 2147483647) r + FROM generate_series(1, 1000)) ss +GROUP BY r HAVING count(*) > 2; + +-- There should be no duplicates in 1000 full-range 64-bit random() values. +SELECT r, count(*) +FROM (SELECT random_normal(-9223372036854775808, 9223372036854775807) r + FROM generate_series(1, 1000)) ss +GROUP BY r HAVING count(*) > 1; + +-- There should be no duplicates in 1000 15-digit random() numeric values. +SELECT r, count(*) +FROM (SELECT random_normal(0, 1 - 1e-15) r + FROM generate_series(1, 1000)) ss +GROUP BY r HAVING count(*) > 1; + +-- Expect at least one out of 2000 random values to be in the lowest and +-- highest 1% of the range. +SELECT (count(*) FILTER (WHERE r < -2104533975)) > 0 AS has_small, + (count(*) FILTER (WHERE r > 2104533974)) > 0 AS has_large +FROM (SELECT random(-2147483648, 2147483647) r FROM generate_series(1, 2000)) ss; + +SELECT count(*) FILTER (WHERE r < -1500000000 OR r > 1500000000) AS out_of_range, + (count(*) FILTER (WHERE r < -1470000000)) > 0 AS has_small, + (count(*) FILTER (WHERE r > 1470000000)) > 0 AS has_large +FROM (SELECT random(-1500000000, 1500000000) r FROM generate_series(1, 2000)) ss; + +SELECT (count(*) FILTER (WHERE r < -9038904596117680292)) > 0 AS has_small, + (count(*) FILTER (WHERE r > 9038904596117680291)) > 0 AS has_large +FROM (SELECT random(-9223372036854775808, 9223372036854775807) r + FROM generate_series(1, 2000)) ss; + +SELECT count(*) FILTER (WHERE r < -1500000000000000 OR r > 1500000000000000) AS out_of_range, + (count(*) FILTER (WHERE r < -1470000000000000)) > 0 AS has_small, + (count(*) FILTER (WHERE r > 1470000000000000)) > 0 AS has_large +FROM (SELECT random(-1500000000000000, 1500000000000000) r + FROM generate_series(1, 2000)) ss; + +SELECT count(*) FILTER (WHERE r < -1.5 OR r > 1.5) AS out_of_range, + (count(*) FILTER (WHERE r < -1.47)) > 0 AS has_small, + (count(*) FILTER (WHERE r > 1.47)) > 0 AS has_large +FROM (SELECT random(-1.500000000000000, 1.500000000000000) r + FROM generate_series(1, 2000)) ss; + +-- Every possible value should occur at least once in 2500 random() values +-- chosen from a range with 100 distinct values. +SELECT min(r), max(r), count(r) FROM ( + SELECT DISTINCT random(-50, 49) r FROM generate_series(1, 2500)); + +SELECT min(r), max(r), count(r) FROM ( + SELECT DISTINCT random(123000000000, 123000000099) r + FROM generate_series(1, 2500)); + +SELECT min(r), max(r), count(r) FROM ( + SELECT DISTINCT random(-0.5, 0.49) r FROM generate_series(1, 2500)); + +-- Check for uniform distribution using the Kolmogorov-Smirnov test. + +CREATE FUNCTION ks_test_uniform_random_int_in_range() +RETURNS boolean AS +$$ +DECLARE + n int := 1000; -- Number of samples + c float8 := 1.94947; -- Critical value for 99.9% confidence + ok boolean; +BEGIN + ok := ( + WITH samples AS ( + SELECT random(0, 999999) / 1000000.0 r FROM generate_series(1, n) ORDER BY 1 + ), indexed_samples AS ( + SELECT (row_number() OVER())-1.0 i, r FROM samples + ) + SELECT max(abs(i/n-r)) < c / sqrt(n) FROM indexed_samples + ); + RETURN ok; +END +$$ +LANGUAGE plpgsql; + +SELECT ks_test_uniform_random_int_in_range() OR + ks_test_uniform_random_int_in_range() OR + ks_test_uniform_random_int_in_range() AS uniform_int; + +CREATE FUNCTION ks_test_uniform_random_bigint_in_range() +RETURNS boolean AS +$$ +DECLARE + n int := 1000; -- Number of samples + c float8 := 1.94947; -- Critical value for 99.9% confidence + ok boolean; +BEGIN + ok := ( + WITH samples AS ( + SELECT random(0, 999999999999) / 1000000000000.0 r FROM generate_series(1, n) ORDER BY 1 + ), indexed_samples AS ( + SELECT (row_number() OVER())-1.0 i, r FROM samples + ) + SELECT max(abs(i/n-r)) < c / sqrt(n) FROM indexed_samples + ); + RETURN ok; +END +$$ +LANGUAGE plpgsql; + +SELECT ks_test_uniform_random_bigint_in_range() OR + ks_test_uniform_random_bigint_in_range() OR + ks_test_uniform_random_bigint_in_range() AS uniform_bigint; + +CREATE FUNCTION ks_test_uniform_random_numeric_in_range() +RETURNS boolean AS +$$ +DECLARE + n int := 1000; -- Number of samples + c float8 := 1.94947; -- Critical value for 99.9% confidence + ok boolean; +BEGIN + ok := ( + WITH samples AS ( + SELECT random(0, 0.999999) r FROM generate_series(1, n) ORDER BY 1 + ), indexed_samples AS ( + SELECT (row_number() OVER())-1.0 i, r FROM samples + ) + SELECT max(abs(i/n-r)) < c / sqrt(n) FROM indexed_samples + ); + RETURN ok; +END +$$ +LANGUAGE plpgsql; + +SELECT ks_test_uniform_random_numeric_in_range() OR + ks_test_uniform_random_numeric_in_range() OR + ks_test_uniform_random_numeric_in_range() AS uniform_numeric; + -- setseed() should produce a reproducible series of random() values. SELECT setseed(0.5); @@ -113,3 +268,12 @@ SET extra_float_digits = -1; SELECT random_normal() FROM generate_series(1, 10); SELECT random_normal(mean => 1, stddev => 0.1) r FROM generate_series(1, 10); + +-- Reproducible random(min, max) values. +SELECT random(1, 6) FROM generate_series(1, 10); +SELECT random(-2147483648, 2147483647) FROM generate_series(1, 10); +SELECT random(-9223372036854775808, 9223372036854775807) FROM generate_series(1, 10); +SELECT random(-1e30, 1e30) FROM generate_series(1, 10); +SELECT random(-0.4, 0.4) FROM generate_series(1, 10); +SELECT random(0, 1 - 1e-30) FROM generate_series(1, 10); +SELECT n, random(0, trim_scale(abs(1 - 10.0^(-n)))) FROM generate_series(-20, 20) n;