From 0079547bcbedff2a653204571c3cda22c35907d4 Mon Sep 17 00:00:00 2001 From: Neil Conway Date: Fri, 14 May 2004 21:42:30 +0000 Subject: [PATCH] Implement the width_bucket() function, per SQL2003. This commit only adds a variant of the function for the 'numeric' datatype; it would be possible to add additional variants for other datatypes, but I haven't done so yet. This commit includes regression tests and minimal documentation; if we want developers to actually use this function in applications, we'll probably need to document what it does more fully. --- doc/src/sgml/errcodes.sgml | 7 +- doc/src/sgml/func.sgml | 12 ++- doc/src/sgml/xfunc.sgml | 16 +-- src/backend/utils/adt/numeric.c | 149 ++++++++++++++++++++++++-- src/include/catalog/catversion.h | 4 +- src/include/catalog/pg_proc.h | 4 +- src/include/utils/builtins.h | 3 +- src/include/utils/errcodes.h | 3 +- src/test/regress/expected/numeric.out | 51 +++++++++ src/test/regress/sql/numeric.sql | 46 ++++++++ 10 files changed, 272 insertions(+), 23 deletions(-) diff --git a/doc/src/sgml/errcodes.sgml b/doc/src/sgml/errcodes.sgml index 2f211f462e..295c41ffad 100644 --- a/doc/src/sgml/errcodes.sgml +++ b/doc/src/sgml/errcodes.sgml @@ -1,4 +1,4 @@ - + <productname>PostgreSQL</productname> Error Codes @@ -310,6 +310,11 @@ INTERVAL FIELD OVERFLOW + +2201G +INVALID ARGUMENT FOR WIDTH BUCKET FUNCTION + + 22018 INVALID CHARACTER VALUE FOR CAST diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index 1dde8b59a3..6fd36fe77b 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -1,5 +1,5 @@ @@ -751,6 +751,16 @@ PostgreSQL documentation 42.43 + + width_bucket(op numeric, b1 numeric, b2 numeric, count integer) + integer + return the bucket to which operand would + be assigned in an equidepth histogram with count + buckets, an upper bound of b1, and a lower bound + of b2 + width_bucket(5.35, 0.024, 10.06, 5) + 3 + diff --git a/doc/src/sgml/xfunc.sgml b/doc/src/sgml/xfunc.sgml index a3c24a1c4c..ed2a502c69 100644 --- a/doc/src/sgml/xfunc.sgml +++ b/doc/src/sgml/xfunc.sgml @@ -1,5 +1,5 @@ @@ -56,7 +56,7 @@ $PostgreSQL: pgsql/doc/src/sgml/xfunc.sgml,v 1.82 2004/05/10 22:44:43 tgl Exp $ - It's easiest to define SQL + It's easiest to define SQL functions, so we'll start by discussing those. Most of the concepts presented for SQL functions will carry over to the other types of functions. @@ -64,12 +64,12 @@ $PostgreSQL: pgsql/doc/src/sgml/xfunc.sgml,v 1.82 2004/05/10 22:44:43 tgl Exp $ Throughout this chapter, it can be useful to look at the reference - page of the CREATE FUNCTION command to - understand the examples better. - Some examples from this chapter - can be found in funcs.sql - and funcs.c in the src/tutorial - directory in the PostgreSQL source distribution. + page of the command to + understand the examples better. Some examples from this chapter + can be found in funcs.sql and + funcs.c in the src/tutorial + directory in the PostgreSQL source + distribution. diff --git a/src/backend/utils/adt/numeric.c b/src/backend/utils/adt/numeric.c index 28390ee5c3..4214d7af00 100644 --- a/src/backend/utils/adt/numeric.c +++ b/src/backend/utils/adt/numeric.c @@ -14,7 +14,7 @@ * Copyright (c) 1998-2003, PostgreSQL Global Development Group * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/adt/numeric.c,v 1.73 2004/05/07 00:24:58 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/utils/adt/numeric.c,v 1.74 2004/05/14 21:42:28 neilc Exp $ * *------------------------------------------------------------------------- */ @@ -252,6 +252,7 @@ static Numeric make_result(NumericVar *var); static void apply_typmod(NumericVar *var, int32 typmod); +static int32 numericvar_to_int4(NumericVar *var); static bool numericvar_to_int8(NumericVar *var, int64 *result); static void int8_to_numericvar(int64 val, NumericVar *var); static double numeric_to_double_no_overflow(Numeric num); @@ -285,6 +286,8 @@ static void sub_abs(NumericVar *var1, NumericVar *var2, NumericVar *result); static void round_var(NumericVar *var, int rscale); static void trunc_var(NumericVar *var, int rscale); static void strip_var(NumericVar *var); +static void compute_bucket(Numeric operand, Numeric bound1, Numeric bound2, + NumericVar *count_var, NumericVar *result_var); /* ---------------------------------------------------------------------- @@ -803,6 +806,125 @@ numeric_floor(PG_FUNCTION_ARGS) PG_RETURN_NUMERIC(res); } +/* + * width_bucket_numeric() - + * + * 'bound1' and 'bound2' are the lower and upper bounds of the + * histogram's range, respectively. 'count' is the number of buckets + * in the histogram. width_bucket() returns an integer indicating the + * bucket number that 'operand' belongs in for an equiwidth histogram + * with the specified characteristics. An operand smaller than the + * lower bound is assigned to bucket 0. An operand greater than the + * upper bound is assigned to an additional bucket (with number + * count+1). + */ +Datum +width_bucket_numeric(PG_FUNCTION_ARGS) +{ + Numeric operand = PG_GETARG_NUMERIC(0); + Numeric bound1 = PG_GETARG_NUMERIC(1); + Numeric bound2 = PG_GETARG_NUMERIC(2); + int32 count = PG_GETARG_INT32(3); + NumericVar count_var; + NumericVar result_var; + int32 result; + + if (count <= 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_ARGUMENT_FOR_WIDTH_BUCKET_FUNCTION), + errmsg("count must be greater than zero"))); + + init_var(&result_var); + init_var(&count_var); + + /* Convert 'count' to a numeric, for ease of use later */ + int8_to_numericvar((int64) count, &count_var); + + switch (cmp_numerics(bound1, bound2)) + { + case 0: + ereport(ERROR, + (errcode(ERRCODE_INVALID_ARGUMENT_FOR_WIDTH_BUCKET_FUNCTION), + errmsg("lower bound cannot equal upper bound"))); + + /* bound1 < bound2 */ + case -1: + if (cmp_numerics(operand, bound1) < 0) + set_var_from_var(&const_zero, &result_var); + else if (cmp_numerics(operand, bound2) >= 0) + add_var(&count_var, &const_one, &result_var); + else + compute_bucket(operand, bound1, bound2, + &count_var, &result_var); + break; + + /* bound1 > bound2 */ + case 1: + if (cmp_numerics(operand, bound1) > 0) + set_var_from_var(&const_zero, &result_var); + else if (cmp_numerics(operand, bound2) <= 0) + add_var(&count_var, &const_one, &result_var); + else + compute_bucket(operand, bound1, bound2, + &count_var, &result_var); + break; + } + + result = numericvar_to_int4(&result_var); + + free_var(&count_var); + free_var(&result_var); + + PG_RETURN_INT32(result); +} + +/* + * compute_bucket() - + * + * If 'operand' is not outside the bucket range, determine the correct + * bucket for it to go. The calculations performed by this function + * are derived directly from the SQL2003 spec. + */ +static void +compute_bucket(Numeric operand, Numeric bound1, Numeric bound2, + NumericVar *count_var, NumericVar *result_var) +{ + NumericVar bound1_var; + NumericVar bound2_var; + NumericVar operand_var; + + init_var(&bound1_var); + init_var(&bound2_var); + init_var(&operand_var); + + set_var_from_num(bound1, &bound1_var); + set_var_from_num(bound2, &bound2_var); + set_var_from_num(operand, &operand_var); + + if (cmp_var(&bound1_var, &bound2_var) < 0) + { + sub_var(&operand_var, &bound1_var, &operand_var); + sub_var(&bound2_var, &bound1_var, &bound2_var); + div_var(&operand_var, &bound2_var, result_var, + select_div_scale(&operand_var, &bound2_var)); + } + else + { + sub_var(&bound1_var, &operand_var, &operand_var); + sub_var(&bound1_var, &bound2_var, &bound1_var); + div_var(&operand_var, &bound1_var, result_var, + select_div_scale(&operand_var, &bound1_var)); + } + + mul_var(result_var, count_var, result_var, + result_var->dscale + count_var->dscale); + add_var(result_var, &const_one, result_var); + floor_var(result_var, result_var); + + free_var(&bound1_var); + free_var(&bound2_var); + free_var(&operand_var); +} /* ---------------------------------------------------------------------- * @@ -1612,7 +1734,6 @@ numeric_int4(PG_FUNCTION_ARGS) { Numeric num = PG_GETARG_NUMERIC(0); NumericVar x; - int64 val; int32 result; /* XXX would it be better to return NULL? */ @@ -1621,17 +1742,30 @@ numeric_int4(PG_FUNCTION_ARGS) (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("cannot convert NaN to integer"))); - /* Convert to variable format and thence to int8 */ + /* Convert to variable format, then convert to int4 */ init_var(&x); set_var_from_num(num, &x); + result = numericvar_to_int4(&x); + free_var(&x); + PG_RETURN_INT32(result); +} - if (!numericvar_to_int8(&x, &val)) +/* + * Given a NumericVar, convert it to an int32. If the NumericVar + * exceeds the range of an int32, raise the appropriate error via + * ereport(). The input NumericVar is *not* free'd. + */ +static int32 +numericvar_to_int4(NumericVar *var) +{ + int32 result; + int64 val; + + if (!numericvar_to_int8(var, &val)) ereport(ERROR, (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), errmsg("integer out of range"))); - free_var(&x); - /* Down-convert to int4 */ result = (int32) val; @@ -1641,10 +1775,9 @@ numeric_int4(PG_FUNCTION_ARGS) (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), errmsg("integer out of range"))); - PG_RETURN_INT32(result); + return result; } - Datum int8_numeric(PG_FUNCTION_ARGS) { diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index 0e957db82c..b6e8bd4c6e 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -37,7 +37,7 @@ * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.229 2004/05/10 22:44:49 tgl Exp $ + * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.230 2004/05/14 21:42:28 neilc Exp $ * *------------------------------------------------------------------------- */ @@ -53,6 +53,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 200405101 +#define CATALOG_VERSION_NO 200405141 #endif diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h index f15290df3b..70b59d2c74 100644 --- a/src/include/catalog/pg_proc.h +++ b/src/include/catalog/pg_proc.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/catalog/pg_proc.h,v 1.328 2004/05/07 16:57:16 tgl Exp $ + * $PostgreSQL: pgsql/src/include/catalog/pg_proc.h,v 1.329 2004/05/14 21:42:28 neilc Exp $ * * NOTES * The script catalog/genbki.sh reads this file and generates .bki @@ -2508,6 +2508,8 @@ DATA(insert OID = 1745 ( float4 PGNSP PGUID 12 f f t f i 1 700 "1700" _null_ DESCR("(internal)"); DATA(insert OID = 1746 ( float8 PGNSP PGUID 12 f f t f i 1 701 "1700" _null_ numeric_float8 - _null_ )); DESCR("(internal)"); +DATA(insert OID = 2170 ( width_bucket PGNSP PGUID 12 f f t f i 4 23 "1700 1700 1700 23" _null_ width_bucket_numeric - _null_ )); +DESCR("bucket number of operand in equidepth histogram"); DATA(insert OID = 1747 ( time_pl_interval PGNSP PGUID 12 f f t f i 2 1083 "1083 1186" _null_ time_pl_interval - _null_ )); DESCR("plus"); diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h index 59fb0a9a85..d2baf02418 100644 --- a/src/include/utils/builtins.h +++ b/src/include/utils/builtins.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/utils/builtins.h,v 1.237 2004/05/05 04:48:47 tgl Exp $ + * $PostgreSQL: pgsql/src/include/utils/builtins.h,v 1.238 2004/05/14 21:42:30 neilc Exp $ * *------------------------------------------------------------------------- */ @@ -758,6 +758,7 @@ extern Datum int8_sum(PG_FUNCTION_ARGS); extern Datum int2_avg_accum(PG_FUNCTION_ARGS); extern Datum int4_avg_accum(PG_FUNCTION_ARGS); extern Datum int8_avg(PG_FUNCTION_ARGS); +extern Datum width_bucket_numeric(PG_FUNCTION_ARGS); /* ri_triggers.c */ extern Datum RI_FKey_check_ins(PG_FUNCTION_ARGS); diff --git a/src/include/utils/errcodes.h b/src/include/utils/errcodes.h index 64fa7d2718..ed1b4e413d 100644 --- a/src/include/utils/errcodes.h +++ b/src/include/utils/errcodes.h @@ -11,7 +11,7 @@ * * Copyright (c) 2003, PostgreSQL Global Development Group * - * $PostgreSQL: pgsql/src/include/utils/errcodes.h,v 1.9 2004/05/14 18:04:02 neilc Exp $ + * $PostgreSQL: pgsql/src/include/utils/errcodes.h,v 1.10 2004/05/14 21:42:30 neilc Exp $ * *------------------------------------------------------------------------- */ @@ -116,6 +116,7 @@ #define ERRCODE_ESCAPE_CHARACTER_CONFLICT MAKE_SQLSTATE('2','2', '0','0','B') #define ERRCODE_INDICATOR_OVERFLOW MAKE_SQLSTATE('2','2', '0','2','2') #define ERRCODE_INTERVAL_FIELD_OVERFLOW MAKE_SQLSTATE('2','2', '0','1','5') +#define ERRCODE_INVALID_ARGUMENT_FOR_WIDTH_BUCKET_FUNCTION MAKE_SQLSTATE('2','2', '0', '1', 'G') #define ERRCODE_INVALID_CHARACTER_VALUE_FOR_CAST MAKE_SQLSTATE('2','2', '0','1','8') #define ERRCODE_INVALID_DATETIME_FORMAT MAKE_SQLSTATE('2','2', '0','0','7') #define ERRCODE_INVALID_ESCAPE_CHARACTER MAKE_SQLSTATE('2','2', '0','1','9') diff --git a/src/test/regress/expected/numeric.out b/src/test/regress/expected/numeric.out index 6a536c704b..b95d79fb89 100644 --- a/src/test/regress/expected/numeric.out +++ b/src/test/regress/expected/numeric.out @@ -730,6 +730,57 @@ SELECT a, ceil(a), ceiling(a), floor(a), round(a) FROM ceil_floor_round; (7 rows) DROP TABLE ceil_floor_round; +-- Testing for width_bucket() +-- NULL result +SELECT width_bucket(NULL, NULL, NULL, NULL); + width_bucket +-------------- + +(1 row) + +-- errors +SELECT width_bucket(5.0, 3.0, 4.0, 0); +ERROR: count must be greater than zero +SELECT width_bucket(5.0, 3.0, 4.0, -5); +ERROR: count must be greater than zero +SELECT width_bucket(3.0, 3.0, 3.0, 888); +ERROR: lower bound cannot equal upper bound +-- normal operation +CREATE TABLE width_bucket_test (operand numeric); +COPY width_bucket_test FROM stdin; +SELECT + operand, + width_bucket(operand, 0, 10, 5) AS wb_1, + width_bucket(operand, 10, 0, 5) AS wb_2, + width_bucket(operand, 2, 8, 4) AS wb_3, + width_bucket(operand, 5.0, 5.5, 20) AS wb_4, + width_bucket(operand, -25, 25, 10) AS wb_5 + FROM width_bucket_test; + operand | wb_1 | wb_2 | wb_3 | wb_4 | wb_5 +------------------+------+------+------+------+------ + -5.2 | 0 | 6 | 0 | 0 | 4 + -0.0000000000001 | 0 | 6 | 0 | 0 | 5 + 0.0000000000001 | 1 | 5 | 0 | 0 | 6 + 1 | 1 | 5 | 0 | 0 | 6 + 1.99999999999999 | 1 | 5 | 0 | 0 | 6 + 2 | 2 | 5 | 1 | 0 | 6 + 2.00000000000001 | 2 | 4 | 1 | 0 | 6 + 3 | 2 | 4 | 1 | 0 | 6 + 4 | 3 | 4 | 2 | 0 | 6 + 4.5 | 3 | 3 | 2 | 0 | 6 + 5 | 3 | 3 | 3 | 1 | 7 + 5.5 | 3 | 3 | 3 | 21 | 7 + 6 | 4 | 3 | 3 | 21 | 7 + 7 | 4 | 2 | 4 | 21 | 7 + 8 | 5 | 2 | 5 | 21 | 7 + 9 | 5 | 1 | 5 | 21 | 7 + 9.99999999999999 | 5 | 1 | 5 | 21 | 7 + 10 | 6 | 1 | 5 | 21 | 8 + 10.0000000000001 | 6 | 0 | 5 | 21 | 8 + NaN | 6 | 0 | 5 | 21 | 11 +(20 rows) + +DROP TABLE width_bucket_test; -- TO_CHAR() -- SELECT '' AS to_char_1, to_char(val, '9G999G999G999G999G999') diff --git a/src/test/regress/sql/numeric.sql b/src/test/regress/sql/numeric.sql index 59693ab83d..06f9dfd749 100644 --- a/src/test/regress/sql/numeric.sql +++ b/src/test/regress/sql/numeric.sql @@ -667,6 +667,52 @@ INSERT INTO ceil_floor_round VALUES ('-0.000001'); SELECT a, ceil(a), ceiling(a), floor(a), round(a) FROM ceil_floor_round; DROP TABLE ceil_floor_round; +-- Testing for width_bucket() +-- NULL result +SELECT width_bucket(NULL, NULL, NULL, NULL); + +-- errors +SELECT width_bucket(5.0, 3.0, 4.0, 0); +SELECT width_bucket(5.0, 3.0, 4.0, -5); +SELECT width_bucket(3.0, 3.0, 3.0, 888); + +-- normal operation +CREATE TABLE width_bucket_test (operand numeric); + +COPY width_bucket_test FROM stdin; +-5.2 +-0.0000000000001 +0.0000000000001 +1 +1.99999999999999 +2 +2.00000000000001 +3 +4 +4.5 +5 +5.5 +6 +7 +8 +9 +9.99999999999999 +10 +10.0000000000001 +NaN +\. + +SELECT + operand, + width_bucket(operand, 0, 10, 5) AS wb_1, + width_bucket(operand, 10, 0, 5) AS wb_2, + width_bucket(operand, 2, 8, 4) AS wb_3, + width_bucket(operand, 5.0, 5.5, 20) AS wb_4, + width_bucket(operand, -25, 25, 10) AS wb_5 + FROM width_bucket_test; + +DROP TABLE width_bucket_test; + -- TO_CHAR() -- SELECT '' AS to_char_1, to_char(val, '9G999G999G999G999G999')