From a2a0c7c29e47f39da905577659e66b0086b769cc Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Fri, 31 Mar 2023 16:29:55 -0400
Subject: [PATCH] Further tweaking of width_bucket() edge cases.

I realized that the third overflow case I posited in commit b0e9e4d76
actually should be handled in a different way: rather than tolerating
the idea that the quotient could round to 1, we should clamp so that
the output cannot be more than "count" when we know that the operand is
less than bound2.  That being the case, we don't need an overflow-aware
increment in that code path, which leads me to revert the movement of
the pg_add_s32_overflow() call.  (The diff in width_bucket_float8
might be easier to read by comparing against b0e9e4d76^.)

What's more, width_bucket_numeric also has this problem of the quotient
potentially rounding to 1, so add a clamp there too.

As before, I'm not quite convinced that a back-patch is warranted.

Discussion: https://postgr.es/m/391415.1680268470@sss.pgh.pa.us
---
 src/backend/utils/adt/float.c         | 69 +++++++++++++++++----------
 src/backend/utils/adt/numeric.c       | 17 +++++--
 src/test/regress/expected/numeric.out | 25 ++++++++++
 src/test/regress/sql/numeric.sql      |  5 ++
 4 files changed, 88 insertions(+), 28 deletions(-)

diff --git a/src/backend/utils/adt/float.c b/src/backend/utils/adt/float.c
index 4b0795bd24..9b51da2382 100644
--- a/src/backend/utils/adt/float.c
+++ b/src/backend/utils/adt/float.c
@@ -4108,39 +4108,63 @@ width_bucket_float8(PG_FUNCTION_ARGS)
 
 	if (bound1 < bound2)
 	{
-		/* In all cases, we'll add one at the end */
 		if (operand < bound1)
-			result = -1;
+			result = 0;
 		else if (operand >= bound2)
-			result = count;
-		else if (!isinf(bound2 - bound1))
 		{
-			/* Result of division is surely in [0,1], so this can't overflow */
-			result = count * ((operand - bound1) / (bound2 - bound1));
+			if (pg_add_s32_overflow(count, 1, &result))
+				ereport(ERROR,
+						(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+						 errmsg("integer out of range")));
 		}
 		else
 		{
-			/*
-			 * We get here if bound2 - bound1 overflows DBL_MAX.  Since both
-			 * bounds are finite, their difference can't exceed twice DBL_MAX;
-			 * so we can perform the computation without overflow by dividing
-			 * all the inputs by 2.  That should be exact, too, except in the
-			 * case where a very small operand underflows to zero, which would
-			 * have negligible impact on the result given such large bounds.
-			 */
-			result = count * ((operand / 2 - bound1 / 2) / (bound2 / 2 - bound1 / 2));
+			if (!isinf(bound2 - bound1))
+			{
+				/* The quotient is surely in [0,1], so this can't overflow */
+				result = count * ((operand - bound1) / (bound2 - bound1));
+			}
+			else
+			{
+				/*
+				 * We get here if bound2 - bound1 overflows DBL_MAX.  Since
+				 * both bounds are finite, their difference can't exceed twice
+				 * DBL_MAX; so we can perform the computation without overflow
+				 * by dividing all the inputs by 2.  That should be exact too,
+				 * except in the case where a very small operand underflows to
+				 * zero, which would have negligible impact on the result
+				 * given such large bounds.
+				 */
+				result = count * ((operand / 2 - bound1 / 2) / (bound2 / 2 - bound1 / 2));
+			}
+			/* The quotient could round to 1.0, which would be a lie */
+			if (result >= count)
+				result = count - 1;
+			/* Having done that, we can add 1 without fear of overflow */
+			result++;
 		}
 	}
 	else if (bound1 > bound2)
 	{
 		if (operand > bound1)
-			result = -1;
+			result = 0;
 		else if (operand <= bound2)
-			result = count;
-		else if (!isinf(bound1 - bound2))
-			result = count * ((bound1 - operand) / (bound1 - bound2));
+		{
+			if (pg_add_s32_overflow(count, 1, &result))
+				ereport(ERROR,
+						(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+						 errmsg("integer out of range")));
+		}
 		else
-			result = count * ((bound1 / 2 - operand / 2) / (bound1 / 2 - bound2 / 2));
+		{
+			if (!isinf(bound1 - bound2))
+				result = count * ((bound1 - operand) / (bound1 - bound2));
+			else
+				result = count * ((bound1 / 2 - operand / 2) / (bound1 / 2 - bound2 / 2));
+			if (result >= count)
+				result = count - 1;
+			result++;
+		}
 	}
 	else
 	{
@@ -4150,10 +4174,5 @@ width_bucket_float8(PG_FUNCTION_ARGS)
 		result = 0;				/* keep the compiler quiet */
 	}
 
-	if (pg_add_s32_overflow(result, 1, &result))
-		ereport(ERROR,
-				(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
-				 errmsg("integer out of range")));
-
 	PG_RETURN_INT32(result);
 }
diff --git a/src/backend/utils/adt/numeric.c b/src/backend/utils/adt/numeric.c
index a83feea396..3c3184f15b 100644
--- a/src/backend/utils/adt/numeric.c
+++ b/src/backend/utils/adt/numeric.c
@@ -1907,7 +1907,7 @@ width_bucket_numeric(PG_FUNCTION_ARGS)
 }
 
 /*
- * If 'operand' is not outside the bucket range, determine the correct
+ * 'operand' is inside the bucket range, so determine the correct
  * bucket for it to go. The calculations performed by this function
  * are derived directly from the SQL2003 spec. Note however that we
  * multiply by count before dividing, to avoid unnecessary roundoff error.
@@ -1940,8 +1940,19 @@ compute_bucket(Numeric operand, Numeric bound1, Numeric bound2,
 			operand_var.dscale + count_var->dscale);
 	div_var(&operand_var, &bound2_var, result_var,
 			select_div_scale(&operand_var, &bound2_var), true);
-	add_var(result_var, &const_one, result_var);
-	floor_var(result_var, result_var);
+
+	/*
+	 * Roundoff in the division could give us a quotient exactly equal to
+	 * "count", which is too large.  Clamp so that we do not emit a result
+	 * larger than "count".
+	 */
+	if (cmp_var(result_var, count_var) >= 0)
+		set_var_from_var(count_var, result_var);
+	else
+	{
+		add_var(result_var, &const_one, result_var);
+		floor_var(result_var, result_var);
+	}
 
 	free_var(&bound1_var);
 	free_var(&bound2_var);
diff --git a/src/test/regress/expected/numeric.out b/src/test/regress/expected/numeric.out
index 65a9c75763..72f03c8a38 100644
--- a/src/test/regress/expected/numeric.out
+++ b/src/test/regress/expected/numeric.out
@@ -1473,6 +1473,31 @@ FROM generate_series(0, 110, 10) x;
  110 |   0 |   0
 (12 rows)
 
+-- Another roundoff-error hazard
+SELECT width_bucket(0, -1e100::numeric, 1, 10);
+ width_bucket 
+--------------
+           10
+(1 row)
+
+SELECT width_bucket(0, -1e100::float8, 1, 10);
+ width_bucket 
+--------------
+           10
+(1 row)
+
+SELECT width_bucket(1, 1e100::numeric, 0, 10);
+ width_bucket 
+--------------
+           10
+(1 row)
+
+SELECT width_bucket(1, 1e100::float8, 0, 10);
+ width_bucket 
+--------------
+           10
+(1 row)
+
 -- Check cases that could trigger overflow or underflow within the calculation
 SELECT oper, low, high, cnt, width_bucket(oper, low, high, cnt)
 FROM
diff --git a/src/test/regress/sql/numeric.sql b/src/test/regress/sql/numeric.sql
index 07ff98741f..83fc386333 100644
--- a/src/test/regress/sql/numeric.sql
+++ b/src/test/regress/sql/numeric.sql
@@ -909,6 +909,11 @@ FROM generate_series(0, 110, 10) x;
 SELECT x, width_bucket(x::float8, 100, 10, 9) as flt,
        width_bucket(x::numeric, 100, 10, 9) as num
 FROM generate_series(0, 110, 10) x;
+-- Another roundoff-error hazard
+SELECT width_bucket(0, -1e100::numeric, 1, 10);
+SELECT width_bucket(0, -1e100::float8, 1, 10);
+SELECT width_bucket(1, 1e100::numeric, 0, 10);
+SELECT width_bucket(1, 1e100::float8, 0, 10);
 
 -- Check cases that could trigger overflow or underflow within the calculation
 SELECT oper, low, high, cnt, width_bucket(oper, low, high, cnt)