Improve the numeric width_bucket() computation.

Formerly, the computation of the bucket index involved calling div_var() with a scale determined by select_div_scale(), and then taking the floor of the result. That involved computing anything from 16 to 1000 digits after the decimal point, only for floor_var() to throw them away. In addition, the quotient was computed with rounding in the final digit, which meant that in rare cases the whole result could round up to the wrong bucket, and could exceed count. Thus it was also necessary to clamp the result to the range [1, count], though that didn't prevent the result being in the wrong internal bucket. Instead, compute the quotient using floor division, which guarantees the correct result, as specified by the SQL spec, and doesn't need to be clamped. This is both much simpler and more efficient, since it no longer computes any quotient digits after the decimal point. In addition, it is not necessary to have separate code to handle reversed bounds, since the signs cancel out when dividing. As with b0e9e4d76c and a2a0c7c29e, no back-patch. Dean Rasheed, reviewed by Joel Jacobson. Discussion: https://postgr.es/m/CAEZATCVbJH%2BLE9EXW8Rk3AxLe%3DjbOk2yrT_AUJGGh5Rah6zoeg%40mail.gmail.com
2024-07-10 20:07:20 +01:00 · 2024-07-10 20:07:20 +01:00 · 0dcf753bd8
commit 0dcf753bd8
parent 628c1d1f2c
1 changed files with 18 additions and 31 deletions
--- a/src/backend/utils/adt/numeric.c
+++ b/src/backend/utils/adt/numeric.c
@ -608,7 +608,7 @@ static void round_var(NumericVar *var, int rscale);
 static void trunc_var(NumericVar *var, int rscale);
 static void strip_var(NumericVar *var);
 static void compute_bucket(Numeric operand, Numeric bound1, Numeric bound2,
-						   const NumericVar *count_var, bool reversed_bounds,
+						   const NumericVar *count_var,
 						   NumericVar *result_var);

 static void accum_sum_add(NumericSumAccum *accum, const NumericVar *val);
@ -1896,7 +1896,7 @@ width_bucket_numeric(PG_FUNCTION_ARGS)
 			else if (cmp_numerics(operand, bound2) >= 0)
 				add_var(&count_var, &const_one, &result_var);
 			else
-				compute_bucket(operand, bound1, bound2, &count_var, false,
+				compute_bucket(operand, bound1, bound2, &count_var,
 							   &result_var);
 			break;

@ -1907,7 +1907,7 @@ width_bucket_numeric(PG_FUNCTION_ARGS)
 			else if (cmp_numerics(operand, bound2) <= 0)
 				add_var(&count_var, &const_one, &result_var);
 			else
-				compute_bucket(operand, bound1, bound2, &count_var, true,
+				compute_bucket(operand, bound1, bound2, &count_var,
 							   &result_var);
 			break;
 	}
@ -1926,14 +1926,13 @@ width_bucket_numeric(PG_FUNCTION_ARGS)

 /*
 * 'operand' is inside the bucket range, so determine the correct
- * bucket for it to go. The calculations performed by this function
+ * bucket for it to go in. The calculations performed by this function
 * are derived directly from the SQL2003 spec. Note however that we
 * multiply by count before dividing, to avoid unnecessary roundoff error.
 */
 static void
 compute_bucket(Numeric operand, Numeric bound1, Numeric bound2,
-			   const NumericVar *count_var, bool reversed_bounds,
-			   NumericVar *result_var)
+			   const NumericVar *count_var, NumericVar *result_var)
 {
 	NumericVar	bound1_var;
 	NumericVar	bound2_var;
@ -1943,34 +1942,22 @@ compute_bucket(Numeric operand, Numeric bound1, Numeric bound2,
 	init_var_from_num(bound2, &bound2_var);
 	init_var_from_num(operand, &operand_var);

-	if (!reversed_bounds)
-	{
-		sub_var(&operand_var, &bound1_var, &operand_var);
-		sub_var(&bound2_var, &bound1_var, &bound2_var);
-	}
-	else
-	{
-		sub_var(&bound1_var, &operand_var, &operand_var);
-		sub_var(&bound1_var, &bound2_var, &bound2_var);
-	}
+	/*
+	 * Per spec, bound1 is inclusive and bound2 is exclusive, and so we have
+	 * bound1 <= operand < bound2 or bound1 >= operand > bound2.  Either way,
+	 * the result is ((operand - bound1) * count) / (bound2 - bound1) + 1,
+	 * where the quotient is computed using floor division (i.e., division to
+	 * zero decimal places with truncation), which guarantees that the result
+	 * is in the range [1, count].  Reversing the bounds doesn't affect the
+	 * computation, because the signs cancel out when dividing.
+	 */
+	sub_var(&operand_var, &bound1_var, &operand_var);
+	sub_var(&bound2_var, &bound1_var, &bound2_var);

 	mul_var(&operand_var, count_var, &operand_var,
 			operand_var.dscale + count_var->dscale);
-	div_var(&operand_var, &bound2_var, result_var,
-			select_div_scale(&operand_var, &bound2_var), true);
-
-	/*
-	 * Roundoff in the division could give us a quotient exactly equal to
-	 * "count", which is too large.  Clamp so that we do not emit a result
-	 * larger than "count".
-	 */
-	if (cmp_var(result_var, count_var) >= 0)
-		set_var_from_var(count_var, result_var);
-	else
-	{
-		add_var(result_var, &const_one, result_var);
-		floor_var(result_var, result_var);
-	}
+	div_var(&operand_var, &bound2_var, result_var, 0, false);
+	add_var(result_var, &const_one, result_var);

 	free_var(&bound1_var);
 	free_var(&bound2_var);