Improve the numeric width_bucket() computation.

Formerly, the computation of the bucket index involved calling
div_var() with a scale determined by select_div_scale(), and then
taking the floor of the result. That involved computing anything from
16 to 1000 digits after the decimal point, only for floor_var() to
throw them away. In addition, the quotient was computed with rounding
in the final digit, which meant that in rare cases the whole result
could round up to the wrong bucket, and could exceed count. Thus it
was also necessary to clamp the result to the range [1, count], though
that didn't prevent the result being in the wrong internal bucket.

Instead, compute the quotient using floor division, which guarantees
the correct result, as specified by the SQL spec, and doesn't need to
be clamped. This is both much simpler and more efficient, since it no
longer computes any quotient digits after the decimal point.

In addition, it is not necessary to have separate code to handle
reversed bounds, since the signs cancel out when dividing.

As with b0e9e4d76c and a2a0c7c29e, no back-patch.

Dean Rasheed, reviewed by Joel Jacobson.

Discussion: https://postgr.es/m/CAEZATCVbJH%2BLE9EXW8Rk3AxLe%3DjbOk2yrT_AUJGGh5Rah6zoeg%40mail.gmail.com
This commit is contained in:
Dean Rasheed 2024-07-10 20:07:20 +01:00
parent 628c1d1f2c
commit 0dcf753bd8

View File

@ -608,7 +608,7 @@ static void round_var(NumericVar *var, int rscale);
static void trunc_var(NumericVar *var, int rscale);
static void strip_var(NumericVar *var);
static void compute_bucket(Numeric operand, Numeric bound1, Numeric bound2,
const NumericVar *count_var, bool reversed_bounds,
const NumericVar *count_var,
NumericVar *result_var);
static void accum_sum_add(NumericSumAccum *accum, const NumericVar *val);
@ -1896,7 +1896,7 @@ width_bucket_numeric(PG_FUNCTION_ARGS)
else if (cmp_numerics(operand, bound2) >= 0)
add_var(&count_var, &const_one, &result_var);
else
compute_bucket(operand, bound1, bound2, &count_var, false,
compute_bucket(operand, bound1, bound2, &count_var,
&result_var);
break;
@ -1907,7 +1907,7 @@ width_bucket_numeric(PG_FUNCTION_ARGS)
else if (cmp_numerics(operand, bound2) <= 0)
add_var(&count_var, &const_one, &result_var);
else
compute_bucket(operand, bound1, bound2, &count_var, true,
compute_bucket(operand, bound1, bound2, &count_var,
&result_var);
break;
}
@ -1926,14 +1926,13 @@ width_bucket_numeric(PG_FUNCTION_ARGS)
/*
* 'operand' is inside the bucket range, so determine the correct
* bucket for it to go. The calculations performed by this function
* bucket for it to go in. The calculations performed by this function
* are derived directly from the SQL2003 spec. Note however that we
* multiply by count before dividing, to avoid unnecessary roundoff error.
*/
static void
compute_bucket(Numeric operand, Numeric bound1, Numeric bound2,
const NumericVar *count_var, bool reversed_bounds,
NumericVar *result_var)
const NumericVar *count_var, NumericVar *result_var)
{
NumericVar bound1_var;
NumericVar bound2_var;
@ -1943,34 +1942,22 @@ compute_bucket(Numeric operand, Numeric bound1, Numeric bound2,
init_var_from_num(bound2, &bound2_var);
init_var_from_num(operand, &operand_var);
if (!reversed_bounds)
{
sub_var(&operand_var, &bound1_var, &operand_var);
sub_var(&bound2_var, &bound1_var, &bound2_var);
}
else
{
sub_var(&bound1_var, &operand_var, &operand_var);
sub_var(&bound1_var, &bound2_var, &bound2_var);
}
/*
* Per spec, bound1 is inclusive and bound2 is exclusive, and so we have
* bound1 <= operand < bound2 or bound1 >= operand > bound2. Either way,
* the result is ((operand - bound1) * count) / (bound2 - bound1) + 1,
* where the quotient is computed using floor division (i.e., division to
* zero decimal places with truncation), which guarantees that the result
* is in the range [1, count]. Reversing the bounds doesn't affect the
* computation, because the signs cancel out when dividing.
*/
sub_var(&operand_var, &bound1_var, &operand_var);
sub_var(&bound2_var, &bound1_var, &bound2_var);
mul_var(&operand_var, count_var, &operand_var,
operand_var.dscale + count_var->dscale);
div_var(&operand_var, &bound2_var, result_var,
select_div_scale(&operand_var, &bound2_var), true);
/*
* Roundoff in the division could give us a quotient exactly equal to
* "count", which is too large. Clamp so that we do not emit a result
* larger than "count".
*/
if (cmp_var(result_var, count_var) >= 0)
set_var_from_var(count_var, result_var);
else
{
add_var(result_var, &const_one, result_var);
floor_var(result_var, result_var);
}
div_var(&operand_var, &bound2_var, result_var, 0, false);
add_var(result_var, &const_one, result_var);
free_var(&bound1_var);
free_var(&bound2_var);