Further tweaking of width_bucket() edge cases.

I realized that the third overflow case I posited in commit b0e9e4d76
actually should be handled in a different way: rather than tolerating
the idea that the quotient could round to 1, we should clamp so that
the output cannot be more than "count" when we know that the operand is
less than bound2.  That being the case, we don't need an overflow-aware
increment in that code path, which leads me to revert the movement of
the pg_add_s32_overflow() call.  (The diff in width_bucket_float8
might be easier to read by comparing against b0e9e4d76^.)

What's more, width_bucket_numeric also has this problem of the quotient
potentially rounding to 1, so add a clamp there too.

As before, I'm not quite convinced that a back-patch is warranted.

Discussion: https://postgr.es/m/391415.1680268470@sss.pgh.pa.us
This commit is contained in:
Tom Lane 2023-03-31 16:29:55 -04:00
parent f0d65c0eaf
commit a2a0c7c29e
4 changed files with 88 additions and 28 deletions

View File

@@ -4108,39 +4108,63 @@ width_bucket_float8(PG_FUNCTION_ARGS)
if (bound1 < bound2)
{
/* In all cases, we'll add one at the end */
if (operand < bound1)
result = -1;
result = 0;
else if (operand >= bound2)
result = count;
else if (!isinf(bound2 - bound1))
{
/* Result of division is surely in [0,1], so this can't overflow */
if (pg_add_s32_overflow(count, 1, &result))
ereport(ERROR,
(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
errmsg("integer out of range")));
}
else
{
if (!isinf(bound2 - bound1))
{
/* The quotient is surely in [0,1], so this can't overflow */
result = count * ((operand - bound1) / (bound2 - bound1));
}
else
{
/*
* We get here if bound2 - bound1 overflows DBL_MAX. Since both
* bounds are finite, their difference can't exceed twice DBL_MAX;
* so we can perform the computation without overflow by dividing
* all the inputs by 2. That should be exact, too, except in the
* case where a very small operand underflows to zero, which would
* have negligible impact on the result given such large bounds.
* We get here if bound2 - bound1 overflows DBL_MAX. Since
* both bounds are finite, their difference can't exceed twice
* DBL_MAX; so we can perform the computation without overflow
* by dividing all the inputs by 2. That should be exact too,
* except in the case where a very small operand underflows to
* zero, which would have negligible impact on the result
* given such large bounds.
*/
result = count * ((operand / 2 - bound1 / 2) / (bound2 / 2 - bound1 / 2));
}
/* The quotient could round to 1.0, which would be a lie */
if (result >= count)
result = count - 1;
/* Having done that, we can add 1 without fear of overflow */
result++;
}
}
else if (bound1 > bound2)
{
if (operand > bound1)
result = -1;
result = 0;
else if (operand <= bound2)
result = count;
else if (!isinf(bound1 - bound2))
{
if (pg_add_s32_overflow(count, 1, &result))
ereport(ERROR,
(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
errmsg("integer out of range")));
}
else
{
if (!isinf(bound1 - bound2))
result = count * ((bound1 - operand) / (bound1 - bound2));
else
result = count * ((bound1 / 2 - operand / 2) / (bound1 / 2 - bound2 / 2));
if (result >= count)
result = count - 1;
result++;
}
}
else
{
@@ -4150,10 +4174,5 @@ width_bucket_float8(PG_FUNCTION_ARGS)
result = 0; /* keep the compiler quiet */
}
if (pg_add_s32_overflow(result, 1, &result))
ereport(ERROR,
(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
errmsg("integer out of range")));
PG_RETURN_INT32(result);
}

View File

@@ -1907,7 +1907,7 @@ width_bucket_numeric(PG_FUNCTION_ARGS)
}
/*
* If 'operand' is not outside the bucket range, determine the correct
* 'operand' is inside the bucket range, so determine the correct
* bucket for it to go. The calculations performed by this function
* are derived directly from the SQL2003 spec. Note however that we
* multiply by count before dividing, to avoid unnecessary roundoff error.
@@ -1940,8 +1940,19 @@ compute_bucket(Numeric operand, Numeric bound1, Numeric bound2,
operand_var.dscale + count_var->dscale);
div_var(&operand_var, &bound2_var, result_var,
select_div_scale(&operand_var, &bound2_var), true);
/*
* Roundoff in the division could give us a quotient exactly equal to
* "count", which is too large. Clamp so that we do not emit a result
* larger than "count".
*/
if (cmp_var(result_var, count_var) >= 0)
set_var_from_var(count_var, result_var);
else
{
add_var(result_var, &const_one, result_var);
floor_var(result_var, result_var);
}
free_var(&bound1_var);
free_var(&bound2_var);

View File

@@ -1473,6 +1473,31 @@ FROM generate_series(0, 110, 10) x;
110 | 0 | 0
(12 rows)
-- Another roundoff-error hazard
SELECT width_bucket(0, -1e100::numeric, 1, 10);
width_bucket
--------------
10
(1 row)
SELECT width_bucket(0, -1e100::float8, 1, 10);
width_bucket
--------------
10
(1 row)
SELECT width_bucket(1, 1e100::numeric, 0, 10);
width_bucket
--------------
10
(1 row)
SELECT width_bucket(1, 1e100::float8, 0, 10);
width_bucket
--------------
10
(1 row)
-- Check cases that could trigger overflow or underflow within the calculation
SELECT oper, low, high, cnt, width_bucket(oper, low, high, cnt)
FROM

View File

@@ -909,6 +909,11 @@ FROM generate_series(0, 110, 10) x;
SELECT x, width_bucket(x::float8, 100, 10, 9) as flt,
width_bucket(x::numeric, 100, 10, 9) as num
FROM generate_series(0, 110, 10) x;
-- Another roundoff-error hazard
SELECT width_bucket(0, -1e100::numeric, 1, 10);
SELECT width_bucket(0, -1e100::float8, 1, 10);
SELECT width_bucket(1, 1e100::numeric, 0, 10);
SELECT width_bucket(1, 1e100::float8, 0, 10);
-- Check cases that could trigger overflow or underflow within the calculation
SELECT oper, low, high, cnt, width_bucket(oper, low, high, cnt)