Fix behavior of float aggregates for single Inf or NaN inputs.
When there is just one non-null input value, and it is infinity or NaN, aggregates such as stddev_pop and covar_pop should produce a NaN result, because the calculation is not well-defined. They used to do so, but since we adopted Youngs-Cramer aggregation in commit e954a727f, they produced zero instead. That's an oversight, so fix it. Add tests exercising these edge cases. Affected aggregates are var_pop(double precision) stddev_pop(double precision) var_pop(real) stddev_pop(real) regr_sxx(double precision,double precision) regr_syy(double precision,double precision) regr_sxy(double precision,double precision) regr_r2(double precision,double precision) regr_slope(double precision,double precision) regr_intercept(double precision,double precision) covar_pop(double precision,double precision) corr(double precision,double precision) Back-patch to v12 where the behavior change was accidentally introduced. Report and patch by me; thanks to Dean Rasheed for review. Discussion: https://postgr.es/m/353062.1591898766@sss.pgh.pa.us
This commit is contained in:
parent
e745bcc001
commit
33dd9bb3b0
@ -2925,6 +2925,17 @@ float8_accum(PG_FUNCTION_ARGS)
|
|||||||
Sxx = get_float8_nan();
|
Sxx = get_float8_nan();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* At the first input, we normally can leave Sxx as 0. However, if
|
||||||
|
* the first input is Inf or NaN, we'd better force Sxx to NaN;
|
||||||
|
* otherwise we will falsely report variance zero when there are no
|
||||||
|
* more inputs.
|
||||||
|
*/
|
||||||
|
if (isnan(newval) || isinf(newval))
|
||||||
|
Sxx = get_float8_nan();
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If we're invoked as an aggregate, we can cheat and modify our first
|
* If we're invoked as an aggregate, we can cheat and modify our first
|
||||||
@ -2999,6 +3010,17 @@ float4_accum(PG_FUNCTION_ARGS)
|
|||||||
Sxx = get_float8_nan();
|
Sxx = get_float8_nan();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* At the first input, we normally can leave Sxx as 0. However, if
|
||||||
|
* the first input is Inf or NaN, we'd better force Sxx to NaN;
|
||||||
|
* otherwise we will falsely report variance zero when there are no
|
||||||
|
* more inputs.
|
||||||
|
*/
|
||||||
|
if (isnan(newval) || isinf(newval))
|
||||||
|
Sxx = get_float8_nan();
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If we're invoked as an aggregate, we can cheat and modify our first
|
* If we're invoked as an aggregate, we can cheat and modify our first
|
||||||
@ -3225,6 +3247,19 @@ float8_regr_accum(PG_FUNCTION_ARGS)
|
|||||||
Sxy = get_float8_nan();
|
Sxy = get_float8_nan();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* At the first input, we normally can leave Sxx et al as 0. However,
|
||||||
|
* if the first input is Inf or NaN, we'd better force the dependent
|
||||||
|
* sums to NaN; otherwise we will falsely report variance zero when
|
||||||
|
* there are no more inputs.
|
||||||
|
*/
|
||||||
|
if (isnan(newvalX) || isinf(newvalX))
|
||||||
|
Sxx = Sxy = get_float8_nan();
|
||||||
|
if (isnan(newvalY) || isinf(newvalY))
|
||||||
|
Syy = Sxy = get_float8_nan();
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If we're invoked as an aggregate, we can cheat and modify our first
|
* If we're invoked as an aggregate, we can cheat and modify our first
|
||||||
|
@ -127,7 +127,79 @@ SELECT var_samp(b::numeric) FROM aggtest;
|
|||||||
|
|
||||||
-- population variance is defined for a single tuple, sample variance
|
-- population variance is defined for a single tuple, sample variance
|
||||||
-- is not
|
-- is not
|
||||||
SELECT var_pop(1.0), var_samp(2.0);
|
SELECT var_pop(1.0::float8), var_samp(2.0::float8);
|
||||||
|
var_pop | var_samp
|
||||||
|
---------+----------
|
||||||
|
0 |
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT stddev_pop(3.0::float8), stddev_samp(4.0::float8);
|
||||||
|
stddev_pop | stddev_samp
|
||||||
|
------------+-------------
|
||||||
|
0 |
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT var_pop('inf'::float8), var_samp('inf'::float8);
|
||||||
|
var_pop | var_samp
|
||||||
|
---------+----------
|
||||||
|
NaN |
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT stddev_pop('inf'::float8), stddev_samp('inf'::float8);
|
||||||
|
stddev_pop | stddev_samp
|
||||||
|
------------+-------------
|
||||||
|
NaN |
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT var_pop('nan'::float8), var_samp('nan'::float8);
|
||||||
|
var_pop | var_samp
|
||||||
|
---------+----------
|
||||||
|
NaN |
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT stddev_pop('nan'::float8), stddev_samp('nan'::float8);
|
||||||
|
stddev_pop | stddev_samp
|
||||||
|
------------+-------------
|
||||||
|
NaN |
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT var_pop(1.0::float4), var_samp(2.0::float4);
|
||||||
|
var_pop | var_samp
|
||||||
|
---------+----------
|
||||||
|
0 |
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT stddev_pop(3.0::float4), stddev_samp(4.0::float4);
|
||||||
|
stddev_pop | stddev_samp
|
||||||
|
------------+-------------
|
||||||
|
0 |
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT var_pop('inf'::float4), var_samp('inf'::float4);
|
||||||
|
var_pop | var_samp
|
||||||
|
---------+----------
|
||||||
|
NaN |
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT stddev_pop('inf'::float4), stddev_samp('inf'::float4);
|
||||||
|
stddev_pop | stddev_samp
|
||||||
|
------------+-------------
|
||||||
|
NaN |
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT var_pop('nan'::float4), var_samp('nan'::float4);
|
||||||
|
var_pop | var_samp
|
||||||
|
---------+----------
|
||||||
|
NaN |
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT stddev_pop('nan'::float4), stddev_samp('nan'::float4);
|
||||||
|
stddev_pop | stddev_samp
|
||||||
|
------------+-------------
|
||||||
|
NaN |
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT var_pop(1.0::numeric), var_samp(2.0::numeric);
|
||||||
var_pop | var_samp
|
var_pop | var_samp
|
||||||
---------+----------
|
---------+----------
|
||||||
0 |
|
0 |
|
||||||
@ -139,6 +211,18 @@ SELECT stddev_pop(3.0::numeric), stddev_samp(4.0::numeric);
|
|||||||
0 |
|
0 |
|
||||||
(1 row)
|
(1 row)
|
||||||
|
|
||||||
|
SELECT var_pop('nan'::numeric), var_samp('nan'::numeric);
|
||||||
|
var_pop | var_samp
|
||||||
|
---------+----------
|
||||||
|
NaN | NaN
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT stddev_pop('nan'::numeric), stddev_samp('nan'::numeric);
|
||||||
|
stddev_pop | stddev_samp
|
||||||
|
------------+-------------
|
||||||
|
NaN | NaN
|
||||||
|
(1 row)
|
||||||
|
|
||||||
-- verify correct results for null and NaN inputs
|
-- verify correct results for null and NaN inputs
|
||||||
select sum(null::int4) from generate_series(1,3);
|
select sum(null::int4) from generate_series(1,3);
|
||||||
sum
|
sum
|
||||||
@ -299,6 +383,25 @@ SELECT corr(b, a) FROM aggtest;
|
|||||||
0.139634516517873
|
0.139634516517873
|
||||||
(1 row)
|
(1 row)
|
||||||
|
|
||||||
|
-- check single-tuple behavior
|
||||||
|
SELECT covar_pop(1::float8,2::float8), covar_samp(3::float8,4::float8);
|
||||||
|
covar_pop | covar_samp
|
||||||
|
-----------+------------
|
||||||
|
0 |
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT covar_pop(1::float8,'inf'::float8), covar_samp(3::float8,'inf'::float8);
|
||||||
|
covar_pop | covar_samp
|
||||||
|
-----------+------------
|
||||||
|
NaN |
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT covar_pop(1::float8,'nan'::float8), covar_samp(3::float8,'nan'::float8);
|
||||||
|
covar_pop | covar_samp
|
||||||
|
-----------+------------
|
||||||
|
NaN |
|
||||||
|
(1 row)
|
||||||
|
|
||||||
-- test accum and combine functions directly
|
-- test accum and combine functions directly
|
||||||
CREATE TABLE regr_test (x float8, y float8);
|
CREATE TABLE regr_test (x float8, y float8);
|
||||||
INSERT INTO regr_test VALUES (10,150),(20,250),(30,350),(80,540),(100,200);
|
INSERT INTO regr_test VALUES (10,150),(20,250),(30,350),(80,540),(100,200);
|
||||||
|
@ -39,8 +39,22 @@ SELECT var_samp(b::numeric) FROM aggtest;
|
|||||||
|
|
||||||
-- population variance is defined for a single tuple, sample variance
|
-- population variance is defined for a single tuple, sample variance
|
||||||
-- is not
|
-- is not
|
||||||
SELECT var_pop(1.0), var_samp(2.0);
|
SELECT var_pop(1.0::float8), var_samp(2.0::float8);
|
||||||
|
SELECT stddev_pop(3.0::float8), stddev_samp(4.0::float8);
|
||||||
|
SELECT var_pop('inf'::float8), var_samp('inf'::float8);
|
||||||
|
SELECT stddev_pop('inf'::float8), stddev_samp('inf'::float8);
|
||||||
|
SELECT var_pop('nan'::float8), var_samp('nan'::float8);
|
||||||
|
SELECT stddev_pop('nan'::float8), stddev_samp('nan'::float8);
|
||||||
|
SELECT var_pop(1.0::float4), var_samp(2.0::float4);
|
||||||
|
SELECT stddev_pop(3.0::float4), stddev_samp(4.0::float4);
|
||||||
|
SELECT var_pop('inf'::float4), var_samp('inf'::float4);
|
||||||
|
SELECT stddev_pop('inf'::float4), stddev_samp('inf'::float4);
|
||||||
|
SELECT var_pop('nan'::float4), var_samp('nan'::float4);
|
||||||
|
SELECT stddev_pop('nan'::float4), stddev_samp('nan'::float4);
|
||||||
|
SELECT var_pop(1.0::numeric), var_samp(2.0::numeric);
|
||||||
SELECT stddev_pop(3.0::numeric), stddev_samp(4.0::numeric);
|
SELECT stddev_pop(3.0::numeric), stddev_samp(4.0::numeric);
|
||||||
|
SELECT var_pop('nan'::numeric), var_samp('nan'::numeric);
|
||||||
|
SELECT stddev_pop('nan'::numeric), stddev_samp('nan'::numeric);
|
||||||
|
|
||||||
-- verify correct results for null and NaN inputs
|
-- verify correct results for null and NaN inputs
|
||||||
select sum(null::int4) from generate_series(1,3);
|
select sum(null::int4) from generate_series(1,3);
|
||||||
@ -81,6 +95,11 @@ SELECT regr_slope(b, a), regr_intercept(b, a) FROM aggtest;
|
|||||||
SELECT covar_pop(b, a), covar_samp(b, a) FROM aggtest;
|
SELECT covar_pop(b, a), covar_samp(b, a) FROM aggtest;
|
||||||
SELECT corr(b, a) FROM aggtest;
|
SELECT corr(b, a) FROM aggtest;
|
||||||
|
|
||||||
|
-- check single-tuple behavior
|
||||||
|
SELECT covar_pop(1::float8,2::float8), covar_samp(3::float8,4::float8);
|
||||||
|
SELECT covar_pop(1::float8,'inf'::float8), covar_samp(3::float8,'inf'::float8);
|
||||||
|
SELECT covar_pop(1::float8,'nan'::float8), covar_samp(3::float8,'nan'::float8);
|
||||||
|
|
||||||
-- test accum and combine functions directly
|
-- test accum and combine functions directly
|
||||||
CREATE TABLE regr_test (x float8, y float8);
|
CREATE TABLE regr_test (x float8, y float8);
|
||||||
INSERT INTO regr_test VALUES (10,150),(20,250),(30,350),(80,540),(100,200);
|
INSERT INTO regr_test VALUES (10,150),(20,250),(30,350),(80,540),(100,200);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user