From 69c8fbac201652282e18b0e2e301d4ada991fbde Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Sat, 16 Nov 2013 18:46:34 -0500
Subject: [PATCH] Improve performance of numeric sum(), avg(), stddev(),
 variance(), etc.

This patch improves performance of most built-in aggregates that formerly
used a NUMERIC or NUMERIC array as their transition type; this includes
not only aggregates on numeric inputs, but some aggregates on integer
inputs where overflow of an int8 value is a possibility.  The code now
uses a special-purpose data structure to avoid array construction and
deconstruction overhead, as well as packing and unpacking overhead for
numeric values.

These aggregates' transition type is now declared as INTERNAL, since
it doesn't correspond to any SQL data type.  To keep the planner from
thinking that that means a lot of storage will be used, we make use
of the just-added pg_aggregate.aggtransspace feature.  The space estimate
is set to 128 bytes, which is at least in the right ballpark.

Hadi Moshayedi, reviewed by Pavel Stehule and Tomas Vondra
---
 src/backend/utils/adt/numeric.c          | 376 ++++++++++++++---------
 src/include/catalog/catversion.h         |   2 +-
 src/include/catalog/pg_aggregate.h       |  56 ++--
 src/include/catalog/pg_proc.h            |  24 +-
 src/include/utils/builtins.h             |   1 +
 src/test/regress/expected/aggregates.out |  61 ++++
 src/test/regress/sql/aggregates.sql      |  12 +
 7 files changed, 354 insertions(+), 178 deletions(-)

diff --git a/src/backend/utils/adt/numeric.c b/src/backend/utils/adt/numeric.c
index b4d639428a..0c28b771ff 100644
--- a/src/backend/utils/adt/numeric.c
+++ b/src/backend/utils/adt/numeric.c
@@ -2464,106 +2464,145 @@ numeric_float4(PG_FUNCTION_ARGS)
  *
  * Aggregate functions
  *
- * The transition datatype for all these aggregates is a 3-element array
- * of Numeric, holding the values N, sum(X), sum(X*X) in that order.
+ * The transition datatype for all these aggregates is declared as INTERNAL.
+ * Actually, it's a pointer to a NumericAggState allocated in the aggregate
+ * context.  The digit buffers for the NumericVars will be there too.
  *
- * We represent N as a numeric mainly to avoid having to build a special
- * datatype; it's unlikely it'd overflow an int4, but ...
+ * Note that the transition functions don't bother to create a NumericAggState
+ * until they see the first non-null input value; therefore, the final
+ * functions will never see N == 0.  (The case is represented as a NULL
+ * state pointer, instead.)
  *
  * ----------------------------------------------------------------------
  */
 
-static ArrayType *
-do_numeric_accum(ArrayType *transarray, Numeric newval)
+typedef struct NumericAggState
 {
-	Datum	   *transdatums;
-	int			ndatums;
-	Datum		N,
-				sumX,
-				sumX2;
-	ArrayType  *result;
+	bool		calcSumX2;		/* if true, calculate sumX2 */
+	bool		isNaN;			/* true if any processed number was NaN */
+	MemoryContext agg_context;	/* context we're calculating in */
+	int64		N;				/* count of processed numbers */
+	NumericVar	sumX;			/* sum of processed numbers */
+	NumericVar	sumX2;			/* sum of squares of processed numbers */
+} NumericAggState;
 
-	/* We assume the input is array of numeric */
-	deconstruct_array(transarray,
-					  NUMERICOID, -1, false, 'i',
-					  &transdatums, NULL, &ndatums);
-	if (ndatums != 3)
-		elog(ERROR, "expected 3-element numeric array");
-	N = transdatums[0];
-	sumX = transdatums[1];
-	sumX2 = transdatums[2];
+/*
+ * Prepare state data for a numeric aggregate function that needs to compute
+ * sum, count and optionally sum of squares of the input.
+ */
+static NumericAggState *
+makeNumericAggState(FunctionCallInfo fcinfo, bool calcSumX2)
+{
+	NumericAggState *state;
+	MemoryContext agg_context;
+	MemoryContext old_context;
 
-	N = DirectFunctionCall1(numeric_inc, N);
-	sumX = DirectFunctionCall2(numeric_add, sumX,
-							   NumericGetDatum(newval));
-	sumX2 = DirectFunctionCall2(numeric_add, sumX2,
-								DirectFunctionCall2(numeric_mul,
-													NumericGetDatum(newval),
-													NumericGetDatum(newval)));
+	if (!AggCheckCallContext(fcinfo, &agg_context))
+		elog(ERROR, "aggregate function called in non-aggregate context");
 
-	transdatums[0] = N;
-	transdatums[1] = sumX;
-	transdatums[2] = sumX2;
+	old_context = MemoryContextSwitchTo(agg_context);
 
-	result = construct_array(transdatums, 3,
-							 NUMERICOID, -1, false, 'i');
+	state = (NumericAggState *) palloc0(sizeof(NumericAggState));
+	state->calcSumX2 = calcSumX2;
+	state->agg_context = agg_context;
 
-	return result;
+	MemoryContextSwitchTo(old_context);
+
+	return state;
 }
 
 /*
- * Improve avg performance by not caclulating sum(X*X).
+ * Accumulate a new input value for numeric aggregate functions.
  */
-static ArrayType *
-do_numeric_avg_accum(ArrayType *transarray, Numeric newval)
+static void
+do_numeric_accum(NumericAggState *state, Numeric newval)
 {
-	Datum	   *transdatums;
-	int			ndatums;
-	Datum		N,
-				sumX;
-	ArrayType  *result;
+	NumericVar	X;
+	NumericVar	X2;
+	MemoryContext old_context;
 
-	/* We assume the input is array of numeric */
-	deconstruct_array(transarray,
-					  NUMERICOID, -1, false, 'i',
-					  &transdatums, NULL, &ndatums);
-	if (ndatums != 2)
-		elog(ERROR, "expected 2-element numeric array");
-	N = transdatums[0];
-	sumX = transdatums[1];
+	/* result is NaN if any processed number is NaN */
+	if (state->isNaN || NUMERIC_IS_NAN(newval))
+	{
+		state->isNaN = true;
+		return;
+	}
 
-	N = DirectFunctionCall1(numeric_inc, N);
-	sumX = DirectFunctionCall2(numeric_add, sumX,
-							   NumericGetDatum(newval));
+	/* load processed number in short-lived context */
+	init_var_from_num(newval, &X);
 
-	transdatums[0] = N;
-	transdatums[1] = sumX;
+	/* if we need X^2, calculate that in short-lived context */
+	if (state->calcSumX2)
+	{
+		init_var(&X2);
+		mul_var(&X, &X, &X2, X.dscale * 2);
+	}
 
-	result = construct_array(transdatums, 2,
-							 NUMERICOID, -1, false, 'i');
+	/* The rest of this needs to work in the aggregate context */
+	old_context = MemoryContextSwitchTo(state->agg_context);
 
-	return result;
+	if (state->N++ > 0)
+	{
+		/* Accumulate sums */
+		add_var(&X, &(state->sumX), &(state->sumX));
+
+		if (state->calcSumX2)
+			add_var(&X2, &(state->sumX2), &(state->sumX2));
+	}
+	else
+	{
+		/* First input, so initialize sums */
+		set_var_from_var(&X, &(state->sumX));
+
+		if (state->calcSumX2)
+			set_var_from_var(&X2, &(state->sumX2));
+	}
+
+	MemoryContextSwitchTo(old_context);
 }
 
+/*
+ * Generic transition function for numeric aggregates that require sumX2.
+ */
 Datum
 numeric_accum(PG_FUNCTION_ARGS)
 {
-	ArrayType  *transarray = PG_GETARG_ARRAYTYPE_P(0);
-	Numeric		newval = PG_GETARG_NUMERIC(1);
+	NumericAggState *state;
 
-	PG_RETURN_ARRAYTYPE_P(do_numeric_accum(transarray, newval));
+	state = PG_ARGISNULL(0) ? NULL : (NumericAggState *) PG_GETARG_POINTER(0);
+
+	if (!PG_ARGISNULL(1))
+	{
+		/* Create the state data when we see the first non-null input. */
+		if (state == NULL)
+			state = makeNumericAggState(fcinfo, true);
+
+		do_numeric_accum(state, PG_GETARG_NUMERIC(1));
+	}
+
+	PG_RETURN_POINTER(state);
 }
 
 /*
- * Optimized case for average of numeric.
+ * Generic transition function for numeric aggregates that don't require sumX2.
  */
 Datum
 numeric_avg_accum(PG_FUNCTION_ARGS)
 {
-	ArrayType  *transarray = PG_GETARG_ARRAYTYPE_P(0);
-	Numeric		newval = PG_GETARG_NUMERIC(1);
+	NumericAggState *state;
 
-	PG_RETURN_ARRAYTYPE_P(do_numeric_avg_accum(transarray, newval));
+	state = PG_ARGISNULL(0) ? NULL : (NumericAggState *) PG_GETARG_POINTER(0);
+
+	if (!PG_ARGISNULL(1))
+	{
+		/* Create the state data when we see the first non-null input. */
+		if (state == NULL)
+			state = makeNumericAggState(fcinfo, false);
+
+		do_numeric_accum(state, PG_GETARG_NUMERIC(1));
+	}
+
+	PG_RETURN_POINTER(state);
 }
 
 /*
@@ -2578,87 +2617,142 @@ numeric_avg_accum(PG_FUNCTION_ARGS)
 Datum
 int2_accum(PG_FUNCTION_ARGS)
 {
-	ArrayType  *transarray = PG_GETARG_ARRAYTYPE_P(0);
-	Datum		newval2 = PG_GETARG_DATUM(1);
-	Numeric		newval;
+	NumericAggState *state;
 
-	newval = DatumGetNumeric(DirectFunctionCall1(int2_numeric, newval2));
+	state = PG_ARGISNULL(0) ? NULL : (NumericAggState *) PG_GETARG_POINTER(0);
 
-	PG_RETURN_ARRAYTYPE_P(do_numeric_accum(transarray, newval));
+	if (!PG_ARGISNULL(1))
+	{
+		Numeric		newval;
+
+		newval = DatumGetNumeric(DirectFunctionCall1(int2_numeric,
+													 PG_GETARG_DATUM(1)));
+
+		/* Create the state data when we see the first non-null input. */
+		if (state == NULL)
+			state = makeNumericAggState(fcinfo, true);
+
+		do_numeric_accum(state, newval);
+	}
+
+	PG_RETURN_POINTER(state);
 }
 
 Datum
 int4_accum(PG_FUNCTION_ARGS)
 {
-	ArrayType  *transarray = PG_GETARG_ARRAYTYPE_P(0);
-	Datum		newval4 = PG_GETARG_DATUM(1);
-	Numeric		newval;
+	NumericAggState *state;
 
-	newval = DatumGetNumeric(DirectFunctionCall1(int4_numeric, newval4));
+	state = PG_ARGISNULL(0) ? NULL : (NumericAggState *) PG_GETARG_POINTER(0);
 
-	PG_RETURN_ARRAYTYPE_P(do_numeric_accum(transarray, newval));
+	if (!PG_ARGISNULL(1))
+	{
+		Numeric		newval;
+
+		newval = DatumGetNumeric(DirectFunctionCall1(int4_numeric,
+													 PG_GETARG_DATUM(1)));
+
+		/* Create the state data when we see the first non-null input. */
+		if (state == NULL)
+			state = makeNumericAggState(fcinfo, true);
+
+		do_numeric_accum(state, newval);
+	}
+
+	PG_RETURN_POINTER(state);
 }
 
 Datum
 int8_accum(PG_FUNCTION_ARGS)
 {
-	ArrayType  *transarray = PG_GETARG_ARRAYTYPE_P(0);
-	Datum		newval8 = PG_GETARG_DATUM(1);
-	Numeric		newval;
+	NumericAggState *state;
 
-	newval = DatumGetNumeric(DirectFunctionCall1(int8_numeric, newval8));
+	state = PG_ARGISNULL(0) ? NULL : (NumericAggState *) PG_GETARG_POINTER(0);
 
-	PG_RETURN_ARRAYTYPE_P(do_numeric_accum(transarray, newval));
+	if (!PG_ARGISNULL(1))
+	{
+		Numeric		newval;
+
+		newval = DatumGetNumeric(DirectFunctionCall1(int8_numeric,
+													 PG_GETARG_DATUM(1)));
+
+		/* Create the state data when we see the first non-null input. */
+		if (state == NULL)
+			state = makeNumericAggState(fcinfo, true);
+
+		do_numeric_accum(state, newval);
+	}
+
+	PG_RETURN_POINTER(state);
 }
 
 /*
- * Optimized case for average of int8.
+ * Transition function for int8 input when we don't need sumX2.
  */
 Datum
 int8_avg_accum(PG_FUNCTION_ARGS)
 {
-	ArrayType  *transarray = PG_GETARG_ARRAYTYPE_P(0);
-	Datum		newval8 = PG_GETARG_DATUM(1);
-	Numeric		newval;
+	NumericAggState *state;
 
-	newval = DatumGetNumeric(DirectFunctionCall1(int8_numeric, newval8));
+	state = PG_ARGISNULL(0) ? NULL : (NumericAggState *) PG_GETARG_POINTER(0);
 
-	PG_RETURN_ARRAYTYPE_P(do_numeric_avg_accum(transarray, newval));
+	if (!PG_ARGISNULL(1))
+	{
+		Numeric		newval;
+
+		newval = DatumGetNumeric(DirectFunctionCall1(int8_numeric,
+													 PG_GETARG_DATUM(1)));
+
+		/* Create the state data when we see the first non-null input. */
+		if (state == NULL)
+			state = makeNumericAggState(fcinfo, false);
+
+		do_numeric_accum(state, newval);
+	}
+
+	PG_RETURN_POINTER(state);
 }
 
 
 Datum
 numeric_avg(PG_FUNCTION_ARGS)
 {
-	ArrayType  *transarray = PG_GETARG_ARRAYTYPE_P(0);
-	Datum	   *transdatums;
-	int			ndatums;
-	Numeric		N,
-				sumX;
+	NumericAggState *state;
+	Datum		N_datum;
+	Datum		sumX_datum;
 
-	/* We assume the input is array of numeric */
-	deconstruct_array(transarray,
-					  NUMERICOID, -1, false, 'i',
-					  &transdatums, NULL, &ndatums);
-	if (ndatums != 2)
-		elog(ERROR, "expected 2-element numeric array");
-	N = DatumGetNumeric(transdatums[0]);
-	sumX = DatumGetNumeric(transdatums[1]);
-
-	/* SQL defines AVG of no values to be NULL */
-	/* N is zero iff no digits (cf. numeric_uminus) */
-	if (NUMERIC_NDIGITS(N) == 0)
+	state = PG_ARGISNULL(0) ? NULL : (NumericAggState *) PG_GETARG_POINTER(0);
+	if (state == NULL)			/* there were no non-null inputs */
 		PG_RETURN_NULL();
 
-	PG_RETURN_DATUM(DirectFunctionCall2(numeric_div,
-										NumericGetDatum(sumX),
-										NumericGetDatum(N)));
+	if (state->isNaN)			/* there was at least one NaN input */
+		PG_RETURN_NUMERIC(make_result(&const_nan));
+
+	N_datum = DirectFunctionCall1(int8_numeric, Int64GetDatum(state->N));
+	sumX_datum = NumericGetDatum(make_result(&state->sumX));
+
+	PG_RETURN_DATUM(DirectFunctionCall2(numeric_div, sumX_datum, N_datum));
+}
+
+Datum
+numeric_sum(PG_FUNCTION_ARGS)
+{
+	NumericAggState *state;
+
+	state = PG_ARGISNULL(0) ? NULL : (NumericAggState *) PG_GETARG_POINTER(0);
+	if (state == NULL)			/* there were no non-null inputs */
+		PG_RETURN_NULL();
+
+	if (state->isNaN)			/* there was at least one NaN input */
+		PG_RETURN_NUMERIC(make_result(&const_nan));
+
+	PG_RETURN_NUMERIC(make_result(&(state->sumX)));
 }
 
 /*
  * Workhorse routine for the standard deviance and variance
- * aggregates. 'transarray' is the aggregate's transition
- * array. 'variance' specifies whether we should calculate the
+ * aggregates. 'state' is aggregate's transition state.
+ * 'variance' specifies whether we should calculate the
  * variance or the standard deviation. 'sample' indicates whether the
  * caller is interested in the sample or the population
  * variance/stddev.
@@ -2667,16 +2761,11 @@ numeric_avg(PG_FUNCTION_ARGS)
  * *is_null is set to true and NULL is returned.
  */
 static Numeric
-numeric_stddev_internal(ArrayType *transarray,
+numeric_stddev_internal(NumericAggState *state,
 						bool variance, bool sample,
 						bool *is_null)
 {
-	Datum	   *transdatums;
-	int			ndatums;
-	Numeric		N,
-				sumX,
-				sumX2,
-				res;
+	Numeric		res;
 	NumericVar	vN,
 				vsumX,
 				vsumX2,
@@ -2684,22 +2773,25 @@ numeric_stddev_internal(ArrayType *transarray,
 	NumericVar *comp;
 	int			rscale;
 
+	/* Deal with empty input and NaN-input cases */
+	if (state == NULL)
+	{
+		*is_null = true;
+		return NULL;
+	}
+
 	*is_null = false;
 
-	/* We assume the input is array of numeric */
-	deconstruct_array(transarray,
-					  NUMERICOID, -1, false, 'i',
-					  &transdatums, NULL, &ndatums);
-	if (ndatums != 3)
-		elog(ERROR, "expected 3-element numeric array");
-	N = DatumGetNumeric(transdatums[0]);
-	sumX = DatumGetNumeric(transdatums[1]);
-	sumX2 = DatumGetNumeric(transdatums[2]);
-
-	if (NUMERIC_IS_NAN(N) || NUMERIC_IS_NAN(sumX) || NUMERIC_IS_NAN(sumX2))
+	if (state->isNaN)
 		return make_result(&const_nan);
 
-	init_var_from_num(N, &vN);
+	init_var(&vN);
+	init_var(&vsumX);
+	init_var(&vsumX2);
+
+	int8_to_numericvar(state->N, &vN);
+	set_var_from_var(&(state->sumX), &vsumX);
+	set_var_from_var(&(state->sumX2), &vsumX2);
 
 	/*
 	 * Sample stddev and variance are undefined when N <= 1; population stddev
@@ -2719,9 +2811,6 @@ numeric_stddev_internal(ArrayType *transarray,
 	init_var(&vNminus1);
 	sub_var(&vN, &const_one, &vNminus1);
 
-	init_var_from_num(sumX, &vsumX);
-	init_var_from_num(sumX2, &vsumX2);
-
 	/* compute rscale for mul_var calls */
 	rscale = vsumX.dscale * 2;
 
@@ -2758,11 +2847,13 @@ numeric_stddev_internal(ArrayType *transarray,
 Datum
 numeric_var_samp(PG_FUNCTION_ARGS)
 {
+	NumericAggState *state;
 	Numeric		res;
 	bool		is_null;
 
-	res = numeric_stddev_internal(PG_GETARG_ARRAYTYPE_P(0),
-								  true, true, &is_null);
+	state = PG_ARGISNULL(0) ? NULL : (NumericAggState *) PG_GETARG_POINTER(0);
+
+	res = numeric_stddev_internal(state, true, true, &is_null);
 
 	if (is_null)
 		PG_RETURN_NULL();
@@ -2773,11 +2864,13 @@ numeric_var_samp(PG_FUNCTION_ARGS)
 Datum
 numeric_stddev_samp(PG_FUNCTION_ARGS)
 {
+	NumericAggState *state;
 	Numeric		res;
 	bool		is_null;
 
-	res = numeric_stddev_internal(PG_GETARG_ARRAYTYPE_P(0),
-								  false, true, &is_null);
+	state = PG_ARGISNULL(0) ? NULL : (NumericAggState *) PG_GETARG_POINTER(0);
+
+	res = numeric_stddev_internal(state, false, true, &is_null);
 
 	if (is_null)
 		PG_RETURN_NULL();
@@ -2788,11 +2881,13 @@ numeric_stddev_samp(PG_FUNCTION_ARGS)
 Datum
 numeric_var_pop(PG_FUNCTION_ARGS)
 {
+	NumericAggState *state;
 	Numeric		res;
 	bool		is_null;
 
-	res = numeric_stddev_internal(PG_GETARG_ARRAYTYPE_P(0),
-								  true, false, &is_null);
+	state = PG_ARGISNULL(0) ? NULL : (NumericAggState *) PG_GETARG_POINTER(0);
+
+	res = numeric_stddev_internal(state, true, false, &is_null);
 
 	if (is_null)
 		PG_RETURN_NULL();
@@ -2803,11 +2898,13 @@ numeric_var_pop(PG_FUNCTION_ARGS)
 Datum
 numeric_stddev_pop(PG_FUNCTION_ARGS)
 {
+	NumericAggState *state;
 	Numeric		res;
 	bool		is_null;
 
-	res = numeric_stddev_internal(PG_GETARG_ARRAYTYPE_P(0),
-								  false, false, &is_null);
+	state = PG_ARGISNULL(0) ? NULL : (NumericAggState *) PG_GETARG_POINTER(0);
+
+	res = numeric_stddev_internal(state, false, false, &is_null);
 
 	if (is_null)
 		PG_RETURN_NULL();
@@ -2930,6 +3027,9 @@ int4_sum(PG_FUNCTION_ARGS)
 	}
 }
 
+/*
+ * Note: this function is obsolete, it's no longer used for SUM(int8).
+ */
 Datum
 int8_sum(PG_FUNCTION_ARGS)
 {
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h
index cb9021c525..b0286157f7 100644
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -53,6 +53,6 @@
  */
 
 /*							yyyymmddN */
-#define CATALOG_VERSION_NO	201311161
+#define CATALOG_VERSION_NO	201311162
 
 #endif
diff --git a/src/include/catalog/pg_aggregate.h b/src/include/catalog/pg_aggregate.h
index 416d651bf0..034456a315 100644
--- a/src/include/catalog/pg_aggregate.h
+++ b/src/include/catalog/pg_aggregate.h
@@ -80,23 +80,23 @@ typedef FormData_pg_aggregate *Form_pg_aggregate;
  */
 
 /* avg */
-DATA(insert ( 2100	int8_avg_accum	numeric_avg		0	1231	0	"{0,0}" ));
+DATA(insert ( 2100	int8_avg_accum	numeric_avg		0	2281	128	_null_ ));
 DATA(insert ( 2101	int4_avg_accum	int8_avg		0	1016	0	"{0,0}" ));
 DATA(insert ( 2102	int2_avg_accum	int8_avg		0	1016	0	"{0,0}" ));
-DATA(insert ( 2103	numeric_avg_accum	numeric_avg 0	1231	0	"{0,0}" ));
+DATA(insert ( 2103	numeric_avg_accum	numeric_avg 0	2281	128	_null_ ));
 DATA(insert ( 2104	float4_accum	float8_avg		0	1022	0	"{0,0,0}" ));
 DATA(insert ( 2105	float8_accum	float8_avg		0	1022	0	"{0,0,0}" ));
 DATA(insert ( 2106	interval_accum	interval_avg	0	1187	0	"{0 second,0 second}" ));
 
 /* sum */
-DATA(insert ( 2107	int8_sum		-				0	1700	0	_null_ ));
+DATA(insert ( 2107	int8_avg_accum	numeric_sum		0	2281	128	_null_ ));
 DATA(insert ( 2108	int4_sum		-				0	20		0	_null_ ));
 DATA(insert ( 2109	int2_sum		-				0	20		0	_null_ ));
 DATA(insert ( 2110	float4pl		-				0	700		0	_null_ ));
 DATA(insert ( 2111	float8pl		-				0	701		0	_null_ ));
 DATA(insert ( 2112	cash_pl			-				0	790		0	_null_ ));
 DATA(insert ( 2113	interval_pl		-				0	1186	0	_null_ ));
-DATA(insert ( 2114	numeric_add		-				0	1700	0	_null_ ));
+DATA(insert ( 2114	numeric_avg_accum	numeric_sum	0	2281	128	_null_ ));
 
 /* max */
 DATA(insert ( 2115	int8larger		-				413		20		0	_null_ ));
@@ -147,52 +147,52 @@ DATA(insert ( 2147	int8inc_any		-				0		20		0	"0" ));
 DATA(insert ( 2803	int8inc			-				0		20		0	"0" ));
 
 /* var_pop */
-DATA(insert ( 2718	int8_accum	numeric_var_pop 0	1231	0	"{0,0,0}" ));
-DATA(insert ( 2719	int4_accum	numeric_var_pop 0	1231	0	"{0,0,0}" ));
-DATA(insert ( 2720	int2_accum	numeric_var_pop 0	1231	0	"{0,0,0}" ));
+DATA(insert ( 2718	int8_accum	numeric_var_pop 0	2281	128	_null_ ));
+DATA(insert ( 2719	int4_accum	numeric_var_pop 0	2281	128	_null_ ));
+DATA(insert ( 2720	int2_accum	numeric_var_pop 0	2281	128	_null_ ));
 DATA(insert ( 2721	float4_accum	float8_var_pop 0	1022	0	"{0,0,0}" ));
 DATA(insert ( 2722	float8_accum	float8_var_pop 0	1022	0	"{0,0,0}" ));
-DATA(insert ( 2723	numeric_accum  numeric_var_pop 0	1231	0	"{0,0,0}" ));
+DATA(insert ( 2723	numeric_accum  numeric_var_pop 0	2281	128	_null_ ));
 
 /* var_samp */
-DATA(insert ( 2641	int8_accum	numeric_var_samp	0	1231	0	"{0,0,0}" ));
-DATA(insert ( 2642	int4_accum	numeric_var_samp	0	1231	0	"{0,0,0}" ));
-DATA(insert ( 2643	int2_accum	numeric_var_samp	0	1231	0	"{0,0,0}" ));
+DATA(insert ( 2641	int8_accum	numeric_var_samp	0	2281	128	_null_ ));
+DATA(insert ( 2642	int4_accum	numeric_var_samp	0	2281	128	_null_ ));
+DATA(insert ( 2643	int2_accum	numeric_var_samp	0	2281	128	_null_ ));
 DATA(insert ( 2644	float4_accum	float8_var_samp 0	1022	0	"{0,0,0}" ));
 DATA(insert ( 2645	float8_accum	float8_var_samp 0	1022	0	"{0,0,0}" ));
-DATA(insert ( 2646	numeric_accum  numeric_var_samp 0	1231	0	"{0,0,0}" ));
+DATA(insert ( 2646	numeric_accum  numeric_var_samp 0	2281	128	_null_ ));
 
 /* variance: historical Postgres syntax for var_samp */
-DATA(insert ( 2148	int8_accum	numeric_var_samp	0	1231	0	"{0,0,0}" ));
-DATA(insert ( 2149	int4_accum	numeric_var_samp	0	1231	0	"{0,0,0}" ));
-DATA(insert ( 2150	int2_accum	numeric_var_samp	0	1231	0	"{0,0,0}" ));
+DATA(insert ( 2148	int8_accum	numeric_var_samp	0	2281	128	_null_ ));
+DATA(insert ( 2149	int4_accum	numeric_var_samp	0	2281	128	_null_ ));
+DATA(insert ( 2150	int2_accum	numeric_var_samp	0	2281	128	_null_ ));
 DATA(insert ( 2151	float4_accum	float8_var_samp 0	1022	0	"{0,0,0}" ));
 DATA(insert ( 2152	float8_accum	float8_var_samp 0	1022	0	"{0,0,0}" ));
-DATA(insert ( 2153	numeric_accum  numeric_var_samp 0	1231	0	"{0,0,0}" ));
+DATA(insert ( 2153	numeric_accum  numeric_var_samp 0	2281	128	_null_ ));
 
 /* stddev_pop */
-DATA(insert ( 2724	int8_accum	numeric_stddev_pop		0	1231	0	"{0,0,0}" ));
-DATA(insert ( 2725	int4_accum	numeric_stddev_pop		0	1231	0	"{0,0,0}" ));
-DATA(insert ( 2726	int2_accum	numeric_stddev_pop		0	1231	0	"{0,0,0}" ));
+DATA(insert ( 2724	int8_accum	numeric_stddev_pop		0	2281	128	_null_ ));
+DATA(insert ( 2725	int4_accum	numeric_stddev_pop		0	2281	128	_null_ ));
+DATA(insert ( 2726	int2_accum	numeric_stddev_pop		0	2281	128	_null_ ));
 DATA(insert ( 2727	float4_accum	float8_stddev_pop	0	1022	0	"{0,0,0}" ));
 DATA(insert ( 2728	float8_accum	float8_stddev_pop	0	1022	0	"{0,0,0}" ));
-DATA(insert ( 2729	numeric_accum	numeric_stddev_pop	0	1231	0	"{0,0,0}" ));
+DATA(insert ( 2729	numeric_accum	numeric_stddev_pop	0	2281	128	_null_ ));
 
 /* stddev_samp */
-DATA(insert ( 2712	int8_accum	numeric_stddev_samp		0	1231	0	"{0,0,0}" ));
-DATA(insert ( 2713	int4_accum	numeric_stddev_samp		0	1231	0	"{0,0,0}" ));
-DATA(insert ( 2714	int2_accum	numeric_stddev_samp		0	1231	0	"{0,0,0}" ));
+DATA(insert ( 2712	int8_accum	numeric_stddev_samp		0	2281	128	_null_ ));
+DATA(insert ( 2713	int4_accum	numeric_stddev_samp		0	2281	128	_null_ ));
+DATA(insert ( 2714	int2_accum	numeric_stddev_samp		0	2281	128	_null_ ));
 DATA(insert ( 2715	float4_accum	float8_stddev_samp	0	1022	0	"{0,0,0}" ));
 DATA(insert ( 2716	float8_accum	float8_stddev_samp	0	1022	0	"{0,0,0}" ));
-DATA(insert ( 2717	numeric_accum	numeric_stddev_samp 0	1231	0	"{0,0,0}" ));
+DATA(insert ( 2717	numeric_accum	numeric_stddev_samp 0	2281	128	_null_ ));
 
 /* stddev: historical Postgres syntax for stddev_samp */
-DATA(insert ( 2154	int8_accum	numeric_stddev_samp		0	1231	0	"{0,0,0}" ));
-DATA(insert ( 2155	int4_accum	numeric_stddev_samp		0	1231	0	"{0,0,0}" ));
-DATA(insert ( 2156	int2_accum	numeric_stddev_samp		0	1231	0	"{0,0,0}" ));
+DATA(insert ( 2154	int8_accum	numeric_stddev_samp		0	2281	128	_null_ ));
+DATA(insert ( 2155	int4_accum	numeric_stddev_samp		0	2281	128	_null_ ));
+DATA(insert ( 2156	int2_accum	numeric_stddev_samp		0	2281	128	_null_ ));
 DATA(insert ( 2157	float4_accum	float8_stddev_samp	0	1022	0	"{0,0,0}" ));
 DATA(insert ( 2158	float8_accum	float8_stddev_samp	0	1022	0	"{0,0,0}" ));
-DATA(insert ( 2159	numeric_accum	numeric_stddev_samp 0	1231	0	"{0,0,0}" ));
+DATA(insert ( 2159	numeric_accum	numeric_stddev_samp 0	2281	128	_null_ ));
 
 /* SQL2003 binary regression aggregates */
 DATA(insert ( 2818	int8inc_float8_float8		-				0	20		0	"0" ));
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index ca4fc62bcd..c5d5d29155 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -2390,27 +2390,29 @@ DATA(insert OID = 2513 (  float8_stddev_pop PGNSP PGUID 12 1 0 0 0 f f f f t f i
 DESCR("aggregate final function");
 DATA(insert OID = 1832 (  float8_stddev_samp	PGNSP PGUID 12 1 0 0 0 f f f f t f i 1 0 701 "1022" _null_ _null_ _null_ _null_ float8_stddev_samp _null_ _null_ _null_ ));
 DESCR("aggregate final function");
-DATA(insert OID = 1833 (  numeric_accum    PGNSP PGUID 12 1 0 0 0 f f f f t f i 2 0 1231 "1231 1700" _null_ _null_ _null_ _null_ numeric_accum _null_ _null_ _null_ ));
+DATA(insert OID = 1833 (  numeric_accum    PGNSP PGUID 12 1 0 0 0 f f f f f f i 2 0 2281 "2281 1700" _null_ _null_ _null_ _null_ numeric_accum _null_ _null_ _null_ ));
 DESCR("aggregate transition function");
-DATA(insert OID = 2858 (  numeric_avg_accum    PGNSP PGUID 12 1 0 0 0 f f f f t f i 2 0 1231 "1231 1700" _null_ _null_ _null_ _null_ numeric_avg_accum _null_ _null_ _null_ ));
+DATA(insert OID = 2858 (  numeric_avg_accum    PGNSP PGUID 12 1 0 0 0 f f f f f f i 2 0 2281 "2281 1700" _null_ _null_ _null_ _null_ numeric_avg_accum _null_ _null_ _null_ ));
 DESCR("aggregate transition function");
-DATA(insert OID = 1834 (  int2_accum	   PGNSP PGUID 12 1 0 0 0 f f f f t f i 2 0 1231 "1231 21" _null_ _null_ _null_ _null_ int2_accum _null_ _null_ _null_ ));
+DATA(insert OID = 1834 (  int2_accum	   PGNSP PGUID 12 1 0 0 0 f f f f f f i 2 0 2281 "2281 21" _null_ _null_ _null_ _null_ int2_accum _null_ _null_ _null_ ));
 DESCR("aggregate transition function");
-DATA(insert OID = 1835 (  int4_accum	   PGNSP PGUID 12 1 0 0 0 f f f f t f i 2 0 1231 "1231 23" _null_ _null_ _null_ _null_ int4_accum _null_ _null_ _null_ ));
+DATA(insert OID = 1835 (  int4_accum	   PGNSP PGUID 12 1 0 0 0 f f f f f f i 2 0 2281 "2281 23" _null_ _null_ _null_ _null_ int4_accum _null_ _null_ _null_ ));
 DESCR("aggregate transition function");
-DATA(insert OID = 1836 (  int8_accum	   PGNSP PGUID 12 1 0 0 0 f f f f t f i 2 0 1231 "1231 20" _null_ _null_ _null_ _null_ int8_accum _null_ _null_ _null_ ));
+DATA(insert OID = 1836 (  int8_accum	   PGNSP PGUID 12 1 0 0 0 f f f f f f i 2 0 2281 "2281 20" _null_ _null_ _null_ _null_ int8_accum _null_ _null_ _null_ ));
 DESCR("aggregate transition function");
-DATA(insert OID = 2746 (  int8_avg_accum	   PGNSP PGUID 12 1 0 0 0 f f f f t f i 2 0 1231 "1231 20" _null_ _null_ _null_ _null_ int8_avg_accum _null_ _null_ _null_ ));
+DATA(insert OID = 2746 (  int8_avg_accum	   PGNSP PGUID 12 1 0 0 0 f f f f f f i 2 0 2281 "2281 20" _null_ _null_ _null_ _null_ int8_avg_accum _null_ _null_ _null_ ));
 DESCR("aggregate transition function");
-DATA(insert OID = 1837 (  numeric_avg	   PGNSP PGUID 12 1 0 0 0 f f f f t f i 1 0 1700 "1231" _null_ _null_ _null_ _null_ numeric_avg _null_ _null_ _null_ ));
+DATA(insert OID = 3178 (  numeric_sum	   PGNSP PGUID 12 1 0 0 0 f f f f f f i 1 0 1700 "2281" _null_ _null_ _null_ _null_ numeric_sum _null_ _null_ _null_ ));
 DESCR("aggregate final function");
-DATA(insert OID = 2514 (  numeric_var_pop  PGNSP PGUID 12 1 0 0 0 f f f f t f i 1 0 1700 "1231" _null_ _null_ _null_ _null_ numeric_var_pop _null_ _null_ _null_ ));
+DATA(insert OID = 1837 (  numeric_avg	   PGNSP PGUID 12 1 0 0 0 f f f f f f i 1 0 1700 "2281" _null_ _null_ _null_ _null_ numeric_avg _null_ _null_ _null_ ));
 DESCR("aggregate final function");
-DATA(insert OID = 1838 (  numeric_var_samp PGNSP PGUID 12 1 0 0 0 f f f f t f i 1 0 1700 "1231" _null_ _null_ _null_ _null_ numeric_var_samp _null_ _null_ _null_ ));
+DATA(insert OID = 2514 (  numeric_var_pop  PGNSP PGUID 12 1 0 0 0 f f f f f f i 1 0 1700 "2281" _null_ _null_ _null_ _null_ numeric_var_pop _null_ _null_ _null_ ));
 DESCR("aggregate final function");
-DATA(insert OID = 2596 (  numeric_stddev_pop PGNSP PGUID 12 1 0 0 0 f f f f t f i 1 0 1700 "1231" _null_ _null_ _null_ _null_	numeric_stddev_pop _null_ _null_ _null_ ));
+DATA(insert OID = 1838 (  numeric_var_samp PGNSP PGUID 12 1 0 0 0 f f f f f f i 1 0 1700 "2281" _null_ _null_ _null_ _null_ numeric_var_samp _null_ _null_ _null_ ));
 DESCR("aggregate final function");
-DATA(insert OID = 1839 (  numeric_stddev_samp	PGNSP PGUID 12 1 0 0 0 f f f f t f i 1 0 1700 "1231" _null_ _null_ _null_ _null_ numeric_stddev_samp _null_ _null_ _null_ ));
+DATA(insert OID = 2596 (  numeric_stddev_pop PGNSP PGUID 12 1 0 0 0 f f f f f f i 1 0 1700 "2281" _null_ _null_ _null_ _null_	numeric_stddev_pop _null_ _null_ _null_ ));
+DESCR("aggregate final function");
+DATA(insert OID = 1839 (  numeric_stddev_samp	PGNSP PGUID 12 1 0 0 0 f f f f f f i 1 0 1700 "2281" _null_ _null_ _null_ _null_ numeric_stddev_samp _null_ _null_ _null_ ));
 DESCR("aggregate final function");
 DATA(insert OID = 1840 (  int2_sum		   PGNSP PGUID 12 1 0 0 0 f f f f f f i 2 0 20 "20 21" _null_ _null_ _null_ _null_ int2_sum _null_ _null_ _null_ ));
 DESCR("aggregate transition function");
diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h
index ce3f00bc12..f25a9de8d3 100644
--- a/src/include/utils/builtins.h
+++ b/src/include/utils/builtins.h
@@ -989,6 +989,7 @@ extern Datum int4_accum(PG_FUNCTION_ARGS);
 extern Datum int8_accum(PG_FUNCTION_ARGS);
 extern Datum int8_avg_accum(PG_FUNCTION_ARGS);
 extern Datum numeric_avg(PG_FUNCTION_ARGS);
+extern Datum numeric_sum(PG_FUNCTION_ARGS);
 extern Datum numeric_var_pop(PG_FUNCTION_ARGS);
 extern Datum numeric_var_samp(PG_FUNCTION_ARGS);
 extern Datum numeric_stddev_pop(PG_FUNCTION_ARGS);
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index 298b2e4eb9..e17881c59f 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -137,6 +137,67 @@ SELECT stddev_pop(3.0::numeric), stddev_samp(4.0::numeric);
           0 |            
 (1 row)
 
+-- verify correct results for null and NaN inputs
+select sum(null::int4) from generate_series(1,3);
+ sum 
+-----
+    
+(1 row)
+
+select sum(null::int8) from generate_series(1,3);
+ sum 
+-----
+    
+(1 row)
+
+select sum(null::numeric) from generate_series(1,3);
+ sum 
+-----
+    
+(1 row)
+
+select sum(null::float8) from generate_series(1,3);
+ sum 
+-----
+    
+(1 row)
+
+select avg(null::int4) from generate_series(1,3);
+ avg 
+-----
+    
+(1 row)
+
+select avg(null::int8) from generate_series(1,3);
+ avg 
+-----
+    
+(1 row)
+
+select avg(null::numeric) from generate_series(1,3);
+ avg 
+-----
+    
+(1 row)
+
+select avg(null::float8) from generate_series(1,3);
+ avg 
+-----
+    
+(1 row)
+
+select sum('NaN'::numeric) from generate_series(1,3);
+ sum 
+-----
+ NaN
+(1 row)
+
+select avg('NaN'::numeric) from generate_series(1,3);
+ avg 
+-----
+ NaN
+(1 row)
+
 -- SQL2003 binary aggregates
 SELECT regr_count(b, a) FROM aggtest;
  regr_count 
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
index 397edffd3a..1bbe073351 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -39,6 +39,18 @@ SELECT var_samp(b::numeric) FROM aggtest;
 SELECT var_pop(1.0), var_samp(2.0);
 SELECT stddev_pop(3.0::numeric), stddev_samp(4.0::numeric);
 
+-- verify correct results for null and NaN inputs
+select sum(null::int4) from generate_series(1,3);
+select sum(null::int8) from generate_series(1,3);
+select sum(null::numeric) from generate_series(1,3);
+select sum(null::float8) from generate_series(1,3);
+select avg(null::int4) from generate_series(1,3);
+select avg(null::int8) from generate_series(1,3);
+select avg(null::numeric) from generate_series(1,3);
+select avg(null::float8) from generate_series(1,3);
+select sum('NaN'::numeric) from generate_series(1,3);
+select avg('NaN'::numeric) from generate_series(1,3);
+
 -- SQL2003 binary aggregates
 SELECT regr_count(b, a) FROM aggtest;
 SELECT regr_sxx(b, a) FROM aggtest;