From e06a38965b3bcdaa881e7e06892d4d8ab6c2c980 Mon Sep 17 00:00:00 2001 From: Robert Haas Date: Mon, 21 Mar 2016 09:20:53 -0400 Subject: [PATCH] Support parallel aggregation. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Parallel workers can now partially aggregate the data and pass the transition values back to the leader, which can combine the partial results to produce the final answer. David Rowley, based on earlier work by Haribabu Kommi. Reviewed by Álvaro Herrera, Tomas Vondra, Amit Kapila, James Sewell, and me. --- src/backend/executor/execQual.c | 8 + src/backend/nodes/copyfuncs.c | 1 + src/backend/nodes/equalfuncs.c | 1 + src/backend/nodes/nodeFuncs.c | 2 +- src/backend/nodes/outfuncs.c | 1 + src/backend/nodes/readfuncs.c | 1 + src/backend/optimizer/path/allpaths.c | 3 +- src/backend/optimizer/path/costsize.c | 12 +- src/backend/optimizer/plan/createplan.c | 4 +- src/backend/optimizer/plan/planner.c | 518 +++++++++++++++++++++--- src/backend/optimizer/plan/setrefs.c | 251 +++++++++++- src/backend/optimizer/prep/prepunion.c | 4 +- src/backend/optimizer/util/clauses.c | 79 ++++ src/backend/optimizer/util/pathnode.c | 16 +- src/backend/optimizer/util/tlist.c | 45 ++ src/backend/parser/parse_func.c | 3 +- src/include/catalog/catversion.h | 2 +- src/include/nodes/primnodes.h | 11 +- src/include/nodes/relation.h | 2 + src/include/optimizer/clauses.h | 18 + src/include/optimizer/cost.h | 2 +- src/include/optimizer/pathnode.h | 7 +- src/include/optimizer/tlist.h | 1 + 23 files changed, 910 insertions(+), 82 deletions(-) diff --git a/src/backend/executor/execQual.c b/src/backend/executor/execQual.c index 778b6c1552..4df4a9b0f7 100644 --- a/src/backend/executor/execQual.c +++ b/src/backend/executor/execQual.c @@ -4515,6 +4515,14 @@ ExecInitExpr(Expr *node, PlanState *parent) if (parent && IsA(parent, AggState)) { AggState *aggstate = (AggState *) parent; + Aggref *aggref = (Aggref *) node; + + if (aggstate->finalizeAggs && + aggref->aggoutputtype != aggref->aggtype) + { + /* planner messed up */ + elog(ERROR, "Aggref aggoutputtype must match aggtype"); + } aggstate->aggs = lcons(astate, aggstate->aggs); aggstate->numaggs++; diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index 4589834305..6b5d1d6efc 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -1233,6 +1233,7 @@ _copyAggref(const Aggref *from) COPY_SCALAR_FIELD(aggfnoid); COPY_SCALAR_FIELD(aggtype); + COPY_SCALAR_FIELD(aggoutputtype); COPY_SCALAR_FIELD(aggcollid); COPY_SCALAR_FIELD(inputcollid); COPY_NODE_FIELD(aggdirectargs); diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c index b9c3959428..87eb859e05 100644 --- a/src/backend/nodes/equalfuncs.c +++ b/src/backend/nodes/equalfuncs.c @@ -192,6 +192,7 @@ _equalAggref(const Aggref *a, const Aggref *b) { COMPARE_SCALAR_FIELD(aggfnoid); COMPARE_SCALAR_FIELD(aggtype); + COMPARE_SCALAR_FIELD(aggoutputtype); COMPARE_SCALAR_FIELD(aggcollid); COMPARE_SCALAR_FIELD(inputcollid); COMPARE_NODE_FIELD(aggdirectargs); diff --git a/src/backend/nodes/nodeFuncs.c b/src/backend/nodes/nodeFuncs.c index b4ea440f05..46af872116 100644 --- a/src/backend/nodes/nodeFuncs.c +++ b/src/backend/nodes/nodeFuncs.c @@ -57,7 +57,7 @@ exprType(const Node *expr) type = ((const Param *) expr)->paramtype; break; case T_Aggref: - type = ((const Aggref *) expr)->aggtype; + type = ((const Aggref *) expr)->aggoutputtype; break; case T_GroupingFunc: type = INT4OID; diff --git 
a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 1144a4c1c7..32d03f7f25 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -1033,6 +1033,7 @@ _outAggref(StringInfo str, const Aggref *node) WRITE_OID_FIELD(aggfnoid); WRITE_OID_FIELD(aggtype); + WRITE_OID_FIELD(aggoutputtype); WRITE_OID_FIELD(aggcollid); WRITE_OID_FIELD(inputcollid); WRITE_NODE_FIELD(aggdirectargs); diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c index d63de7f907..6db0492e15 100644 --- a/src/backend/nodes/readfuncs.c +++ b/src/backend/nodes/readfuncs.c @@ -552,6 +552,7 @@ _readAggref(void) READ_OID_FIELD(aggfnoid); READ_OID_FIELD(aggtype); + READ_OID_FIELD(aggoutputtype); READ_OID_FIELD(aggcollid); READ_OID_FIELD(inputcollid); READ_NODE_FIELD(aggdirectargs); diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index 4f60b85861..e1a5d339f2 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -1968,7 +1968,8 @@ generate_gather_paths(PlannerInfo *root, RelOptInfo *rel) */ cheapest_partial_path = linitial(rel->partial_pathlist); simple_gather_path = (Path *) - create_gather_path(root, rel, cheapest_partial_path, NULL); + create_gather_path(root, rel, cheapest_partial_path, rel->reltarget, + NULL, NULL); add_path(rel, simple_gather_path); } diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index 943fcde3b8..9f572d759b 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -350,16 +350,22 @@ cost_samplescan(Path *path, PlannerInfo *root, * * 'rel' is the relation to be operated upon * 'param_info' is the ParamPathInfo if this is a parameterized path, else NULL + * 'rows' may be used to point to a row estimate; if non-NULL, it overrides + * both 'rel' and 'param_info'. This is useful when the path doesn't exactly + * correspond to any particular RelOptInfo. 
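+ * (For example, the grouping planner passes the estimated total number of + * partially aggregated groups when it puts a Gather atop a partially + * aggregated path, since no RelOptInfo carries that estimate.)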
*/ void cost_gather(GatherPath *path, PlannerInfo *root, - RelOptInfo *rel, ParamPathInfo *param_info) + RelOptInfo *rel, ParamPathInfo *param_info, + double *rows) { Cost startup_cost = 0; Cost run_cost = 0; /* Mark the path with the correct row estimate */ - if (param_info) + if (rows) + path->path.rows = *rows; + else if (param_info) path->path.rows = param_info->ppi_rows; else path->path.rows = rel->rows; @@ -1751,6 +1757,8 @@ cost_agg(Path *path, PlannerInfo *root, { /* must be AGG_HASHED */ startup_cost = input_total_cost; + if (!enable_hashagg) + startup_cost += disable_cost; startup_cost += aggcosts->transCost.startup; startup_cost += aggcosts->transCost.per_tuple * input_tuples; startup_cost += (cpu_operator_cost * numGroupCols) * input_tuples; diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 087cb9c441..d159a17fd2 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -1575,8 +1575,8 @@ create_agg_plan(PlannerInfo *root, AggPath *best_path) plan = make_agg(tlist, quals, best_path->aggstrategy, - false, - true, + best_path->combineStates, + best_path->finalizeAggs, list_length(best_path->groupClause), extract_grouping_cols(best_path->groupClause, subplan->targetlist), diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index fc0a2d8de3..5229c845d2 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -106,6 +106,11 @@ static double get_number_of_groups(PlannerInfo *root, double path_rows, List *rollup_lists, List *rollup_groupclauses); +static void set_grouped_rel_consider_parallel(PlannerInfo *root, + RelOptInfo *grouped_rel, + PathTarget *target); +static Size estimate_hashagg_tablesize(Path *path, AggClauseCosts *agg_costs, + double dNumGroups); static RelOptInfo *create_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel, PathTarget *target, @@ -134,6 +139,8 @@ static RelOptInfo *create_ordered_paths(PlannerInfo *root, double limit_tuples); static PathTarget *make_group_input_target(PlannerInfo *root, PathTarget *final_target); +static PathTarget *make_partialgroup_input_target(PlannerInfo *root, + PathTarget *final_target); static List *postprocess_setop_tlist(List *new_tlist, List *orig_tlist); static List *select_active_windows(PlannerInfo *root, WindowFuncLists *wflists); static PathTarget *make_window_input_target(PlannerInfo *root, @@ -1740,6 +1747,19 @@ grouping_planner(PlannerInfo *root, bool inheritance_update, } } + /* + * Likewise for any partial paths, although this case is simpler, since + * we don't track the cheapest path. + */ + foreach(lc, current_rel->partial_pathlist) + { + Path *subpath = (Path *) lfirst(lc); + + Assert(subpath->param_info == NULL); + lfirst(lc) = apply_projection_to_path(root, current_rel, + subpath, scanjoin_target); + } + /* * Save the various upper-rel PathTargets we just computed into * root->upper_targets[]. The core code doesn't use this, but it @@ -3133,6 +3153,79 @@ get_number_of_groups(PlannerInfo *root, return dNumGroups; } +/* + * set_grouped_rel_consider_parallel + * Determine if it's safe to generate partial paths for grouping. + */ +static void +set_grouped_rel_consider_parallel(PlannerInfo *root, RelOptInfo *grouped_rel, + PathTarget *target) +{ + Query *parse = root->parse; + + Assert(grouped_rel->reloptkind == RELOPT_UPPER_REL); + + /* + * If there are no aggregates or GROUP BY clause, then no parallel + * aggregation is possible. 
At present, it doesn't matter whether + * consider_parallel gets set in this case, because none of the upper rels + * on top of this one try to set the flag or examine it, so we just bail + * out as quickly as possible. We might need to be more clever here in + * the future. + */ + if (!parse->hasAggs && parse->groupClause == NIL) + return; + + /* + * Similarly, bail out quickly if GROUPING SETS are present; we can't + * support those at present. + */ + if (parse->groupingSets) + return; + + /* + * If parallel-restricted functions are present in the target list or the + * HAVING clause, we cannot safely go parallel. + */ + if (has_parallel_hazard((Node *) target->exprs, false) || + has_parallel_hazard((Node *) parse->havingQual, false)) + return; + + /* + * All that's left to check now is to make sure all aggregate functions + * support partial mode. If there are no aggregates, we can skip that + * check. + */ + if (!parse->hasAggs) + grouped_rel->consider_parallel = true; + else if (aggregates_allow_partial((Node *) target->exprs) == PAT_ANY && + aggregates_allow_partial(root->parse->havingQual) == PAT_ANY) + grouped_rel->consider_parallel = true; +} + +/* + * estimate_hashagg_tablesize + * estimate the number of bytes that a hash aggregate hashtable will + * require based on the agg_costs, path width and dNumGroups. + */ +static Size +estimate_hashagg_tablesize(Path *path, AggClauseCosts *agg_costs, + double dNumGroups) +{ + Size hashentrysize; + + /* Estimate per-hash-entry space at tuple width... */ + hashentrysize = MAXALIGN(path->pathtarget->width) + + MAXALIGN(SizeofMinimalTupleHeader); + + /* plus space for pass-by-ref transition values... */ + hashentrysize += agg_costs->transitionSpace; + /* plus the per-hash-entry overhead */ + hashentrysize += hash_agg_entry_size(agg_costs->numAggs); + + return hashentrysize * dNumGroups; +} + /* * create_grouping_paths * @@ -3149,9 +3242,8 @@ get_number_of_groups(PlannerInfo *root, * * We need to consider sorted and hashed aggregation in the same function, * because otherwise (1) it would be harder to throw an appropriate error - * message if neither way works, and (2) we should not allow enable_hashagg or - * hashtable size considerations to dissuade us from using hashing if sorting - * is not possible. + * message if neither way works, and (2) we should not allow hashtable size + * considerations to dissuade us from using hashing if sorting is not possible. */ static RelOptInfo * create_grouping_paths(PlannerInfo *root, @@ -3163,9 +3255,14 @@ create_grouping_paths(PlannerInfo *root, Query *parse = root->parse; Path *cheapest_path = input_rel->cheapest_total_path; RelOptInfo *grouped_rel; + PathTarget *partial_grouping_target = NULL; AggClauseCosts agg_costs; + Size hashaggtablesize; double dNumGroups; - bool allow_hash; + double dNumPartialGroups = 0; + bool can_hash; + bool can_sort; + ListCell *lc; /* For now, do all work in the (GROUP_AGG, NULL) upperrel */ @@ -3259,12 +3356,155 @@ create_grouping_paths(PlannerInfo *root, rollup_groupclauses); /* - * Consider sort-based implementations of grouping, if possible. (Note - * that if groupClause is empty, grouping_is_sortable() is trivially true, - * and all the pathkeys_contained_in() tests will succeed too, so that - * we'll consider every surviving input path.) + * Partial paths in the input rel could allow us to perform aggregation in + * parallel. set_grouped_rel_consider_parallel() will determine if it's + * going to be safe to do so.
*/ - if (grouping_is_sortable(parse->groupClause)) + if (input_rel->partial_pathlist != NIL) + set_grouped_rel_consider_parallel(root, grouped_rel, target); + + /* + * Determine whether it's possible to perform sort-based implementations + * of grouping. (Note that if groupClause is empty, grouping_is_sortable() + * is trivially true, and all the pathkeys_contained_in() tests will + * succeed too, so that we'll consider every surviving input path.) + */ + can_sort = grouping_is_sortable(parse->groupClause); + + /* + * Determine whether we should consider hash-based implementations of + * grouping. + * + * Hashed aggregation only applies if we're grouping. We currently can't + * hash if there are grouping sets, though. + * + * Executor doesn't support hashed aggregation with DISTINCT or ORDER BY + * aggregates. (Doing so would imply storing *all* the input values in + * the hash table, and/or running many sorts in parallel, either of which + * seems like a certain loser.) We similarly don't support ordered-set + * aggregates in hashed aggregation, but that case is also included in the + * numOrderedAggs count. + * + * Note: grouping_is_hashable() is much more expensive to check than the + * other gating conditions, so we want to do it last. + */ + can_hash = (parse->groupClause != NIL && + parse->groupingSets == NIL && + agg_costs.numOrderedAggs == 0 && + grouping_is_hashable(parse->groupClause)); + + /* + * Before generating paths for grouped_rel, we first generate any possible + * partial paths; that way, later code can easily consider both parallel + * and non-parallel approaches to grouping. Note that the partial paths + * we generate here are also partially aggregated, so simply pushing a + * Gather node on top is insufficient to create a final path, as would be + * the case for a scan/join rel. + */ + if (grouped_rel->consider_parallel) + { + Path *cheapest_partial_path = linitial(input_rel->partial_pathlist); + + /* + * Build target list for partial aggregate paths. We cannot reuse the + * final target as Aggrefs must be set in partial mode, and we must + * also include Aggrefs from the HAVING clause in the target as these + * may not be present in the final target. + */ + partial_grouping_target = make_partialgroup_input_target(root, target); + + /* Estimate number of partial groups. */ + dNumPartialGroups = get_number_of_groups(root, + clamp_row_est(cheapest_partial_path->rows), + NIL, + NIL); + + if (can_sort) + { + /* Checked in set_grouped_rel_consider_parallel() */ + Assert(parse->hasAggs || parse->groupClause); + + /* + * Use any available suitably-sorted path as input, and also + * consider sorting the cheapest partial path. + */ + foreach(lc, input_rel->partial_pathlist) + { + Path *path = (Path *) lfirst(lc); + bool is_sorted; + + is_sorted = pathkeys_contained_in(root->group_pathkeys, + path->pathkeys); + if (path == cheapest_partial_path || is_sorted) + { + /* Sort the cheapest partial path, if it isn't already */ + if (!is_sorted) + path = (Path *) create_sort_path(root, + grouped_rel, + path, + root->group_pathkeys, + -1.0); + + if (parse->hasAggs) + add_partial_path(grouped_rel, (Path *) + create_agg_path(root, + grouped_rel, + path, + partial_grouping_target, + parse->groupClause ? 
AGG_SORTED : AGG_PLAIN, + parse->groupClause, + NIL, + &agg_costs, + dNumPartialGroups, + false, + false)); + else + add_partial_path(grouped_rel, (Path *) + create_group_path(root, + grouped_rel, + path, + partial_grouping_target, + parse->groupClause, + NIL, + dNumPartialGroups)); + } + } + } + + if (can_hash) + { + /* Checked above */ + Assert(parse->hasAggs || parse->groupClause); + + hashaggtablesize = + estimate_hashagg_tablesize(cheapest_partial_path, + &agg_costs, + dNumPartialGroups); + + /* + * Tentatively produce a partial HashAgg Path, if it looks as though + * the hash table will fit in work_mem. + */ + if (hashaggtablesize < work_mem * 1024L) + { + add_partial_path(grouped_rel, (Path *) + create_agg_path(root, + grouped_rel, + cheapest_partial_path, + partial_grouping_target, + AGG_HASHED, + parse->groupClause, + NIL, + &agg_costs, + dNumPartialGroups, + false, + false)); + } + } + } + + /* Build final grouping paths */ + if (can_sort) { /* * Use any available suitably-sorted path as input, and also consider * @@ -3320,7 +3560,9 @@ create_grouping_paths(PlannerInfo *root, parse->groupClause, (List *) parse->havingQual, &agg_costs, - dNumGroups)); + dNumGroups, + false, + true)); } else if (parse->groupClause) { @@ -3344,69 +3586,131 @@ create_grouping_paths(PlannerInfo *root, } } } - } - /* - * Consider hash-based implementations of grouping, if possible. - * - * Hashed aggregation only applies if we're grouping. We currently can't - * hash if there are grouping sets, though. - * - * Executor doesn't support hashed aggregation with DISTINCT or ORDER BY - * aggregates. (Doing so would imply storing *all* the input values in - * the hash table, and/or running many sorts in parallel, either of which - * seems like a certain loser.) We similarly don't support ordered-set - * aggregates in hashed aggregation, but that case is also included in the - * numOrderedAggs count. - * - * Note: grouping_is_hashable() is much more expensive to check than the - * other gating conditions, so we want to do it last. - */ - allow_hash = (parse->groupClause != NIL && - parse->groupingSets == NIL && - agg_costs.numOrderedAggs == 0); - - /* Consider reasons to disable hashing, but only if we can sort instead */ - if (allow_hash && grouped_rel->pathlist != NIL) - { - if (!enable_hashagg) - allow_hash = false; - else + /* + * Now generate a complete GroupAgg Path atop the cheapest partial + * path. We need only bother with the cheapest path here, as the output + * of Gather is never sorted. + */ + if (grouped_rel->partial_pathlist) { + Path *path = (Path *) linitial(grouped_rel->partial_pathlist); + double total_groups = path->rows * path->parallel_degree; + + path = (Path *) create_gather_path(root, + grouped_rel, + path, + partial_grouping_target, + NULL, + &total_groups); + /* - * Don't hash if it doesn't look like the hashtable will fit into - * work_mem. + * Gather is always unsorted, so we'll need to sort, unless there's + * no GROUP BY clause, in which case there will only be a single + * group. */ - Size hashentrysize; + if (parse->groupClause) + path = (Path *) create_sort_path(root, + grouped_rel, + path, + root->group_pathkeys, + -1.0); - /* Estimate per-hash-entry space at tuple width... */ - hashentrysize = MAXALIGN(cheapest_path->pathtarget->width) + - MAXALIGN(SizeofMinimalTupleHeader); - /* plus space for pass-by-ref transition values...
*/ - hashentrysize += agg_costs.transitionSpace; - /* plus the per-hash-entry overhead */ - hashentrysize += hash_agg_entry_size(agg_costs.numAggs); - - if (hashentrysize * dNumGroups > work_mem * 1024L) - allow_hash = false; + if (parse->hasAggs) + add_path(grouped_rel, (Path *) + create_agg_path(root, + grouped_rel, + path, + target, + parse->groupClause ? AGG_SORTED : AGG_PLAIN, + parse->groupClause, + (List *) parse->havingQual, + &agg_costs, + dNumGroups, + true, + true)); + else + add_path(grouped_rel, (Path *) + create_group_path(root, + grouped_rel, + path, + target, + parse->groupClause, + (List *) parse->havingQual, + dNumGroups)); } } - if (allow_hash && grouping_is_hashable(parse->groupClause)) + if (can_hash) { + hashaggtablesize = estimate_hashagg_tablesize(cheapest_path, + &agg_costs, + dNumGroups); + /* - * We just need an Agg over the cheapest-total input path, since input - * order won't matter. + * Provided that the estimated size of the hashtable does not exceed + * work_mem, we'll generate a HashAgg Path, although if we were unable + * to sort above, then we'd better generate a Path, so that we at least + * have one. */ - add_path(grouped_rel, (Path *) - create_agg_path(root, grouped_rel, - cheapest_path, - target, - AGG_HASHED, - parse->groupClause, - (List *) parse->havingQual, - &agg_costs, - dNumGroups)); + if (hashaggtablesize < work_mem * 1024L || + grouped_rel->pathlist == NIL) + { + /* + * We just need an Agg over the cheapest-total input path, since input + * order won't matter. + */ + add_path(grouped_rel, (Path *) + create_agg_path(root, grouped_rel, + cheapest_path, + target, + AGG_HASHED, + parse->groupClause, + (List *) parse->havingQual, + &agg_costs, + dNumGroups, + false, + true)); + } + + /* + * Generate a HashAgg Path atop the cheapest partial path. Once + * again, we'll only do this if it looks as though the hash table won't + * exceed work_mem. + */ + if (grouped_rel->partial_pathlist) + { + Path *path = (Path *) linitial(grouped_rel->partial_pathlist); + + hashaggtablesize = estimate_hashagg_tablesize(path, + &agg_costs, + dNumGroups); + + if (hashaggtablesize < work_mem * 1024L) + { + double total_groups = path->rows * path->parallel_degree; + + path = (Path *) create_gather_path(root, + grouped_rel, + path, + partial_grouping_target, + NULL, + &total_groups); + + add_path(grouped_rel, (Path *) + create_agg_path(root, + grouped_rel, + path, + target, + AGG_HASHED, + parse->groupClause, + (List *) parse->havingQual, + &agg_costs, + dNumGroups, + true, + true)); + } + } } /* Give a helpful error if we failed to find any implementation */ @@ -3735,7 +4039,9 @@ create_distinct_paths(PlannerInfo *root, parse->distinctClause, NIL, NULL, - numDistinctRows)); + numDistinctRows, + false, + true)); } /* Give a helpful error if we failed to find any implementation */ @@ -3914,6 +4220,92 @@ make_group_input_target(PlannerInfo *root, PathTarget *final_target) return set_pathtarget_cost_width(root, input_target); } +/* + * make_partialgroup_input_target + * Generate appropriate PathTarget for input for Partial Aggregate nodes. + * + * Similar to make_group_input_target(), except that we don't recurse into + * Aggrefs, as we need these to remain intact so that they can be found later + * in Combine Aggregate nodes during set_combineagg_references(). Vars will + * still be pulled out of non-Aggref nodes, as these will still be required by + * the combine aggregate phase.
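+ * + * For example (illustrative): given SELECT x, sum(y) FROM tab GROUP BY x + * HAVING max(z) < 10, the partial input target contains x, sum(y) and + * max(z), with both Aggrefs converted to partial mode.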
+ * + * We also convert any Aggrefs which we do find, putting them into partial + * mode; this adjusts the Aggref's return type so that the partially + * calculated aggregate value can make its way up the execution tree to the + * Finalize Aggregate node. + */ +static PathTarget * +make_partialgroup_input_target(PlannerInfo *root, PathTarget *final_target) +{ + Query *parse = root->parse; + PathTarget *input_target; + List *non_group_cols; + List *non_group_exprs; + int i; + ListCell *lc; + + input_target = create_empty_pathtarget(); + non_group_cols = NIL; + + i = 0; + foreach(lc, final_target->exprs) + { + Expr *expr = (Expr *) lfirst(lc); + Index sgref = final_target->sortgrouprefs[i]; + + if (sgref && parse->groupClause && + get_sortgroupref_clause_noerr(sgref, parse->groupClause) != NULL) + { + /* + * It's a grouping column, so add it to the input target as-is. + */ + add_column_to_pathtarget(input_target, expr, sgref); + } + else + { + /* + * Non-grouping column, so just remember the expression for later + * call to pull_var_clause. + */ + non_group_cols = lappend(non_group_cols, expr); + } + + i++; + } + + /* + * If there's a HAVING clause, we'll need the Aggrefs it uses, too. + */ + if (parse->havingQual) + non_group_cols = lappend(non_group_cols, parse->havingQual); + + /* + * Pull out all the Vars mentioned in non-group cols (plus HAVING), and + * add them to the input target if not already present. (A Var used + * directly as a GROUP BY item will be present already.) Note this + * includes Vars used in resjunk items, so we are covering the needs of + * ORDER BY and window specifications. Vars used within Aggrefs will be + * ignored and the Aggrefs themselves will be added to the PathTarget. + */ + non_group_exprs = pull_var_clause((Node *) non_group_cols, + PVC_INCLUDE_AGGREGATES | + PVC_RECURSE_WINDOWFUNCS | + PVC_INCLUDE_PLACEHOLDERS); + + add_new_columns_to_pathtarget(input_target, non_group_exprs); + + /* clean up cruft */ + list_free(non_group_exprs); + list_free(non_group_cols); + + /* Adjust Aggrefs to put them in partial mode. */ + apply_partialaggref_adjustment(input_target); + + /* XXX this causes some redundant cost calculation ... */ + return set_pathtarget_cost_width(root, input_target); +} + /* * postprocess_setop_tlist * Fix up targetlist returned by plan_set_operations().
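[Illustration, not part of the diff: with the planner changes above, a grouped query can produce parallel plans shaped roughly as follows (EXPLAIN labels approximate):

 Finalize GroupAggregate
   -> Sort
      -> Gather
         -> Partial GroupAggregate
            -> Sort
               -> Parallel Seq Scan on tab

or, where hashing is possible and the hash table fits in work_mem:

 Finalize HashAggregate
   -> Gather
      -> Partial HashAggregate
         -> Parallel Seq Scan on tab]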
diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c index aa2c3084fc..16f572faf4 100644 --- a/src/backend/optimizer/plan/setrefs.c +++ b/src/backend/optimizer/plan/setrefs.c @@ -104,6 +104,8 @@ static Node *fix_scan_expr_mutator(Node *node, fix_scan_expr_context *context); static bool fix_scan_expr_walker(Node *node, fix_scan_expr_context *context); static void set_join_references(PlannerInfo *root, Join *join, int rtoffset); static void set_upper_references(PlannerInfo *root, Plan *plan, int rtoffset); +static void set_combineagg_references(PlannerInfo *root, Plan *plan, + int rtoffset); static void set_dummy_tlist_references(Plan *plan, int rtoffset); static indexed_tlist *build_tlist_index(List *tlist); static Var *search_indexed_tlist_for_var(Var *var, @@ -117,6 +119,8 @@ static Var *search_indexed_tlist_for_sortgroupref(Node *node, Index sortgroupref, indexed_tlist *itlist, Index newvarno); +static Var *search_indexed_tlist_for_partial_aggref(Aggref *aggref, + indexed_tlist *itlist, Index newvarno); static List *fix_join_expr(PlannerInfo *root, List *clauses, indexed_tlist *outer_itlist, @@ -131,6 +135,13 @@ static Node *fix_upper_expr(PlannerInfo *root, int rtoffset); static Node *fix_upper_expr_mutator(Node *node, fix_upper_expr_context *context); +static Node *fix_combine_agg_expr(PlannerInfo *root, + Node *node, + indexed_tlist *subplan_itlist, + Index newvarno, + int rtoffset); +static Node *fix_combine_agg_expr_mutator(Node *node, + fix_upper_expr_context *context); static List *set_returning_clause_references(PlannerInfo *root, List *rlist, Plan *topplan, @@ -667,8 +678,16 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset) } break; case T_Agg: - set_upper_references(root, plan, rtoffset); - break; + { + Agg *aggplan = (Agg *) plan; + + if (aggplan->combineStates) + set_combineagg_references(root, plan, rtoffset); + else + set_upper_references(root, plan, rtoffset); + + break; + } case T_Group: set_upper_references(root, plan, rtoffset); break; @@ -1701,6 +1720,73 @@ set_upper_references(PlannerInfo *root, Plan *plan, int rtoffset) pfree(subplan_itlist); } +/* + * set_combineagg_references + * This serves the same function as set_upper_references(), but treats + * Aggrefs differently. Here we transform the Aggref nodes' args to suit the + * combine aggregate phase. This means that the Aggref->args are converted + * to reference the corresponding aggregate function in the subplan rather + * than simple Var(s), as would be the case for a non-combine aggregate + * node.
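+ * + * For example, a combining count(*)'s args become a single Var referencing + * the partial count(*) column of the subplan; the combine function then + * merges those partial counts (an illustrative case).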
*/ +static void +set_combineagg_references(PlannerInfo *root, Plan *plan, int rtoffset) +{ + Plan *subplan = plan->lefttree; + indexed_tlist *subplan_itlist; + List *output_targetlist; + ListCell *l; + + Assert(IsA(plan, Agg)); + Assert(((Agg *) plan)->combineStates); + + subplan_itlist = build_tlist_index(subplan->targetlist); + + output_targetlist = NIL; + + foreach(l, plan->targetlist) + { + TargetEntry *tle = (TargetEntry *) lfirst(l); + Node *newexpr; + + /* If it's a non-Var sort/group item, first try to match by sortref */ + if (tle->ressortgroupref != 0 && !IsA(tle->expr, Var)) + { + newexpr = (Node *) + search_indexed_tlist_for_sortgroupref((Node *) tle->expr, + tle->ressortgroupref, + subplan_itlist, + OUTER_VAR); + if (!newexpr) + newexpr = fix_combine_agg_expr(root, + (Node *) tle->expr, + subplan_itlist, + OUTER_VAR, + rtoffset); + } + else + newexpr = fix_combine_agg_expr(root, + (Node *) tle->expr, + subplan_itlist, + OUTER_VAR, + rtoffset); + tle = flatCopyTargetEntry(tle); + tle->expr = (Expr *) newexpr; + output_targetlist = lappend(output_targetlist, tle); + } + + plan->targetlist = output_targetlist; + + plan->qual = (List *) + fix_combine_agg_expr(root, + (Node *) plan->qual, + subplan_itlist, + OUTER_VAR, + rtoffset); + + pfree(subplan_itlist); +} + /* * set_dummy_tlist_references * Replace the targetlist of an upper-level plan node with a simple @@ -1967,6 +2053,68 @@ search_indexed_tlist_for_sortgroupref(Node *node, return NULL; /* no match */ } +/* + * search_indexed_tlist_for_partial_aggref - find an Aggref in an indexed tlist + * + * Aggrefs for partial aggregates have their aggoutputtype adjusted to the + * aggregate state's type. This means that a standard equal() comparison + * won't match when comparing an Aggref which is in partial mode with an Aggref + * which is not. Here we manually compare all of the fields apart from + * aggoutputtype.
*/ +static Var * +search_indexed_tlist_for_partial_aggref(Aggref *aggref, indexed_tlist *itlist, + Index newvarno) +{ + ListCell *lc; + + foreach(lc, itlist->tlist) + { + TargetEntry *tle = (TargetEntry *) lfirst(lc); + + if (IsA(tle->expr, Aggref)) + { + Aggref *tlistaggref = (Aggref *) tle->expr; + Var *newvar; + + if (aggref->aggfnoid != tlistaggref->aggfnoid) + continue; + if (aggref->aggtype != tlistaggref->aggtype) + continue; + /* ignore aggoutputtype */ + if (aggref->aggcollid != tlistaggref->aggcollid) + continue; + if (aggref->inputcollid != tlistaggref->inputcollid) + continue; + if (!equal(aggref->aggdirectargs, tlistaggref->aggdirectargs)) + continue; + if (!equal(aggref->args, tlistaggref->args)) + continue; + if (!equal(aggref->aggorder, tlistaggref->aggorder)) + continue; + if (!equal(aggref->aggdistinct, tlistaggref->aggdistinct)) + continue; + if (!equal(aggref->aggfilter, tlistaggref->aggfilter)) + continue; + if (aggref->aggstar != tlistaggref->aggstar) + continue; + if (aggref->aggvariadic != tlistaggref->aggvariadic) + continue; + if (aggref->aggkind != tlistaggref->aggkind) + continue; + if (aggref->agglevelsup != tlistaggref->agglevelsup) + continue; + + newvar = makeVarFromTargetEntry(newvarno, tle); + newvar->varnoold = 0; /* wasn't ever a plain Var */ + newvar->varoattno = 0; + + return newvar; + } + } + return NULL; +} + /* * fix_join_expr * Create a new set of targetlist entries or join qual clauses by @@ -2237,6 +2385,105 @@ fix_upper_expr_mutator(Node *node, fix_upper_expr_context *context) (void *) context); } +/* + * fix_combine_agg_expr + * Like fix_upper_expr() but additionally adjusts the Aggref->args of + * Aggrefs so that they reference the corresponding Aggref in the subplan. + */ +static Node * +fix_combine_agg_expr(PlannerInfo *root, + Node *node, + indexed_tlist *subplan_itlist, + Index newvarno, + int rtoffset) +{ + fix_upper_expr_context context; + + context.root = root; + context.subplan_itlist = subplan_itlist; + context.newvarno = newvarno; + context.rtoffset = rtoffset; + return fix_combine_agg_expr_mutator(node, &context); } + +static Node * +fix_combine_agg_expr_mutator(Node *node, fix_upper_expr_context *context) +{ + Var *newvar; + + if (node == NULL) + return NULL; + if (IsA(node, Var)) + { + Var *var = (Var *) node; + + newvar = search_indexed_tlist_for_var(var, + context->subplan_itlist, + context->newvarno, + context->rtoffset); + if (!newvar) + elog(ERROR, "variable not found in subplan target list"); + return (Node *) newvar; + } + if (IsA(node, PlaceHolderVar)) + { + PlaceHolderVar *phv = (PlaceHolderVar *) node; + + /* See if the PlaceHolderVar has bubbled up from a lower plan node */ + if (context->subplan_itlist->has_ph_vars) + { + newvar = search_indexed_tlist_for_non_var((Node *) phv, + context->subplan_itlist, + context->newvarno); + if (newvar) + return (Node *) newvar; + } + /* If not supplied by input plan, evaluate the contained expr */ + return fix_upper_expr_mutator((Node *) phv->phexpr, context); + } + if (IsA(node, Param)) + return fix_param_node(context->root, (Param *) node); + if (IsA(node, Aggref)) + { + Aggref *aggref = (Aggref *) node; + + newvar = search_indexed_tlist_for_partial_aggref(aggref, + context->subplan_itlist, + context->newvarno); + if (newvar) + { + Aggref *newaggref; + TargetEntry *newtle; + + /* + * Now build a new TargetEntry for the Aggref's arguments: + * a single Var referencing the corresponding Aggref in the + * node below.
*/ + newtle = makeTargetEntry((Expr *) newvar, 1, NULL, false); + newaggref = (Aggref *) copyObject(aggref); + newaggref->args = list_make1(newtle); + + return (Node *) newaggref; + } + else + elog(ERROR, "Aggref not found in subplan target list"); + } + /* Try matching more complex expressions too, if tlist has any */ + if (context->subplan_itlist->has_non_vars) + { + newvar = search_indexed_tlist_for_non_var(node, + context->subplan_itlist, + context->newvarno); + if (newvar) + return (Node *) newvar; + } + fix_expr_common(context->root, node); + return expression_tree_mutator(node, + fix_combine_agg_expr_mutator, + (void *) context); +} + /* * set_returning_clause_references * Perform setrefs.c's work on a RETURNING targetlist diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c index 6ea3319e5f..fb139af2c1 100644 --- a/src/backend/optimizer/prep/prepunion.c +++ b/src/backend/optimizer/prep/prepunion.c @@ -859,7 +859,9 @@ make_union_unique(SetOperationStmt *op, Path *path, List *tlist, groupList, NIL, NULL, - dNumGroups); + dNumGroups, + false, + true); } else { diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c index b692e18e3d..d80dfbe5c9 100644 --- a/src/backend/optimizer/util/clauses.c +++ b/src/backend/optimizer/util/clauses.c @@ -52,6 +52,10 @@ #include "utils/syscache.h" #include "utils/typcache.h" +typedef struct +{ + PartialAggType allowedtype; +} partial_agg_context; typedef struct { @@ -93,6 +97,8 @@ typedef struct bool allow_restricted; } has_parallel_hazard_arg; +static bool aggregates_allow_partial_walker(Node *node, + partial_agg_context *context); static bool contain_agg_clause_walker(Node *node, void *context); static bool count_agg_clauses_walker(Node *node, count_agg_clauses_context *context); @@ -399,6 +405,79 @@ make_ands_implicit(Expr *clause) * Aggregate-function clause manipulation *****************************************************************************/ +/* + * aggregates_allow_partial + * Recursively search for Aggref clauses and determine the maximum + * level of partial aggregation which can be supported. + */ +PartialAggType +aggregates_allow_partial(Node *clause) +{ + partial_agg_context context; + + /* initially any type is okay, until we find Aggrefs which say otherwise */ + context.allowedtype = PAT_ANY; + + (void) aggregates_allow_partial_walker(clause, &context); + return context.allowedtype; +} + +static bool +aggregates_allow_partial_walker(Node *node, partial_agg_context *context) +{ + if (node == NULL) + return false; + if (IsA(node, Aggref)) + { + Aggref *aggref = (Aggref *) node; + HeapTuple aggTuple; + Form_pg_aggregate aggform; + + Assert(aggref->agglevelsup == 0); + + /* + * We can't perform partial aggregation with Aggrefs containing a + * DISTINCT or ORDER BY clause. + */ + if (aggref->aggdistinct || aggref->aggorder) + { + context->allowedtype = PAT_DISABLED; + return true; /* abort search */ + } + aggTuple = SearchSysCache1(AGGFNOID, + ObjectIdGetDatum(aggref->aggfnoid)); + if (!HeapTupleIsValid(aggTuple)) + elog(ERROR, "cache lookup failed for aggregate %u", + aggref->aggfnoid); + aggform = (Form_pg_aggregate) GETSTRUCT(aggTuple); + + /* + * If there is no combine function, then partial aggregation is not + * possible.
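+ * (As of this commit that is the case for array_agg() and string_agg(), + * for example, so queries using them will not use parallel aggregation; + * an illustrative note.)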
+ */ + if (!OidIsValid(aggform->aggcombinefn)) + { + ReleaseSysCache(aggTuple); + context->allowedtype = PAT_DISABLED; + return true; /* abort search */ + } + + /* + * If we find any aggs with an internal transtype then we must ensure + * that pointers to aggregate states are not passed to other processes; + * therefore, we set the maximum allowed type to PAT_INTERNAL_ONLY. + */ + if (aggform->aggtranstype == INTERNALOID) + context->allowedtype = PAT_INTERNAL_ONLY; + + ReleaseSysCache(aggTuple); + return false; /* continue searching */ + } + return expression_tree_walker(node, aggregates_allow_partial_walker, + (void *) context); +} + /* * contain_agg_clause * Recursively search for Aggref/GroupingFunc nodes within a clause. diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index 541f7790ab..16b34fcf46 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -1645,10 +1645,12 @@ translate_sub_tlist(List *tlist, int relid) * create_gather_path * Creates a path corresponding to a gather scan, returning the * pathnode. + * + * 'rows' may optionally be set to override row estimates from other sources. */ GatherPath * create_gather_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, - Relids required_outer) + PathTarget *target, Relids required_outer, double *rows) { GatherPath *pathnode = makeNode(GatherPath); @@ -1656,7 +1658,7 @@ create_gather_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, pathnode->path.pathtype = T_Gather; pathnode->path.parent = rel; - pathnode->path.pathtarget = rel->reltarget; + pathnode->path.pathtarget = target; pathnode->path.param_info = get_baserel_parampathinfo(root, rel, required_outer); pathnode->path.parallel_aware = false; @@ -1674,7 +1676,7 @@ create_gather_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, pathnode->single_copy = true; } - cost_gather(pathnode, root, rel, pathnode->path.param_info); + cost_gather(pathnode, root, rel, pathnode->path.param_info, rows); return pathnode; } @@ -2417,6 +2419,8 @@ create_upper_unique_path(PlannerInfo *root, * 'qual' is the HAVING quals if any * 'aggcosts' contains cost info about the aggregate functions to be computed * 'numGroups' is the estimated number of groups (1 if not grouping) + * 'combineStates' is set to true if the Agg node should combine agg states + * 'finalizeAggs' is set to false if the Agg node should not call the finalfn */ AggPath * create_agg_path(PlannerInfo *root, @@ -2427,7 +2431,9 @@ create_agg_path(PlannerInfo *root, List *groupClause, List *qual, const AggClauseCosts *aggcosts, - double numGroups) + double numGroups, + bool combineStates, + bool finalizeAggs) { AggPath *pathnode = makeNode(AggPath); @@ -2450,6 +2456,8 @@ create_agg_path(PlannerInfo *root, pathnode->numGroups = numGroups; pathnode->groupClause = groupClause; pathnode->qual = qual; + pathnode->finalizeAggs = finalizeAggs; + pathnode->combineStates = combineStates; cost_agg(&pathnode->path, root, aggstrategy, aggcosts, diff --git a/src/backend/optimizer/util/tlist.c b/src/backend/optimizer/util/tlist.c index b297d87e7e..cd421b1463 100644 --- a/src/backend/optimizer/util/tlist.c +++ b/src/backend/optimizer/util/tlist.c @@ -14,9 +14,12 @@ */ #include "postgres.h" +#include "access/htup_details.h" +#include "catalog/pg_aggregate.h" #include "nodes/makefuncs.h" #include "nodes/nodeFuncs.h" #include "optimizer/tlist.h" +#include "utils/syscache.h" /***************************************************************************** @@ 
-748,3 +751,45 @@ apply_pathtarget_labeling_to_tlist(List *tlist, PathTarget *target) i++; } } + +/* + * apply_partialaggref_adjustment + * Convert PathTarget to be suitable for a partial aggregate node. We simply + * adjust any Aggref nodes found in the target and set the aggoutputtype to + * the aggtranstype. This allows exprType() to return the actual type that + * will be produced. + * + * Note: We expect 'target' to be a flat target list and not have Aggrefs buried + * within other expressions. + */ +void +apply_partialaggref_adjustment(PathTarget *target) +{ + ListCell *lc; + + foreach(lc, target->exprs) + { + Aggref *aggref = (Aggref *) lfirst(lc); + + if (IsA(aggref, Aggref)) + { + HeapTuple aggTuple; + Form_pg_aggregate aggform; + Aggref *newaggref; + + aggTuple = SearchSysCache1(AGGFNOID, + ObjectIdGetDatum(aggref->aggfnoid)); + if (!HeapTupleIsValid(aggTuple)) + elog(ERROR, "cache lookup failed for aggregate %u", + aggref->aggfnoid); + aggform = (Form_pg_aggregate) GETSTRUCT(aggTuple); + + newaggref = (Aggref *) copyObject(aggref); + newaggref->aggoutputtype = aggform->aggtranstype; + + lfirst(lc) = newaggref; + + ReleaseSysCache(aggTuple); + } + } +} diff --git a/src/backend/parser/parse_func.c b/src/backend/parser/parse_func.c index 9744d0dc68..485960f753 100644 --- a/src/backend/parser/parse_func.c +++ b/src/backend/parser/parse_func.c @@ -647,7 +647,8 @@ ParseFuncOrColumn(ParseState *pstate, List *funcname, List *fargs, Aggref *aggref = makeNode(Aggref); aggref->aggfnoid = funcid; - aggref->aggtype = rettype; + /* default the outputtype to be the same as aggtype */ + aggref->aggtype = aggref->aggoutputtype = rettype; /* aggcollid and inputcollid will be set by parse_collate.c */ /* aggdirectargs and args will be set by transformAggregateCall */ /* aggorder and aggdistinct will be set by transformAggregateCall */ diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index 7c9e9eb19f..3568cb27e8 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -53,6 +53,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 201603181 +#define CATALOG_VERSION_NO 201603211 #endif diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h index f942378363..1ffc0a1e5e 100644 --- a/src/include/nodes/primnodes.h +++ b/src/include/nodes/primnodes.h @@ -255,12 +255,21 @@ typedef struct Param * DISTINCT is not supported in this case, so aggdistinct will be NIL. * The direct arguments appear in aggdirectargs (as a list of plain * expressions, not TargetEntry nodes). + * + * 'aggtype' and 'aggoutputtype' are the same except when we're performing + * partial aggregation; in that case, we output transition states. Nothing + * interesting happens in the Aggref itself, but we must set the output data + * type to whatever type is used for transition values.
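+ * + * For example (illustrative): avg(float8) normally returns float8, so its + * aggtype is FLOAT8OID; in partial mode aggoutputtype instead becomes the + * float8[] transition type used by float8_accum.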
+ * + * Note: If you are adding fields here, you may also need to add a comparison + * in search_indexed_tlist_for_partial_aggref(). */ typedef struct Aggref { Expr xpr; Oid aggfnoid; /* pg_proc Oid of the aggregate */ - Oid aggtype; /* type Oid of result of the aggregate */ + Oid aggtype; /* type Oid of final result of the aggregate */ + Oid aggoutputtype; /* type Oid of result of this aggregate */ Oid aggcollid; /* OID of collation of result */ Oid inputcollid; /* OID of collation that function should use */ List *aggdirectargs; /* direct arguments, if an ordered-set agg */ diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index 503269693b..ee7007aace 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -1309,6 +1309,8 @@ typedef struct AggPath double numGroups; /* estimated number of groups in input */ List *groupClause; /* a list of SortGroupClause's */ List *qual; /* quals (HAVING quals), if any */ + bool combineStates; /* input is partially aggregated agg states */ + bool finalizeAggs; /* should the executor call the finalfn? */ } AggPath; /* diff --git a/src/include/optimizer/clauses.h b/src/include/optimizer/clauses.h index 3b3fd0fc9a..3ab57f155d 100644 --- a/src/include/optimizer/clauses.h +++ b/src/include/optimizer/clauses.h @@ -27,6 +27,23 @@ typedef struct List **windowFuncs; /* lists of WindowFuncs for each winref */ } WindowFuncLists; +/* + * PartialAggType + * PartialAggType stores whether partial aggregation is allowed and + * which context it is allowed in. We require three states here as there are + * two different contexts in which partial aggregation is safe. For aggregates + * whose aggtranstype is INTERNAL, it is okay to pass a pointer to the + * aggregate state within a single process, since the datum is just a + * pointer. In cases where the aggregate state must be passed between + * different processes, for example during parallel aggregation, passing + * pointers directly is not going to work. + */ +typedef enum +{ + PAT_ANY = 0, /* Any type of partial aggregation is okay. */ + PAT_INTERNAL_ONLY, /* Partial aggregation is okay, but aggregate + * states must stay within a single process.
*/ + PAT_DISABLED /* Some aggregates don't support partial mode at all */ +} PartialAggType; extern Expr *make_opclause(Oid opno, Oid opresulttype, bool opretset, Expr *leftop, Expr *rightop, @@ -47,6 +64,7 @@ extern Node *make_and_qual(Node *qual1, Node *qual2); extern Expr *make_ands_explicit(List *andclauses); extern List *make_ands_implicit(Expr *clause); +extern PartialAggType aggregates_allow_partial(Node *clause); extern bool contain_agg_clause(Node *clause); extern void count_agg_clauses(PlannerInfo *root, Node *clause, AggClauseCosts *costs); diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h index fea2bb77f4..d4adca6836 100644 --- a/src/include/optimizer/cost.h +++ b/src/include/optimizer/cost.h @@ -150,7 +150,7 @@ extern void final_cost_hashjoin(PlannerInfo *root, HashPath *path, SpecialJoinInfo *sjinfo, SemiAntiJoinFactors *semifactors); extern void cost_gather(GatherPath *path, PlannerInfo *root, - RelOptInfo *baserel, ParamPathInfo *param_info); + RelOptInfo *baserel, ParamPathInfo *param_info, double *rows); extern void cost_subplan(PlannerInfo *root, SubPlan *subplan, Plan *plan); extern void cost_qual_eval(QualCost *cost, List *quals, PlannerInfo *root); extern void cost_qual_eval_node(QualCost *cost, Node *qual, PlannerInfo *root); diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h index d1eb22f27a..1744ff058e 100644 --- a/src/include/optimizer/pathnode.h +++ b/src/include/optimizer/pathnode.h @@ -74,7 +74,8 @@ extern MaterialPath *create_material_path(RelOptInfo *rel, Path *subpath); extern UniquePath *create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, SpecialJoinInfo *sjinfo); extern GatherPath *create_gather_path(PlannerInfo *root, - RelOptInfo *rel, Path *subpath, Relids required_outer); + RelOptInfo *rel, Path *subpath, PathTarget *target, + Relids required_outer, double *rows); extern SubqueryScanPath *create_subqueryscan_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, List *pathkeys, Relids required_outer); @@ -168,7 +169,9 @@ extern AggPath *create_agg_path(PlannerInfo *root, List *groupClause, List *qual, const AggClauseCosts *aggcosts, - double numGroups); + double numGroups, + bool combineStates, + bool finalizeAggs); extern GroupingSetsPath *create_groupingsets_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, diff --git a/src/include/optimizer/tlist.h b/src/include/optimizer/tlist.h index 0d745a0891..de58db1db2 100644 --- a/src/include/optimizer/tlist.h +++ b/src/include/optimizer/tlist.h @@ -61,6 +61,7 @@ extern void add_column_to_pathtarget(PathTarget *target, extern void add_new_column_to_pathtarget(PathTarget *target, Expr *expr); extern void add_new_columns_to_pathtarget(PathTarget *target, List *exprs); extern void apply_pathtarget_labeling_to_tlist(List *tlist, PathTarget *target); +extern void apply_partialaggref_adjustment(PathTarget *target); /* Convenience macro to get a PathTarget with valid cost/width fields */ #define create_pathtarget(root, tlist) \