From 3570ea4248caafcd44d015eaaf7f5924e2b58781 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Wed, 24 Aug 2016 14:37:51 -0400 Subject: [PATCH] Fix improper repetition of previous results from a hashed aggregate. ExecReScanAgg's check for whether it could re-use a previously calculated hashtable neglected the possibility that the Agg node might reference PARAM_EXEC Params that are not referenced by its input plan node. That's okay if the Params are in upper tlist or qual expressions; but if one appears in aggregate input expressions, then the hashtable contents need to be recomputed when the Param's value changes. To avoid unnecessary performance degradation in the case of a Param that isn't within an aggregate input, add logic to the planner to determine which Params are within aggregate inputs. This requires a new field in struct Agg, but fortunately we never write plans to disk, so this isn't an initdb-forcing change. Per report from Jeevan Chalke. This has been broken since forever, so back-patch to all supported branches. Andrew Gierth, with minor adjustments by me Report: --- src/backend/executor/nodeAgg.c | 15 +++++--- src/backend/nodes/copyfuncs.c | 1 + src/backend/nodes/outfuncs.c | 1 + src/backend/optimizer/plan/createplan.c | 1 + src/backend/optimizer/plan/subselect.c | 47 +++++++++++++++++++++++- src/include/nodes/plannodes.h | 2 + src/test/regress/expected/aggregates.out | 32 ++++++++++++++++ src/test/regress/sql/aggregates.sql | 10 +++++ 8 files changed, 102 insertions(+), 7 deletions(-) diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c index 34afb24b38..b4b6699dcd 100644 --- a/src/backend/executor/nodeAgg.c +++ b/src/backend/executor/nodeAgg.c @@ -1905,13 +1905,14 @@ void ExecReScanAgg(AggState *node) { ExprContext *econtext = node->ss.ps.ps_ExprContext; + Agg *aggnode = (Agg *) node->ss.ps.plan; int aggno; node->agg_done = false; node->ss.ps.ps_TupFromTlist = false; - if (((Agg *) node->ss.ps.plan)->aggstrategy == AGG_HASHED) + if (aggnode->aggstrategy == AGG_HASHED) { /* * In the hashed case, if we haven't yet built the hash table then we @@ -1923,11 +1924,13 @@ ExecReScanAgg(AggState *node) return; /* - * If we do have the hash table and the subplan does not have any - * parameter changes, then we can just rescan the existing hash table; - * no need to build it again. + * If we do have the hash table, and the subplan does not have any + * parameter changes, and none of our own parameter changes affect + * input expressions of the aggregated functions, then we can just + * rescan the existing hash table; no need to build it again. */ - if (node->ss.ps.lefttree->chgParam == NULL) + if (node->ss.ps.lefttree->chgParam == NULL && + !bms_overlap(node->ss.ps.chgParam, aggnode->aggParams)) { ResetTupleHashIterator(node->hashtable, &node->hashiter); return; @@ -1964,7 +1967,7 @@ ExecReScanAgg(AggState *node) */ MemoryContextResetAndDeleteChildren(node->aggcontext); - if (((Agg *) node->ss.ps.plan)->aggstrategy == AGG_HASHED) + if (aggnode->aggstrategy == AGG_HASHED) { /* Rebuild an empty hash table */ build_hash_table(node); diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index eb613aec7a..5ed00f68a2 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -769,6 +769,7 @@ _copyAgg(Agg *from) COPY_POINTER_FIELD(grpOperators, from->numCols * sizeof(Oid)); } COPY_SCALAR_FIELD(numGroups); + COPY_BITMAPSET_FIELD(aggParams); return newnode; } diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 01b2d6d2d4..55d4d9b74b 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -642,6 +642,7 @@ _outAgg(StringInfo str, Agg *node) appendStringInfo(str, " %u", node->grpOperators[i]); WRITE_LONG_FIELD(numGroups); + WRITE_BITMAPSET_FIELD(aggParams); } static void diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 77cf33d1f7..339f515635 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -3940,6 +3940,7 @@ make_agg(PlannerInfo *root, List *tlist, List *qual, node->grpColIdx = grpColIdx; node->grpOperators = grpOperators; node->numGroups = numGroups; + node->aggParams = NULL; /* SS_finalize_plan() will fill this */ copy_plan_costsize(plan, lefttree); /* only care about copying size */ cost_agg(&agg_path, root, diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c index c50709ccc1..7890305a94 100644 --- a/src/backend/optimizer/plan/subselect.c +++ b/src/backend/optimizer/plan/subselect.c @@ -81,6 +81,7 @@ static Bitmapset *finalize_plan(PlannerInfo *root, Bitmapset *valid_params, Bitmapset *scan_params); static bool finalize_primnode(Node *node, finalize_primnode_context *context); +static bool finalize_agg_primnode(Node *node, finalize_primnode_context *context); /* @@ -2351,6 +2352,29 @@ finalize_plan(PlannerInfo *root, Plan *plan, Bitmapset *valid_params, locally_added_param); break; + case T_Agg: + { + Agg *agg = (Agg *) plan; + + /* + * AGG_HASHED plans need to know which Params are referenced + * in aggregate calls. Do a separate scan to identify them. + */ + if (agg->aggstrategy == AGG_HASHED) + { + finalize_primnode_context aggcontext; + + aggcontext.root = root; + aggcontext.paramids = NULL; + finalize_agg_primnode((Node *) agg->plan.targetlist, + &aggcontext); + finalize_agg_primnode((Node *) agg->plan.qual, + &aggcontext); + agg->aggParams = aggcontext.paramids; + } + } + break; + case T_WindowAgg: finalize_primnode(((WindowAgg *) plan)->startOffset, &context); @@ -2359,7 +2383,6 @@ finalize_plan(PlannerInfo *root, Plan *plan, Bitmapset *valid_params, break; case T_Hash: - case T_Agg: case T_Material: case T_Sort: case T_Unique: @@ -2503,6 +2526,28 @@ finalize_primnode(Node *node, finalize_primnode_context *context) (void *) context); } +/* + * finalize_agg_primnode: find all Aggref nodes in the given expression tree, + * and add IDs of all PARAM_EXEC params appearing within their aggregated + * arguments to the result set. + */ +static bool +finalize_agg_primnode(Node *node, finalize_primnode_context *context) +{ + if (node == NULL) + return false; + if (IsA(node, Aggref)) + { + Aggref *agg = (Aggref *) node; + + /* we should not consider the direct arguments, if any */ + finalize_primnode((Node *) agg->args, context); + return false; /* there can't be any Aggrefs below here */ + } + return expression_tree_walker(node, finalize_agg_primnode, + (void *) context); +} + /* * SS_make_initplan_from_plan - given a plan tree, make it an InitPlan * diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index b7b037a5d8..fd469be630 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -604,6 +604,8 @@ typedef struct Agg AttrNumber *grpColIdx; /* their indexes in the target list */ Oid *grpOperators; /* equality operators to compare with */ long numGroups; /* estimated number of groups in input */ + Bitmapset *aggParams; /* IDs of Params used in Aggref inputs */ + /* Note: planner provides numGroups & aggParams only in AGG_HASHED case */ } Agg; /* ---------------- diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out index 84342e7b0b..c9006a77da 100644 --- a/src/test/regress/expected/aggregates.out +++ b/src/test/regress/expected/aggregates.out @@ -305,6 +305,38 @@ from tenk1 o; 9999 (1 row) +-- Test handling of Params within aggregate arguments in hashed aggregation. +-- Per bug report from Jeevan Chalke. +explain (verbose, costs off) +select array(select sum(x+y) s + from generate_series(1,3) y group by y order by s) + from generate_series(1,3) x; + QUERY PLAN +------------------------------------------------------------------- + Function Scan on pg_catalog.generate_series x + Output: (SubPlan 1) + Function Call: generate_series(1, 3) + SubPlan 1 + -> Sort + Output: (sum(($0 + y.y))), y.y + Sort Key: (sum(($0 + y.y))) + -> HashAggregate + Output: sum((x.x + y.y)), y.y + -> Function Scan on pg_catalog.generate_series y + Output: y.y + Function Call: generate_series(1, 3) +(12 rows) + +select array(select sum(x+y) s + from generate_series(1,3) y group by y order by s) + from generate_series(1,3) x; + ?column? +---------- + {2,3,4} + {3,4,5} + {4,5,6} +(3 rows) + -- -- test for bitwise integer aggregates -- diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql index 631c6dbe80..062ee9bc7b 100644 --- a/src/test/regress/sql/aggregates.sql +++ b/src/test/regress/sql/aggregates.sql @@ -86,6 +86,16 @@ select (select max((select i.unique2 from tenk1 i where i.unique1 = o.unique1))) from tenk1 o; +-- Test handling of Params within aggregate arguments in hashed aggregation. +-- Per bug report from Jeevan Chalke. +explain (verbose, costs off) +select array(select sum(x+y) s + from generate_series(1,3) y group by y order by s) + from generate_series(1,3) x; +select array(select sum(x+y) s + from generate_series(1,3) y group by y order by s) + from generate_series(1,3) x; + -- -- test for bitwise integer aggregates --