diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c index d871396e20..47644b26c6 100644 --- a/src/backend/optimizer/path/equivclass.c +++ b/src/backend/optimizer/path/equivclass.c @@ -726,6 +726,10 @@ get_eclass_for_sort_expr(PlannerInfo *root, { RelOptInfo *rel = root->simple_rel_array[i]; + /* ignore the RTE_GROUP RTE */ + if (i == root->group_rtindex) + continue; + if (rel == NULL) /* must be an outer join */ { Assert(bms_is_member(i, root->outer_join_rels)); @@ -1087,6 +1091,10 @@ generate_base_implied_equalities(PlannerInfo *root) { RelOptInfo *rel = root->simple_rel_array[i]; + /* ignore the RTE_GROUP RTE */ + if (i == root->group_rtindex) + continue; + if (rel == NULL) /* must be an outer join */ { Assert(bms_is_member(i, root->outer_join_rels)); @@ -3354,6 +3362,10 @@ get_eclass_indexes_for_relids(PlannerInfo *root, Relids relids) { RelOptInfo *rel = root->simple_rel_array[i]; + /* ignore the RTE_GROUP RTE */ + if (i == root->group_rtindex) + continue; + if (rel == NULL) /* must be an outer join */ { Assert(bms_is_member(i, root->outer_join_rels)); diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c index e25798972f..035bbaa385 100644 --- a/src/backend/optimizer/path/pathkeys.c +++ b/src/backend/optimizer/path/pathkeys.c @@ -25,6 +25,7 @@ #include "optimizer/pathnode.h" #include "optimizer/paths.h" #include "partitioning/partbounds.h" +#include "rewrite/rewriteManip.h" #include "utils/lsyscache.h" /* Consider reordering of GROUP BY keys? */ @@ -1341,6 +1342,7 @@ make_pathkeys_for_sortclauses(PlannerInfo *root, &sortclauses, tlist, false, + false, &sortable, false); /* It's caller error if not all clauses were sortable */ @@ -1359,6 +1361,9 @@ make_pathkeys_for_sortclauses(PlannerInfo *root, * give rise to redundant pathkeys are removed from the sortclauses list * (which therefore must be pass-by-reference in this version). * + * If remove_group_rtindex is true, then we need to remove the RT index of the + * grouping step from the sort expressions before we make PathKeys for them. + * * *sortable is set to true if all the sort clauses are in fact sortable. * If any are not, they are ignored except for setting *sortable false. * (In that case, the output pathkey list isn't really useful. However, @@ -1375,6 +1380,7 @@ make_pathkeys_for_sortclauses_extended(PlannerInfo *root, List **sortclauses, List *tlist, bool remove_redundant, + bool remove_group_rtindex, bool *sortable, bool set_ec_sortref) { @@ -1394,6 +1400,14 @@ make_pathkeys_for_sortclauses_extended(PlannerInfo *root, *sortable = false; continue; } + if (remove_group_rtindex) + { + Assert(root->group_rtindex > 0); + sortkey = (Expr *) + remove_nulling_relids((Node *) sortkey, + bms_make_singleton(root->group_rtindex), + NULL); + } pathkey = make_pathkey_from_sortop(root, sortkey, sortcl->sortop, diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c index e2c68fe6f9..f3b9821498 100644 --- a/src/backend/optimizer/plan/initsplan.c +++ b/src/backend/optimizer/plan/initsplan.c @@ -1328,6 +1328,10 @@ mark_rels_nulled_by_join(PlannerInfo *root, Index ojrelid, { RelOptInfo *rel = root->simple_rel_array[relid]; + /* ignore the RTE_GROUP RTE */ + if (relid == root->group_rtindex) + continue; + if (rel == NULL) /* must be an outer join */ { Assert(bms_is_member(relid, root->outer_join_rels)); diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index bd4b652f7a..df35d1ff9c 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -58,6 +58,7 @@ #include "parser/parse_relation.h" #include "parser/parsetree.h" #include "partitioning/partdesc.h" +#include "rewrite/rewriteManip.h" #include "utils/lsyscache.h" #include "utils/rel.h" #include "utils/selfuncs.h" @@ -3506,9 +3507,23 @@ standard_qp_callback(PlannerInfo *root, void *extra) if (grouping_is_sortable(groupClause)) { - root->group_pathkeys = make_pathkeys_for_sortclauses(root, - groupClause, - tlist); + bool sortable; + + /* + * The groupClause is logically below the grouping step. So if + * there is an RTE entry for the grouping step, we need to remove + * its RT index from the sort expressions before we make PathKeys + * for them. + */ + root->group_pathkeys = + make_pathkeys_for_sortclauses_extended(root, + &groupClause, + tlist, + false, + parse->hasGroupRTE, + &sortable, + false); + Assert(sortable); root->num_groupby_pathkeys = list_length(root->group_pathkeys); } else @@ -3538,6 +3553,7 @@ standard_qp_callback(PlannerInfo *root, void *extra) &root->processed_groupClause, tlist, true, + false, &sortable, true); if (!sortable) @@ -3589,6 +3605,7 @@ standard_qp_callback(PlannerInfo *root, void *extra) &root->processed_distinctClause, tlist, true, + false, &sortable, false); if (!sortable) @@ -3616,6 +3633,7 @@ standard_qp_callback(PlannerInfo *root, void *extra) &groupClauses, tlist, false, + false, &sortable, false); if (!sortable) @@ -5526,7 +5544,19 @@ make_group_input_target(PlannerInfo *root, PathTarget *final_target) { /* * It's a grouping column, so add it to the input target as-is. + * + * Note that the target is logically below the grouping step. So + * with grouping sets we need to remove the RT index of the + * grouping step if there is any from the target expression. */ + if (parse->hasGroupRTE && parse->groupingSets != NIL) + { + Assert(root->group_rtindex > 0); + expr = (Expr *) + remove_nulling_relids((Node *) expr, + bms_make_singleton(root->group_rtindex), + NULL); + } add_column_to_pathtarget(input_target, expr, sgref); } else @@ -5554,11 +5584,23 @@ make_group_input_target(PlannerInfo *root, PathTarget *final_target) * includes Vars used in resjunk items, so we are covering the needs of * ORDER BY and window specifications. Vars used within Aggrefs and * WindowFuncs will be pulled out here, too. + * + * Note that the target is logically below the grouping step. So with + * grouping sets we need to remove the RT index of the grouping step if + * there is any from the non-group Vars. */ non_group_vars = pull_var_clause((Node *) non_group_cols, PVC_RECURSE_AGGREGATES | PVC_RECURSE_WINDOWFUNCS | PVC_INCLUDE_PLACEHOLDERS); + if (parse->hasGroupRTE && parse->groupingSets != NIL) + { + Assert(root->group_rtindex > 0); + non_group_vars = (List *) + remove_nulling_relids((Node *) non_group_vars, + bms_make_singleton(root->group_rtindex), + NULL); + } add_new_columns_to_pathtarget(input_target, non_group_vars); /* clean up cruft */ @@ -6207,6 +6249,7 @@ make_pathkeys_for_window(PlannerInfo *root, WindowClause *wc, &wc->partitionClause, tlist, true, + false, &sortable, false); diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c index 8caf094f7d..91c7c4fe2f 100644 --- a/src/backend/optimizer/plan/setrefs.c +++ b/src/backend/optimizer/plan/setrefs.c @@ -26,6 +26,7 @@ #include "optimizer/subselect.h" #include "optimizer/tlist.h" #include "parser/parse_relation.h" +#include "rewrite/rewriteManip.h" #include "tcop/utility.h" #include "utils/syscache.h" @@ -2426,6 +2427,28 @@ set_upper_references(PlannerInfo *root, Plan *plan, int rtoffset) subplan_itlist = build_tlist_index(subplan->targetlist); + /* + * If it's a grouping node with grouping sets, any Vars and PHVs appearing + * in the targetlist and quals should have nullingrels that include the + * effects of the grouping step, ie they will have nullingrels equal to + * the input Vars/PHVs' nullingrels plus the RT index of the grouping + * step. In order to perform exact nullingrels matches, we remove the RT + * index of the grouping step first. + */ + if (IsA(plan, Agg) && + root->group_rtindex > 0 && + ((Agg *) plan)->groupingSets) + { + plan->targetlist = (List *) + remove_nulling_relids((Node *) plan->targetlist, + bms_make_singleton(root->group_rtindex), + NULL); + plan->qual = (List *) + remove_nulling_relids((Node *) plan->qual, + bms_make_singleton(root->group_rtindex), + NULL); + } + output_targetlist = NIL; foreach(l, plan->targetlist) { diff --git a/src/backend/optimizer/util/var.c b/src/backend/optimizer/util/var.c index b189185fca..f7534ad53d 100644 --- a/src/backend/optimizer/util/var.c +++ b/src/backend/optimizer/util/var.c @@ -22,6 +22,7 @@ #include "access/sysattr.h" #include "nodes/nodeFuncs.h" +#include "optimizer/clauses.h" #include "optimizer/optimizer.h" #include "optimizer/placeholder.h" #include "optimizer/prep.h" @@ -83,6 +84,8 @@ static Node *flatten_join_alias_vars_mutator(Node *node, flatten_join_alias_vars_context *context); static Node *flatten_group_exprs_mutator(Node *node, flatten_join_alias_vars_context *context); +static Node *mark_nullable_by_grouping(PlannerInfo *root, Node *newnode, + Var *oldvar); static Node *add_nullingrels_if_needed(PlannerInfo *root, Node *newnode, Var *oldvar); static bool is_standard_join_alias_expression(Node *newnode, Var *oldvar); @@ -909,6 +912,18 @@ flatten_join_alias_vars_mutator(Node *node, * flatten_group_exprs * Replace Vars that reference GROUP outputs with the underlying grouping * expressions. + * + * We have to preserve any varnullingrels info attached to the group Vars we're + * replacing. If the replacement expression is a Var or PlaceHolderVar or + * constructed from those, we can just add the varnullingrels bits to the + * existing nullingrels field(s); otherwise we have to add a PlaceHolderVar + * wrapper. + * + * NOTE: this is also used by ruleutils.c, to deparse one query parsetree back + * to source text. For that use-case, root will be NULL, which is why we have + * to pass the Query separately. We need the root itself only for preserving + * varnullingrels. We can avoid preserving varnullingrels in the ruleutils.c's + * usage because it does not make any difference to the deparsed source text. */ Node * flatten_group_exprs(PlannerInfo *root, Query *query, Node *node) @@ -973,7 +988,8 @@ flatten_group_exprs_mutator(Node *node, if (context->possible_sublink && !context->inserted_sublink) context->inserted_sublink = checkExprHasSubLink(newvar); - return newvar; + /* Lastly, add any varnullingrels to the replacement expression */ + return mark_nullable_by_grouping(context->root, newvar, var); } if (IsA(node, Aggref)) @@ -1040,6 +1056,76 @@ flatten_group_exprs_mutator(Node *node, (void *) context); } +/* + * Add oldvar's varnullingrels, if any, to a flattened grouping expression. + * The newnode has been copied, so we can modify it freely. + */ +static Node * +mark_nullable_by_grouping(PlannerInfo *root, Node *newnode, Var *oldvar) +{ + Relids relids; + + if (root == NULL) + return newnode; + if (oldvar->varnullingrels == NULL) + return newnode; /* nothing to do */ + + Assert(bms_equal(oldvar->varnullingrels, + bms_make_singleton(root->group_rtindex))); + + relids = pull_varnos_of_level(root, newnode, oldvar->varlevelsup); + + if (!bms_is_empty(relids)) + { + /* + * If the newnode is not variable-free, we set the nullingrels of Vars + * or PHVs that are contained in the expression. This is not really + * 'correct' in theory, because it is the whole expression that can be + * nullable by grouping sets, not its individual vars. But it works + * in practice, because what we need is that the expression can be + * somehow distinguished from the same expression in ECs, and marking + * its vars is sufficient for this purpose. + */ + newnode = add_nulling_relids(newnode, + relids, + oldvar->varnullingrels); + } + else /* variable-free? */ + { + /* + * If the newnode is variable-free and does not contain volatile + * functions or set-returning functions, it can be treated as a member + * of EC that is redundant. So wrap it in a new PlaceHolderVar to + * carry the nullingrels. Otherwise we do not bother to make any + * changes. + * + * Aggregate functions and window functions are not allowed in + * grouping expressions. + */ + Assert(!contain_agg_clause(newnode)); + Assert(!contain_window_function(newnode)); + + if (!contain_volatile_functions(newnode) && + !expression_returns_set(newnode)) + { + PlaceHolderVar *newphv; + Relids phrels; + + phrels = get_relids_in_jointree((Node *) root->parse->jointree, + true, false); + Assert(!bms_is_empty(phrels)); + + newphv = make_placeholder_expr(root, (Expr *) newnode, phrels); + /* newphv has zero phlevelsup and NULL phnullingrels; fix it */ + newphv->phlevelsup = oldvar->varlevelsup; + newphv->phnullingrels = bms_copy(oldvar->varnullingrels); + newnode = (Node *) newphv; + } + } + + return newnode; +} + /* * Add oldvar's varnullingrels, if any, to a flattened join alias expression. * The newnode has been copied, so we can modify it freely. diff --git a/src/backend/parser/parse_agg.c b/src/backend/parser/parse_agg.c index bd095d05c0..102accd071 100644 --- a/src/backend/parser/parse_agg.c +++ b/src/backend/parser/parse_agg.c @@ -1333,9 +1333,6 @@ substitute_grouped_columns_mutator(Node *node, if (node == NULL) return NULL; - if (IsA(node, Const) || - IsA(node, Param)) - return node; /* constants are always acceptable */ if (IsA(node, Aggref)) { @@ -1409,6 +1406,16 @@ substitute_grouped_columns_mutator(Node *node, } } + /* + * Constants are always acceptable. We have to do this after we checked + * the subexpression as a whole for a match, because it is possible that + * we have GROUP BY items that are constants, and the constants would + * become not so constant after the grouping step. + */ + if (IsA(node, Const) || + IsA(node, Param)) + return node; + /* * If we have an ungrouped Var of the original query level, we have a * failure. Vars below the original query level are not a problem, and diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index d2a512b61c..8bace2754c 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -57,6 +57,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 202409101 +#define CATALOG_VERSION_NO 202409102 #endif diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index 970499c469..a78e90610f 100644 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -240,6 +240,7 @@ extern List *make_pathkeys_for_sortclauses_extended(PlannerInfo *root, List **sortclauses, List *tlist, bool remove_redundant, + bool remove_group_rtindex, bool *sortable, bool set_ec_sortref); extern void initialize_mergeclause_eclasses(PlannerInfo *root, diff --git a/src/test/regress/expected/groupingsets.out b/src/test/regress/expected/groupingsets.out index c860eab1c6..717383c4f3 100644 --- a/src/test/regress/expected/groupingsets.out +++ b/src/test/regress/expected/groupingsets.out @@ -442,19 +442,22 @@ select * from ( group by grouping sets(1, 2) ) ss where x = 1 and q1 = 123; - QUERY PLAN --------------------------------------------- + QUERY PLAN +-------------------------------------------------- Subquery Scan on ss Output: ss.x, ss.q1, ss.sum Filter: ((ss.x = 1) AND (ss.q1 = 123)) -> GroupAggregate Output: (1), i1.q1, sum(i1.q2) - Group Key: 1 + Group Key: (1) Sort Key: i1.q1 Group Key: i1.q1 - -> Seq Scan on public.int8_tbl i1 - Output: 1, i1.q1, i1.q2 -(10 rows) + -> Sort + Output: (1), i1.q1, i1.q2 + Sort Key: (1) + -> Seq Scan on public.int8_tbl i1 + Output: 1, i1.q1, i1.q2 +(13 rows) select * from ( select 1 as x, q1, sum(q2) @@ -736,15 +739,18 @@ select a, b, sum(v.x) -- Test reordering of grouping sets explain (costs off) select * from gstest1 group by grouping sets((a,b,v),(v)) order by v,b,a; - QUERY PLAN ------------------------------------------------------------------------------- - GroupAggregate - Group Key: "*VALUES*".column3, "*VALUES*".column2, "*VALUES*".column1 - Group Key: "*VALUES*".column3 - -> Sort - Sort Key: "*VALUES*".column3, "*VALUES*".column2, "*VALUES*".column1 - -> Values Scan on "*VALUES*" -(6 rows) + QUERY PLAN +------------------------------------------------------------------------------------ + Incremental Sort + Sort Key: "*VALUES*".column3, "*VALUES*".column2, "*VALUES*".column1 + Presorted Key: "*VALUES*".column3 + -> GroupAggregate + Group Key: "*VALUES*".column3, "*VALUES*".column2, "*VALUES*".column1 + Group Key: "*VALUES*".column3 + -> Sort + Sort Key: "*VALUES*".column3, "*VALUES*".column2, "*VALUES*".column1 + -> Values Scan on "*VALUES*" +(9 rows) -- Agg level check. This query should error out. select (select grouping(a,b) from gstest2) from gstest2 group by a,b; @@ -816,16 +822,18 @@ select a,count(*) from gstest2 group by rollup(a) having a is distinct from 1 or explain (costs off) select a,count(*) from gstest2 group by rollup(a) having a is distinct from 1 order by a; - QUERY PLAN ----------------------------------- - GroupAggregate - Group Key: a - Group Key: () - Filter: (a IS DISTINCT FROM 1) - -> Sort - Sort Key: a - -> Seq Scan on gstest2 -(7 rows) + QUERY PLAN +---------------------------------------- + Sort + Sort Key: a + -> GroupAggregate + Group Key: a + Group Key: () + Filter: (a IS DISTINCT FROM 1) + -> Sort + Sort Key: a + -> Seq Scan on gstest2 +(9 rows) select v.c, (select count(*) from gstest2 group by () having v.c) from (values (false),(true)) v(c) order by v.c; @@ -2288,4 +2296,137 @@ select not a from (values(true)) t(a) group by rollup(not a) having not not a; f (1 row) +-- test handling of expressions nullable by grouping sets +explain (costs off) +select distinct on (a, b) a, b +from (values (1, 1), (2, 2)) as t (a, b) where a = b +group by grouping sets((a, b), (a)) +order by a, b; + QUERY PLAN +---------------------------------------------------------------- + Unique + -> Sort + Sort Key: "*VALUES*".column1, "*VALUES*".column2 + -> HashAggregate + Hash Key: "*VALUES*".column1, "*VALUES*".column2 + Hash Key: "*VALUES*".column1 + -> Values Scan on "*VALUES*" + Filter: (column1 = column2) +(8 rows) + +select distinct on (a, b) a, b +from (values (1, 1), (2, 2)) as t (a, b) where a = b +group by grouping sets((a, b), (a)) +order by a, b; + a | b +---+--- + 1 | 1 + 1 | + 2 | 2 + 2 | +(4 rows) + +explain (costs off) +select distinct on (a, b+1) a, b+1 +from (values (1, 0), (2, 1)) as t (a, b) where a = b+1 +group by grouping sets((a, b+1), (a)) +order by a, b+1; + QUERY PLAN +---------------------------------------------------------------------- + Unique + -> Sort + Sort Key: "*VALUES*".column1, (("*VALUES*".column2 + 1)) + -> HashAggregate + Hash Key: "*VALUES*".column1, ("*VALUES*".column2 + 1) + Hash Key: "*VALUES*".column1 + -> Values Scan on "*VALUES*" + Filter: (column1 = (column2 + 1)) +(8 rows) + +select distinct on (a, b+1) a, b+1 +from (values (1, 0), (2, 1)) as t (a, b) where a = b+1 +group by grouping sets((a, b+1), (a)) +order by a, b+1; + a | ?column? +---+---------- + 1 | 1 + 1 | + 2 | 2 + 2 | +(4 rows) + +explain (costs off) +select a, b +from (values (1, 1), (2, 2)) as t (a, b) where a = b +group by grouping sets((a, b), (a)) +order by a, b nulls first; + QUERY PLAN +---------------------------------------------------------------- + Sort + Sort Key: "*VALUES*".column1, "*VALUES*".column2 NULLS FIRST + -> HashAggregate + Hash Key: "*VALUES*".column1, "*VALUES*".column2 + Hash Key: "*VALUES*".column1 + -> Values Scan on "*VALUES*" + Filter: (column1 = column2) +(7 rows) + +select a, b +from (values (1, 1), (2, 2)) as t (a, b) where a = b +group by grouping sets((a, b), (a)) +order by a, b nulls first; + a | b +---+--- + 1 | + 1 | 1 + 2 | + 2 | 2 +(4 rows) + +explain (costs off) +select 1 as one group by rollup(one) order by one nulls first; + QUERY PLAN +----------------------------- + Sort + Sort Key: (1) NULLS FIRST + -> MixedAggregate + Hash Key: 1 + Group Key: () + -> Result +(6 rows) + +select 1 as one group by rollup(one) order by one nulls first; + one +----- + + 1 +(2 rows) + +explain (costs off) +select a, b, row_number() over (order by a, b nulls first) +from (values (1, 1), (2, 2)) as t (a, b) where a = b +group by grouping sets((a, b), (a)); + QUERY PLAN +---------------------------------------------------------------------- + WindowAgg + -> Sort + Sort Key: "*VALUES*".column1, "*VALUES*".column2 NULLS FIRST + -> HashAggregate + Hash Key: "*VALUES*".column1, "*VALUES*".column2 + Hash Key: "*VALUES*".column1 + -> Values Scan on "*VALUES*" + Filter: (column1 = column2) +(8 rows) + +select a, b, row_number() over (order by a, b nulls first) +from (values (1, 1), (2, 2)) as t (a, b) where a = b +group by grouping sets((a, b), (a)); + a | b | row_number +---+---+------------ + 1 | | 1 + 1 | 1 | 2 + 2 | | 3 + 2 | 2 | 4 +(4 rows) + -- end diff --git a/src/test/regress/sql/groupingsets.sql b/src/test/regress/sql/groupingsets.sql index add76ac4a3..660ca33efc 100644 --- a/src/test/regress/sql/groupingsets.sql +++ b/src/test/regress/sql/groupingsets.sql @@ -640,4 +640,51 @@ explain (costs off) select not a from (values(true)) t(a) group by rollup(not a) having not not a; select not a from (values(true)) t(a) group by rollup(not a) having not not a; +-- test handling of expressions nullable by grouping sets +explain (costs off) +select distinct on (a, b) a, b +from (values (1, 1), (2, 2)) as t (a, b) where a = b +group by grouping sets((a, b), (a)) +order by a, b; + +select distinct on (a, b) a, b +from (values (1, 1), (2, 2)) as t (a, b) where a = b +group by grouping sets((a, b), (a)) +order by a, b; + +explain (costs off) +select distinct on (a, b+1) a, b+1 +from (values (1, 0), (2, 1)) as t (a, b) where a = b+1 +group by grouping sets((a, b+1), (a)) +order by a, b+1; + +select distinct on (a, b+1) a, b+1 +from (values (1, 0), (2, 1)) as t (a, b) where a = b+1 +group by grouping sets((a, b+1), (a)) +order by a, b+1; + +explain (costs off) +select a, b +from (values (1, 1), (2, 2)) as t (a, b) where a = b +group by grouping sets((a, b), (a)) +order by a, b nulls first; + +select a, b +from (values (1, 1), (2, 2)) as t (a, b) where a = b +group by grouping sets((a, b), (a)) +order by a, b nulls first; + +explain (costs off) +select 1 as one group by rollup(one) order by one nulls first; +select 1 as one group by rollup(one) order by one nulls first; + +explain (costs off) +select a, b, row_number() over (order by a, b nulls first) +from (values (1, 1), (2, 2)) as t (a, b) where a = b +group by grouping sets((a, b), (a)); + +select a, b, row_number() over (order by a, b nulls first) +from (values (1, 1), (2, 2)) as t (a, b) where a = b +group by grouping sets((a, b), (a)); + -- end