Mark expressions nullable by grouping sets

When generating window_pathkeys, distinct_pathkeys, or sort_pathkeys, we failed to realize that the grouping/ordering expressions might be nullable by grouping sets. As a result, we may incorrectly deem that the PathKeys are redundant by EquivalenceClass processing and thus remove them from the pathkeys list. That would lead to wrong results in some cases. To fix this issue, we mark the grouping expressions nullable by grouping sets if that is the case. If the grouping expression is a Var or PlaceHolderVar or constructed from those, we can just add the RT index of the RTE_GROUP RTE to the existing nullingrels field(s); otherwise we have to add a PlaceHolderVar to carry on the nullingrel bit. However, we have to manually remove this nullingrel bit from expressions in various cases where these expressions are logically below the grouping step, such as when we generate groupClause pathkeys for grouping sets, or when we generate PathTarget for initial input to grouping nodes. Furthermore, in set_upper_references, the targetlist and quals of an Agg node should have nullingrels that include the effects of the grouping step, ie they will have nullingrels equal to the input Vars/PHVs' nullingrels plus the nullingrel bit that references the grouping RTE. In order to perform exact nullingrels matches, we also need to manually remove this nullingrel bit. Bump catversion because this changes the querytree produced by the parser. Thanks to Tom Lane for the idea to invent a new kind of RTE. Per reports from Geoff Winkless, Tobias Wendorff, Richard Guo from various threads. Author: Richard Guo Reviewed-by: Ashutosh Bapat, Sutou Kouhei Discussion: https://postgr.es/m/CAMbWs4_dp7e7oTwaiZeBX8+P1rXw4ThkZxh1QG81rhu9Z47VsQ@mail.gmail.com
2024-09-10 12:36:48 +09:00 · 2024-09-10 12:36:48 +09:00 · f5050f795a
commit f5050f795a
parent 247dea89f7
11 changed files with 411 additions and 33 deletions
--- a/src/backend/optimizer/path/equivclass.c
+++ b/src/backend/optimizer/path/equivclass.c
@ -726,6 +726,10 @@ get_eclass_for_sort_expr(PlannerInfo *root,
 		{
 			RelOptInfo *rel = root->simple_rel_array[i];

+			/* ignore the RTE_GROUP RTE */
+			if (i == root->group_rtindex)
+				continue;
+
 			if (rel == NULL)	/* must be an outer join */
 			{
 				Assert(bms_is_member(i, root->outer_join_rels));
@ -1087,6 +1091,10 @@ generate_base_implied_equalities(PlannerInfo *root)
 		{
 			RelOptInfo *rel = root->simple_rel_array[i];

+			/* ignore the RTE_GROUP RTE */
+			if (i == root->group_rtindex)
+				continue;
+
 			if (rel == NULL)	/* must be an outer join */
 			{
 				Assert(bms_is_member(i, root->outer_join_rels));
@ -3354,6 +3362,10 @@ get_eclass_indexes_for_relids(PlannerInfo *root, Relids relids)
 	{
 		RelOptInfo *rel = root->simple_rel_array[i];

+		/* ignore the RTE_GROUP RTE */
+		if (i == root->group_rtindex)
+			continue;
+
 		if (rel == NULL)		/* must be an outer join */
 		{
 			Assert(bms_is_member(i, root->outer_join_rels));
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@ -25,6 +25,7 @@
 #include "optimizer/pathnode.h"
 #include "optimizer/paths.h"
 #include "partitioning/partbounds.h"
+#include "rewrite/rewriteManip.h"
 #include "utils/lsyscache.h"

 /* Consider reordering of GROUP BY keys? */
@ -1341,6 +1342,7 @@ make_pathkeys_for_sortclauses(PlannerInfo *root,
 													&sortclauses,
 													tlist,
 													false,
+													false,
 													&sortable,
 													false);
 	/* It's caller error if not all clauses were sortable */
@ -1359,6 +1361,9 @@ make_pathkeys_for_sortclauses(PlannerInfo *root,
 * give rise to redundant pathkeys are removed from the sortclauses list
 * (which therefore must be pass-by-reference in this version).
 *
+ * If remove_group_rtindex is true, then we need to remove the RT index of the
+ * grouping step from the sort expressions before we make PathKeys for them.
+ *
 * *sortable is set to true if all the sort clauses are in fact sortable.
 * If any are not, they are ignored except for setting *sortable false.
 * (In that case, the output pathkey list isn't really useful.  However,
@ -1375,6 +1380,7 @@ make_pathkeys_for_sortclauses_extended(PlannerInfo *root,
 									   List **sortclauses,
 									   List *tlist,
 									   bool remove_redundant,
+									   bool remove_group_rtindex,
 									   bool *sortable,
 									   bool set_ec_sortref)
 {
@ -1394,6 +1400,14 @@ make_pathkeys_for_sortclauses_extended(PlannerInfo *root,
 			*sortable = false;
 			continue;
 		}
+		if (remove_group_rtindex)
+		{
+			Assert(root->group_rtindex > 0);
+			sortkey = (Expr *)
+				remove_nulling_relids((Node *) sortkey,
+									  bms_make_singleton(root->group_rtindex),
+									  NULL);
+		}
 		pathkey = make_pathkey_from_sortop(root,
 										   sortkey,
 										   sortcl->sortop,
--- a/src/backend/optimizer/plan/initsplan.c
+++ b/src/backend/optimizer/plan/initsplan.c
@ -1328,6 +1328,10 @@ mark_rels_nulled_by_join(PlannerInfo *root, Index ojrelid,
 	{
 		RelOptInfo *rel = root->simple_rel_array[relid];

+		/* ignore the RTE_GROUP RTE */
+		if (relid == root->group_rtindex)
+			continue;
+
 		if (rel == NULL)		/* must be an outer join */
 		{
 			Assert(bms_is_member(relid, root->outer_join_rels));
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@ -58,6 +58,7 @@
 #include "parser/parse_relation.h"
 #include "parser/parsetree.h"
 #include "partitioning/partdesc.h"
+#include "rewrite/rewriteManip.h"
 #include "utils/lsyscache.h"
 #include "utils/rel.h"
 #include "utils/selfuncs.h"
@ -3506,9 +3507,23 @@ standard_qp_callback(PlannerInfo *root, void *extra)

 		if (grouping_is_sortable(groupClause))
 		{
-			root->group_pathkeys = make_pathkeys_for_sortclauses(root,
-																 groupClause,
-																 tlist);
+			bool		sortable;
+
+			/*
+			 * The groupClause is logically below the grouping step.  So if
+			 * there is an RTE entry for the grouping step, we need to remove
+			 * its RT index from the sort expressions before we make PathKeys
+			 * for them.
+			 */
+			root->group_pathkeys =
+				make_pathkeys_for_sortclauses_extended(root,
+													   &groupClause,
+													   tlist,
+													   false,
+													   parse->hasGroupRTE,
+													   &sortable,
+													   false);
+			Assert(sortable);
 			root->num_groupby_pathkeys = list_length(root->group_pathkeys);
 		}
 		else
@ -3538,6 +3553,7 @@ standard_qp_callback(PlannerInfo *root, void *extra)
 												   &root->processed_groupClause,
 												   tlist,
 												   true,
+												   false,
 												   &sortable,
 												   true);
 		if (!sortable)
@ -3589,6 +3605,7 @@ standard_qp_callback(PlannerInfo *root, void *extra)
 												   &root->processed_distinctClause,
 												   tlist,
 												   true,
+												   false,
 												   &sortable,
 												   false);
 		if (!sortable)
@ -3616,6 +3633,7 @@ standard_qp_callback(PlannerInfo *root, void *extra)
 												   &groupClauses,
 												   tlist,
 												   false,
+												   false,
 												   &sortable,
 												   false);
 		if (!sortable)
@ -5526,7 +5544,19 @@ make_group_input_target(PlannerInfo *root, PathTarget *final_target)
 		{
 			/*
 			 * It's a grouping column, so add it to the input target as-is.
+			 *
+			 * Note that the target is logically below the grouping step.  So
+			 * with grouping sets we need to remove the RT index of the
+			 * grouping step if there is any from the target expression.
 			 */
+			if (parse->hasGroupRTE && parse->groupingSets != NIL)
+			{
+				Assert(root->group_rtindex > 0);
+				expr = (Expr *)
+					remove_nulling_relids((Node *) expr,
+										  bms_make_singleton(root->group_rtindex),
+										  NULL);
+			}
 			add_column_to_pathtarget(input_target, expr, sgref);
 		}
 		else
@ -5554,11 +5584,23 @@ make_group_input_target(PlannerInfo *root, PathTarget *final_target)
 	 * includes Vars used in resjunk items, so we are covering the needs of
 	 * ORDER BY and window specifications.  Vars used within Aggrefs and
 	 * WindowFuncs will be pulled out here, too.
+	 *
+	 * Note that the target is logically below the grouping step.  So with
+	 * grouping sets we need to remove the RT index of the grouping step if
+	 * there is any from the non-group Vars.
 	 */
 	non_group_vars = pull_var_clause((Node *) non_group_cols,
 									 PVC_RECURSE_AGGREGATES |
 									 PVC_RECURSE_WINDOWFUNCS |
 									 PVC_INCLUDE_PLACEHOLDERS);
+	if (parse->hasGroupRTE && parse->groupingSets != NIL)
+	{
+		Assert(root->group_rtindex > 0);
+		non_group_vars = (List *)
+			remove_nulling_relids((Node *) non_group_vars,
+								  bms_make_singleton(root->group_rtindex),
+								  NULL);
+	}
 	add_new_columns_to_pathtarget(input_target, non_group_vars);

 	/* clean up cruft */
@ -6207,6 +6249,7 @@ make_pathkeys_for_window(PlannerInfo *root, WindowClause *wc,
 																 &wc->partitionClause,
 																 tlist,
 																 true,
+																 false,
 																 &sortable,
 																 false);

--- a/src/backend/optimizer/plan/setrefs.c
+++ b/src/backend/optimizer/plan/setrefs.c
@ -26,6 +26,7 @@
 #include "optimizer/subselect.h"
 #include "optimizer/tlist.h"
 #include "parser/parse_relation.h"
+#include "rewrite/rewriteManip.h"
 #include "tcop/utility.h"
 #include "utils/syscache.h"

@ -2426,6 +2427,28 @@ set_upper_references(PlannerInfo *root, Plan *plan, int rtoffset)

 	subplan_itlist = build_tlist_index(subplan->targetlist);

+	/*
+	 * If it's a grouping node with grouping sets, any Vars and PHVs appearing
+	 * in the targetlist and quals should have nullingrels that include the
+	 * effects of the grouping step, ie they will have nullingrels equal to
+	 * the input Vars/PHVs' nullingrels plus the RT index of the grouping
+	 * step.  In order to perform exact nullingrels matches, we remove the RT
+	 * index of the grouping step first.
+	 */
+	if (IsA(plan, Agg) &&
+		root->group_rtindex > 0 &&
+		((Agg *) plan)->groupingSets)
+	{
+		plan->targetlist = (List *)
+			remove_nulling_relids((Node *) plan->targetlist,
+								  bms_make_singleton(root->group_rtindex),
+								  NULL);
+		plan->qual = (List *)
+			remove_nulling_relids((Node *) plan->qual,
+								  bms_make_singleton(root->group_rtindex),
+								  NULL);
+	}
+
 	output_targetlist = NIL;
 	foreach(l, plan->targetlist)
 	{
--- a/src/backend/optimizer/util/var.c
+++ b/src/backend/optimizer/util/var.c
@ -22,6 +22,7 @@

 #include "access/sysattr.h"
 #include "nodes/nodeFuncs.h"
+#include "optimizer/clauses.h"
 #include "optimizer/optimizer.h"
 #include "optimizer/placeholder.h"
 #include "optimizer/prep.h"
@ -83,6 +84,8 @@ static Node *flatten_join_alias_vars_mutator(Node *node,
 											 flatten_join_alias_vars_context *context);
 static Node *flatten_group_exprs_mutator(Node *node,
 										 flatten_join_alias_vars_context *context);
+static Node *mark_nullable_by_grouping(PlannerInfo *root, Node *newnode,
+									   Var *oldvar);
 static Node *add_nullingrels_if_needed(PlannerInfo *root, Node *newnode,
 									   Var *oldvar);
 static bool is_standard_join_alias_expression(Node *newnode, Var *oldvar);
@ -909,6 +912,18 @@ flatten_join_alias_vars_mutator(Node *node,
 * flatten_group_exprs
 *	  Replace Vars that reference GROUP outputs with the underlying grouping
 *	  expressions.
+ *
+ * We have to preserve any varnullingrels info attached to the group Vars we're
+ * replacing.  If the replacement expression is a Var or PlaceHolderVar or
+ * constructed from those, we can just add the varnullingrels bits to the
+ * existing nullingrels field(s); otherwise we have to add a PlaceHolderVar
+ * wrapper.
+ *
+ * NOTE: this is also used by ruleutils.c, to deparse one query parsetree back
+ * to source text.  For that use-case, root will be NULL, which is why we have
+ * to pass the Query separately.  We need the root itself only for preserving
+ * varnullingrels.  We can avoid preserving varnullingrels in the ruleutils.c's
+ * usage because it does not make any difference to the deparsed source text.
 */
 Node *
 flatten_group_exprs(PlannerInfo *root, Query *query, Node *node)
@ -973,7 +988,8 @@ flatten_group_exprs_mutator(Node *node,
 		if (context->possible_sublink && !context->inserted_sublink)
 			context->inserted_sublink = checkExprHasSubLink(newvar);

-		return newvar;
+		/* Lastly, add any varnullingrels to the replacement expression */
+		return mark_nullable_by_grouping(context->root, newvar, var);
 	}

 	if (IsA(node, Aggref))
@ -1040,6 +1056,76 @@ flatten_group_exprs_mutator(Node *node,
 								   (void *) context);
 }

+/*
+ * Add oldvar's varnullingrels, if any, to a flattened grouping expression.
+ * The newnode has been copied, so we can modify it freely.
+ */
+static Node *
+mark_nullable_by_grouping(PlannerInfo *root, Node *newnode, Var *oldvar)
+{
+	Relids		relids;
+
+	if (root == NULL)
+		return newnode;
+	if (oldvar->varnullingrels == NULL)
+		return newnode;			/* nothing to do */
+
+	Assert(bms_equal(oldvar->varnullingrels,
+					 bms_make_singleton(root->group_rtindex)));
+
+	relids = pull_varnos_of_level(root, newnode, oldvar->varlevelsup);
+
+	if (!bms_is_empty(relids))
+	{
+		/*
+		 * If the newnode is not variable-free, we set the nullingrels of Vars
+		 * or PHVs that are contained in the expression.  This is not really
+		 * 'correct' in theory, because it is the whole expression that can be
+		 * nullable by grouping sets, not its individual vars.  But it works
+		 * in practice, because what we need is that the expression can be
+		 * somehow distinguished from the same expression in ECs, and marking
+		 * its vars is sufficient for this purpose.
+		 */
+		newnode = add_nulling_relids(newnode,
+									 relids,
+									 oldvar->varnullingrels);
+	}
+	else						/* variable-free? */
+	{
+		/*
+		 * If the newnode is variable-free and does not contain volatile
+		 * functions or set-returning functions, it can be treated as a member
+		 * of EC that is redundant.  So wrap it in a new PlaceHolderVar to
+		 * carry the nullingrels.  Otherwise we do not bother to make any
+		 * changes.
+		 *
+		 * Aggregate functions and window functions are not allowed in
+		 * grouping expressions.
+		 */
+		Assert(!contain_agg_clause(newnode));
+		Assert(!contain_window_function(newnode));
+
+		if (!contain_volatile_functions(newnode) &&
+			!expression_returns_set(newnode))
+		{
+			PlaceHolderVar *newphv;
+			Relids		phrels;
+
+			phrels = get_relids_in_jointree((Node *) root->parse->jointree,
+											true, false);
+			Assert(!bms_is_empty(phrels));
+
+			newphv = make_placeholder_expr(root, (Expr *) newnode, phrels);
+			/* newphv has zero phlevelsup and NULL phnullingrels; fix it */
+			newphv->phlevelsup = oldvar->varlevelsup;
+			newphv->phnullingrels = bms_copy(oldvar->varnullingrels);
+			newnode = (Node *) newphv;
+		}
+	}
+
+	return newnode;
+}
+
 /*
 * Add oldvar's varnullingrels, if any, to a flattened join alias expression.
 * The newnode has been copied, so we can modify it freely.
--- a/src/backend/parser/parse_agg.c
+++ b/src/backend/parser/parse_agg.c
@ -1333,9 +1333,6 @@ substitute_grouped_columns_mutator(Node *node,

 	if (node == NULL)
 		return NULL;
-	if (IsA(node, Const) ||
-		IsA(node, Param))
-		return node;			/* constants are always acceptable */

 	if (IsA(node, Aggref))
 	{
@ -1409,6 +1406,16 @@ substitute_grouped_columns_mutator(Node *node,
 		}
 	}

+	/*
+	 * Constants are always acceptable.  We have to do this after we checked
+	 * the subexpression as a whole for a match, because it is possible that
+	 * we have GROUP BY items that are constants, and the constants would
+	 * become not so constant after the grouping step.
+	 */
+	if (IsA(node, Const) ||
+		IsA(node, Param))
+		return node;
+
 	/*
 	 * If we have an ungrouped Var of the original query level, we have a
 	 * failure.  Vars below the original query level are not a problem, and
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@ -57,6 +57,6 @@
 */

 /*							yyyymmddN */
-#define CATALOG_VERSION_NO	202409101
+#define CATALOG_VERSION_NO	202409102

 #endif
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@ -240,6 +240,7 @@ extern List *make_pathkeys_for_sortclauses_extended(PlannerInfo *root,
 													List **sortclauses,
 													List *tlist,
 													bool remove_redundant,
+													bool remove_group_rtindex,
 													bool *sortable,
 													bool set_ec_sortref);
 extern void initialize_mergeclause_eclasses(PlannerInfo *root,
--- a/src/test/regress/expected/groupingsets.out
+++ b/src/test/regress/expected/groupingsets.out
@ -442,19 +442,22 @@ select * from (
  group by grouping sets(1, 2)
 ) ss
 where x = 1 and q1 = 123;
-                 QUERY PLAN                 
--------------------------------------------
+                    QUERY PLAN                    
+--------------------------------------------------
 Subquery Scan on ss
   Output: ss.x, ss.q1, ss.sum
   Filter: ((ss.x = 1) AND (ss.q1 = 123))
   ->  GroupAggregate
         Output: (1), i1.q1, sum(i1.q2)
-         Group Key: 1
+         Group Key: (1)
         Sort Key: i1.q1
           Group Key: i1.q1
-         ->  Seq Scan on public.int8_tbl i1
-               Output: 1, i1.q1, i1.q2
-(10 rows)
+         ->  Sort
+               Output: (1), i1.q1, i1.q2
+               Sort Key: (1)
+               ->  Seq Scan on public.int8_tbl i1
+                     Output: 1, i1.q1, i1.q2
+(13 rows)

 select * from (
  select 1 as x, q1, sum(q2)
@ -736,15 +739,18 @@ select a, b, sum(v.x)
 -- Test reordering of grouping sets
 explain (costs off)
 select * from gstest1 group by grouping sets((a,b,v),(v)) order by v,b,a;
-                                  QUERY PLAN                                  
------------------------------------------------------------------------------
- GroupAggregate
-   Group Key: "*VALUES*".column3, "*VALUES*".column2, "*VALUES*".column1
-   Group Key: "*VALUES*".column3
-   ->  Sort
-         Sort Key: "*VALUES*".column3, "*VALUES*".column2, "*VALUES*".column1
-         ->  Values Scan on "*VALUES*"
-(6 rows)
+                                     QUERY PLAN                                     
+------------------------------------------------------------------------------------
+ Incremental Sort
+   Sort Key: "*VALUES*".column3, "*VALUES*".column2, "*VALUES*".column1
+   Presorted Key: "*VALUES*".column3
+   ->  GroupAggregate
+         Group Key: "*VALUES*".column3, "*VALUES*".column2, "*VALUES*".column1
+         Group Key: "*VALUES*".column3
+         ->  Sort
+               Sort Key: "*VALUES*".column3, "*VALUES*".column2, "*VALUES*".column1
+               ->  Values Scan on "*VALUES*"
+(9 rows)

 -- Agg level check. This query should error out.
 select (select grouping(a,b) from gstest2) from gstest2 group by a,b;
@ -816,16 +822,18 @@ select a,count(*) from gstest2 group by rollup(a) having a is distinct from 1 or

 explain (costs off)
  select a,count(*) from gstest2 group by rollup(a) having a is distinct from 1 order by a;
-            QUERY PLAN            
----------------------------------
- GroupAggregate
-   Group Key: a
-   Group Key: ()
-   Filter: (a IS DISTINCT FROM 1)
-   ->  Sort
-         Sort Key: a
-         ->  Seq Scan on gstest2
-(7 rows)
+               QUERY PLAN               
+----------------------------------------
+ Sort
+   Sort Key: a
+   ->  GroupAggregate
+         Group Key: a
+         Group Key: ()
+         Filter: (a IS DISTINCT FROM 1)
+         ->  Sort
+               Sort Key: a
+               ->  Seq Scan on gstest2
+(9 rows)

 select v.c, (select count(*) from gstest2 group by () having v.c)
  from (values (false),(true)) v(c) order by v.c;
@ -2288,4 +2296,137 @@ select not a from (values(true)) t(a) group by rollup(not a) having not not a;
 f
 (1 row)

+-- test handling of expressions nullable by grouping sets
+explain (costs off)
+select distinct on (a, b) a, b
+from (values (1, 1), (2, 2)) as t (a, b) where a = b
+group by grouping sets((a, b), (a))
+order by a, b;
+                           QUERY PLAN                           
+----------------------------------------------------------------
+ Unique
+   ->  Sort
+         Sort Key: "*VALUES*".column1, "*VALUES*".column2
+         ->  HashAggregate
+               Hash Key: "*VALUES*".column1, "*VALUES*".column2
+               Hash Key: "*VALUES*".column1
+               ->  Values Scan on "*VALUES*"
+                     Filter: (column1 = column2)
+(8 rows)
+
+select distinct on (a, b) a, b
+from (values (1, 1), (2, 2)) as t (a, b) where a = b
+group by grouping sets((a, b), (a))
+order by a, b;
+ a | b 
+---+---
+ 1 | 1
+ 1 |  
+ 2 | 2
+ 2 |  
+(4 rows)
+
+explain (costs off)
+select distinct on (a, b+1) a, b+1
+from (values (1, 0), (2, 1)) as t (a, b) where a = b+1
+group by grouping sets((a, b+1), (a))
+order by a, b+1;
+                              QUERY PLAN                              
+----------------------------------------------------------------------
+ Unique
+   ->  Sort
+         Sort Key: "*VALUES*".column1, (("*VALUES*".column2 + 1))
+         ->  HashAggregate
+               Hash Key: "*VALUES*".column1, ("*VALUES*".column2 + 1)
+               Hash Key: "*VALUES*".column1
+               ->  Values Scan on "*VALUES*"
+                     Filter: (column1 = (column2 + 1))
+(8 rows)
+
+select distinct on (a, b+1) a, b+1
+from (values (1, 0), (2, 1)) as t (a, b) where a = b+1
+group by grouping sets((a, b+1), (a))
+order by a, b+1;
+ a | ?column? 
+---+----------
+ 1 |        1
+ 1 |         
+ 2 |        2
+ 2 |         
+(4 rows)
+
+explain (costs off)
+select a, b
+from (values (1, 1), (2, 2)) as t (a, b) where a = b
+group by grouping sets((a, b), (a))
+order by a, b nulls first;
+                           QUERY PLAN                           
+----------------------------------------------------------------
+ Sort
+   Sort Key: "*VALUES*".column1, "*VALUES*".column2 NULLS FIRST
+   ->  HashAggregate
+         Hash Key: "*VALUES*".column1, "*VALUES*".column2
+         Hash Key: "*VALUES*".column1
+         ->  Values Scan on "*VALUES*"
+               Filter: (column1 = column2)
+(7 rows)
+
+select a, b
+from (values (1, 1), (2, 2)) as t (a, b) where a = b
+group by grouping sets((a, b), (a))
+order by a, b nulls first;
+ a | b 
+---+---
+ 1 |  
+ 1 | 1
+ 2 |  
+ 2 | 2
+(4 rows)
+
+explain (costs off)
+select 1 as one group by rollup(one) order by one nulls first;
+         QUERY PLAN          
+-----------------------------
+ Sort
+   Sort Key: (1) NULLS FIRST
+   ->  MixedAggregate
+         Hash Key: 1
+         Group Key: ()
+         ->  Result
+(6 rows)
+
+select 1 as one group by rollup(one) order by one nulls first;
+ one 
+-----
+    
+   1
+(2 rows)
+
+explain (costs off)
+select a, b, row_number() over (order by a, b nulls first)
+from (values (1, 1), (2, 2)) as t (a, b) where a = b
+group by grouping sets((a, b), (a));
+                              QUERY PLAN                              
+----------------------------------------------------------------------
+ WindowAgg
+   ->  Sort
+         Sort Key: "*VALUES*".column1, "*VALUES*".column2 NULLS FIRST
+         ->  HashAggregate
+               Hash Key: "*VALUES*".column1, "*VALUES*".column2
+               Hash Key: "*VALUES*".column1
+               ->  Values Scan on "*VALUES*"
+                     Filter: (column1 = column2)
+(8 rows)
+
+select a, b, row_number() over (order by a, b nulls first)
+from (values (1, 1), (2, 2)) as t (a, b) where a = b
+group by grouping sets((a, b), (a));
+ a | b | row_number 
+---+---+------------
+ 1 |   |          1
+ 1 | 1 |          2
+ 2 |   |          3
+ 2 | 2 |          4
+(4 rows)
+
 -- end
--- a/src/test/regress/sql/groupingsets.sql
+++ b/src/test/regress/sql/groupingsets.sql
@ -640,4 +640,51 @@ explain (costs off)
 select not a from (values(true)) t(a) group by rollup(not a) having not not a;
 select not a from (values(true)) t(a) group by rollup(not a) having not not a;

+-- test handling of expressions nullable by grouping sets
+explain (costs off)
+select distinct on (a, b) a, b
+from (values (1, 1), (2, 2)) as t (a, b) where a = b
+group by grouping sets((a, b), (a))
+order by a, b;
+
+select distinct on (a, b) a, b
+from (values (1, 1), (2, 2)) as t (a, b) where a = b
+group by grouping sets((a, b), (a))
+order by a, b;
+
+explain (costs off)
+select distinct on (a, b+1) a, b+1
+from (values (1, 0), (2, 1)) as t (a, b) where a = b+1
+group by grouping sets((a, b+1), (a))
+order by a, b+1;
+
+select distinct on (a, b+1) a, b+1
+from (values (1, 0), (2, 1)) as t (a, b) where a = b+1
+group by grouping sets((a, b+1), (a))
+order by a, b+1;
+
+explain (costs off)
+select a, b
+from (values (1, 1), (2, 2)) as t (a, b) where a = b
+group by grouping sets((a, b), (a))
+order by a, b nulls first;
+
+select a, b
+from (values (1, 1), (2, 2)) as t (a, b) where a = b
+group by grouping sets((a, b), (a))
+order by a, b nulls first;
+
+explain (costs off)
+select 1 as one group by rollup(one) order by one nulls first;
+select 1 as one group by rollup(one) order by one nulls first;
+
+explain (costs off)
+select a, b, row_number() over (order by a, b nulls first)
+from (values (1, 1), (2, 2)) as t (a, b) where a = b
+group by grouping sets((a, b), (a));
+
+select a, b, row_number() over (order by a, b nulls first)
+from (values (1, 1), (2, 2)) as t (a, b) where a = b
+group by grouping sets((a, b), (a));
+
 -- end