Remove the RTE_GROUP RTE if we drop the groupClause

For an EXISTS subquery, the only thing that matters is whether it returns zero or more than zero rows. Therefore, we remove certain SQL features that won't affect that, among them the GROUP BY clauses. After we drop the groupClause, we'd better remove the RTE_GROUP RTE and clear the hasGroupRTE flag, as they depend on the groupClause. Failing to do so could result in a bogus RTE_GROUP entry in the parent query, leading to an assertion failure on the hasGroupRTE flag.

Reported-by: David Rowley
Author: Richard Guo
Discussion: https://postgr.es/m/CAApHDvp2_yht8uPLyWO-kVGWZhYvx5zjGfSrg4fBQ9fsC13V0g@mail.gmail.com
2024-10-25 09:52:34 +09:00 · 2024-10-25 09:52:34 +09:00 · ffe12d1d22
commit ffe12d1d22
parent d32d146399
3 changed files with 47 additions and 0 deletions
--- a/src/backend/optimizer/plan/subselect.c
+++ b/src/backend/optimizer/plan/subselect.c
@@ -1539,6 +1539,8 @@ convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink,
 static bool
 simplify_EXISTS_query(PlannerInfo *root, Query *query)
 {
+	ListCell   *lc;
+
 	/*
 	 * We don't try to simplify at all if the query uses set operations,
 	 * aggregates, grouping sets, SRFs, modifying CTEs, HAVING, OFFSET, or FOR
@@ -1607,6 +1609,28 @@ simplify_EXISTS_query(PlannerInfo *root, Query *query)
 	query->sortClause = NIL;
 	query->hasDistinctOn = false;

+	/*
+	 * Since we have thrown away the GROUP BY clauses, we'd better remove the
+	 * RTE_GROUP RTE and clear the hasGroupRTE flag.
+	 */
+	foreach(lc, query->rtable)
+	{
+		RangeTblEntry *rte = lfirst_node(RangeTblEntry, lc);
+
+		/*
+		 * Remove the RTE_GROUP RTE and clear the hasGroupRTE flag.  (Since
+		 * we'll exit the foreach loop immediately, we don't bother with
+		 * foreach_delete_current.)
+		 */
+		if (rte->rtekind == RTE_GROUP)
+		{
+			Assert(query->hasGroupRTE);
+			query->rtable = list_delete_cell(query->rtable, lc);
+			query->hasGroupRTE = false;
+			break;
+		}
+	}
+
 	return true;
 }

--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -3182,6 +3182,21 @@ where b.unique2 is null;
         ->  Index Only Scan using tenk1_unique2 on tenk1 b
 (5 rows)

+--
+-- regression test for bogus RTE_GROUP entries
+--
+explain (costs off)
+select a.* from tenk1 a
+where exists (select 1 from tenk1 b where a.unique1 = b.unique2 group by b.unique1);
+                         QUERY PLAN                         
+------------------------------------------------------------
+ Hash Semi Join
+   Hash Cond: (a.unique1 = b.unique2)
+   ->  Seq Scan on tenk1 a
+   ->  Hash
+         ->  Index Only Scan using tenk1_unique2 on tenk1 b
+(5 rows)
+
 --
 -- regression test for proper handling of outer joins within antijoins
 --
--- a/src/test/regress/sql/join.sql
+++ b/src/test/regress/sql/join.sql
@@ -818,6 +818,14 @@ explain (costs off)
 select a.* from tenk1 a left join tenk1 b on a.unique1 = b.unique2
 where b.unique2 is null;

+--
+-- regression test for bogus RTE_GROUP entries
+--
+
+explain (costs off)
+select a.* from tenk1 a
+where exists (select 1 from tenk1 b where a.unique1 = b.unique2 group by b.unique1);
+
 --
 -- regression test for proper handling of outer joins within antijoins
 --