From 29aa0ce361d16a79e4ebf7561cbb16ed1d0e2211 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Tue, 14 Sep 2021 15:11:21 -0400 Subject: [PATCH] Fix planner error with multiple copies of an AlternativeSubPlan. It's possible for us to copy an AlternativeSubPlan expression node into multiple places, for example the scan quals of several partition children. Then it's possible that we choose a different one of the alternatives as optimal in each place. Commit 41efb8340 failed to consider this scenario, so its attempt to remove "unused" subplans could remove subplans that were still used elsewhere. Fix by delaying the removal logic until we've examined all the AlternativeSubPlans in a given query level. (This does assume that AlternativeSubPlans couldn't get copied to other query levels, but for the foreseeable future that's fine; cf qual_is_pushdown_safe.) Per report from Rajkumar Raghuwanshi. Back-patch to v14 where the faulty logic came in. Discussion: https://postgr.es/m/CAKcux6==O3NNZC3bZ2prRYv3cjm3_Zw1GfzmOjEVqYN4jub2+Q@mail.gmail.com --- src/backend/optimizer/plan/setrefs.c | 71 ++++++++++++++++--------- src/include/nodes/pathnodes.h | 4 ++ src/test/regress/expected/subselect.out | 40 ++++++++++++++ src/test/regress/sql/subselect.sql | 12 +++++ 4 files changed, 103 insertions(+), 24 deletions(-) diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c index 82538539e0..a10a57b0f8 100644 --- a/src/backend/optimizer/plan/setrefs.c +++ b/src/backend/optimizer/plan/setrefs.c @@ -249,6 +249,7 @@ static List *set_returning_clause_references(PlannerInfo *root, Plan * set_plan_references(PlannerInfo *root, Plan *plan) { + Plan *result; PlannerGlobal *glob = root->glob; int rtoffset = list_length(glob->finalrtable); ListCell *lc; @@ -301,8 +302,44 @@ set_plan_references(PlannerInfo *root, Plan *plan) glob->appendRelations = lappend(glob->appendRelations, appinfo); } + /* If needed, create workspace for processing AlternativeSubPlans */ + if (root->hasAlternativeSubPlans) + { + root->isAltSubplan = (bool *) + palloc0(list_length(glob->subplans) * sizeof(bool)); + root->isUsedSubplan = (bool *) + palloc0(list_length(glob->subplans) * sizeof(bool)); + } + /* Now fix the Plan tree */ - return set_plan_refs(root, plan, rtoffset); + result = set_plan_refs(root, plan, rtoffset); + + /* + * If we have AlternativeSubPlans, it is likely that we now have some + * unreferenced subplans in glob->subplans. To avoid expending cycles on + * those subplans later, get rid of them by setting those list entries to + * NULL. (Note: we can't do this immediately upon processing an + * AlternativeSubPlan, because there may be multiple copies of the + * AlternativeSubPlan, and they can get resolved differently.) + */ + if (root->hasAlternativeSubPlans) + { + foreach(lc, glob->subplans) + { + int ndx = foreach_current_index(lc); + + /* + * If it was used by some AlternativeSubPlan in this query level, + * but wasn't selected as best by any AlternativeSubPlan, then we + * don't need it. Do not touch subplans that aren't parts of + * AlternativeSubPlans. + */ + if (root->isAltSubplan[ndx] && !root->isUsedSubplan[ndx]) + lfirst(lc) = NULL; + } + } + + return result; } /* @@ -1762,8 +1799,7 @@ fix_param_node(PlannerInfo *root, Param *p) * Note: caller must still recurse into the result! * * We don't make any attempt to fix up cost estimates in the parent plan - * node or higher-level nodes. However, we do remove the rejected subplan(s) - * from root->glob->subplans, to minimize cycles expended on them later. + * node or higher-level nodes. */ static Node * fix_alternative_subplan(PlannerInfo *root, AlternativeSubPlan *asplan, @@ -1775,9 +1811,8 @@ fix_alternative_subplan(PlannerInfo *root, AlternativeSubPlan *asplan, /* * Compute the estimated cost of each subplan assuming num_exec - * executions, and keep the cheapest one. Replace discarded subplans with - * NULL pointers in the global subplans list. In event of exact equality - * of estimates, we prefer the later plan; this is a bit arbitrary, but in + * executions, and keep the cheapest one. In event of exact equality of + * estimates, we prefer the later plan; this is a bit arbitrary, but in * current usage it biases us to break ties against fast-start subplans. */ Assert(asplan->subplans != NIL); @@ -1788,31 +1823,19 @@ fix_alternative_subplan(PlannerInfo *root, AlternativeSubPlan *asplan, Cost curcost; curcost = curplan->startup_cost + num_exec * curplan->per_call_cost; - if (bestplan == NULL) + if (bestplan == NULL || curcost <= bestcost) { bestplan = curplan; bestcost = curcost; } - else if (curcost <= bestcost) - { - /* drop old bestplan */ - ListCell *lc2 = list_nth_cell(root->glob->subplans, - bestplan->plan_id - 1); - lfirst(lc2) = NULL; - bestplan = curplan; - bestcost = curcost; - } - else - { - /* drop curplan */ - ListCell *lc2 = list_nth_cell(root->glob->subplans, - curplan->plan_id - 1); - - lfirst(lc2) = NULL; - } + /* Also mark all subplans that are in AlternativeSubPlans */ + root->isAltSubplan[curplan->plan_id - 1] = true; } + /* Mark the subplan we selected */ + root->isUsedSubplan[bestplan->plan_id - 1] = true; + return (Node *) bestplan; } diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h index e884f3c9dc..dd8678b240 100644 --- a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h @@ -367,6 +367,10 @@ struct PlannerInfo Relids curOuterRels; /* outer rels above current node */ List *curOuterParams; /* not-yet-assigned NestLoopParams */ + /* These fields are workspace for setrefs.c */ + bool *isAltSubplan; /* array corresponding to glob->subplans */ + bool *isUsedSubplan; /* array corresponding to glob->subplans */ + /* optional private data for join_search_hook, e.g., GEQO */ void *join_search_private; diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out index 30615dd6bc..0742626033 100644 --- a/src/test/regress/expected/subselect.out +++ b/src/test/regress/expected/subselect.out @@ -921,6 +921,46 @@ where (exists(select 1 from tenk1 k where k.unique1 = t.unique2) or ten < 0) 10 (1 row) +-- It's possible for the same EXISTS to get resolved both ways +create temp table exists_tbl (c1 int, c2 int, c3 int) partition by list (c1); +create temp table exists_tbl_null partition of exists_tbl for values in (null); +create temp table exists_tbl_def partition of exists_tbl default; +insert into exists_tbl select x, x/2, x+1 from generate_series(0,10) x; +analyze exists_tbl; +explain (costs off) +select * from exists_tbl t1 + where (exists(select 1 from exists_tbl t2 where t1.c1 = t2.c2) or c3 < 0); + QUERY PLAN +------------------------------------------------------ + Append + -> Seq Scan on exists_tbl_null t1_1 + Filter: ((SubPlan 1) OR (c3 < 0)) + SubPlan 1 + -> Append + -> Seq Scan on exists_tbl_null t2_1 + Filter: (t1_1.c1 = c2) + -> Seq Scan on exists_tbl_def t2_2 + Filter: (t1_1.c1 = c2) + -> Seq Scan on exists_tbl_def t1_2 + Filter: ((hashed SubPlan 2) OR (c3 < 0)) + SubPlan 2 + -> Append + -> Seq Scan on exists_tbl_null t2_4 + -> Seq Scan on exists_tbl_def t2_5 +(15 rows) + +select * from exists_tbl t1 + where (exists(select 1 from exists_tbl t2 where t1.c1 = t2.c2) or c3 < 0); + c1 | c2 | c3 +----+----+---- + 0 | 0 | 1 + 1 | 0 | 2 + 2 | 1 | 3 + 3 | 1 | 4 + 4 | 2 | 5 + 5 | 2 | 6 +(6 rows) + -- -- Test case for planner bug with nested EXISTS handling -- diff --git a/src/test/regress/sql/subselect.sql b/src/test/regress/sql/subselect.sql index bd17f5d264..e879999708 100644 --- a/src/test/regress/sql/subselect.sql +++ b/src/test/regress/sql/subselect.sql @@ -526,6 +526,18 @@ select count(*) from tenk1 t where (exists(select 1 from tenk1 k where k.unique1 = t.unique2) or ten < 0) and thousand = 1; +-- It's possible for the same EXISTS to get resolved both ways +create temp table exists_tbl (c1 int, c2 int, c3 int) partition by list (c1); +create temp table exists_tbl_null partition of exists_tbl for values in (null); +create temp table exists_tbl_def partition of exists_tbl default; +insert into exists_tbl select x, x/2, x+1 from generate_series(0,10) x; +analyze exists_tbl; +explain (costs off) +select * from exists_tbl t1 + where (exists(select 1 from exists_tbl t2 where t1.c1 = t2.c2) or c3 < 0); +select * from exists_tbl t1 + where (exists(select 1 from exists_tbl t2 where t1.c1 = t2.c2) or c3 < 0); + -- -- Test case for planner bug with nested EXISTS handling --