diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c index 47644b26c6..8f6f005ecb 100644 --- a/src/backend/optimizer/path/equivclass.c +++ b/src/backend/optimizer/path/equivclass.c @@ -1293,7 +1293,8 @@ generate_base_implied_equalities_no_const(PlannerInfo *root, * For the moment we force all the Vars to be available at all join nodes * for this eclass. Perhaps this could be improved by doing some * pre-analysis of which members we prefer to join, but it's no worse than - * what happened in the pre-8.3 code. + * what happened in the pre-8.3 code. (Note: rebuild_eclass_attr_needed + * needs to match this code.) */ foreach(lc, ec->ec_members) { @@ -2422,6 +2423,44 @@ reconsider_full_join_clause(PlannerInfo *root, OuterJoinClauseInfo *ojcinfo) return false; /* failed to make any deduction */ } +/* + * rebuild_eclass_attr_needed + * Put back attr_needed bits for Vars/PHVs needed for join eclasses. + * + * This is used to rebuild attr_needed/ph_needed sets after removal of a + * useless outer join. It should match what + * generate_base_implied_equalities_no_const did, except that we call + * add_vars_to_attr_needed not add_vars_to_targetlist. + */ +void +rebuild_eclass_attr_needed(PlannerInfo *root) +{ + ListCell *lc; + + foreach(lc, root->eq_classes) + { + EquivalenceClass *ec = (EquivalenceClass *) lfirst(lc); + + /* Need do anything only for a multi-member, no-const EC. */ + if (list_length(ec->ec_members) > 1 && !ec->ec_has_const) + { + ListCell *lc2; + + foreach(lc2, ec->ec_members) + { + EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc2); + List *vars = pull_var_clause((Node *) cur_em->em_expr, + PVC_RECURSE_AGGREGATES | + PVC_RECURSE_WINDOWFUNCS | + PVC_INCLUDE_PLACEHOLDERS); + + add_vars_to_attr_needed(root, vars, ec->ec_relids); + list_free(vars); + } + } + } +} + /* * find_join_domain * Find the highest JoinDomain enclosed within the given relid set. diff --git a/src/backend/optimizer/plan/analyzejoins.c b/src/backend/optimizer/plan/analyzejoins.c index c3fd4a81f8..928d926645 100644 --- a/src/backend/optimizer/plan/analyzejoins.c +++ b/src/backend/optimizer/plan/analyzejoins.c @@ -27,6 +27,7 @@ #include "optimizer/optimizer.h" #include "optimizer/pathnode.h" #include "optimizer/paths.h" +#include "optimizer/placeholder.h" #include "optimizer/planmain.h" #include "optimizer/restrictinfo.h" #include "utils/lsyscache.h" @@ -341,35 +342,6 @@ remove_rel_from_query(PlannerInfo *root, int relid, SpecialJoinInfo *sjinfo) Assert(ojrelid != 0); joinrelids = bms_add_member(joinrelids, ojrelid); - /* - * Remove references to the rel from other baserels' attr_needed arrays. - */ - for (rti = 1; rti < root->simple_rel_array_size; rti++) - { - RelOptInfo *otherrel = root->simple_rel_array[rti]; - int attroff; - - /* there may be empty slots corresponding to non-baserel RTEs */ - if (otherrel == NULL) - continue; - - Assert(otherrel->relid == rti); /* sanity check on array */ - - /* no point in processing target rel itself */ - if (otherrel == rel) - continue; - - for (attroff = otherrel->max_attr - otherrel->min_attr; - attroff >= 0; - attroff--) - { - otherrel->attr_needed[attroff] = - bms_del_member(otherrel->attr_needed[attroff], relid); - otherrel->attr_needed[attroff] = - bms_del_member(otherrel->attr_needed[attroff], ojrelid); - } - } - /* * Update all_baserels and related relid sets. */ @@ -450,9 +422,11 @@ remove_rel_from_query(PlannerInfo *root, int relid, SpecialJoinInfo *sjinfo) phinfo->ph_eval_at = bms_del_member(phinfo->ph_eval_at, relid); phinfo->ph_eval_at = bms_del_member(phinfo->ph_eval_at, ojrelid); Assert(!bms_is_empty(phinfo->ph_eval_at)); /* checked previously */ - phinfo->ph_needed = bms_del_member(phinfo->ph_needed, relid); - phinfo->ph_needed = bms_del_member(phinfo->ph_needed, ojrelid); - /* ph_needed might or might not become empty */ + /* Reduce ph_needed to contain only "relation 0"; see below */ + if (bms_is_member(0, phinfo->ph_needed)) + phinfo->ph_needed = bms_make_singleton(0); + else + phinfo->ph_needed = NULL; phv->phrels = bms_del_member(phv->phrels, relid); phv->phrels = bms_del_member(phv->phrels, ojrelid); Assert(!bms_is_empty(phv->phrels)); @@ -540,7 +514,7 @@ remove_rel_from_query(PlannerInfo *root, int relid, SpecialJoinInfo *sjinfo) */ /* - * Finally, remove the rel from the baserel array to prevent it from being + * Now remove the rel from the baserel array to prevent it from being * referenced again. (We can't do this earlier because * remove_join_clause_from_rels will touch it.) */ @@ -548,6 +522,51 @@ remove_rel_from_query(PlannerInfo *root, int relid, SpecialJoinInfo *sjinfo) /* And nuke the RelOptInfo, just in case there's another access path */ pfree(rel); + + /* + * Finally, we must recompute per-Var attr_needed and per-PlaceHolderVar + * ph_needed relid sets. These have to be known accurately, else we may + * fail to remove other now-removable outer joins. And our removal of the + * join clause(s) for this outer join may mean that Vars that were + * formerly needed no longer are. So we have to do this honestly by + * repeating the construction of those relid sets. We can cheat to one + * small extent: we can avoid re-examining the targetlist and HAVING qual + * by preserving "relation 0" bits from the existing relid sets. This is + * safe because we'd never remove such references. + * + * So, start by removing all other bits from attr_needed sets. (We + * already did this above for ph_needed.) + */ + for (rti = 1; rti < root->simple_rel_array_size; rti++) + { + RelOptInfo *otherrel = root->simple_rel_array[rti]; + int attroff; + + /* there may be empty slots corresponding to non-baserel RTEs */ + if (otherrel == NULL) + continue; + + Assert(otherrel->relid == rti); /* sanity check on array */ + + for (attroff = otherrel->max_attr - otherrel->min_attr; + attroff >= 0; + attroff--) + { + if (bms_is_member(0, otherrel->attr_needed[attroff])) + otherrel->attr_needed[attroff] = bms_make_singleton(0); + else + otherrel->attr_needed[attroff] = NULL; + } + } + + /* + * Now repeat construction of attr_needed bits coming from all other + * sources. + */ + rebuild_placeholder_attr_needed(root); + rebuild_joinclause_attr_needed(root); + rebuild_eclass_attr_needed(root); + rebuild_lateral_attr_needed(root); } /* diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c index f3b9821498..c5bc0f51e9 100644 --- a/src/backend/optimizer/plan/initsplan.c +++ b/src/backend/optimizer/plan/initsplan.c @@ -274,6 +274,8 @@ build_base_rel_tlists(PlannerInfo *root, List *final_tlist) * have a single owning relation; we keep their attr_needed info in * root->placeholder_list instead. Find or create the associated * PlaceHolderInfo entry, and update its ph_needed. + * + * See also add_vars_to_attr_needed. */ void add_vars_to_targetlist(PlannerInfo *root, List *vars, @@ -327,6 +329,63 @@ add_vars_to_targetlist(PlannerInfo *root, List *vars, } } +/* + * add_vars_to_attr_needed + * This does a subset of what add_vars_to_targetlist does: it just + * updates attr_needed for Vars and ph_needed for PlaceHolderVars. + * We assume the Vars are already in their relations' targetlists. + * + * This is used to rebuild attr_needed/ph_needed sets after removal + * of a useless outer join. The removed join clause might have been + * the only upper-level use of some other relation's Var, in which + * case we can reduce that Var's attr_needed and thereby possibly + * open the door to further join removals. But we can't tell that + * without tedious reconstruction of the attr_needed data. + * + * Note that if a Var's attr_needed is successfully reduced to empty, + * it will still be in the relation's targetlist even though we do + * not really need the scan plan node to emit it. The extra plan + * inefficiency seems tiny enough to not be worth spending planner + * cycles to get rid of it. + */ +void +add_vars_to_attr_needed(PlannerInfo *root, List *vars, + Relids where_needed) +{ + ListCell *temp; + + Assert(!bms_is_empty(where_needed)); + + foreach(temp, vars) + { + Node *node = (Node *) lfirst(temp); + + if (IsA(node, Var)) + { + Var *var = (Var *) node; + RelOptInfo *rel = find_base_rel(root, var->varno); + int attno = var->varattno; + + if (bms_is_subset(where_needed, rel->relids)) + continue; + Assert(attno >= rel->min_attr && attno <= rel->max_attr); + attno -= rel->min_attr; + rel->attr_needed[attno] = bms_add_members(rel->attr_needed[attno], + where_needed); + } + else if (IsA(node, PlaceHolderVar)) + { + PlaceHolderVar *phv = (PlaceHolderVar *) node; + PlaceHolderInfo *phinfo = find_placeholder_info(root, phv); + + phinfo->ph_needed = bms_add_members(phinfo->ph_needed, + where_needed); + } + else + elog(ERROR, "unrecognized node type: %d", (int) nodeTag(node)); + } +} + /***************************************************************************** * @@ -488,10 +547,54 @@ extract_lateral_references(PlannerInfo *root, RelOptInfo *brel, Index rtindex) */ add_vars_to_targetlist(root, newvars, where_needed); - /* Remember the lateral references for create_lateral_join_info */ + /* + * Remember the lateral references for rebuild_lateral_attr_needed and + * create_lateral_join_info. + */ brel->lateral_vars = newvars; } +/* + * rebuild_lateral_attr_needed + * Put back attr_needed bits for Vars/PHVs needed for lateral references. + * + * This is used to rebuild attr_needed/ph_needed sets after removal of a + * useless outer join. It should match what find_lateral_references did, + * except that we call add_vars_to_attr_needed not add_vars_to_targetlist. + */ +void +rebuild_lateral_attr_needed(PlannerInfo *root) +{ + Index rti; + + /* We need do nothing if the query contains no LATERAL RTEs */ + if (!root->hasLateralRTEs) + return; + + /* Examine the same baserels that find_lateral_references did */ + for (rti = 1; rti < root->simple_rel_array_size; rti++) + { + RelOptInfo *brel = root->simple_rel_array[rti]; + Relids where_needed; + + if (brel == NULL) + continue; + if (brel->reloptkind != RELOPT_BASEREL) + continue; + + /* + * We don't need to repeat all of extract_lateral_references, since it + * kindly saved the extracted Vars/PHVs in lateral_vars. + */ + if (brel->lateral_vars == NIL) + continue; + + where_needed = bms_make_singleton(rti); + + add_vars_to_attr_needed(root, brel->lateral_vars, where_needed); + } +} + /* * create_lateral_join_info * Fill in the per-base-relation direct_lateral_relids, lateral_relids @@ -2445,6 +2548,9 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause, * var propagation is ensured by making ojscope include input rels from * both sides of the join. * + * See also rebuild_joinclause_attr_needed, which has to partially repeat + * this work after removal of an outer join. + * * Note: if the clause gets absorbed into an EquivalenceClass then this * may be unnecessary, but for now we have to do it to cover the case * where the EC becomes ec_broken and we end up reinserting the original @@ -3014,6 +3120,11 @@ process_implied_equality(PlannerInfo *root, * some of the Vars could have missed having that done because they only * appeared in single-relation clauses originally. So do it here for * safety. + * + * See also rebuild_joinclause_attr_needed, which has to partially repeat + * this work after removal of an outer join. (Since we will put this + * clause into the joininfo lists, that function needn't do any extra work + * to find it.) */ if (bms_membership(relids) == BMS_MULTIPLE) { @@ -3155,6 +3266,72 @@ get_join_domain_min_rels(PlannerInfo *root, Relids domain_relids) } +/* + * rebuild_joinclause_attr_needed + * Put back attr_needed bits for Vars/PHVs needed for join clauses. + * + * This is used to rebuild attr_needed/ph_needed sets after removal of a + * useless outer join. It should match what distribute_qual_to_rels did, + * except that we call add_vars_to_attr_needed not add_vars_to_targetlist. + */ +void +rebuild_joinclause_attr_needed(PlannerInfo *root) +{ + /* + * We must examine all join clauses, but there's no value in processing + * any join clause more than once. So it's slightly annoying that we have + * to find them via the per-base-relation joininfo lists. Avoid duplicate + * processing by tracking the rinfo_serial numbers of join clauses we've + * already seen. (This doesn't work for is_clone clauses, so we must + * waste effort on them.) + */ + Bitmapset *seen_serials = NULL; + Index rti; + + /* Scan all baserels for join clauses */ + for (rti = 1; rti < root->simple_rel_array_size; rti++) + { + RelOptInfo *brel = root->simple_rel_array[rti]; + ListCell *lc; + + if (brel == NULL) + continue; + if (brel->reloptkind != RELOPT_BASEREL) + continue; + + foreach(lc, brel->joininfo) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); + Relids relids = rinfo->required_relids; + + if (!rinfo->is_clone) /* else serial number is not unique */ + { + if (bms_is_member(rinfo->rinfo_serial, seen_serials)) + continue; /* saw it already */ + seen_serials = bms_add_member(seen_serials, + rinfo->rinfo_serial); + } + + if (bms_membership(relids) == BMS_MULTIPLE) + { + List *vars = pull_var_clause((Node *) rinfo->clause, + PVC_RECURSE_AGGREGATES | + PVC_RECURSE_WINDOWFUNCS | + PVC_INCLUDE_PLACEHOLDERS); + Relids where_needed; + + if (rinfo->is_clone) + where_needed = bms_intersect(relids, root->all_baserels); + else + where_needed = relids; + add_vars_to_attr_needed(root, vars, where_needed); + list_free(vars); + } + } + } +} + + /* * match_foreign_keys_to_quals * Match foreign-key constraints to equivalence classes and join quals diff --git a/src/backend/optimizer/util/placeholder.c b/src/backend/optimizer/util/placeholder.c index 81abadd6db..cce9086531 100644 --- a/src/backend/optimizer/util/placeholder.c +++ b/src/backend/optimizer/util/placeholder.c @@ -314,6 +314,33 @@ fix_placeholder_input_needed_levels(PlannerInfo *root) } } +/* + * rebuild_placeholder_attr_needed + * Put back attr_needed bits for Vars/PHVs needed in PlaceHolderVars. + * + * This is used to rebuild attr_needed/ph_needed sets after removal of a + * useless outer join. It should match what + * fix_placeholder_input_needed_levels did, except that we call + * add_vars_to_attr_needed not add_vars_to_targetlist. + */ +void +rebuild_placeholder_attr_needed(PlannerInfo *root) +{ + ListCell *lc; + + foreach(lc, root->placeholder_list) + { + PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(lc); + List *vars = pull_var_clause((Node *) phinfo->ph_var->phexpr, + PVC_RECURSE_AGGREGATES | + PVC_RECURSE_WINDOWFUNCS | + PVC_INCLUDE_PLACEHOLDERS); + + add_vars_to_attr_needed(root, vars, phinfo->ph_eval_at); + list_free(vars); + } +} + /* * add_placeholders_to_base_rels * Add any required PlaceHolderVars to base rels' targetlists. diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index a78e90610f..54869d4401 100644 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -128,6 +128,7 @@ extern bool process_equivalence(PlannerInfo *root, extern Expr *canonicalize_ec_expression(Expr *expr, Oid req_type, Oid req_collation); extern void reconsider_outer_join_clauses(PlannerInfo *root); +extern void rebuild_eclass_attr_needed(PlannerInfo *root); extern EquivalenceClass *get_eclass_for_sort_expr(PlannerInfo *root, Expr *expr, List *opfamilies, diff --git a/src/include/optimizer/placeholder.h b/src/include/optimizer/placeholder.h index d8610863fc..97c12eae03 100644 --- a/src/include/optimizer/placeholder.h +++ b/src/include/optimizer/placeholder.h @@ -23,6 +23,7 @@ extern PlaceHolderInfo *find_placeholder_info(PlannerInfo *root, PlaceHolderVar *phv); extern void find_placeholders_in_jointree(PlannerInfo *root); extern void fix_placeholder_input_needed_levels(PlannerInfo *root); +extern void rebuild_placeholder_attr_needed(PlannerInfo *root); extern void add_placeholders_to_base_rels(PlannerInfo *root); extern void add_placeholders_to_joinrel(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outer_rel, RelOptInfo *inner_rel, diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h index aafc173792..93137261e4 100644 --- a/src/include/optimizer/planmain.h +++ b/src/include/optimizer/planmain.h @@ -72,7 +72,10 @@ extern void add_other_rels_to_query(PlannerInfo *root); extern void build_base_rel_tlists(PlannerInfo *root, List *final_tlist); extern void add_vars_to_targetlist(PlannerInfo *root, List *vars, Relids where_needed); +extern void add_vars_to_attr_needed(PlannerInfo *root, List *vars, + Relids where_needed); extern void find_lateral_references(PlannerInfo *root); +extern void rebuild_lateral_attr_needed(PlannerInfo *root); extern void create_lateral_join_info(PlannerInfo *root); extern List *deconstruct_jointree(PlannerInfo *root); extern bool restriction_is_always_true(PlannerInfo *root, @@ -96,6 +99,7 @@ extern RestrictInfo *build_implied_join_equality(PlannerInfo *root, Expr *item2, Relids qualscope, Index security_level); +extern void rebuild_joinclause_attr_needed(PlannerInfo *root); extern void match_foreign_keys_to_quals(PlannerInfo *root); /* diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index 31fb7d142e..12abd3a0e7 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -5457,11 +5457,13 @@ CREATE TEMP TABLE a (id int PRIMARY KEY, b_id int); CREATE TEMP TABLE b (id int PRIMARY KEY, c_id int); CREATE TEMP TABLE c (id int PRIMARY KEY); CREATE TEMP TABLE d (a int, b int); +CREATE TEMP TABLE e (id1 int, id2 int, PRIMARY KEY(id1, id2)); INSERT INTO a VALUES (0, 0), (1, NULL); INSERT INTO b VALUES (0, 0), (1, NULL); INSERT INTO c VALUES (0), (1); INSERT INTO d VALUES (1,3), (2,2), (3,1); --- all three cases should be optimizable into a simple seqscan +INSERT INTO e VALUES (0,0), (2,2), (3,1); +-- all these cases should be optimizable into a simple seqscan explain (costs off) SELECT a.* FROM a LEFT JOIN b ON a.b_id = b.id; QUERY PLAN --------------- @@ -5482,6 +5484,14 @@ explain (costs off) Seq Scan on a (1 row) +explain (costs off) + SELECT a.* FROM a LEFT JOIN b ON a.id = b.id + LEFT JOIN e ON e.id1 = a.b_id AND b.c_id = e.id2; + QUERY PLAN +--------------- + Seq Scan on a +(1 row) + -- check optimization of outer join within another special join explain (costs off) select id from a where id in ( diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql index d81ff63be5..0c65e5af4b 100644 --- a/src/test/regress/sql/join.sql +++ b/src/test/regress/sql/join.sql @@ -1952,17 +1952,22 @@ CREATE TEMP TABLE a (id int PRIMARY KEY, b_id int); CREATE TEMP TABLE b (id int PRIMARY KEY, c_id int); CREATE TEMP TABLE c (id int PRIMARY KEY); CREATE TEMP TABLE d (a int, b int); +CREATE TEMP TABLE e (id1 int, id2 int, PRIMARY KEY(id1, id2)); INSERT INTO a VALUES (0, 0), (1, NULL); INSERT INTO b VALUES (0, 0), (1, NULL); INSERT INTO c VALUES (0), (1); INSERT INTO d VALUES (1,3), (2,2), (3,1); +INSERT INTO e VALUES (0,0), (2,2), (3,1); --- all three cases should be optimizable into a simple seqscan +-- all these cases should be optimizable into a simple seqscan explain (costs off) SELECT a.* FROM a LEFT JOIN b ON a.b_id = b.id; explain (costs off) SELECT b.* FROM b LEFT JOIN c ON b.c_id = c.id; explain (costs off) SELECT a.* FROM a LEFT JOIN (b left join c on b.c_id = c.id) ON (a.b_id = b.id); +explain (costs off) + SELECT a.* FROM a LEFT JOIN b ON a.id = b.id + LEFT JOIN e ON e.id1 = a.b_id AND b.c_id = e.id2; -- check optimization of outer join within another special join explain (costs off)