diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index e42895dc31..415edadd3e 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -4085,6 +4085,7 @@ get_foreign_key_join_selectivity(PlannerInfo *root, { ForeignKeyOptInfo *fkinfo = (ForeignKeyOptInfo *) lfirst(lc); bool ref_is_outer; + bool use_smallest_selectivity = false; List *removedlist; ListCell *cell; ListCell *prev; @@ -4205,9 +4206,9 @@ get_foreign_key_join_selectivity(PlannerInfo *root, * be double-counting the null fraction, and (2) it's not very clear * how to combine null fractions for multiple referencing columns. * - * In the first branch of the logic below, null derating is done - * implicitly by relying on clause_selectivity(); in the other two - * paths, we do nothing for now about correcting for nulls. + * In the use_smallest_selectivity code below, null derating is done + * implicitly by relying on clause_selectivity(); in the other cases, + * we do nothing for now about correcting for nulls. * * XXX another point here is that if either side of an FK constraint * is an inheritance parent, we estimate as though the constraint @@ -4230,28 +4231,41 @@ get_foreign_key_join_selectivity(PlannerInfo *root, * the smallest per-column selectivity, instead. (This should * correspond to the FK column with the most nulls.) */ - Selectivity thisfksel = 1.0; - - foreach(cell, removedlist) - { - RestrictInfo *rinfo = (RestrictInfo *) lfirst(cell); - Selectivity csel; - - csel = clause_selectivity(root, (Node *) rinfo, - 0, jointype, sjinfo); - thisfksel = Min(thisfksel, csel); - } - fkselec *= thisfksel; + use_smallest_selectivity = true; } else if (jointype == JOIN_SEMI || jointype == JOIN_ANTI) { /* * For JOIN_SEMI and JOIN_ANTI, the selectivity is defined as the - * fraction of LHS rows that have matches. If the referenced - * table is on the inner side, that means the selectivity is 1.0 - * (modulo nulls, which we're ignoring for now). We already - * covered the other case, so no work here. + * fraction of LHS rows that have matches. The referenced table + * is on the inner side (we already handled the other case above), + * so the FK implies that every LHS row has a match *in the + * referenced table*. But any restriction or join clauses below + * here will reduce the number of matches. */ + if (bms_membership(inner_relids) == BMS_SINGLETON) + { + /* + * When the inner side of the semi/anti join is just the + * referenced table, we may take the FK selectivity as equal + * to the selectivity of the table's restriction clauses. + */ + RelOptInfo *ref_rel = find_base_rel(root, fkinfo->ref_relid); + double ref_tuples = Max(ref_rel->tuples, 1.0); + + fkselec *= ref_rel->rows / ref_tuples; + } + else + { + /* + * When the inner side of the semi/anti join is itself a join, + * it's hard to guess what fraction of the referenced table + * will get through the join. But we still don't want to + * multiply per-column estimates together. Take the smallest + * per-column selectivity, instead. + */ + use_smallest_selectivity = true; + } } else { @@ -4265,6 +4279,26 @@ get_foreign_key_join_selectivity(PlannerInfo *root, fkselec *= 1.0 / ref_tuples; } + + /* + * Common code for cases where we should use the smallest selectivity + * that would be computed for any one of the FK's clauses. + */ + if (use_smallest_selectivity) + { + Selectivity thisfksel = 1.0; + + foreach(cell, removedlist) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(cell); + Selectivity csel; + + csel = clause_selectivity(root, (Node *) rinfo, + 0, jointype, sjinfo); + thisfksel = Min(thisfksel, csel); + } + fkselec *= thisfksel; + } } *restrictlist = worklist;