diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index edff6da410..ea95b8068d 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -2767,29 +2767,67 @@ neqjoinsel(PG_FUNCTION_ARGS) List *args = (List *) PG_GETARG_POINTER(2); JoinType jointype = (JoinType) PG_GETARG_INT16(3); SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) PG_GETARG_POINTER(4); - Oid eqop; float8 result; - /* - * We want 1 - eqjoinsel() where the equality operator is the one - * associated with this != operator, that is, its negator. - */ - eqop = get_negator(operator); - if (eqop) + if (jointype == JOIN_SEMI || jointype == JOIN_ANTI) { - result = DatumGetFloat8(DirectFunctionCall5(eqjoinsel, - PointerGetDatum(root), - ObjectIdGetDatum(eqop), - PointerGetDatum(args), - Int16GetDatum(jointype), - PointerGetDatum(sjinfo))); + /* + * For semi-joins, if there is more than one distinct value in the RHS + * relation then every non-null LHS row must find a row to join since + * it can only be equal to one of them. We'll assume that there is + * always more than one distinct RHS value for the sake of stability, + * though in theory we could have special cases for empty RHS + * (selectivity = 0) and single-distinct-value RHS (selectivity = + * fraction of LHS that has the same value as the single RHS value). + * + * For anti-joins, if we use the same assumption that there is more + * than one distinct key in the RHS relation, then every non-null LHS + * row must be suppressed by the anti-join. + * + * So either way, the selectivity estimate should be 1 - nullfrac. + */ + VariableStatData leftvar; + VariableStatData rightvar; + bool reversed; + HeapTuple statsTuple; + double nullfrac; + + get_join_variables(root, args, sjinfo, &leftvar, &rightvar, &reversed); + statsTuple = reversed ? rightvar.statsTuple : leftvar.statsTuple; + if (HeapTupleIsValid(statsTuple)) + nullfrac = ((Form_pg_statistic) GETSTRUCT(statsTuple))->stanullfrac; + else + nullfrac = 0.0; + ReleaseVariableStats(leftvar); + ReleaseVariableStats(rightvar); + + result = 1.0 - nullfrac; } else { - /* Use default selectivity (should we raise an error instead?) */ - result = DEFAULT_EQ_SEL; + /* + * We want 1 - eqjoinsel() where the equality operator is the one + * associated with this != operator, that is, its negator. + */ + Oid eqop = get_negator(operator); + + if (eqop) + { + result = DatumGetFloat8(DirectFunctionCall5(eqjoinsel, + PointerGetDatum(root), + ObjectIdGetDatum(eqop), + PointerGetDatum(args), + Int16GetDatum(jointype), + PointerGetDatum(sjinfo))); + } + else + { + /* Use default selectivity (should we raise an error instead?) */ + result = DEFAULT_EQ_SEL; + } + result = 1.0 - result; } - result = 1.0 - result; + PG_RETURN_FLOAT8(result); } diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index f02ef3f978..b7d1790097 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -1845,6 +1845,28 @@ SELECT '' AS "xxx", * | 1 | 4 | one | -1 (1 row) +-- +-- semijoin selectivity for <> +-- +explain (costs off) +select * from int4_tbl i4, tenk1 a +where exists(select * from tenk1 b + where a.twothousand = b.twothousand and a.fivethous <> b.fivethous) + and i4.f1 = a.tenthous; + QUERY PLAN +---------------------------------------------- + Hash Semi Join + Hash Cond: (a.twothousand = b.twothousand) + Join Filter: (a.fivethous <> b.fivethous) + -> Hash Join + Hash Cond: (a.tenthous = i4.f1) + -> Seq Scan on tenk1 a + -> Hash + -> Seq Scan on int4_tbl i4 + -> Hash + -> Seq Scan on tenk1 b +(10 rows) + -- -- More complicated constructs -- diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql index cbb71c5b1b..c6d4a513e8 100644 --- a/src/test/regress/sql/join.sql +++ b/src/test/regress/sql/join.sql @@ -193,6 +193,15 @@ SELECT '' AS "xxx", * SELECT '' AS "xxx", * FROM J1_TBL LEFT JOIN J2_TBL USING (i) WHERE (i = 1); +-- +-- semijoin selectivity for <> +-- +explain (costs off) +select * from int4_tbl i4, tenk1 a +where exists(select * from tenk1 b + where a.twothousand = b.twothousand and a.fivethous <> b.fivethous) + and i4.f1 = a.tenthous; + -- -- More complicated constructs