diff --git a/src/backend/catalog/pg_operator.c b/src/backend/catalog/pg_operator.c index 11690aa8ce..94b3bf2862 100644 --- a/src/backend/catalog/pg_operator.c +++ b/src/backend/catalog/pg_operator.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/catalog/pg_operator.c,v 1.104 2008/06/19 00:46:04 alvherre Exp $ + * $PostgreSQL: pgsql/src/backend/catalog/pg_operator.c,v 1.105 2008/08/16 00:01:35 tgl Exp $ * * NOTES * these routines moved here from commands/define.c and somewhat cleaned up. @@ -21,12 +21,12 @@ #include "access/xact.h" #include "catalog/dependency.h" #include "catalog/indexing.h" +#include "catalog/namespace.h" #include "catalog/pg_namespace.h" #include "catalog/pg_operator.h" #include "catalog/pg_proc.h" #include "catalog/pg_type.h" #include "miscadmin.h" -#include "parser/parse_func.h" #include "parser/parse_oper.h" #include "utils/acl.h" #include "utils/builtins.h" @@ -273,6 +273,11 @@ OperatorShellMake(const char *operatorName, heap_freetuple(tup); + /* + * Make sure the tuple is visible for subsequent lookups/updates. + */ + CommandCounterIncrement(); + /* * close the operator relation and return the oid. */ @@ -289,14 +294,19 @@ OperatorShellMake(const char *operatorName, * operatorNamespace namespace for new operator * leftTypeId X left type ID * rightTypeId X right type ID - * procedureName procedure for operator + * procedureId procedure ID for operator * commutatorName X commutator operator * negatorName X negator operator - * restrictionName X restriction sel. procedure - * joinName X join sel. procedure + * restrictionId X restriction selectivity procedure ID + * joinId X join selectivity procedure ID * canMerge merge join can be used with this operator * canHash hash join can be used with this operator * + * The caller should have validated properties and permissions for the + * objects passed as OID references. We must handle the commutator and + * negator operator references specially, however, since those need not + * exist beforehand. + * * This routine gets complicated because it allows the user to * specify operators that do not exist. For example, if operator * "op" is being defined, the negator operator "negop" and the @@ -310,59 +320,17 @@ OperatorShellMake(const char *operatorName, * information about the operator (just its name and types). * Forward declaration is used only for this purpose, it is * not available to the user as it is for type definition. - * - * Algorithm: - * - * check if operator already defined - * if so, but oprcode is null, save the Oid -- we are filling in a shell - * otherwise error - * get the attribute types from relation descriptor for pg_operator - * assign values to the fields of the operator: - * operatorName - * owner id (simply the user id of the caller) - * operator "kind" either "b" for binary or "l" for left unary - * canMerge boolean - * canHash boolean - * leftTypeObjectId -- type must already be defined - * rightTypeObjectId -- this is optional, enter ObjectId=0 if none specified - * resultType -- defer this, since it must be determined from - * the pg_procedure catalog - * commutatorObjectId -- if this is NULL, enter ObjectId=0 - * else if this already exists, enter its ObjectId - * else if this does not yet exist, and is not - * the same as the main operatorName, then create - * a shell and enter the new ObjectId - * else if this does not exist but IS the same - * name & types as the main operator, set the ObjectId=0. - * (We are creating a self-commutating operator.) - * The link will be fixed later by OperatorUpd. - * negatorObjectId -- same as for commutatorObjectId - * operatorProcedure -- must access the pg_procedure catalog to get the - * ObjectId of the procedure that actually does the operator - * actions this is required. Do a lookup to find out the - * return type of the procedure - * restrictionProcedure -- must access the pg_procedure catalog to get - * the ObjectId but this is optional - * joinProcedure -- same as restrictionProcedure - * now either insert or replace the operator into the pg_operator catalog - * if the operator shell is being filled in - * access the catalog in order to get a valid buffer - * create a tuple using ModifyHeapTuple - * get the t_self from the modified tuple and call RelationReplaceHeapTuple - * else if a new operator is being created - * create a tuple using heap_formtuple - * call simple_heap_insert */ void OperatorCreate(const char *operatorName, Oid operatorNamespace, Oid leftTypeId, Oid rightTypeId, - List *procedureName, + Oid procedureId, List *commutatorName, List *negatorName, - List *restrictionName, - List *joinName, + Oid restrictionId, + Oid joinId, bool canMerge, bool canHash) { @@ -373,15 +341,10 @@ OperatorCreate(const char *operatorName, Datum values[Natts_pg_operator]; Oid operatorObjectId; bool operatorAlreadyDefined; - Oid procOid; Oid operResultType; Oid commutatorId, - negatorId, - restOid, - joinOid; + negatorId; bool selfCommutator = false; - Oid typeId[4]; /* only need up to 4 args here */ - int nargs; NameData oname; TupleDesc tupDesc; int i; @@ -395,11 +358,6 @@ OperatorCreate(const char *operatorName, errmsg("\"%s\" is not a valid operator name", operatorName))); - if (!OidIsValid(leftTypeId) && !OidIsValid(rightTypeId)) - ereport(ERROR, - (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION), - errmsg("at least one of leftarg or rightarg must be specified"))); - if (!(OidIsValid(leftTypeId) && OidIsValid(rightTypeId))) { /* If it's not a binary op, these things mustn't be set: */ @@ -407,7 +365,7 @@ OperatorCreate(const char *operatorName, ereport(ERROR, (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION), errmsg("only binary operators can have commutators"))); - if (joinName) + if (OidIsValid(joinId)) ereport(ERROR, (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION), errmsg("only binary operators can have join selectivity"))); @@ -421,6 +379,33 @@ OperatorCreate(const char *operatorName, errmsg("only binary operators can hash"))); } + operResultType = get_func_rettype(procedureId); + + if (operResultType != BOOLOID) + { + /* If it's not a boolean op, these things mustn't be set: */ + if (negatorName) + ereport(ERROR, + (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION), + errmsg("only boolean operators can have negators"))); + if (OidIsValid(restrictionId)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION), + errmsg("only boolean operators can have restriction selectivity"))); + if (OidIsValid(joinId)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION), + errmsg("only boolean operators can have join selectivity"))); + if (canMerge) + ereport(ERROR, + (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION), + errmsg("only boolean operators can merge join"))); + if (canHash) + ereport(ERROR, + (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION), + errmsg("only boolean operators can hash"))); + } + operatorObjectId = OperatorGet(operatorName, operatorNamespace, leftTypeId, @@ -435,62 +420,61 @@ OperatorCreate(const char *operatorName, /* * At this point, if operatorObjectId is not InvalidOid then we are - * filling in a previously-created shell. + * filling in a previously-created shell. Insist that the user own + * any such shell. */ + if (OidIsValid(operatorObjectId) && + !pg_oper_ownercheck(operatorObjectId, GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_OPER, + operatorName); /* - * Look up registered procedures -- find the return type of procedureName - * to place in "result" field. Do this before shells are created so we - * don't have to worry about deleting them later. + * Set up the other operators. If they do not currently exist, create + * shells in order to get ObjectId's. */ - if (!OidIsValid(leftTypeId)) + + if (commutatorName) { - typeId[0] = rightTypeId; - nargs = 1; - } - else if (!OidIsValid(rightTypeId)) - { - typeId[0] = leftTypeId; - nargs = 1; + /* commutator has reversed arg types */ + commutatorId = get_other_operator(commutatorName, + rightTypeId, leftTypeId, + operatorName, operatorNamespace, + leftTypeId, rightTypeId, + true); + + /* Permission check: must own other operator */ + if (OidIsValid(commutatorId) && + !pg_oper_ownercheck(commutatorId, GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_OPER, + NameListToString(commutatorName)); + + /* + * self-linkage to this operator; will fix below. Note that only + * self-linkage for commutation makes sense. + */ + if (!OidIsValid(commutatorId)) + selfCommutator = true; } else - { - typeId[0] = leftTypeId; - typeId[1] = rightTypeId; - nargs = 2; - } - procOid = LookupFuncName(procedureName, nargs, typeId, false); - operResultType = get_func_rettype(procOid); + commutatorId = InvalidOid; - /* - * find restriction estimator - */ - if (restrictionName) + if (negatorName) { - typeId[0] = INTERNALOID; /* Query */ - typeId[1] = OIDOID; /* operator OID */ - typeId[2] = INTERNALOID; /* args list */ - typeId[3] = INT4OID; /* varRelid */ + /* negator has same arg types */ + negatorId = get_other_operator(negatorName, + leftTypeId, rightTypeId, + operatorName, operatorNamespace, + leftTypeId, rightTypeId, + false); - restOid = LookupFuncName(restrictionName, 4, typeId, false); + /* Permission check: must own other operator */ + if (OidIsValid(negatorId) && + !pg_oper_ownercheck(negatorId, GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_OPER, + NameListToString(negatorName)); } else - restOid = InvalidOid; - - /* - * find join estimator - */ - if (joinName) - { - typeId[0] = INTERNALOID; /* Query */ - typeId[1] = OIDOID; /* operator OID */ - typeId[2] = INTERNALOID; /* args list */ - typeId[3] = INT2OID; /* jointype */ - - joinOid = LookupFuncName(joinName, 4, typeId, false); - } - else - joinOid = InvalidOid; + negatorId = InvalidOid; /* * set up values in the operator tuple @@ -514,53 +498,16 @@ OperatorCreate(const char *operatorName, values[i++] = ObjectIdGetDatum(leftTypeId); /* oprleft */ values[i++] = ObjectIdGetDatum(rightTypeId); /* oprright */ values[i++] = ObjectIdGetDatum(operResultType); /* oprresult */ - - /* - * Set up the other operators. If they do not currently exist, create - * shells in order to get ObjectId's. - */ - - if (commutatorName) - { - /* commutator has reversed arg types */ - commutatorId = get_other_operator(commutatorName, - rightTypeId, leftTypeId, - operatorName, operatorNamespace, - leftTypeId, rightTypeId, - true); - - /* - * self-linkage to this operator; will fix below. Note that only - * self-linkage for commutation makes sense. - */ - if (!OidIsValid(commutatorId)) - selfCommutator = true; - } - else - commutatorId = InvalidOid; values[i++] = ObjectIdGetDatum(commutatorId); /* oprcom */ - - if (negatorName) - { - /* negator has same arg types */ - negatorId = get_other_operator(negatorName, - leftTypeId, rightTypeId, - operatorName, operatorNamespace, - leftTypeId, rightTypeId, - false); - } - else - negatorId = InvalidOid; - values[i++] = ObjectIdGetDatum(negatorId); /* oprnegate */ - - values[i++] = ObjectIdGetDatum(procOid); /* oprcode */ - values[i++] = ObjectIdGetDatum(restOid); /* oprrest */ - values[i++] = ObjectIdGetDatum(joinOid); /* oprjoin */ + values[i++] = ObjectIdGetDatum(negatorId); /* oprnegate */ + values[i++] = ObjectIdGetDatum(procedureId); /* oprcode */ + values[i++] = ObjectIdGetDatum(restrictionId); /* oprrest */ + values[i++] = ObjectIdGetDatum(joinId); /* oprjoin */ pg_operator_desc = heap_open(OperatorRelationId, RowExclusiveLock); /* - * If we are adding to an operator shell, update; else insert + * If we are replacing an operator shell, update; else insert */ if (operatorObjectId) { @@ -706,7 +653,8 @@ OperatorUpd(Oid baseId, Oid commId, Oid negId) /* * check and update the commutator & negator, if necessary * - * First make sure we can see them... + * We need a CommandCounterIncrement here in case of a self-commutator + * operator: we'll need to update the tuple that we just inserted. */ CommandCounterIncrement(); diff --git a/src/backend/commands/operatorcmds.c b/src/backend/commands/operatorcmds.c index 884181c5cd..391b4d6507 100644 --- a/src/backend/commands/operatorcmds.c +++ b/src/backend/commands/operatorcmds.c @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/commands/operatorcmds.c,v 1.40 2008/06/19 00:46:04 alvherre Exp $ + * $PostgreSQL: pgsql/src/backend/commands/operatorcmds.c,v 1.41 2008/08/16 00:01:35 tgl Exp $ * * DESCRIPTION * The "DefineFoo" routines take the parse tree and pick out the @@ -39,8 +39,10 @@ #include "catalog/indexing.h" #include "catalog/namespace.h" #include "catalog/pg_operator.h" +#include "catalog/pg_type.h" #include "commands/defrem.h" #include "miscadmin.h" +#include "parser/parse_func.h" #include "parser/parse_oper.h" #include "parser/parse_type.h" #include "utils/acl.h" @@ -76,6 +78,11 @@ DefineOperator(List *names, List *parameters) List *negatorName = NIL; /* optional negator operator name */ List *restrictionName = NIL; /* optional restrict. sel. procedure */ List *joinName = NIL; /* optional join sel. procedure */ + Oid functionOid; /* functions converted to OID */ + Oid restrictionOid; + Oid joinOid; + Oid typeId[5]; /* only need up to 5 args here */ + int nargs; ListCell *pl; /* Convert list of names to a name and namespace */ @@ -154,6 +161,109 @@ DefineOperator(List *names, List *parameters) if (typeName2) typeId2 = typenameTypeId(NULL, typeName2, NULL); + if (!OidIsValid(typeId1) && !OidIsValid(typeId2)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION), + errmsg("at least one of leftarg or rightarg must be specified"))); + + /* + * Look up the operator's underlying function. + */ + if (!OidIsValid(typeId1)) + { + typeId[0] = typeId2; + nargs = 1; + } + else if (!OidIsValid(typeId2)) + { + typeId[0] = typeId1; + nargs = 1; + } + else + { + typeId[0] = typeId1; + typeId[1] = typeId2; + nargs = 2; + } + functionOid = LookupFuncName(functionName, nargs, typeId, false); + + /* + * We require EXECUTE rights for the function. This isn't strictly + * necessary, since EXECUTE will be checked at any attempted use of + * the operator, but it seems like a good idea anyway. + */ + aclresult = pg_proc_aclcheck(functionOid, GetUserId(), ACL_EXECUTE); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, ACL_KIND_PROC, + NameListToString(functionName)); + + /* + * Look up restriction estimator if specified + */ + if (restrictionName) + { + typeId[0] = INTERNALOID; /* PlannerInfo */ + typeId[1] = OIDOID; /* operator OID */ + typeId[2] = INTERNALOID; /* args list */ + typeId[3] = INT4OID; /* varRelid */ + + restrictionOid = LookupFuncName(restrictionName, 4, typeId, false); + + /* estimators must return float8 */ + if (get_func_rettype(restrictionOid) != FLOAT8OID) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("restriction estimator function %s must return type \"float8\"", + NameListToString(restrictionName)))); + + /* Require EXECUTE rights for the estimator */ + aclresult = pg_proc_aclcheck(restrictionOid, GetUserId(), ACL_EXECUTE); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, ACL_KIND_PROC, + NameListToString(restrictionName)); + } + else + restrictionOid = InvalidOid; + + /* + * Look up join estimator if specified + */ + if (joinName) + { + typeId[0] = INTERNALOID; /* PlannerInfo */ + typeId[1] = OIDOID; /* operator OID */ + typeId[2] = INTERNALOID; /* args list */ + typeId[3] = INT2OID; /* jointype */ + typeId[4] = INTERNALOID; /* SpecialJoinInfo */ + + /* + * As of Postgres 8.4, the preferred signature for join estimators + * has 5 arguments, but we still allow the old 4-argument form. + * Try the preferred form first. + */ + joinOid = LookupFuncName(joinName, 5, typeId, true); + if (!OidIsValid(joinOid)) + joinOid = LookupFuncName(joinName, 4, typeId, true); + /* If not found, reference the 5-argument signature in error msg */ + if (!OidIsValid(joinOid)) + joinOid = LookupFuncName(joinName, 5, typeId, false); + + /* estimators must return float8 */ + if (get_func_rettype(joinOid) != FLOAT8OID) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("join estimator function %s must return type \"float8\"", + NameListToString(joinName)))); + + /* Require EXECUTE rights for the estimator */ + aclresult = pg_proc_aclcheck(joinOid, GetUserId(), ACL_EXECUTE); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, ACL_KIND_PROC, + NameListToString(joinName)); + } + else + joinOid = InvalidOid; + /* * now have OperatorCreate do all the work.. */ @@ -161,11 +271,11 @@ DefineOperator(List *names, List *parameters) oprNamespace, /* namespace */ typeId1, /* left type id */ typeId2, /* right type id */ - functionName, /* function for operator */ + functionOid, /* function for operator */ commutatorName, /* optional commutator operator name */ negatorName, /* optional negator operator name */ - restrictionName, /* optional restrict. sel. procedure */ - joinName, /* optional join sel. procedure name */ + restrictionOid, /* optional restrict. sel. procedure */ + joinOid, /* optional join sel. procedure name */ canMerge, /* operator merges */ canHash); /* operator hashes */ } diff --git a/src/backend/optimizer/path/clausesel.c b/src/backend/optimizer/path/clausesel.c index e788f27873..6db42c5f58 100644 --- a/src/backend/optimizer/path/clausesel.c +++ b/src/backend/optimizer/path/clausesel.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/path/clausesel.c,v 1.91 2008/08/14 18:47:59 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/path/clausesel.c,v 1.92 2008/08/16 00:01:36 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -398,6 +398,50 @@ bms_is_subset_singleton(const Bitmapset *s, int x) return false; } +/* + * treat_as_join_clause - + * Decide whether an operator clause is to be handled by the + * restriction or join estimator. Subroutine for clause_selectivity(). + */ +static inline bool +treat_as_join_clause(Node *clause, RestrictInfo *rinfo, + int varRelid, SpecialJoinInfo *sjinfo) +{ + if (varRelid != 0) + { + /* + * Caller is forcing restriction mode (eg, because we are examining + * an inner indexscan qual). + */ + return false; + } + else if (sjinfo == NULL) + { + /* + * It must be a restriction clause, since it's being evaluated at + * a scan node. + */ + return false; + } + else + { + /* + * Otherwise, it's a join if there's more than one relation used. + * We can optimize this calculation if an rinfo was passed. + * + * XXX Since we know the clause is being evaluated at a join, + * the only way it could be single-relation is if it was delayed + * by outer joins. Although we can make use of the restriction + * qual estimators anyway, it seems likely that we ought to account + * for the probability of injected nulls somehow. + */ + if (rinfo) + return (bms_membership(rinfo->clause_relids) == BMS_MULTIPLE); + else + return (NumRelids(clause) > 1); + } +} + /* * clause_selectivity - @@ -429,9 +473,6 @@ bms_is_subset_singleton(const Bitmapset *s, int x) * root->join_info_list. * 2. For an INNER join, sjinfo is just a transient struct, and only the * relids and jointype fields in it can be trusted. - * 3. XXX sjinfo might be NULL even though it really is a join. This case - * will go away soon, but fixing it requires API changes for oprjoin and - * amcostestimate functions. * It is possible for jointype to be different from sjinfo->jointype. * This indicates we are considering a variant join: either with * the LHS and RHS switched, or with one input unique-ified. @@ -603,36 +644,14 @@ clause_selectivity(PlannerInfo *root, else if (is_opclause(clause) || IsA(clause, DistinctExpr)) { Oid opno = ((OpExpr *) clause)->opno; - bool is_join_clause; - if (varRelid != 0) - { - /* - * If we are considering a nestloop join then all clauses are - * restriction clauses, since we are only interested in the one - * relation. - */ - is_join_clause = false; - } - else - { - /* - * Otherwise, it's a join if there's more than one relation used. - * We can optimize this calculation if an rinfo was passed. - */ - if (rinfo) - is_join_clause = (bms_membership(rinfo->clause_relids) == - BMS_MULTIPLE); - else - is_join_clause = (NumRelids(clause) > 1); - } - - if (is_join_clause) + if (treat_as_join_clause(clause, rinfo, varRelid, sjinfo)) { /* Estimate selectivity for a join clause. */ s1 = join_selectivity(root, opno, ((OpExpr *) clause)->args, - jointype); + jointype, + sjinfo); } else { @@ -671,35 +690,11 @@ clause_selectivity(PlannerInfo *root, #endif else if (IsA(clause, ScalarArrayOpExpr)) { - /* First, decide if it's a join clause, same as for OpExpr */ - bool is_join_clause; - - if (varRelid != 0) - { - /* - * If we are considering a nestloop join then all clauses are - * restriction clauses, since we are only interested in the one - * relation. - */ - is_join_clause = false; - } - else - { - /* - * Otherwise, it's a join if there's more than one relation used. - * We can optimize this calculation if an rinfo was passed. - */ - if (rinfo) - is_join_clause = (bms_membership(rinfo->clause_relids) == - BMS_MULTIPLE); - else - is_join_clause = (NumRelids(clause) > 1); - } - /* Use node specific selectivity calculation function */ s1 = scalararraysel(root, (ScalarArrayOpExpr *) clause, - is_join_clause, + treat_as_join_clause(clause, rinfo, + varRelid, sjinfo), varRelid, jointype, sjinfo); diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index d9d8c89864..14c3cf1f21 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -54,7 +54,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.193 2008/08/14 18:47:59 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.194 2008/08/16 00:01:36 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -118,10 +118,8 @@ static MergeScanSelCache *cached_scansel(PlannerInfo *root, RestrictInfo *rinfo, PathKey *pathkey); static bool cost_qual_eval_walker(Node *node, cost_qual_eval_context *context); -static Selectivity approx_selectivity(PlannerInfo *root, List *quals, - SpecialJoinInfo *sjinfo); -static Selectivity join_in_selectivity(JoinPath *path, PlannerInfo *root, - SpecialJoinInfo *sjinfo); +static double approx_tuple_count(PlannerInfo *root, JoinPath *path, + List *quals, SpecialJoinInfo *sjinfo); static void set_rel_width(PlannerInfo *root, RelOptInfo *rel); static double relation_byte_size(double tuples, int width); static double page_size(double tuples, int width); @@ -1288,20 +1286,10 @@ cost_nestloop(NestPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo) double outer_path_rows = PATH_ROWS(outer_path); double inner_path_rows = nestloop_inner_path_rows(inner_path); double ntuples; - Selectivity joininfactor; if (!enable_nestloop) startup_cost += disable_cost; - /* - * If we're doing JOIN_IN then we will stop scanning inner tuples for an - * outer tuple as soon as we have one match. Account for the effects of - * this by scaling down the cost estimates in proportion to the JOIN_IN - * selectivity. (This assumes that all the quals attached to the join are - * IN quals, which should be true.) - */ - joininfactor = join_in_selectivity(path, root, sjinfo); - /* cost of source data */ /* @@ -1328,12 +1316,12 @@ cost_nestloop(NestPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo) run_cost += (outer_path_rows - 1) * inner_path->startup_cost; } run_cost += outer_path_rows * - (inner_path->total_cost - inner_path->startup_cost) * joininfactor; + (inner_path->total_cost - inner_path->startup_cost); /* * Compute number of tuples processed (not number emitted!) */ - ntuples = outer_path_rows * inner_path_rows * joininfactor; + ntuples = outer_path_rows * inner_path_rows; /* CPU costs */ cost_qual_eval(&restrict_qual_cost, path->joinrestrictinfo, root); @@ -1369,7 +1357,6 @@ cost_mergejoin(MergePath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo) Cost startup_cost = 0; Cost run_cost = 0; Cost cpu_per_tuple; - Selectivity merge_selec; QualCost merge_qual_cost; QualCost qp_qual_cost; double outer_path_rows = PATH_ROWS(outer_path); @@ -1385,7 +1372,6 @@ cost_mergejoin(MergePath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo) outerendsel, innerstartsel, innerendsel; - Selectivity joininfactor; Path sort_path; /* dummy for result of cost_sort */ /* Protect some assumptions below that rowcounts aren't zero */ @@ -1398,21 +1384,21 @@ cost_mergejoin(MergePath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo) startup_cost += disable_cost; /* - * Compute cost and selectivity of the mergequals and qpquals (other - * restriction clauses) separately. We use approx_selectivity here for - * speed --- in most cases, any errors won't affect the result much. - * - * Note: it's probably bogus to use the normal selectivity calculation - * here when either the outer or inner path is a UniquePath. + * Compute cost of the mergequals and qpquals (other restriction clauses) + * separately. */ - merge_selec = approx_selectivity(root, mergeclauses, sjinfo); cost_qual_eval(&merge_qual_cost, mergeclauses, root); cost_qual_eval(&qp_qual_cost, path->jpath.joinrestrictinfo, root); qp_qual_cost.startup -= merge_qual_cost.startup; qp_qual_cost.per_tuple -= merge_qual_cost.per_tuple; - /* approx # tuples passing the merge quals */ - mergejointuples = clamp_row_est(merge_selec * outer_path_rows * inner_path_rows); + /* + * Get approx # tuples passing the mergequals. We use approx_tuple_count + * here for speed --- in most cases, any errors won't affect the result + * much. + */ + mergejointuples = approx_tuple_count(root, &path->jpath, + mergeclauses, sjinfo); /* * When there are equal merge keys in the outer relation, the mergejoin @@ -1422,11 +1408,11 @@ cost_mergejoin(MergePath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo) * but on the other hand we ignore the bookkeeping costs of mark/restore. * Not clear if it's worth developing a more refined model. * - * The number of re-fetches can be estimated approximately as size of - * merge join output minus size of inner relation. Assume that the - * distinct key values are 1, 2, ..., and denote the number of values of - * each key in the outer relation as m1, m2, ...; in the inner relation, - * n1, n2, ... Then we have + * For regular inner and outer joins, the number of re-fetches can be + * estimated approximately as size of merge join output minus size of + * inner relation. Assume that the distinct key values are 1, 2, ..., and + * denote the number of values of each key in the outer relation as m1, + * m2, ...; in the inner relation, n1, n2, ... Then we have * * size of join = m1 * n1 + m2 * n2 + ... * @@ -1439,8 +1425,17 @@ cost_mergejoin(MergePath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo) * are effectively subtracting those from the number of rescanned tuples, * when we should not. Can we do better without expensive selectivity * computations? + * + * For SEMI and ANTI joins, only one inner tuple need be rescanned for + * each group of same-keyed outer tuples (assuming that all joinquals + * are merge quals). This makes the effect small enough to ignore, + * so we just set rescannedtuples = 0. Likewise, the whole issue is + * moot if we are working from a unique-ified outer input. */ - if (IsA(outer_path, UniquePath)) + if (sjinfo->jointype == JOIN_SEMI || + sjinfo->jointype == JOIN_ANTI) + rescannedtuples = 0; + else if (IsA(outer_path, UniquePath)) rescannedtuples = 0; else { @@ -1505,7 +1500,8 @@ cost_mergejoin(MergePath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo) innerstartsel = cache->leftstartsel; innerendsel = cache->leftendsel; } - if (path->jpath.jointype == JOIN_LEFT) + if (path->jpath.jointype == JOIN_LEFT || + path->jpath.jointype == JOIN_ANTI) { outerstartsel = 0.0; outerendsel = 1.0; @@ -1600,21 +1596,10 @@ cost_mergejoin(MergePath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo) /* CPU costs */ - /* - * If we're doing JOIN_IN then we will stop outputting inner tuples for an - * outer tuple as soon as we have one match. Account for the effects of - * this by scaling down the cost estimates in proportion to the expected - * output size. (This assumes that all the quals attached to the join are - * IN quals, which should be true.) - */ - joininfactor = join_in_selectivity(&path->jpath, root, sjinfo); - /* * The number of tuple comparisons needed is approximately number of outer * rows plus number of inner rows plus number of rescanned tuples (can we * refine this?). At each one, we need to evaluate the mergejoin quals. - * NOTE: JOIN_IN mode does not save any work here, so do NOT include - * joininfactor. */ startup_cost += merge_qual_cost.startup; startup_cost += merge_qual_cost.per_tuple * @@ -1627,12 +1612,11 @@ cost_mergejoin(MergePath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo) * For each tuple that gets through the mergejoin proper, we charge * cpu_tuple_cost plus the cost of evaluating additional restriction * clauses that are to be applied at the join. (This is pessimistic since - * not all of the quals may get evaluated at each tuple.) This work is - * skipped in JOIN_IN mode, so apply the factor. + * not all of the quals may get evaluated at each tuple.) */ startup_cost += qp_qual_cost.startup; cpu_per_tuple = cpu_tuple_cost + qp_qual_cost.per_tuple; - run_cost += cpu_per_tuple * mergejointuples * joininfactor; + run_cost += cpu_per_tuple * mergejointuples; path->jpath.path.startup_cost = startup_cost; path->jpath.path.total_cost = startup_cost + run_cost; @@ -1711,7 +1695,6 @@ cost_hashjoin(HashPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo) Cost startup_cost = 0; Cost run_cost = 0; Cost cpu_per_tuple; - Selectivity hash_selec; QualCost hash_qual_cost; QualCost qp_qual_cost; double hashjointuples; @@ -1722,28 +1705,27 @@ cost_hashjoin(HashPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo) int numbatches; double virtualbuckets; Selectivity innerbucketsize; - Selectivity joininfactor; ListCell *hcl; if (!enable_hashjoin) startup_cost += disable_cost; /* - * Compute cost and selectivity of the hashquals and qpquals (other - * restriction clauses) separately. We use approx_selectivity here for - * speed --- in most cases, any errors won't affect the result much. - * - * Note: it's probably bogus to use the normal selectivity calculation - * here when either the outer or inner path is a UniquePath. + * Compute cost of the hashquals and qpquals (other restriction clauses) + * separately. */ - hash_selec = approx_selectivity(root, hashclauses, sjinfo); cost_qual_eval(&hash_qual_cost, hashclauses, root); cost_qual_eval(&qp_qual_cost, path->jpath.joinrestrictinfo, root); qp_qual_cost.startup -= hash_qual_cost.startup; qp_qual_cost.per_tuple -= hash_qual_cost.per_tuple; - /* approx # tuples passing the hash quals */ - hashjointuples = clamp_row_est(hash_selec * outer_path_rows * inner_path_rows); + /* + * Get approx # tuples passing the hashquals. We use approx_tuple_count + * here for speed --- in most cases, any errors won't affect the result + * much. + */ + hashjointuples = approx_tuple_count(root, &path->jpath, + hashclauses, sjinfo); /* cost of source data */ startup_cost += outer_path->startup_cost; @@ -1858,15 +1840,6 @@ cost_hashjoin(HashPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo) /* CPU costs */ - /* - * If we're doing JOIN_IN then we will stop comparing inner tuples to an - * outer tuple as soon as we have one match. Account for the effects of - * this by scaling down the cost estimates in proportion to the expected - * output size. (This assumes that all the quals attached to the join are - * IN quals, which should be true.) - */ - joininfactor = join_in_selectivity(&path->jpath, root, sjinfo); - /* * The number of tuple comparisons needed is the number of outer tuples * times the typical number of tuples in a hash bucket, which is the inner @@ -1878,8 +1851,7 @@ cost_hashjoin(HashPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo) */ startup_cost += hash_qual_cost.startup; run_cost += hash_qual_cost.per_tuple * - outer_path_rows * clamp_row_est(inner_path_rows * innerbucketsize) * - joininfactor * 0.5; + outer_path_rows * clamp_row_est(inner_path_rows * innerbucketsize) * 0.5; /* * For each tuple that gets through the hashjoin proper, we charge @@ -1889,7 +1861,7 @@ cost_hashjoin(HashPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo) */ startup_cost += qp_qual_cost.startup; cpu_per_tuple = cpu_tuple_cost + qp_qual_cost.per_tuple; - run_cost += cpu_per_tuple * hashjointuples * joininfactor; + run_cost += cpu_per_tuple * hashjointuples; path->jpath.path.startup_cost = startup_cost; path->jpath.path.total_cost = startup_cost + run_cost; @@ -2213,13 +2185,12 @@ get_initplan_cost(PlannerInfo *root, SubPlan *subplan) /* - * approx_selectivity - * Quick-and-dirty estimation of clause selectivities. - * The input can be either an implicitly-ANDed list of boolean - * expressions, or a list of RestrictInfo nodes (typically the latter). + * approx_tuple_count + * Quick-and-dirty estimation of the number of join rows passing + * a set of qual conditions. * - * Currently this is only used in join estimation, so sjinfo should never - * be NULL. + * The quals can be either an implicitly-ANDed list of boolean expressions, + * or a list of RestrictInfo nodes (typically the latter). * * This is quick-and-dirty because we bypass clauselist_selectivity, and * simply multiply the independent clause selectivities together. Now @@ -2232,20 +2203,34 @@ get_initplan_cost(PlannerInfo *root, SubPlan *subplan) * output tuples are generated and passed through qpqual checking, it * seems OK to live with the approximation. */ -static Selectivity -approx_selectivity(PlannerInfo *root, List *quals, SpecialJoinInfo *sjinfo) +static double +approx_tuple_count(PlannerInfo *root, JoinPath *path, + List *quals, SpecialJoinInfo *sjinfo) { - Selectivity total = 1.0; + double tuples; + double outer_tuples = path->outerjoinpath->parent->rows; + double inner_tuples = path->innerjoinpath->parent->rows; + Selectivity selec = 1.0; ListCell *l; + /* Get the approximate selectivity */ foreach(l, quals) { Node *qual = (Node *) lfirst(l); /* Note that clause_selectivity will be able to cache its result */ - total *= clause_selectivity(root, qual, 0, sjinfo->jointype, sjinfo); + selec *= clause_selectivity(root, qual, 0, sjinfo->jointype, sjinfo); } - return total; + + /* Apply it correctly using the input relation sizes */ + if (sjinfo->jointype == JOIN_SEMI) + tuples = selec * outer_tuples; + else if (sjinfo->jointype == JOIN_ANTI) + tuples = (1.0 - selec) * outer_tuples; + else + tuples = selec * outer_tuples * inner_tuples; + + return clamp_row_est(tuples); } @@ -2315,7 +2300,6 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, Selectivity jselec; Selectivity pselec; double nrows; - UniquePath *upath; /* * Compute joinclause selectivity. Note that we are only considering @@ -2380,9 +2364,8 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, * pushed-down quals are applied after the outer join, so their * selectivity applies fully. * - * For JOIN_IN and variants, the Cartesian product is figured with respect - * to a unique-ified input, and then we can clamp to the size of the other - * input. + * For JOIN_SEMI and JOIN_ANTI, the selectivity is defined as the fraction + * of LHS rows that have matches, and we apply that straightforwardly. */ switch (jointype) { @@ -2404,22 +2387,11 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, nrows *= pselec; break; case JOIN_SEMI: - /* XXX this is unsafe, could Assert? */ - upath = create_unique_path(root, inner_rel, - inner_rel->cheapest_total_path, - sjinfo); - if (upath) - nrows = outer_rel->rows * upath->rows * jselec; - else - nrows = outer_rel->rows * inner_rel->rows * jselec; - if (nrows > outer_rel->rows) - nrows = outer_rel->rows; + nrows = outer_rel->rows * jselec; + nrows *= pselec; break; case JOIN_ANTI: - /* XXX this is utterly wrong */ - nrows = outer_rel->rows * inner_rel->rows * jselec; - if (nrows < outer_rel->rows) - nrows = outer_rel->rows; + nrows = outer_rel->rows * (1.0 - jselec); nrows *= pselec; break; default: @@ -2432,67 +2404,6 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, rel->rows = clamp_row_est(nrows); } -/* - * join_in_selectivity - * Determines the factor by which a JOIN_IN join's result is expected - * to be smaller than an ordinary inner join. - * - * 'path' is already filled in except for the cost fields - * 'sjinfo' must be the JOIN_SEMI SpecialJoinInfo for the join - */ -static Selectivity -join_in_selectivity(JoinPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo) -{ - RelOptInfo *innerrel; - UniquePath *innerunique; - Selectivity selec; - double nrows; - - /* Return 1.0 whenever it's not JOIN_IN */ - if (path->jointype != JOIN_SEMI) - return 1.0; - Assert(sjinfo && sjinfo->jointype == JOIN_SEMI); - - /* - * Return 1.0 if the inner side is already known unique. The case where - * the inner path is already a UniquePath probably cannot happen in - * current usage, but check it anyway for completeness. The interesting - * case is where we've determined the inner relation itself is unique, - * which we can check by looking at the rows estimate for its UniquePath. - */ - if (IsA(path->innerjoinpath, UniquePath)) - return 1.0; - innerrel = path->innerjoinpath->parent; - /* XXX might assert if sjinfo doesn't exactly match innerrel? */ - innerunique = create_unique_path(root, - innerrel, - innerrel->cheapest_total_path, - sjinfo); - if (innerunique && innerunique->rows >= innerrel->rows) - return 1.0; - - /* - * Compute same result set_joinrel_size_estimates would compute for - * JOIN_INNER. Note that we use the input rels' absolute size estimates, - * not PATH_ROWS() which might be less; if we used PATH_ROWS() we'd be - * double-counting the effects of any join clauses used in input scans. - */ - selec = clauselist_selectivity(root, - path->joinrestrictinfo, - 0, - JOIN_INNER, - NULL); - nrows = path->outerjoinpath->parent->rows * innerrel->rows * selec; - - nrows = clamp_row_est(nrows); - - /* See if it's larger than the actual JOIN_IN size estimate */ - if (nrows > path->path.parent->rows) - return path->path.parent->rows / nrows; - else - return 1.0; -} - /* * set_function_size_estimates * Set the size estimates for a base relation that is a function call. diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c index cb9a07b77d..30e841b487 100644 --- a/src/backend/optimizer/util/plancat.c +++ b/src/backend/optimizer/util/plancat.c @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/util/plancat.c,v 1.148 2008/07/13 20:45:47 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/util/plancat.c,v 1.149 2008/08/16 00:01:36 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -830,7 +830,8 @@ Selectivity join_selectivity(PlannerInfo *root, Oid operator, List *args, - JoinType jointype) + JoinType jointype, + SpecialJoinInfo *sjinfo) { RegProcedure oprjoin = get_oprjoin(operator); float8 result; @@ -842,11 +843,12 @@ join_selectivity(PlannerInfo *root, if (!oprjoin) return (Selectivity) 0.5; - result = DatumGetFloat8(OidFunctionCall4(oprjoin, + result = DatumGetFloat8(OidFunctionCall5(oprjoin, PointerGetDatum(root), ObjectIdGetDatum(operator), PointerGetDatum(args), - Int16GetDatum(jointype))); + Int16GetDatum(jointype), + PointerGetDatum(sjinfo))); if (result < 0.0 || result > 1.0) elog(ERROR, "invalid join selectivity: %f", result); diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index 7d5609d447..d484221232 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -15,7 +15,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.251 2008/08/14 18:47:59 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.252 2008/08/16 00:01:36 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -55,19 +55,34 @@ * * float8 oprrest (internal, oid, internal, int4); * + * The result is a selectivity, that is, a fraction (0 to 1) of the rows + * of the relation that are expected to produce a TRUE result for the + * given operator. + * * The call convention for a join estimator (oprjoin function) is similar - * except that varRelid is not needed, and instead the join type is + * except that varRelid is not needed, and instead join information is * supplied: * * Selectivity oprjoin (PlannerInfo *root, * Oid operator, * List *args, - * JoinType jointype); + * JoinType jointype, + * SpecialJoinInfo *sjinfo); * - * float8 oprjoin (internal, oid, internal, int2); + * float8 oprjoin (internal, oid, internal, int2, internal); * - * (We deliberately make the SQL signature different to facilitate - * catching errors.) + * (Before Postgres 8.4, join estimators had only the first four of these + * parameters. That signature is still allowed, but deprecated.) The + * relationship between jointype and sjinfo is explained in the comments for + * clause_selectivity() --- the short version is that jointype is usually + * best ignored in favor of examining sjinfo. + * + * Join selectivity for regular inner and outer joins is defined as the + * fraction (0 to 1) of the cross product of the relations that is expected + * to produce a TRUE result for the given operator. For both semi and anti + * joins, however, the selectivity is defined as the fraction of the left-hand + * side relation's rows that are expected to have a match (ie, at least one + * row with a TRUE result) in the right-hand side. *---------- */ @@ -113,6 +128,10 @@ static double var_eq_non_const(VariableStatData *vardata, Oid operator, static double ineq_histogram_selectivity(VariableStatData *vardata, FmgrInfo *opproc, bool isgt, Datum constval, Oid consttype); +static double eqjoinsel_inner(Oid operator, + VariableStatData *vardata1, VariableStatData *vardata2); +static double eqjoinsel_semi(Oid operator, + VariableStatData *vardata1, VariableStatData *vardata2); static bool convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue, Datum lobound, Datum hibound, Oid boundstypid, double *scaledlobound, double *scaledhibound); @@ -1579,7 +1598,6 @@ scalararraysel(PlannerInfo *root, Oid nominal_element_type; RegProcedure oprsel; FmgrInfo oprselproc; - Datum selarg4; Selectivity s1; /* @@ -1587,15 +1605,9 @@ scalararraysel(PlannerInfo *root, * it hasn't got one. */ if (is_join_clause) - { oprsel = get_oprjoin(operator); - selarg4 = Int16GetDatum(jointype); - } else - { oprsel = get_oprrest(operator); - selarg4 = Int32GetDatum(varRelid); - } if (!oprsel) return (Selectivity) 0.5; fmgr_info(oprsel, &oprselproc); @@ -1660,11 +1672,19 @@ scalararraysel(PlannerInfo *root, elem_values[i], elem_nulls[i], elmbyval)); - s2 = DatumGetFloat8(FunctionCall4(&oprselproc, - PointerGetDatum(root), - ObjectIdGetDatum(operator), - PointerGetDatum(args), - selarg4)); + if (is_join_clause) + s2 = DatumGetFloat8(FunctionCall5(&oprselproc, + PointerGetDatum(root), + ObjectIdGetDatum(operator), + PointerGetDatum(args), + Int16GetDatum(jointype), + PointerGetDatum(sjinfo))); + else + s2 = DatumGetFloat8(FunctionCall4(&oprselproc, + PointerGetDatum(root), + ObjectIdGetDatum(operator), + PointerGetDatum(args), + Int32GetDatum(varRelid))); if (useOr) s1 = s1 + s2 - s1 * s2; else @@ -1694,11 +1714,19 @@ scalararraysel(PlannerInfo *root, * estimation function would really care ... */ args = list_make2(leftop, elem); - s2 = DatumGetFloat8(FunctionCall4(&oprselproc, - PointerGetDatum(root), - ObjectIdGetDatum(operator), - PointerGetDatum(args), - selarg4)); + if (is_join_clause) + s2 = DatumGetFloat8(FunctionCall5(&oprselproc, + PointerGetDatum(root), + ObjectIdGetDatum(operator), + PointerGetDatum(args), + Int16GetDatum(jointype), + PointerGetDatum(sjinfo))); + else + s2 = DatumGetFloat8(FunctionCall4(&oprselproc, + PointerGetDatum(root), + ObjectIdGetDatum(operator), + PointerGetDatum(args), + Int32GetDatum(varRelid))); if (useOr) s1 = s1 + s2 - s1 * s2; else @@ -1721,11 +1749,19 @@ scalararraysel(PlannerInfo *root, dummyexpr->typeId = nominal_element_type; dummyexpr->typeMod = -1; args = list_make2(leftop, dummyexpr); - s2 = DatumGetFloat8(FunctionCall4(&oprselproc, - PointerGetDatum(root), - ObjectIdGetDatum(operator), - PointerGetDatum(args), - selarg4)); + if (is_join_clause) + s2 = DatumGetFloat8(FunctionCall5(&oprselproc, + PointerGetDatum(root), + ObjectIdGetDatum(operator), + PointerGetDatum(args), + Int16GetDatum(jointype), + PointerGetDatum(sjinfo))); + else + s2 = DatumGetFloat8(FunctionCall4(&oprselproc, + PointerGetDatum(root), + ObjectIdGetDatum(operator), + PointerGetDatum(args), + Int32GetDatum(varRelid))); s1 = useOr ? 0.0 : 1.0; /* @@ -1803,13 +1839,24 @@ rowcomparesel(PlannerInfo *root, /* Build equivalent arg list for single operator */ opargs = list_make2(linitial(clause->largs), linitial(clause->rargs)); - /* Decide if it's a join clause, same as for OpExpr */ + /* + * Decide if it's a join clause. This should match clausesel.c's + * treat_as_join_clause(), except that we intentionally consider only + * the leading columns and not the rest of the clause. + */ if (varRelid != 0) { /* - * If we are considering a nestloop join then all clauses are - * restriction clauses, since we are only interested in the one - * relation. + * Caller is forcing restriction mode (eg, because we are examining + * an inner indexscan qual). + */ + is_join_clause = false; + } + else if (sjinfo == NULL) + { + /* + * It must be a restriction clause, since it's being evaluated at + * a scan node. */ is_join_clause = false; } @@ -1817,7 +1864,6 @@ rowcomparesel(PlannerInfo *root, { /* * Otherwise, it's a join if there's more than one relation used. - * Notice we ignore the low-order columns here. */ is_join_clause = (NumRelids((Node *) opargs) > 1); } @@ -1827,7 +1873,8 @@ rowcomparesel(PlannerInfo *root, /* Estimate selectivity for a join clause. */ s1 = join_selectivity(root, opno, opargs, - jointype); + jointype, + sjinfo); } else { @@ -1849,10 +1896,60 @@ eqjoinsel(PG_FUNCTION_ARGS) PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0); Oid operator = PG_GETARG_OID(1); List *args = (List *) PG_GETARG_POINTER(2); +#ifdef NOT_USED JoinType jointype = (JoinType) PG_GETARG_INT16(3); +#endif + SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) PG_GETARG_POINTER(4); double selec; VariableStatData vardata1; VariableStatData vardata2; + bool join_is_reversed; + + get_join_variables(root, args, sjinfo, + &vardata1, &vardata2, &join_is_reversed); + + switch (sjinfo->jointype) + { + case JOIN_INNER: + case JOIN_LEFT: + case JOIN_FULL: + selec = eqjoinsel_inner(operator, &vardata1, &vardata2); + break; + case JOIN_SEMI: + case JOIN_ANTI: + if (!join_is_reversed) + selec = eqjoinsel_semi(operator, &vardata1, &vardata2); + else + selec = eqjoinsel_semi(get_commutator(operator), + &vardata2, &vardata1); + break; + default: + /* other values not expected here */ + elog(ERROR, "unrecognized join type: %d", + (int) sjinfo->jointype); + selec = 0; /* keep compiler quiet */ + break; + } + + ReleaseVariableStats(vardata1); + ReleaseVariableStats(vardata2); + + CLAMP_PROBABILITY(selec); + + PG_RETURN_FLOAT8((float8) selec); +} + +/* + * eqjoinsel_inner --- eqjoinsel for normal inner join + * + * We also use this for LEFT/FULL outer joins; it's not presently clear + * that it's worth trying to distinguish them here. + */ +static double +eqjoinsel_inner(Oid operator, + VariableStatData *vardata1, VariableStatData *vardata2) +{ + double selec; double nd1; double nd2; Form_pg_statistic stats1 = NULL; @@ -1868,29 +1965,27 @@ eqjoinsel(PG_FUNCTION_ARGS) float4 *numbers2 = NULL; int nnumbers2 = 0; - get_join_variables(root, args, &vardata1, &vardata2); + nd1 = get_variable_numdistinct(vardata1); + nd2 = get_variable_numdistinct(vardata2); - nd1 = get_variable_numdistinct(&vardata1); - nd2 = get_variable_numdistinct(&vardata2); - - if (HeapTupleIsValid(vardata1.statsTuple)) + if (HeapTupleIsValid(vardata1->statsTuple)) { - stats1 = (Form_pg_statistic) GETSTRUCT(vardata1.statsTuple); - have_mcvs1 = get_attstatsslot(vardata1.statsTuple, - vardata1.atttype, - vardata1.atttypmod, + stats1 = (Form_pg_statistic) GETSTRUCT(vardata1->statsTuple); + have_mcvs1 = get_attstatsslot(vardata1->statsTuple, + vardata1->atttype, + vardata1->atttypmod, STATISTIC_KIND_MCV, InvalidOid, &values1, &nvalues1, &numbers1, &nnumbers1); } - if (HeapTupleIsValid(vardata2.statsTuple)) + if (HeapTupleIsValid(vardata2->statsTuple)) { - stats2 = (Form_pg_statistic) GETSTRUCT(vardata2.statsTuple); - have_mcvs2 = get_attstatsslot(vardata2.statsTuple, - vardata2.atttype, - vardata2.atttypmod, + stats2 = (Form_pg_statistic) GETSTRUCT(vardata2->statsTuple); + have_mcvs2 = get_attstatsslot(vardata2->statsTuple, + vardata2->atttype, + vardata2->atttypmod, STATISTIC_KIND_MCV, InvalidOid, &values2, &nvalues2, @@ -1932,25 +2027,6 @@ eqjoinsel(PG_FUNCTION_ARGS) hasmatch1 = (bool *) palloc0(nvalues1 * sizeof(bool)); hasmatch2 = (bool *) palloc0(nvalues2 * sizeof(bool)); - /* - * If we are doing any variant of JOIN_SEMI, pretend all the values of - * the righthand relation are unique (ie, act as if it's been - * DISTINCT'd). - * - * NOTE: it would be dangerous to try to be smart about JOIN_LEFT or - * JOIN_RIGHT here, because we do not have enough information to - * determine which var is really on which side of the join. Perhaps - * someday we should pass in more information. - */ - if (jointype == JOIN_SEMI) - { - float4 oneovern = 1.0 / nd2; - - for (i = 0; i < nvalues2; i++) - numbers2[i] = oneovern; - nullfrac2 = oneovern; - } - /* * Note we assume that each MCV will match at most one member of the * other MCV list. If the operator isn't really equality, there could @@ -2075,18 +2151,170 @@ eqjoinsel(PG_FUNCTION_ARGS) } if (have_mcvs1) - free_attstatsslot(vardata1.atttype, values1, nvalues1, + free_attstatsslot(vardata1->atttype, values1, nvalues1, numbers1, nnumbers1); if (have_mcvs2) - free_attstatsslot(vardata2.atttype, values2, nvalues2, + free_attstatsslot(vardata2->atttype, values2, nvalues2, numbers2, nnumbers2); - ReleaseVariableStats(vardata1); - ReleaseVariableStats(vardata2); + return selec; +} - CLAMP_PROBABILITY(selec); +/* + * eqjoinsel_semi --- eqjoinsel for semi join + * + * (Also used for anti join, which we are supposed to estimate the same way.) + * Caller has ensured that vardata1 is the LHS variable. + */ +static double +eqjoinsel_semi(Oid operator, + VariableStatData *vardata1, VariableStatData *vardata2) +{ + double selec; + double nd1; + double nd2; + Form_pg_statistic stats1 = NULL; + Form_pg_statistic stats2 = NULL; + bool have_mcvs1 = false; + Datum *values1 = NULL; + int nvalues1 = 0; + float4 *numbers1 = NULL; + int nnumbers1 = 0; + bool have_mcvs2 = false; + Datum *values2 = NULL; + int nvalues2 = 0; + float4 *numbers2 = NULL; + int nnumbers2 = 0; - PG_RETURN_FLOAT8((float8) selec); + nd1 = get_variable_numdistinct(vardata1); + nd2 = get_variable_numdistinct(vardata2); + + if (HeapTupleIsValid(vardata1->statsTuple)) + { + stats1 = (Form_pg_statistic) GETSTRUCT(vardata1->statsTuple); + have_mcvs1 = get_attstatsslot(vardata1->statsTuple, + vardata1->atttype, + vardata1->atttypmod, + STATISTIC_KIND_MCV, + InvalidOid, + &values1, &nvalues1, + &numbers1, &nnumbers1); + } + + if (HeapTupleIsValid(vardata2->statsTuple)) + { + stats2 = (Form_pg_statistic) GETSTRUCT(vardata2->statsTuple); + have_mcvs2 = get_attstatsslot(vardata2->statsTuple, + vardata2->atttype, + vardata2->atttypmod, + STATISTIC_KIND_MCV, + InvalidOid, + &values2, &nvalues2, + &numbers2, &nnumbers2); + } + + if (have_mcvs1 && have_mcvs2 && OidIsValid(operator)) + { + /* + * We have most-common-value lists for both relations. Run through + * the lists to see which MCVs actually join to each other with the + * given operator. This allows us to determine the exact join + * selectivity for the portion of the relations represented by the MCV + * lists. We still have to estimate for the remaining population, but + * in a skewed distribution this gives us a big leg up in accuracy. + */ + FmgrInfo eqproc; + bool *hasmatch1; + bool *hasmatch2; + double nullfrac1 = stats1->stanullfrac; + double matchfreq1; + int i, + nmatches; + + fmgr_info(get_opcode(operator), &eqproc); + hasmatch1 = (bool *) palloc0(nvalues1 * sizeof(bool)); + hasmatch2 = (bool *) palloc0(nvalues2 * sizeof(bool)); + + /* + * Note we assume that each MCV will match at most one member of the + * other MCV list. If the operator isn't really equality, there could + * be multiple matches --- but we don't look for them, both for speed + * and because the math wouldn't add up... + */ + nmatches = 0; + for (i = 0; i < nvalues1; i++) + { + int j; + + for (j = 0; j < nvalues2; j++) + { + if (hasmatch2[j]) + continue; + if (DatumGetBool(FunctionCall2(&eqproc, + values1[i], + values2[j]))) + { + hasmatch1[i] = hasmatch2[j] = true; + nmatches++; + break; + } + } + } + /* Sum up frequencies of matched MCVs */ + matchfreq1 = 0.0; + for (i = 0; i < nvalues1; i++) + { + if (hasmatch1[i]) + matchfreq1 += numbers1[i]; + } + CLAMP_PROBABILITY(matchfreq1); + pfree(hasmatch1); + pfree(hasmatch2); + + /* + * Now we need to estimate the fraction of relation 1 that has at + * least one join partner. We know for certain that the matched + * MCVs do, so that gives us a lower bound, but we're really in the + * dark about everything else. Our crude approach is: if nd1 <= nd2 + * then assume all non-null rel1 rows have join partners, else assume + * for the uncertain rows that a fraction nd2/nd1 have join partners. + * We can discount the known-matched MCVs from the distinct-values + * counts before doing the division. + */ + nd1 -= nmatches; + nd2 -= nmatches; + if (nd1 <= nd2 || nd2 <= 0) + selec = Max(matchfreq1, 1.0 - nullfrac1); + else + { + double uncertain = 1.0 - matchfreq1 - nullfrac1; + + CLAMP_PROBABILITY(uncertain); + selec = matchfreq1 + (nd2/nd1) * uncertain; + } + } + else + { + /* + * Without MCV lists for both sides, we can only use the heuristic + * about nd1 vs nd2. + */ + double nullfrac1 = stats1 ? stats1->stanullfrac : 0.0; + + if (nd1 <= nd2 || nd2 <= 0) + selec = 1.0 - nullfrac1; + else + selec = (nd2/nd1) * (1.0 - nullfrac1); + } + + if (have_mcvs1) + free_attstatsslot(vardata1->atttype, values1, nvalues1, + numbers1, nnumbers1); + if (have_mcvs2) + free_attstatsslot(vardata2->atttype, values2, nvalues2, + numbers2, nnumbers2); + + return selec; } /* @@ -2099,6 +2327,7 @@ neqjoinsel(PG_FUNCTION_ARGS) Oid operator = PG_GETARG_OID(1); List *args = (List *) PG_GETARG_POINTER(2); JoinType jointype = (JoinType) PG_GETARG_INT16(3); + SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) PG_GETARG_POINTER(4); Oid eqop; float8 result; @@ -2109,11 +2338,12 @@ neqjoinsel(PG_FUNCTION_ARGS) eqop = get_negator(operator); if (eqop) { - result = DatumGetFloat8(DirectFunctionCall4(eqjoinsel, + result = DatumGetFloat8(DirectFunctionCall5(eqjoinsel, PointerGetDatum(root), ObjectIdGetDatum(eqop), PointerGetDatum(args), - Int16GetDatum(jointype))); + Int16GetDatum(jointype), + PointerGetDatum(sjinfo))); } else { @@ -3661,10 +3891,17 @@ get_restriction_variable(PlannerInfo *root, List *args, int varRelid, /* * get_join_variables * Apply examine_variable() to each side of a join clause. + * Also, attempt to identify whether the join clause has the same + * or reversed sense compared to the SpecialJoinInfo. + * + * We consider the join clause "normal" if it is "lhs_var OP rhs_var", + * or "reversed" if it is "rhs_var OP lhs_var". In complicated cases + * where we can't tell for sure, we default to assuming it's normal. */ void -get_join_variables(PlannerInfo *root, List *args, - VariableStatData *vardata1, VariableStatData *vardata2) +get_join_variables(PlannerInfo *root, List *args, SpecialJoinInfo *sjinfo, + VariableStatData *vardata1, VariableStatData *vardata2, + bool *join_is_reversed) { Node *left, *right; @@ -3677,6 +3914,15 @@ get_join_variables(PlannerInfo *root, List *args, examine_variable(root, left, 0, vardata1); examine_variable(root, right, 0, vardata2); + + if (vardata1->rel && + bms_is_subset(vardata1->rel->relids, sjinfo->syn_righthand)) + *join_is_reversed = true; /* var1 is on RHS */ + else if (vardata2->rel && + bms_is_subset(vardata2->rel->relids, sjinfo->syn_lefthand)) + *join_is_reversed = true; /* var2 is on LHS */ + else + *join_is_reversed = false; } /* diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index b2345d1364..27501367f5 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -37,7 +37,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.477 2008/08/11 13:58:46 heikki Exp $ + * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.478 2008/08/16 00:01:37 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -53,6 +53,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 200808111 +#define CATALOG_VERSION_NO 200808141 #endif diff --git a/src/include/catalog/pg_operator.h b/src/include/catalog/pg_operator.h index e0f5b48aa4..2fc91e92e3 100644 --- a/src/include/catalog/pg_operator.h +++ b/src/include/catalog/pg_operator.h @@ -8,7 +8,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/catalog/pg_operator.h,v 1.161 2008/07/14 00:51:45 tgl Exp $ + * $PostgreSQL: pgsql/src/include/catalog/pg_operator.h,v 1.162 2008/08/16 00:01:37 tgl Exp $ * * NOTES * the genbki.sh script reads this file and generates .bki @@ -941,11 +941,11 @@ extern void OperatorCreate(const char *operatorName, Oid operatorNamespace, Oid leftTypeId, Oid rightTypeId, - List *procedureName, + Oid procedureId, List *commutatorName, List *negatorName, - List *restrictionName, - List *joinName, + Oid restrictionId, + Oid joinId, bool canMerge, bool canHash); diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h index c516af8a16..d00d36e85f 100644 --- a/src/include/catalog/pg_proc.h +++ b/src/include/catalog/pg_proc.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/catalog/pg_proc.h,v 1.509 2008/07/18 03:32:53 tgl Exp $ + * $PostgreSQL: pgsql/src/include/catalog/pg_proc.h,v 1.510 2008/08/16 00:01:37 tgl Exp $ * * NOTES * The script catalog/genbki.sh reads this file and generates .bki @@ -223,13 +223,13 @@ DATA(insert OID = 103 ( scalarltsel PGNSP PGUID 12 1 0 0 f f t f s 4 701 "22 DESCR("restriction selectivity of < and related operators on scalar datatypes"); DATA(insert OID = 104 ( scalargtsel PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 23" _null_ _null_ _null_ scalargtsel _null_ _null_ _null_ )); DESCR("restriction selectivity of > and related operators on scalar datatypes"); -DATA(insert OID = 105 ( eqjoinsel PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 21" _null_ _null_ _null_ eqjoinsel _null_ _null_ _null_ )); +DATA(insert OID = 105 ( eqjoinsel PGNSP PGUID 12 1 0 0 f f t f s 5 701 "2281 26 2281 21 2281" _null_ _null_ _null_ eqjoinsel _null_ _null_ _null_ )); DESCR("join selectivity of = and related operators"); -DATA(insert OID = 106 ( neqjoinsel PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 21" _null_ _null_ _null_ neqjoinsel _null_ _null_ _null_ )); +DATA(insert OID = 106 ( neqjoinsel PGNSP PGUID 12 1 0 0 f f t f s 5 701 "2281 26 2281 21 2281" _null_ _null_ _null_ neqjoinsel _null_ _null_ _null_ )); DESCR("join selectivity of <> and related operators"); -DATA(insert OID = 107 ( scalarltjoinsel PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 21" _null_ _null_ _null_ scalarltjoinsel _null_ _null_ _null_ )); +DATA(insert OID = 107 ( scalarltjoinsel PGNSP PGUID 12 1 0 0 f f t f s 5 701 "2281 26 2281 21 2281" _null_ _null_ _null_ scalarltjoinsel _null_ _null_ _null_ )); DESCR("join selectivity of < and related operators on scalar datatypes"); -DATA(insert OID = 108 ( scalargtjoinsel PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 21" _null_ _null_ _null_ scalargtjoinsel _null_ _null_ _null_ )); +DATA(insert OID = 108 ( scalargtjoinsel PGNSP PGUID 12 1 0 0 f f t f s 5 701 "2281 26 2281 21 2281" _null_ _null_ _null_ scalargtjoinsel _null_ _null_ _null_ )); DESCR("join selectivity of > and related operators on scalar datatypes"); DATA(insert OID = 109 ( unknownin PGNSP PGUID 12 1 0 0 f f t f i 1 705 "2275" _null_ _null_ _null_ unknownin _null_ _null_ _null_ )); @@ -289,7 +289,7 @@ DATA(insert OID = 138 ( box_center PGNSP PGUID 12 1 0 0 f f t f i 1 600 "60 DESCR("center of"); DATA(insert OID = 139 ( areasel PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 23" _null_ _null_ _null_ areasel _null_ _null_ _null_ )); DESCR("restriction selectivity for area-comparison operators"); -DATA(insert OID = 140 ( areajoinsel PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 21" _null_ _null_ _null_ areajoinsel _null_ _null_ _null_ )); +DATA(insert OID = 140 ( areajoinsel PGNSP PGUID 12 1 0 0 f f t f s 5 701 "2281 26 2281 21 2281" _null_ _null_ _null_ areajoinsel _null_ _null_ _null_ )); DESCR("join selectivity for area-comparison operators"); DATA(insert OID = 141 ( int4mul PGNSP PGUID 12 1 0 0 f f t f i 2 23 "23 23" _null_ _null_ _null_ int4mul _null_ _null_ _null_ )); DESCR("multiply"); @@ -1630,11 +1630,11 @@ DESCR("current clock time"); DATA(insert OID = 1300 ( positionsel PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 23" _null_ _null_ _null_ positionsel _null_ _null_ _null_ )); DESCR("restriction selectivity for position-comparison operators"); -DATA(insert OID = 1301 ( positionjoinsel PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 21" _null_ _null_ _null_ positionjoinsel _null_ _null_ _null_ )); +DATA(insert OID = 1301 ( positionjoinsel PGNSP PGUID 12 1 0 0 f f t f s 5 701 "2281 26 2281 21 2281" _null_ _null_ _null_ positionjoinsel _null_ _null_ _null_ )); DESCR("join selectivity for position-comparison operators"); DATA(insert OID = 1302 ( contsel PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 23" _null_ _null_ _null_ contsel _null_ _null_ _null_ )); DESCR("restriction selectivity for containment comparison operators"); -DATA(insert OID = 1303 ( contjoinsel PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 21" _null_ _null_ _null_ contjoinsel _null_ _null_ _null_ )); +DATA(insert OID = 1303 ( contjoinsel PGNSP PGUID 12 1 0 0 f f t f s 5 701 "2281 26 2281 21 2281" _null_ _null_ _null_ contjoinsel _null_ _null_ _null_ )); DESCR("join selectivity for containment comparison operators"); DATA(insert OID = 1304 ( overlaps PGNSP PGUID 12 1 0 0 f f f f i 4 16 "1184 1184 1184 1184" _null_ _null_ _null_ overlaps_timestamp _null_ _null_ _null_ )); @@ -2684,9 +2684,9 @@ DATA(insert OID = 1814 ( iclikesel PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 DESCR("restriction selectivity of ILIKE"); DATA(insert OID = 1815 ( icnlikesel PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 23" _null_ _null_ _null_ icnlikesel _null_ _null_ _null_ )); DESCR("restriction selectivity of NOT ILIKE"); -DATA(insert OID = 1816 ( iclikejoinsel PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 21" _null_ _null_ _null_ iclikejoinsel _null_ _null_ _null_ )); +DATA(insert OID = 1816 ( iclikejoinsel PGNSP PGUID 12 1 0 0 f f t f s 5 701 "2281 26 2281 21 2281" _null_ _null_ _null_ iclikejoinsel _null_ _null_ _null_ )); DESCR("join selectivity of ILIKE"); -DATA(insert OID = 1817 ( icnlikejoinsel PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 21" _null_ _null_ _null_ icnlikejoinsel _null_ _null_ _null_ )); +DATA(insert OID = 1817 ( icnlikejoinsel PGNSP PGUID 12 1 0 0 f f t f s 5 701 "2281 26 2281 21 2281" _null_ _null_ _null_ icnlikejoinsel _null_ _null_ _null_ )); DESCR("join selectivity of NOT ILIKE"); DATA(insert OID = 1818 ( regexeqsel PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 23" _null_ _null_ _null_ regexeqsel _null_ _null_ _null_ )); DESCR("restriction selectivity of regex match"); @@ -2700,17 +2700,17 @@ DATA(insert OID = 1822 ( nlikesel PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 2 DESCR("restriction selectivity of NOT LIKE"); DATA(insert OID = 1823 ( icregexnesel PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 23" _null_ _null_ _null_ icregexnesel _null_ _null_ _null_ )); DESCR("restriction selectivity of case-insensitive regex non-match"); -DATA(insert OID = 1824 ( regexeqjoinsel PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 21" _null_ _null_ _null_ regexeqjoinsel _null_ _null_ _null_ )); +DATA(insert OID = 1824 ( regexeqjoinsel PGNSP PGUID 12 1 0 0 f f t f s 5 701 "2281 26 2281 21 2281" _null_ _null_ _null_ regexeqjoinsel _null_ _null_ _null_ )); DESCR("join selectivity of regex match"); -DATA(insert OID = 1825 ( likejoinsel PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 21" _null_ _null_ _null_ likejoinsel _null_ _null_ _null_ )); +DATA(insert OID = 1825 ( likejoinsel PGNSP PGUID 12 1 0 0 f f t f s 5 701 "2281 26 2281 21 2281" _null_ _null_ _null_ likejoinsel _null_ _null_ _null_ )); DESCR("join selectivity of LIKE"); -DATA(insert OID = 1826 ( icregexeqjoinsel PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 21" _null_ _null_ _null_ icregexeqjoinsel _null_ _null_ _null_ )); +DATA(insert OID = 1826 ( icregexeqjoinsel PGNSP PGUID 12 1 0 0 f f t f s 5 701 "2281 26 2281 21 2281" _null_ _null_ _null_ icregexeqjoinsel _null_ _null_ _null_ )); DESCR("join selectivity of case-insensitive regex match"); -DATA(insert OID = 1827 ( regexnejoinsel PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 21" _null_ _null_ _null_ regexnejoinsel _null_ _null_ _null_ )); +DATA(insert OID = 1827 ( regexnejoinsel PGNSP PGUID 12 1 0 0 f f t f s 5 701 "2281 26 2281 21 2281" _null_ _null_ _null_ regexnejoinsel _null_ _null_ _null_ )); DESCR("join selectivity of regex non-match"); -DATA(insert OID = 1828 ( nlikejoinsel PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 21" _null_ _null_ _null_ nlikejoinsel _null_ _null_ _null_ )); +DATA(insert OID = 1828 ( nlikejoinsel PGNSP PGUID 12 1 0 0 f f t f s 5 701 "2281 26 2281 21 2281" _null_ _null_ _null_ nlikejoinsel _null_ _null_ _null_ )); DESCR("join selectivity of NOT LIKE"); -DATA(insert OID = 1829 ( icregexnejoinsel PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 21" _null_ _null_ _null_ icregexnejoinsel _null_ _null_ _null_ )); +DATA(insert OID = 1829 ( icregexnejoinsel PGNSP PGUID 12 1 0 0 f f t f s 5 701 "2281 26 2281 21 2281" _null_ _null_ _null_ icregexnejoinsel _null_ _null_ _null_ )); DESCR("join selectivity of case-insensitive regex non-match"); /* Aggregate-related functions */ diff --git a/src/include/optimizer/plancat.h b/src/include/optimizer/plancat.h index 7471d1b44d..7963b745f5 100644 --- a/src/include/optimizer/plancat.h +++ b/src/include/optimizer/plancat.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/optimizer/plancat.h,v 1.50 2008/06/19 00:46:06 alvherre Exp $ + * $PostgreSQL: pgsql/src/include/optimizer/plancat.h,v 1.51 2008/08/16 00:01:38 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -50,6 +50,7 @@ extern Selectivity restriction_selectivity(PlannerInfo *root, extern Selectivity join_selectivity(PlannerInfo *root, Oid operator, List *args, - JoinType jointype); + JoinType jointype, + SpecialJoinInfo *sjinfo); #endif /* PLANCAT_H */ diff --git a/src/include/utils/selfuncs.h b/src/include/utils/selfuncs.h index e94e7e5916..120269eee5 100644 --- a/src/include/utils/selfuncs.h +++ b/src/include/utils/selfuncs.h @@ -8,7 +8,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/utils/selfuncs.h,v 1.45 2008/08/14 18:48:00 tgl Exp $ + * $PostgreSQL: pgsql/src/include/utils/selfuncs.h,v 1.46 2008/08/16 00:01:38 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -104,8 +104,10 @@ extern bool get_restriction_variable(PlannerInfo *root, List *args, VariableStatData *vardata, Node **other, bool *varonleft); extern void get_join_variables(PlannerInfo *root, List *args, + SpecialJoinInfo *sjinfo, VariableStatData *vardata1, - VariableStatData *vardata2); + VariableStatData *vardata2, + bool *join_is_reversed); extern double get_variable_numdistinct(VariableStatData *vardata); extern double mcv_selectivity(VariableStatData *vardata, FmgrInfo *opproc, Datum constval, bool varonleft, diff --git a/src/test/regress/expected/opr_sanity.out b/src/test/regress/expected/opr_sanity.out index 533bac3ab6..a21c2ba374 100644 --- a/src/test/regress/expected/opr_sanity.out +++ b/src/test/regress/expected/opr_sanity.out @@ -543,17 +543,20 @@ WHERE p1.oprrest = p2.oid AND -- If oprjoin is set, the operator must be a binary boolean op, -- and it must link to a proc with the right signature -- to be a join selectivity estimator. --- The proc signature we want is: float8 proc(internal, oid, internal, int2) +-- The proc signature we want is: float8 proc(internal, oid, internal, int2, internal) +-- (Note: the old signature with only 4 args is still allowed, but no core +-- estimator should be using it.) SELECT p1.oid, p1.oprname, p2.oid, p2.proname FROM pg_operator AS p1, pg_proc AS p2 WHERE p1.oprjoin = p2.oid AND (p1.oprkind != 'b' OR p1.oprresult != 'bool'::regtype OR p2.prorettype != 'float8'::regtype OR p2.proretset OR - p2.pronargs != 4 OR + p2.pronargs != 5 OR p2.proargtypes[0] != 'internal'::regtype OR p2.proargtypes[1] != 'oid'::regtype OR p2.proargtypes[2] != 'internal'::regtype OR - p2.proargtypes[3] != 'int2'::regtype); + p2.proargtypes[3] != 'int2'::regtype OR + p2.proargtypes[4] != 'internal'::regtype); oid | oprname | oid | proname -----+---------+-----+--------- (0 rows) diff --git a/src/test/regress/sql/opr_sanity.sql b/src/test/regress/sql/opr_sanity.sql index 5017849830..57c2cad951 100644 --- a/src/test/regress/sql/opr_sanity.sql +++ b/src/test/regress/sql/opr_sanity.sql @@ -444,18 +444,21 @@ WHERE p1.oprrest = p2.oid AND -- If oprjoin is set, the operator must be a binary boolean op, -- and it must link to a proc with the right signature -- to be a join selectivity estimator. --- The proc signature we want is: float8 proc(internal, oid, internal, int2) +-- The proc signature we want is: float8 proc(internal, oid, internal, int2, internal) +-- (Note: the old signature with only 4 args is still allowed, but no core +-- estimator should be using it.) SELECT p1.oid, p1.oprname, p2.oid, p2.proname FROM pg_operator AS p1, pg_proc AS p2 WHERE p1.oprjoin = p2.oid AND (p1.oprkind != 'b' OR p1.oprresult != 'bool'::regtype OR p2.prorettype != 'float8'::regtype OR p2.proretset OR - p2.pronargs != 4 OR + p2.pronargs != 5 OR p2.proargtypes[0] != 'internal'::regtype OR p2.proargtypes[1] != 'oid'::regtype OR p2.proargtypes[2] != 'internal'::regtype OR - p2.proargtypes[3] != 'int2'::regtype); + p2.proargtypes[3] != 'int2'::regtype OR + p2.proargtypes[4] != 'internal'::regtype); -- **************** pg_aggregate ****************