diff --git a/src/backend/catalog/pg_operator.c b/src/backend/catalog/pg_operator.c
index 11690aa8ce..94b3bf2862 100644
--- a/src/backend/catalog/pg_operator.c
+++ b/src/backend/catalog/pg_operator.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/catalog/pg_operator.c,v 1.104 2008/06/19 00:46:04 alvherre Exp $
+ *	  $PostgreSQL: pgsql/src/backend/catalog/pg_operator.c,v 1.105 2008/08/16 00:01:35 tgl Exp $
  *
  * NOTES
  *	  these routines moved here from commands/define.c and somewhat cleaned up.
@@ -21,12 +21,12 @@
 #include "access/xact.h"
 #include "catalog/dependency.h"
 #include "catalog/indexing.h"
+#include "catalog/namespace.h"
 #include "catalog/pg_namespace.h"
 #include "catalog/pg_operator.h"
 #include "catalog/pg_proc.h"
 #include "catalog/pg_type.h"
 #include "miscadmin.h"
-#include "parser/parse_func.h"
 #include "parser/parse_oper.h"
 #include "utils/acl.h"
 #include "utils/builtins.h"
@@ -273,6 +273,11 @@ OperatorShellMake(const char *operatorName,
 
 	heap_freetuple(tup);
 
+	/*
+	 * Make sure the tuple is visible for subsequent lookups/updates.
+	 */
+	CommandCounterIncrement();
+
 	/*
 	 * close the operator relation and return the oid.
 	 */
@@ -289,14 +294,19 @@ OperatorShellMake(const char *operatorName,
  *		operatorNamespace		namespace for new operator
  *		leftTypeId				X left type ID
  *		rightTypeId				X right type ID
- *		procedureName			procedure for operator
+ *		procedureId				procedure ID for operator
  *		commutatorName			X commutator operator
  *		negatorName				X negator operator
- *		restrictionName			X restriction sel. procedure
- *		joinName				X join sel. procedure
+ *		restrictionId			X restriction selectivity procedure ID
+ *		joinId					X join selectivity procedure ID
  *		canMerge				merge join can be used with this operator
  *		canHash					hash join can be used with this operator
  *
+ * The caller should have validated properties and permissions for the
+ * objects passed as OID references.  We must handle the commutator and
+ * negator operator references specially, however, since those need not
+ * exist beforehand.
+ *
  * This routine gets complicated because it allows the user to
  * specify operators that do not exist.  For example, if operator
  * "op" is being defined, the negator operator "negop" and the
@@ -310,59 +320,17 @@ OperatorShellMake(const char *operatorName,
  * information about the operator (just its name and types).
  * Forward declaration is used only for this purpose, it is
  * not available to the user as it is for type definition.
- *
- * Algorithm:
- *
- * check if operator already defined
- *	  if so, but oprcode is null, save the Oid -- we are filling in a shell
- *	  otherwise error
- * get the attribute types from relation descriptor for pg_operator
- * assign values to the fields of the operator:
- *	 operatorName
- *	 owner id (simply the user id of the caller)
- *	 operator "kind" either "b" for binary or "l" for left unary
- *	 canMerge boolean
- *	 canHash boolean
- *	 leftTypeObjectId -- type must already be defined
- *	 rightTypeObjectId -- this is optional, enter ObjectId=0 if none specified
- *	 resultType -- defer this, since it must be determined from
- *				   the pg_procedure catalog
- *	 commutatorObjectId -- if this is NULL, enter ObjectId=0
- *					  else if this already exists, enter its ObjectId
- *					  else if this does not yet exist, and is not
- *						the same as the main operatorName, then create
- *						a shell and enter the new ObjectId
- *					  else if this does not exist but IS the same
- *						name & types as the main operator, set the ObjectId=0.
- *						(We are creating a self-commutating operator.)
- *						The link will be fixed later by OperatorUpd.
- *	 negatorObjectId   -- same as for commutatorObjectId
- *	 operatorProcedure -- must access the pg_procedure catalog to get the
- *				   ObjectId of the procedure that actually does the operator
- *				   actions this is required.  Do a lookup to find out the
- *				   return type of the procedure
- *	 restrictionProcedure -- must access the pg_procedure catalog to get
- *				   the ObjectId but this is optional
- *	 joinProcedure -- same as restrictionProcedure
- * now either insert or replace the operator into the pg_operator catalog
- * if the operator shell is being filled in
- *	 access the catalog in order to get a valid buffer
- *	 create a tuple using ModifyHeapTuple
- *	 get the t_self from the modified tuple and call RelationReplaceHeapTuple
- * else if a new operator is being created
- *	 create a tuple using heap_formtuple
- *	 call simple_heap_insert
  */
 void
 OperatorCreate(const char *operatorName,
 			   Oid operatorNamespace,
 			   Oid leftTypeId,
 			   Oid rightTypeId,
-			   List *procedureName,
+			   Oid procedureId,
 			   List *commutatorName,
 			   List *negatorName,
-			   List *restrictionName,
-			   List *joinName,
+			   Oid restrictionId,
+			   Oid joinId,
 			   bool canMerge,
 			   bool canHash)
 {
@@ -373,15 +341,10 @@ OperatorCreate(const char *operatorName,
 	Datum		values[Natts_pg_operator];
 	Oid			operatorObjectId;
 	bool		operatorAlreadyDefined;
-	Oid			procOid;
 	Oid			operResultType;
 	Oid			commutatorId,
-				negatorId,
-				restOid,
-				joinOid;
+				negatorId;
 	bool		selfCommutator = false;
-	Oid			typeId[4];		/* only need up to 4 args here */
-	int			nargs;
 	NameData	oname;
 	TupleDesc	tupDesc;
 	int			i;
@@ -395,11 +358,6 @@ OperatorCreate(const char *operatorName,
 				 errmsg("\"%s\" is not a valid operator name",
 						operatorName)));
 
-	if (!OidIsValid(leftTypeId) && !OidIsValid(rightTypeId))
-		ereport(ERROR,
-				(errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
-		   errmsg("at least one of leftarg or rightarg must be specified")));
-
 	if (!(OidIsValid(leftTypeId) && OidIsValid(rightTypeId)))
 	{
 		/* If it's not a binary op, these things mustn't be set: */
@@ -407,7 +365,7 @@ OperatorCreate(const char *operatorName,
 			ereport(ERROR,
 					(errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
 					 errmsg("only binary operators can have commutators")));
-		if (joinName)
+		if (OidIsValid(joinId))
 			ereport(ERROR,
 					(errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
 				 errmsg("only binary operators can have join selectivity")));
@@ -421,6 +379,33 @@ OperatorCreate(const char *operatorName,
 					 errmsg("only binary operators can hash")));
 	}
 
+	operResultType = get_func_rettype(procedureId);
+
+	if (operResultType != BOOLOID)
+	{
+		/* If it's not a boolean op, these things mustn't be set: */
+		if (negatorName)
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
+					 errmsg("only boolean operators can have negators")));
+		if (OidIsValid(restrictionId))
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
+				 errmsg("only boolean operators can have restriction selectivity")));
+		if (OidIsValid(joinId))
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
+				 errmsg("only boolean operators can have join selectivity")));
+		if (canMerge)
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
+					 errmsg("only boolean operators can merge join")));
+		if (canHash)
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
+					 errmsg("only boolean operators can hash")));
+	}
+
 	operatorObjectId = OperatorGet(operatorName,
 								   operatorNamespace,
 								   leftTypeId,
@@ -435,62 +420,61 @@ OperatorCreate(const char *operatorName,
 
 	/*
 	 * At this point, if operatorObjectId is not InvalidOid then we are
-	 * filling in a previously-created shell.
+	 * filling in a previously-created shell.  Insist that the user own
+	 * any such shell.
 	 */
+	if (OidIsValid(operatorObjectId) &&
+		!pg_oper_ownercheck(operatorObjectId, GetUserId()))
+		aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_OPER,
+					   operatorName);
 
 	/*
-	 * Look up registered procedures -- find the return type of procedureName
-	 * to place in "result" field. Do this before shells are created so we
-	 * don't have to worry about deleting them later.
+	 * Set up the other operators.	If they do not currently exist, create
+	 * shells in order to get ObjectId's.
 	 */
-	if (!OidIsValid(leftTypeId))
+
+	if (commutatorName)
 	{
-		typeId[0] = rightTypeId;
-		nargs = 1;
-	}
-	else if (!OidIsValid(rightTypeId))
-	{
-		typeId[0] = leftTypeId;
-		nargs = 1;
+		/* commutator has reversed arg types */
+		commutatorId = get_other_operator(commutatorName,
+										  rightTypeId, leftTypeId,
+										  operatorName, operatorNamespace,
+										  leftTypeId, rightTypeId,
+										  true);
+
+		/* Permission check: must own other operator */
+		if (OidIsValid(commutatorId) &&
+			!pg_oper_ownercheck(commutatorId, GetUserId()))
+			aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_OPER,
+						   NameListToString(commutatorName));
+
+		/*
+		 * self-linkage to this operator; will fix below. Note that only
+		 * self-linkage for commutation makes sense.
+		 */
+		if (!OidIsValid(commutatorId))
+			selfCommutator = true;
 	}
 	else
-	{
-		typeId[0] = leftTypeId;
-		typeId[1] = rightTypeId;
-		nargs = 2;
-	}
-	procOid = LookupFuncName(procedureName, nargs, typeId, false);
-	operResultType = get_func_rettype(procOid);
+		commutatorId = InvalidOid;
 
-	/*
-	 * find restriction estimator
-	 */
-	if (restrictionName)
+	if (negatorName)
 	{
-		typeId[0] = INTERNALOID;	/* Query */
-		typeId[1] = OIDOID;		/* operator OID */
-		typeId[2] = INTERNALOID;	/* args list */
-		typeId[3] = INT4OID;	/* varRelid */
+		/* negator has same arg types */
+		negatorId = get_other_operator(negatorName,
+									   leftTypeId, rightTypeId,
+									   operatorName, operatorNamespace,
+									   leftTypeId, rightTypeId,
+									   false);
 
-		restOid = LookupFuncName(restrictionName, 4, typeId, false);
+		/* Permission check: must own other operator */
+		if (OidIsValid(negatorId) &&
+			!pg_oper_ownercheck(negatorId, GetUserId()))
+			aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_OPER,
+						   NameListToString(negatorName));
 	}
 	else
-		restOid = InvalidOid;
-
-	/*
-	 * find join estimator
-	 */
-	if (joinName)
-	{
-		typeId[0] = INTERNALOID;	/* Query */
-		typeId[1] = OIDOID;		/* operator OID */
-		typeId[2] = INTERNALOID;	/* args list */
-		typeId[3] = INT2OID;	/* jointype */
-
-		joinOid = LookupFuncName(joinName, 4, typeId, false);
-	}
-	else
-		joinOid = InvalidOid;
+		negatorId = InvalidOid;
 
 	/*
 	 * set up values in the operator tuple
@@ -514,53 +498,16 @@ OperatorCreate(const char *operatorName,
 	values[i++] = ObjectIdGetDatum(leftTypeId); /* oprleft */
 	values[i++] = ObjectIdGetDatum(rightTypeId);		/* oprright */
 	values[i++] = ObjectIdGetDatum(operResultType);		/* oprresult */
-
-	/*
-	 * Set up the other operators.	If they do not currently exist, create
-	 * shells in order to get ObjectId's.
-	 */
-
-	if (commutatorName)
-	{
-		/* commutator has reversed arg types */
-		commutatorId = get_other_operator(commutatorName,
-										  rightTypeId, leftTypeId,
-										  operatorName, operatorNamespace,
-										  leftTypeId, rightTypeId,
-										  true);
-
-		/*
-		 * self-linkage to this operator; will fix below. Note that only
-		 * self-linkage for commutation makes sense.
-		 */
-		if (!OidIsValid(commutatorId))
-			selfCommutator = true;
-	}
-	else
-		commutatorId = InvalidOid;
 	values[i++] = ObjectIdGetDatum(commutatorId);		/* oprcom */
-
-	if (negatorName)
-	{
-		/* negator has same arg types */
-		negatorId = get_other_operator(negatorName,
-									   leftTypeId, rightTypeId,
-									   operatorName, operatorNamespace,
-									   leftTypeId, rightTypeId,
-									   false);
-	}
-	else
-		negatorId = InvalidOid;
-	values[i++] = ObjectIdGetDatum(negatorId);	/* oprnegate */
-
-	values[i++] = ObjectIdGetDatum(procOid);	/* oprcode */
-	values[i++] = ObjectIdGetDatum(restOid);	/* oprrest */
-	values[i++] = ObjectIdGetDatum(joinOid);	/* oprjoin */
+	values[i++] = ObjectIdGetDatum(negatorId);		/* oprnegate */
+	values[i++] = ObjectIdGetDatum(procedureId);	/* oprcode */
+	values[i++] = ObjectIdGetDatum(restrictionId);	/* oprrest */
+	values[i++] = ObjectIdGetDatum(joinId);			/* oprjoin */
 
 	pg_operator_desc = heap_open(OperatorRelationId, RowExclusiveLock);
 
 	/*
-	 * If we are adding to an operator shell, update; else insert
+	 * If we are replacing an operator shell, update; else insert
 	 */
 	if (operatorObjectId)
 	{
@@ -706,7 +653,8 @@ OperatorUpd(Oid baseId, Oid commId, Oid negId)
 	/*
 	 * check and update the commutator & negator, if necessary
 	 *
-	 * First make sure we can see them...
+	 * We need a CommandCounterIncrement here in case of a self-commutator
+	 * operator: we'll need to update the tuple that we just inserted.
 	 */
 	CommandCounterIncrement();
 
diff --git a/src/backend/commands/operatorcmds.c b/src/backend/commands/operatorcmds.c
index 884181c5cd..391b4d6507 100644
--- a/src/backend/commands/operatorcmds.c
+++ b/src/backend/commands/operatorcmds.c
@@ -9,7 +9,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/commands/operatorcmds.c,v 1.40 2008/06/19 00:46:04 alvherre Exp $
+ *	  $PostgreSQL: pgsql/src/backend/commands/operatorcmds.c,v 1.41 2008/08/16 00:01:35 tgl Exp $
  *
  * DESCRIPTION
  *	  The "DefineFoo" routines take the parse tree and pick out the
@@ -39,8 +39,10 @@
 #include "catalog/indexing.h"
 #include "catalog/namespace.h"
 #include "catalog/pg_operator.h"
+#include "catalog/pg_type.h"
 #include "commands/defrem.h"
 #include "miscadmin.h"
+#include "parser/parse_func.h"
 #include "parser/parse_oper.h"
 #include "parser/parse_type.h"
 #include "utils/acl.h"
@@ -76,6 +78,11 @@ DefineOperator(List *names, List *parameters)
 	List	   *negatorName = NIL;		/* optional negator operator name */
 	List	   *restrictionName = NIL;	/* optional restrict. sel. procedure */
 	List	   *joinName = NIL; /* optional join sel. procedure */
+	Oid			functionOid;	/* functions converted to OID */
+	Oid			restrictionOid;
+	Oid			joinOid;
+	Oid			typeId[5];		/* only need up to 5 args here */
+	int			nargs;
 	ListCell   *pl;
 
 	/* Convert list of names to a name and namespace */
@@ -154,6 +161,109 @@ DefineOperator(List *names, List *parameters)
 	if (typeName2)
 		typeId2 = typenameTypeId(NULL, typeName2, NULL);
 
+	if (!OidIsValid(typeId1) && !OidIsValid(typeId2))
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
+		   errmsg("at least one of leftarg or rightarg must be specified")));
+
+	/*
+	 * Look up the operator's underlying function.
+	 */
+	if (!OidIsValid(typeId1))
+	{
+		typeId[0] = typeId2;
+		nargs = 1;
+	}
+	else if (!OidIsValid(typeId2))
+	{
+		typeId[0] = typeId1;
+		nargs = 1;
+	}
+	else
+	{
+		typeId[0] = typeId1;
+		typeId[1] = typeId2;
+		nargs = 2;
+	}
+	functionOid = LookupFuncName(functionName, nargs, typeId, false);
+
+	/*
+	 * We require EXECUTE rights for the function.  This isn't strictly
+	 * necessary, since EXECUTE will be checked at any attempted use of
+	 * the operator, but it seems like a good idea anyway.
+	 */
+	aclresult = pg_proc_aclcheck(functionOid, GetUserId(), ACL_EXECUTE);
+	if (aclresult != ACLCHECK_OK)
+		aclcheck_error(aclresult, ACL_KIND_PROC,
+					   NameListToString(functionName));
+
+	/*
+	 * Look up restriction estimator if specified
+	 */
+	if (restrictionName)
+	{
+		typeId[0] = INTERNALOID;	/* PlannerInfo */
+		typeId[1] = OIDOID;		/* operator OID */
+		typeId[2] = INTERNALOID;	/* args list */
+		typeId[3] = INT4OID;	/* varRelid */
+
+		restrictionOid = LookupFuncName(restrictionName, 4, typeId, false);
+
+		/* estimators must return float8 */
+		if (get_func_rettype(restrictionOid) != FLOAT8OID)
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+					 errmsg("restriction estimator function %s must return type \"float8\"",
+							NameListToString(restrictionName))));
+
+		/* Require EXECUTE rights for the estimator */
+		aclresult = pg_proc_aclcheck(restrictionOid, GetUserId(), ACL_EXECUTE);
+		if (aclresult != ACLCHECK_OK)
+			aclcheck_error(aclresult, ACL_KIND_PROC,
+						   NameListToString(restrictionName));
+	}
+	else
+		restrictionOid = InvalidOid;
+
+	/*
+	 * Look up join estimator if specified
+	 */
+	if (joinName)
+	{
+		typeId[0] = INTERNALOID;	/* PlannerInfo */
+		typeId[1] = OIDOID;		/* operator OID */
+		typeId[2] = INTERNALOID;	/* args list */
+		typeId[3] = INT2OID;	/* jointype */
+		typeId[4] = INTERNALOID;	/* SpecialJoinInfo */
+
+		/*
+		 * As of Postgres 8.4, the preferred signature for join estimators
+		 * has 5 arguments, but we still allow the old 4-argument form.
+		 * Try the preferred form first.
+		 */
+		joinOid = LookupFuncName(joinName, 5, typeId, true);
+		if (!OidIsValid(joinOid))
+			joinOid = LookupFuncName(joinName, 4, typeId, true);
+		/* If not found, reference the 5-argument signature in error msg */
+		if (!OidIsValid(joinOid))
+			joinOid = LookupFuncName(joinName, 5, typeId, false);
+
+		/* estimators must return float8 */
+		if (get_func_rettype(joinOid) != FLOAT8OID)
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+					 errmsg("join estimator function %s must return type \"float8\"",
+							NameListToString(joinName))));
+
+		/* Require EXECUTE rights for the estimator */
+		aclresult = pg_proc_aclcheck(joinOid, GetUserId(), ACL_EXECUTE);
+		if (aclresult != ACLCHECK_OK)
+			aclcheck_error(aclresult, ACL_KIND_PROC,
+						   NameListToString(joinName));
+	}
+	else
+		joinOid = InvalidOid;
+
 	/*
 	 * now have OperatorCreate do all the work..
 	 */
@@ -161,11 +271,11 @@ DefineOperator(List *names, List *parameters)
 				   oprNamespace,	/* namespace */
 				   typeId1,		/* left type id */
 				   typeId2,		/* right type id */
-				   functionName,	/* function for operator */
+				   functionOid,	/* function for operator */
 				   commutatorName,		/* optional commutator operator name */
 				   negatorName, /* optional negator operator name */
-				   restrictionName,		/* optional restrict. sel. procedure */
-				   joinName,	/* optional join sel. procedure name */
+				   restrictionOid,		/* optional restrict. sel. procedure */
+				   joinOid,		/* optional join sel. procedure name */
 				   canMerge,	/* operator merges */
 				   canHash);	/* operator hashes */
 }
diff --git a/src/backend/optimizer/path/clausesel.c b/src/backend/optimizer/path/clausesel.c
index e788f27873..6db42c5f58 100644
--- a/src/backend/optimizer/path/clausesel.c
+++ b/src/backend/optimizer/path/clausesel.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/path/clausesel.c,v 1.91 2008/08/14 18:47:59 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/path/clausesel.c,v 1.92 2008/08/16 00:01:36 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -398,6 +398,50 @@ bms_is_subset_singleton(const Bitmapset *s, int x)
 	return false;
 }
 
+/*
+ * treat_as_join_clause -
+ *	  Decide whether an operator clause is to be handled by the
+ *	  restriction or join estimator.  Subroutine for clause_selectivity().
+ */
+static inline bool
+treat_as_join_clause(Node *clause, RestrictInfo *rinfo,
+					 int varRelid, SpecialJoinInfo *sjinfo)
+{
+	if (varRelid != 0)
+	{
+		/*
+		 * Caller is forcing restriction mode (eg, because we are examining
+		 * an inner indexscan qual).
+		 */
+		return false;
+	}
+	else if (sjinfo == NULL)
+	{
+		/*
+		 * It must be a restriction clause, since it's being evaluated at
+		 * a scan node.
+		 */
+		return false;
+	}
+	else
+	{
+		/*
+		 * Otherwise, it's a join if there's more than one relation used.
+		 * We can optimize this calculation if an rinfo was passed.
+		 *
+		 * XXX  Since we know the clause is being evaluated at a join,
+		 * the only way it could be single-relation is if it was delayed
+		 * by outer joins.  Although we can make use of the restriction
+		 * qual estimators anyway, it seems likely that we ought to account
+		 * for the probability of injected nulls somehow.
+		 */
+		if (rinfo)
+			return (bms_membership(rinfo->clause_relids) == BMS_MULTIPLE);
+		else
+			return (NumRelids(clause) > 1);
+	}
+}
+
 
 /*
  * clause_selectivity -
@@ -429,9 +473,6 @@ bms_is_subset_singleton(const Bitmapset *s, int x)
  *	   root->join_info_list.
  *	2. For an INNER join, sjinfo is just a transient struct, and only the
  *	   relids and jointype fields in it can be trusted.
- *	3. XXX sjinfo might be NULL even though it really is a join.  This case
- *	   will go away soon, but fixing it requires API changes for oprjoin and
- *	   amcostestimate functions.
  * It is possible for jointype to be different from sjinfo->jointype.
  * This indicates we are considering a variant join: either with
  * the LHS and RHS switched, or with one input unique-ified.
@@ -603,36 +644,14 @@ clause_selectivity(PlannerInfo *root,
 	else if (is_opclause(clause) || IsA(clause, DistinctExpr))
 	{
 		Oid			opno = ((OpExpr *) clause)->opno;
-		bool		is_join_clause;
 
-		if (varRelid != 0)
-		{
-			/*
-			 * If we are considering a nestloop join then all clauses are
-			 * restriction clauses, since we are only interested in the one
-			 * relation.
-			 */
-			is_join_clause = false;
-		}
-		else
-		{
-			/*
-			 * Otherwise, it's a join if there's more than one relation used.
-			 * We can optimize this calculation if an rinfo was passed.
-			 */
-			if (rinfo)
-				is_join_clause = (bms_membership(rinfo->clause_relids) ==
-								  BMS_MULTIPLE);
-			else
-				is_join_clause = (NumRelids(clause) > 1);
-		}
-
-		if (is_join_clause)
+		if (treat_as_join_clause(clause, rinfo, varRelid, sjinfo))
 		{
 			/* Estimate selectivity for a join clause. */
 			s1 = join_selectivity(root, opno,
 								  ((OpExpr *) clause)->args,
-								  jointype);
+								  jointype,
+								  sjinfo);
 		}
 		else
 		{
@@ -671,35 +690,11 @@ clause_selectivity(PlannerInfo *root,
 #endif
 	else if (IsA(clause, ScalarArrayOpExpr))
 	{
-		/* First, decide if it's a join clause, same as for OpExpr */
-		bool		is_join_clause;
-
-		if (varRelid != 0)
-		{
-			/*
-			 * If we are considering a nestloop join then all clauses are
-			 * restriction clauses, since we are only interested in the one
-			 * relation.
-			 */
-			is_join_clause = false;
-		}
-		else
-		{
-			/*
-			 * Otherwise, it's a join if there's more than one relation used.
-			 * We can optimize this calculation if an rinfo was passed.
-			 */
-			if (rinfo)
-				is_join_clause = (bms_membership(rinfo->clause_relids) ==
-								  BMS_MULTIPLE);
-			else
-				is_join_clause = (NumRelids(clause) > 1);
-		}
-
 		/* Use node specific selectivity calculation function */
 		s1 = scalararraysel(root,
 							(ScalarArrayOpExpr *) clause,
-							is_join_clause,
+							treat_as_join_clause(clause, rinfo,
+												 varRelid, sjinfo),
 							varRelid,
 							jointype,
 							sjinfo);
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index d9d8c89864..14c3cf1f21 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -54,7 +54,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.193 2008/08/14 18:47:59 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.194 2008/08/16 00:01:36 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -118,10 +118,8 @@ static MergeScanSelCache *cached_scansel(PlannerInfo *root,
 			   RestrictInfo *rinfo,
 			   PathKey *pathkey);
 static bool cost_qual_eval_walker(Node *node, cost_qual_eval_context *context);
-static Selectivity approx_selectivity(PlannerInfo *root, List *quals,
-				   SpecialJoinInfo *sjinfo);
-static Selectivity join_in_selectivity(JoinPath *path, PlannerInfo *root,
-									   SpecialJoinInfo *sjinfo);
+static double approx_tuple_count(PlannerInfo *root, JoinPath *path,
+								 List *quals, SpecialJoinInfo *sjinfo);
 static void set_rel_width(PlannerInfo *root, RelOptInfo *rel);
 static double relation_byte_size(double tuples, int width);
 static double page_size(double tuples, int width);
@@ -1288,20 +1286,10 @@ cost_nestloop(NestPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
 	double		outer_path_rows = PATH_ROWS(outer_path);
 	double		inner_path_rows = nestloop_inner_path_rows(inner_path);
 	double		ntuples;
-	Selectivity joininfactor;
 
 	if (!enable_nestloop)
 		startup_cost += disable_cost;
 
-	/*
-	 * If we're doing JOIN_IN then we will stop scanning inner tuples for an
-	 * outer tuple as soon as we have one match.  Account for the effects of
-	 * this by scaling down the cost estimates in proportion to the JOIN_IN
-	 * selectivity.  (This assumes that all the quals attached to the join are
-	 * IN quals, which should be true.)
-	 */
-	joininfactor = join_in_selectivity(path, root, sjinfo);
-
 	/* cost of source data */
 
 	/*
@@ -1328,12 +1316,12 @@ cost_nestloop(NestPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
 		run_cost += (outer_path_rows - 1) * inner_path->startup_cost;
 	}
 	run_cost += outer_path_rows *
-		(inner_path->total_cost - inner_path->startup_cost) * joininfactor;
+		(inner_path->total_cost - inner_path->startup_cost);
 
 	/*
 	 * Compute number of tuples processed (not number emitted!)
 	 */
-	ntuples = outer_path_rows * inner_path_rows * joininfactor;
+	ntuples = outer_path_rows * inner_path_rows;
 
 	/* CPU costs */
 	cost_qual_eval(&restrict_qual_cost, path->joinrestrictinfo, root);
@@ -1369,7 +1357,6 @@ cost_mergejoin(MergePath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
 	Cost		startup_cost = 0;
 	Cost		run_cost = 0;
 	Cost		cpu_per_tuple;
-	Selectivity merge_selec;
 	QualCost	merge_qual_cost;
 	QualCost	qp_qual_cost;
 	double		outer_path_rows = PATH_ROWS(outer_path);
@@ -1385,7 +1372,6 @@ cost_mergejoin(MergePath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
 				outerendsel,
 				innerstartsel,
 				innerendsel;
-	Selectivity joininfactor;
 	Path		sort_path;		/* dummy for result of cost_sort */
 
 	/* Protect some assumptions below that rowcounts aren't zero */
@@ -1398,21 +1384,21 @@ cost_mergejoin(MergePath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
 		startup_cost += disable_cost;
 
 	/*
-	 * Compute cost and selectivity of the mergequals and qpquals (other
-	 * restriction clauses) separately.  We use approx_selectivity here for
-	 * speed --- in most cases, any errors won't affect the result much.
-	 *
-	 * Note: it's probably bogus to use the normal selectivity calculation
-	 * here when either the outer or inner path is a UniquePath.
+	 * Compute cost of the mergequals and qpquals (other restriction clauses)
+	 * separately.
 	 */
-	merge_selec = approx_selectivity(root, mergeclauses, sjinfo);
 	cost_qual_eval(&merge_qual_cost, mergeclauses, root);
 	cost_qual_eval(&qp_qual_cost, path->jpath.joinrestrictinfo, root);
 	qp_qual_cost.startup -= merge_qual_cost.startup;
 	qp_qual_cost.per_tuple -= merge_qual_cost.per_tuple;
 
-	/* approx # tuples passing the merge quals */
-	mergejointuples = clamp_row_est(merge_selec * outer_path_rows * inner_path_rows);
+	/*
+	 * Get approx # tuples passing the mergequals.  We use approx_tuple_count
+	 * here for speed --- in most cases, any errors won't affect the result
+	 * much.
+	 */
+	mergejointuples = approx_tuple_count(root, &path->jpath,
+										 mergeclauses, sjinfo);
 
 	/*
 	 * When there are equal merge keys in the outer relation, the mergejoin
@@ -1422,11 +1408,11 @@ cost_mergejoin(MergePath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
 	 * but on the other hand we ignore the bookkeeping costs of mark/restore.
 	 * Not clear if it's worth developing a more refined model.
 	 *
-	 * The number of re-fetches can be estimated approximately as size of
-	 * merge join output minus size of inner relation.	Assume that the
-	 * distinct key values are 1, 2, ..., and denote the number of values of
-	 * each key in the outer relation as m1, m2, ...; in the inner relation,
-	 * n1, n2, ... Then we have
+	 * For regular inner and outer joins, the number of re-fetches can be
+	 * estimated approximately as size of merge join output minus size of
+	 * inner relation. Assume that the distinct key values are 1, 2, ..., and
+	 * denote the number of values of each key in the outer relation as m1,
+	 * m2, ...; in the inner relation, n1, n2, ...  Then we have
 	 *
 	 * size of join = m1 * n1 + m2 * n2 + ...
 	 *
@@ -1439,8 +1425,17 @@ cost_mergejoin(MergePath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
 	 * are effectively subtracting those from the number of rescanned tuples,
 	 * when we should not.	Can we do better without expensive selectivity
 	 * computations?
+	 *
+	 * For SEMI and ANTI joins, only one inner tuple need be rescanned for
+	 * each group of same-keyed outer tuples (assuming that all joinquals
+	 * are merge quals).  This makes the effect small enough to ignore,
+	 * so we just set rescannedtuples = 0.  Likewise, the whole issue is
+	 * moot if we are working from a unique-ified outer input.
 	 */
-	if (IsA(outer_path, UniquePath))
+	if (sjinfo->jointype == JOIN_SEMI ||
+		sjinfo->jointype == JOIN_ANTI)
+		rescannedtuples = 0;
+	else if (IsA(outer_path, UniquePath))
 		rescannedtuples = 0;
 	else
 	{
@@ -1505,7 +1500,8 @@ cost_mergejoin(MergePath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
 			innerstartsel = cache->leftstartsel;
 			innerendsel = cache->leftendsel;
 		}
-		if (path->jpath.jointype == JOIN_LEFT)
+		if (path->jpath.jointype == JOIN_LEFT ||
+			path->jpath.jointype == JOIN_ANTI)
 		{
 			outerstartsel = 0.0;
 			outerendsel = 1.0;
@@ -1600,21 +1596,10 @@ cost_mergejoin(MergePath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
 
 	/* CPU costs */
 
-	/*
-	 * If we're doing JOIN_IN then we will stop outputting inner tuples for an
-	 * outer tuple as soon as we have one match.  Account for the effects of
-	 * this by scaling down the cost estimates in proportion to the expected
-	 * output size.  (This assumes that all the quals attached to the join are
-	 * IN quals, which should be true.)
-	 */
-	joininfactor = join_in_selectivity(&path->jpath, root, sjinfo);
-
 	/*
 	 * The number of tuple comparisons needed is approximately number of outer
 	 * rows plus number of inner rows plus number of rescanned tuples (can we
 	 * refine this?).  At each one, we need to evaluate the mergejoin quals.
-	 * NOTE: JOIN_IN mode does not save any work here, so do NOT include
-	 * joininfactor.
 	 */
 	startup_cost += merge_qual_cost.startup;
 	startup_cost += merge_qual_cost.per_tuple *
@@ -1627,12 +1612,11 @@ cost_mergejoin(MergePath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
 	 * For each tuple that gets through the mergejoin proper, we charge
 	 * cpu_tuple_cost plus the cost of evaluating additional restriction
 	 * clauses that are to be applied at the join.	(This is pessimistic since
-	 * not all of the quals may get evaluated at each tuple.)  This work is
-	 * skipped in JOIN_IN mode, so apply the factor.
+	 * not all of the quals may get evaluated at each tuple.)
 	 */
 	startup_cost += qp_qual_cost.startup;
 	cpu_per_tuple = cpu_tuple_cost + qp_qual_cost.per_tuple;
-	run_cost += cpu_per_tuple * mergejointuples * joininfactor;
+	run_cost += cpu_per_tuple * mergejointuples;
 
 	path->jpath.path.startup_cost = startup_cost;
 	path->jpath.path.total_cost = startup_cost + run_cost;
@@ -1711,7 +1695,6 @@ cost_hashjoin(HashPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
 	Cost		startup_cost = 0;
 	Cost		run_cost = 0;
 	Cost		cpu_per_tuple;
-	Selectivity hash_selec;
 	QualCost	hash_qual_cost;
 	QualCost	qp_qual_cost;
 	double		hashjointuples;
@@ -1722,28 +1705,27 @@ cost_hashjoin(HashPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
 	int			numbatches;
 	double		virtualbuckets;
 	Selectivity innerbucketsize;
-	Selectivity joininfactor;
 	ListCell   *hcl;
 
 	if (!enable_hashjoin)
 		startup_cost += disable_cost;
 
 	/*
-	 * Compute cost and selectivity of the hashquals and qpquals (other
-	 * restriction clauses) separately.  We use approx_selectivity here for
-	 * speed --- in most cases, any errors won't affect the result much.
-	 *
-	 * Note: it's probably bogus to use the normal selectivity calculation
-	 * here when either the outer or inner path is a UniquePath.
+	 * Compute cost of the hashquals and qpquals (other restriction clauses)
+	 * separately.
 	 */
-	hash_selec = approx_selectivity(root, hashclauses, sjinfo);
 	cost_qual_eval(&hash_qual_cost, hashclauses, root);
 	cost_qual_eval(&qp_qual_cost, path->jpath.joinrestrictinfo, root);
 	qp_qual_cost.startup -= hash_qual_cost.startup;
 	qp_qual_cost.per_tuple -= hash_qual_cost.per_tuple;
 
-	/* approx # tuples passing the hash quals */
-	hashjointuples = clamp_row_est(hash_selec * outer_path_rows * inner_path_rows);
+	/*
+	 * Get approx # tuples passing the hashquals.  We use approx_tuple_count
+	 * here for speed --- in most cases, any errors won't affect the result
+	 * much.
+	 */
+	hashjointuples = approx_tuple_count(root, &path->jpath,
+										hashclauses, sjinfo);
 
 	/* cost of source data */
 	startup_cost += outer_path->startup_cost;
@@ -1858,15 +1840,6 @@ cost_hashjoin(HashPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
 
 	/* CPU costs */
 
-	/*
-	 * If we're doing JOIN_IN then we will stop comparing inner tuples to an
-	 * outer tuple as soon as we have one match.  Account for the effects of
-	 * this by scaling down the cost estimates in proportion to the expected
-	 * output size.  (This assumes that all the quals attached to the join are
-	 * IN quals, which should be true.)
-	 */
-	joininfactor = join_in_selectivity(&path->jpath, root, sjinfo);
-
 	/*
 	 * The number of tuple comparisons needed is the number of outer tuples
 	 * times the typical number of tuples in a hash bucket, which is the inner
@@ -1878,8 +1851,7 @@ cost_hashjoin(HashPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
 	 */
 	startup_cost += hash_qual_cost.startup;
 	run_cost += hash_qual_cost.per_tuple *
-		outer_path_rows * clamp_row_est(inner_path_rows * innerbucketsize) *
-		joininfactor * 0.5;
+		outer_path_rows * clamp_row_est(inner_path_rows * innerbucketsize) * 0.5;
 
 	/*
 	 * For each tuple that gets through the hashjoin proper, we charge
@@ -1889,7 +1861,7 @@ cost_hashjoin(HashPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
 	 */
 	startup_cost += qp_qual_cost.startup;
 	cpu_per_tuple = cpu_tuple_cost + qp_qual_cost.per_tuple;
-	run_cost += cpu_per_tuple * hashjointuples * joininfactor;
+	run_cost += cpu_per_tuple * hashjointuples;
 
 	path->jpath.path.startup_cost = startup_cost;
 	path->jpath.path.total_cost = startup_cost + run_cost;
@@ -2213,13 +2185,12 @@ get_initplan_cost(PlannerInfo *root, SubPlan *subplan)
 
 
 /*
- * approx_selectivity
- *		Quick-and-dirty estimation of clause selectivities.
- *		The input can be either an implicitly-ANDed list of boolean
- *		expressions, or a list of RestrictInfo nodes (typically the latter).
+ * approx_tuple_count
+ *		Quick-and-dirty estimation of the number of join rows passing
+ *		a set of qual conditions.
  *
- * Currently this is only used in join estimation, so sjinfo should never
- * be NULL.
+ * The quals can be either an implicitly-ANDed list of boolean expressions,
+ * or a list of RestrictInfo nodes (typically the latter).
  *
  * This is quick-and-dirty because we bypass clauselist_selectivity, and
  * simply multiply the independent clause selectivities together.  Now
@@ -2232,20 +2203,34 @@ get_initplan_cost(PlannerInfo *root, SubPlan *subplan)
  * output tuples are generated and passed through qpqual checking, it
  * seems OK to live with the approximation.
  */
-static Selectivity
-approx_selectivity(PlannerInfo *root, List *quals, SpecialJoinInfo *sjinfo)
+static double
+approx_tuple_count(PlannerInfo *root, JoinPath *path,
+				   List *quals, SpecialJoinInfo *sjinfo)
 {
-	Selectivity total = 1.0;
+	double		tuples;
+	double		outer_tuples = path->outerjoinpath->parent->rows;
+	double		inner_tuples = path->innerjoinpath->parent->rows;
+	Selectivity selec = 1.0;
 	ListCell   *l;
 
+	/* Get the approximate selectivity */
 	foreach(l, quals)
 	{
 		Node	   *qual = (Node *) lfirst(l);
 
 		/* Note that clause_selectivity will be able to cache its result */
-		total *= clause_selectivity(root, qual, 0, sjinfo->jointype, sjinfo);
+		selec *= clause_selectivity(root, qual, 0, sjinfo->jointype, sjinfo);
 	}
-	return total;
+
+	/* Apply it correctly using the input relation sizes */
+	if (sjinfo->jointype == JOIN_SEMI)
+		tuples = selec * outer_tuples;
+	else if (sjinfo->jointype == JOIN_ANTI)
+		tuples = (1.0 - selec) * outer_tuples;
+	else
+		tuples = selec * outer_tuples * inner_tuples;
+
+	return clamp_row_est(tuples);
 }
 
 
@@ -2315,7 +2300,6 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel,
 	Selectivity jselec;
 	Selectivity pselec;
 	double		nrows;
-	UniquePath *upath;
 
 	/*
 	 * Compute joinclause selectivity.	Note that we are only considering
@@ -2380,9 +2364,8 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel,
 	 * pushed-down quals are applied after the outer join, so their
 	 * selectivity applies fully.
 	 *
-	 * For JOIN_IN and variants, the Cartesian product is figured with respect
-	 * to a unique-ified input, and then we can clamp to the size of the other
-	 * input.
+	 * For JOIN_SEMI and JOIN_ANTI, the selectivity is defined as the fraction
+	 * of LHS rows that have matches, and we apply that straightforwardly.
 	 */
 	switch (jointype)
 	{
@@ -2404,22 +2387,11 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel,
 			nrows *= pselec;
 			break;
 		case JOIN_SEMI:
-			/* XXX this is unsafe, could Assert? */
-			upath = create_unique_path(root, inner_rel,
-									   inner_rel->cheapest_total_path,
-									   sjinfo);
-			if (upath)
-				nrows = outer_rel->rows * upath->rows * jselec;
-			else
-				nrows = outer_rel->rows * inner_rel->rows * jselec;
-			if (nrows > outer_rel->rows)
-				nrows = outer_rel->rows;
+			nrows = outer_rel->rows * jselec;
+			nrows *= pselec;
 			break;
 		case JOIN_ANTI:
-			/* XXX this is utterly wrong */
-			nrows = outer_rel->rows * inner_rel->rows * jselec;
-			if (nrows < outer_rel->rows)
-				nrows = outer_rel->rows;
+			nrows = outer_rel->rows * (1.0 - jselec);
 			nrows *= pselec;
 			break;
 		default:
@@ -2432,67 +2404,6 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel,
 	rel->rows = clamp_row_est(nrows);
 }
 
-/*
- * join_in_selectivity
- *	  Determines the factor by which a JOIN_IN join's result is expected
- *	  to be smaller than an ordinary inner join.
- *
- * 'path' is already filled in except for the cost fields
- * 'sjinfo' must be the JOIN_SEMI SpecialJoinInfo for the join
- */
-static Selectivity
-join_in_selectivity(JoinPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
-{
-	RelOptInfo *innerrel;
-	UniquePath *innerunique;
-	Selectivity selec;
-	double		nrows;
-
-	/* Return 1.0 whenever it's not JOIN_IN */
-	if (path->jointype != JOIN_SEMI)
-		return 1.0;
-	Assert(sjinfo && sjinfo->jointype == JOIN_SEMI);
-
-	/*
-	 * Return 1.0 if the inner side is already known unique.  The case where
-	 * the inner path is already a UniquePath probably cannot happen in
-	 * current usage, but check it anyway for completeness.  The interesting
-	 * case is where we've determined the inner relation itself is unique,
-	 * which we can check by looking at the rows estimate for its UniquePath.
-	 */
-	if (IsA(path->innerjoinpath, UniquePath))
-		return 1.0;
-	innerrel = path->innerjoinpath->parent;
-	/* XXX might assert if sjinfo doesn't exactly match innerrel? */
-	innerunique = create_unique_path(root,
-									 innerrel,
-									 innerrel->cheapest_total_path,
-									 sjinfo);
-	if (innerunique && innerunique->rows >= innerrel->rows)
-		return 1.0;
-
-	/*
-	 * Compute same result set_joinrel_size_estimates would compute for
-	 * JOIN_INNER.	Note that we use the input rels' absolute size estimates,
-	 * not PATH_ROWS() which might be less; if we used PATH_ROWS() we'd be
-	 * double-counting the effects of any join clauses used in input scans.
-	 */
-	selec = clauselist_selectivity(root,
-								   path->joinrestrictinfo,
-								   0,
-								   JOIN_INNER,
-								   NULL);
-	nrows = path->outerjoinpath->parent->rows * innerrel->rows * selec;
-
-	nrows = clamp_row_est(nrows);
-
-	/* See if it's larger than the actual JOIN_IN size estimate */
-	if (nrows > path->path.parent->rows)
-		return path->path.parent->rows / nrows;
-	else
-		return 1.0;
-}
-
 /*
  * set_function_size_estimates
  *		Set the size estimates for a base relation that is a function call.
diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c
index cb9a07b77d..30e841b487 100644
--- a/src/backend/optimizer/util/plancat.c
+++ b/src/backend/optimizer/util/plancat.c
@@ -9,7 +9,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/util/plancat.c,v 1.148 2008/07/13 20:45:47 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/util/plancat.c,v 1.149 2008/08/16 00:01:36 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -830,7 +830,8 @@ Selectivity
 join_selectivity(PlannerInfo *root,
 				 Oid operator,
 				 List *args,
-				 JoinType jointype)
+				 JoinType jointype,
+				 SpecialJoinInfo *sjinfo)
 {
 	RegProcedure oprjoin = get_oprjoin(operator);
 	float8		result;
@@ -842,11 +843,12 @@ join_selectivity(PlannerInfo *root,
 	if (!oprjoin)
 		return (Selectivity) 0.5;
 
-	result = DatumGetFloat8(OidFunctionCall4(oprjoin,
+	result = DatumGetFloat8(OidFunctionCall5(oprjoin,
 											 PointerGetDatum(root),
 											 ObjectIdGetDatum(operator),
 											 PointerGetDatum(args),
-											 Int16GetDatum(jointype)));
+											 Int16GetDatum(jointype),
+											 PointerGetDatum(sjinfo)));
 
 	if (result < 0.0 || result > 1.0)
 		elog(ERROR, "invalid join selectivity: %f", result);
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index 7d5609d447..d484221232 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -15,7 +15,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.251 2008/08/14 18:47:59 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.252 2008/08/16 00:01:36 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -55,19 +55,34 @@
  *
  *		float8 oprrest (internal, oid, internal, int4);
  *
+ * The result is a selectivity, that is, a fraction (0 to 1) of the rows
+ * of the relation that are expected to produce a TRUE result for the
+ * given operator.
+ *
  * The call convention for a join estimator (oprjoin function) is similar
- * except that varRelid is not needed, and instead the join type is
+ * except that varRelid is not needed, and instead join information is
  * supplied:
  *
  *		Selectivity oprjoin (PlannerInfo *root,
  *							 Oid operator,
  *							 List *args,
- *							 JoinType jointype);
+ *							 JoinType jointype,
+ *							 SpecialJoinInfo *sjinfo);
  *
- *		float8 oprjoin (internal, oid, internal, int2);
+ *		float8 oprjoin (internal, oid, internal, int2, internal);
  *
- * (We deliberately make the SQL signature different to facilitate
- * catching errors.)
+ * (Before Postgres 8.4, join estimators had only the first four of these
+ * parameters.  That signature is still allowed, but deprecated.)  The
+ * relationship between jointype and sjinfo is explained in the comments for
+ * clause_selectivity() --- the short version is that jointype is usually
+ * best ignored in favor of examining sjinfo.
+ *
+ * Join selectivity for regular inner and outer joins is defined as the
+ * fraction (0 to 1) of the cross product of the relations that is expected
+ * to produce a TRUE result for the given operator.  For both semi and anti
+ * joins, however, the selectivity is defined as the fraction of the left-hand
+ * side relation's rows that are expected to have a match (ie, at least one
+ * row with a TRUE result) in the right-hand side.
  *----------
  */
 
@@ -113,6 +128,10 @@ static double var_eq_non_const(VariableStatData *vardata, Oid operator,
 static double ineq_histogram_selectivity(VariableStatData *vardata,
 						   FmgrInfo *opproc, bool isgt,
 						   Datum constval, Oid consttype);
+static double eqjoinsel_inner(Oid operator,
+				VariableStatData *vardata1, VariableStatData *vardata2);
+static double eqjoinsel_semi(Oid operator,
+				VariableStatData *vardata1, VariableStatData *vardata2);
 static bool convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
 				  Datum lobound, Datum hibound, Oid boundstypid,
 				  double *scaledlobound, double *scaledhibound);
@@ -1579,7 +1598,6 @@ scalararraysel(PlannerInfo *root,
 	Oid			nominal_element_type;
 	RegProcedure oprsel;
 	FmgrInfo	oprselproc;
-	Datum		selarg4;
 	Selectivity s1;
 
 	/*
@@ -1587,15 +1605,9 @@ scalararraysel(PlannerInfo *root,
 	 * it hasn't got one.
 	 */
 	if (is_join_clause)
-	{
 		oprsel = get_oprjoin(operator);
-		selarg4 = Int16GetDatum(jointype);
-	}
 	else
-	{
 		oprsel = get_oprrest(operator);
-		selarg4 = Int32GetDatum(varRelid);
-	}
 	if (!oprsel)
 		return (Selectivity) 0.5;
 	fmgr_info(oprsel, &oprselproc);
@@ -1660,11 +1672,19 @@ scalararraysel(PlannerInfo *root,
 										elem_values[i],
 										elem_nulls[i],
 										elmbyval));
-			s2 = DatumGetFloat8(FunctionCall4(&oprselproc,
-											  PointerGetDatum(root),
-											  ObjectIdGetDatum(operator),
-											  PointerGetDatum(args),
-											  selarg4));
+			if (is_join_clause)
+				s2 = DatumGetFloat8(FunctionCall5(&oprselproc,
+												  PointerGetDatum(root),
+												  ObjectIdGetDatum(operator),
+												  PointerGetDatum(args),
+												  Int16GetDatum(jointype),
+												  PointerGetDatum(sjinfo)));
+			else
+				s2 = DatumGetFloat8(FunctionCall4(&oprselproc,
+												  PointerGetDatum(root),
+												  ObjectIdGetDatum(operator),
+												  PointerGetDatum(args),
+												  Int32GetDatum(varRelid)));
 			if (useOr)
 				s1 = s1 + s2 - s1 * s2;
 			else
@@ -1694,11 +1714,19 @@ scalararraysel(PlannerInfo *root,
 			 * estimation function would really care ...
 			 */
 			args = list_make2(leftop, elem);
-			s2 = DatumGetFloat8(FunctionCall4(&oprselproc,
-											  PointerGetDatum(root),
-											  ObjectIdGetDatum(operator),
-											  PointerGetDatum(args),
-											  selarg4));
+			if (is_join_clause)
+				s2 = DatumGetFloat8(FunctionCall5(&oprselproc,
+												  PointerGetDatum(root),
+												  ObjectIdGetDatum(operator),
+												  PointerGetDatum(args),
+												  Int16GetDatum(jointype),
+												  PointerGetDatum(sjinfo)));
+			else
+				s2 = DatumGetFloat8(FunctionCall4(&oprselproc,
+												  PointerGetDatum(root),
+												  ObjectIdGetDatum(operator),
+												  PointerGetDatum(args),
+												  Int32GetDatum(varRelid)));
 			if (useOr)
 				s1 = s1 + s2 - s1 * s2;
 			else
@@ -1721,11 +1749,19 @@ scalararraysel(PlannerInfo *root,
 		dummyexpr->typeId = nominal_element_type;
 		dummyexpr->typeMod = -1;
 		args = list_make2(leftop, dummyexpr);
-		s2 = DatumGetFloat8(FunctionCall4(&oprselproc,
-										  PointerGetDatum(root),
-										  ObjectIdGetDatum(operator),
-										  PointerGetDatum(args),
-										  selarg4));
+		if (is_join_clause)
+			s2 = DatumGetFloat8(FunctionCall5(&oprselproc,
+											  PointerGetDatum(root),
+											  ObjectIdGetDatum(operator),
+											  PointerGetDatum(args),
+											  Int16GetDatum(jointype),
+											  PointerGetDatum(sjinfo)));
+		else
+			s2 = DatumGetFloat8(FunctionCall4(&oprselproc,
+											  PointerGetDatum(root),
+											  ObjectIdGetDatum(operator),
+											  PointerGetDatum(args),
+											  Int32GetDatum(varRelid)));
 		s1 = useOr ? 0.0 : 1.0;
 
 		/*
@@ -1803,13 +1839,24 @@ rowcomparesel(PlannerInfo *root,
 	/* Build equivalent arg list for single operator */
 	opargs = list_make2(linitial(clause->largs), linitial(clause->rargs));
 
-	/* Decide if it's a join clause, same as for OpExpr */
+	/*
+	 * Decide if it's a join clause.  This should match clausesel.c's
+	 * treat_as_join_clause(), except that we intentionally consider only
+	 * the leading columns and not the rest of the clause.
+	 */
 	if (varRelid != 0)
 	{
 		/*
-		 * If we are considering a nestloop join then all clauses are
-		 * restriction clauses, since we are only interested in the one
-		 * relation.
+		 * Caller is forcing restriction mode (eg, because we are examining
+		 * an inner indexscan qual).
+		 */
+		is_join_clause = false;
+	}
+	else if (sjinfo == NULL)
+	{
+		/*
+		 * It must be a restriction clause, since it's being evaluated at
+		 * a scan node.
 		 */
 		is_join_clause = false;
 	}
@@ -1817,7 +1864,6 @@ rowcomparesel(PlannerInfo *root,
 	{
 		/*
 		 * Otherwise, it's a join if there's more than one relation used.
-		 * Notice we ignore the low-order columns here.
 		 */
 		is_join_clause = (NumRelids((Node *) opargs) > 1);
 	}
@@ -1827,7 +1873,8 @@ rowcomparesel(PlannerInfo *root,
 		/* Estimate selectivity for a join clause. */
 		s1 = join_selectivity(root, opno,
 							  opargs,
-							  jointype);
+							  jointype,
+							  sjinfo);
 	}
 	else
 	{
@@ -1849,10 +1896,60 @@ eqjoinsel(PG_FUNCTION_ARGS)
 	PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
 	Oid			operator = PG_GETARG_OID(1);
 	List	   *args = (List *) PG_GETARG_POINTER(2);
+#ifdef NOT_USED
 	JoinType	jointype = (JoinType) PG_GETARG_INT16(3);
+#endif
+	SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) PG_GETARG_POINTER(4);
 	double		selec;
 	VariableStatData vardata1;
 	VariableStatData vardata2;
+	bool		join_is_reversed;
+
+	get_join_variables(root, args, sjinfo,
+					   &vardata1, &vardata2, &join_is_reversed);
+
+	switch (sjinfo->jointype)
+	{
+		case JOIN_INNER:
+		case JOIN_LEFT:
+		case JOIN_FULL:
+			selec = eqjoinsel_inner(operator, &vardata1, &vardata2);
+			break;
+		case JOIN_SEMI:
+		case JOIN_ANTI:
+			if (!join_is_reversed)
+				selec = eqjoinsel_semi(operator, &vardata1, &vardata2);
+			else
+				selec = eqjoinsel_semi(get_commutator(operator),
+									   &vardata2, &vardata1);
+			break;
+		default:
+			/* other values not expected here */
+			elog(ERROR, "unrecognized join type: %d",
+				 (int) sjinfo->jointype);
+			selec = 0;			/* keep compiler quiet */
+			break;
+	}
+
+	ReleaseVariableStats(vardata1);
+	ReleaseVariableStats(vardata2);
+
+	CLAMP_PROBABILITY(selec);
+
+	PG_RETURN_FLOAT8((float8) selec);
+}
+
+/*
+ * eqjoinsel_inner --- eqjoinsel for normal inner join
+ *
+ * We also use this for LEFT/FULL outer joins; it's not presently clear
+ * that it's worth trying to distinguish them here.
+ */
+static double
+eqjoinsel_inner(Oid operator,
+				VariableStatData *vardata1, VariableStatData *vardata2)
+{
+	double		selec;
 	double		nd1;
 	double		nd2;
 	Form_pg_statistic stats1 = NULL;
@@ -1868,29 +1965,27 @@ eqjoinsel(PG_FUNCTION_ARGS)
 	float4	   *numbers2 = NULL;
 	int			nnumbers2 = 0;
 
-	get_join_variables(root, args, &vardata1, &vardata2);
+	nd1 = get_variable_numdistinct(vardata1);
+	nd2 = get_variable_numdistinct(vardata2);
 
-	nd1 = get_variable_numdistinct(&vardata1);
-	nd2 = get_variable_numdistinct(&vardata2);
-
-	if (HeapTupleIsValid(vardata1.statsTuple))
+	if (HeapTupleIsValid(vardata1->statsTuple))
 	{
-		stats1 = (Form_pg_statistic) GETSTRUCT(vardata1.statsTuple);
-		have_mcvs1 = get_attstatsslot(vardata1.statsTuple,
-									  vardata1.atttype,
-									  vardata1.atttypmod,
+		stats1 = (Form_pg_statistic) GETSTRUCT(vardata1->statsTuple);
+		have_mcvs1 = get_attstatsslot(vardata1->statsTuple,
+									  vardata1->atttype,
+									  vardata1->atttypmod,
 									  STATISTIC_KIND_MCV,
 									  InvalidOid,
 									  &values1, &nvalues1,
 									  &numbers1, &nnumbers1);
 	}
 
-	if (HeapTupleIsValid(vardata2.statsTuple))
+	if (HeapTupleIsValid(vardata2->statsTuple))
 	{
-		stats2 = (Form_pg_statistic) GETSTRUCT(vardata2.statsTuple);
-		have_mcvs2 = get_attstatsslot(vardata2.statsTuple,
-									  vardata2.atttype,
-									  vardata2.atttypmod,
+		stats2 = (Form_pg_statistic) GETSTRUCT(vardata2->statsTuple);
+		have_mcvs2 = get_attstatsslot(vardata2->statsTuple,
+									  vardata2->atttype,
+									  vardata2->atttypmod,
 									  STATISTIC_KIND_MCV,
 									  InvalidOid,
 									  &values2, &nvalues2,
@@ -1932,25 +2027,6 @@ eqjoinsel(PG_FUNCTION_ARGS)
 		hasmatch1 = (bool *) palloc0(nvalues1 * sizeof(bool));
 		hasmatch2 = (bool *) palloc0(nvalues2 * sizeof(bool));
 
-		/*
-		 * If we are doing any variant of JOIN_SEMI, pretend all the values of
-		 * the righthand relation are unique (ie, act as if it's been
-		 * DISTINCT'd).
-		 *
-		 * NOTE: it would be dangerous to try to be smart about JOIN_LEFT or
-		 * JOIN_RIGHT here, because we do not have enough information to
-		 * determine which var is really on which side of the join. Perhaps
-		 * someday we should pass in more information.
-		 */
-		if (jointype == JOIN_SEMI)
-		{
-			float4		oneovern = 1.0 / nd2;
-
-			for (i = 0; i < nvalues2; i++)
-				numbers2[i] = oneovern;
-			nullfrac2 = oneovern;
-		}
-
 		/*
 		 * Note we assume that each MCV will match at most one member of the
 		 * other MCV list.	If the operator isn't really equality, there could
@@ -2075,18 +2151,170 @@ eqjoinsel(PG_FUNCTION_ARGS)
 	}
 
 	if (have_mcvs1)
-		free_attstatsslot(vardata1.atttype, values1, nvalues1,
+		free_attstatsslot(vardata1->atttype, values1, nvalues1,
 						  numbers1, nnumbers1);
 	if (have_mcvs2)
-		free_attstatsslot(vardata2.atttype, values2, nvalues2,
+		free_attstatsslot(vardata2->atttype, values2, nvalues2,
 						  numbers2, nnumbers2);
 
-	ReleaseVariableStats(vardata1);
-	ReleaseVariableStats(vardata2);
+	return selec;
+}
 
-	CLAMP_PROBABILITY(selec);
+/*
+ * eqjoinsel_semi --- eqjoinsel for semi join
+ *
+ * (Also used for anti join, which we are supposed to estimate the same way.)
+ * Caller has ensured that vardata1 is the LHS variable.
+ */
+static double
+eqjoinsel_semi(Oid operator,
+			   VariableStatData *vardata1, VariableStatData *vardata2)
+{
+	double		selec;
+	double		nd1;
+	double		nd2;
+	Form_pg_statistic stats1 = NULL;
+	Form_pg_statistic stats2 = NULL;
+	bool		have_mcvs1 = false;
+	Datum	   *values1 = NULL;
+	int			nvalues1 = 0;
+	float4	   *numbers1 = NULL;
+	int			nnumbers1 = 0;
+	bool		have_mcvs2 = false;
+	Datum	   *values2 = NULL;
+	int			nvalues2 = 0;
+	float4	   *numbers2 = NULL;
+	int			nnumbers2 = 0;
 
-	PG_RETURN_FLOAT8((float8) selec);
+	nd1 = get_variable_numdistinct(vardata1);
+	nd2 = get_variable_numdistinct(vardata2);
+
+	if (HeapTupleIsValid(vardata1->statsTuple))
+	{
+		stats1 = (Form_pg_statistic) GETSTRUCT(vardata1->statsTuple);
+		have_mcvs1 = get_attstatsslot(vardata1->statsTuple,
+									  vardata1->atttype,
+									  vardata1->atttypmod,
+									  STATISTIC_KIND_MCV,
+									  InvalidOid,
+									  &values1, &nvalues1,
+									  &numbers1, &nnumbers1);
+	}
+
+	if (HeapTupleIsValid(vardata2->statsTuple))
+	{
+		stats2 = (Form_pg_statistic) GETSTRUCT(vardata2->statsTuple);
+		have_mcvs2 = get_attstatsslot(vardata2->statsTuple,
+									  vardata2->atttype,
+									  vardata2->atttypmod,
+									  STATISTIC_KIND_MCV,
+									  InvalidOid,
+									  &values2, &nvalues2,
+									  &numbers2, &nnumbers2);
+	}
+
+	if (have_mcvs1 && have_mcvs2 && OidIsValid(operator))
+	{
+		/*
+		 * We have most-common-value lists for both relations.	Run through
+		 * the lists to see which MCVs actually join to each other with the
+		 * given operator.	This allows us to determine the exact join
+		 * selectivity for the portion of the relations represented by the MCV
+		 * lists.  We still have to estimate for the remaining population, but
+		 * in a skewed distribution this gives us a big leg up in accuracy.
+		 */
+		FmgrInfo	eqproc;
+		bool	   *hasmatch1;
+		bool	   *hasmatch2;
+		double		nullfrac1 = stats1->stanullfrac;
+		double		matchfreq1;
+		int			i,
+					nmatches;
+
+		fmgr_info(get_opcode(operator), &eqproc);
+		hasmatch1 = (bool *) palloc0(nvalues1 * sizeof(bool));
+		hasmatch2 = (bool *) palloc0(nvalues2 * sizeof(bool));
+
+		/*
+		 * Note we assume that each MCV will match at most one member of the
+		 * other MCV list.	If the operator isn't really equality, there could
+		 * be multiple matches --- but we don't look for them, both for speed
+		 * and because the math wouldn't add up...
+		 */
+		nmatches = 0;
+		for (i = 0; i < nvalues1; i++)
+		{
+			int			j;
+
+			for (j = 0; j < nvalues2; j++)
+			{
+				if (hasmatch2[j])
+					continue;
+				if (DatumGetBool(FunctionCall2(&eqproc,
+											   values1[i],
+											   values2[j])))
+				{
+					hasmatch1[i] = hasmatch2[j] = true;
+					nmatches++;
+					break;
+				}
+			}
+		}
+		/* Sum up frequencies of matched MCVs */
+		matchfreq1 = 0.0;
+		for (i = 0; i < nvalues1; i++)
+		{
+			if (hasmatch1[i])
+				matchfreq1 += numbers1[i];
+		}
+		CLAMP_PROBABILITY(matchfreq1);
+		pfree(hasmatch1);
+		pfree(hasmatch2);
+
+		/*
+		 * Now we need to estimate the fraction of relation 1 that has at
+		 * least one join partner.  We know for certain that the matched
+		 * MCVs do, so that gives us a lower bound, but we're really in the
+		 * dark about everything else.  Our crude approach is: if nd1 <= nd2
+		 * then assume all non-null rel1 rows have join partners, else assume
+		 * for the uncertain rows that a fraction nd2/nd1 have join partners.
+		 * We can discount the known-matched MCVs from the distinct-values
+		 * counts before doing the division.
+		 */
+		nd1 -= nmatches;
+		nd2 -= nmatches;
+		if (nd1 <= nd2 || nd2 <= 0)
+			selec = Max(matchfreq1, 1.0 - nullfrac1);
+		else
+		{
+			double	uncertain = 1.0 - matchfreq1 - nullfrac1;
+
+			CLAMP_PROBABILITY(uncertain);
+			selec = matchfreq1 + (nd2/nd1) * uncertain;
+		}
+	}
+	else
+	{
+		/*
+		 * Without MCV lists for both sides, we can only use the heuristic
+		 * about nd1 vs nd2.
+		 */
+		double		nullfrac1 = stats1 ? stats1->stanullfrac : 0.0;
+
+		if (nd1 <= nd2 || nd2 <= 0)
+			selec = 1.0 - nullfrac1;
+		else
+			selec = (nd2/nd1) * (1.0 - nullfrac1);
+	}
+
+	if (have_mcvs1)
+		free_attstatsslot(vardata1->atttype, values1, nvalues1,
+						  numbers1, nnumbers1);
+	if (have_mcvs2)
+		free_attstatsslot(vardata2->atttype, values2, nvalues2,
+						  numbers2, nnumbers2);
+
+	return selec;
 }
 
 /*
@@ -2099,6 +2327,7 @@ neqjoinsel(PG_FUNCTION_ARGS)
 	Oid			operator = PG_GETARG_OID(1);
 	List	   *args = (List *) PG_GETARG_POINTER(2);
 	JoinType	jointype = (JoinType) PG_GETARG_INT16(3);
+	SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) PG_GETARG_POINTER(4);
 	Oid			eqop;
 	float8		result;
 
@@ -2109,11 +2338,12 @@ neqjoinsel(PG_FUNCTION_ARGS)
 	eqop = get_negator(operator);
 	if (eqop)
 	{
-		result = DatumGetFloat8(DirectFunctionCall4(eqjoinsel,
+		result = DatumGetFloat8(DirectFunctionCall5(eqjoinsel,
 													PointerGetDatum(root),
 													ObjectIdGetDatum(eqop),
 													PointerGetDatum(args),
-													Int16GetDatum(jointype)));
+													Int16GetDatum(jointype),
+													PointerGetDatum(sjinfo)));
 	}
 	else
 	{
@@ -3661,10 +3891,17 @@ get_restriction_variable(PlannerInfo *root, List *args, int varRelid,
 /*
  * get_join_variables
  *		Apply examine_variable() to each side of a join clause.
+ *		Also, attempt to identify whether the join clause has the same
+ *		or reversed sense compared to the SpecialJoinInfo.
+ *
+ * We consider the join clause "normal" if it is "lhs_var OP rhs_var",
+ * or "reversed" if it is "rhs_var OP lhs_var".  In complicated cases
+ * where we can't tell for sure, we default to assuming it's normal.
  */
 void
-get_join_variables(PlannerInfo *root, List *args,
-				   VariableStatData *vardata1, VariableStatData *vardata2)
+get_join_variables(PlannerInfo *root, List *args, SpecialJoinInfo *sjinfo,
+				   VariableStatData *vardata1, VariableStatData *vardata2,
+				   bool *join_is_reversed)
 {
 	Node	   *left,
 			   *right;
@@ -3677,6 +3914,15 @@ get_join_variables(PlannerInfo *root, List *args,
 
 	examine_variable(root, left, 0, vardata1);
 	examine_variable(root, right, 0, vardata2);
+
+	if (vardata1->rel &&
+		bms_is_subset(vardata1->rel->relids, sjinfo->syn_righthand))
+		*join_is_reversed = true;				/* var1 is on RHS */
+	else if (vardata2->rel &&
+			 bms_is_subset(vardata2->rel->relids, sjinfo->syn_lefthand))
+		*join_is_reversed = true;				/* var2 is on LHS */
+	else
+		*join_is_reversed = false;
 }
 
 /*
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h
index b2345d1364..27501367f5 100644
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -37,7 +37,7 @@
  * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.477 2008/08/11 13:58:46 heikki Exp $
+ * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.478 2008/08/16 00:01:37 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -53,6 +53,6 @@
  */
 
 /*							yyyymmddN */
-#define CATALOG_VERSION_NO	200808111
+#define CATALOG_VERSION_NO	200808141
 
 #endif
diff --git a/src/include/catalog/pg_operator.h b/src/include/catalog/pg_operator.h
index e0f5b48aa4..2fc91e92e3 100644
--- a/src/include/catalog/pg_operator.h
+++ b/src/include/catalog/pg_operator.h
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/catalog/pg_operator.h,v 1.161 2008/07/14 00:51:45 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/catalog/pg_operator.h,v 1.162 2008/08/16 00:01:37 tgl Exp $
  *
  * NOTES
  *	  the genbki.sh script reads this file and generates .bki
@@ -941,11 +941,11 @@ extern void OperatorCreate(const char *operatorName,
 			   Oid operatorNamespace,
 			   Oid leftTypeId,
 			   Oid rightTypeId,
-			   List *procedureName,
+			   Oid procedureId,
 			   List *commutatorName,
 			   List *negatorName,
-			   List *restrictionName,
-			   List *joinName,
+			   Oid restrictionId,
+			   Oid joinId,
 			   bool canMerge,
 			   bool canHash);
 
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index c516af8a16..d00d36e85f 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/catalog/pg_proc.h,v 1.509 2008/07/18 03:32:53 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/catalog/pg_proc.h,v 1.510 2008/08/16 00:01:37 tgl Exp $
  *
  * NOTES
  *	  The script catalog/genbki.sh reads this file and generates .bki
@@ -223,13 +223,13 @@ DATA(insert OID = 103 (  scalarltsel	   PGNSP PGUID 12 1 0 0 f f t f s 4 701 "22
 DESCR("restriction selectivity of < and related operators on scalar datatypes");
 DATA(insert OID = 104 (  scalargtsel	   PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 23" _null_ _null_ _null_	scalargtsel _null_ _null_ _null_ ));
 DESCR("restriction selectivity of > and related operators on scalar datatypes");
-DATA(insert OID = 105 (  eqjoinsel		   PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 21" _null_ _null_ _null_	eqjoinsel _null_ _null_ _null_ ));
+DATA(insert OID = 105 (  eqjoinsel		   PGNSP PGUID 12 1 0 0 f f t f s 5 701 "2281 26 2281 21 2281" _null_ _null_ _null_	eqjoinsel _null_ _null_ _null_ ));
 DESCR("join selectivity of = and related operators");
-DATA(insert OID = 106 (  neqjoinsel		   PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 21" _null_ _null_ _null_	neqjoinsel _null_ _null_ _null_ ));
+DATA(insert OID = 106 (  neqjoinsel		   PGNSP PGUID 12 1 0 0 f f t f s 5 701 "2281 26 2281 21 2281" _null_ _null_ _null_	neqjoinsel _null_ _null_ _null_ ));
 DESCR("join selectivity of <> and related operators");
-DATA(insert OID = 107 (  scalarltjoinsel   PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 21" _null_ _null_ _null_	scalarltjoinsel _null_ _null_ _null_ ));
+DATA(insert OID = 107 (  scalarltjoinsel   PGNSP PGUID 12 1 0 0 f f t f s 5 701 "2281 26 2281 21 2281" _null_ _null_ _null_	scalarltjoinsel _null_ _null_ _null_ ));
 DESCR("join selectivity of < and related operators on scalar datatypes");
-DATA(insert OID = 108 (  scalargtjoinsel   PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 21" _null_ _null_ _null_	scalargtjoinsel _null_ _null_ _null_ ));
+DATA(insert OID = 108 (  scalargtjoinsel   PGNSP PGUID 12 1 0 0 f f t f s 5 701 "2281 26 2281 21 2281" _null_ _null_ _null_	scalargtjoinsel _null_ _null_ _null_ ));
 DESCR("join selectivity of > and related operators on scalar datatypes");
 
 DATA(insert OID =  109 (  unknownin		   PGNSP PGUID 12 1 0 0 f f t f i 1 705 "2275" _null_ _null_ _null_ unknownin _null_ _null_ _null_ ));
@@ -289,7 +289,7 @@ DATA(insert OID = 138 (  box_center		   PGNSP PGUID 12 1 0 0 f f t f i 1 600 "60
 DESCR("center of");
 DATA(insert OID = 139 (  areasel		   PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 23" _null_ _null_ _null_	areasel _null_ _null_ _null_ ));
 DESCR("restriction selectivity for area-comparison operators");
-DATA(insert OID = 140 (  areajoinsel	   PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 21" _null_ _null_ _null_	areajoinsel _null_ _null_ _null_ ));
+DATA(insert OID = 140 (  areajoinsel	   PGNSP PGUID 12 1 0 0 f f t f s 5 701 "2281 26 2281 21 2281" _null_ _null_ _null_	areajoinsel _null_ _null_ _null_ ));
 DESCR("join selectivity for area-comparison operators");
 DATA(insert OID = 141 (  int4mul		   PGNSP PGUID 12 1 0 0 f f t f i 2 23 "23 23" _null_ _null_ _null_ int4mul _null_ _null_ _null_ ));
 DESCR("multiply");
@@ -1630,11 +1630,11 @@ DESCR("current clock time");
 
 DATA(insert OID = 1300 (  positionsel		   PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 23" _null_ _null_ _null_	positionsel _null_ _null_ _null_ ));
 DESCR("restriction selectivity for position-comparison operators");
-DATA(insert OID = 1301 (  positionjoinsel	   PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 21" _null_ _null_ _null_	positionjoinsel _null_ _null_ _null_ ));
+DATA(insert OID = 1301 (  positionjoinsel	   PGNSP PGUID 12 1 0 0 f f t f s 5 701 "2281 26 2281 21 2281" _null_ _null_ _null_	positionjoinsel _null_ _null_ _null_ ));
 DESCR("join selectivity for position-comparison operators");
 DATA(insert OID = 1302 (  contsel		   PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 23" _null_ _null_ _null_	contsel _null_ _null_ _null_ ));
 DESCR("restriction selectivity for containment comparison operators");
-DATA(insert OID = 1303 (  contjoinsel	   PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 21" _null_ _null_ _null_	contjoinsel _null_ _null_ _null_ ));
+DATA(insert OID = 1303 (  contjoinsel	   PGNSP PGUID 12 1 0 0 f f t f s 5 701 "2281 26 2281 21 2281" _null_ _null_ _null_	contjoinsel _null_ _null_ _null_ ));
 DESCR("join selectivity for containment comparison operators");
 
 DATA(insert OID = 1304 ( overlaps			 PGNSP PGUID 12 1 0 0 f f f f i 4 16 "1184 1184 1184 1184" _null_ _null_ _null_ overlaps_timestamp _null_ _null_ _null_ ));
@@ -2684,9 +2684,9 @@ DATA(insert OID = 1814 ( iclikesel			PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281
 DESCR("restriction selectivity of ILIKE");
 DATA(insert OID = 1815 ( icnlikesel			PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 23" _null_ _null_ _null_  icnlikesel _null_ _null_ _null_ ));
 DESCR("restriction selectivity of NOT ILIKE");
-DATA(insert OID = 1816 ( iclikejoinsel		PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 21" _null_ _null_ _null_  iclikejoinsel _null_ _null_ _null_ ));
+DATA(insert OID = 1816 ( iclikejoinsel		PGNSP PGUID 12 1 0 0 f f t f s 5 701 "2281 26 2281 21 2281" _null_ _null_ _null_  iclikejoinsel _null_ _null_ _null_ ));
 DESCR("join selectivity of ILIKE");
-DATA(insert OID = 1817 ( icnlikejoinsel		PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 21" _null_ _null_ _null_  icnlikejoinsel _null_ _null_ _null_ ));
+DATA(insert OID = 1817 ( icnlikejoinsel		PGNSP PGUID 12 1 0 0 f f t f s 5 701 "2281 26 2281 21 2281" _null_ _null_ _null_  icnlikejoinsel _null_ _null_ _null_ ));
 DESCR("join selectivity of NOT ILIKE");
 DATA(insert OID = 1818 ( regexeqsel			PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 23" _null_ _null_ _null_  regexeqsel _null_ _null_ _null_ ));
 DESCR("restriction selectivity of regex match");
@@ -2700,17 +2700,17 @@ DATA(insert OID = 1822 ( nlikesel			PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 2
 DESCR("restriction selectivity of NOT LIKE");
 DATA(insert OID = 1823 ( icregexnesel		PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 23" _null_ _null_ _null_  icregexnesel _null_ _null_ _null_ ));
 DESCR("restriction selectivity of case-insensitive regex non-match");
-DATA(insert OID = 1824 ( regexeqjoinsel		PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 21" _null_ _null_ _null_  regexeqjoinsel _null_ _null_ _null_ ));
+DATA(insert OID = 1824 ( regexeqjoinsel		PGNSP PGUID 12 1 0 0 f f t f s 5 701 "2281 26 2281 21 2281" _null_ _null_ _null_  regexeqjoinsel _null_ _null_ _null_ ));
 DESCR("join selectivity of regex match");
-DATA(insert OID = 1825 ( likejoinsel		PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 21" _null_ _null_ _null_  likejoinsel _null_ _null_ _null_ ));
+DATA(insert OID = 1825 ( likejoinsel		PGNSP PGUID 12 1 0 0 f f t f s 5 701 "2281 26 2281 21 2281" _null_ _null_ _null_  likejoinsel _null_ _null_ _null_ ));
 DESCR("join selectivity of LIKE");
-DATA(insert OID = 1826 ( icregexeqjoinsel	PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 21" _null_ _null_ _null_  icregexeqjoinsel _null_ _null_ _null_ ));
+DATA(insert OID = 1826 ( icregexeqjoinsel	PGNSP PGUID 12 1 0 0 f f t f s 5 701 "2281 26 2281 21 2281" _null_ _null_ _null_  icregexeqjoinsel _null_ _null_ _null_ ));
 DESCR("join selectivity of case-insensitive regex match");
-DATA(insert OID = 1827 ( regexnejoinsel		PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 21" _null_ _null_ _null_  regexnejoinsel _null_ _null_ _null_ ));
+DATA(insert OID = 1827 ( regexnejoinsel		PGNSP PGUID 12 1 0 0 f f t f s 5 701 "2281 26 2281 21 2281" _null_ _null_ _null_  regexnejoinsel _null_ _null_ _null_ ));
 DESCR("join selectivity of regex non-match");
-DATA(insert OID = 1828 ( nlikejoinsel		PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 21" _null_ _null_ _null_  nlikejoinsel _null_ _null_ _null_ ));
+DATA(insert OID = 1828 ( nlikejoinsel		PGNSP PGUID 12 1 0 0 f f t f s 5 701 "2281 26 2281 21 2281" _null_ _null_ _null_  nlikejoinsel _null_ _null_ _null_ ));
 DESCR("join selectivity of NOT LIKE");
-DATA(insert OID = 1829 ( icregexnejoinsel	PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 21" _null_ _null_ _null_  icregexnejoinsel _null_ _null_ _null_ ));
+DATA(insert OID = 1829 ( icregexnejoinsel	PGNSP PGUID 12 1 0 0 f f t f s 5 701 "2281 26 2281 21 2281" _null_ _null_ _null_  icregexnejoinsel _null_ _null_ _null_ ));
 DESCR("join selectivity of case-insensitive regex non-match");
 
 /* Aggregate-related functions */
diff --git a/src/include/optimizer/plancat.h b/src/include/optimizer/plancat.h
index 7471d1b44d..7963b745f5 100644
--- a/src/include/optimizer/plancat.h
+++ b/src/include/optimizer/plancat.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/optimizer/plancat.h,v 1.50 2008/06/19 00:46:06 alvherre Exp $
+ * $PostgreSQL: pgsql/src/include/optimizer/plancat.h,v 1.51 2008/08/16 00:01:38 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -50,6 +50,7 @@ extern Selectivity restriction_selectivity(PlannerInfo *root,
 extern Selectivity join_selectivity(PlannerInfo *root,
 				 Oid operator,
 				 List *args,
-				 JoinType jointype);
+				 JoinType jointype,
+				 SpecialJoinInfo *sjinfo);
 
 #endif   /* PLANCAT_H */
diff --git a/src/include/utils/selfuncs.h b/src/include/utils/selfuncs.h
index e94e7e5916..120269eee5 100644
--- a/src/include/utils/selfuncs.h
+++ b/src/include/utils/selfuncs.h
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/utils/selfuncs.h,v 1.45 2008/08/14 18:48:00 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/utils/selfuncs.h,v 1.46 2008/08/16 00:01:38 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -104,8 +104,10 @@ extern bool get_restriction_variable(PlannerInfo *root, List *args,
 						 VariableStatData *vardata, Node **other,
 						 bool *varonleft);
 extern void get_join_variables(PlannerInfo *root, List *args,
+				   SpecialJoinInfo *sjinfo,
 				   VariableStatData *vardata1,
-				   VariableStatData *vardata2);
+				   VariableStatData *vardata2,
+				   bool *join_is_reversed);
 extern double get_variable_numdistinct(VariableStatData *vardata);
 extern double mcv_selectivity(VariableStatData *vardata, FmgrInfo *opproc,
 				Datum constval, bool varonleft,
diff --git a/src/test/regress/expected/opr_sanity.out b/src/test/regress/expected/opr_sanity.out
index 533bac3ab6..a21c2ba374 100644
--- a/src/test/regress/expected/opr_sanity.out
+++ b/src/test/regress/expected/opr_sanity.out
@@ -543,17 +543,20 @@ WHERE p1.oprrest = p2.oid AND
 -- If oprjoin is set, the operator must be a binary boolean op,
 -- and it must link to a proc with the right signature
 -- to be a join selectivity estimator.
--- The proc signature we want is: float8 proc(internal, oid, internal, int2)
+-- The proc signature we want is: float8 proc(internal, oid, internal, int2, internal)
+-- (Note: the old signature with only 4 args is still allowed, but no core
+-- estimator should be using it.)
 SELECT p1.oid, p1.oprname, p2.oid, p2.proname
 FROM pg_operator AS p1, pg_proc AS p2
 WHERE p1.oprjoin = p2.oid AND
     (p1.oprkind != 'b' OR p1.oprresult != 'bool'::regtype OR
      p2.prorettype != 'float8'::regtype OR p2.proretset OR
-     p2.pronargs != 4 OR
+     p2.pronargs != 5 OR
      p2.proargtypes[0] != 'internal'::regtype OR
      p2.proargtypes[1] != 'oid'::regtype OR
      p2.proargtypes[2] != 'internal'::regtype OR
-     p2.proargtypes[3] != 'int2'::regtype);
+     p2.proargtypes[3] != 'int2'::regtype OR
+     p2.proargtypes[4] != 'internal'::regtype);
  oid | oprname | oid | proname 
 -----+---------+-----+---------
 (0 rows)
diff --git a/src/test/regress/sql/opr_sanity.sql b/src/test/regress/sql/opr_sanity.sql
index 5017849830..57c2cad951 100644
--- a/src/test/regress/sql/opr_sanity.sql
+++ b/src/test/regress/sql/opr_sanity.sql
@@ -444,18 +444,21 @@ WHERE p1.oprrest = p2.oid AND
 -- If oprjoin is set, the operator must be a binary boolean op,
 -- and it must link to a proc with the right signature
 -- to be a join selectivity estimator.
--- The proc signature we want is: float8 proc(internal, oid, internal, int2)
+-- The proc signature we want is: float8 proc(internal, oid, internal, int2, internal)
+-- (Note: the old signature with only 4 args is still allowed, but no core
+-- estimator should be using it.)
 
 SELECT p1.oid, p1.oprname, p2.oid, p2.proname
 FROM pg_operator AS p1, pg_proc AS p2
 WHERE p1.oprjoin = p2.oid AND
     (p1.oprkind != 'b' OR p1.oprresult != 'bool'::regtype OR
      p2.prorettype != 'float8'::regtype OR p2.proretset OR
-     p2.pronargs != 4 OR
+     p2.pronargs != 5 OR
      p2.proargtypes[0] != 'internal'::regtype OR
      p2.proargtypes[1] != 'oid'::regtype OR
      p2.proargtypes[2] != 'internal'::regtype OR
-     p2.proargtypes[3] != 'int2'::regtype);
+     p2.proargtypes[3] != 'int2'::regtype OR
+     p2.proargtypes[4] != 'internal'::regtype);
 
 -- **************** pg_aggregate ****************