From ab05eedecc5c5b3a07ff101d29c9fef612f42996 Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Tue, 6 Feb 2007 02:59:15 +0000
Subject: [PATCH] Add support for cross-type hashing in hashed subplans (hashed
 IN/NOT IN cases that aren't turned into true joins).  Since this is the last
 missing bit of infrastructure, go ahead and fill out the hash integer_ops and
 float_ops opfamilies with cross-type operators.  The operator family project
 is now DONE ... er, except for documentation ...

---
 src/backend/executor/execGrouping.c      | 84 ++++++++++++++++++++----
 src/backend/executor/nodeSubplan.c       | 50 +++++++++-----
 src/backend/optimizer/plan/subselect.c   | 19 ++----
 src/backend/optimizer/util/pathnode.c    |  4 +-
 src/include/catalog/catversion.h         |  4 +-
 src/include/catalog/pg_amop.h            | 10 ++-
 src/include/catalog/pg_operator.h        | 20 +++---
 src/include/executor/executor.h          |  6 +-
 src/include/nodes/execnodes.h            | 25 +++++--
 src/test/regress/expected/opr_sanity.out | 35 ++++++++--
 src/test/regress/sql/opr_sanity.sql      | 34 ++++++++--
 11 files changed, 219 insertions(+), 72 deletions(-)

diff --git a/src/backend/executor/execGrouping.c b/src/backend/executor/execGrouping.c
index 08391bcc45..e6c9cf2a7d 100644
--- a/src/backend/executor/execGrouping.c
+++ b/src/backend/executor/execGrouping.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/executor/execGrouping.c,v 1.24 2007/01/30 01:33:36 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/executor/execGrouping.c,v 1.25 2007/02/06 02:59:11 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -178,7 +178,7 @@ execTuplesUnequal(TupleTableSlot *slot1,
 /*
  * execTuplesMatchPrepare
  *		Look up the equality functions needed for execTuplesMatch or
- *		execTuplesUnequal.
+ *		execTuplesUnequal, given an array of equality operator OIDs.
  *
  * The result is a palloc'd array.
  */
@@ -208,6 +208,8 @@ execTuplesMatchPrepare(int numCols,
  * This is similar to execTuplesMatchPrepare, but we also need to find the
  * hash functions associated with the equality operators.  *eqFunctions and
  * *hashFunctions receive the palloc'd result arrays.
+ *
+ * Note: we expect that the given operators are not cross-type comparisons.
  */
 void
 execTuplesHashPrepare(int numCols,
@@ -232,7 +234,7 @@ execTuplesHashPrepare(int numCols,
 								   &left_hash_function, &right_hash_function))
 			elog(ERROR, "could not find hash function for hash operator %u",
 				 eq_opr);
-		/* For the moment, we're not supporting cross-type cases here */
+		/* We're not supporting cross-type cases here */
 		Assert(left_hash_function == right_hash_function);
 		fmgr_info(eq_function, &(*eqFunctions)[i]);
 		fmgr_info(right_hash_function, &(*hashFunctions)[i]);
@@ -259,7 +261,9 @@ execTuplesHashPrepare(int numCols,
  *	tablecxt: memory context in which to store table and table entries
  *	tempcxt: short-lived context for evaluation hash and comparison functions
  *
- * The function arrays may be made with execTuplesHashPrepare().
+ * The function arrays may be made with execTuplesHashPrepare().  Note they
+ * are not cross-type functions, but expect to see the table datatype(s)
+ * on both sides.
  *
  * Note that keyColIdx, eqfunctions, and hashfunctions must be allocated in
  * storage that will live as long as the hashtable does.
@@ -282,13 +286,15 @@ BuildTupleHashTable(int numCols, AttrNumber *keyColIdx,
 
 	hashtable->numCols = numCols;
 	hashtable->keyColIdx = keyColIdx;
-	hashtable->eqfunctions = eqfunctions;
-	hashtable->hashfunctions = hashfunctions;
+	hashtable->tab_hash_funcs = hashfunctions;
+	hashtable->tab_eq_funcs = eqfunctions;
 	hashtable->tablecxt = tablecxt;
 	hashtable->tempcxt = tempcxt;
 	hashtable->entrysize = entrysize;
 	hashtable->tableslot = NULL;	/* will be made on first lookup */
 	hashtable->inputslot = NULL;
+	hashtable->in_hash_funcs = NULL;
+	hashtable->cur_eq_funcs = NULL;
 
 	MemSet(&hash_ctl, 0, sizeof(hash_ctl));
 	hash_ctl.keysize = sizeof(TupleHashEntryData);
@@ -305,7 +311,7 @@ BuildTupleHashTable(int numCols, AttrNumber *keyColIdx,
 
 /*
  * Find or create a hashtable entry for the tuple group containing the
- * given tuple.
+ * given tuple.  The tuple must be the same type as the hashtable entries.
  *
  * If isnew is NULL, we do not create new entries; we return NULL if no
  * match is found.
@@ -351,6 +357,9 @@ LookupTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
 	 * invoke this code re-entrantly.
 	 */
 	hashtable->inputslot = slot;
+	hashtable->in_hash_funcs = hashtable->tab_hash_funcs;
+	hashtable->cur_eq_funcs = hashtable->tab_eq_funcs;
+
 	saveCurHT = CurTupleHashTable;
 	CurTupleHashTable = hashtable;
 
@@ -394,6 +403,55 @@ LookupTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
 	return entry;
 }
 
+/*
+ * Search for a hashtable entry matching the given tuple; the result is NULL
+ * (and no entry is created) if there's no match.  This is similar to the
+ * non-creating case of LookupTupleHashEntry, except that it supports
+ * cross-type comparisons, in which the given tuple is not of the same type
+ * as the table entries.  The caller must provide the hash functions for the
+ * input tuple's types, plus equality functions that are applied with the
+ * input tuple's column on the left and the table entry's on the right.
+ */
+TupleHashEntry
+FindTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
+				   FmgrInfo *eqfunctions,
+				   FmgrInfo *hashfunctions)
+{
+	TupleHashEntry entry;
+	MemoryContext oldContext;
+	TupleHashTable saveCurHT;
+	TupleHashEntryData dummy;
+
+	/* Hash and equality functions must run in the short-lived tempcxt */
+	oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
+
+	/*
+	 * Set up data needed by hash and match functions: the input slot plus
+	 * the caller's (possibly cross-type) hash and equality function arrays.
+	 * We save and restore CurTupleHashTable just in case someone manages to
+	 * invoke this code re-entrantly.
+	 */
+	hashtable->inputslot = slot;
+	hashtable->in_hash_funcs = hashfunctions;
+	hashtable->cur_eq_funcs = eqfunctions;
+
+	saveCurHT = CurTupleHashTable;
+	CurTupleHashTable = hashtable;
+
+	/* Probe the hash table; HASH_FIND never creates an entry */
+	dummy.firstTuple = NULL;	/* flag to reference inputslot */
+	entry = (TupleHashEntry) hash_search(hashtable->hashtab,
+										 &dummy,
+										 HASH_FIND,
+										 NULL);
+
+	CurTupleHashTable = saveCurHT;
+
+	MemoryContextSwitchTo(oldContext);
+
+	return entry;
+}
+
 /*
  * Compute the hash value for a tuple
  *
@@ -418,6 +476,7 @@ TupleHashTableHash(const void *key, Size keysize)
 	TupleHashTable hashtable = CurTupleHashTable;
 	int			numCols = hashtable->numCols;
 	AttrNumber *keyColIdx = hashtable->keyColIdx;
+	FmgrInfo   *hashfunctions;
 	uint32		hashkey = 0;
 	int			i;
 
@@ -425,6 +484,7 @@ TupleHashTableHash(const void *key, Size keysize)
 	{
 		/* Process the current input tuple for the table */
 		slot = hashtable->inputslot;
+		hashfunctions = hashtable->in_hash_funcs;
 	}
 	else
 	{
@@ -432,6 +492,7 @@ TupleHashTableHash(const void *key, Size keysize)
 		/* (this case never actually occurs in current dynahash.c code) */
 		slot = hashtable->tableslot;
 		ExecStoreMinimalTuple(tuple, slot, false);
+		hashfunctions = hashtable->tab_hash_funcs;
 	}
 
 	for (i = 0; i < numCols; i++)
@@ -449,7 +510,7 @@ TupleHashTableHash(const void *key, Size keysize)
 		{
 			uint32		hkey;
 
-			hkey = DatumGetUInt32(FunctionCall1(&hashtable->hashfunctions[i],
+			hkey = DatumGetUInt32(FunctionCall1(&hashfunctions[i],
 												attr));
 			hashkey ^= hkey;
 		}
@@ -493,11 +554,12 @@ TupleHashTableMatch(const void *key1, const void *key2, Size keysize)
 	Assert(tuple2 == NULL);
 	slot2 = hashtable->inputslot;
 
-	if (execTuplesMatch(slot1,
-						slot2,
+	/* For crosstype comparisons, the inputslot must be first */
+	if (execTuplesMatch(slot2,
+						slot1,
 						hashtable->numCols,
 						hashtable->keyColIdx,
-						hashtable->eqfunctions,
+						hashtable->cur_eq_funcs,
 						hashtable->tempcxt))
 		return 0;
 	else
diff --git a/src/backend/executor/nodeSubplan.c b/src/backend/executor/nodeSubplan.c
index 0e840802eb..32167a94ef 100644
--- a/src/backend/executor/nodeSubplan.c
+++ b/src/backend/executor/nodeSubplan.c
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/executor/nodeSubplan.c,v 1.84 2007/02/02 00:07:03 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/executor/nodeSubplan.c,v 1.85 2007/02/06 02:59:11 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -139,7 +139,10 @@ ExecHashSubPlan(SubPlanState *node,
 	if (slotNoNulls(slot))
 	{
 		if (node->havehashrows &&
-			LookupTupleHashEntry(node->hashtable, slot, NULL) != NULL)
+			FindTupleHashEntry(node->hashtable,
+							   slot,
+							   node->cur_eq_funcs,
+							   node->lhs_hash_funcs) != NULL)
 		{
 			ExecClearTuple(slot);
 			return BoolGetDatum(true);
@@ -453,8 +456,8 @@ buildSubPlanHash(SubPlanState *node)
 
 	node->hashtable = BuildTupleHashTable(ncols,
 										  node->keyColIdx,
-										  node->eqfunctions,
-										  node->hashfunctions,
+										  node->tab_eq_funcs,
+										  node->tab_hash_funcs,
 										  nbuckets,
 										  sizeof(TupleHashEntryData),
 										  node->tablecxt,
@@ -472,8 +475,8 @@ buildSubPlanHash(SubPlanState *node)
 		}
 		node->hashnulls = BuildTupleHashTable(ncols,
 											  node->keyColIdx,
-											  node->eqfunctions,
-											  node->hashfunctions,
+											  node->tab_eq_funcs,
+											  node->tab_hash_funcs,
 											  nbuckets,
 											  sizeof(TupleHashEntryData),
 											  node->tablecxt,
@@ -573,9 +576,9 @@ findPartialMatch(TupleHashTable hashtable, TupleTableSlot *slot)
 	while ((entry = ScanTupleHashTable(&hashiter)) != NULL)
 	{
 		ExecStoreMinimalTuple(entry->firstTuple, hashtable->tableslot, false);
-		if (!execTuplesUnequal(hashtable->tableslot, slot,
+		if (!execTuplesUnequal(slot, hashtable->tableslot,
 							   numCols, keyColIdx,
-							   hashtable->eqfunctions,
+							   hashtable->cur_eq_funcs,
 							   hashtable->tempcxt))
 			return true;
 	}
@@ -653,8 +656,10 @@ ExecInitSubPlan(SubPlanState *node, EState *estate, int eflags)
 	node->tablecxt = NULL;
 	node->innerecontext = NULL;
 	node->keyColIdx = NULL;
-	node->eqfunctions = NULL;
-	node->hashfunctions = NULL;
+	node->tab_hash_funcs = NULL;
+	node->tab_eq_funcs = NULL;
+	node->lhs_hash_funcs = NULL;
+	node->cur_eq_funcs = NULL;
 
 	/*
 	 * create an EState for the subplan
@@ -781,8 +786,10 @@ ExecInitSubPlan(SubPlanState *node, EState *estate, int eflags)
 
 		lefttlist = righttlist = NIL;
 		leftptlist = rightptlist = NIL;
-		node->eqfunctions = (FmgrInfo *) palloc(ncols * sizeof(FmgrInfo));
-		node->hashfunctions = (FmgrInfo *) palloc(ncols * sizeof(FmgrInfo));
+		node->tab_hash_funcs = (FmgrInfo *) palloc(ncols * sizeof(FmgrInfo));
+		node->tab_eq_funcs = (FmgrInfo *) palloc(ncols * sizeof(FmgrInfo));
+		node->lhs_hash_funcs = (FmgrInfo *) palloc(ncols * sizeof(FmgrInfo));
+		node->cur_eq_funcs = (FmgrInfo *) palloc(ncols * sizeof(FmgrInfo));
 		i = 1;
 		foreach(l, oplist)
 		{
@@ -792,6 +799,7 @@ ExecInitSubPlan(SubPlanState *node, EState *estate, int eflags)
 			Expr	   *expr;
 			TargetEntry *tle;
 			GenericExprState *tlestate;
+			Oid			rhs_eq_oper;
 			Oid			left_hashfn;
 			Oid			right_hashfn;
 
@@ -827,18 +835,24 @@ ExecInitSubPlan(SubPlanState *node, EState *estate, int eflags)
 			righttlist = lappend(righttlist, tlestate);
 			rightptlist = lappend(rightptlist, tle);
 
-			/* Lookup the combining function */
-			fmgr_info(opexpr->opfuncid, &node->eqfunctions[i - 1]);
-			node->eqfunctions[i - 1].fn_expr = (Node *) opexpr;
+			/* Lookup the equality function (potentially cross-type) */
+			fmgr_info(opexpr->opfuncid, &node->cur_eq_funcs[i - 1]);
+			node->cur_eq_funcs[i - 1].fn_expr = (Node *) opexpr;
+
+			/* Look up the equality function for the RHS type */
+			if (!get_compatible_hash_operators(opexpr->opno,
+											   NULL, &rhs_eq_oper))
+				elog(ERROR, "could not find compatible hash operator for operator %u",
+					 opexpr->opno);
+			fmgr_info(get_opcode(rhs_eq_oper), &node->tab_eq_funcs[i - 1]);
 
 			/* Lookup the associated hash functions */
 			if (!get_op_hash_functions(opexpr->opno,
 									   &left_hashfn, &right_hashfn))
 				elog(ERROR, "could not find hash function for hash operator %u",
 					 opexpr->opno);
-			/* For the moment, not supporting cross-type cases */
-			Assert(left_hashfn == right_hashfn);
-			fmgr_info(right_hashfn, &node->hashfunctions[i - 1]);
+			fmgr_info(left_hashfn, &node->lhs_hash_funcs[i - 1]);
+			fmgr_info(right_hashfn, &node->tab_hash_funcs[i - 1]);
 
 			i++;
 		}
diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c
index 7339445e04..e79991a0f6 100644
--- a/src/backend/optimizer/plan/subselect.c
+++ b/src/backend/optimizer/plan/subselect.c
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/subselect.c,v 1.117 2007/01/10 18:06:03 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/subselect.c,v 1.118 2007/02/06 02:59:11 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -598,17 +598,13 @@ subplan_is_hashable(SubLink *slink, SubPlan *node)
 		return false;
 
 	/*
-	 * The combining operators must be hashable, strict, and self-commutative.
+	 * The combining operators must be hashable and strict.
 	 * The need for hashability is obvious, since we want to use hashing.
 	 * Without strictness, behavior in the presence of nulls is too
-	 * unpredictable.  (We actually must assume even more than plain
-	 * strictness, see nodeSubplan.c for details.)	And commutativity ensures
-	 * that the left and right datatypes are the same; this allows us to
-	 * assume that the combining operators are equality for the righthand
-	 * datatype, so that they can be used to compare righthand tuples as well
-	 * as comparing lefthand to righthand tuples.  (This last restriction
-	 * could be relaxed by using two different sets of operators with the hash
-	 * table, but there is no obvious usefulness to that at present.)
+	 * unpredictable.  We actually must assume even more than plain
+	 * strictness: they can't yield NULL for non-null inputs, either
+	 * (see nodeSubplan.c).  However, hash indexes and hash joins assume
+	 * that too.
 	 */
 	if (IsA(slink->testexpr, OpExpr))
 	{
@@ -644,8 +640,7 @@ hash_ok_operator(OpExpr *expr)
 	if (!HeapTupleIsValid(tup))
 		elog(ERROR, "cache lookup failed for operator %u", opid);
 	optup = (Form_pg_operator) GETSTRUCT(tup);
-	if (!optup->oprcanhash || optup->oprcom != opid ||
-		!func_strict(optup->oprcode))
+	if (!optup->oprcanhash || !func_strict(optup->oprcode))
 	{
 		ReleaseSysCache(tup);
 		return false;
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index 5832d145ef..81f7c99e96 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.137 2007/01/20 20:45:39 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.138 2007/02/06 02:59:12 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1070,7 +1070,7 @@ distinct_col_search(int colno, List *colnos, List *opids)
  * We assume hashed aggregation will work if each IN operator is marked
  * hashjoinable.  If the IN operators are cross-type, this could conceivably
  * fail: the aggregation will need a hashable equality operator for the RHS
- * datatype --- but it's pretty hard to conceive of a hash opclass that has
+ * datatype --- but it's pretty hard to conceive of a hash opfamily that has
  * cross-type hashing without support for hashing the individual types, so
  * we don't expend cycles here to support the case.  We could check
  * get_compatible_hash_operator() instead of just op_hashjoinable(), but the
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h
index 6aebea61dd..154b56b32d 100644
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -37,7 +37,7 @@
  * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.380 2007/02/05 04:22:18 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.381 2007/02/06 02:59:12 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -53,6 +53,6 @@
  */
 
 /*							yyyymmddN */
-#define CATALOG_VERSION_NO	200702041
+#define CATALOG_VERSION_NO	200702051
 
 #endif
diff --git a/src/include/catalog/pg_amop.h b/src/include/catalog/pg_amop.h
index 01e47df7d9..c1023df6ea 100644
--- a/src/include/catalog/pg_amop.h
+++ b/src/include/catalog/pg_amop.h
@@ -29,7 +29,7 @@
  * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/catalog/pg_amop.h,v 1.78 2007/01/28 16:16:52 neilc Exp $
+ * $PostgreSQL: pgsql/src/include/catalog/pg_amop.h,v 1.79 2007/02/06 02:59:12 tgl Exp $
  *
  * NOTES
  *	 the genbki.sh script reads this file and generates .bki
@@ -510,12 +510,20 @@ DATA(insert (	435   1082 1082 1 f 1093	405 ));
 /* float_ops */
 DATA(insert (	1971   700 700 1 f  620	405 ));
 DATA(insert (	1971   701 701 1 f  670	405 ));
+DATA(insert (	1971   700 701 1 f 1120	405 ));
+DATA(insert (	1971   701 700 1 f 1130	405 ));
 /* network_ops */
 DATA(insert (	1975   869 869 1 f 1201	405 ));
 /* integer_ops */
 DATA(insert (	1977   21 21 1 f	94	405 ));
 DATA(insert (	1977   23 23 1 f	96	405 ));
 DATA(insert (	1977   20 20 1 f	410	405 ));
+DATA(insert (	1977   21 23 1 f	532	405 ));
+DATA(insert (	1977   21 20 1 f   1862	405 ));
+DATA(insert (	1977   23 21 1 f	533	405 ));
+DATA(insert (	1977   23 20 1 f	15	405 ));
+DATA(insert (	1977   20 21 1 f   1868	405 ));
+DATA(insert (	1977   20 23 1 f	416	405 ));
 /* interval_ops */
 DATA(insert (	1983   1186 1186 1 f 1330	405 ));
 /* macaddr_ops */
diff --git a/src/include/catalog/pg_operator.h b/src/include/catalog/pg_operator.h
index 6411da05c3..e53f74b519 100644
--- a/src/include/catalog/pg_operator.h
+++ b/src/include/catalog/pg_operator.h
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/catalog/pg_operator.h,v 1.149 2007/01/28 16:16:52 neilc Exp $
+ * $PostgreSQL: pgsql/src/include/catalog/pg_operator.h,v 1.150 2007/02/06 02:59:12 tgl Exp $
  *
  * NOTES
  *	  the genbki.sh script reads this file and generates .bki
@@ -89,7 +89,7 @@ typedef FormData_pg_operator *Form_pg_operator;
  * ----------------
  */
 
-DATA(insert OID =  15 ( "="		   PGNSP PGUID b t f	23	20	16 416	36 int48eq eqsel eqjoinsel ));
+DATA(insert OID =  15 ( "="		   PGNSP PGUID b t t	23	20	16 416	36 int48eq eqsel eqjoinsel ));
 DATA(insert OID =  36 ( "<>"	   PGNSP PGUID b f f	23	20	16 417	15 int48ne neqsel neqjoinsel ));
 DATA(insert OID =  37 ( "<"		   PGNSP PGUID b f f	23	20	16 419	82 int48lt scalarltsel scalarltjoinsel ));
 DATA(insert OID =  76 ( ">"		   PGNSP PGUID b f f	23	20	16 418	80 int48gt scalargtsel scalargtjoinsel ));
@@ -139,7 +139,7 @@ DATA(insert OID = 413 ( ">"		   PGNSP PGUID b f f	20	20	16 412 414 int8gt scalar
 DATA(insert OID = 414 ( "<="	   PGNSP PGUID b f f	20	20	16 415 413 int8le scalarltsel scalarltjoinsel ));
 DATA(insert OID = 415 ( ">="	   PGNSP PGUID b f f	20	20	16 414 412 int8ge scalargtsel scalargtjoinsel ));
 
-DATA(insert OID = 416 ( "="		   PGNSP PGUID b t f	20	23	16	15 417 int84eq eqsel eqjoinsel ));
+DATA(insert OID = 416 ( "="		   PGNSP PGUID b t t	20	23	16	15 417 int84eq eqsel eqjoinsel ));
 DATA(insert OID = 417 ( "<>"	   PGNSP PGUID b f f	20	23	16	36 416 int84ne neqsel neqjoinsel ));
 DATA(insert OID = 418 ( "<"		   PGNSP PGUID b f f	20	23	16	76 430 int84lt scalarltsel scalarltjoinsel ));
 DATA(insert OID = 419 ( ">"		   PGNSP PGUID b f f	20	23	16	37 420 int84gt scalargtsel scalargtjoinsel ));
@@ -194,8 +194,8 @@ DATA(insert OID = 528 (  "/"	   PGNSP PGUID b f f	23	23	23	 0	 0 int4div - - ));
 DATA(insert OID = 529 (  "%"	   PGNSP PGUID b f f	21	21	21	 0	 0 int2mod - - ));
 DATA(insert OID = 530 (  "%"	   PGNSP PGUID b f f	23	23	23	 0	 0 int4mod - - ));
 DATA(insert OID = 531 (  "<>"	   PGNSP PGUID b f f	25	25	16 531	98 textne neqsel neqjoinsel ));
-DATA(insert OID = 532 (  "="	   PGNSP PGUID b t f	21	23	16 533 538 int24eq eqsel eqjoinsel ));
-DATA(insert OID = 533 (  "="	   PGNSP PGUID b t f	23	21	16 532 539 int42eq eqsel eqjoinsel ));
+DATA(insert OID = 532 (  "="	   PGNSP PGUID b t t	21	23	16 533 538 int24eq eqsel eqjoinsel ));
+DATA(insert OID = 533 (  "="	   PGNSP PGUID b t t	23	21	16 532 539 int42eq eqsel eqjoinsel ));
 DATA(insert OID = 534 (  "<"	   PGNSP PGUID b f f	21	23	16 537 542 int24lt scalarltsel scalarltjoinsel ));
 DATA(insert OID = 535 (  "<"	   PGNSP PGUID b f f	23	21	16 536 543 int42lt scalarltsel scalarltjoinsel ));
 DATA(insert OID = 536 (  ">"	   PGNSP PGUID b f f	21	23	16 535 540 int24gt scalargtsel scalargtjoinsel ));
@@ -479,7 +479,7 @@ DATA(insert OID = 1116 (  "+"		PGNSP PGUID b f f 700 701 701 1126	 0 float48pl -
 DATA(insert OID = 1117 (  "-"		PGNSP PGUID b f f 700 701 701  0	 0 float48mi - - ));
 DATA(insert OID = 1118 (  "/"		PGNSP PGUID b f f 700 701 701  0	 0 float48div - - ));
 DATA(insert OID = 1119 (  "*"		PGNSP PGUID b f f 700 701 701 1129	 0 float48mul - - ));
-DATA(insert OID = 1120 (  "="		PGNSP PGUID b t f  700	701  16 1130 1121 float48eq eqsel eqjoinsel ));
+DATA(insert OID = 1120 (  "="		PGNSP PGUID b t t  700	701  16 1130 1121 float48eq eqsel eqjoinsel ));
 DATA(insert OID = 1121 (  "<>"		PGNSP PGUID b f f  700	701  16 1131 1120 float48ne neqsel neqjoinsel ));
 DATA(insert OID = 1122 (  "<"		PGNSP PGUID b f f  700	701  16 1133 1125 float48lt scalarltsel scalarltjoinsel ));
 DATA(insert OID = 1123 (  ">"		PGNSP PGUID b f f  700	701  16 1132 1124 float48gt scalargtsel scalargtjoinsel ));
@@ -491,7 +491,7 @@ DATA(insert OID = 1126 (  "+"		PGNSP PGUID b f f 701 700 701 1116	 0 float84pl -
 DATA(insert OID = 1127 (  "-"		PGNSP PGUID b f f 701 700 701  0	 0 float84mi - - ));
 DATA(insert OID = 1128 (  "/"		PGNSP PGUID b f f 701 700 701  0	 0 float84div - - ));
 DATA(insert OID = 1129 (  "*"		PGNSP PGUID b f f 701 700 701 1119	 0 float84mul - - ));
-DATA(insert OID = 1130 (  "="		PGNSP PGUID b t f  701	700  16 1120 1131 float84eq eqsel eqjoinsel ));
+DATA(insert OID = 1130 (  "="		PGNSP PGUID b t t  701	700  16 1120 1131 float84eq eqsel eqjoinsel ));
 DATA(insert OID = 1131 (  "<>"		PGNSP PGUID b f f  701	700  16 1121 1130 float84ne neqsel neqjoinsel ));
 DATA(insert OID = 1132 (  "<"		PGNSP PGUID b f f  701	700  16 1123 1135 float84lt scalarltsel scalarltjoinsel ));
 DATA(insert OID = 1133 (  ">"		PGNSP PGUID b f f  701	700  16 1122 1134 float84gt scalargtsel scalargtjoinsel ));
@@ -717,14 +717,14 @@ DATA(insert OID = 1809 (  ">="	  PGNSP PGUID b f f 1562 1562 16 1808 1806 varbit
 
 DATA(insert OID = 1849 (  "+"	   PGNSP PGUID b f f 1186 1083 1083	 1800 0 interval_pl_time - - ));
 
-DATA(insert OID = 1862 ( "="	   PGNSP PGUID b t f	21	20	16 1868  1863 int28eq eqsel eqjoinsel ));
+DATA(insert OID = 1862 ( "="	   PGNSP PGUID b t t	21	20	16 1868  1863 int28eq eqsel eqjoinsel ));
 DATA(insert OID = 1863 ( "<>"	   PGNSP PGUID b f f	21	20	16 1869  1862 int28ne neqsel neqjoinsel ));
 DATA(insert OID = 1864 ( "<"	   PGNSP PGUID b f f	21	20	16 1871  1867 int28lt scalarltsel scalarltjoinsel ));
 DATA(insert OID = 1865 ( ">"	   PGNSP PGUID b f f	21	20	16 1870  1866 int28gt scalargtsel scalargtjoinsel ));
 DATA(insert OID = 1866 ( "<="	   PGNSP PGUID b f f	21	20	16 1873  1865 int28le scalarltsel scalarltjoinsel ));
 DATA(insert OID = 1867 ( ">="	   PGNSP PGUID b f f	21	20	16 1872  1864 int28ge scalargtsel scalargtjoinsel ));
 
-DATA(insert OID = 1868 ( "="	   PGNSP PGUID b t f	20	21	16	1862 1869 int82eq eqsel eqjoinsel ));
+DATA(insert OID = 1868 ( "="	   PGNSP PGUID b t t	20	21	16	1862 1869 int82eq eqsel eqjoinsel ));
 DATA(insert OID = 1869 ( "<>"	   PGNSP PGUID b f f	20	21	16	1863 1868 int82ne neqsel neqjoinsel ));
 DATA(insert OID = 1870 ( "<"	   PGNSP PGUID b f f	20	21	16	1865 1873 int82lt scalarltsel scalarltjoinsel ));
 DATA(insert OID = 1871 ( ">"	   PGNSP PGUID b f f	20	21	16	1864 1872 int82gt scalargtsel scalargtjoinsel ));
@@ -858,7 +858,7 @@ DATA(insert OID = 2553 (  "+"	   PGNSP PGUID b f f	1186 1114 1114 2066 0 interva
 DATA(insert OID = 2554 (  "+"	   PGNSP PGUID b f f	1186 1184 1184 1327 0 interval_pl_timestamptz - - ));
 DATA(insert OID = 2555 (  "+"	   PGNSP PGUID b f f	23   1082 1082 1100 0 integer_pl_date - - ));
 
-/* new operators for Y-direction rtree opclasses */
+/* new operators for Y-direction rtree opfamilies */
 DATA(insert OID = 2570 (  "<<|"    PGNSP PGUID b f f 603 603	16	 0	 0 box_below positionsel positionjoinsel ));
 DATA(insert OID = 2571 (  "&<|"    PGNSP PGUID b f f 603 603	16	 0	 0 box_overbelow positionsel positionjoinsel ));
 DATA(insert OID = 2572 (  "|&>"    PGNSP PGUID b f f 603 603	16	 0	 0 box_overabove positionsel positionjoinsel ));
diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h
index 1800f4cb03..bfbe1ba2f3 100644
--- a/src/include/executor/executor.h
+++ b/src/include/executor/executor.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/executor/executor.h,v 1.135 2007/02/02 00:07:03 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/executor/executor.h,v 1.136 2007/02/06 02:59:13 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -99,6 +99,10 @@ extern TupleHashTable BuildTupleHashTable(int numCols, AttrNumber *keyColIdx,
 extern TupleHashEntry LookupTupleHashEntry(TupleHashTable hashtable,
 					 TupleTableSlot *slot,
 					 bool *isnew);
+extern TupleHashEntry FindTupleHashEntry(TupleHashTable hashtable,
+										 TupleTableSlot *slot,
+										 FmgrInfo *eqfunctions,
+										 FmgrInfo *hashfunctions);
 
 /*
  * prototypes from functions in execJunk.c
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index da1ae70872..35a0ab3a60 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/nodes/execnodes.h,v 1.166 2007/01/05 22:19:55 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/nodes/execnodes.h,v 1.167 2007/02/06 02:59:13 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -367,6 +367,16 @@ typedef struct ExecRowMark
  *				 Tuple Hash Tables
  *
  * All-in-memory tuple hash tables are used for a number of purposes.
+ *
+ * Note: tab_hash_funcs are for the key datatype(s) stored in the table,
+ * and tab_eq_funcs are non-cross-type equality operators for those types.
+ * Normally these are the only functions used, but FindTupleHashEntry()
+ * supports searching a hashtable using cross-data-type hashing.  For that,
+ * the caller must supply hash functions for the LHS datatype as well as
+ * the cross-type equality operators to use.  in_hash_funcs and cur_eq_funcs
+ * are set to point to the caller's function arrays while doing such a search.
+ * During LookupTupleHashEntry(), they point to tab_hash_funcs and
+ * tab_eq_funcs respectively.
  * ----------------------------------------------------------------
  */
 typedef struct TupleHashEntryData *TupleHashEntry;
@@ -384,13 +394,16 @@ typedef struct TupleHashTableData
 	HTAB	   *hashtab;		/* underlying dynahash table */
 	int			numCols;		/* number of columns in lookup key */
 	AttrNumber *keyColIdx;		/* attr numbers of key columns */
-	FmgrInfo   *eqfunctions;	/* lookup data for comparison functions */
-	FmgrInfo   *hashfunctions;	/* lookup data for hash functions */
+	FmgrInfo   *tab_hash_funcs;	/* hash functions for table datatype(s) */
+	FmgrInfo   *tab_eq_funcs;	/* equality functions for table datatype(s) */
 	MemoryContext tablecxt;		/* memory context containing table */
 	MemoryContext tempcxt;		/* context for function evaluations */
 	Size		entrysize;		/* actual size to make each hash entry */
 	TupleTableSlot *tableslot;	/* slot for referencing table entries */
+	/* The following fields are set transiently for each table search: */
 	TupleTableSlot *inputslot;	/* current input tuple's slot */
+	FmgrInfo   *in_hash_funcs;	/* hash functions for input datatype(s) */
+	FmgrInfo   *cur_eq_funcs;	/* equality functions for input vs. table */
 } TupleHashTableData;
 
 typedef HASH_SEQ_STATUS TupleHashIterator;
@@ -585,8 +598,10 @@ typedef struct SubPlanState
 	MemoryContext tablecxt;		/* memory context containing tables */
 	ExprContext *innerecontext; /* working context for comparisons */
 	AttrNumber *keyColIdx;		/* control data for hash tables */
-	FmgrInfo   *eqfunctions;	/* comparison functions for hash tables */
-	FmgrInfo   *hashfunctions;	/* lookup data for hash functions */
+	FmgrInfo   *tab_hash_funcs;	/* hash functions for table datatype(s) */
+	FmgrInfo   *tab_eq_funcs;	/* equality functions for table datatype(s) */
+	FmgrInfo   *lhs_hash_funcs;	/* hash functions for lefthand datatype(s) */
+	FmgrInfo   *cur_eq_funcs;	/* equality functions for LHS vs. table */
 } SubPlanState;
 
 /* ----------------
diff --git a/src/test/regress/expected/opr_sanity.out b/src/test/regress/expected/opr_sanity.out
index fcbfe3cad5..2bc25de052 100644
--- a/src/test/regress/expected/opr_sanity.out
+++ b/src/test/regress/expected/opr_sanity.out
@@ -829,13 +829,16 @@ WHERE p1.amopopr = p2.oid AND p2.oprcode = p3.oid AND
 ------------+---------+---------+--------
 (0 rows)
 
--- Multiple-datatype btree opclasses should provide closed sets of equality
+-- Multiple-datatype btree opfamilies should provide closed sets of equality
 -- operators; that is if you provide int2 = int4 and int4 = int8 then you
--- must also provide int2 = int8 (and commutators of all these).  This is
--- necessary because the planner tries to deduce additional qual clauses from
+-- should also provide int2 = int8 (and commutators of all these).  This is
+-- important because the planner tries to deduce additional qual clauses from
 -- transitivity of mergejoinable operators.  If there are clauses
--- int2var = int4var and int4var = int8var, the planner will deduce
--- int2var = int8var ... and it had better have a way to represent it.
+-- int2var = int4var and int4var = int8var, the planner will want to deduce
+-- int2var = int8var ... so there should be a way to represent that.  While
+-- a missing cross-type operator is now only an efficiency loss rather than
+-- an error condition, it still seems reasonable to insist that all built-in
+-- opfamilies be complete.
 -- check commutative closure
 SELECT p1.amoplefttype, p1.amoprighttype
 FROM pg_amop AS p1
@@ -870,6 +873,28 @@ WHERE p1.amopfamily = p2.amopfamily AND
 --------------+---------------+---------------
 (0 rows)
 
+-- We also expect that built-in multiple-datatype hash opfamilies provide
+-- complete sets of cross-type operators.  Again, this isn't required, but
+-- it is reasonable to expect it for built-in opfamilies.
+-- if same family has x=x and y=y, it should have x=y
+SELECT p1.amoplefttype, p2.amoplefttype
+FROM pg_amop AS p1, pg_amop AS p2
+WHERE p1.amopfamily = p2.amopfamily AND
+    p1.amoplefttype = p1.amoprighttype AND
+    p2.amoplefttype = p2.amoprighttype AND
+    p1.amopmethod = (SELECT oid FROM pg_am WHERE amname = 'hash') AND
+    p2.amopmethod = (SELECT oid FROM pg_am WHERE amname = 'hash') AND
+    p1.amopstrategy = 1 AND p2.amopstrategy = 1 AND
+    p1.amoplefttype != p2.amoplefttype AND
+    NOT EXISTS(SELECT 1 FROM pg_amop p3 WHERE
+                 p3.amopfamily = p1.amopfamily AND
+                 p3.amoplefttype = p1.amoplefttype AND
+                 p3.amoprighttype = p2.amoplefttype AND
+                 p3.amopstrategy = 1);
+ amoplefttype | amoplefttype 
+--------------+--------------
+(0 rows)
+
 -- **************** pg_amproc ****************
 -- Look for illegal values in pg_amproc fields
 SELECT p1.amprocfamily, p1.amprocnum
diff --git a/src/test/regress/sql/opr_sanity.sql b/src/test/regress/sql/opr_sanity.sql
index cbf9baf672..9496422458 100644
--- a/src/test/regress/sql/opr_sanity.sql
+++ b/src/test/regress/sql/opr_sanity.sql
@@ -656,13 +656,16 @@ WHERE p1.amopopr = p2.oid AND p2.oprcode = p3.oid AND
     p1.amoplefttype != p1.amoprighttype AND
     p3.provolatile = 'v';
 
--- Multiple-datatype btree opclasses should provide closed sets of equality
+-- Multiple-datatype btree opfamilies should provide closed sets of equality
 -- operators; that is if you provide int2 = int4 and int4 = int8 then you
--- must also provide int2 = int8 (and commutators of all these).  This is
--- necessary because the planner tries to deduce additional qual clauses from
+-- should also provide int2 = int8 (and commutators of all these).  This is
+-- important because the planner tries to deduce additional qual clauses from
 -- transitivity of mergejoinable operators.  If there are clauses
--- int2var = int4var and int4var = int8var, the planner will deduce
--- int2var = int8var ... and it had better have a way to represent it.
+-- int2var = int4var and int4var = int8var, the planner will want to deduce
+-- int2var = int8var ... so there should be a way to represent that.  While
+-- a missing cross-type operator is now only an efficiency loss rather than
+-- an error condition, it still seems reasonable to insist that all built-in
+-- opfamilies be complete.
 
 -- check commutative closure
 SELECT p1.amoplefttype, p1.amoprighttype
@@ -692,6 +695,27 @@ WHERE p1.amopfamily = p2.amopfamily AND
                  p3.amoprighttype = p2.amoprighttype AND
                  p3.amopstrategy = 3);
 
+-- We also expect that built-in multiple-datatype hash opfamilies provide
+-- complete sets of cross-type operators.  Again, this isn't required, but
+-- it is reasonable to expect it for built-in opfamilies.
+
+-- if same family has x=x and y=y, it should have x=y
+SELECT p1.amoplefttype, p2.amoplefttype
+FROM pg_amop AS p1, pg_amop AS p2
+WHERE p1.amopfamily = p2.amopfamily AND
+    p1.amoplefttype = p1.amoprighttype AND
+    p2.amoplefttype = p2.amoprighttype AND
+    p1.amopmethod = (SELECT oid FROM pg_am WHERE amname = 'hash') AND
+    p2.amopmethod = (SELECT oid FROM pg_am WHERE amname = 'hash') AND
+    p1.amopstrategy = 1 AND p2.amopstrategy = 1 AND
+    p1.amoplefttype != p2.amoplefttype AND
+    NOT EXISTS(SELECT 1 FROM pg_amop p3 WHERE
+                 p3.amopfamily = p1.amopfamily AND
+                 p3.amoplefttype = p1.amoplefttype AND
+                 p3.amoprighttype = p2.amoplefttype AND
+                 p3.amopstrategy = 1);
+
+
 -- **************** pg_amproc ****************
 
 -- Look for illegal values in pg_amproc fields