Fix mergejoin cost estimation so that we consider the statistical ranges of
the two join variables at both ends: not only the trailing rows that need not
be scanned because there can be no match on the other side, but also the
initial rows that will be scanned without any possibility of a match. This
allows a more realistic estimate of startup cost to be made, per recent
pgsql-performance discussion. In passing, fix a couple of bugs that had crept
into mergejoinscansel: it was not quite up to speed for the task of estimating
descending-order scans, which is a new requirement in 8.3.
parent 8821612854
commit 9fd8843647
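As a rough illustration of the estimate described above, the sketch below shows the arithmetic the patched cost_mergejoin performs once mergejoinscansel reports a "start" and an "end" fraction for an input: rows below the start fraction are skipped before the first join pair and their cost is charged to startup, while only the span between the two fractions contributes to run cost. This is a minimal standalone sketch, not the planner's code; the variable names and the example numbers are assumptions chosen for illustration.

#include <math.h>
#include <stdio.h>

/*
 * Hypothetical illustration of the startup/run cost split introduced by this
 * commit.  "startsel" is the fraction of an input scanned before the first
 * join pair is found, "endsel" the fraction scanned before the join stops.
 */
int
main(void)
{
    double path_rows = 100000.0;    /* rows produced by the input path */
    double path_startup = 50.0;     /* input path's own startup cost */
    double path_total = 2500.0;     /* input path's total cost */
    double startsel = 0.30;         /* assumed first-match fraction */
    double endsel = 0.80;           /* assumed last-match fraction */

    /* rows read before the first match, and rows read in total */
    double skip_rows = rint(path_rows * startsel);
    double scan_rows = rint(path_rows * endsel);

    /* the skipped fraction is charged to startup cost, the rest to run cost */
    double startup_cost = path_startup +
        (path_total - path_startup) * startsel;
    double run_cost = (path_total - path_startup) * (endsel - startsel);

    printf("skip %.0f rows, scan %.0f rows: startup %.1f, run %.1f\n",
           skip_rows, scan_rows, startup_cost, run_cost);
    return 0;
}

Compiling and running this (with -lm) prints a startup cost of 785.0 and a run cost of 1225.0 for the assumed 30%/80% fractions, mirroring how the patch shifts the skipped part of the scan into startup cost instead of run cost.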
src/backend/optimizer/path/costsize.c

@@ -54,7 +54,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *    $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.189 2007/11/15 22:25:15 momjian Exp $
+ *    $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.190 2007/12/08 21:05:11 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1372,12 +1372,16 @@ cost_mergejoin(MergePath *path, PlannerInfo *root)
     double      outer_path_rows = PATH_ROWS(outer_path);
     double      inner_path_rows = PATH_ROWS(inner_path);
     double      outer_rows,
-                inner_rows;
+                inner_rows,
+                outer_skip_rows,
+                inner_skip_rows;
     double      mergejointuples,
                 rescannedtuples;
     double      rescanratio;
-    Selectivity outerscansel,
-                innerscansel;
+    Selectivity outerstartsel,
+                outerendsel,
+                innerstartsel,
+                innerendsel;
     Selectivity joininfactor;
     Path        sort_path;      /* dummy for result of cost_sort */

@@ -1444,10 +1448,12 @@ cost_mergejoin(MergePath *path, PlannerInfo *root)
      * A merge join will stop as soon as it exhausts either input stream
      * (unless it's an outer join, in which case the outer side has to be
      * scanned all the way anyway).  Estimate fraction of the left and right
-     * inputs that will actually need to be scanned.  We use only the first
-     * (most significant) merge clause for this purpose.  Since
-     * mergejoinscansel() is a fairly expensive computation, we cache the
-     * results in the merge clause RestrictInfo.
+     * inputs that will actually need to be scanned.  Likewise, we can
+     * estimate the number of rows that will be skipped before the first
+     * join pair is found, which should be factored into startup cost.
+     * We use only the first (most significant) merge clause for this purpose.
+     * Since mergejoinscansel() is a fairly expensive computation, we cache
+     * the results in the merge clause RestrictInfo.
      */
     if (mergeclauses && path->jpath.jointype != JOIN_FULL)
     {
@@ -1478,37 +1484,61 @@ cost_mergejoin(MergePath *path, PlannerInfo *root)
                               outer_path->parent->relids))
         {
             /* left side of clause is outer */
-            outerscansel = cache->leftscansel;
-            innerscansel = cache->rightscansel;
+            outerstartsel = cache->leftstartsel;
+            outerendsel = cache->leftendsel;
+            innerstartsel = cache->rightstartsel;
+            innerendsel = cache->rightendsel;
         }
         else
         {
             /* left side of clause is inner */
-            outerscansel = cache->rightscansel;
-            innerscansel = cache->leftscansel;
+            outerstartsel = cache->rightstartsel;
+            outerendsel = cache->rightendsel;
+            innerstartsel = cache->leftstartsel;
+            innerendsel = cache->leftendsel;
         }
         if (path->jpath.jointype == JOIN_LEFT)
-            outerscansel = 1.0;
+        {
+            outerstartsel = 0.0;
+            outerendsel = 1.0;
+        }
         else if (path->jpath.jointype == JOIN_RIGHT)
-            innerscansel = 1.0;
+        {
+            innerstartsel = 0.0;
+            innerendsel = 1.0;
+        }
     }
     else
     {
         /* cope with clauseless or full mergejoin */
-        outerscansel = innerscansel = 1.0;
+        outerstartsel = innerstartsel = 0.0;
+        outerendsel = innerendsel = 1.0;
     }

-    /* convert selectivity to row count; must scan at least one row */
-    outer_rows = clamp_row_est(outer_path_rows * outerscansel);
-    inner_rows = clamp_row_est(inner_path_rows * innerscansel);
+    /*
+     * Convert selectivities to row counts.  We force outer_rows and
+     * inner_rows to be at least 1, but the skip_rows estimates can be zero.
+     */
+    outer_skip_rows = rint(outer_path_rows * outerstartsel);
+    inner_skip_rows = rint(inner_path_rows * innerstartsel);
+    outer_rows = clamp_row_est(outer_path_rows * outerendsel);
+    inner_rows = clamp_row_est(inner_path_rows * innerendsel);
+
+    Assert(outer_skip_rows <= outer_rows);
+    Assert(inner_skip_rows <= inner_rows);

     /*
      * Readjust scan selectivities to account for above rounding.  This is
      * normally an insignificant effect, but when there are only a few rows in
      * the inputs, failing to do this makes for a large percentage error.
      */
-    outerscansel = outer_rows / outer_path_rows;
-    innerscansel = inner_rows / inner_path_rows;
+    outerstartsel = outer_skip_rows / outer_path_rows;
+    innerstartsel = inner_skip_rows / inner_path_rows;
+    outerendsel = outer_rows / outer_path_rows;
+    innerendsel = inner_rows / inner_path_rows;
+
+    Assert(outerstartsel <= outerendsel);
+    Assert(innerstartsel <= innerendsel);

     /* cost of source data */

@@ -1522,14 +1552,18 @@ cost_mergejoin(MergePath *path, PlannerInfo *root)
                           outer_path->parent->width,
                           -1.0);
         startup_cost += sort_path.startup_cost;
+        startup_cost += (sort_path.total_cost - sort_path.startup_cost)
+            * outerstartsel;
         run_cost += (sort_path.total_cost - sort_path.startup_cost)
-            * outerscansel;
+            * (outerendsel - outerstartsel);
     }
     else
     {
         startup_cost += outer_path->startup_cost;
+        startup_cost += (outer_path->total_cost - outer_path->startup_cost)
+            * outerstartsel;
         run_cost += (outer_path->total_cost - outer_path->startup_cost)
-            * outerscansel;
+            * (outerendsel - outerstartsel);
     }

     if (innersortkeys)          /* do we need to sort inner? */
@@ -1542,14 +1576,18 @@ cost_mergejoin(MergePath *path, PlannerInfo *root)
                           inner_path->parent->width,
                           -1.0);
         startup_cost += sort_path.startup_cost;
+        startup_cost += (sort_path.total_cost - sort_path.startup_cost)
+            * innerstartsel * rescanratio;
         run_cost += (sort_path.total_cost - sort_path.startup_cost)
-            * innerscansel * rescanratio;
+            * (innerendsel - innerstartsel) * rescanratio;
     }
     else
     {
         startup_cost += inner_path->startup_cost;
+        startup_cost += (inner_path->total_cost - inner_path->startup_cost)
+            * innerstartsel * rescanratio;
         run_cost += (inner_path->total_cost - inner_path->startup_cost)
-            * innerscansel * rescanratio;
+            * (innerendsel - innerstartsel) * rescanratio;
     }

     /* CPU costs */
@@ -1571,8 +1609,11 @@
      * joininfactor.
      */
     startup_cost += merge_qual_cost.startup;
+    startup_cost += merge_qual_cost.per_tuple *
+        (outer_skip_rows + inner_skip_rows * rescanratio);
     run_cost += merge_qual_cost.per_tuple *
-        (outer_rows + inner_rows * rescanratio);
+        ((outer_rows - outer_skip_rows) +
+         (inner_rows - inner_skip_rows) * rescanratio);

     /*
      * For each tuple that gets through the mergejoin proper, we charge
@@ -1597,8 +1638,10 @@ cached_scansel(PlannerInfo *root, RestrictInfo *rinfo, PathKey *pathkey)
 {
     MergeScanSelCache *cache;
     ListCell   *lc;
-    Selectivity leftscansel,
-                rightscansel;
+    Selectivity leftstartsel,
+                leftendsel,
+                rightstartsel,
+                rightendsel;
     MemoryContext oldcontext;

     /* Do we have this result already? */
@@ -1617,8 +1660,10 @@ cached_scansel(PlannerInfo *root, RestrictInfo *rinfo, PathKey *pathkey)
                      pathkey->pk_opfamily,
                      pathkey->pk_strategy,
                      pathkey->pk_nulls_first,
-                     &leftscansel,
-                     &rightscansel);
+                     &leftstartsel,
+                     &leftendsel,
+                     &rightstartsel,
+                     &rightendsel);

     /* Cache the result in suitably long-lived workspace */
     oldcontext = MemoryContextSwitchTo(root->planner_cxt);
@@ -1627,8 +1672,10 @@ cached_scansel(PlannerInfo *root, RestrictInfo *rinfo, PathKey *pathkey)
     cache->opfamily = pathkey->pk_opfamily;
     cache->strategy = pathkey->pk_strategy;
     cache->nulls_first = pathkey->pk_nulls_first;
-    cache->leftscansel = leftscansel;
-    cache->rightscansel = rightscansel;
+    cache->leftstartsel = leftstartsel;
+    cache->leftendsel = leftendsel;
+    cache->rightstartsel = rightstartsel;
+    cache->rightendsel = rightendsel;

     rinfo->scansel_cache = lappend(rinfo->scansel_cache, cache);

src/backend/utils/adt/selfuncs.c

@@ -15,7 +15,7 @@
  *
  *
  * IDENTIFICATION
- *    $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.241 2007/11/15 22:25:16 momjian Exp $
+ *    $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.242 2007/12/08 21:05:11 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -128,8 +128,8 @@ static double convert_one_bytea_to_scalar(unsigned char *value, int valuelen,
                     int rangelo, int rangehi);
 static char *convert_string_datum(Datum value, Oid typid);
 static double convert_timevalue_to_scalar(Datum value, Oid typid);
-static bool get_variable_maximum(PlannerInfo *root, VariableStatData *vardata,
-                    Oid sortop, Datum *max);
+static bool get_variable_range(PlannerInfo *root, VariableStatData *vardata,
-                    Oid sortop, Datum *min, Datum *max);
 static Selectivity prefix_selectivity(VariableStatData *vardata,
                     Oid vartype, Oid opfamily, Const *prefixcon);
 static Selectivity pattern_selectivity(Const *patt, Pattern_Type ptype);
@@ -2172,18 +2172,24 @@ icnlikejoinsel(PG_FUNCTION_ARGS)
  * we can estimate how much of the input will actually be read.  This
  * can have a considerable impact on the cost when using indexscans.
  *
+ * Also, we can estimate how much of each input has to be read before the
+ * first join pair is found, which will affect the join's startup time.
+ *
  * clause should be a clause already known to be mergejoinable.  opfamily,
  * strategy, and nulls_first specify the sort ordering being used.
  *
- * *leftscan is set to the fraction of the left-hand variable expected
- * to be scanned (0 to 1), and similarly *rightscan for the right-hand
- * variable.
+ * The outputs are:
+ * *leftstart is set to the fraction of the left-hand variable expected
+ * to be scanned before the first join pair is found (0 to 1).
+ * *leftend is set to the fraction of the left-hand variable expected
+ * to be scanned before the join terminates (0 to 1).
+ * *rightstart, *rightend similarly for the right-hand variable.
  */
 void
 mergejoinscansel(PlannerInfo *root, Node *clause,
                  Oid opfamily, int strategy, bool nulls_first,
-                 Selectivity *leftscan,
-                 Selectivity *rightscan)
+                 Selectivity *leftstart, Selectivity *leftend,
+                 Selectivity *rightstart, Selectivity *rightend)
 {
     Node       *left,
                *right;
@@ -2196,14 +2202,23 @@ mergejoinscansel(PlannerInfo *root, Node *clause,
     Oid         opno,
                 lsortop,
                 rsortop,
+                lstatop,
+                rstatop,
+                ltop,
                 leop,
+                revltop,
                 revleop;
-    Datum       leftmax,
+    bool        isgt;
+    Datum       leftmin,
+                leftmax,
+                rightmin,
                 rightmax;
     double      selec;

     /* Set default results if we can't figure anything out. */
-    *leftscan = *rightscan = 1.0;
+    /* XXX should default "start" fraction be a bit more than 0? */
+    *leftstart = *rightstart = 0.0;
+    *leftend = *rightend = 1.0;

     /* Deconstruct the merge clause */
     if (!is_opclause(clause))
@@ -2229,30 +2244,103 @@ mergejoinscansel(PlannerInfo *root, Node *clause,

     /*
      * Look up the various operators we need.  If we don't find them all, it
-     * probably means the opfamily is broken, but we cope anyway.
+     * probably means the opfamily is broken, but we just fail silently.
+     *
+     * Note: we expect that pg_statistic histograms will be sorted by the
+     * '<' operator, regardless of which sort direction we are considering.
      */
     switch (strategy)
     {
         case BTLessStrategyNumber:
-            lsortop = get_opfamily_member(opfamily, op_lefttype, op_lefttype,
-                                          BTLessStrategyNumber);
-            rsortop = get_opfamily_member(opfamily, op_righttype, op_righttype,
-                                          BTLessStrategyNumber);
-            leop = get_opfamily_member(opfamily, op_lefttype, op_righttype,
-                                       BTLessEqualStrategyNumber);
-            revleop = get_opfamily_member(opfamily, op_righttype, op_lefttype,
-                                          BTLessEqualStrategyNumber);
+            isgt = false;
+            if (op_lefttype == op_righttype)
+            {
+                /* easy case */
+                ltop = get_opfamily_member(opfamily,
+                                           op_lefttype, op_righttype,
+                                           BTLessStrategyNumber);
+                leop = get_opfamily_member(opfamily,
+                                           op_lefttype, op_righttype,
+                                           BTLessEqualStrategyNumber);
+                lsortop = ltop;
+                rsortop = ltop;
+                lstatop = lsortop;
+                rstatop = rsortop;
+                revltop = ltop;
+                revleop = leop;
+            }
+            else
+            {
+                ltop = get_opfamily_member(opfamily,
+                                           op_lefttype, op_righttype,
+                                           BTLessStrategyNumber);
+                leop = get_opfamily_member(opfamily,
+                                           op_lefttype, op_righttype,
+                                           BTLessEqualStrategyNumber);
+                lsortop = get_opfamily_member(opfamily,
+                                              op_lefttype, op_lefttype,
+                                              BTLessStrategyNumber);
+                rsortop = get_opfamily_member(opfamily,
+                                              op_righttype, op_righttype,
+                                              BTLessStrategyNumber);
+                lstatop = lsortop;
+                rstatop = rsortop;
+                revltop = get_opfamily_member(opfamily,
+                                              op_righttype, op_lefttype,
+                                              BTLessStrategyNumber);
+                revleop = get_opfamily_member(opfamily,
+                                              op_righttype, op_lefttype,
+                                              BTLessEqualStrategyNumber);
+            }
             break;
         case BTGreaterStrategyNumber:
             /* descending-order case */
-            lsortop = get_opfamily_member(opfamily, op_lefttype, op_lefttype,
-                                          BTGreaterStrategyNumber);
-            rsortop = get_opfamily_member(opfamily, op_righttype, op_righttype,
-                                          BTGreaterStrategyNumber);
-            leop = get_opfamily_member(opfamily, op_lefttype, op_righttype,
-                                       BTGreaterEqualStrategyNumber);
-            revleop = get_opfamily_member(opfamily, op_righttype, op_lefttype,
-                                          BTGreaterEqualStrategyNumber);
+            isgt = true;
+            if (op_lefttype == op_righttype)
+            {
+                /* easy case */
+                ltop = get_opfamily_member(opfamily,
+                                           op_lefttype, op_righttype,
+                                           BTGreaterStrategyNumber);
+                leop = get_opfamily_member(opfamily,
+                                           op_lefttype, op_righttype,
+                                           BTGreaterEqualStrategyNumber);
+                lsortop = ltop;
+                rsortop = ltop;
+                lstatop = get_opfamily_member(opfamily,
+                                              op_lefttype, op_lefttype,
+                                              BTLessStrategyNumber);
+                rstatop = lstatop;
+                revltop = ltop;
+                revleop = leop;
+            }
+            else
+            {
+                ltop = get_opfamily_member(opfamily,
+                                           op_lefttype, op_righttype,
+                                           BTGreaterStrategyNumber);
+                leop = get_opfamily_member(opfamily,
+                                           op_lefttype, op_righttype,
+                                           BTGreaterEqualStrategyNumber);
+                lsortop = get_opfamily_member(opfamily,
+                                              op_lefttype, op_lefttype,
+                                              BTGreaterStrategyNumber);
+                rsortop = get_opfamily_member(opfamily,
+                                              op_righttype, op_righttype,
+                                              BTGreaterStrategyNumber);
+                lstatop = get_opfamily_member(opfamily,
+                                              op_lefttype, op_lefttype,
+                                              BTLessStrategyNumber);
+                rstatop = get_opfamily_member(opfamily,
+                                              op_righttype, op_righttype,
+                                              BTLessStrategyNumber);
+                revltop = get_opfamily_member(opfamily,
+                                              op_righttype, op_lefttype,
+                                              BTGreaterStrategyNumber);
+                revleop = get_opfamily_member(opfamily,
+                                              op_righttype, op_lefttype,
+                                              BTGreaterEqualStrategyNumber);
+            }
             break;
         default:
             goto fail;          /* shouldn't get here */
@@ -2260,66 +2348,133 @@ mergejoinscansel(PlannerInfo *root, Node *clause,

     if (!OidIsValid(lsortop) ||
         !OidIsValid(rsortop) ||
+        !OidIsValid(lstatop) ||
+        !OidIsValid(rstatop) ||
+        !OidIsValid(ltop) ||
         !OidIsValid(leop) ||
+        !OidIsValid(revltop) ||
         !OidIsValid(revleop))
         goto fail;              /* insufficient info in catalogs */

-    /* Try to get maximum values of both inputs */
-    if (!get_variable_maximum(root, &leftvar, lsortop, &leftmax))
-        goto fail;              /* no max available from stats */
-
-    if (!get_variable_maximum(root, &rightvar, rsortop, &rightmax))
-        goto fail;              /* no max available from stats */
+    /* Try to get ranges of both inputs */
+    if (!isgt)
+    {
+        if (!get_variable_range(root, &leftvar, lstatop,
+                                &leftmin, &leftmax))
+            goto fail;          /* no range available from stats */
+        if (!get_variable_range(root, &rightvar, rstatop,
+                                &rightmin, &rightmax))
+            goto fail;          /* no range available from stats */
+    }
+    else
+    {
+        /* need to swap the max and min */
+        if (!get_variable_range(root, &leftvar, lstatop,
+                                &leftmax, &leftmin))
+            goto fail;          /* no range available from stats */
+        if (!get_variable_range(root, &rightvar, rstatop,
+                                &rightmax, &rightmin))
+            goto fail;          /* no range available from stats */
+    }

     /*
      * Now, the fraction of the left variable that will be scanned is the
      * fraction that's <= the right-side maximum value.  But only believe
-     * non-default estimates, else stick with our 1.0.  Also, if the sort
-     * order is nulls-first, we're going to have to read over any nulls too.
+     * non-default estimates, else stick with our 1.0.
      */
-    selec = scalarineqsel(root, leop, false, &leftvar,
+    selec = scalarineqsel(root, leop, isgt, &leftvar,
                           rightmax, op_righttype);
     if (selec != DEFAULT_INEQ_SEL)
-    {
-        if (nulls_first && HeapTupleIsValid(leftvar.statsTuple))
-        {
-            Form_pg_statistic stats;
-
-            stats = (Form_pg_statistic) GETSTRUCT(leftvar.statsTuple);
-            selec += stats->stanullfrac;
-            CLAMP_PROBABILITY(selec);
-        }
-        *leftscan = selec;
-    }
+        *leftend = selec;

     /* And similarly for the right variable. */
-    selec = scalarineqsel(root, revleop, false, &rightvar,
+    selec = scalarineqsel(root, revleop, isgt, &rightvar,
                           leftmax, op_lefttype);
     if (selec != DEFAULT_INEQ_SEL)
-    {
-        if (nulls_first && HeapTupleIsValid(rightvar.statsTuple))
-        {
-            Form_pg_statistic stats;
-
-            stats = (Form_pg_statistic) GETSTRUCT(rightvar.statsTuple);
-            selec += stats->stanullfrac;
-            CLAMP_PROBABILITY(selec);
-        }
-        *rightscan = selec;
-    }
+        *rightend = selec;

     /*
-     * Only one of the two fractions can really be less than 1.0; believe the
-     * smaller estimate and reset the other one to exactly 1.0.  If we get
-     * exactly equal estimates (as can easily happen with self-joins), believe
-     * neither.
+     * Only one of the two "end" fractions can really be less than 1.0;
+     * believe the smaller estimate and reset the other one to exactly 1.0.
+     * If we get exactly equal estimates (as can easily happen with
+     * self-joins), believe neither.
      */
-    if (*leftscan > *rightscan)
-        *leftscan = 1.0;
-    else if (*leftscan < *rightscan)
-        *rightscan = 1.0;
+    if (*leftend > *rightend)
+        *leftend = 1.0;
+    else if (*leftend < *rightend)
+        *rightend = 1.0;
     else
-        *leftscan = *rightscan = 1.0;
+        *leftend = *rightend = 1.0;
+
+    /*
+     * Also, the fraction of the left variable that will be scanned before
+     * the first join pair is found is the fraction that's < the right-side
+     * minimum value.  But only believe non-default estimates, else stick with
+     * our own default.
+     */
+    selec = scalarineqsel(root, ltop, isgt, &leftvar,
+                          rightmin, op_righttype);
+    if (selec != DEFAULT_INEQ_SEL)
+        *leftstart = selec;
+
+    /* And similarly for the right variable. */
+    selec = scalarineqsel(root, revltop, isgt, &rightvar,
+                          leftmin, op_lefttype);
+    if (selec != DEFAULT_INEQ_SEL)
+        *rightstart = selec;
+
+    /*
+     * Only one of the two "start" fractions can really be more than zero;
+     * believe the larger estimate and reset the other one to exactly 0.0.
+     * If we get exactly equal estimates (as can easily happen with
+     * self-joins), believe neither.
+     */
+    if (*leftstart < *rightstart)
+        *leftstart = 0.0;
+    else if (*leftstart > *rightstart)
+        *rightstart = 0.0;
+    else
+        *leftstart = *rightstart = 0.0;
+
+    /*
+     * If the sort order is nulls-first, we're going to have to skip over any
+     * nulls too.  These would not have been counted by scalarineqsel, and
+     * we can safely add in this fraction regardless of whether we believe
+     * scalarineqsel's results or not.  But be sure to clamp the sum to 1.0!
+     */
+    if (nulls_first)
+    {
+        Form_pg_statistic stats;
+
+        if (HeapTupleIsValid(leftvar.statsTuple))
+        {
+            stats = (Form_pg_statistic) GETSTRUCT(leftvar.statsTuple);
+            *leftstart += stats->stanullfrac;
+            CLAMP_PROBABILITY(*leftstart);
+            *leftend += stats->stanullfrac;
+            CLAMP_PROBABILITY(*leftend);
+        }
+        if (HeapTupleIsValid(rightvar.statsTuple))
+        {
+            stats = (Form_pg_statistic) GETSTRUCT(rightvar.statsTuple);
+            *rightstart += stats->stanullfrac;
+            CLAMP_PROBABILITY(*rightstart);
+            *rightend += stats->stanullfrac;
+            CLAMP_PROBABILITY(*rightend);
+        }
+    }
+
+    /* Disbelieve start >= end, just in case that can happen */
+    if (*leftstart >= *leftend)
+    {
+        *leftstart = 0.0;
+        *leftend = 1.0;
+    }
+    if (*rightstart >= *rightend)
+    {
+        *rightstart = 0.0;
+        *rightend = 1.0;
+    }

 fail:
     ReleaseVariableStats(leftvar);
@@ -3778,20 +3933,21 @@ get_variable_numdistinct(VariableStatData *vardata)
 }

 /*
- * get_variable_maximum
- *      Estimate the maximum value of the specified variable.
- *      If successful, store value in *max and return TRUE.
+ * get_variable_range
+ *      Estimate the minimum and maximum value of the specified variable.
+ *      If successful, store values in *min and *max, and return TRUE.
  *      If no data available, return FALSE.
  *
- * sortop is the "<" comparison operator to use.  (To extract the
- * minimum instead of the maximum, just pass the ">" operator instead.)
+ * sortop is the "<" comparison operator to use.  This should generally
+ * be "<" not ">", as only the former is likely to be found in pg_statistic.
  */
 static bool
-get_variable_maximum(PlannerInfo *root, VariableStatData *vardata,
-                     Oid sortop, Datum *max)
+get_variable_range(PlannerInfo *root, VariableStatData *vardata, Oid sortop,
+                   Datum *min, Datum *max)
 {
+    Datum       tmin = 0;
     Datum       tmax = 0;
-    bool        have_max = false;
+    bool        have_data = false;
     Form_pg_statistic stats;
     int16       typLen;
     bool        typByVal;
@@ -3809,7 +3965,7 @@ get_variable_maximum(PlannerInfo *root, VariableStatData *vardata,
     get_typlenbyval(vardata->atttype, &typLen, &typByVal);

     /*
-     * If there is a histogram, grab the last or first value as appropriate.
+     * If there is a histogram, grab the first and last values.
      *
      * If there is a histogram that is sorted with some other operator than
      * the one we want, fail --- this suggests that there is data we can't
@@ -3823,42 +3979,24 @@ get_variable_maximum(PlannerInfo *root, VariableStatData *vardata,
     {
         if (nvalues > 0)
         {
+            tmin = datumCopy(values[0], typByVal, typLen);
             tmax = datumCopy(values[nvalues - 1], typByVal, typLen);
-            have_max = true;
+            have_data = true;
         }
         free_attstatsslot(vardata->atttype, values, nvalues, NULL, 0);
     }
-    else
+    else if (get_attstatsslot(vardata->statsTuple,
+                              vardata->atttype, vardata->atttypmod,
+                              STATISTIC_KIND_HISTOGRAM, InvalidOid,
+                              &values, &nvalues,
+                              NULL, NULL))
     {
-        Oid         rsortop = get_commutator(sortop);
-
-        if (OidIsValid(rsortop) &&
-            get_attstatsslot(vardata->statsTuple,
-                             vardata->atttype, vardata->atttypmod,
-                             STATISTIC_KIND_HISTOGRAM, rsortop,
-                             &values, &nvalues,
-                             NULL, NULL))
-        {
-            if (nvalues > 0)
-            {
-                tmax = datumCopy(values[0], typByVal, typLen);
-                have_max = true;
-            }
-            free_attstatsslot(vardata->atttype, values, nvalues, NULL, 0);
-        }
-        else if (get_attstatsslot(vardata->statsTuple,
-                                  vardata->atttype, vardata->atttypmod,
-                                  STATISTIC_KIND_HISTOGRAM, InvalidOid,
-                                  &values, &nvalues,
-                                  NULL, NULL))
-        {
-            free_attstatsslot(vardata->atttype, values, nvalues, NULL, 0);
-            return false;
-        }
+        free_attstatsslot(vardata->atttype, values, nvalues, NULL, 0);
+        return false;
     }

     /*
-     * If we have most-common-values info, look for a large MCV.  This is
+     * If we have most-common-values info, look for extreme MCVs.  This is
      * needed even if we also have a histogram, since the histogram excludes
      * the MCVs.  However, usually the MCVs will not be the extreme values, so
      * avoid unnecessary data copying.
@@ -3869,31 +4007,41 @@ get_variable_maximum(PlannerInfo *root, VariableStatData *vardata,
                               &values, &nvalues,
                               NULL, NULL))
     {
-        bool        large_mcv = false;
+        bool        tmin_is_mcv = false;
+        bool        tmax_is_mcv = false;
         FmgrInfo    opproc;

         fmgr_info(get_opcode(sortop), &opproc);

         for (i = 0; i < nvalues; i++)
         {
-            if (!have_max)
+            if (!have_data)
             {
-                tmax = values[i];
-                large_mcv = have_max = true;
+                tmin = tmax = values[i];
+                tmin_is_mcv = tmax_is_mcv = have_data = true;
+                continue;
             }
-            else if (DatumGetBool(FunctionCall2(&opproc, tmax, values[i])))
+            if (DatumGetBool(FunctionCall2(&opproc, values[i], tmin)))
+            {
+                tmin = values[i];
+                tmin_is_mcv = true;
+            }
+            if (DatumGetBool(FunctionCall2(&opproc, tmax, values[i])))
             {
                 tmax = values[i];
-                large_mcv = true;
+                tmax_is_mcv = true;
             }
         }
-        if (large_mcv)
+        if (tmin_is_mcv)
+            tmin = datumCopy(tmin, typByVal, typLen);
+        if (tmax_is_mcv)
             tmax = datumCopy(tmax, typByVal, typLen);
         free_attstatsslot(vardata->atttype, values, nvalues, NULL, 0);
     }

+    *min = tmin;
     *max = tmax;
-    return have_max;
+    return have_data;
 }

src/include/nodes/relation.h

@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.150 2007/11/15 22:25:17 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.151 2007/12/08 21:05:11 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -993,8 +993,10 @@ typedef struct MergeScanSelCache
     int         strategy;       /* sort direction (ASC or DESC) */
     bool        nulls_first;    /* do NULLs come before normal values? */
     /* Results */
-    Selectivity leftscansel;    /* scan fraction for clause left side */
-    Selectivity rightscansel;   /* scan fraction for clause right side */
+    Selectivity leftstartsel;   /* first-join fraction for clause left side */
+    Selectivity leftendsel;     /* last-join fraction for clause left side */
+    Selectivity rightstartsel;  /* first-join fraction for clause right side */
+    Selectivity rightendsel;    /* last-join fraction for clause right side */
 } MergeScanSelCache;

 /*
src/include/utils/selfuncs.h

@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/utils/selfuncs.h,v 1.41 2007/11/07 22:37:24 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/utils/selfuncs.h,v 1.42 2007/12/08 21:05:11 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -161,8 +161,8 @@ extern Selectivity rowcomparesel(PlannerInfo *root,

 extern void mergejoinscansel(PlannerInfo *root, Node *clause,
                  Oid opfamily, int strategy, bool nulls_first,
-                 Selectivity *leftscan,
-                 Selectivity *rightscan);
+                 Selectivity *leftstart, Selectivity *leftend,
+                 Selectivity *rightstart, Selectivity *rightend);

 extern double estimate_num_groups(PlannerInfo *root, List *groupExprs,
                    double input_rows);
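To make the new leftstart/leftend and rightstart/rightend outputs concrete, here is a toy model of the range-based reasoning in mergejoinscansel above. It assumes uniformly distributed values rather than the histogram-driven scalarineqsel estimates the planner actually uses, and the ranges are made-up numbers: the "end" fraction of one side is the portion of its values at or below the other side's maximum, and the new "start" fraction is the portion strictly below the other side's minimum.

#include <stdio.h>

/* Fraction of a uniform range [lo, hi] that lies below bound "b". */
static double
frac_below(double lo, double hi, double b)
{
    if (b <= lo)
        return 0.0;
    if (b >= hi)
        return 1.0;
    return (b - lo) / (hi - lo);
}

int
main(void)
{
    /* hypothetical ranges of the two join variables */
    double left_lo = 1.0, left_hi = 100.0;
    double right_lo = 40.0, right_hi = 60.0;

    /* fraction scanned before the join ends: values up to the other side's max */
    double leftend = frac_below(left_lo, left_hi, right_hi);
    double rightend = frac_below(right_lo, right_hi, left_hi);

    /* fraction skipped before the first pair: values below the other side's min */
    double leftstart = frac_below(left_lo, left_hi, right_lo);
    double rightstart = frac_below(right_lo, right_hi, left_lo);

    printf("left: start %.2f end %.2f; right: start %.2f end %.2f\n",
           leftstart, leftend, rightstart, rightend);
    return 0;
}

With these numbers the left input skips roughly 39% of its rows before the first match and stops after roughly 60%, while the right input is scanned in full, which is the shape of estimate that lets the planner charge part of the scan to startup cost rather than run cost.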