Fix mergejoin cost estimation so that we consider the statistical ranges of
the two join variables at both ends: not only the trailing rows that need not
be scanned because there can be no match on the other side, but also the
initial rows that will be scanned without any possibility of a match. This
allows a more realistic estimate of startup cost to be made, per recent
pgsql-performance discussion. In passing, fix a couple of bugs that had crept
into mergejoinscansel: it was not quite up to speed for the task of estimating
descending-order scans, which is a new requirement in 8.3.
parent 8821612854
commit 9fd8843647
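As a rough illustration of the estimate described above, the sketch below shows the arithmetic the patched cost_mergejoin performs once mergejoinscansel reports a "start" and an "end" fraction for an input: rows below the start fraction are skipped before the first join pair and their cost is charged to startup, while only the span between the two fractions contributes to run cost. This is a minimal standalone sketch, not the planner's code; the variable names and the example numbers are assumptions chosen for illustration.

#include <math.h>
#include <stdio.h>

/*
 * Hypothetical illustration of the startup/run cost split introduced by this
 * commit.  "startsel" is the fraction of an input scanned before the first
 * join pair is found, "endsel" the fraction scanned before the join stops.
 */
int
main(void)
{
    double path_rows = 100000.0;    /* rows produced by the input path */
    double path_startup = 50.0;     /* input path's own startup cost */
    double path_total = 2500.0;     /* input path's total cost */
    double startsel = 0.30;         /* assumed first-match fraction */
    double endsel = 0.80;           /* assumed last-match fraction */

    /* rows read before the first match, and rows read in total */
    double skip_rows = rint(path_rows * startsel);
    double scan_rows = rint(path_rows * endsel);

    /* the skipped fraction is charged to startup cost, the rest to run cost */
    double startup_cost = path_startup +
        (path_total - path_startup) * startsel;
    double run_cost = (path_total - path_startup) * (endsel - startsel);

    printf("skip %.0f rows, scan %.0f rows: startup %.1f, run %.1f\n",
           skip_rows, scan_rows, startup_cost, run_cost);
    return 0;
}

Compiling and running this (with -lm) prints a startup cost of 785.0 and a run cost of 1225.0 for the assumed 30%/80% fractions, mirroring how the patch shifts the skipped part of the scan into startup cost instead of run cost.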
src/backend/optimizer/path/costsize.c

@@ -54,7 +54,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *    $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.189 2007/11/15 22:25:15 momjian Exp $
+ *    $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.190 2007/12/08 21:05:11 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1372,12 +1372,16 @@ cost_mergejoin(MergePath *path, PlannerInfo *root)
     double      outer_path_rows = PATH_ROWS(outer_path);
     double      inner_path_rows = PATH_ROWS(inner_path);
     double      outer_rows,
-                inner_rows;
+                inner_rows,
+                outer_skip_rows,
+                inner_skip_rows;
     double      mergejointuples,
                 rescannedtuples;
     double      rescanratio;
-    Selectivity outerscansel,
-                innerscansel;
+    Selectivity outerstartsel,
+                outerendsel,
+                innerstartsel,
+                innerendsel;
     Selectivity joininfactor;
     Path        sort_path;      /* dummy for result of cost_sort */

@@ -1444,10 +1448,12 @@ cost_mergejoin(MergePath *path, PlannerInfo *root)
      * A merge join will stop as soon as it exhausts either input stream
      * (unless it's an outer join, in which case the outer side has to be
      * scanned all the way anyway).  Estimate fraction of the left and right
-     * inputs that will actually need to be scanned.  We use only the first
-     * (most significant) merge clause for this purpose.  Since
-     * mergejoinscansel() is a fairly expensive computation, we cache the
-     * results in the merge clause RestrictInfo.
+     * inputs that will actually need to be scanned.  Likewise, we can
+     * estimate the number of rows that will be skipped before the first
+     * join pair is found, which should be factored into startup cost.
+     * We use only the first (most significant) merge clause for this purpose.
+     * Since mergejoinscansel() is a fairly expensive computation, we cache
+     * the results in the merge clause RestrictInfo.
      */
     if (mergeclauses && path->jpath.jointype != JOIN_FULL)
     {
@@ -1478,37 +1484,61 @@ cost_mergejoin(MergePath *path, PlannerInfo *root)
                               outer_path->parent->relids))
         {
             /* left side of clause is outer */
-            outerscansel = cache->leftscansel;
-            innerscansel = cache->rightscansel;
+            outerstartsel = cache->leftstartsel;
+            outerendsel = cache->leftendsel;
+            innerstartsel = cache->rightstartsel;
+            innerendsel = cache->rightendsel;
         }
         else
         {
             /* left side of clause is inner */
-            outerscansel = cache->rightscansel;
-            innerscansel = cache->leftscansel;
+            outerstartsel = cache->rightstartsel;
+            outerendsel = cache->rightendsel;
+            innerstartsel = cache->leftstartsel;
+            innerendsel = cache->leftendsel;
         }
         if (path->jpath.jointype == JOIN_LEFT)
-            outerscansel = 1.0;
+        {
+            outerstartsel = 0.0;
+            outerendsel = 1.0;
+        }
         else if (path->jpath.jointype == JOIN_RIGHT)
-            innerscansel = 1.0;
+        {
+            innerstartsel = 0.0;
+            innerendsel = 1.0;
+        }
     }
     else
     {
         /* cope with clauseless or full mergejoin */
-        outerscansel = innerscansel = 1.0;
+        outerstartsel = innerstartsel = 0.0;
+        outerendsel = innerendsel = 1.0;
     }

-    /* convert selectivity to row count; must scan at least one row */
-    outer_rows = clamp_row_est(outer_path_rows * outerscansel);
-    inner_rows = clamp_row_est(inner_path_rows * innerscansel);
+    /*
+     * Convert selectivities to row counts.  We force outer_rows and
+     * inner_rows to be at least 1, but the skip_rows estimates can be zero.
+     */
+    outer_skip_rows = rint(outer_path_rows * outerstartsel);
+    inner_skip_rows = rint(inner_path_rows * innerstartsel);
+    outer_rows = clamp_row_est(outer_path_rows * outerendsel);
+    inner_rows = clamp_row_est(inner_path_rows * innerendsel);
+
+    Assert(outer_skip_rows <= outer_rows);
+    Assert(inner_skip_rows <= inner_rows);

     /*
      * Readjust scan selectivities to account for above rounding.  This is
      * normally an insignificant effect, but when there are only a few rows in
      * the inputs, failing to do this makes for a large percentage error.
      */
-    outerscansel = outer_rows / outer_path_rows;
-    innerscansel = inner_rows / inner_path_rows;
+    outerstartsel = outer_skip_rows / outer_path_rows;
+    innerstartsel = inner_skip_rows / inner_path_rows;
+    outerendsel = outer_rows / outer_path_rows;
+    innerendsel = inner_rows / inner_path_rows;
+
+    Assert(outerstartsel <= outerendsel);
+    Assert(innerstartsel <= innerendsel);

     /* cost of source data */

@@ -1522,14 +1552,18 @@ cost_mergejoin(MergePath *path, PlannerInfo *root)
                           outer_path->parent->width,
                           -1.0);
         startup_cost += sort_path.startup_cost;
+        startup_cost += (sort_path.total_cost - sort_path.startup_cost)
+            * outerstartsel;
         run_cost += (sort_path.total_cost - sort_path.startup_cost)
-            * outerscansel;
+            * (outerendsel - outerstartsel);
     }
     else
     {
         startup_cost += outer_path->startup_cost;
+        startup_cost += (outer_path->total_cost - outer_path->startup_cost)
+            * outerstartsel;
         run_cost += (outer_path->total_cost - outer_path->startup_cost)
-            * outerscansel;
+            * (outerendsel - outerstartsel);
     }

     if (innersortkeys)          /* do we need to sort inner? */
@@ -1542,14 +1576,18 @@ cost_mergejoin(MergePath *path, PlannerInfo *root)
                           inner_path->parent->width,
                           -1.0);
         startup_cost += sort_path.startup_cost;
+        startup_cost += (sort_path.total_cost - sort_path.startup_cost)
+            * innerstartsel * rescanratio;
         run_cost += (sort_path.total_cost - sort_path.startup_cost)
-            * innerscansel * rescanratio;
+            * (innerendsel - innerstartsel) * rescanratio;
     }
     else
     {
         startup_cost += inner_path->startup_cost;
+        startup_cost += (inner_path->total_cost - inner_path->startup_cost)
+            * innerstartsel * rescanratio;
         run_cost += (inner_path->total_cost - inner_path->startup_cost)
-            * innerscansel * rescanratio;
+            * (innerendsel - innerstartsel) * rescanratio;
     }

     /* CPU costs */
@@ -1571,8 +1609,11 @@
      * joininfactor.
      */
     startup_cost += merge_qual_cost.startup;
+    startup_cost += merge_qual_cost.per_tuple *
+        (outer_skip_rows + inner_skip_rows * rescanratio);
     run_cost += merge_qual_cost.per_tuple *
-        (outer_rows + inner_rows * rescanratio);
+        ((outer_rows - outer_skip_rows) +
+         (inner_rows - inner_skip_rows) * rescanratio);

     /*
      * For each tuple that gets through the mergejoin proper, we charge
@@ -1597,8 +1638,10 @@ cached_scansel(PlannerInfo *root, RestrictInfo *rinfo, PathKey *pathkey)
 {
     MergeScanSelCache *cache;
     ListCell   *lc;
-    Selectivity leftscansel,
-                rightscansel;
+    Selectivity leftstartsel,
+                leftendsel,
+                rightstartsel,
+                rightendsel;
     MemoryContext oldcontext;

     /* Do we have this result already? */
@@ -1617,8 +1660,10 @@ cached_scansel(PlannerInfo *root, RestrictInfo *rinfo, PathKey *pathkey)
                      pathkey->pk_opfamily,
                      pathkey->pk_strategy,
                      pathkey->pk_nulls_first,
-                     &leftscansel,
-                     &rightscansel);
+                     &leftstartsel,
+                     &leftendsel,
+                     &rightstartsel,
+                     &rightendsel);

     /* Cache the result in suitably long-lived workspace */
     oldcontext = MemoryContextSwitchTo(root->planner_cxt);
@@ -1627,8 +1672,10 @@ cached_scansel(PlannerInfo *root, RestrictInfo *rinfo, PathKey *pathkey)
     cache->opfamily = pathkey->pk_opfamily;
     cache->strategy = pathkey->pk_strategy;
     cache->nulls_first = pathkey->pk_nulls_first;
-    cache->leftscansel = leftscansel;
-    cache->rightscansel = rightscansel;
+    cache->leftstartsel = leftstartsel;
+    cache->leftendsel = leftendsel;
+    cache->rightstartsel = rightstartsel;
+    cache->rightendsel = rightendsel;

     rinfo->scansel_cache = lappend(rinfo->scansel_cache, cache);

src/backend/utils/adt/selfuncs.c

@@ -15,7 +15,7 @@
  *
  *
  * IDENTIFICATION
- *    $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.241 2007/11/15 22:25:16 momjian Exp $
+ *    $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.242 2007/12/08 21:05:11 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -128,8 +128,8 @@ static double convert_one_bytea_to_scalar(unsigned char *value, int valuelen,
                     int rangelo, int rangehi);
 static char *convert_string_datum(Datum value, Oid typid);
 static double convert_timevalue_to_scalar(Datum value, Oid typid);
-static bool get_variable_maximum(PlannerInfo *root, VariableStatData *vardata,
-                    Oid sortop, Datum *max);
+static bool get_variable_range(PlannerInfo *root, VariableStatData *vardata,
-                    Oid sortop, Datum *min, Datum *max);
 static Selectivity prefix_selectivity(VariableStatData *vardata,
                     Oid vartype, Oid opfamily, Const *prefixcon);
 static Selectivity pattern_selectivity(Const *patt, Pattern_Type ptype);
@@ -2172,18 +2172,24 @@ icnlikejoinsel(PG_FUNCTION_ARGS)
  * we can estimate how much of the input will actually be read.  This
  * can have a considerable impact on the cost when using indexscans.
  *
+ * Also, we can estimate how much of each input has to be read before the
+ * first join pair is found, which will affect the join's startup time.
+ *
  * clause should be a clause already known to be mergejoinable.  opfamily,
  * strategy, and nulls_first specify the sort ordering being used.
  *
- * *leftscan is set to the fraction of the left-hand variable expected
- * to be scanned (0 to 1), and similarly *rightscan for the right-hand
- * variable.
+ * The outputs are:
+ * *leftstart is set to the fraction of the left-hand variable expected
+ * to be scanned before the first join pair is found (0 to 1).
+ * *leftend is set to the fraction of the left-hand variable expected
+ * to be scanned before the join terminates (0 to 1).
+ * *rightstart, *rightend similarly for the right-hand variable.
  */
 void
 mergejoinscansel(PlannerInfo *root, Node *clause,
                  Oid opfamily, int strategy, bool nulls_first,
-                 Selectivity *leftscan,
-                 Selectivity *rightscan)
+                 Selectivity *leftstart, Selectivity *leftend,
+                 Selectivity *rightstart, Selectivity *rightend)
 {
     Node       *left,
                *right;
@@ -2196,14 +2202,23 @@ mergejoinscansel(PlannerInfo *root, Node *clause,
     Oid         opno,
                 lsortop,
                 rsortop,
+                lstatop,
+                rstatop,
+                ltop,
                 leop,
+                revltop,
                 revleop;
-    Datum       leftmax,
+    bool        isgt;
+    Datum       leftmin,
+                leftmax,
+                rightmin,
                 rightmax;
     double      selec;

     /* Set default results if we can't figure anything out. */
-    *leftscan = *rightscan = 1.0;
+    /* XXX should default "start" fraction be a bit more than 0? */
+    *leftstart = *rightstart = 0.0;
+    *leftend = *rightend = 1.0;

     /* Deconstruct the merge clause */
     if (!is_opclause(clause))
@@ -2229,30 +2244,103 @@ mergejoinscansel(PlannerInfo *root, Node *clause,

     /*
      * Look up the various operators we need.  If we don't find them all, it
-     * probably means the opfamily is broken, but we cope anyway.
+     * probably means the opfamily is broken, but we just fail silently.
+     *
+     * Note: we expect that pg_statistic histograms will be sorted by the
+     * '<' operator, regardless of which sort direction we are considering.
      */
     switch (strategy)
     {
         case BTLessStrategyNumber:
-            lsortop = get_opfamily_member(opfamily, op_lefttype, op_lefttype,
-                                          BTLessStrategyNumber);
-            rsortop = get_opfamily_member(opfamily, op_righttype, op_righttype,
-                                          BTLessStrategyNumber);
-            leop = get_opfamily_member(opfamily, op_lefttype, op_righttype,
-                                       BTLessEqualStrategyNumber);
-            revleop = get_opfamily_member(opfamily, op_righttype, op_lefttype,
-                                          BTLessEqualStrategyNumber);
+            isgt = false;
+            if (op_lefttype == op_righttype)
+            {
+                /* easy case */
+                ltop = get_opfamily_member(opfamily,
+                                           op_lefttype, op_righttype,
+                                           BTLessStrategyNumber);
+                leop = get_opfamily_member(opfamily,
+                                           op_lefttype, op_righttype,
+                                           BTLessEqualStrategyNumber);
+                lsortop = ltop;
+                rsortop = ltop;
+                lstatop = lsortop;
+                rstatop = rsortop;
+                revltop = ltop;
+                revleop = leop;
+            }
+            else
+            {
+                ltop = get_opfamily_member(opfamily,
+                                           op_lefttype, op_righttype,
+                                           BTLessStrategyNumber);
+                leop = get_opfamily_member(opfamily,
+                                           op_lefttype, op_righttype,
+                                           BTLessEqualStrategyNumber);
+                lsortop = get_opfamily_member(opfamily,
+                                              op_lefttype, op_lefttype,
+                                              BTLessStrategyNumber);
+                rsortop = get_opfamily_member(opfamily,
+                                              op_righttype, op_righttype,
+                                              BTLessStrategyNumber);
+                lstatop = lsortop;
+                rstatop = rsortop;
+                revltop = get_opfamily_member(opfamily,
+                                              op_righttype, op_lefttype,
+                                              BTLessStrategyNumber);
+                revleop = get_opfamily_member(opfamily,
+                                              op_righttype, op_lefttype,
+                                              BTLessEqualStrategyNumber);
+            }
             break;
         case BTGreaterStrategyNumber:
             /* descending-order case */
-            lsortop = get_opfamily_member(opfamily, op_lefttype, op_lefttype,
-                                          BTGreaterStrategyNumber);
-            rsortop = get_opfamily_member(opfamily, op_righttype, op_righttype,
-                                          BTGreaterStrategyNumber);
-            leop = get_opfamily_member(opfamily, op_lefttype, op_righttype,
-                                       BTGreaterEqualStrategyNumber);
-            revleop = get_opfamily_member(opfamily, op_righttype, op_lefttype,
-                                          BTGreaterEqualStrategyNumber);
+            isgt = true;
+            if (op_lefttype == op_righttype)
+            {
+                /* easy case */
+                ltop = get_opfamily_member(opfamily,
+                                           op_lefttype, op_righttype,
+                                           BTGreaterStrategyNumber);
+                leop = get_opfamily_member(opfamily,
+                                           op_lefttype, op_righttype,
+                                           BTGreaterEqualStrategyNumber);
+                lsortop = ltop;
+                rsortop = ltop;
+                lstatop = get_opfamily_member(opfamily,
+                                              op_lefttype, op_lefttype,
+                                              BTLessStrategyNumber);
+                rstatop = lstatop;
+                revltop = ltop;
+                revleop = leop;
+            }
+            else
+            {
+                ltop = get_opfamily_member(opfamily,
+                                           op_lefttype, op_righttype,
+                                           BTGreaterStrategyNumber);
+                leop = get_opfamily_member(opfamily,
+                                           op_lefttype, op_righttype,
+                                           BTGreaterEqualStrategyNumber);
+                lsortop = get_opfamily_member(opfamily,
+                                              op_lefttype, op_lefttype,
+                                              BTGreaterStrategyNumber);
+                rsortop = get_opfamily_member(opfamily,
+                                              op_righttype, op_righttype,
+                                              BTGreaterStrategyNumber);
+                lstatop = get_opfamily_member(opfamily,
+                                              op_lefttype, op_lefttype,
+                                              BTLessStrategyNumber);
+                rstatop = get_opfamily_member(opfamily,
+                                              op_righttype, op_righttype,
+                                              BTLessStrategyNumber);
+                revltop = get_opfamily_member(opfamily,
+                                              op_righttype, op_lefttype,
+                                              BTGreaterStrategyNumber);
+                revleop = get_opfamily_member(opfamily,
+                                              op_righttype, op_lefttype,
+                                              BTGreaterEqualStrategyNumber);
+            }
             break;
         default:
             goto fail;          /* shouldn't get here */
@@ -2260,66 +2348,133 @@ mergejoinscansel(PlannerInfo *root, Node *clause,

     if (!OidIsValid(lsortop) ||
         !OidIsValid(rsortop) ||
+        !OidIsValid(lstatop) ||
+        !OidIsValid(rstatop) ||
+        !OidIsValid(ltop) ||
         !OidIsValid(leop) ||
+        !OidIsValid(revltop) ||
         !OidIsValid(revleop))
         goto fail;              /* insufficient info in catalogs */

-    /* Try to get maximum values of both inputs */
-    if (!get_variable_maximum(root, &leftvar, lsortop, &leftmax))
-        goto fail;              /* no max available from stats */
-
-    if (!get_variable_maximum(root, &rightvar, rsortop, &rightmax))
-        goto fail;              /* no max available from stats */
+    /* Try to get ranges of both inputs */
+    if (!isgt)
+    {
+        if (!get_variable_range(root, &leftvar, lstatop,
+                                &leftmin, &leftmax))
+            goto fail;          /* no range available from stats */
+        if (!get_variable_range(root, &rightvar, rstatop,
+                                &rightmin, &rightmax))
+            goto fail;          /* no range available from stats */
+    }
+    else
+    {
+        /* need to swap the max and min */
+        if (!get_variable_range(root, &leftvar, lstatop,
+                                &leftmax, &leftmin))
+            goto fail;          /* no range available from stats */
+        if (!get_variable_range(root, &rightvar, rstatop,
+                                &rightmax, &rightmin))
+            goto fail;          /* no range available from stats */
+    }

     /*
      * Now, the fraction of the left variable that will be scanned is the
      * fraction that's <= the right-side maximum value.  But only believe
-     * non-default estimates, else stick with our 1.0.  Also, if the sort
-     * order is nulls-first, we're going to have to read over any nulls too.
+     * non-default estimates, else stick with our 1.0.
      */
-    selec = scalarineqsel(root, leop, false, &leftvar,
+    selec = scalarineqsel(root, leop, isgt, &leftvar,
                           rightmax, op_righttype);
     if (selec != DEFAULT_INEQ_SEL)
-    {
-        if (nulls_first && HeapTupleIsValid(leftvar.statsTuple))
-        {
-            Form_pg_statistic stats;
-
-            stats = (Form_pg_statistic) GETSTRUCT(leftvar.statsTuple);
-            selec += stats->stanullfrac;
-            CLAMP_PROBABILITY(selec);
-        }
-        *leftscan = selec;
-    }
+        *leftend = selec;

     /* And similarly for the right variable. */
-    selec = scalarineqsel(root, revleop, false, &rightvar,
+    selec = scalarineqsel(root, revleop, isgt, &rightvar,
                           leftmax, op_lefttype);
     if (selec != DEFAULT_INEQ_SEL)
-    {
-        if (nulls_first && HeapTupleIsValid(rightvar.statsTuple))
-        {
-            Form_pg_statistic stats;
-
-            stats = (Form_pg_statistic) GETSTRUCT(rightvar.statsTuple);
-            selec += stats->stanullfrac;
-            CLAMP_PROBABILITY(selec);
-        }
-        *rightscan = selec;
-    }
+        *rightend = selec;

     /*
-     * Only one of the two fractions can really be less than 1.0; believe the
-     * smaller estimate and reset the other one to exactly 1.0.  If we get
-     * exactly equal estimates (as can easily happen with self-joins), believe
-     * neither.
+     * Only one of the two "end" fractions can really be less than 1.0;
+     * believe the smaller estimate and reset the other one to exactly 1.0.
+     * If we get exactly equal estimates (as can easily happen with
+     * self-joins), believe neither.
      */
-    if (*leftscan > *rightscan)
-        *leftscan = 1.0;
-    else if (*leftscan < *rightscan)
-        *rightscan = 1.0;
+    if (*leftend > *rightend)
+        *leftend = 1.0;
+    else if (*leftend < *rightend)
+        *rightend = 1.0;
     else
-        *leftscan = *rightscan = 1.0;
+        *leftend = *rightend = 1.0;
+
+    /*
+     * Also, the fraction of the left variable that will be scanned before
+     * the first join pair is found is the fraction that's < the right-side
+     * minimum value.  But only believe non-default estimates, else stick with
+     * our own default.
+     */
+    selec = scalarineqsel(root, ltop, isgt, &leftvar,
+                          rightmin, op_righttype);
+    if (selec != DEFAULT_INEQ_SEL)
+        *leftstart = selec;
+
+    /* And similarly for the right variable. */
+    selec = scalarineqsel(root, revltop, isgt, &rightvar,
+                          leftmin, op_lefttype);
+    if (selec != DEFAULT_INEQ_SEL)
+        *rightstart = selec;
+
+    /*
+     * Only one of the two "start" fractions can really be more than zero;
+     * believe the larger estimate and reset the other one to exactly 0.0.
+     * If we get exactly equal estimates (as can easily happen with
+     * self-joins), believe neither.
+     */
+    if (*leftstart < *rightstart)
+        *leftstart = 0.0;
+    else if (*leftstart > *rightstart)
+        *rightstart = 0.0;
+    else
+        *leftstart = *rightstart = 0.0;
+
+    /*
+     * If the sort order is nulls-first, we're going to have to skip over any
+     * nulls too.  These would not have been counted by scalarineqsel, and
+     * we can safely add in this fraction regardless of whether we believe
+     * scalarineqsel's results or not.  But be sure to clamp the sum to 1.0!
+     */
+    if (nulls_first)
+    {
+        Form_pg_statistic stats;
+
+        if (HeapTupleIsValid(leftvar.statsTuple))
+        {
+            stats = (Form_pg_statistic) GETSTRUCT(leftvar.statsTuple);
+            *leftstart += stats->stanullfrac;
+            CLAMP_PROBABILITY(*leftstart);
+            *leftend += stats->stanullfrac;
+            CLAMP_PROBABILITY(*leftend);
+        }
+        if (HeapTupleIsValid(rightvar.statsTuple))
+        {
+            stats = (Form_pg_statistic) GETSTRUCT(rightvar.statsTuple);
+            *rightstart += stats->stanullfrac;
+            CLAMP_PROBABILITY(*rightstart);
+            *rightend += stats->stanullfrac;
+            CLAMP_PROBABILITY(*rightend);
+        }
+    }
+
+    /* Disbelieve start >= end, just in case that can happen */
+    if (*leftstart >= *leftend)
+    {
+        *leftstart = 0.0;
+        *leftend = 1.0;
+    }
+    if (*rightstart >= *rightend)
+    {
+        *rightstart = 0.0;
+        *rightend = 1.0;
+    }

 fail:
     ReleaseVariableStats(leftvar);
@@ -3778,20 +3933,21 @@ get_variable_numdistinct(VariableStatData *vardata)
 }

 /*
- * get_variable_maximum
- *      Estimate the maximum value of the specified variable.
- *      If successful, store value in *max and return TRUE.
+ * get_variable_range
+ *      Estimate the minimum and maximum value of the specified variable.
+ *      If successful, store values in *min and *max, and return TRUE.
  *      If no data available, return FALSE.
  *
- * sortop is the "<" comparison operator to use.  (To extract the
- * minimum instead of the maximum, just pass the ">" operator instead.)
+ * sortop is the "<" comparison operator to use.  This should generally
+ * be "<" not ">", as only the former is likely to be found in pg_statistic.
  */
 static bool
-get_variable_maximum(PlannerInfo *root, VariableStatData *vardata,
-                     Oid sortop, Datum *max)
+get_variable_range(PlannerInfo *root, VariableStatData *vardata, Oid sortop,
+                   Datum *min, Datum *max)
 {
+    Datum       tmin = 0;
     Datum       tmax = 0;
-    bool        have_max = false;
+    bool        have_data = false;
     Form_pg_statistic stats;
     int16       typLen;
     bool        typByVal;
@@ -3809,7 +3965,7 @@ get_variable_maximum(PlannerInfo *root, VariableStatData *vardata,
     get_typlenbyval(vardata->atttype, &typLen, &typByVal);

     /*
-     * If there is a histogram, grab the last or first value as appropriate.
+     * If there is a histogram, grab the first and last values.
      *
      * If there is a histogram that is sorted with some other operator than
      * the one we want, fail --- this suggests that there is data we can't
@@ -3823,42 +3979,24 @@ get_variable_maximum(PlannerInfo *root, VariableStatData *vardata,
     {
         if (nvalues > 0)
         {
+            tmin = datumCopy(values[0], typByVal, typLen);
             tmax = datumCopy(values[nvalues - 1], typByVal, typLen);
-            have_max = true;
+            have_data = true;
         }
         free_attstatsslot(vardata->atttype, values, nvalues, NULL, 0);
     }
-    else
+    else if (get_attstatsslot(vardata->statsTuple,
+                              vardata->atttype, vardata->atttypmod,
+                              STATISTIC_KIND_HISTOGRAM, InvalidOid,
+                              &values, &nvalues,
+                              NULL, NULL))
     {
-        Oid         rsortop = get_commutator(sortop);
-
-        if (OidIsValid(rsortop) &&
-            get_attstatsslot(vardata->statsTuple,
-                             vardata->atttype, vardata->atttypmod,
-                             STATISTIC_KIND_HISTOGRAM, rsortop,
-                             &values, &nvalues,
-                             NULL, NULL))
-        {
-            if (nvalues > 0)
-            {
-                tmax = datumCopy(values[0], typByVal, typLen);
-                have_max = true;
-            }
-            free_attstatsslot(vardata->atttype, values, nvalues, NULL, 0);
-        }
-        else if (get_attstatsslot(vardata->statsTuple,
-                                  vardata->atttype, vardata->atttypmod,
-                                  STATISTIC_KIND_HISTOGRAM, InvalidOid,
-                                  &values, &nvalues,
-                                  NULL, NULL))
-        {
-            free_attstatsslot(vardata->atttype, values, nvalues, NULL, 0);
-            return false;
-        }
+        free_attstatsslot(vardata->atttype, values, nvalues, NULL, 0);
+        return false;
     }

     /*
-     * If we have most-common-values info, look for a large MCV.  This is
+     * If we have most-common-values info, look for extreme MCVs.  This is
      * needed even if we also have a histogram, since the histogram excludes
      * the MCVs.  However, usually the MCVs will not be the extreme values, so
      * avoid unnecessary data copying.
@@ -3869,31 +4007,41 @@ get_variable_maximum(PlannerInfo *root, VariableStatData *vardata,
                               &values, &nvalues,
                               NULL, NULL))
     {
-        bool        large_mcv = false;
+        bool        tmin_is_mcv = false;
+        bool        tmax_is_mcv = false;
         FmgrInfo    opproc;

         fmgr_info(get_opcode(sortop), &opproc);

         for (i = 0; i < nvalues; i++)
         {
-            if (!have_max)
+            if (!have_data)
             {
-                tmax = values[i];
-                large_mcv = have_max = true;
+                tmin = tmax = values[i];
+                tmin_is_mcv = tmax_is_mcv = have_data = true;
+                continue;
             }
-            else if (DatumGetBool(FunctionCall2(&opproc, tmax, values[i])))
+            if (DatumGetBool(FunctionCall2(&opproc, values[i], tmin)))
+            {
+                tmin = values[i];
+                tmin_is_mcv = true;
+            }
+            if (DatumGetBool(FunctionCall2(&opproc, tmax, values[i])))
             {
                 tmax = values[i];
-                large_mcv = true;
+                tmax_is_mcv = true;
             }
         }
-        if (large_mcv)
+        if (tmin_is_mcv)
+            tmin = datumCopy(tmin, typByVal, typLen);
+        if (tmax_is_mcv)
             tmax = datumCopy(tmax, typByVal, typLen);
         free_attstatsslot(vardata->atttype, values, nvalues, NULL, 0);
     }

+    *min = tmin;
     *max = tmax;
-    return have_max;
+    return have_data;
 }

src/include/nodes/relation.h

@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.150 2007/11/15 22:25:17 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.151 2007/12/08 21:05:11 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -993,8 +993,10 @@ typedef struct MergeScanSelCache
     int         strategy;       /* sort direction (ASC or DESC) */
     bool        nulls_first;    /* do NULLs come before normal values? */
     /* Results */
-    Selectivity leftscansel;    /* scan fraction for clause left side */
-    Selectivity rightscansel;   /* scan fraction for clause right side */
+    Selectivity leftstartsel;   /* first-join fraction for clause left side */
+    Selectivity leftendsel;     /* last-join fraction for clause left side */
+    Selectivity rightstartsel;  /* first-join fraction for clause right side */
+    Selectivity rightendsel;    /* last-join fraction for clause right side */
 } MergeScanSelCache;

 /*
src/include/utils/selfuncs.h

@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/utils/selfuncs.h,v 1.41 2007/11/07 22:37:24 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/utils/selfuncs.h,v 1.42 2007/12/08 21:05:11 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -161,8 +161,8 @@ extern Selectivity rowcomparesel(PlannerInfo *root,

 extern void mergejoinscansel(PlannerInfo *root, Node *clause,
                  Oid opfamily, int strategy, bool nulls_first,
-                 Selectivity *leftscan,
-                 Selectivity *rightscan);
+                 Selectivity *leftstart, Selectivity *leftend,
+                 Selectivity *rightstart, Selectivity *rightend);

 extern double estimate_num_groups(PlannerInfo *root, List *groupExprs,
                    double input_rows);
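To make the new leftstart/leftend and rightstart/rightend outputs concrete, here is a toy model of the range-based reasoning in mergejoinscansel above. It assumes uniformly distributed values rather than the histogram-driven scalarineqsel estimates the planner actually uses, and the ranges are made-up numbers: the "end" fraction of one side is the portion of its values at or below the other side's maximum, and the new "start" fraction is the portion strictly below the other side's minimum.

#include <stdio.h>

/* Fraction of a uniform range [lo, hi] that lies below bound "b". */
static double
frac_below(double lo, double hi, double b)
{
    if (b <= lo)
        return 0.0;
    if (b >= hi)
        return 1.0;
    return (b - lo) / (hi - lo);
}

int
main(void)
{
    /* hypothetical ranges of the two join variables */
    double left_lo = 1.0, left_hi = 100.0;
    double right_lo = 40.0, right_hi = 60.0;

    /* fraction scanned before the join ends: values up to the other side's max */
    double leftend = frac_below(left_lo, left_hi, right_hi);
    double rightend = frac_below(right_lo, right_hi, left_hi);

    /* fraction skipped before the first pair: values below the other side's min */
    double leftstart = frac_below(left_lo, left_hi, right_lo);
    double rightstart = frac_below(right_lo, right_hi, left_lo);

    printf("left: start %.2f end %.2f; right: start %.2f end %.2f\n",
           leftstart, leftend, rightstart, rightend);
    return 0;
}

With these numbers the left input skips roughly 39% of its rows before the first match and stops after roughly 60%, while the right input is scanned in full, which is the shape of estimate that lets the planner charge part of the scan to startup cost rather than run cost.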