Fix cost_nestloop and cost_hashjoin to model the behavior of semi and anti
joins a bit better, ie, understand the differing cost functions for matched and unmatched outer tuples. There is more that could be done in cost_hashjoin but this already helps a great deal. Per discussions with Robert Haas.
This commit is contained in:
parent
a41e9ec0db
commit
8dcf18414b
@ -54,7 +54,7 @@
|
|||||||
* Portions Copyright (c) 1994, Regents of the University of California
|
* Portions Copyright (c) 1994, Regents of the University of California
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.207 2009/04/17 15:33:33 tgl Exp $
|
* $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.208 2009/05/09 22:51:41 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -71,6 +71,7 @@
|
|||||||
#include "optimizer/pathnode.h"
|
#include "optimizer/pathnode.h"
|
||||||
#include "optimizer/placeholder.h"
|
#include "optimizer/placeholder.h"
|
||||||
#include "optimizer/planmain.h"
|
#include "optimizer/planmain.h"
|
||||||
|
#include "optimizer/restrictinfo.h"
|
||||||
#include "parser/parsetree.h"
|
#include "parser/parsetree.h"
|
||||||
#include "utils/lsyscache.h"
|
#include "utils/lsyscache.h"
|
||||||
#include "utils/selfuncs.h"
|
#include "utils/selfuncs.h"
|
||||||
@ -119,6 +120,11 @@ static MergeScanSelCache *cached_scansel(PlannerInfo *root,
|
|||||||
RestrictInfo *rinfo,
|
RestrictInfo *rinfo,
|
||||||
PathKey *pathkey);
|
PathKey *pathkey);
|
||||||
static bool cost_qual_eval_walker(Node *node, cost_qual_eval_context *context);
|
static bool cost_qual_eval_walker(Node *node, cost_qual_eval_context *context);
|
||||||
|
static bool adjust_semi_join(PlannerInfo *root, JoinPath *path,
|
||||||
|
SpecialJoinInfo *sjinfo,
|
||||||
|
Selectivity *outer_match_frac,
|
||||||
|
Selectivity *match_count,
|
||||||
|
bool *indexed_join_quals);
|
||||||
static double approx_tuple_count(PlannerInfo *root, JoinPath *path,
|
static double approx_tuple_count(PlannerInfo *root, JoinPath *path,
|
||||||
List *quals);
|
List *quals);
|
||||||
static void set_rel_width(PlannerInfo *root, RelOptInfo *rel);
|
static void set_rel_width(PlannerInfo *root, RelOptInfo *rel);
|
||||||
@ -1394,11 +1400,15 @@ cost_nestloop(NestPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
|
|||||||
Path *inner_path = path->innerjoinpath;
|
Path *inner_path = path->innerjoinpath;
|
||||||
Cost startup_cost = 0;
|
Cost startup_cost = 0;
|
||||||
Cost run_cost = 0;
|
Cost run_cost = 0;
|
||||||
|
Cost inner_run_cost;
|
||||||
Cost cpu_per_tuple;
|
Cost cpu_per_tuple;
|
||||||
QualCost restrict_qual_cost;
|
QualCost restrict_qual_cost;
|
||||||
double outer_path_rows = PATH_ROWS(outer_path);
|
double outer_path_rows = PATH_ROWS(outer_path);
|
||||||
double inner_path_rows = nestloop_inner_path_rows(inner_path);
|
double inner_path_rows = nestloop_inner_path_rows(inner_path);
|
||||||
double ntuples;
|
double ntuples;
|
||||||
|
Selectivity outer_match_frac;
|
||||||
|
Selectivity match_count;
|
||||||
|
bool indexed_join_quals;
|
||||||
|
|
||||||
if (!enable_nestloop)
|
if (!enable_nestloop)
|
||||||
startup_cost += disable_cost;
|
startup_cost += disable_cost;
|
||||||
@ -1428,13 +1438,66 @@ cost_nestloop(NestPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
|
|||||||
*/
|
*/
|
||||||
run_cost += (outer_path_rows - 1) * inner_path->startup_cost;
|
run_cost += (outer_path_rows - 1) * inner_path->startup_cost;
|
||||||
}
|
}
|
||||||
run_cost += outer_path_rows *
|
inner_run_cost = inner_path->total_cost - inner_path->startup_cost;
|
||||||
(inner_path->total_cost - inner_path->startup_cost);
|
|
||||||
|
if (adjust_semi_join(root, path, sjinfo,
|
||||||
|
&outer_match_frac,
|
||||||
|
&match_count,
|
||||||
|
&indexed_join_quals))
|
||||||
|
{
|
||||||
|
double outer_matched_rows;
|
||||||
|
Selectivity inner_scan_frac;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Compute number of tuples processed (not number emitted!)
|
* SEMI or ANTI join: executor will stop after first match.
|
||||||
|
*
|
||||||
|
* For an outer-rel row that has at least one match, we can expect the
|
||||||
|
* inner scan to stop after a fraction 1/(match_count+1) of the inner
|
||||||
|
* rows, if the matches are evenly distributed. Since they probably
|
||||||
|
* aren't quite evenly distributed, we apply a fuzz factor of 2.0 to
|
||||||
|
* that fraction. (If we used a larger fuzz factor, we'd have to
|
||||||
|
* clamp inner_scan_frac to at most 1.0; but since match_count is at
|
||||||
|
* least 1, no such clamp is needed now.)
|
||||||
*/
|
*/
|
||||||
|
outer_matched_rows = rint(outer_path_rows * outer_match_frac);
|
||||||
|
inner_scan_frac = 2.0 / (match_count + 1.0);
|
||||||
|
|
||||||
|
/* Add inner run cost for outer tuples having matches */
|
||||||
|
run_cost += outer_matched_rows * inner_run_cost * inner_scan_frac;
|
||||||
|
|
||||||
|
/* Compute number of tuples processed (not number emitted!) */
|
||||||
|
ntuples = outer_matched_rows * inner_path_rows * inner_scan_frac;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* For unmatched outer-rel rows, there are two cases. If the inner
|
||||||
|
* path is an indexscan using all the joinquals as indexquals, then
|
||||||
|
* an unmatched row results in an indexscan returning no rows, which
|
||||||
|
* is probably quite cheap. We estimate this case as the same cost
|
||||||
|
* to return the first tuple of a nonempty scan. Otherwise, the
|
||||||
|
* executor will have to scan the whole inner rel; not so cheap.
|
||||||
|
*/
|
||||||
|
if (indexed_join_quals)
|
||||||
|
{
|
||||||
|
run_cost += (outer_path_rows - outer_matched_rows) *
|
||||||
|
inner_run_cost / inner_path_rows;
|
||||||
|
/* We won't be evaluating any quals at all for these rows */
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
run_cost += (outer_path_rows - outer_matched_rows) *
|
||||||
|
inner_run_cost;
|
||||||
|
ntuples += (outer_path_rows - outer_matched_rows) *
|
||||||
|
inner_path_rows;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/* Normal case; we'll scan whole input rel for each outer row */
|
||||||
|
run_cost += outer_path_rows * inner_run_cost;
|
||||||
|
|
||||||
|
/* Compute number of tuples processed (not number emitted!) */
|
||||||
ntuples = outer_path_rows * inner_path_rows;
|
ntuples = outer_path_rows * inner_path_rows;
|
||||||
|
}
|
||||||
|
|
||||||
/* CPU costs */
|
/* CPU costs */
|
||||||
cost_qual_eval(&restrict_qual_cost, path->joinrestrictinfo, root);
|
cost_qual_eval(&restrict_qual_cost, path->joinrestrictinfo, root);
|
||||||
@ -1731,6 +1794,9 @@ cost_mergejoin(MergePath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
|
|||||||
* cpu_tuple_cost plus the cost of evaluating additional restriction
|
* cpu_tuple_cost plus the cost of evaluating additional restriction
|
||||||
* clauses that are to be applied at the join. (This is pessimistic since
|
* clauses that are to be applied at the join. (This is pessimistic since
|
||||||
* not all of the quals may get evaluated at each tuple.)
|
* not all of the quals may get evaluated at each tuple.)
|
||||||
|
*
|
||||||
|
* Note: we could adjust for SEMI/ANTI joins skipping some qual evaluations
|
||||||
|
* here, but it's probably not worth the trouble.
|
||||||
*/
|
*/
|
||||||
startup_cost += qp_qual_cost.startup;
|
startup_cost += qp_qual_cost.startup;
|
||||||
cpu_per_tuple = cpu_tuple_cost + qp_qual_cost.per_tuple;
|
cpu_per_tuple = cpu_tuple_cost + qp_qual_cost.per_tuple;
|
||||||
@ -1824,6 +1890,8 @@ cost_hashjoin(HashPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
|
|||||||
int num_skew_mcvs;
|
int num_skew_mcvs;
|
||||||
double virtualbuckets;
|
double virtualbuckets;
|
||||||
Selectivity innerbucketsize;
|
Selectivity innerbucketsize;
|
||||||
|
Selectivity outer_match_frac;
|
||||||
|
Selectivity match_count;
|
||||||
ListCell *hcl;
|
ListCell *hcl;
|
||||||
|
|
||||||
if (!enable_hashjoin)
|
if (!enable_hashjoin)
|
||||||
@ -1838,12 +1906,6 @@ cost_hashjoin(HashPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
|
|||||||
qp_qual_cost.startup -= hash_qual_cost.startup;
|
qp_qual_cost.startup -= hash_qual_cost.startup;
|
||||||
qp_qual_cost.per_tuple -= hash_qual_cost.per_tuple;
|
qp_qual_cost.per_tuple -= hash_qual_cost.per_tuple;
|
||||||
|
|
||||||
/*
|
|
||||||
* Get approx # tuples passing the hashquals. We use approx_tuple_count
|
|
||||||
* here because we need an estimate done with JOIN_INNER semantics.
|
|
||||||
*/
|
|
||||||
hashjointuples = approx_tuple_count(root, &path->jpath, hashclauses);
|
|
||||||
|
|
||||||
/* cost of source data */
|
/* cost of source data */
|
||||||
startup_cost += outer_path->startup_cost;
|
startup_cost += outer_path->startup_cost;
|
||||||
run_cost += outer_path->total_cost - outer_path->startup_cost;
|
run_cost += outer_path->total_cost - outer_path->startup_cost;
|
||||||
@ -1970,18 +2032,78 @@ cost_hashjoin(HashPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
|
|||||||
|
|
||||||
/* CPU costs */
|
/* CPU costs */
|
||||||
|
|
||||||
|
if (adjust_semi_join(root, &path->jpath, sjinfo,
|
||||||
|
&outer_match_frac,
|
||||||
|
&match_count,
|
||||||
|
NULL))
|
||||||
|
{
|
||||||
|
double outer_matched_rows;
|
||||||
|
Selectivity inner_scan_frac;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The number of tuple comparisons needed is the number of outer tuples
|
* SEMI or ANTI join: executor will stop after first match.
|
||||||
* times the typical number of tuples in a hash bucket, which is the inner
|
*
|
||||||
* relation size times its bucketsize fraction. At each one, we need to
|
* For an outer-rel row that has at least one match, we can expect the
|
||||||
* evaluate the hashjoin quals. But actually, charging the full qual eval
|
* bucket scan to stop after a fraction 1/(match_count+1) of the
|
||||||
* cost at each tuple is pessimistic, since we don't evaluate the quals
|
* bucket's rows, if the matches are evenly distributed. Since they
|
||||||
* unless the hash values match exactly. For lack of a better idea, halve
|
* probably aren't quite evenly distributed, we apply a fuzz factor of
|
||||||
* the cost estimate to allow for that.
|
* 2.0 to that fraction. (If we used a larger fuzz factor, we'd have
|
||||||
|
* to clamp inner_scan_frac to at most 1.0; but since match_count is
|
||||||
|
* at least 1, no such clamp is needed now.)
|
||||||
|
*/
|
||||||
|
outer_matched_rows = rint(outer_path_rows * outer_match_frac);
|
||||||
|
inner_scan_frac = 2.0 / (match_count + 1.0);
|
||||||
|
|
||||||
|
startup_cost += hash_qual_cost.startup;
|
||||||
|
run_cost += hash_qual_cost.per_tuple * outer_matched_rows *
|
||||||
|
clamp_row_est(inner_path_rows * innerbucketsize * inner_scan_frac) * 0.5;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* For unmatched outer-rel rows, the picture is quite a lot different.
|
||||||
|
* In the first place, there is no reason to assume that these rows
|
||||||
|
* preferentially hit heavily-populated buckets; instead assume they
|
||||||
|
* are uncorrelated with the inner distribution and so they see an
|
||||||
|
* average bucket size of inner_path_rows / virtualbuckets. In the
|
||||||
|
* second place, it seems likely that they will have few if any
|
||||||
|
* exact hash-code matches and so very few of the tuples in the
|
||||||
|
* bucket will actually require eval of the hash quals. We don't
|
||||||
|
* have any good way to estimate how many will, but for the moment
|
||||||
|
* assume that the effective cost per bucket entry is one-tenth what
|
||||||
|
* it is for matchable tuples.
|
||||||
|
*/
|
||||||
|
run_cost += hash_qual_cost.per_tuple *
|
||||||
|
(outer_path_rows - outer_matched_rows) *
|
||||||
|
clamp_row_est(inner_path_rows / virtualbuckets) * 0.05;
|
||||||
|
|
||||||
|
/* Get # of tuples that will pass the basic join */
|
||||||
|
if (path->jpath.jointype == JOIN_SEMI)
|
||||||
|
hashjointuples = outer_matched_rows;
|
||||||
|
else
|
||||||
|
hashjointuples = outer_path_rows - outer_matched_rows;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* The number of tuple comparisons needed is the number of outer
|
||||||
|
* tuples times the typical number of tuples in a hash bucket, which
|
||||||
|
* is the inner relation size times its bucketsize fraction. At each
|
||||||
|
* one, we need to evaluate the hashjoin quals. But actually,
|
||||||
|
* charging the full qual eval cost at each tuple is pessimistic,
|
||||||
|
* since we don't evaluate the quals unless the hash values match
|
||||||
|
* exactly. For lack of a better idea, halve the cost estimate to
|
||||||
|
* allow for that.
|
||||||
*/
|
*/
|
||||||
startup_cost += hash_qual_cost.startup;
|
startup_cost += hash_qual_cost.startup;
|
||||||
run_cost += hash_qual_cost.per_tuple *
|
run_cost += hash_qual_cost.per_tuple * outer_path_rows *
|
||||||
outer_path_rows * clamp_row_est(inner_path_rows * innerbucketsize) * 0.5;
|
clamp_row_est(inner_path_rows * innerbucketsize) * 0.5;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Get approx # tuples passing the hashquals. We use
|
||||||
|
* approx_tuple_count here because we need an estimate done with
|
||||||
|
* JOIN_INNER semantics.
|
||||||
|
*/
|
||||||
|
hashjointuples = approx_tuple_count(root, &path->jpath, hashclauses);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* For each tuple that gets through the hashjoin proper, we charge
|
* For each tuple that gets through the hashjoin proper, we charge
|
||||||
@ -2320,6 +2442,156 @@ cost_qual_eval_walker(Node *node, cost_qual_eval_context *context)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* adjust_semi_join
|
||||||
|
* Estimate how much of the inner input a SEMI or ANTI join
|
||||||
|
* can be expected to scan.
|
||||||
|
*
|
||||||
|
* In a hash or nestloop SEMI/ANTI join, the executor will stop scanning
|
||||||
|
* inner rows as soon as it finds a match to the current outer row.
|
||||||
|
* We should therefore adjust some of the cost components for this effect.
|
||||||
|
* This function computes some estimates needed for these adjustments.
|
||||||
|
*
|
||||||
|
* 'path' is already filled in except for the cost fields
|
||||||
|
* 'sjinfo' is extra info about the join for selectivity estimation
|
||||||
|
*
|
||||||
|
* Returns TRUE if this is a SEMI or ANTI join, FALSE if not.
|
||||||
|
*
|
||||||
|
* Output parameters (set only in TRUE-result case):
|
||||||
|
* *outer_match_frac is set to the fraction of the outer tuples that are
|
||||||
|
* expected to have at least one match.
|
||||||
|
* *match_count is set to the average number of matches expected for
|
||||||
|
* outer tuples that have at least one match.
|
||||||
|
* *indexed_join_quals is set to TRUE if all the joinquals are used as
|
||||||
|
* inner index quals, FALSE if not.
|
||||||
|
*
|
||||||
|
* indexed_join_quals can be passed as NULL if that information is not
|
||||||
|
* relevant (it is only useful for the nestloop case).
|
||||||
|
*/
|
||||||
|
static bool
|
||||||
|
adjust_semi_join(PlannerInfo *root, JoinPath *path, SpecialJoinInfo *sjinfo,
|
||||||
|
Selectivity *outer_match_frac,
|
||||||
|
Selectivity *match_count,
|
||||||
|
bool *indexed_join_quals)
|
||||||
|
{
|
||||||
|
JoinType jointype = path->jointype;
|
||||||
|
Selectivity jselec;
|
||||||
|
Selectivity nselec;
|
||||||
|
Selectivity avgmatch;
|
||||||
|
SpecialJoinInfo norm_sjinfo;
|
||||||
|
List *joinquals;
|
||||||
|
ListCell *l;
|
||||||
|
|
||||||
|
/* Fall out if it's not JOIN_SEMI or JOIN_ANTI */
|
||||||
|
if (jointype != JOIN_SEMI && jointype != JOIN_ANTI)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Note: it's annoying to repeat this selectivity estimation on each call,
|
||||||
|
* when the joinclause list will be the same for all path pairs
|
||||||
|
* implementing a given join. clausesel.c will save us from the worst
|
||||||
|
* effects of this by caching at the RestrictInfo level; but perhaps it'd
|
||||||
|
* be worth finding a way to cache the results at a higher level.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* In an ANTI join, we must ignore clauses that are "pushed down",
|
||||||
|
* since those won't affect the match logic. In a SEMI join, we do not
|
||||||
|
* distinguish joinquals from "pushed down" quals, so just use the whole
|
||||||
|
* restrictinfo list.
|
||||||
|
*/
|
||||||
|
if (jointype == JOIN_ANTI)
|
||||||
|
{
|
||||||
|
joinquals = NIL;
|
||||||
|
foreach(l, path->joinrestrictinfo)
|
||||||
|
{
|
||||||
|
RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
|
||||||
|
|
||||||
|
Assert(IsA(rinfo, RestrictInfo));
|
||||||
|
if (!rinfo->is_pushed_down)
|
||||||
|
joinquals = lappend(joinquals, rinfo);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
joinquals = path->joinrestrictinfo;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Get the JOIN_SEMI or JOIN_ANTI selectivity of the join clauses.
|
||||||
|
*/
|
||||||
|
jselec = clauselist_selectivity(root,
|
||||||
|
joinquals,
|
||||||
|
0,
|
||||||
|
jointype,
|
||||||
|
sjinfo);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Also get the normal inner-join selectivity of the join clauses.
|
||||||
|
*/
|
||||||
|
norm_sjinfo.type = T_SpecialJoinInfo;
|
||||||
|
norm_sjinfo.min_lefthand = path->outerjoinpath->parent->relids;
|
||||||
|
norm_sjinfo.min_righthand = path->innerjoinpath->parent->relids;
|
||||||
|
norm_sjinfo.syn_lefthand = path->outerjoinpath->parent->relids;
|
||||||
|
norm_sjinfo.syn_righthand = path->innerjoinpath->parent->relids;
|
||||||
|
norm_sjinfo.jointype = JOIN_INNER;
|
||||||
|
/* we don't bother trying to make the remaining fields valid */
|
||||||
|
norm_sjinfo.lhs_strict = false;
|
||||||
|
norm_sjinfo.delay_upper_joins = false;
|
||||||
|
norm_sjinfo.join_quals = NIL;
|
||||||
|
|
||||||
|
nselec = clauselist_selectivity(root,
|
||||||
|
joinquals,
|
||||||
|
0,
|
||||||
|
JOIN_INNER,
|
||||||
|
&norm_sjinfo);
|
||||||
|
|
||||||
|
/* Avoid leaking a lot of ListCells */
|
||||||
|
if (jointype == JOIN_ANTI)
|
||||||
|
list_free(joinquals);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* jselec can be interpreted as the fraction of outer-rel rows that have
|
||||||
|
* any matches (this is true for both SEMI and ANTI cases). And nselec
|
||||||
|
* is the fraction of the Cartesian product that matches. So, the
|
||||||
|
* average number of matches for each outer-rel row that has at least
|
||||||
|
* one match is nselec * inner_rows / jselec.
|
||||||
|
*
|
||||||
|
* Note: it is correct to use the inner rel's "rows" count here, not
|
||||||
|
* PATH_ROWS(), even if the inner path under consideration is an inner
|
||||||
|
* indexscan. This is because we have included all the join clauses
|
||||||
|
* in the selectivity estimate, even ones used in an inner indexscan.
|
||||||
|
*/
|
||||||
|
if (jselec > 0) /* protect against zero divide */
|
||||||
|
{
|
||||||
|
avgmatch = nselec * path->innerjoinpath->parent->rows / jselec;
|
||||||
|
/* Clamp to sane range */
|
||||||
|
avgmatch = Max(1.0, avgmatch);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
avgmatch = 1.0;
|
||||||
|
|
||||||
|
*outer_match_frac = jselec;
|
||||||
|
*match_count = avgmatch;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If requested, check whether the inner path uses all the joinquals
|
||||||
|
* as indexquals. (If that's true, we can assume that an unmatched
|
||||||
|
* outer tuple is cheap to process, whereas otherwise it's probably
|
||||||
|
* expensive.)
|
||||||
|
*/
|
||||||
|
if (indexed_join_quals)
|
||||||
|
{
|
||||||
|
List *nrclauses;
|
||||||
|
|
||||||
|
nrclauses = select_nonredundant_join_clauses(root,
|
||||||
|
path->joinrestrictinfo,
|
||||||
|
path->innerjoinpath);
|
||||||
|
*indexed_join_quals = (nrclauses == NIL);
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* approx_tuple_count
|
* approx_tuple_count
|
||||||
* Quick-and-dirty estimation of the number of join rows passing
|
* Quick-and-dirty estimation of the number of join rows passing
|
||||||
|
@ -10,7 +10,7 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.258 2009/04/19 19:46:33 tgl Exp $
|
* $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.259 2009/05/09 22:51:41 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -1562,62 +1562,16 @@ create_nestloop_plan(PlannerInfo *root,
|
|||||||
List *otherclauses;
|
List *otherclauses;
|
||||||
NestLoop *join_plan;
|
NestLoop *join_plan;
|
||||||
|
|
||||||
if (IsA(best_path->innerjoinpath, IndexPath))
|
|
||||||
{
|
|
||||||
/*
|
/*
|
||||||
* An index is being used to reduce the number of tuples scanned in
|
* If the inner path is a nestloop inner indexscan, it might be using
|
||||||
* the inner relation. If there are join clauses being used with the
|
* some of the join quals as index quals, in which case we don't have
|
||||||
* index, we may remove those join clauses from the list of clauses
|
* to check them again at the join node. Remove any join quals that
|
||||||
* that have to be checked as qpquals at the join node.
|
* are redundant.
|
||||||
*
|
|
||||||
* We can also remove any join clauses that are redundant with those
|
|
||||||
* being used in the index scan; this check is needed because
|
|
||||||
* find_eclass_clauses_for_index_join() may emit different clauses
|
|
||||||
* than generate_join_implied_equalities() did.
|
|
||||||
*
|
|
||||||
* We can skip this if the index path is an ordinary indexpath and not
|
|
||||||
* a special innerjoin path, since it then wouldn't be using any join
|
|
||||||
* clauses.
|
|
||||||
*/
|
*/
|
||||||
IndexPath *innerpath = (IndexPath *) best_path->innerjoinpath;
|
|
||||||
|
|
||||||
if (innerpath->isjoininner)
|
|
||||||
joinrestrictclauses =
|
joinrestrictclauses =
|
||||||
select_nonredundant_join_clauses(root,
|
select_nonredundant_join_clauses(root,
|
||||||
joinrestrictclauses,
|
joinrestrictclauses,
|
||||||
innerpath->indexclauses);
|
best_path->innerjoinpath);
|
||||||
}
|
|
||||||
else if (IsA(best_path->innerjoinpath, BitmapHeapPath))
|
|
||||||
{
|
|
||||||
/*
|
|
||||||
* Same deal for bitmapped index scans.
|
|
||||||
*
|
|
||||||
* Note: both here and above, we ignore any implicit index
|
|
||||||
* restrictions associated with the use of partial indexes. This is
|
|
||||||
* OK because we're only trying to prove we can dispense with some
|
|
||||||
* join quals; failing to prove that doesn't result in an incorrect
|
|
||||||
* plan. It is the right way to proceed because adding more quals to
|
|
||||||
* the stuff we got from the original query would just make it harder
|
|
||||||
* to detect duplication. (Also, to change this we'd have to be wary
|
|
||||||
* of UPDATE/DELETE/SELECT FOR UPDATE target relations; see notes
|
|
||||||
* above about EvalPlanQual.)
|
|
||||||
*/
|
|
||||||
BitmapHeapPath *innerpath = (BitmapHeapPath *) best_path->innerjoinpath;
|
|
||||||
|
|
||||||
if (innerpath->isjoininner)
|
|
||||||
{
|
|
||||||
List *bitmapclauses;
|
|
||||||
|
|
||||||
bitmapclauses =
|
|
||||||
make_restrictinfo_from_bitmapqual(innerpath->bitmapqual,
|
|
||||||
true,
|
|
||||||
false);
|
|
||||||
joinrestrictclauses =
|
|
||||||
select_nonredundant_join_clauses(root,
|
|
||||||
joinrestrictclauses,
|
|
||||||
bitmapclauses);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Sort join qual clauses into best execution order */
|
/* Sort join qual clauses into best execution order */
|
||||||
joinrestrictclauses = order_qual_clauses(root, joinrestrictclauses);
|
joinrestrictclauses = order_qual_clauses(root, joinrestrictclauses);
|
||||||
|
@ -8,7 +8,7 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $PostgreSQL: pgsql/src/backend/optimizer/util/restrictinfo.c,v 1.58 2009/04/16 20:42:16 tgl Exp $
|
* $PostgreSQL: pgsql/src/backend/optimizer/util/restrictinfo.c,v 1.59 2009/05/09 22:51:41 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -35,8 +35,9 @@ static Expr *make_sub_restrictinfos(Expr *clause,
|
|||||||
bool pseudoconstant,
|
bool pseudoconstant,
|
||||||
Relids required_relids,
|
Relids required_relids,
|
||||||
Relids nullable_relids);
|
Relids nullable_relids);
|
||||||
static bool join_clause_is_redundant(PlannerInfo *root,
|
static List *select_nonredundant_join_list(List *restrictinfo_list,
|
||||||
RestrictInfo *rinfo,
|
List *reference_list);
|
||||||
|
static bool join_clause_is_redundant(RestrictInfo *rinfo,
|
||||||
List *reference_list);
|
List *reference_list);
|
||||||
|
|
||||||
|
|
||||||
@ -545,25 +546,90 @@ extract_actual_join_clauses(List *restrictinfo_list,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* select_nonredundant_join_clauses
|
* select_nonredundant_join_clauses
|
||||||
*
|
*
|
||||||
* Given a list of RestrictInfo clauses that are to be applied in a join,
|
* Given a list of RestrictInfo clauses that are to be applied in a join,
|
||||||
* select the ones that are not redundant with any clause in the
|
* select the ones that are not redundant with any clause that's enforced
|
||||||
* reference_list. This is used only for nestloop-with-inner-indexscan
|
* by the inner_path. This is used for nestloop joins, wherein any clause
|
||||||
* joins: any clauses being checked by the index should be removed from
|
* being used in an inner indexscan need not be checked again at the join.
|
||||||
* the qpquals list.
|
|
||||||
*
|
*
|
||||||
* "Redundant" means either equal() or derived from the same EquivalenceClass.
|
* "Redundant" means either equal() or derived from the same EquivalenceClass.
|
||||||
* We have to check the latter because indxqual.c may select different derived
|
* We have to check the latter because indxqual.c may select different derived
|
||||||
* clauses than were selected by generate_join_implied_equalities().
|
* clauses than were selected by generate_join_implied_equalities().
|
||||||
*
|
*
|
||||||
* Note that we assume the given restrictinfo_list has already been checked
|
* Note that we are *not* checking for local redundancies within the given
|
||||||
* for local redundancies, so we don't check again.
|
* restrictinfo_list; that should have been handled elsewhere.
|
||||||
*/
|
*/
|
||||||
List *
|
List *
|
||||||
select_nonredundant_join_clauses(PlannerInfo *root,
|
select_nonredundant_join_clauses(PlannerInfo *root,
|
||||||
List *restrictinfo_list,
|
List *restrictinfo_list,
|
||||||
|
Path *inner_path)
|
||||||
|
{
|
||||||
|
if (IsA(inner_path, IndexPath))
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* Check the index quals to see if any of them are join clauses.
|
||||||
|
*
|
||||||
|
* We can skip this if the index path is an ordinary indexpath and not
|
||||||
|
* a special innerjoin path, since it then wouldn't be using any join
|
||||||
|
* clauses.
|
||||||
|
*/
|
||||||
|
IndexPath *innerpath = (IndexPath *) inner_path;
|
||||||
|
|
||||||
|
if (innerpath->isjoininner)
|
||||||
|
restrictinfo_list =
|
||||||
|
select_nonredundant_join_list(restrictinfo_list,
|
||||||
|
innerpath->indexclauses);
|
||||||
|
}
|
||||||
|
else if (IsA(inner_path, BitmapHeapPath))
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* Same deal for bitmapped index scans.
|
||||||
|
*
|
||||||
|
* Note: both here and above, we ignore any implicit index
|
||||||
|
* restrictions associated with the use of partial indexes. This is
|
||||||
|
* OK because we're only trying to prove we can dispense with some
|
||||||
|
* join quals; failing to prove that doesn't result in an incorrect
|
||||||
|
* plan. It's quite unlikely that a join qual could be proven
|
||||||
|
* redundant by an index predicate anyway. (Also, if we did manage
|
||||||
|
* to prove it, we'd have to have a special case for update targets;
|
||||||
|
* see notes about EvalPlanQual testing in create_indexscan_plan().)
|
||||||
|
*/
|
||||||
|
BitmapHeapPath *innerpath = (BitmapHeapPath *) inner_path;
|
||||||
|
|
||||||
|
if (innerpath->isjoininner)
|
||||||
|
{
|
||||||
|
List *bitmapclauses;
|
||||||
|
|
||||||
|
bitmapclauses =
|
||||||
|
make_restrictinfo_from_bitmapqual(innerpath->bitmapqual,
|
||||||
|
true,
|
||||||
|
false);
|
||||||
|
restrictinfo_list =
|
||||||
|
select_nonredundant_join_list(restrictinfo_list,
|
||||||
|
bitmapclauses);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* XXX the inner path of a nestloop could also be an append relation
|
||||||
|
* whose elements use join quals. However, they might each use different
|
||||||
|
* quals; we could only remove join quals that are enforced by all the
|
||||||
|
* appendrel members. For the moment we don't bother to try.
|
||||||
|
*/
|
||||||
|
|
||||||
|
return restrictinfo_list;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* select_nonredundant_join_list
|
||||||
|
* Select the members of restrictinfo_list that are not redundant with
|
||||||
|
* any member of reference_list. See above for more info.
|
||||||
|
*/
|
||||||
|
static List *
|
||||||
|
select_nonredundant_join_list(List *restrictinfo_list,
|
||||||
List *reference_list)
|
List *reference_list)
|
||||||
{
|
{
|
||||||
List *result = NIL;
|
List *result = NIL;
|
||||||
@ -574,7 +640,7 @@ select_nonredundant_join_clauses(PlannerInfo *root,
|
|||||||
RestrictInfo *rinfo = (RestrictInfo *) lfirst(item);
|
RestrictInfo *rinfo = (RestrictInfo *) lfirst(item);
|
||||||
|
|
||||||
/* drop it if redundant with any reference clause */
|
/* drop it if redundant with any reference clause */
|
||||||
if (join_clause_is_redundant(root, rinfo, reference_list))
|
if (join_clause_is_redundant(rinfo, reference_list))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
/* otherwise, add it to result list */
|
/* otherwise, add it to result list */
|
||||||
@ -589,8 +655,7 @@ select_nonredundant_join_clauses(PlannerInfo *root,
|
|||||||
* Test whether rinfo is redundant with any clause in reference_list.
|
* Test whether rinfo is redundant with any clause in reference_list.
|
||||||
*/
|
*/
|
||||||
static bool
|
static bool
|
||||||
join_clause_is_redundant(PlannerInfo *root,
|
join_clause_is_redundant(RestrictInfo *rinfo,
|
||||||
RestrictInfo *rinfo,
|
|
||||||
List *reference_list)
|
List *reference_list)
|
||||||
{
|
{
|
||||||
ListCell *refitem;
|
ListCell *refitem;
|
||||||
|
@ -7,7 +7,7 @@
|
|||||||
* Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
|
* Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
|
||||||
* Portions Copyright (c) 1994, Regents of the University of California
|
* Portions Copyright (c) 1994, Regents of the University of California
|
||||||
*
|
*
|
||||||
* $PostgreSQL: pgsql/src/include/optimizer/restrictinfo.h,v 1.43 2009/04/16 20:42:16 tgl Exp $
|
* $PostgreSQL: pgsql/src/include/optimizer/restrictinfo.h,v 1.44 2009/05/09 22:51:41 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -39,6 +39,6 @@ extern void extract_actual_join_clauses(List *restrictinfo_list,
|
|||||||
List **otherquals);
|
List **otherquals);
|
||||||
extern List *select_nonredundant_join_clauses(PlannerInfo *root,
|
extern List *select_nonredundant_join_clauses(PlannerInfo *root,
|
||||||
List *restrictinfo_list,
|
List *restrictinfo_list,
|
||||||
List *reference_list);
|
Path *inner_path);
|
||||||
|
|
||||||
#endif /* RESTRICTINFO_H */
|
#endif /* RESTRICTINFO_H */
|
||||||
|
Loading…
x
Reference in New Issue
Block a user