Improve eqjoinsel's ndistinct clamping to work for multiple levels of join.
This patch fixes an oversight in my commit 7f3eba30c9d622d1981b1368f2d79ba0999cdff2 of 2008-10-23. That patch accounted for baserel restriction clauses that reduced the number of rows coming out of a table (and hence the number of possibly-distinct values of a join variable), but not for join restriction clauses that might have been applied at a lower level of join. To account for the latter, look up the sizes of the min_lefthand and min_righthand inputs of the current join, and clamp with those in the same way as for the base relations. Noted while investigating a complaint from Ben Chobot, although this in itself doesn't seem to explain his report. Back-patch to 8.4; previous versions used different estimation methods for which this heuristic isn't relevant.
This commit is contained in:
parent
5cfe33fe7b
commit
97930cf578
@ -142,9 +142,11 @@ static double ineq_histogram_selectivity(PlannerInfo *root,
|
|||||||
FmgrInfo *opproc, bool isgt,
|
FmgrInfo *opproc, bool isgt,
|
||||||
Datum constval, Oid consttype);
|
Datum constval, Oid consttype);
|
||||||
static double eqjoinsel_inner(Oid operator,
|
static double eqjoinsel_inner(Oid operator,
|
||||||
VariableStatData *vardata1, VariableStatData *vardata2);
|
VariableStatData *vardata1, VariableStatData *vardata2,
|
||||||
|
RelOptInfo *rel1, RelOptInfo *rel2);
|
||||||
static double eqjoinsel_semi(Oid operator,
|
static double eqjoinsel_semi(Oid operator,
|
||||||
VariableStatData *vardata1, VariableStatData *vardata2);
|
VariableStatData *vardata1, VariableStatData *vardata2,
|
||||||
|
RelOptInfo *rel1, RelOptInfo *rel2);
|
||||||
static bool convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
|
static bool convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
|
||||||
Datum lobound, Datum hibound, Oid boundstypid,
|
Datum lobound, Datum hibound, Oid boundstypid,
|
||||||
double *scaledlobound, double *scaledhibound);
|
double *scaledlobound, double *scaledhibound);
|
||||||
@ -173,6 +175,7 @@ static bool get_actual_variable_range(PlannerInfo *root,
|
|||||||
VariableStatData *vardata,
|
VariableStatData *vardata,
|
||||||
Oid sortop,
|
Oid sortop,
|
||||||
Datum *min, Datum *max);
|
Datum *min, Datum *max);
|
||||||
|
static RelOptInfo *find_join_input_rel(PlannerInfo *root, Relids relids);
|
||||||
static Selectivity prefix_selectivity(PlannerInfo *root,
|
static Selectivity prefix_selectivity(PlannerInfo *root,
|
||||||
VariableStatData *vardata,
|
VariableStatData *vardata,
|
||||||
Oid vartype, Oid opfamily, Const *prefixcon);
|
Oid vartype, Oid opfamily, Const *prefixcon);
|
||||||
@ -2008,24 +2011,47 @@ eqjoinsel(PG_FUNCTION_ARGS)
|
|||||||
VariableStatData vardata1;
|
VariableStatData vardata1;
|
||||||
VariableStatData vardata2;
|
VariableStatData vardata2;
|
||||||
bool join_is_reversed;
|
bool join_is_reversed;
|
||||||
|
RelOptInfo *rel1;
|
||||||
|
RelOptInfo *rel2;
|
||||||
|
|
||||||
get_join_variables(root, args, sjinfo,
|
get_join_variables(root, args, sjinfo,
|
||||||
&vardata1, &vardata2, &join_is_reversed);
|
&vardata1, &vardata2, &join_is_reversed);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Identify the join's direct input relations. We use the min lefthand
|
||||||
|
* and min righthand as the inputs, even though the join might actually
|
||||||
|
* get done with larger input relations. The min inputs are guaranteed to
|
||||||
|
* have been formed by now, though, and always using them ensures
|
||||||
|
* consistency of estimates.
|
||||||
|
*/
|
||||||
|
if (!join_is_reversed)
|
||||||
|
{
|
||||||
|
rel1 = find_join_input_rel(root, sjinfo->min_lefthand);
|
||||||
|
rel2 = find_join_input_rel(root, sjinfo->min_righthand);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
rel1 = find_join_input_rel(root, sjinfo->min_righthand);
|
||||||
|
rel2 = find_join_input_rel(root, sjinfo->min_lefthand);
|
||||||
|
}
|
||||||
|
|
||||||
switch (sjinfo->jointype)
|
switch (sjinfo->jointype)
|
||||||
{
|
{
|
||||||
case JOIN_INNER:
|
case JOIN_INNER:
|
||||||
case JOIN_LEFT:
|
case JOIN_LEFT:
|
||||||
case JOIN_FULL:
|
case JOIN_FULL:
|
||||||
selec = eqjoinsel_inner(operator, &vardata1, &vardata2);
|
selec = eqjoinsel_inner(operator, &vardata1, &vardata2,
|
||||||
|
rel1, rel2);
|
||||||
break;
|
break;
|
||||||
case JOIN_SEMI:
|
case JOIN_SEMI:
|
||||||
case JOIN_ANTI:
|
case JOIN_ANTI:
|
||||||
if (!join_is_reversed)
|
if (!join_is_reversed)
|
||||||
selec = eqjoinsel_semi(operator, &vardata1, &vardata2);
|
selec = eqjoinsel_semi(operator, &vardata1, &vardata2,
|
||||||
|
rel1, rel2);
|
||||||
else
|
else
|
||||||
selec = eqjoinsel_semi(get_commutator(operator),
|
selec = eqjoinsel_semi(get_commutator(operator),
|
||||||
&vardata2, &vardata1);
|
&vardata2, &vardata1,
|
||||||
|
rel2, rel1);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
/* other values not expected here */
|
/* other values not expected here */
|
||||||
@ -2051,7 +2077,8 @@ eqjoinsel(PG_FUNCTION_ARGS)
|
|||||||
*/
|
*/
|
||||||
static double
|
static double
|
||||||
eqjoinsel_inner(Oid operator,
|
eqjoinsel_inner(Oid operator,
|
||||||
VariableStatData *vardata1, VariableStatData *vardata2)
|
VariableStatData *vardata1, VariableStatData *vardata2,
|
||||||
|
RelOptInfo *rel1, RelOptInfo *rel2)
|
||||||
{
|
{
|
||||||
double selec;
|
double selec;
|
||||||
double nd1;
|
double nd1;
|
||||||
@ -2252,15 +2279,19 @@ eqjoinsel_inner(Oid operator,
|
|||||||
* be, providing a crude correction for the selectivity of restriction
|
* be, providing a crude correction for the selectivity of restriction
|
||||||
* clauses on those relations. (We don't do that in the other path
|
* clauses on those relations. (We don't do that in the other path
|
||||||
* since there we are comparing the nd values to stats for the whole
|
* since there we are comparing the nd values to stats for the whole
|
||||||
* relations.)
|
* relations.) We can apply this clamp both with respect to the base
|
||||||
|
* relations from which the join variables come, and to the immediate
|
||||||
|
* input relations of the current join.
|
||||||
*/
|
*/
|
||||||
double nullfrac1 = stats1 ? stats1->stanullfrac : 0.0;
|
double nullfrac1 = stats1 ? stats1->stanullfrac : 0.0;
|
||||||
double nullfrac2 = stats2 ? stats2->stanullfrac : 0.0;
|
double nullfrac2 = stats2 ? stats2->stanullfrac : 0.0;
|
||||||
|
|
||||||
if (vardata1->rel)
|
if (vardata1->rel)
|
||||||
nd1 = Min(nd1, vardata1->rel->rows);
|
nd1 = Min(nd1, vardata1->rel->rows);
|
||||||
|
nd1 = Min(nd1, rel1->rows);
|
||||||
if (vardata2->rel)
|
if (vardata2->rel)
|
||||||
nd2 = Min(nd2, vardata2->rel->rows);
|
nd2 = Min(nd2, vardata2->rel->rows);
|
||||||
|
nd2 = Min(nd2, rel2->rows);
|
||||||
|
|
||||||
selec = (1.0 - nullfrac1) * (1.0 - nullfrac2);
|
selec = (1.0 - nullfrac1) * (1.0 - nullfrac2);
|
||||||
if (nd1 > nd2)
|
if (nd1 > nd2)
|
||||||
@ -2287,7 +2318,8 @@ eqjoinsel_inner(Oid operator,
|
|||||||
*/
|
*/
|
||||||
static double
|
static double
|
||||||
eqjoinsel_semi(Oid operator,
|
eqjoinsel_semi(Oid operator,
|
||||||
VariableStatData *vardata1, VariableStatData *vardata2)
|
VariableStatData *vardata1, VariableStatData *vardata2,
|
||||||
|
RelOptInfo *rel1, RelOptInfo *rel2)
|
||||||
{
|
{
|
||||||
double selec;
|
double selec;
|
||||||
double nd1;
|
double nd1;
|
||||||
@ -2435,8 +2467,10 @@ eqjoinsel_semi(Oid operator,
|
|||||||
{
|
{
|
||||||
if (vardata1->rel)
|
if (vardata1->rel)
|
||||||
nd1 = Min(nd1, vardata1->rel->rows);
|
nd1 = Min(nd1, vardata1->rel->rows);
|
||||||
|
nd1 = Min(nd1, rel1->rows);
|
||||||
if (vardata2->rel)
|
if (vardata2->rel)
|
||||||
nd2 = Min(nd2, vardata2->rel->rows);
|
nd2 = Min(nd2, vardata2->rel->rows);
|
||||||
|
nd2 = Min(nd2, rel2->rows);
|
||||||
|
|
||||||
if (nd1 <= nd2 || nd2 <= 0)
|
if (nd1 <= nd2 || nd2 <= 0)
|
||||||
selec = 1.0 - nullfrac1;
|
selec = 1.0 - nullfrac1;
|
||||||
@ -4759,6 +4793,37 @@ get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata,
|
|||||||
return have_data;
|
return have_data;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* find_join_input_rel
|
||||||
|
* Look up the input relation for a join.
|
||||||
|
*
|
||||||
|
* We assume that the input relation's RelOptInfo must have been constructed
|
||||||
|
* already.
|
||||||
|
*/
|
||||||
|
static RelOptInfo *
|
||||||
|
find_join_input_rel(PlannerInfo *root, Relids relids)
|
||||||
|
{
|
||||||
|
RelOptInfo *rel = NULL;
|
||||||
|
|
||||||
|
switch (bms_membership(relids))
|
||||||
|
{
|
||||||
|
case BMS_EMPTY_SET:
|
||||||
|
/* should not happen */
|
||||||
|
break;
|
||||||
|
case BMS_SINGLETON:
|
||||||
|
rel = find_base_rel(root, bms_singleton_member(relids));
|
||||||
|
break;
|
||||||
|
case BMS_MULTIPLE:
|
||||||
|
rel = find_join_rel(root, relids);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (rel == NULL)
|
||||||
|
elog(ERROR, "could not find RelOptInfo for given relids");
|
||||||
|
|
||||||
|
return rel;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/*-------------------------------------------------------------------------
|
/*-------------------------------------------------------------------------
|
||||||
*
|
*
|
||||||
|
Loading…
x
Reference in New Issue
Block a user