Fix thinko in hash cost estimation: average frequency
should be computed from the total number of distinct values in the whole relation, not the number of distinct values we expect to have after restriction clauses are applied.
This commit is contained in:
parent
26c94c5d9c
commit
a8fe109ac1
@ -42,7 +42,7 @@
|
|||||||
* Portions Copyright (c) 1994, Regents of the University of California
|
* Portions Copyright (c) 1994, Regents of the University of California
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.75 2001/06/05 05:26:04 tgl Exp $
|
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.76 2001/06/10 02:59:35 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -873,6 +873,9 @@ estimate_hash_bucketsize(Query *root, Var *var)
|
|||||||
if (ndistinct < 0.0)
|
if (ndistinct < 0.0)
|
||||||
ndistinct = -ndistinct * rel->tuples;
|
ndistinct = -ndistinct * rel->tuples;
|
||||||
|
|
||||||
|
/* Also compute avg freq of all distinct data values in raw relation */
|
||||||
|
avgfreq = (1.0 - stats->stanullfrac) / ndistinct;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Adjust ndistinct to account for restriction clauses. Observe we are
|
* Adjust ndistinct to account for restriction clauses. Observe we are
|
||||||
* assuming that the data distribution is affected uniformly by the
|
* assuming that the data distribution is affected uniformly by the
|
||||||
@ -883,17 +886,6 @@ estimate_hash_bucketsize(Query *root, Var *var)
|
|||||||
*/
|
*/
|
||||||
ndistinct *= rel->rows / rel->tuples;
|
ndistinct *= rel->rows / rel->tuples;
|
||||||
|
|
||||||
/*
|
|
||||||
* Discourage use of hash join if there seem not to be very many distinct
|
|
||||||
* data values. The threshold here is somewhat arbitrary, as is the
|
|
||||||
* fraction used to "discourage" the choice.
|
|
||||||
*/
|
|
||||||
if (ndistinct < 50.0)
|
|
||||||
{
|
|
||||||
ReleaseSysCache(tuple);
|
|
||||||
return 0.5;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Form initial estimate of bucketsize fraction. Here we use rel->rows,
|
* Form initial estimate of bucketsize fraction. Here we use rel->rows,
|
||||||
* ie the number of rows after applying restriction clauses, because
|
* ie the number of rows after applying restriction clauses, because
|
||||||
@ -903,8 +895,8 @@ estimate_hash_bucketsize(Query *root, Var *var)
|
|||||||
estfract = (double) NTUP_PER_BUCKET / rel->rows;
|
estfract = (double) NTUP_PER_BUCKET / rel->rows;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Adjust estimated bucketsize if too few distinct values to fill
|
* Adjust estimated bucketsize if too few distinct values (after
|
||||||
* all the buckets.
|
* restriction clauses) to fill all the buckets.
|
||||||
*/
|
*/
|
||||||
needdistinct = rel->rows / (double) NTUP_PER_BUCKET;
|
needdistinct = rel->rows / (double) NTUP_PER_BUCKET;
|
||||||
if (ndistinct < needdistinct)
|
if (ndistinct < needdistinct)
|
||||||
@ -931,8 +923,6 @@ estimate_hash_bucketsize(Query *root, Var *var)
|
|||||||
/*
|
/*
|
||||||
* Adjust estimated bucketsize upward to account for skewed distribution.
|
* Adjust estimated bucketsize upward to account for skewed distribution.
|
||||||
*/
|
*/
|
||||||
avgfreq = (1.0 - stats->stanullfrac) / ndistinct;
|
|
||||||
|
|
||||||
if (avgfreq > 0.0 && mcvfreq > avgfreq)
|
if (avgfreq > 0.0 && mcvfreq > avgfreq)
|
||||||
estfract *= mcvfreq / avgfreq;
|
estfract *= mcvfreq / avgfreq;
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user