Use histogram data to improve the row-count estimates on equality constraints.

FossilOrigin-Name: 6bfc5c69eb22938972bbf4e60179952dc215f770
This commit is contained in:
drh 2011-01-20 16:52:09 +00:00
parent e847d3247f
commit 82759754ce
4 changed files with 85 additions and 17 deletions

View File

@ -1,8 +1,8 @@
-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1
C The\sfirst\sof\sa\splanned\sseries\sof\senhancements\sto\sthe\squery\splanner\sthat\nenable\sit\sto\smake\sbetter\suse\sof\ssqlite_stat2\shistograms\swhen\sthe\stable\nhas\smany\srepeated\svalues.
D 2011-01-20T02:56:37.736
C Use\shistogram\sdata\sto\simprove\sthe\srow-count\sestimates\son\sequality\sconstraints.
D 2011-01-20T16:52:09.439
F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f
F Makefile.in de6498556d536ae60bb8bb10e8c1ba011448658c
F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23
@ -243,7 +243,7 @@ F src/vtab.c b297e8fa656ab5e66244ab15680d68db0adbec30
F src/wal.c dbca424f71678f663a286ab2a98f947af1d412a7
F src/wal.h c1aac6593a0b02b15dc625987e619edeab39292e
F src/walker.c 3112bb3afe1d85dc52317cb1d752055e9a781f8f
F src/where.c 5cd6b88d57bfc816ba7f753a3cdf03686d954b8a
F src/where.c 068ecc5195578aed083a4314e572de9979a489e5
F test/aggerror.test a867e273ef9e3d7919f03ef4f0e8c0d2767944f2
F test/alias.test 4529fbc152f190268a15f9384a5651bbbabc9d87
F test/all.test 51756962d522e474338e9b2ebb26e7364d4aa125
@ -256,7 +256,7 @@ F test/analyze.test c1eb87067fc16ece7c07e823d6395fd831b270c5
F test/analyze2.test f45ac8d54bdad822139e53fc6307fc6b5ee41c69
F test/analyze3.test 820ddfb7591b49607fbaf77240c7955ac3cabb04
F test/analyze4.test 757b37875cf9bb528d46f74497bc789c88365045
F test/analyze5.test 18987796646efdf009ca0b8c8f060874a8fe57fb
F test/analyze5.test b6bd2e4fbbe2ffcf2a22250b28fb1f2a2ca507de
F test/async.test ad4ba51b77cd118911a3fe1356b0809da9c108c3
F test/async2.test bf5e2ca2c96763b4cba3d016249ad7259a5603b6
F test/async3.test 93edaa9122f498e56ea98c36c72abc407f4fb11e
@ -900,18 +900,14 @@ F tool/speedtest2.tcl ee2149167303ba8e95af97873c575c3e0fab58ff
F tool/speedtest8.c 2902c46588c40b55661e471d7a86e4dd71a18224
F tool/speedtest8inst1.c 293327bc76823f473684d589a8160bde1f52c14e
F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f
P 9660a0a22547656cc3765b673d0cee9e1dd829ef
R 9ff1bb21abd03a28e074b829beb25c52
T *bgcolor * #a8c7d3
T *branch * stat2-enhancement
T *sym-stat2-enhancement *
T -sym-trunk *
P 2cd374cd23fa2fd38f49090d6eeb9b1e521d51d5
R b298fdc0ea4be4264436dd1639cb2644
U drh
Z f191562671825ddb731a0f83c41674dd
Z 10e84b987d0709faa336295530a6ebae
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.6 (GNU/Linux)
iD8DBQFNN6RpoxKgR168RlERAtbyAJ4tlGP5CKHBEdaaRtF9LD6pnMJo9QCaAnxY
oc508+oZBxzr/UoIZL3o+G4=
=2eyE
iD8DBQFNOGg8oxKgR168RlERAhVwAJ9ZyON4XSbnksAxlSedyp9Kpxx2QACfe+y0
6eKoyLZRueuMI2F5tREdfw4=
=Hhmt
-----END PGP SIGNATURE-----

View File

@ -1 +1 @@
2cd374cd23fa2fd38f49090d6eeb9b1e521d51d5
6bfc5c69eb22938972bbf4e60179952dc215f770

View File

@ -2461,6 +2461,51 @@ range_est_fallback:
return rc;
}
#ifdef SQLITE_ENABLE_STAT2
/*
** Estimate the number of rows that will be returned based on
** an equality constraint x=VALUE and where that VALUE occurs in
** the histogram data. This only works when x is the left-most
** column of an index and sqlite_stat2 histogram data is available
** for that index.
**
** Write the estimated row count into *pnRow. If unable to make
** an estimate, leave *pnRow unchanged.
*/
void whereEqScanEst(
Parse *pParse, /* Parsing & code generating context */
Index *p, /* The index whose left-most column is pTerm */
WhereTerm *pTerm, /* The x=VALUE constraint */
double *pnRow /* Write the revised row estimate here */
){
sqlite3_value *pRhs = 0; /* VALUE on right-hand side of pTerm */
int iLower, iUpper; /* Range of histogram regions containing pRhs */
u8 aff; /* Column affinity */
int rc; /* Subfunction return code */
double nRowEst; /* New estimate of the number of rows */
assert( p->aSample!=0 );
assert( pTerm->eOperator==WO_EQ );
aff = p->pTable->aCol[p->aiColumn[0]].affinity;
rc = valueFromExpr(pParse, pTerm->pExpr->pRight, aff, &pRhs);
if( rc ) goto whereEqScanEst_cancel;
rc = whereRangeRegion(pParse, p, pRhs, 0, &iLower);
if( rc ) goto whereEqScanEst_cancel;
rc = whereRangeRegion(pParse, p, pRhs, 1, &iUpper);
if( rc ) goto whereEqScanEst_cancel;
if( iLower>=iUpper ){
nRowEst = p->aiRowEst[0]/(SQLITE_INDEX_SAMPLES*2);
if( nRowEst<*pnRow ) *pnRow = nRowEst;
}else{
nRowEst = (iUpper-iLower)*p->aiRowEst[0]/SQLITE_INDEX_SAMPLES;
*pnRow = nRowEst;
}
whereEqScanEst_cancel:
sqlite3ValueFree(pRhs);
}
#endif /* defined(SQLITE_ENABLE_STAT2) */
/*
** Find the query plan for accessing a particular table. Write the
@ -2624,10 +2669,13 @@ static void bestBtreeIndex(
int bInEst = 0;
int nInMul = 1;
int estBound = 100;
int nBound = 0; /* Number of range constraints seen */
int nBound = 0; /* Number of range constraints seen */
int bSort = 0;
int bLookup = 0;
WhereTerm *pTerm; /* A single term of the WHERE clause */
WhereTerm *pTerm; /* A single term of the WHERE clause */
#ifdef SQLITE_ENABLE_STAT2
WhereTerm *pFirstEqTerm = 0; /* First WO_EQ term */
#endif
/* Determine the values of nEq and nInMul */
for(nEq=0; nEq<pProbe->nColumn; nEq++){
@ -2647,6 +2695,11 @@ static void bestBtreeIndex(
}else if( pTerm->eOperator & WO_ISNULL ){
wsFlags |= WHERE_COLUMN_NULL;
}
#ifdef SQLITE_ENABLE_STAT2
else if( nEq==0 && pProbe->aSample ){
pFirstEqTerm = pTerm;
}
#endif
used |= pTerm->prereqRight;
}
@ -2723,6 +2776,17 @@ static void bestBtreeIndex(
nInMul = (int)(nRow / aiRowEst[nEq]);
}
#ifdef SQLITE_ENABLE_STAT2
/* If the constraint is of the form x=VALUE and histogram
** data is available for column x, then it might be possible
** to get a better estimate on the number of rows based on
** VALUE and how common that value is according to the histogram.
*/
if( nRow>(double)1 && nEq==1 && pFirstEqTerm!=0 ){
whereEqScanEst(pParse, pProbe, pFirstEqTerm, &nRow);
}
#endif /* SQLITE_ENABLE_STAT2 */
/* Assume constant cost to access a row and logarithmic cost to
** do a binary search. Hence, the initial cost is the number of output
** rows plus log2(table-size) times the number of binary searches.

View File

@ -214,6 +214,14 @@ foreach {testid where rows} {
20 {y>=0 AND y<'charlie'} 700
21 {y>=0 AND y<='charlie'} 900
22 {y>=0 AND y<'delta'} 900
23 {y>'alpha' AND y<x'00'} 600
24 {y>='bravo' AND y<x'00'} 600
25 {y>'bravo' AND y<x'00'} 300
26 {y>='charlie' AND y<x'00'} 300
27 {y>'charlie' AND y<x'00'} 100
28 {y>='delta' AND y<x'00'} 100
29 {y>'delta' AND y<x'00'} 50
30 {y>='echo' AND y<x'00'} 50
} {
do_test analyze5-3.$testid {
eqp "SELECT * FROM t1 WHERE $where"