Adjustments to the result row estimator for the IN operator so that it gives
the same estimates as the equivalent OR operator.  Test cases for the same.

FossilOrigin-Name: c82cb9c028b3ba5463ae50c30196dbf157a7a305
This commit is contained in:
drh 2011-01-21 18:18:13 +00:00
parent 0c50fa0f61
commit 5ac0607169
4 changed files with 75 additions and 30 deletions

View File

@ -1,8 +1,8 @@
-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1
C Make\suse\sof\shistogram\sdata\sto\smake\sbetter\sestimates\sfor\sthe\snumber\sof\srows\nthat\swill\sbe\sreturned\sfrom\s"x\sIN\s(v1,v2,v3,...)"\sconstraints.
D 2011-01-21T16:27:18.621
C Adjustments\sto\sthe\sresult\srow\sestimator\sfor\sthe\sIN\soperator\sso\sthat\sit\sgives\nthe\ssame\sestimates\sas\sthe\sequivalent\sOR\soperator.\s\sTest\scases\sfor\sthe\ssame.
D 2011-01-21T18:18:13.960
F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f
F Makefile.in de6498556d536ae60bb8bb10e8c1ba011448658c
F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23
@ -243,7 +243,7 @@ F src/vtab.c b297e8fa656ab5e66244ab15680d68db0adbec30
F src/wal.c dbca424f71678f663a286ab2a98f947af1d412a7
F src/wal.h c1aac6593a0b02b15dc625987e619edeab39292e
F src/walker.c 3112bb3afe1d85dc52317cb1d752055e9a781f8f
F src/where.c 7f2844afffd9e09373e874a74de81d3502b2a35c
F src/where.c 2de6723cfb051bcfcfd3d3ca1ac04bb1388ba530
F test/aggerror.test a867e273ef9e3d7919f03ef4f0e8c0d2767944f2
F test/alias.test 4529fbc152f190268a15f9384a5651bbbabc9d87
F test/all.test 51756962d522e474338e9b2ebb26e7364d4aa125
@ -256,7 +256,7 @@ F test/analyze.test c1eb87067fc16ece7c07e823d6395fd831b270c5
F test/analyze2.test f45ac8d54bdad822139e53fc6307fc6b5ee41c69
F test/analyze3.test 820ddfb7591b49607fbaf77240c7955ac3cabb04
F test/analyze4.test 757b37875cf9bb528d46f74497bc789c88365045
F test/analyze5.test b2139886ee199a226df8f319e37aa7bd78b8f402
F test/analyze5.test c19fe600c48dade660eb374fa7209435463c2d4a
F test/async.test ad4ba51b77cd118911a3fe1356b0809da9c108c3
F test/async2.test bf5e2ca2c96763b4cba3d016249ad7259a5603b6
F test/async3.test 93edaa9122f498e56ea98c36c72abc407f4fb11e
@ -900,14 +900,14 @@ F tool/speedtest2.tcl ee2149167303ba8e95af97873c575c3e0fab58ff
F tool/speedtest8.c 2902c46588c40b55661e471d7a86e4dd71a18224
F tool/speedtest8inst1.c 293327bc76823f473684d589a8160bde1f52c14e
F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f
P f73a167b434fadcbbd15e3891c4b7f4f87f6363c
R 46f7d508c889f9891a64638b5f1737ae
P fd3977a27ae68e694df12a4713e55515c1e87c5d
R cea6312924a8fb4373e961fbaf9716e5
U drh
Z 14ba122ed035896c7c1b08aa324c4833
Z d2cdc178cbf264c31de567c61a7d5758
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.6 (GNU/Linux)
iD8DBQFNObPpoxKgR168RlERAmgEAJ97hcV3wI5jmVOjUrAeDzSnM45gLACghPy2
7kt0j2FfeGbbS4tWO9hsJaU=
=BYPr
iD8DBQFNOc3poxKgR168RlERAkIwAKCEe6e9BZEE6g3M5kOLzfgzYu8BvQCghsyD
JkbODaFMx8NcwWU/YYsOcuo=
=cn1U
-----END PGP SIGNATURE-----

View File

@ -1 +1 @@
fd3977a27ae68e694df12a4713e55515c1e87c5d
c82cb9c028b3ba5463ae50c30196dbf157a7a305

View File

@ -117,6 +117,7 @@ struct WhereTerm {
#define TERM_ORINFO 0x10 /* Need to free the WhereTerm.u.pOrInfo object */
#define TERM_ANDINFO 0x20 /* Need to free the WhereTerm.u.pAndInfo obj */
#define TERM_OR_OK 0x40 /* Used during OR-clause processing */
#define TERM_NOHELP 0x80 /* This term does not reduce the search space */
/*
** An instance of the following structure holds all information about a
@ -1060,6 +1061,7 @@ static void exprAnalyzeOrTerm(
}else{
sqlite3ExprListDelete(db, pList);
}
pTerm->wtFlags |= TERM_NOHELP;
pTerm->eOperator = 0; /* case 1 trumps case 2 */
}
}
@ -2523,8 +2525,10 @@ whereEqualScanEst_cancel:
#ifdef SQLITE_ENABLE_STAT2
/*
** Estimate the number of rows that will be returned based on
** an IN constraint "x IN (V1,V2,V3,...)" where the right-hand side
** of the IN operator is a list of values.
** an IN constraint where the right-hand side of the IN operator
** is a list of values. Example:
**
** WHERE x IN (1,2,3,4)
**
** Write the estimated row count into *pnRow and return SQLITE_OK.
** If unable to make an estimate, leave *pnRow unchanged and return
@ -2544,22 +2548,24 @@ int whereInScanEst(
sqlite3_value *pVal = 0; /* One value from list */
int iLower, iUpper; /* Range of histogram regions containing pRhs */
u8 aff; /* Column affinity */
int rc; /* Subfunction return code */
int rc = SQLITE_OK; /* Subfunction return code */
double nRowEst; /* New estimate of the number of rows */
int nRegion = 0; /* Number of histogram regions spanned */
int nSingle = 0; /* Count of values contained within one region */
int nSpan = 0; /* Number of histogram regions spanned */
int nSingle = 0; /* Histogram regions hit by a single value */
int nNotFound = 0; /* Count of values that are not constants */
int i; /* Loop counter */
u8 aHit[SQLITE_INDEX_SAMPLES+1]; /* Histogram regions that are spanned */
int i; /* Loop counter */
u8 aSpan[SQLITE_INDEX_SAMPLES+1]; /* Histogram regions that are spanned */
u8 aSingle[SQLITE_INDEX_SAMPLES+1]; /* Histogram regions hit once */
assert( p->aSample!=0 );
aff = p->pTable->aCol[p->aiColumn[0]].affinity;
memset(aHit, 0, sizeof(aHit));
memset(aSpan, 0, sizeof(aSpan));
memset(aSingle, 0, sizeof(aSingle));
for(i=0; i<pList->nExpr; i++){
sqlite3ValueFree(pVal);
rc = valueFromExpr(pParse, pList->a[i].pExpr, aff, &pVal);
if( rc ) break;
if( pVal==0 ){
if( pVal==0 || sqlite3_value_type(pVal)==SQLITE_NULL ){
nNotFound++;
continue;
}
@ -2568,19 +2574,26 @@ int whereInScanEst(
rc = whereRangeRegion(pParse, p, pVal, 1, &iUpper);
if( rc ) break;
if( iLower>=iUpper ){
nSingle++;
aSingle[iLower] = 1;
}else{
assert( iLower>=0 && iUpper<=SQLITE_INDEX_SAMPLES );
while( iLower<iUpper ) aSpan[iLower++] = 1;
}
assert( iLower>=0 && iUpper<=SQLITE_INDEX_SAMPLES );
while( iLower<=iUpper ) aHit[iLower++] = 1;
}
if( rc==SQLITE_OK ){
for(i=nRegion=0; i<ArraySize(aHit); i++) nRegion += aHit[i];
nRowEst = nRegion*p->aiRowEst[0]/(SQLITE_INDEX_SAMPLES+1)
for(i=nSpan=0; i<=SQLITE_INDEX_SAMPLES; i++){
if( aSpan[i] ){
nSpan++;
}else if( aSingle[i] ){
nSingle++;
}
}
nRowEst = (nSpan*2+nSingle)*p->aiRowEst[0]/(2*SQLITE_INDEX_SAMPLES)
+ nNotFound*p->aiRowEst[1];
if( nRowEst > p->aiRowEst[0] ) nRowEst = p->aiRowEst[0];
*pnRow = nRowEst;
WHERETRACE(("IN row estimate: nRegion=%d, nSingle=%d, nNotFound=%d\n",
nRegion, nSingle, nNotFound));
WHERETRACE(("IN row estimate: nSpan=%d, nSingle=%d, nNotFound=%d, est=%g\n",
nSpan, nSingle, nNotFound, nRowEst));
}
sqlite3ValueFree(pVal);
return rc;
@ -2923,7 +2936,7 @@ static void bestBtreeIndex(
thisTab = getMask(pWC->pMaskSet, iCur);
for(pTerm=pWC->a, k=pWC->nTerm; nRow>2 && k; k--, pTerm++){
if( pTerm->wtFlags & TERM_VIRTUAL ) continue;
if( pTerm->wtFlags & (TERM_VIRTUAL|TERM_NOHELP) ) continue;
if( (pTerm->prereqAll & notValid)!=thisTab ) continue;
if( pTerm->eOperator & (WO_EQ|WO_IN|WO_ISNULL) ){
if( nSkipEq ){
@ -2937,7 +2950,7 @@ static void bestBtreeIndex(
}
}else if( pTerm->eOperator & (WO_LT|WO_LE|WO_GT|WO_GE) ){
if( nSkipRange ){
/* Ignore the first nBound range constraints since the index
/* Ignore the first nSkipRange range constraints since the index
** has already accounted for these */
nSkipRange--;
}else{

View File

@ -118,7 +118,7 @@ foreach {testid where rows} {
$rows]
}
foreach {testid where rows} {
101 {z=-1} 50
101 {z=-1} 50
102 {z=0} 400
103 {z=1} 300
104 {z=2} 200
@ -138,6 +138,38 @@ foreach {testid where rows} {
} [format {0 0 0 {SEARCH TABLE t1 USING INDEX t1z (z=?) (~%d rows)}} $rows]
}
# For the next sequence of tests, a value of rows<=0 means that a full-table
# scan is expected instead of an index search.
#
# Each case is a {testid where rows} triple: "where" is the WHERE clause
# under test and "rows" is the row estimate expected in the reported plan.
# Several cases pair an IN constraint with the equivalent OR constraint
# (213/214 and 215/216) and expect the same estimate from both forms.
#
# Uncomment the following line to enable WHERE tracing while debugging.
#set sqlite_where_trace 1
foreach {testid where rows} {
201 {z IN (-1)} 50
202 {z IN (0)} 400
203 {z IN (1)} 300
204 {z IN (2)} 200
205 {z IN (3)} 100
206 {z IN (4)} 50
207 {z IN (0.5)} 50
208 {z IN (0,1)} 700
209 {z IN (0,1,2)} 900
210 {z IN (0,1,2,3)} 0
211 {z IN (0,1,2,3,4,5)} 0
212 {z IN (1,2)} 500
213 {z IN (2,3)} 300
214 {z=3 OR z=2} 300
215 {z IN (-1,3)} 150
216 {z=-1 OR z=3} 150
} {
# A non-positive row count selects the full-table-scan plan text.
if {$rows<=0} {
set ans {SCAN TABLE t1 (~100 rows)}
} else {
# Otherwise expect a search of index t1z with the given row estimate.
set ans [format {SEARCH TABLE t1 USING INDEX t1z (z=?) (~%d rows)} $rows]
}
# Compare element 3 of the list returned by eqp (a helper defined outside
# this view) against the expected plan text.
do_test analyze5-1.$testid {
lindex [eqp "SELECT * FROM t1 WHERE $where"] 3
} $ans
}
# For the t1.y column, most entries are known to be zero. So do a
# full table scan for y=0 but use the index for any other constraint on
# y.