From 5ac06071690a465efea1c904fce30f3b56bfc2a6 Mon Sep 17 00:00:00 2001 From: drh Date: Fri, 21 Jan 2011 18:18:13 +0000 Subject: [PATCH] Adjustments to the result row estimator for the IN operator so that it gives the same estimates as the equivalent OR operator. Test cases for the same. FossilOrigin-Name: c82cb9c028b3ba5463ae50c30196dbf157a7a305 --- manifest | 20 +++++++++---------- manifest.uuid | 2 +- src/where.c | 49 +++++++++++++++++++++++++++++----------------- test/analyze5.test | 34 +++++++++++++++++++++++++++++++- 4 files changed, 75 insertions(+), 30 deletions(-) diff --git a/manifest b/manifest index e202477c08..f65504e819 100644 --- a/manifest +++ b/manifest @@ -1,8 +1,8 @@ -----BEGIN PGP SIGNED MESSAGE----- Hash: SHA1 -C Make\suse\sof\shistogram\sdata\sto\smake\sbetter\sestimates\sfor\sthe\snumber\sof\srows\nthat\swill\sbe\sreturned\sfrom\s"x\sIN\s(v1,v2,v3,...)"\sconstraints. -D 2011-01-21T16:27:18.621 +C Adjustments\sto\sthe\sresult\srow\sestimator\sfor\sthe\sIN\soperator\sso\sthat\sit\sgives\nthe\ssame\sestimates\sas\sthe\sequivalent\sOR\soperator.\s\sTest\scases\sfor\sthe\ssame. 
+D 2011-01-21T18:18:13.960 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in de6498556d536ae60bb8bb10e8c1ba011448658c F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -243,7 +243,7 @@ F src/vtab.c b297e8fa656ab5e66244ab15680d68db0adbec30 F src/wal.c dbca424f71678f663a286ab2a98f947af1d412a7 F src/wal.h c1aac6593a0b02b15dc625987e619edeab39292e F src/walker.c 3112bb3afe1d85dc52317cb1d752055e9a781f8f -F src/where.c 7f2844afffd9e09373e874a74de81d3502b2a35c +F src/where.c 2de6723cfb051bcfcfd3d3ca1ac04bb1388ba530 F test/aggerror.test a867e273ef9e3d7919f03ef4f0e8c0d2767944f2 F test/alias.test 4529fbc152f190268a15f9384a5651bbbabc9d87 F test/all.test 51756962d522e474338e9b2ebb26e7364d4aa125 @@ -256,7 +256,7 @@ F test/analyze.test c1eb87067fc16ece7c07e823d6395fd831b270c5 F test/analyze2.test f45ac8d54bdad822139e53fc6307fc6b5ee41c69 F test/analyze3.test 820ddfb7591b49607fbaf77240c7955ac3cabb04 F test/analyze4.test 757b37875cf9bb528d46f74497bc789c88365045 -F test/analyze5.test b2139886ee199a226df8f319e37aa7bd78b8f402 +F test/analyze5.test c19fe600c48dade660eb374fa7209435463c2d4a F test/async.test ad4ba51b77cd118911a3fe1356b0809da9c108c3 F test/async2.test bf5e2ca2c96763b4cba3d016249ad7259a5603b6 F test/async3.test 93edaa9122f498e56ea98c36c72abc407f4fb11e @@ -900,14 +900,14 @@ F tool/speedtest2.tcl ee2149167303ba8e95af97873c575c3e0fab58ff F tool/speedtest8.c 2902c46588c40b55661e471d7a86e4dd71a18224 F tool/speedtest8inst1.c 293327bc76823f473684d589a8160bde1f52c14e F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f -P f73a167b434fadcbbd15e3891c4b7f4f87f6363c -R 46f7d508c889f9891a64638b5f1737ae +P fd3977a27ae68e694df12a4713e55515c1e87c5d +R cea6312924a8fb4373e961fbaf9716e5 U drh -Z 14ba122ed035896c7c1b08aa324c4833 +Z d2cdc178cbf264c31de567c61a7d5758 -----BEGIN PGP SIGNATURE----- Version: GnuPG v1.4.6 (GNU/Linux) -iD8DBQFNObPpoxKgR168RlERAmgEAJ97hcV3wI5jmVOjUrAeDzSnM45gLACghPy2 -7kt0j2FfeGbbS4tWO9hsJaU= -=BYPr 
+iD8DBQFNOc3poxKgR168RlERAkIwAKCEe6e9BZEE6g3M5kOLzfgzYu8BvQCghsyD +JkbODaFMx8NcwWU/YYsOcuo= +=cn1U -----END PGP SIGNATURE----- diff --git a/manifest.uuid b/manifest.uuid index ad6a13da6f..67ab42aca6 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -fd3977a27ae68e694df12a4713e55515c1e87c5d \ No newline at end of file +c82cb9c028b3ba5463ae50c30196dbf157a7a305 \ No newline at end of file diff --git a/src/where.c b/src/where.c index 8eaef351d7..cb0b4638f3 100644 --- a/src/where.c +++ b/src/where.c @@ -117,6 +117,7 @@ struct WhereTerm { #define TERM_ORINFO 0x10 /* Need to free the WhereTerm.u.pOrInfo object */ #define TERM_ANDINFO 0x20 /* Need to free the WhereTerm.u.pAndInfo obj */ #define TERM_OR_OK 0x40 /* Used during OR-clause processing */ +#define TERM_NOHELP 0x80 /* This term does not reduce the search space */ /* ** An instance of the following structure holds all information about a @@ -1060,6 +1061,7 @@ static void exprAnalyzeOrTerm( }else{ sqlite3ExprListDelete(db, pList); } + pTerm->wtFlags |= TERM_NOHELP; pTerm->eOperator = 0; /* case 1 trumps case 2 */ } } @@ -2523,8 +2525,10 @@ whereEqualScanEst_cancel: #ifdef SQLITE_ENABLE_STAT2 /* ** Estimate the number of rows that will be returned based on -** an IN constraint "x IN (V1,V2,V3,...)" where the right-hand side -** of the IN operator is a list of values. +** an IN constraint where the right-hand side of the IN operator +** is a list of values. Example: +** +** WHERE x IN (1,2,3,4) ** ** Write the estimated row count into *pnRow and return SQLITE_OK. 
** If unable to make an estimate, leave *pnRow unchanged and return @@ -2544,22 +2548,24 @@ int whereInScanEst( sqlite3_value *pVal = 0; /* One value from list */ int iLower, iUpper; /* Range of histogram regions containing pRhs */ u8 aff; /* Column affinity */ - int rc; /* Subfunction return code */ + int rc = SQLITE_OK; /* Subfunction return code */ double nRowEst; /* New estimate of the number of rows */ - int nRegion = 0; /* Number of histogram regions spanned */ - int nSingle = 0; /* Count of values contained within one region */ + int nSpan = 0; /* Number of histogram regions spanned */ + int nSingle = 0; /* Histogram regions hit by a single value */ int nNotFound = 0; /* Count of values that are not constants */ - int i; /* Loop counter */ - u8 aHit[SQLITE_INDEX_SAMPLES+1]; /* Histogram regions that are spanned */ + int i; /* Loop counter */ + u8 aSpan[SQLITE_INDEX_SAMPLES+1]; /* Histogram regions that are spanned */ + u8 aSingle[SQLITE_INDEX_SAMPLES+1]; /* Histogram regions hit once */ assert( p->aSample!=0 ); aff = p->pTable->aCol[p->aiColumn[0]].affinity; - memset(aHit, 0, sizeof(aHit)); + memset(aSpan, 0, sizeof(aSpan)); + memset(aSingle, 0, sizeof(aSingle)); for(i=0; i<pList->nExpr; i++){ sqlite3ValueFree(pVal); rc = valueFromExpr(pParse, pList->a[i].pExpr, aff, &pVal); if( rc ) break; - if( pVal==0 ){ + if( pVal==0 || sqlite3_value_type(pVal)==SQLITE_NULL ){ nNotFound++; continue; } @@ -2568,19 +2574,26 @@ int whereInScanEst( rc = whereRangeRegion(pParse, p, pVal, 1, &iUpper); if( rc ) break; if( iLower>=iUpper ){ - nSingle++; + aSingle[iLower] = 1; + }else{ + assert( iLower>=0 && iUpper<=SQLITE_INDEX_SAMPLES ); + while( iLower<iUpper ) aSpan[iLower++] = 1; } - assert( iLower>=0 && iUpper<=SQLITE_INDEX_SAMPLES ); - while( iLower<=iUpper ) aHit[iLower++] = 1; } if( rc==SQLITE_OK ){ - for(i=nRegion=0; i<sizeof(aHit); i++) nRegion += aHit[i]; - nRowEst = nRegion*p->aiRowEst[0]/(SQLITE_INDEX_SAMPLES+1) + for(i=nSpan=0; i<=SQLITE_INDEX_SAMPLES; i++){ + if( aSpan[i] ){ + nSpan++; + }else if( aSingle[i] ){ + nSingle++; + } + } + nRowEst = 
(nSpan*2+nSingle)*p->aiRowEst[0]/(2*SQLITE_INDEX_SAMPLES) + nNotFound*p->aiRowEst[1]; if( nRowEst > p->aiRowEst[0] ) nRowEst = p->aiRowEst[0]; *pnRow = nRowEst; - WHERETRACE(("IN row estimate: nRegion=%d, nSingle=%d, nNotFound=%d\n", - nRegion, nSingle, nNotFound)); + WHERETRACE(("IN row estimate: nSpan=%d, nSingle=%d, nNotFound=%d, est=%g\n", + nSpan, nSingle, nNotFound, nRowEst)); } sqlite3ValueFree(pVal); return rc; @@ -2923,7 +2936,7 @@ static void bestBtreeIndex( thisTab = getMask(pWC->pMaskSet, iCur); for(pTerm=pWC->a, k=pWC->nTerm; nRow>2 && k; k--, pTerm++){ - if( pTerm->wtFlags & TERM_VIRTUAL ) continue; + if( pTerm->wtFlags & (TERM_VIRTUAL|TERM_NOHELP) ) continue; if( (pTerm->prereqAll & notValid)!=thisTab ) continue; if( pTerm->eOperator & (WO_EQ|WO_IN|WO_ISNULL) ){ if( nSkipEq ){ @@ -2937,7 +2950,7 @@ static void bestBtreeIndex( } }else if( pTerm->eOperator & (WO_LT|WO_LE|WO_GT|WO_GE) ){ if( nSkipRange ){ - /* Ignore the first nBound range constraints since the index + /* Ignore the first nSkipRange range constraints since the index ** has already accounted for these */ nSkipRange--; }else{ diff --git a/test/analyze5.test b/test/analyze5.test index a6bf640235..3c89690b4b 100644 --- a/test/analyze5.test +++ b/test/analyze5.test @@ -118,7 +118,7 @@ foreach {testid where rows} { $rows] } foreach {testid where rows} { - 101 {z=-1} 50 + 101 {z=-1} 50 102 {z=0} 400 103 {z=1} 300 104 {z=2} 200 @@ -138,6 +138,38 @@ foreach {testid where rows} { } [format {0 0 0 {SEARCH TABLE t1 USING INDEX t1z (z=?) (~%d rows)}} $rows] } +# for the next sequence of tests a value of rows<=0 means a full-table scan +# is used. 
+# +#set sqlite_where_trace 1 +foreach {testid where rows} { + 201 {z IN (-1)} 50 + 202 {z IN (0)} 400 + 203 {z IN (1)} 300 + 204 {z IN (2)} 200 + 205 {z IN (3)} 100 + 206 {z IN (4)} 50 + 207 {z IN (0.5)} 50 + 208 {z IN (0,1)} 700 + 209 {z IN (0,1,2)} 900 + 210 {z IN (0,1,2,3)} 0 + 211 {z IN (0,1,2,3,4,5)} 0 + 212 {z IN (1,2)} 500 + 213 {z IN (2,3)} 300 + 214 {z=3 OR z=2} 300 + 215 {z IN (-1,3)} 150 + 216 {z=-1 OR z=3} 150 +} { + if {$rows<=0} { + set ans {SCAN TABLE t1 (~100 rows)} + } else { + set ans [format {SEARCH TABLE t1 USING INDEX t1z (z=?) (~%d rows)} $rows] + } + do_test analyze5-1.$testid { + lindex [eqp "SELECT * FROM t1 WHERE $where"] 3 + } $ans +} + # For the t1.y column, most entries are known to be zero. So do a # full table scan for y=0 but use the index for any other constraint on # y.