Changes to the way the planner calculates the costs of various table and index scans. Some test cases still failing.

FossilOrigin-Name: c5a6ec0a880652dc8f4593d9f7acd58ddc3dc5f3
This commit is contained in:
dan 2014-04-24 20:04:49 +00:00
parent f0582f43dd
commit aa9933c115
7 changed files with 111 additions and 70 deletions

View File

@ -1,5 +1,5 @@
C Comment\stweaks\son\sthe\stest\scase\sfor\sthe\s[b75a9ca6b0]\sbug\sfix.
D 2014-04-21T13:36:54.639
C Changes\sto\sthe\sway\sthe\splanner\scalculates\sthe\scosts\sof\svarious\stable\sand\sindex\sscans.\sSome\stest\scases\sstill\sfailing.
D 2014-04-24T20:04:49.939
F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f
F Makefile.in 2ef13430cd359f7b361bb863504e227b25cc7f81
F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23
@ -291,7 +291,7 @@ F src/vtab.c 21b932841e51ebd7d075e2d0ad1415dce8d2d5fd
F src/wal.c 76e7fc6de229bea8b30bb2539110f03a494dc3a8
F src/wal.h df01efe09c5cb8c8e391ff1715cca294f89668a4
F src/walker.c 11edb74d587bc87b33ca96a5173e3ec1b8389e45
F src/where.c 3b127bdc24b7aa84ffa69729170be11555cd7733
F src/where.c c12bc20cd649bcae39de3e452bfc1a3f164454ee
F src/whereInt.h 929c1349b5355fd44f22cee5c14d72b3329c58a6
F test/8_3_names.test ebbb5cd36741350040fd28b432ceadf495be25b2
F test/aggerror.test a867e273ef9e3d7919f03ef4f0e8c0d2767944f2
@ -311,7 +311,7 @@ F test/analyze5.test 765c4e284aa69ca172772aa940946f55629bc8c4
F test/analyze6.test d31defa011a561b938b4608d3538c1b4e0b5e92c
F test/analyze7.test bb1409afc9e8629e414387ef048b8e0e3e0bdc4f
F test/analyze8.test 093d15c1c888eed5034304a98c992f7360130b88
F test/analyze9.test e072a5172d55afcba98d6ca6a219ce8878c2f5c9
F test/analyze9.test e219daa58fd8677c6a43d771798cf37d68f51d3e
F test/analyzeA.test 1a5c40079894847976d983ca39c707aaa44b6944
F test/analyzeB.test 8bf35ee0a548aea831bf56762cb8e7fdb1db083d
F test/async.test 1d0e056ba1bb9729283a0f22718d3a25e82c277b
@ -329,7 +329,7 @@ F test/auth.test 5bdf154eb28c0e4bbc0473f335858c0d96171768
F test/auth2.test c3b415b76c033bedb81292118fb7c01f5f10cbcd
F test/auth3.test a4755e6a2a2fea547ffe63c874eb569e60a28eb5
F test/autoinc.test c58912526998a39e11f66b533e23cfabea7f25b7
F test/autoindex1.test d4dfe14001dfcb74cfbd7107f45a79fc1ab6183e
F test/autoindex1.test 762ff3f8e25d852aae55c6462ca166a80c0cde61
F test/autovacuum.test 941892505d2c0f410a0cb5970dfa1c7c4e5f6e74
F test/autovacuum_ioerr2.test 8a367b224183ad801e0e24dcb7d1501f45f244b4
F test/avtrans.test 0252654f4295ddda3b2cce0e894812259e655a85
@ -1079,7 +1079,7 @@ F test/walslow.test e7be6d9888f83aa5d3d3c7c08aa9b5c28b93609a
F test/walthread.test de8dbaf6d9e41481c460ba31ca61e163d7348f8e
F test/where.test 28b64e93428961b07b0d486778d63fd672948f6b
F test/where2.test 455a2eb2666e66c1e84e2cb5815173a85e6237db
F test/where3.test d28c51f257e60be30f74308fa385ceeddfb54a6e
F test/where3.test 1ad55ba900bd7747f98b6082e65bd3e442c5004e
F test/where4.test d8420ceeb8323a41ceff1f1841fc528e824e1ecf
F test/where5.test fdf66f96d29a064b63eb543e28da4dfdccd81ad2
F test/where6.test 5da5a98cec820d488e82708301b96cb8c18a258b
@ -1093,7 +1093,7 @@ F test/whereC.test d6f4ecd4fa2d9429681a5b22a25d2bda8e86ab8a
F test/whereD.test fd9120e262f9da3c45940f52aefeef4d15b904e5
F test/whereE.test b3a055eef928c992b0a33198a7b8dc10eea5ad2f
F test/whereF.test 5b2ba0dbe8074aa13e416b37c753991f0a2492d7
F test/whereG.test 2533b72ed4a31fd1687230a499b557b911525344
F test/whereG.test 8189fedf3b98ab581bb70f830175e403a0ef1722
F test/whereH.test e4b07f7a3c2f5d31195cd33710054c78667573b2
F test/wherelimit.test 5e9fd41e79bb2b2d588ed999d641d9c965619b31
F test/wild001.test bca33f499866f04c24510d74baf1e578d4e44b1c
@ -1161,7 +1161,10 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1
F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4
F tool/warnings.sh d1a6de74685f360ab718efda6265994b99bbea01
F tool/win/sqlite.vsix 030f3eeaf2cb811a3692ab9c14d021a75ce41fff
P de9a490f594183f337a2ec9e0f87792eac83548b
R ce888b84132e0cad3bcca115a32951d3
U drh
Z cf9f241149456ab1fa24984e95a412d2
P 65d2544af9adc1e2f1d193e57f8be0422fb0d5eb
R 6cc54703275bf2ed6708c34ae52cf7ea
T *branch * experimental-costs
T *sym-experimental-costs *
T -sym-trunk *
U dan
Z 868aa60f36dda291ae018583501645e5

View File

@ -1 +1 @@
65d2544af9adc1e2f1d193e57f8be0422fb0d5eb
c5a6ec0a880652dc8f4593d9f7acd58ddc3dc5f3

View File

@ -227,7 +227,7 @@ static int whereClauseInsert(WhereClause *pWC, Expr *p, u8 wtFlags){
if( p && ExprHasProperty(p, EP_Unlikely) ){
pTerm->truthProb = sqlite3LogEst(p->iTable) - 99;
}else{
pTerm->truthProb = -1;
pTerm->truthProb = 1;
}
pTerm->pExpr = sqlite3ExprSkipCollate(p);
pTerm->wtFlags = wtFlags;
@ -1975,6 +1975,31 @@ static void whereKeyStats(
}
#endif /* SQLITE_ENABLE_STAT3_OR_STAT4 */
/*
** Refine a row-count estimate for a range scan using one bounding term.
**
** pTerm is either NULL or a WHERE-clause term that acts as the upper or
** lower bound of the range. nNew is the estimate, as a LogEst value, of
** the number of rows the scan visits when pTerm is ignored. The return
** value is the estimate once pTerm has been taken into account:
**
**   * pTerm==NULL:  nNew is returned unchanged.
**
**   * pTerm->truthProb<=0:  the term carries an explicit likelihood()
**     value, stored as a LogEst. Adding it to nNew multiplies the row
**     count by that likelihood.
**
**   * TERM_VNULL set in pTerm->wtFlags:  an "IS NOT NULL" term, assumed
**     to have a likelihood of 0.50 (LogEst adjustment of -10).
**
**   * Anything else:  assumed likelihood of 0.25 (LogEst adjustment
**     of -20).
*/
static LogEst whereRangeAdjust(WhereTerm *pTerm, LogEst nNew){
  /* No bounding term: there is nothing to adjust. */
  if( pTerm==0 ) return nNew;

  /* Explicit likelihood: truthProb is zero or negative, so adding it
  ** can only keep or shrink the estimate. */
  if( pTerm->truthProb<=0 ){
    return (LogEst)(nNew + pTerm->truthProb);
  }

  if( (pTerm->wtFlags & TERM_VNULL)!=0 ){
    /* Halve the estimate. */
    assert( 10==sqlite3LogEst(2) );
    return (LogEst)(nNew - 10);
  }

  /* Default for an ordinary inequality: quarter the estimate. */
  assert( 20==sqlite3LogEst(4) );
  return (LogEst)(nNew - 20);
}
/*
** This function is used to estimate the number of rows that will be visited
** by scanning an index for a range of values. The range may have an upper
@ -2127,17 +2152,9 @@ static int whereRangeScanEst(
UNUSED_PARAMETER(pBuilder);
#endif
assert( pLower || pUpper );
/* TUNING: Each inequality constraint reduces the search space 4-fold.
** A BETWEEN operator, therefore, reduces the search space 16-fold */
nNew = nOut;
if( pLower && (pLower->wtFlags & TERM_VNULL)==0 ){
nNew -= 20; assert( 20==sqlite3LogEst(4) );
nOut--;
}
if( pUpper ){
nNew -= 20; assert( 20==sqlite3LogEst(4) );
nOut--;
}
nNew = whereRangeAdjust(pLower, nOut);
nNew = whereRangeAdjust(pUpper, nNew);
nOut -= (pLower!=0) + (pUpper!=0);
if( nNew<10 ) nNew = 10;
if( nNew<nOut ) nOut = nNew;
pLoop->nOut = (LogEst)nOut;
@ -3987,7 +4004,9 @@ static void whereLoopOutputAdjust(WhereClause *pWC, WhereLoop *pLoop){
if( pX==pTerm ) break;
if( pX->iParent>=0 && (&pWC->a[pX->iParent])==pTerm ) break;
}
if( j<0 ) pLoop->nOut += pTerm->truthProb;
if( j<0 ){
pLoop->nOut += (pTerm->truthProb<=0 ? pTerm->truthProb : -1);
}
}
}
@ -4081,6 +4100,7 @@ static int whereLoopAddBtreeIndex(
pNew->nOut = saved_nOut;
}
for(; rc==SQLITE_OK && pTerm!=0; pTerm = whereScanNext(&scan)){
LogEst rCostIdx;
int nIn = 0;
#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
int nRecValid = pBuilder->nRecValid;
@ -4154,7 +4174,8 @@ static int whereLoopAddBtreeIndex(
pNew->aLTerm[pNew->nLTerm-2] : 0;
}
if( pNew->wsFlags & WHERE_COLUMN_RANGE ){
/* Adjust nOut and rRun for STAT3 range values */
/* Adjust nOut using stat3/stat4 data. Or, if there is no stat3/stat4
** data, using some other estimate. */
assert( pNew->nOut==saved_nOut );
whereRangeScanEst(pParse, pBuilder, pBtm, pTop, pNew);
}
@ -4181,13 +4202,16 @@ static int whereLoopAddBtreeIndex(
}
}
#endif
/* Set rCostIdx to the cost of visiting selected rows in index. Add
** it to pNew->rRun, which is currently set to the cost of the index
** seek only. Then, if this is a non-covering index, add the cost of
** visiting the rows in the main table. */
rCostIdx = pNew->nOut + 1 + (15*pProbe->szIdxRow)/pSrc->pTab->szTabRow;
pNew->rRun = sqlite3LogEstAdd(pNew->rRun, rCostIdx);
if( (pNew->wsFlags & (WHERE_IDX_ONLY|WHERE_IPK))==0 ){
/* Each row involves a step of the index, then a binary search of
** the main table */
pNew->rRun = sqlite3LogEstAdd(pNew->rRun,rLogSize>27 ? rLogSize-17 : 10);
pNew->rRun = sqlite3LogEstAdd(pNew->rRun, pNew->nOut + 16);
}
/* Step cost for each output row */
pNew->rRun = sqlite3LogEstAdd(pNew->rRun, pNew->nOut);
whereLoopOutputAdjust(pBuilder->pWC, pNew);
rc = whereLoopInsert(pBuilder, pNew);
if( (pNew->wsFlags & WHERE_TOP_LIMIT)==0
@ -4319,6 +4343,7 @@ static int whereLoopAddBtree(
sPk.aiRowEst = aiRowEstPk;
sPk.onError = OE_Replace;
sPk.pTable = pTab;
sPk.szIdxRow = pTab->szTabRow;
aiRowEstPk[0] = pTab->nRowEst;
aiRowEstPk[1] = 1;
pFirst = pSrc->pTab->pIndex;
@ -4396,10 +4421,8 @@ static int whereLoopAddBtree(
/* Full table scan */
pNew->iSortIdx = b ? iSortIdx : 0;
/* TUNING: Cost of full table scan is 3*(N + log2(N)).
** + The extra 3 factor is to encourage the use of indexed lookups
** over full scans. FIXME */
pNew->rRun = sqlite3LogEstAdd(rSize,rLogSize) + 16;
/* TUNING: Cost of full table scan is (N*3.0). */
pNew->rRun = rSize + 16;
whereLoopOutputAdjust(pWC, pNew);
rc = whereLoopInsert(pBuilder, pNew);
pNew->nOut = rSize;
@ -4426,35 +4449,16 @@ static int whereLoopAddBtree(
)
){
pNew->iSortIdx = b ? iSortIdx : 0;
/* TUNING: The base cost of an index scan is N + log2(N).
** The log2(N) is for the initial seek to the beginning and the N
** is for the scan itself. */
pNew->rRun = sqlite3LogEstAdd(rSize, rLogSize);
if( m==0 ){
/* TUNING: Cost of a covering index scan is K*(N + log2(N)).
** + The extra factor K of between 1.1 and 3.0 that depends
** on the relative sizes of the table and the index. K
** is smaller for smaller indices, thus favoring them.
** The upper bound on K (3.0) matches the penalty factor
** on a full table scan that tries to encourage the use of
** indexed lookups over full scans.
*/
pNew->rRun += 1 + (15*pProbe->szIdxRow)/pTab->szTabRow;
}else{
/* TUNING: The cost of scanning a non-covering index is multiplied
** by log2(N) to account for the binary search of the main table
** that must happen for each row of the index.
** TODO: Should there be a multiplier here, analogous to the 3x
** multiplier for a fulltable scan or covering index scan, to
** further discourage the use of an index scan? Or is the log2(N)
** term sufficient discouragement?
** TODO: What if some or all of the WHERE clause terms can be
** computed without reference to the original table. Then the
** penality should reduce to logK where K is the number of output
** rows.
*/
pNew->rRun += rLogSize;
/* The cost of visiting the index rows is N*K, where K is
** between 1.1 and 3.0, depending on the relative sizes of the
** index and table rows. If this is a non-covering index scan,
** also add the cost of visiting table rows (N*3.0). */
pNew->rRun = rSize + 1 + (15*pProbe->szIdxRow)/pTab->szTabRow;
if( m!=0 ){
pNew->rRun = sqlite3LogEstAdd(pNew->rRun, rSize+16);
}
whereLoopOutputAdjust(pWC, pNew);
rc = whereLoopInsert(pBuilder, pNew);
pNew->nOut = rSize;
@ -4732,8 +4736,7 @@ static int whereLoopAddOr(WhereLoopBuilder *pBuilder, Bitmask mExtra){
pNew->iSortIdx = 0;
memset(&pNew->u, 0, sizeof(pNew->u));
for(i=0; rc==SQLITE_OK && i<sSum.n; i++){
/* TUNING: Multiple by 3.5 for the secondary table lookup */
pNew->rRun = sSum.a[i].rRun + 18;
pNew->rRun = sSum.a[i].rRun;
pNew->nOut = sSum.a[i].nOut;
pNew->prereq = sSum.a[i].prereq;
rc = whereLoopInsert(pBuilder, pNew);

View File

@ -577,16 +577,16 @@ do_test 13.1 {
execsql ANALYZE
} {}
do_eqp_test 13.2.1 {
SELECT * FROM t1 WHERE a='abc' AND rowid<15 AND b<20
SELECT * FROM t1 WHERE a='abc' AND rowid<15 AND b<12
} {/SEARCH TABLE t1 USING INDEX i1/}
do_eqp_test 13.2.2 {
SELECT * FROM t1 WHERE a='abc' AND rowid<'15' AND b<20
SELECT * FROM t1 WHERE a='abc' AND rowid<'15' AND b<12
} {/SEARCH TABLE t1 USING INDEX i1/}
do_eqp_test 13.3.1 {
SELECT * FROM t1 WHERE a='abc' AND rowid<100 AND b<20
SELECT * FROM t1 WHERE a='abc' AND rowid<100 AND b<12
} {/SEARCH TABLE t1 USING INDEX i2/}
do_eqp_test 13.3.2 {
SELECT * FROM t1 WHERE a='abc' AND rowid<'100' AND b<20
SELECT * FROM t1 WHERE a='abc' AND rowid<'100' AND b<12
} {/SEARCH TABLE t1 USING INDEX i2/}
#-------------------------------------------------------------------------

View File

@ -97,6 +97,8 @@ do_test autoindex1-210 {
PRAGMA automatic_index=ON;
ANALYZE;
UPDATE sqlite_stat1 SET stat='10000' WHERE tbl='t1';
-- Table t2 actually contains 8 rows.
UPDATE sqlite_stat1 SET stat='16' WHERE tbl='t2';
ANALYZE sqlite_master;
SELECT b, (SELECT d FROM t2 WHERE c=a) FROM t1;
}

View File

@ -231,6 +231,7 @@ do_execsql_test where3-3.0 {
CREATE TABLE t301(a INTEGER PRIMARY KEY,b,c);
CREATE INDEX t301c ON t301(c);
INSERT INTO t301 VALUES(1,2,3);
INSERT INTO t301 VALUES(2,2,3);
CREATE TABLE t302(x, y);
INSERT INTO t302 VALUES(4,5);
ANALYZE;
@ -251,7 +252,7 @@ do_execsql_test where3-3.2 {
} {}
do_execsql_test where3-3.3 {
SELECT * FROM t301 WHERE c=3 AND a IS NOT NULL;
} {1 2 3}
} {1 2 3 2 2 3}
if 0 { # Query planner no longer does this
# Verify that when there are multiple tables in a join which must be

View File

@ -14,6 +14,7 @@
set testdir [file dirname $argv0]
source $testdir/tester.tcl
set testprefix whereG
do_execsql_test whereG-1.0 {
CREATE TABLE composer(
@ -179,5 +180,36 @@ do_execsql_test whereG-4.0 {
ORDER BY x;
} {right}
#-------------------------------------------------------------------------
# Check that a likelihood() hint attached to a range constraint changes
# whether the expected query plan is an index search or a full table scan.
#
# NOTE(review): reset_db is defined outside this view; presumably it
# gives the tests below a fresh, empty database - confirm.
reset_db
do_execsql_test 5.1 {
CREATE TABLE t1(a, b, c);
CREATE INDEX i1 ON t1(a, b);
}
# No ANALYZE has been run at this point. A plain range constraint on the
# leading column of i1 is expected to use the index.
do_eqp_test 5.1.2 {
SELECT * FROM t1 WHERE a>?
} {0 0 0 {SEARCH TABLE t1 USING INDEX i1 (a>?)}}
# The same constraint wrapped in likelihood(..., 0.9) is expected to be
# planned as a full table scan instead.
do_eqp_test 5.1.3 {
SELECT * FROM t1 WHERE likelihood(a>?, 0.9)
} {0 0 0 {SCAN TABLE t1}}
# Populate t1 with 100 rows whose "a" value is 'abc', then copy them with
# "a" set to 'def', so column "a" holds only two distinct values. Then
# run ANALYZE.
do_test 5.2 {
for {set i 0} {$i < 100} {incr i} {
execsql { INSERT INTO t1 VALUES('abc', $i, $i); }
}
execsql { INSERT INTO t1 SELECT 'def', b, c FROM t1; }
execsql { ANALYZE }
} {}
# With a likelihood of 0.01 on the range constraint on "b" (the second
# column of i1), the expected plan searches i1 using "ANY(a) AND b>?".
do_eqp_test 5.2.2 {
SELECT * FROM t1 WHERE likelihood(b>?, 0.01)
} {0 0 0 {SEARCH TABLE t1 USING INDEX i1 (ANY(a) AND b>?)}}
# With a likelihood of 0.9 on the same constraint, the expected plan is a
# full table scan.
do_eqp_test 5.2.3 {
SELECT * FROM t1 WHERE likelihood(b>?, 0.9)
} {0 0 0 {SCAN TABLE t1}}
finish_test