Changes to the way the planner calculates the costs of various table and index scans. Some test cases still failing.

FossilOrigin-Name: c5a6ec0a880652dc8f4593d9f7acd58ddc3dc5f3
2014-04-24 20:04:49 +00:00 · 2014-04-24 20:04:49 +00:00 · aa9933c115
parent f0582f43dd
commit aa9933c115
7 changed files with 111 additions and 70 deletions
--- a/25
+++ b/25
@ -1,5 +1,5 @@
-C Comment\stweaks\son\sthe\stest\scase\sfor\sthe\s[b75a9ca6b0]\sbug\sfix.
-D 2014-04-21T13:36:54.639
+C Changes\sto\sthe\sway\sthe\splanner\scalculates\sthe\scosts\sof\svarious\stable\sand\sindex\sscans.\sSome\stest\scases\sstill\sfailing.
+D 2014-04-24T20:04:49.939
 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f
 F Makefile.in 2ef13430cd359f7b361bb863504e227b25cc7f81
 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23
@ -291,7 +291,7 @@ F src/vtab.c 21b932841e51ebd7d075e2d0ad1415dce8d2d5fd
 F src/wal.c 76e7fc6de229bea8b30bb2539110f03a494dc3a8
 F src/wal.h df01efe09c5cb8c8e391ff1715cca294f89668a4
 F src/walker.c 11edb74d587bc87b33ca96a5173e3ec1b8389e45
-F src/where.c 3b127bdc24b7aa84ffa69729170be11555cd7733
+F src/where.c c12bc20cd649bcae39de3e452bfc1a3f164454ee
 F src/whereInt.h 929c1349b5355fd44f22cee5c14d72b3329c58a6
 F test/8_3_names.test ebbb5cd36741350040fd28b432ceadf495be25b2
 F test/aggerror.test a867e273ef9e3d7919f03ef4f0e8c0d2767944f2
@ -311,7 +311,7 @@ F test/analyze5.test 765c4e284aa69ca172772aa940946f55629bc8c4
 F test/analyze6.test d31defa011a561b938b4608d3538c1b4e0b5e92c
 F test/analyze7.test bb1409afc9e8629e414387ef048b8e0e3e0bdc4f
 F test/analyze8.test 093d15c1c888eed5034304a98c992f7360130b88
-F test/analyze9.test e072a5172d55afcba98d6ca6a219ce8878c2f5c9
+F test/analyze9.test e219daa58fd8677c6a43d771798cf37d68f51d3e
 F test/analyzeA.test 1a5c40079894847976d983ca39c707aaa44b6944
 F test/analyzeB.test 8bf35ee0a548aea831bf56762cb8e7fdb1db083d
 F test/async.test 1d0e056ba1bb9729283a0f22718d3a25e82c277b
@ -329,7 +329,7 @@ F test/auth.test 5bdf154eb28c0e4bbc0473f335858c0d96171768
 F test/auth2.test c3b415b76c033bedb81292118fb7c01f5f10cbcd
 F test/auth3.test a4755e6a2a2fea547ffe63c874eb569e60a28eb5
 F test/autoinc.test c58912526998a39e11f66b533e23cfabea7f25b7
-F test/autoindex1.test d4dfe14001dfcb74cfbd7107f45a79fc1ab6183e
+F test/autoindex1.test 762ff3f8e25d852aae55c6462ca166a80c0cde61
 F test/autovacuum.test 941892505d2c0f410a0cb5970dfa1c7c4e5f6e74
 F test/autovacuum_ioerr2.test 8a367b224183ad801e0e24dcb7d1501f45f244b4
 F test/avtrans.test 0252654f4295ddda3b2cce0e894812259e655a85
@ -1079,7 +1079,7 @@ F test/walslow.test e7be6d9888f83aa5d3d3c7c08aa9b5c28b93609a
 F test/walthread.test de8dbaf6d9e41481c460ba31ca61e163d7348f8e
 F test/where.test 28b64e93428961b07b0d486778d63fd672948f6b
 F test/where2.test 455a2eb2666e66c1e84e2cb5815173a85e6237db
-F test/where3.test d28c51f257e60be30f74308fa385ceeddfb54a6e
+F test/where3.test 1ad55ba900bd7747f98b6082e65bd3e442c5004e
 F test/where4.test d8420ceeb8323a41ceff1f1841fc528e824e1ecf
 F test/where5.test fdf66f96d29a064b63eb543e28da4dfdccd81ad2
 F test/where6.test 5da5a98cec820d488e82708301b96cb8c18a258b
@ -1093,7 +1093,7 @@ F test/whereC.test d6f4ecd4fa2d9429681a5b22a25d2bda8e86ab8a
 F test/whereD.test fd9120e262f9da3c45940f52aefeef4d15b904e5
 F test/whereE.test b3a055eef928c992b0a33198a7b8dc10eea5ad2f
 F test/whereF.test 5b2ba0dbe8074aa13e416b37c753991f0a2492d7
-F test/whereG.test 2533b72ed4a31fd1687230a499b557b911525344
+F test/whereG.test 8189fedf3b98ab581bb70f830175e403a0ef1722
 F test/whereH.test e4b07f7a3c2f5d31195cd33710054c78667573b2
 F test/wherelimit.test 5e9fd41e79bb2b2d588ed999d641d9c965619b31
 F test/wild001.test bca33f499866f04c24510d74baf1e578d4e44b1c
@ -1161,7 +1161,10 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1
 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4
 F tool/warnings.sh d1a6de74685f360ab718efda6265994b99bbea01
 F tool/win/sqlite.vsix 030f3eeaf2cb811a3692ab9c14d021a75ce41fff
-P de9a490f594183f337a2ec9e0f87792eac83548b
-R ce888b84132e0cad3bcca115a32951d3
-U drh
-Z cf9f241149456ab1fa24984e95a412d2
+P 65d2544af9adc1e2f1d193e57f8be0422fb0d5eb
+R 6cc54703275bf2ed6708c34ae52cf7ea
+T *branch * experimental-costs
+T *sym-experimental-costs *
+T -sym-trunk *
+U dan
+Z 868aa60f36dda291ae018583501645e5
--- a/manifest.uuid
+++ b/manifest.uuid
@ -1 +1 @@
-65d2544af9adc1e2f1d193e57f8be0422fb0d5eb
+c5a6ec0a880652dc8f4593d9f7acd58ddc3dc5f3
--- a/src/where.c
+++ b/src/where.c
@ -227,7 +227,7 @@ static int whereClauseInsert(WhereClause *pWC, Expr *p, u8 wtFlags){
  if( p && ExprHasProperty(p, EP_Unlikely) ){
    pTerm->truthProb = sqlite3LogEst(p->iTable) - 99;
  }else{
-    pTerm->truthProb = -1;
+    pTerm->truthProb = 1;
  }
  pTerm->pExpr = sqlite3ExprSkipCollate(p);
  pTerm->wtFlags = wtFlags;
@ -1975,6 +1975,31 @@ static void whereKeyStats(
 }
 #endif /* SQLITE_ENABLE_STAT3_OR_STAT4 */

+/*
+** Refine a range-scan row estimate. pTerm, if not NULL, is a term that
+** provides an upper or lower bound on the scan; nNew is the LogEst
+** estimate of the rows visited without considering pTerm. Return the
+** LogEst estimate with pTerm considered (nNew itself if pTerm is NULL).
+**
+** If the user explicitly specified a likelihood() for this term
+** (truthProb<=0), truthProb is added to nNew: on the logarithmic LogEst
+** scale this multiplies the row count by the likelihood. Otherwise an
+** "IS NOT NULL" term gets a likelihood of 0.50, any other term 0.25.
+*/
+static LogEst whereRangeAdjust(WhereTerm *pTerm, LogEst nNew){
+  LogEst nRet = nNew;
+  if( pTerm ){
+    if( pTerm->truthProb<=0 ){
+      nRet += pTerm->truthProb;
+    }else if( pTerm->wtFlags & TERM_VNULL ){
+      nRet -= 10;        assert( 10==sqlite3LogEst(2) );
+    }else{
+      nRet -= 20;        assert( 20==sqlite3LogEst(4) );
+    }
+  }
+  return nRet;
+}
+
 /*
 ** This function is used to estimate the number of rows that will be visited
 ** by scanning an index for a range of values. The range may have an upper
@ -2127,17 +2152,9 @@ static int whereRangeScanEst(
  UNUSED_PARAMETER(pBuilder);
 #endif
  assert( pLower || pUpper );
-  /* TUNING:  Each inequality constraint reduces the search space 4-fold.
-  ** A BETWEEN operator, therefore, reduces the search space 16-fold */
-  nNew = nOut;
-  if( pLower && (pLower->wtFlags & TERM_VNULL)==0 ){
-    nNew -= 20;        assert( 20==sqlite3LogEst(4) );
-    nOut--;
-  }
-  if( pUpper ){
-    nNew -= 20;        assert( 20==sqlite3LogEst(4) );
-    nOut--;
-  }
+  nNew = whereRangeAdjust(pLower, nOut);
+  nNew = whereRangeAdjust(pUpper, nNew);
+  nOut -= (pLower!=0) + (pUpper!=0);
  if( nNew<10 ) nNew = 10;
  if( nNew<nOut ) nOut = nNew;
  pLoop->nOut = (LogEst)nOut;
@ -3987,7 +4004,9 @@ static void whereLoopOutputAdjust(WhereClause *pWC, WhereLoop *pLoop){
      if( pX==pTerm ) break;
      if( pX->iParent>=0 && (&pWC->a[pX->iParent])==pTerm ) break;
    }
-    if( j<0 ) pLoop->nOut += pTerm->truthProb;
+    if( j<0 ){
+      pLoop->nOut += (pTerm->truthProb<=0 ? pTerm->truthProb : -1);
+    }
  }
 }

@ -4081,6 +4100,7 @@ static int whereLoopAddBtreeIndex(
    pNew->nOut = saved_nOut;
  }
  for(; rc==SQLITE_OK && pTerm!=0; pTerm = whereScanNext(&scan)){
+    LogEst rCostIdx;
    int nIn = 0;
 #ifdef SQLITE_ENABLE_STAT3_OR_STAT4
    int nRecValid = pBuilder->nRecValid;
@ -4154,7 +4174,8 @@ static int whereLoopAddBtreeIndex(
                     pNew->aLTerm[pNew->nLTerm-2] : 0;
    }
    if( pNew->wsFlags & WHERE_COLUMN_RANGE ){
-      /* Adjust nOut and rRun for STAT3 range values */
+      /* Adjust nOut using stat3/stat4 data or, if there is no stat3/stat4
+      ** data, using a default estimate for range constraints.  */
      assert( pNew->nOut==saved_nOut );
      whereRangeScanEst(pParse, pBuilder, pBtm, pTop, pNew);
    }
@ -4181,13 +4202,16 @@ static int whereLoopAddBtreeIndex(
      }
    }
 #endif
+    /* Set rCostIdx to the cost of visiting selected rows in the index. Add
+    ** it to pNew->rRun, which is currently set to the cost of the index
+    ** seek only. Then, if this is a non-covering index, add the cost of
+    ** visiting the rows in the main table.  */
+    rCostIdx = pNew->nOut + 1 + (15*pProbe->szIdxRow)/pSrc->pTab->szTabRow;
+    pNew->rRun = sqlite3LogEstAdd(pNew->rRun, rCostIdx);
    if( (pNew->wsFlags & (WHERE_IDX_ONLY|WHERE_IPK))==0 ){
-      /* Each row involves a step of the index, then a binary search of
-      ** the main table */
-      pNew->rRun =  sqlite3LogEstAdd(pNew->rRun,rLogSize>27 ? rLogSize-17 : 10);
+      pNew->rRun = sqlite3LogEstAdd(pNew->rRun, pNew->nOut + 16);
    }
-    /* Step cost for each output row */
-    pNew->rRun = sqlite3LogEstAdd(pNew->rRun, pNew->nOut);
+
    whereLoopOutputAdjust(pBuilder->pWC, pNew);
    rc = whereLoopInsert(pBuilder, pNew);
    if( (pNew->wsFlags & WHERE_TOP_LIMIT)==0
@ -4319,6 +4343,7 @@ static int whereLoopAddBtree(
    sPk.aiRowEst = aiRowEstPk;
    sPk.onError = OE_Replace;
    sPk.pTable = pTab;
+    sPk.szIdxRow = pTab->szTabRow;
    aiRowEstPk[0] = pTab->nRowEst;
    aiRowEstPk[1] = 1;
    pFirst = pSrc->pTab->pIndex;
@ -4396,10 +4421,8 @@ static int whereLoopAddBtree(

      /* Full table scan */
      pNew->iSortIdx = b ? iSortIdx : 0;
-      /* TUNING: Cost of full table scan is 3*(N + log2(N)).
-      **  +  The extra 3 factor is to encourage the use of indexed lookups
-      **     over full scans.  FIXME */
-      pNew->rRun = sqlite3LogEstAdd(rSize,rLogSize) + 16;
+      /* TUNING: Cost of full table scan is (N*3.0). */
+      pNew->rRun = rSize + 16;
      whereLoopOutputAdjust(pWC, pNew);
      rc = whereLoopInsert(pBuilder, pNew);
      pNew->nOut = rSize;
@ -4426,35 +4449,16 @@ static int whereLoopAddBtree(
          )
      ){
        pNew->iSortIdx = b ? iSortIdx : 0;
-        /* TUNING:  The base cost of an index scan is N + log2(N).
-        ** The log2(N) is for the initial seek to the beginning and the N
-        ** is for the scan itself. */
-        pNew->rRun = sqlite3LogEstAdd(rSize, rLogSize);
-        if( m==0 ){
-          /* TUNING: Cost of a covering index scan is K*(N + log2(N)).
-          **  +  The extra factor K of between 1.1 and 3.0 that depends
-          **     on the relative sizes of the table and the index.  K
-          **     is smaller for smaller indices, thus favoring them.
-          **     The upper bound on K (3.0) matches the penalty factor
-          **     on a full table scan that tries to encourage the use of
-          **     indexed lookups over full scans.
-          */
-          pNew->rRun +=  1 + (15*pProbe->szIdxRow)/pTab->szTabRow;
-        }else{
-          /* TUNING: The cost of scanning a non-covering index is multiplied
-          ** by log2(N) to account for the binary search of the main table
-          ** that must happen for each row of the index.
-          ** TODO: Should there be a multiplier here, analogous to the 3x
-          ** multiplier for a fulltable scan or covering index scan, to
-          ** further discourage the use of an index scan?  Or is the log2(N)
-          ** term sufficient discouragement?
-          ** TODO: What if some or all of the WHERE clause terms can be
-          ** computed without reference to the original table.  Then the
-          ** penality should reduce to logK where K is the number of output
-          ** rows.
-          */
-          pNew->rRun += rLogSize;
+
+        /* The cost of visiting the index rows is N*K, where K is
+        ** between 1.1 and 3.0, depending on the relative sizes of the
+        ** index and table rows. If this is a non-covering index scan,
+        ** also add the cost of visiting table rows (N*3.0).  */
+        pNew->rRun = rSize + 1 + (15*pProbe->szIdxRow)/pTab->szTabRow;
+        if( m!=0 ){
+          pNew->rRun = sqlite3LogEstAdd(pNew->rRun, rSize+16);
        }
+
        whereLoopOutputAdjust(pWC, pNew);
        rc = whereLoopInsert(pBuilder, pNew);
        pNew->nOut = rSize;
@ -4732,8 +4736,7 @@ static int whereLoopAddOr(WhereLoopBuilder *pBuilder, Bitmask mExtra){
      pNew->iSortIdx = 0;
      memset(&pNew->u, 0, sizeof(pNew->u));
      for(i=0; rc==SQLITE_OK && i<sSum.n; i++){
-        /* TUNING: Multiple by 3.5 for the secondary table lookup */
-        pNew->rRun = sSum.a[i].rRun + 18;
+        pNew->rRun = sSum.a[i].rRun;
        pNew->nOut = sSum.a[i].nOut;
        pNew->prereq = sSum.a[i].prereq;
        rc = whereLoopInsert(pBuilder, pNew);
--- a/test/analyze9.test
+++ b/test/analyze9.test
@ -577,16 +577,16 @@ do_test 13.1 {
  execsql ANALYZE
 } {}
 do_eqp_test 13.2.1 {
-  SELECT * FROM t1 WHERE a='abc' AND rowid<15 AND b<20
+  SELECT * FROM t1 WHERE a='abc' AND rowid<15 AND b<12
 } {/SEARCH TABLE t1 USING INDEX i1/}
 do_eqp_test 13.2.2 {
-  SELECT * FROM t1 WHERE a='abc' AND rowid<'15' AND b<20
+  SELECT * FROM t1 WHERE a='abc' AND rowid<'15' AND b<12
 } {/SEARCH TABLE t1 USING INDEX i1/}
 do_eqp_test 13.3.1 {
-  SELECT * FROM t1 WHERE a='abc' AND rowid<100 AND b<20
+  SELECT * FROM t1 WHERE a='abc' AND rowid<100 AND b<12
 } {/SEARCH TABLE t1 USING INDEX i2/}
 do_eqp_test 13.3.2 {
-  SELECT * FROM t1 WHERE a='abc' AND rowid<'100' AND b<20
+  SELECT * FROM t1 WHERE a='abc' AND rowid<'100' AND b<12
 } {/SEARCH TABLE t1 USING INDEX i2/}

 #-------------------------------------------------------------------------
--- a/test/autoindex1.test
+++ b/test/autoindex1.test
@ -97,6 +97,8 @@ do_test autoindex1-210 {
    PRAGMA automatic_index=ON;
    ANALYZE;
    UPDATE sqlite_stat1 SET stat='10000' WHERE tbl='t1';
+    -- Table t2 actually contains 8 rows.
+    UPDATE sqlite_stat1 SET stat='16' WHERE tbl='t2';
    ANALYZE sqlite_master;
    SELECT b, (SELECT d FROM t2 WHERE c=a) FROM t1;
  }
--- a/test/where3.test
+++ b/test/where3.test
@ -231,6 +231,7 @@ do_execsql_test where3-3.0 {
  CREATE TABLE t301(a INTEGER PRIMARY KEY,b,c);
  CREATE INDEX t301c ON t301(c);
  INSERT INTO t301 VALUES(1,2,3);
+  INSERT INTO t301 VALUES(2,2,3);
  CREATE TABLE t302(x, y);
  INSERT INTO t302 VALUES(4,5);
  ANALYZE;
@ -251,7 +252,7 @@ do_execsql_test where3-3.2 {
 } {}
 do_execsql_test where3-3.3 {
  SELECT * FROM t301 WHERE c=3 AND a IS NOT NULL;
-} {1 2 3}
+} {1 2 3 2 2 3}

 if 0 {  # Query planner no longer does this
 # Verify that when there are multiple tables in a join which must be
--- a/test/whereG.test
+++ b/test/whereG.test
@ -14,6 +14,7 @@

 set testdir [file dirname $argv0]
 source $testdir/tester.tcl
+set testprefix whereG

 do_execsql_test whereG-1.0 {
  CREATE TABLE composer(
@ -179,5 +180,36 @@ do_execsql_test whereG-4.0 {
   ORDER BY x;
 } {right}

+#-------------------------------------------------------------------------
+# Test that likelihood() on a range term sways the index-vs-scan choice.
+
+reset_db
+do_execsql_test 5.1 {
+  CREATE TABLE t1(a, b, c);
+  CREATE INDEX i1 ON t1(a, b);
+}
+do_eqp_test 5.1.2 {
+  SELECT * FROM t1 WHERE a>?
+} {0 0 0 {SEARCH TABLE t1 USING INDEX i1 (a>?)}}
+do_eqp_test 5.1.3 {
+  SELECT * FROM t1 WHERE likelihood(a>?, 0.9)
+} {0 0 0 {SCAN TABLE t1}}
+
+do_test 5.2 {
+  for {set i 0} {$i < 100} {incr i} {
+    execsql { INSERT INTO t1 VALUES('abc', $i, $i); }
+  }
+  execsql { INSERT INTO t1 SELECT 'def', b, c FROM t1; }
+  execsql { ANALYZE }
+} {}
+
+do_eqp_test 5.2.2 {
+  SELECT * FROM t1 WHERE likelihood(b>?, 0.01)
+} {0 0 0 {SEARCH TABLE t1 USING INDEX i1 (ANY(a) AND b>?)}}
+
+do_eqp_test 5.2.3 {
+  SELECT * FROM t1 WHERE likelihood(b>?, 0.9)
+} {0 0 0 {SCAN TABLE t1}}

 finish_test
+