diff --git a/manifest b/manifest index f338ba88f8..bff710da8f 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Fix\sa\sproblem\swith\sALTER\sTABLE\sfor\sviews\sthat\shave\sa\snested\sFROM\sclause.\nTicket\s[f50af3e8a565776b]. -D 2020-02-23T17:34:45.938 +C If\sSTAT4\sdetermines\sthat\sa\sWHERE\sclause\sterm\sthat\sis\snot\sused\sby\san\sindex\nhas\svery\shigh\sprobability\sof\sbeing\strue,\sthen\sdo\snot\suse\sthat\sterm\sto\sreduce\nthe\sestimated\soutput\srow\scount. +D 2020-02-24T17:05:09.915 F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724 @@ -617,8 +617,8 @@ F src/vxworks.h d2988f4e5a61a4dfe82c6524dd3d6e4f2ce3cdb9 F src/wal.c 697424314e40d99f93f548c7bfa526c10e87f4bdf64d5a76a96b999dd7133ebc F src/wal.h 606292549f5a7be50b6227bd685fa76e3a4affad71bb8ac5ce4cb5c79f6a176a F src/walker.c a137468bf36c92e64d2275caa80c83902e3a0fc59273591b96c6416d3253d05d -F src/where.c cbad14f1d8e11b9f052e937274315c7c17266a89eda408c86084ee894debb7d5 -F src/whereInt.h 9157228db086f436a574589f8cc5749bd971e94017c552305ad9ec472ed2e098 +F src/where.c 3b8c9bd013eb0736e16f60bdc109e83337ef99513a3aff5f16ddac036e6c277e +F src/whereInt.h 6b874aa15f94e43a2cec1080be64d955b04deeafeac90ffb5d6975c0d511be3c F src/wherecode.c f5df56e395ade2240cabb2d39500c681bd29f8cc0636c3301c4996ad160df94d F src/whereexpr.c 264d58971eaf8256eb5b0917bcd7fc7a1f1109fdda183a8382308a1b18a2dce7 F src/window.c f8ba2ee12a19b51d3ba42c16277c74185ee9215306bc0d5a03974ade8b5bc98f @@ -655,6 +655,7 @@ F test/analyzeC.test 489fe2ea3be3f17548e8dd895f1b41c9669b52de1b0861f5bffe6eec46e F test/analyzeD.test e50cd0b3e6063216cc0c88a1776e8645dc0bd65a6bb275769cbee33b7fd8d90c F test/analyzeE.test 8684e8ac5722fb97c251887ad97e5d496a98af1d F test/analyzeF.test 9e1a0537949eb5483642b1140a5c39e5b4025939024b935398471fa552f4dabb +F test/analyzeG.test a48c0f324dd14de9a40d52abe5ca2637f682b9a791d2523dd619f6efa14e345b F test/analyzer1.test 459fa02c445ddbf0101a3bad47b34290a35f2e49 F test/async.test 1d0e056ba1bb9729283a0f22718d3a25e82c277b F test/async2.test c0a9bd20816d7d6a2ceca7b8c03d3d69c28ffb8b @@ -1858,7 +1859,8 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0 -P f02030b3403d67734bba471a91ad5bfdb03ddf6fdc3ef14808a04495e43b0470 -R 27f30806e168cf65f551539c0795adeb +P c431b3fd8fd0f6a6974bba3e9366b0430ec003d570e7ce70ceefbcff5fe4b6fa 4558163b6a525990f0f1b6629dbb76daf49bcaf1ddbaf0c50fe05ce9ee480ff8 +R 2f59a601b560c09f0f628a3935395fd4 +T +closed 4558163b6a525990f0f1b6629dbb76daf49bcaf1ddbaf0c50fe05ce9ee480ff8 U drh -Z b1d396f928d83f91cde5e969c202f590 +Z 88585aca481f88c3f165f1a6e9b07ffb diff --git a/manifest.uuid b/manifest.uuid index fc3a50b794..a0c4891c50 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -c431b3fd8fd0f6a6974bba3e9366b0430ec003d570e7ce70ceefbcff5fe4b6fa \ No newline at end of file +40739c793b0e98a3bae296d3a1f74944edcdd4cc33c26b417fde4eaf6f14d062 \ No newline at end of file diff --git a/src/where.c b/src/where.c index da9c5a7233..d9f11296fa 100644 --- a/src/where.c +++ b/src/where.c @@ -2298,7 +2298,9 @@ static void whereLoopOutputAdjust( /* In the absence of explicit truth probabilities, use heuristics to ** guess a reasonable truth probability. */ pLoop->nOut--; - if( pTerm->eOperator&(WO_EQ|WO_IS) ){ + if( (pTerm->eOperator&(WO_EQ|WO_IS))!=0 + && (pTerm->wtFlags & TERM_HIGHTRUTH)==0 /* tag-20200224-1 */ + ){ Expr *pRight = pTerm->pExpr->pRight; int k = 0; testcase( pTerm->pExpr->op==TK_IS ); @@ -2307,7 +2309,10 @@ static void whereLoopOutputAdjust( }else{ k = 20; } - if( iReducewtFlags |= TERM_HEURTRUTH; + iReduce = k; + } } } } @@ -2489,9 +2494,9 @@ static int whereLoopAddBtreeIndex( } if( IsUniqueIndex(pProbe) && saved_nEq==pProbe->nKeyCol-1 ){ - pBuilder->bldFlags |= SQLITE_BLDF_UNIQUE; + pBuilder->bldFlags1 |= SQLITE_BLDF1_UNIQUE; }else{ - pBuilder->bldFlags |= SQLITE_BLDF_INDEXED; + pBuilder->bldFlags1 |= SQLITE_BLDF1_INDEXED; } pNew->wsFlags = saved_wsFlags; pNew->u.btree.nEq = saved_nEq; @@ -2656,6 +2661,27 @@ static int whereLoopAddBtreeIndex( if( rc!=SQLITE_OK ) break; /* Jump out of the pTerm loop */ if( nOut ){ pNew->nOut = sqlite3LogEst(nOut); + if( nEq==1 + /* TUNING: Mark terms as "low selectivity" if they seem likely + ** to be true for half or more of the rows in the table. + ** See tag-202002240-1 */ + && pNew->nOut+10 > pProbe->aiRowLogEst[0] + ){ +#if WHERETRACE_ENABLED /* 0x01 */ + if( sqlite3WhereTrace & 0x01 ){ + sqlite3DebugPrintf( + "STAT4 determines term has low selectivity:\n"); + sqlite3WhereTermPrint(pTerm, 999); + } +#endif + pTerm->wtFlags |= TERM_HIGHTRUTH; + if( pTerm->wtFlags & TERM_HEURTRUTH ){ + /* If the term has previously been used with an assumption of + ** higher selectivity, then set the flag to rerun the + ** loop computations. */ + pBuilder->bldFlags2 |= SQLITE_BLDF2_2NDPASS; + } + } if( pNew->nOut>saved_nOut ) pNew->nOut = saved_nOut; pNew->nOut -= nIn; } @@ -3080,9 +3106,9 @@ static int whereLoopAddBtree( } } - pBuilder->bldFlags = 0; + pBuilder->bldFlags1 = 0; rc = whereLoopAddBtreeIndex(pBuilder, pSrc, pProbe, 0); - if( pBuilder->bldFlags==SQLITE_BLDF_INDEXED ){ + if( pBuilder->bldFlags1==SQLITE_BLDF1_INDEXED ){ /* If a non-unique index is used, or if a prefix of the key for ** unique index is used (making the index functionally non-unique) ** then the sqlite_stat1 data becomes important for scoring the @@ -4537,6 +4563,28 @@ static int exprIsDeterministic(Expr *p){ return w.eCode; } + +#ifdef WHERETRACE_ENABLED +/* +** Display all WhereLoops in pWInfo +*/ +static void showAllWhereLoops(WhereInfo *pWInfo, WhereClause *pWC){ + if( sqlite3WhereTrace ){ /* Display all of the WhereLoop objects */ + WhereLoop *p; + int i; + static const char zLabel[] = "0123456789abcdefghijklmnopqrstuvwyxz" + "ABCDEFGHIJKLMNOPQRSTUVWYXZ"; + for(p=pWInfo->pLoops, i=0; p; p=p->pNextLoop, i++){ + p->cId = zLabel[i%(sizeof(zLabel)-1)]; + sqlite3WhereLoopPrint(p, pWC); + } + } +} +# define WHERETRACE_ALL_LOOPS(W,C) showAllWhereLoops(W,C) +#else +# define WHERETRACE_ALL_LOOPS(W,C) +#endif + /* ** Generate the beginning of the loop used for WHERE clause processing. ** The return value is a pointer to an opaque structure that contains @@ -4838,19 +4886,28 @@ WhereInfo *sqlite3WhereBegin( if( nTabList!=1 || whereShortCut(&sWLB)==0 ){ rc = whereLoopAddAll(&sWLB); if( rc ) goto whereBeginError; - -#ifdef WHERETRACE_ENABLED - if( sqlite3WhereTrace ){ /* Display all of the WhereLoop objects */ - WhereLoop *p; - int i; - static const char zLabel[] = "0123456789abcdefghijklmnopqrstuvwyxz" - "ABCDEFGHIJKLMNOPQRSTUVWYXZ"; - for(p=pWInfo->pLoops, i=0; p; p=p->pNextLoop, i++){ - p->cId = zLabel[i%(sizeof(zLabel)-1)]; - sqlite3WhereLoopPrint(p, sWLB.pWC); + +#ifdef SQLITE_ENABLE_STAT4 + /* If one or more WhereTerm.truthProb values were used in estimating + ** loop parameters, but then those truthProb values were subsequently + ** changed based on STAT4 information while computing subsequent loops, + ** then we need to rerun the whole loop building process so that all + ** loops will be built using the revised truthProb values. */ + if( sWLB.bldFlags2 & SQLITE_BLDF2_2NDPASS ){ + WHERETRACE_ALL_LOOPS(pWInfo, sWLB.pWC); + WHERETRACE(0xffff, + ("**** Redo all loop computations due to" + " TERM_HIGHTRUTH changes ****\n")); + while( pWInfo->pLoops ){ + WhereLoop *p = pWInfo->pLoops; + pWInfo->pLoops = p->pNextLoop; + whereLoopDelete(db, p); } + rc = whereLoopAddAll(&sWLB); + if( rc ) goto whereBeginError; } #endif + WHERETRACE_ALL_LOOPS(pWInfo, sWLB.pWC); wherePathSolver(pWInfo, 0); if( db->mallocFailed ) goto whereBeginError; diff --git a/src/whereInt.h b/src/whereInt.h index 74101624d5..e33dde55e2 100644 --- a/src/whereInt.h +++ b/src/whereInt.h @@ -291,6 +291,12 @@ struct WhereTerm { #define TERM_LIKE 0x0400 /* The original LIKE operator */ #define TERM_IS 0x0800 /* Term.pExpr is an IS operator */ #define TERM_VARSELECT 0x1000 /* Term.pExpr contains a correlated sub-query */ +#define TERM_HEURTRUTH 0x2000 /* Heuristic truthProb used */ +#ifdef SQLITE_ENABLE_STAT4 +# define TERM_HIGHTRUTH 0x4000 /* Term excludes few rows */ +#else +# define TERM_HIGHTRUTH 0 /* Only used with STAT4 */ +#endif /* ** An instance of the WhereScan object is used as an iterator for locating @@ -405,13 +411,16 @@ struct WhereLoopBuilder { UnpackedRecord *pRec; /* Probe for stat4 (if required) */ int nRecValid; /* Number of valid fields currently in pRec */ #endif - unsigned int bldFlags; /* SQLITE_BLDF_* flags */ + unsigned char bldFlags1; /* First set of SQLITE_BLDF_* flags */ + unsigned char bldFlags2; /* Second set of SQLITE_BLDF_* flags */ unsigned int iPlanLimit; /* Search limiter */ }; /* Allowed values for WhereLoopBuider.bldFlags */ -#define SQLITE_BLDF_INDEXED 0x0001 /* An index is used */ -#define SQLITE_BLDF_UNIQUE 0x0002 /* All keys of a UNIQUE index used */ +#define SQLITE_BLDF1_INDEXED 0x0001 /* An index is used */ +#define SQLITE_BLDF1_UNIQUE 0x0002 /* All keys of a UNIQUE index used */ + +#define SQLITE_BLDF2_2NDPASS 0x0004 /* Second builder pass needed */ /* The WhereLoopBuilder.iPlanLimit is used to limit the number of ** index+constraint combinations the query planner will consider for a diff --git a/test/analyzeG.test b/test/analyzeG.test new file mode 100644 index 0000000000..eb1853b1dc --- /dev/null +++ b/test/analyzeG.test @@ -0,0 +1,88 @@ +# 2020-02-23 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# Tests for functionality related to ANALYZE. +# + +set testdir [file dirname $argv0] +source $testdir/tester.tcl + +ifcapable !stat4 { + finish_test + return +} +set testprefix analyzeG + +proc do_scan_order_test {tn sql expect} { + uplevel [list do_test $tn [subst -nocommands { + set res "" + db eval "explain query plan $sql" { + lappend res [set detail] + } + set res + }] [list {*}$expect]] +} + +#------------------------------------------------------------------------- +# Test cases 1.* seek to verify that even if an index is not used, its +# stat4 data may be used by the planner to estimate the number of +# rows that match an unindexed constraint on the same column. +# +do_execsql_test 1.0 { + PRAGMA automatic_index = 0; + CREATE TABLE t1(a, x); + CREATE TABLE t2(b, y); + WITH s(i) AS ( + SELECT 1 UNION ALL SELECT i+1 FROM s WHERE i<100 + ) + INSERT INTO t1 SELECT (i%50), NULL FROM s; + WITH s(i) AS ( + SELECT 1 UNION ALL SELECT i+1 FROM s WHERE i<100 + ) + INSERT INTO t2 SELECT (CASE WHEN i<95 THEN 44 ELSE i END), NULL FROM s; +} + +# Join tables t1 and t2. Both contain 100 rows. (a=44) matches 2 rows +# in "t1", (b=44) matches 95 rows in table "t2". But the planner doesn't +# know this, so it has no preference as to which order the tables are +# scanned in. In practice this means that tables are scanned in the order +# they are specified in in the FROM clause. +do_scan_order_test 1.1.1 { + SELECT * FROM t1, t2 WHERE a=44 AND b=44; +} { + {SCAN TABLE t1} {SCAN TABLE t2} +} +do_scan_order_test 1.1.2 { + SELECT * FROM t2, t1 WHERE a=44 AND b=44 +} { + {SCAN TABLE t2} {SCAN TABLE t1} +} + +do_execsql_test 1.2 { + CREATE INDEX t2b ON t2(b); + ANALYZE; +} + +# Now, with the ANALYZE data, the planner knows that (b=44) matches a +# large number of rows. So it elects to scan table "t1" first, regardless +# of the order in which the tables are specified in the FROM clause. +do_scan_order_test 1.3.1 { + SELECT * FROM t1, t2 WHERE a=44 AND b=44; +} { + {SCAN TABLE t1} {SCAN TABLE t2} +} +do_scan_order_test 1.3.2 { + SELECT * FROM t2, t1 WHERE a=44 AND b=44 +} { + {SCAN TABLE t1} {SCAN TABLE t2} +} + + +finish_test