From 89efac94fdfac50fac7f971edbde4850b008ecd8 Mon Sep 17 00:00:00 2001 From: drh Date: Sat, 22 Feb 2020 16:58:49 +0000 Subject: [PATCH 1/5] When stat4 information is available, try to use it to improve the truth probability of WHERE clause terms that do not participate in the index. FossilOrigin-Name: 1babd6ec5d60e2c34aa1c0285ead768a88004218468e97262411973fe3487022 --- manifest | 17 ++++++++++------- manifest.uuid | 2 +- src/where.c | 47 ++++++++++++++++++++++++++++++++++++++++++----- src/whereInt.h | 10 +++++++--- 4 files changed, 60 insertions(+), 16 deletions(-) diff --git a/manifest b/manifest index 8bec1d1c7e..d09662988a 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C In\sthe\sOP_Column\sopcode,\sif\sthe\scursor\sis\smarked\sNullRow\s(due\sto\sbeing\sthe\nright\stable\sof\sa\sLEFT\sJOIN\sthat\sdoes\snot\smatch)\sand\sthe\scursor\sis\sthe\stable\ncursor\sfor\san\sOR-optimization\swith\sa\scovering\sindex,\sthen\sdo\snot\ssubstitute\nthe\scovering\sindex\scursor,\ssince\sthe\scovering\sindex\scursor\sdoes\snot\shave\nthe\sNullRow\sflag\sset.\s\sTicket\s[aa4378693018aa99] -D 2020-02-22T13:01:19.240 +C When\sstat4\sinformation\sis\savailable,\stry\sto\suse\sit\sto\simprove\sthe\struth\nprobability\sof\sWHERE\sclause\sterms\sthat\sdo\snot\sparticipate\sin\sthe\sindex. 
+D 2020-02-22T16:58:49.287 F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724 @@ -617,8 +617,8 @@ F src/vxworks.h d2988f4e5a61a4dfe82c6524dd3d6e4f2ce3cdb9 F src/wal.c 697424314e40d99f93f548c7bfa526c10e87f4bdf64d5a76a96b999dd7133ebc F src/wal.h 606292549f5a7be50b6227bd685fa76e3a4affad71bb8ac5ce4cb5c79f6a176a F src/walker.c a137468bf36c92e64d2275caa80c83902e3a0fc59273591b96c6416d3253d05d -F src/where.c cbad14f1d8e11b9f052e937274315c7c17266a89eda408c86084ee894debb7d5 -F src/whereInt.h 9157228db086f436a574589f8cc5749bd971e94017c552305ad9ec472ed2e098 +F src/where.c 74a2fc5a900eab9a2fdda2017a290f0eeaa9c5597fdb86322ea2ccbc3758c71d +F src/whereInt.h 94e3aadcf43b4d16279182d147c9e4f8ef6ed5a5bd1ecc021639c29336b0a3eb F src/wherecode.c f5df56e395ade2240cabb2d39500c681bd29f8cc0636c3301c4996ad160df94d F src/whereexpr.c 264d58971eaf8256eb5b0917bcd7fc7a1f1109fdda183a8382308a1b18a2dce7 F src/window.c f8ba2ee12a19b51d3ba42c16277c74185ee9215306bc0d5a03974ade8b5bc98f @@ -1858,7 +1858,10 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0 -P 14d14eb537075c6ac77513b1e7305bed8bc01a9034dfb763fd96f76400f2b705 -R 1f345124b6aafeeb9e600617cf39817f +P f02030b3403d67734bba471a91ad5bfdb03ddf6fdc3ef14808a04495e43b0470 +R 0b7e29ed07d82aa79f09869cc21d3459 +T *branch * stat4-truthprob +T *sym-stat4-truthprob * +T -sym-trunk * U drh -Z 38c81c56b51d8e5ae42e4e25b041736c +Z b67db2665ec4d9de443f342c9e446178 diff --git a/manifest.uuid b/manifest.uuid index 83685737a8..0338a5dfbe 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ 
-f02030b3403d67734bba471a91ad5bfdb03ddf6fdc3ef14808a04495e43b0470 \ No newline at end of file +1babd6ec5d60e2c34aa1c0285ead768a88004218468e97262411973fe3487022 \ No newline at end of file diff --git a/src/where.c b/src/where.c index da9c5a7233..58cf11ab1b 100644 --- a/src/where.c +++ b/src/where.c @@ -2307,7 +2307,10 @@ static void whereLoopOutputAdjust( }else{ k = 20; } - if( iReduce<k ) iReduce = k; + if( iReduce<k ){ + pTerm->wtFlags |= TERM_HEURTRUTH; + iReduce = k; + } } } } @@ -2489,9 +2492,9 @@ static int whereLoopAddBtreeIndex( } if( IsUniqueIndex(pProbe) && saved_nEq==pProbe->nKeyCol-1 ){ - pBuilder->bldFlags |= SQLITE_BLDF_UNIQUE; + pBuilder->bldFlags1 |= SQLITE_BLDF1_UNIQUE; }else{ - pBuilder->bldFlags |= SQLITE_BLDF_INDEXED; + pBuilder->bldFlags1 |= SQLITE_BLDF1_INDEXED; } pNew->wsFlags = saved_wsFlags; pNew->u.btree.nEq = saved_nEq; @@ -2656,6 +2659,21 @@ static int whereLoopAddBtreeIndex( if( rc!=SQLITE_OK ) break; /* Jump out of the pTerm loop */ if( nOut ){ pNew->nOut = sqlite3LogEst(nOut); + if( nEq==1 && pTerm->truthProb>0 ){ +#if WHERETRACE_ENABLED /* 0x01 */ + if( sqlite3WhereTrace & 0x01 ){ + sqlite3DebugPrintf("Update truthProb from %d to %d:\n", + pTerm->truthProb, pNew->nOut - pProbe->aiRowLogEst[0]); + sqlite3WhereTermPrint(pTerm, 999); + } +#endif + pTerm->truthProb = pNew->nOut - pProbe->aiRowLogEst[0]; + if( pTerm->wtFlags & TERM_HEURTRUTH ){ + /* If the old heuristic truthProb was previously used, signal + ** that all loops will need to be recomputed */ + pBuilder->bldFlags2 |= SQLITE_BLDF2_2NDPASS; + } + } if( pNew->nOut>saved_nOut ) pNew->nOut = saved_nOut; pNew->nOut -= nIn; } @@ -3080,9 +3098,9 @@ static int whereLoopAddBtree( } } - pBuilder->bldFlags = 0; + pBuilder->bldFlags1 = 0; rc = whereLoopAddBtreeIndex(pBuilder, pSrc, pProbe, 0); - if( pBuilder->bldFlags==SQLITE_BLDF_INDEXED ){ + if( pBuilder->bldFlags1==SQLITE_BLDF1_INDEXED ){ /* If a non-unique index is used, or if a prefix of the key for ** unique index is used (making the index functionally non-unique) ** then the 
sqlite_stat1 data becomes important for scoring the @@ -4838,6 +4856,25 @@ WhereInfo *sqlite3WhereBegin( if( nTabList!=1 || whereShortCut(&sWLB)==0 ){ rc = whereLoopAddAll(&sWLB); if( rc ) goto whereBeginError; + +#ifdef SQLITE_ENABLE_STAT4 + /* If one or more WhereTerm.truthProb values were used in estimating + ** loop parameters, but then those truthProb values were subsequently + ** changed based on STAT4 information while computing subsequent loops, + ** then we need to rerun the whole loop building process so that all + ** loops will be built using the revised truthProb values. */ + if( sWLB.bldFlags2 & SQLITE_BLDF2_2NDPASS ){ + WHERETRACE(0xffff, + ("**** Redo all loop computations due to truthProb changes ****\n")); + while( pWInfo->pLoops ){ + WhereLoop *p = pWInfo->pLoops; + pWInfo->pLoops = p->pNextLoop; + whereLoopDelete(db, p); + } + rc = whereLoopAddAll(&sWLB); + if( rc ) goto whereBeginError; + } +#endif #ifdef WHERETRACE_ENABLED if( sqlite3WhereTrace ){ /* Display all of the WhereLoop objects */ diff --git a/src/whereInt.h b/src/whereInt.h index 74101624d5..9d4559c742 100644 --- a/src/whereInt.h +++ b/src/whereInt.h @@ -291,6 +291,7 @@ struct WhereTerm { #define TERM_LIKE 0x0400 /* The original LIKE operator */ #define TERM_IS 0x0800 /* Term.pExpr is an IS operator */ #define TERM_VARSELECT 0x1000 /* Term.pExpr contains a correlated sub-query */ +#define TERM_HEURTRUTH 0x2000 /* Heuristic truthProb used */ /* ** An instance of the WhereScan object is used as an iterator for locating @@ -405,13 +406,16 @@ struct WhereLoopBuilder { UnpackedRecord *pRec; /* Probe for stat4 (if required) */ int nRecValid; /* Number of valid fields currently in pRec */ #endif - unsigned int bldFlags; /* SQLITE_BLDF_* flags */ + unsigned char bldFlags1; /* First set of SQLITE_BLDF_* flags */ + unsigned char bldFlags2; /* Second set of SQLITE_BLDF_* flags */ unsigned int iPlanLimit; /* Search limiter */ }; /* Allowed values for WhereLoopBuider.bldFlags */ -#define 
SQLITE_BLDF_INDEXED 0x0001 /* An index is used */ -#define SQLITE_BLDF_UNIQUE 0x0002 /* All keys of a UNIQUE index used */ +#define SQLITE_BLDF1_INDEXED 0x0001 /* An index is used */ +#define SQLITE_BLDF1_UNIQUE 0x0002 /* All keys of a UNIQUE index used */ + +#define SQLITE_BLDF2_2NDPASS 0x0004 /* Second builder pass needed */ /* The WhereLoopBuilder.iPlanLimit is used to limit the number of ** index+constraint combinations the query planner will consider for a From 5c193464510c43ef55f806adb8c5807c294a6b8a Mon Sep 17 00:00:00 2001 From: dan Date: Sat, 22 Feb 2020 17:32:00 +0000 Subject: [PATCH 2/5] Add new test file analyzeG.test, containing a test for the change on this branch. FossilOrigin-Name: 243ab1852a2291595527ea1f26e78ad83eda285ae28f876bc1c703677f495cfa --- manifest | 16 ++++----- manifest.uuid | 2 +- test/analyzeG.test | 85 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 93 insertions(+), 10 deletions(-) create mode 100644 test/analyzeG.test diff --git a/manifest b/manifest index d09662988a..e68e44817c 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C When\sstat4\sinformation\sis\savailable,\stry\sto\suse\sit\sto\simprove\sthe\struth\nprobability\sof\sWHERE\sclause\sterms\sthat\sdo\snot\sparticipate\sin\sthe\sindex. -D 2020-02-22T16:58:49.287 +C Add\snew\stest\sfile\sanalyzeG.test,\scontaining\sa\stest\sfor\sthe\schange\son\sthis\sbranch. 
+D 2020-02-22T17:32:00.691 F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724 @@ -655,6 +655,7 @@ F test/analyzeC.test 489fe2ea3be3f17548e8dd895f1b41c9669b52de1b0861f5bffe6eec46e F test/analyzeD.test e50cd0b3e6063216cc0c88a1776e8645dc0bd65a6bb275769cbee33b7fd8d90c F test/analyzeE.test 8684e8ac5722fb97c251887ad97e5d496a98af1d F test/analyzeF.test 9e1a0537949eb5483642b1140a5c39e5b4025939024b935398471fa552f4dabb +F test/analyzeG.test c42be77a06331f8677c94b44ba35e170f0771a07d869dffb6b0d78f18b562747 F test/analyzer1.test 459fa02c445ddbf0101a3bad47b34290a35f2e49 F test/async.test 1d0e056ba1bb9729283a0f22718d3a25e82c277b F test/async2.test c0a9bd20816d7d6a2ceca7b8c03d3d69c28ffb8b @@ -1858,10 +1859,7 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0 -P f02030b3403d67734bba471a91ad5bfdb03ddf6fdc3ef14808a04495e43b0470 -R 0b7e29ed07d82aa79f09869cc21d3459 -T *branch * stat4-truthprob -T *sym-stat4-truthprob * -T -sym-trunk * -U drh -Z b67db2665ec4d9de443f342c9e446178 +P 1babd6ec5d60e2c34aa1c0285ead768a88004218468e97262411973fe3487022 +R 2b88c80f8c57b2c7447cf9a6931d1437 +U dan +Z e854d74d6063f88b5d388938dfbba4ed diff --git a/manifest.uuid b/manifest.uuid index 0338a5dfbe..2b78df281d 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -1babd6ec5d60e2c34aa1c0285ead768a88004218468e97262411973fe3487022 \ No newline at end of file +243ab1852a2291595527ea1f26e78ad83eda285ae28f876bc1c703677f495cfa \ No newline at end of file diff --git a/test/analyzeG.test b/test/analyzeG.test new file mode 100644 index 
0000000000..5c729b9cb2 --- /dev/null +++ b/test/analyzeG.test @@ -0,0 +1,85 @@ +# 2020-02-23 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# Tests for functionality related to ANALYZE. +# + +set testdir [file dirname $argv0] +source $testdir/tester.tcl + +set testprefix analyzeG + +proc do_scan_order_test {tn sql expect} { + uplevel [list do_test $tn [subst -nocommands { + set res "" + db eval "explain query plan $sql" { + lappend res [set detail] + } + set res + }] [list {*}$expect]] +} + +#------------------------------------------------------------------------- +# Test cases 1.* seek to verify that even if an index is not used, its +# stat4 data may be used by the planner to estimate the number of +# rows that match an unindexed constraint on the same column. +# +do_execsql_test 1.0 { + PRAGMA automatic_index = 0; + CREATE TABLE t1(a, x); + CREATE TABLE t2(b, y); + WITH s(i) AS ( + SELECT 1 UNION ALL SELECT i+1 FROM s WHERE i<100 + ) + INSERT INTO t1 SELECT (i%50), NULL FROM s; + WITH s(i) AS ( + SELECT 1 UNION ALL SELECT i+1 FROM s WHERE i<100 + ) + INSERT INTO t2 SELECT (CASE WHEN i<95 THEN 44 ELSE i END), NULL FROM s; +} + +# Join tables t1 and t2. Both contain 100 rows. (a=44) matches 2 rows +# in "t1", (b=44) matches 94 rows in table "t2". But the planner doesn't +# know this, so it has no preference as to which order the tables are +# scanned in. In practice this means that tables are scanned in the order +# they are specified in the FROM clause. 
+do_scan_order_test 1.1.1 { + SELECT * FROM t1, t2 WHERE a=44 AND b=44; +} { + {SCAN TABLE t1} {SCAN TABLE t2} +} +do_scan_order_test 1.1.2 { + SELECT * FROM t2, t1 WHERE a=44 AND b=44 +} { + {SCAN TABLE t2} {SCAN TABLE t1} +} + +do_execsql_test 1.2 { + CREATE INDEX t2b ON t2(b); + ANALYZE; +} + +# Now, with the ANALYZE data, the planner knows that (b=44) matches a +# large number of rows. So it elects to scan table "t1" first, regardless +# of the order in which the tables are specified in the FROM clause. +do_scan_order_test 1.3.1 { + SELECT * FROM t1, t2 WHERE a=44 AND b=44; +} { + {SCAN TABLE t1} {SCAN TABLE t2} +} +do_scan_order_test 1.3.2 { + SELECT * FROM t2, t1 WHERE a=44 AND b=44 +} { + {SCAN TABLE t1} {SCAN TABLE t2} +} + + +finish_test + From cea1951e80a602a6cca083e45767e47e04d6e7f0 Mon Sep 17 00:00:00 2001 From: drh Date: Sat, 22 Feb 2020 18:27:48 +0000 Subject: [PATCH 3/5] Do not activate the truthProb adjustment mechanism if the truth probability is less than the heuristic value, as there could be correlations unknown to stat4. Also add additional tracing output to make truthProb adjustments more visible. FossilOrigin-Name: c535fea147ce5c6e4aab25d3c85a3f53a7364c5b5ee10fb6d393c5911a02be7e --- manifest | 14 +++++++------- manifest.uuid | 2 +- src/where.c | 47 ++++++++++++++++++++++++++++++++--------------- 3 files changed, 40 insertions(+), 23 deletions(-) diff --git a/manifest b/manifest index e68e44817c..9f902b1841 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Add\snew\stest\sfile\sanalyzeG.test,\scontaining\sa\stest\sfor\sthe\schange\son\sthis\sbranch. -D 2020-02-22T17:32:00.691 +C Do\snot\sactivate\sthe\struthProb\sadjustment\smechanism\sif\sthe\struth\sprobability\nis\sless\sthan\sthe\sheuristic\svalue,\sas\sthere\scould\sbe\scorrelations\sunknown\sto\nstat4.\s\sAlso\sadd\sadditional\stracing\soutput\sto\smake\struthProb\sadjustments\smore\nvisible. 
+D 2020-02-22T18:27:48.175 F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724 @@ -617,7 +617,7 @@ F src/vxworks.h d2988f4e5a61a4dfe82c6524dd3d6e4f2ce3cdb9 F src/wal.c 697424314e40d99f93f548c7bfa526c10e87f4bdf64d5a76a96b999dd7133ebc F src/wal.h 606292549f5a7be50b6227bd685fa76e3a4affad71bb8ac5ce4cb5c79f6a176a F src/walker.c a137468bf36c92e64d2275caa80c83902e3a0fc59273591b96c6416d3253d05d -F src/where.c 74a2fc5a900eab9a2fdda2017a290f0eeaa9c5597fdb86322ea2ccbc3758c71d +F src/where.c 44695e878a287d8c1d4976e2e85bea29994facec3972beb7ca22437d62cda6a5 F src/whereInt.h 94e3aadcf43b4d16279182d147c9e4f8ef6ed5a5bd1ecc021639c29336b0a3eb F src/wherecode.c f5df56e395ade2240cabb2d39500c681bd29f8cc0636c3301c4996ad160df94d F src/whereexpr.c 264d58971eaf8256eb5b0917bcd7fc7a1f1109fdda183a8382308a1b18a2dce7 @@ -1859,7 +1859,7 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0 -P 1babd6ec5d60e2c34aa1c0285ead768a88004218468e97262411973fe3487022 -R 2b88c80f8c57b2c7447cf9a6931d1437 -U dan -Z e854d74d6063f88b5d388938dfbba4ed +P 243ab1852a2291595527ea1f26e78ad83eda285ae28f876bc1c703677f495cfa +R 2719b1105cf4d47b954b372c20702e91 +U drh +Z e3872ca3639b1687907dbcae881883fa diff --git a/manifest.uuid b/manifest.uuid index 2b78df281d..e6f868ae79 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -243ab1852a2291595527ea1f26e78ad83eda285ae28f876bc1c703677f495cfa \ No newline at end of file +c535fea147ce5c6e4aab25d3c85a3f53a7364c5b5ee10fb6d393c5911a02be7e \ No newline at end of file diff --git a/src/where.c b/src/where.c index 
58cf11ab1b..03bfad3b7d 100644 --- a/src/where.c +++ b/src/where.c @@ -2305,7 +2305,7 @@ static void whereLoopOutputAdjust( if( sqlite3ExprIsInteger(pRight, &k) && k>=(-1) && k<=1 ){ k = 10; }else{ - k = 20; + k = 20; /* Keep the "20" value in sync. See tag-20200222-1 */ } if( iReduce<k ){ pTerm->wtFlags |= TERM_HEURTRUTH; @@ -2659,7 +2659,13 @@ static int whereLoopAddBtreeIndex( if( rc!=SQLITE_OK ) break; /* Jump out of the pTerm loop */ if( nOut ){ pNew->nOut = sqlite3LogEst(nOut); - if( nEq==1 && pTerm->truthProb>0 ){ + if( nEq==1 + && pTerm->truthProb>0 + /* TUNING: Adjust truthProb from the default heuristic only if the + ** probability is close to 1.0. The "20" constant is copied from + ** the heuristic at tag-20200222-1. Keep values in sync */ + && pNew->nOut+20 > pProbe->aiRowLogEst[0] + ){ #if WHERETRACE_ENABLED /* 0x01 */ if( sqlite3WhereTrace & 0x01 ){ sqlite3DebugPrintf("Update truthProb from %d to %d:\n", @@ -4555,6 +4561,28 @@ static int exprIsDeterministic(Expr *p){ return w.eCode; } + +#ifdef WHERETRACE_ENABLED +/* +** Display all WhereLoops in pWInfo +*/ +static void showAllWhereLoops(WhereInfo *pWInfo, WhereClause *pWC){ + if( sqlite3WhereTrace ){ /* Display all of the WhereLoop objects */ + WhereLoop *p; + int i; + static const char zLabel[] = "0123456789abcdefghijklmnopqrstuvwyxz" + "ABCDEFGHIJKLMNOPQRSTUVWYXZ"; + for(p=pWInfo->pLoops, i=0; p; p=p->pNextLoop, i++){ + p->cId = zLabel[i%(sizeof(zLabel)-1)]; + sqlite3WhereLoopPrint(p, pWC); + } + } +} +# define WHERETRACE_ALL_LOOPS(W,C) showAllWhereLoops(W,C) +#else +# define WHERETRACE_ALL_LOOPS(W,C) +#endif + /* ** Generate the beginning of the loop used for WHERE clause processing. ** The return value is a pointer to an opaque structure that contains @@ -4864,6 +4892,7 @@ WhereInfo *sqlite3WhereBegin( ** then we need to rerun the whole loop building process so that all ** loops will be built using the revised truthProb values. 
*/ if( sWLB.bldFlags2 & SQLITE_BLDF2_2NDPASS ){ + WHERETRACE_ALL_LOOPS(pWInfo, sWLB.pWC); WHERETRACE(0xffff, ("**** Redo all loop computations due to truthProb changes ****\n")); while( pWInfo->pLoops ){ @@ -4875,19 +4904,7 @@ WhereInfo *sqlite3WhereBegin( if( rc ) goto whereBeginError; } #endif - -#ifdef WHERETRACE_ENABLED - if( sqlite3WhereTrace ){ /* Display all of the WhereLoop objects */ - WhereLoop *p; - int i; - static const char zLabel[] = "0123456789abcdefghijklmnopqrstuvwyxz" - "ABCDEFGHIJKLMNOPQRSTUVWYXZ"; - for(p=pWInfo->pLoops, i=0; p; p=p->pNextLoop, i++){ - p->cId = zLabel[i%(sizeof(zLabel)-1)]; - sqlite3WhereLoopPrint(p, sWLB.pWC); - } - } -#endif + WHERETRACE_ALL_LOOPS(pWInfo, sWLB.pWC); wherePathSolver(pWInfo, 0); if( db->mallocFailed ) goto whereBeginError; From 35d3cb80c4eeeb7a583ba67bff6ad5d2bf8853e2 Mon Sep 17 00:00:00 2001 From: drh Date: Mon, 24 Feb 2020 13:35:34 +0000 Subject: [PATCH 4/5] Disable the new analyzeG.test module if not building with STAT4. FossilOrigin-Name: 4a9d3005769e0398183b03a3e132e3946b9d1c48073af2e0559d7beeac3245c0 --- manifest | 12 ++++++------ manifest.uuid | 2 +- test/analyzeG.test | 5 ++++- 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/manifest b/manifest index 36169e66e6..7ba7392d67 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Merge\sbugfix\sfrom\strunk. -D 2020-02-24T13:26:29.409 +C Disable\sthe\snew\sanalyzeG.test\smodule\sif\snot\sbuilding\swith\sSTAT4. 
+D 2020-02-24T13:35:34.777 F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724 @@ -655,7 +655,7 @@ F test/analyzeC.test 489fe2ea3be3f17548e8dd895f1b41c9669b52de1b0861f5bffe6eec46e F test/analyzeD.test e50cd0b3e6063216cc0c88a1776e8645dc0bd65a6bb275769cbee33b7fd8d90c F test/analyzeE.test 8684e8ac5722fb97c251887ad97e5d496a98af1d F test/analyzeF.test 9e1a0537949eb5483642b1140a5c39e5b4025939024b935398471fa552f4dabb -F test/analyzeG.test c42be77a06331f8677c94b44ba35e170f0771a07d869dffb6b0d78f18b562747 +F test/analyzeG.test a48c0f324dd14de9a40d52abe5ca2637f682b9a791d2523dd619f6efa14e345b F test/analyzer1.test 459fa02c445ddbf0101a3bad47b34290a35f2e49 F test/async.test 1d0e056ba1bb9729283a0f22718d3a25e82c277b F test/async2.test c0a9bd20816d7d6a2ceca7b8c03d3d69c28ffb8b @@ -1859,7 +1859,7 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0 -P c535fea147ce5c6e4aab25d3c85a3f53a7364c5b5ee10fb6d393c5911a02be7e c431b3fd8fd0f6a6974bba3e9366b0430ec003d570e7ce70ceefbcff5fe4b6fa -R 540a6846dd3df06385ce1aa12449a267 +P b542dee9de843c19664c19df7435c6034d23d0d213804d588ec0ff599082d576 +R f29b5aa99c284afcd3f25a8fb29ee973 U drh -Z eee179433b7c26e18c07c4463c4efe9e +Z 9cae8ff3aee64bed711ead2769a79514 diff --git a/manifest.uuid b/manifest.uuid index b8c0a97d33..46fc370397 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -b542dee9de843c19664c19df7435c6034d23d0d213804d588ec0ff599082d576 \ No newline at end of file +4a9d3005769e0398183b03a3e132e3946b9d1c48073af2e0559d7beeac3245c0 \ No newline at end of file diff --git 
a/test/analyzeG.test b/test/analyzeG.test index 5c729b9cb2..eb1853b1dc 100644 --- a/test/analyzeG.test +++ b/test/analyzeG.test @@ -14,6 +14,10 @@ set testdir [file dirname $argv0] source $testdir/tester.tcl +ifcapable !stat4 { + finish_test + return +} set testprefix analyzeG proc do_scan_order_test {tn sql expect} { @@ -82,4 +86,3 @@ do_scan_order_test 1.3.2 { finish_test - From f06cdde2cfb57d40c705044d95effa5b352ed125 Mon Sep 17 00:00:00 2001 From: drh Date: Mon, 24 Feb 2020 16:46:08 +0000 Subject: [PATCH 5/5] Rework this changes so that instead of setting the WhereTerm.truthProb when a term is seen to be of low selectivity, it merely sets a new flag (the TERM_HIGHTRUTH flag) which causes whereLoopOutputAdjust() to ignore that term. FossilOrigin-Name: 4558163b6a525990f0f1b6629dbb76daf49bcaf1ddbaf0c50fe05ce9ee480ff8 --- manifest | 14 +++++++------- manifest.uuid | 2 +- src/where.c | 29 ++++++++++++++++------------- src/whereInt.h | 5 +++++ 4 files changed, 29 insertions(+), 21 deletions(-) diff --git a/manifest b/manifest index 7ba7392d67..ac8c74c984 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Disable\sthe\snew\sanalyzeG.test\smodule\sif\snot\sbuilding\swith\sSTAT4. -D 2020-02-24T13:35:34.777 +C Rework\sthis\schanges\sso\sthat\sinstead\sof\ssetting\sthe\sWhereTerm.truthProb\swhen\na\sterm\sis\sseen\sto\sbe\sof\slow\sselectivity,\sit\smerely\ssets\sa\snew\sflag\n(the\sTERM_HIGHTRUTH\sflag)\swhich\scauses\swhereLoopOutputAdjust()\sto\signore\nthat\sterm. 
+D 2020-02-24T16:46:08.182 F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724 @@ -617,8 +617,8 @@ F src/vxworks.h d2988f4e5a61a4dfe82c6524dd3d6e4f2ce3cdb9 F src/wal.c 697424314e40d99f93f548c7bfa526c10e87f4bdf64d5a76a96b999dd7133ebc F src/wal.h 606292549f5a7be50b6227bd685fa76e3a4affad71bb8ac5ce4cb5c79f6a176a F src/walker.c a137468bf36c92e64d2275caa80c83902e3a0fc59273591b96c6416d3253d05d -F src/where.c 44695e878a287d8c1d4976e2e85bea29994facec3972beb7ca22437d62cda6a5 -F src/whereInt.h 94e3aadcf43b4d16279182d147c9e4f8ef6ed5a5bd1ecc021639c29336b0a3eb +F src/where.c 3b8c9bd013eb0736e16f60bdc109e83337ef99513a3aff5f16ddac036e6c277e +F src/whereInt.h 6b874aa15f94e43a2cec1080be64d955b04deeafeac90ffb5d6975c0d511be3c F src/wherecode.c f5df56e395ade2240cabb2d39500c681bd29f8cc0636c3301c4996ad160df94d F src/whereexpr.c 264d58971eaf8256eb5b0917bcd7fc7a1f1109fdda183a8382308a1b18a2dce7 F src/window.c f8ba2ee12a19b51d3ba42c16277c74185ee9215306bc0d5a03974ade8b5bc98f @@ -1859,7 +1859,7 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0 -P b542dee9de843c19664c19df7435c6034d23d0d213804d588ec0ff599082d576 -R f29b5aa99c284afcd3f25a8fb29ee973 +P 4a9d3005769e0398183b03a3e132e3946b9d1c48073af2e0559d7beeac3245c0 +R 2f59a601b560c09f0f628a3935395fd4 U drh -Z 9cae8ff3aee64bed711ead2769a79514 +Z 144378d74bf58f80a9e1ca77433de7e1 diff --git a/manifest.uuid b/manifest.uuid index 46fc370397..0f1166a39f 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -4a9d3005769e0398183b03a3e132e3946b9d1c48073af2e0559d7beeac3245c0 \ No newline at end 
of file +4558163b6a525990f0f1b6629dbb76daf49bcaf1ddbaf0c50fe05ce9ee480ff8 \ No newline at end of file diff --git a/src/where.c b/src/where.c index 03bfad3b7d..d9f11296fa 100644 --- a/src/where.c +++ b/src/where.c @@ -2298,14 +2298,16 @@ static void whereLoopOutputAdjust( /* In the absence of explicit truth probabilities, use heuristics to ** guess a reasonable truth probability. */ pLoop->nOut--; - if( pTerm->eOperator&(WO_EQ|WO_IS) ){ + if( (pTerm->eOperator&(WO_EQ|WO_IS))!=0 + && (pTerm->wtFlags & TERM_HIGHTRUTH)==0 /* tag-20200224-1 */ + ){ Expr *pRight = pTerm->pExpr->pRight; int k = 0; testcase( pTerm->pExpr->op==TK_IS ); if( sqlite3ExprIsInteger(pRight, &k) && k>=(-1) && k<=1 ){ k = 10; }else{ - k = 20; /* Keep the "20" value in sync. See tag-20200222-1 */ + k = 20; } if( iReduce<k ){ pTerm->wtFlags |= TERM_HEURTRUTH; @@ -2660,23 +2662,23 @@ static int whereLoopAddBtreeIndex( if( nOut ){ pNew->nOut = sqlite3LogEst(nOut); if( nEq==1 - && pTerm->truthProb>0 - /* TUNING: Adjust truthProb from the default heuristic only if the - ** probability is close to 1.0. The "20" constant is copied from - ** the heuristic at tag-20200222-1. Keep values in sync */ - && pNew->nOut+20 > pProbe->aiRowLogEst[0] + /* TUNING: Mark terms as "low selectivity" if they seem likely + ** to be true for half or more of the rows in the table. 
+ ** See tag-20200224-1 */ + && pNew->nOut+10 > pProbe->aiRowLogEst[0] ){ #if WHERETRACE_ENABLED /* 0x01 */ if( sqlite3WhereTrace & 0x01 ){ - sqlite3DebugPrintf("Update truthProb from %d to %d:\n", - pTerm->truthProb, pNew->nOut - pProbe->aiRowLogEst[0]); + sqlite3DebugPrintf( + "STAT4 determines term has low selectivity:\n"); sqlite3WhereTermPrint(pTerm, 999); } #endif - pTerm->truthProb = pNew->nOut - pProbe->aiRowLogEst[0]; + pTerm->wtFlags |= TERM_HIGHTRUTH; if( pTerm->wtFlags & TERM_HEURTRUTH ){ - /* If the old heuristic truthProb was previously used, signal - ** that all loops will need to be recomputed */ + /* If the term has previously been used with an assumption of + ** higher selectivity, then set the flag to rerun the + ** loop computations. */ pBuilder->bldFlags2 |= SQLITE_BLDF2_2NDPASS; } } @@ -4894,7 +4896,8 @@ WhereInfo *sqlite3WhereBegin( if( sWLB.bldFlags2 & SQLITE_BLDF2_2NDPASS ){ WHERETRACE_ALL_LOOPS(pWInfo, sWLB.pWC); WHERETRACE(0xffff, - ("**** Redo all loop computations due to truthProb changes ****\n")); + ("**** Redo all loop computations due to" + " TERM_HIGHTRUTH changes ****\n")); while( pWInfo->pLoops ){ WhereLoop *p = pWInfo->pLoops; pWInfo->pLoops = p->pNextLoop; diff --git a/src/whereInt.h b/src/whereInt.h index 9d4559c742..e33dde55e2 100644 --- a/src/whereInt.h +++ b/src/whereInt.h @@ -292,6 +292,11 @@ struct WhereTerm { #define TERM_IS 0x0800 /* Term.pExpr is an IS operator */ #define TERM_VARSELECT 0x1000 /* Term.pExpr contains a correlated sub-query */ #define TERM_HEURTRUTH 0x2000 /* Heuristic truthProb used */ +#ifdef SQLITE_ENABLE_STAT4 +# define TERM_HIGHTRUTH 0x4000 /* Term excludes few rows */ +#else +# define TERM_HIGHTRUTH 0 /* Only used with STAT4 */ +#endif /* ** An instance of the WhereScan object is used as an iterator for locating