From 7e910f6422553150a62332bdc2f3c21b16184abb Mon Sep 17 00:00:00 2001 From: drh <> Date: Thu, 9 Dec 2021 01:28:15 +0000 Subject: [PATCH] Add SQLITE_TESTCTRL_LOGEST and enhance the LogEst utility program. Improvements to testability of bloom filters. FossilOrigin-Name: 88b43d798cc5aa59855e92d3e658aee9f0a5def6ffbc5db77af048d75ecdf8cc --- manifest | 28 ++++++++++++++-------------- manifest.uuid | 2 +- src/main.c | 20 ++++++++++++++++++++ src/sqlite.h.in | 3 ++- src/sqliteInt.h | 2 -- src/util.c | 2 -- src/vdbe.c | 6 ++---- src/where.c | 34 +++++++++++++++++++--------------- src/whereInt.h | 2 +- src/wherecode.c | 7 +++---- tool/logest.c | 5 ++++- 11 files changed, 66 insertions(+), 45 deletions(-) diff --git a/manifest b/manifest index 4c4a9d684d..7cbde1e303 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Improvements\son\sthe\sdecision\sof\swhether\sor\snot\sto\suse\sa\sBloom\sfilter. -D 2021-12-08T19:50:45.145 +C Add\sSQLITE_TESTCTRL_LOGEST\sand\senhance\sthe\sLogEst\sutility\sprogram.\nImprovements\sto\stestability\sof\sbloom\sfilters. 
+D 2021-12-09T01:28:15.168 F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724 @@ -515,7 +515,7 @@ F src/in-operator.md 10cd8f4bcd225a32518407c2fb2484089112fd71 F src/insert.c e0293a6f686e18cb2c9dd0619a731518e0109d7e1f1db1932974659e7843cfd1 F src/legacy.c d7874bc885906868cd51e6c2156698f2754f02d9eee1bae2d687323c3ca8e5aa F src/loadext.c e1dcff1c916bf6834e150b492eddda5d9792453182d2ad64294d2266b6e93c4c -F src/main.c 1ea70751e6005ab6a9f784730fa0919efaa6639440a287deb73cb711e5aae57a +F src/main.c 674a0fdfc2808e1d5a78b2eefe2ec3f93428cf82f0f6c013d577df1a1caa5940 F src/malloc.c d9172a3946f11384f2fd6a799554ee26c6bb407c4bd0874a456ed485a2e362e4 F src/mem0.c 6a55ebe57c46ca1a7d98da93aaa07f99f1059645 F src/mem1.c c12a42539b1ba105e3707d0e628ad70e611040d8f5e38cf942cee30c867083de @@ -553,10 +553,10 @@ F src/resolve.c 4a1db4aadd802683db40ca2dbbb268187bd195f10cbdb7206dbd8ac988795571 F src/rowset.c ba9515a922af32abe1f7d39406b9d35730ed65efab9443dc5702693b60854c92 F src/select.c a7a3d9f54eb24821ec5f67f2e5589b68a5d42d46fc5849d7376886777d93a85a F src/shell.c.in cda1eaf0292259b4b0721a5e03af9701fd482ebc37ce6a86ddc94cd9a38bb826 -F src/sqlite.h.in bb56040e3c498711c9f77727e477674395a50931ccba8095cfef5c8fb3c3e138 +F src/sqlite.h.in 50c8f27251b11f1c89b06abc6e4085fce15151bcbd355a44609ecb2ba5424841 F src/sqlite3.rc 5121c9e10c3964d5755191c80dd1180c122fc3a8 F src/sqlite3ext.h 8ff2fd2c166150b2e48639f5e506fb44e29f1a3f65031710b9e89d1c126ac839 -F src/sqliteInt.h f4fbb14ea32d57b813aabf82f586d2ac042234dd89df1c03281f557907745b98 +F src/sqliteInt.h 31b9673bc26b5b2a846bf26ce7124c869f64368f4eaac865d9350749ea314000 F src/sqliteLimit.h d7323ffea5208c6af2734574bae933ca8ed2ab728083caa117c9738581a31657 F src/status.c 4b8bc2a6905163a38b739854a35b826c737333fab5b1f8e03fa7eb9a4799c4c1 F src/table.c 
0f141b58a16de7e2fbe81c308379e7279f4c6b50eb08efeec5892794a0ba30d1 @@ -621,9 +621,9 @@ F src/trigger.c 2ef56f0b7b75349a5557d0604b475126329c2e1a02432e7d49c4c710613e8254 F src/update.c d6f5c7b9e072660757ac7d58175aca11c07cb95ebbb297ae7f38853700f52328 F src/upsert.c 8789047a8f0a601ea42fa0256d1ba3190c13746b6ba940fe2d25643a7e991937 F src/utf.c ee39565f0843775cc2c81135751ddd93eceb91a673ea2c57f61c76f288b041a0 -F src/util.c 6dfbd0bd1954e9531e1c511e5d20390d7dab9ffbf1e20a37c960d1aaf8582b46 +F src/util.c 569349b0bddcbfbc661856f446adb92e1b0a47b3cbef548da9fc5aa639d7964c F src/vacuum.c 6c38ddc52f0619865c91dae9c441d4d48bf3040d7dc1bc5b22da1e45547ed0b3 -F src/vdbe.c fd8542b7131f299659871535a41ea732764fb25e4d2931965c97fa36658c50d7 +F src/vdbe.c 855ee903521fcc5a799f673f5b05fc599dc50a31e6cb6a15e1e8a6858087595b F src/vdbe.h 25dabb25c7e157b84e59260cfb5b466c3ac103ede9f36f4db371332c47601abe F src/vdbeInt.h 910985ac2783fe0938b314d811759d53fd25caf215810f62ca1ff068d6d60d7b F src/vdbeapi.c 22c79072ae7d8a01e9bcae8ba16e918d60d202eaa9553b5fda38f99f7464d99a @@ -638,9 +638,9 @@ F src/vxworks.h d2988f4e5a61a4dfe82c6524dd3d6e4f2ce3cdb9 F src/wal.c ed0398a7adf02c31e34aada42cc86c58f413a7afe5f741a5d373ad087abde028 F src/wal.h c3aa7825bfa2fe0d85bef2db94655f99870a285778baa36307c0a16da32b226a F src/walker.c f890a3298418d7cba3b69b8803594fdc484ea241206a8dfa99db6dd36f8cbb3b -F src/where.c 89958d4fc7c45e916882ebc97481d98597f516ce3d778ace3271aacf34e24e91 -F src/whereInt.h c2cb535e755b25a7e152bdb407cbb2f62bdb8747c44bf2d984139f5cbebb8704 -F src/wherecode.c e2207f011b7e5bdef5722da5e8d95eb30ad01051b3526757901ecb19a9e98ff3 +F src/where.c 4946af4e7d073fd35c97ffce4c4b4f3d84f6a0c9e5cb20342560cfaf072ef3a6 +F src/whereInt.h e83f7ba73db5b1b2685118fad67d178fbe04751a25419f0f6ff73e58b4807325 +F src/wherecode.c 560424f5845ec4ef1e36e8a6a066cc497aaf8163fef29fa18a0317e1e14ba9b7 F src/whereexpr.c 791544603b254cf11f8e84e3b50b0863c57322e9f213b828680f658e232ebc57 F src/window.c 
5d3b397b0c026d0ff5890244ac41359e524c01ae31e78782e1ff418c3e271a9e F test/8_3_names.test ebbb5cd36741350040fd28b432ceadf495be25b2 @@ -1856,7 +1856,7 @@ F tool/lemon.c 258881835bd5bccd0c74fb110fe54244ff18e8e7ef3d949cbdab7187f02132bb F tool/lempar.c 57478ea48420da05faa873c6d1616321caa5464644588c97fbe8e0ea04450748 F tool/libvers.c caafc3b689638a1d88d44bc5f526c2278760d9b9 F tool/loadfts.c c3c64e4d5e90e8ba41159232c2189dba4be7b862 -F tool/logest.c 11346aa019e2e77a00902aa7d0cabd27bd2e8cca +F tool/logest.c 83dbfda91615f1db5dce38215303d8bb456f437342d2c64262406dbdd1c931e2 F tool/max-limits.c cbb635fbb37ae4d05f240bfb5b5270bb63c54439 F tool/merge-test.tcl de76b62f2de2a92d4c1ca4f976bce0aea6899e0229e250479b229b2a1914b176 F tool/mkautoconfamal.sh f62353eb6c06ab264da027fd4507d09914433dbdcab9cb011cdc18016f1ab3b8 @@ -1934,7 +1934,7 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0 -P d3250256772e3348abe887c0ca3550a6647cce3804c9456a9d0112aea7ee1c46 -R a7fca9e9fc2528f3e921fdfbeaa2e361 +P 0fb2a4e08f518cb38ea3edc6a084d1e4874fd622ba3cf9101b49b3e7dc1a3f2b +R 13197c53db0503b249bb051590a0a6df U drh -Z 815ca39cf2b788796cb5ebc9be23c44d +Z 80d7c2199cca2c3c9ca5b22286927a11 diff --git a/manifest.uuid b/manifest.uuid index a9d9e689b3..2d8c59a7eb 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -0fb2a4e08f518cb38ea3edc6a084d1e4874fd622ba3cf9101b49b3e7dc1a3f2b \ No newline at end of file +88b43d798cc5aa59855e92d3e658aee9f0a5def6ffbc5db77af048d75ecdf8cc \ No newline at end of file diff --git a/src/main.c b/src/main.c index 804719f176..1b7853f609 100644 --- a/src/main.c +++ b/src/main.c @@ -4347,6 +4347,26 @@ int sqlite3_test_control(int op, ...){ break; } + /* sqlite3_test_control(SQLITE_TESTCTRL_LOGEST, + ** double fIn, // Input value + ** int 
*pLogEst, // sqlite3LogEstFromDouble(fIn) + ** u64 *pInt, // sqlite3LogEstToInt(*pLogEst) + ** int *pLogEst2 // sqlite3LogEst(*pInt) + ** ); + ** + ** Test access for the LogEst conversion routines. + */ + case SQLITE_TESTCTRL_LOGEST: { + double rIn = va_arg(ap, double); + LogEst rLogEst = sqlite3LogEstFromDouble(rIn); + u64 iInt = sqlite3LogEstToInt(rLogEst); + va_arg(ap, int*)[0] = rLogEst; + va_arg(ap, u64*)[0] = iInt; + va_arg(ap, int*)[0] = sqlite3LogEst(iInt); + break; + } + + #if defined(SQLITE_DEBUG) && !defined(SQLITE_OMIT_WSD) /* sqlite3_test_control(SQLITE_TESTCTRL_TUNE, id, *piValue) ** diff --git a/src/sqlite.h.in b/src/sqlite.h.in index 4125122e17..f7cf4d2fe1 100644 --- a/src/sqlite.h.in +++ b/src/sqlite.h.in @@ -7944,7 +7944,8 @@ int sqlite3_test_control(int op, ...); #define SQLITE_TESTCTRL_SEEK_COUNT 30 #define SQLITE_TESTCTRL_TRACEFLAGS 31 #define SQLITE_TESTCTRL_TUNE 32 -#define SQLITE_TESTCTRL_LAST 32 /* Largest TESTCTRL */ +#define SQLITE_TESTCTRL_LOGEST 33 +#define SQLITE_TESTCTRL_LAST 33 /* Largest TESTCTRL */ /* ** CAPI3REF: SQL Keyword Checking diff --git a/src/sqliteInt.h b/src/sqliteInt.h index 90e60d30b0..0104bace6d 100644 --- a/src/sqliteInt.h +++ b/src/sqliteInt.h @@ -4780,9 +4780,7 @@ int sqlite3Utf8CharLen(const char *pData, int nByte); u32 sqlite3Utf8Read(const u8**); LogEst sqlite3LogEst(u64); LogEst sqlite3LogEstAdd(LogEst,LogEst); -#ifndef SQLITE_OMIT_VIRTUALTABLE LogEst sqlite3LogEstFromDouble(double); -#endif u64 sqlite3LogEstToInt(LogEst); VList *sqlite3VListAdd(sqlite3*,VList*,const char*,int,int); const char *sqlite3VListNumToName(VList*,int); diff --git a/src/util.c b/src/util.c index d93c298116..8ea951fa16 100644 --- a/src/util.c +++ b/src/util.c @@ -1586,7 +1586,6 @@ LogEst sqlite3LogEst(u64 x){ return a[x&7] + y - 10; } -#ifndef SQLITE_OMIT_VIRTUALTABLE /* ** Convert a double into a LogEst ** In other words, compute an approximation for 10*log2(x). 
@@ -1601,7 +1600,6 @@ LogEst sqlite3LogEstFromDouble(double x){ e = (a>>52) - 1022; return e*10; } -#endif /* SQLITE_OMIT_VIRTUALTABLE */ /* ** Convert a LogEst into an integer. diff --git a/src/vdbe.c b/src/vdbe.c index 0abe64a71f..ef60ed0123 100644 --- a/src/vdbe.c +++ b/src/vdbe.c @@ -8222,10 +8222,8 @@ case OP_Filter: { /* jump */ assert( pOp->p1>0 && pOp->p1<=(p->nMem+1 - p->nCursor) ); pIn1 = &aMem[pOp->p1]; - if( (pIn1->flags & MEM_Blob)==0 || NEVER(pIn1->n<=0) ){ - VdbeBranchTaken(0, 2); - break; - } + assert( (pIn1->flags & MEM_Blob)!=0 ); + assert( pIn1->n >= 1 ); h = filterHash(aMem, pOp); #ifdef SQLITE_DEBUG if( db->flags&SQLITE_VdbeTrace ){ diff --git a/src/where.c b/src/where.c index 065d14fd64..859dac7cbb 100644 --- a/src/where.c +++ b/src/where.c @@ -1009,7 +1009,7 @@ static SQLITE_NOINLINE void constructBloomFilter( do{ const SrcItem *pItem; const Table *pTab; - int sz; + u64 sz; sqlite3WhereExplainBloomFilter(pParse, pWInfo, pLevel); addrCont = sqlite3VdbeMakeLabel(pParse); iCur = pLevel->iTabCur; @@ -1027,17 +1027,13 @@ static SQLITE_NOINLINE void constructBloomFilter( assert( pItem!=0 ); pTab = pItem->pTab; assert( pTab!=0 ); - if( pTab->tabFlags & TF_HasStat1 ){ - sz = sqlite3LogEstToInt(pItem->pTab->nRowLogEst); - if( sz<10000 ){ - sz = 10000; - }else if( sz>10000000 ){ - sz = 10000000; - } - }else{ + sz = sqlite3LogEstToInt(pTab->nRowLogEst); + if( sz<10000 ){ sz = 10000; + }else if( sz>10000000 ){ + sz = 10000000; } - sqlite3VdbeAddOp2(v, OP_Blob, sz, pLevel->regFilter); + sqlite3VdbeAddOp2(v, OP_Blob, (int)sz, pLevel->regFilter); addrTop = sqlite3VdbeAddOp1(v, OP_Rewind, iCur); VdbeCoverage(v); pWCEnd = &pWInfo->sWC.a[pWInfo->sWC.nTerm]; @@ -2503,7 +2499,11 @@ static void whereLoopOutputAdjust( } if( j<0 ){ if( pLoop->maskSelf==pTerm->prereqAll ){ - pLoop->wsFlags |= WHERE_CULLED; + /* If there are extra terms in the WHERE clause not used by an index + ** that depend only on the table being scanned, and that will tend to + ** cause many 
rows to be omitted, then mark that table as + ** "self-culling". */ + pLoop->wsFlags |= WHERE_SELFCULL; } if( pTerm->truthProb<=0 ){ /* If a truth probability is specified using the likelihood() hints, @@ -4974,10 +4974,13 @@ static SQLITE_NOINLINE Bitmask whereOmitNoopJoin( ** ** (1) The SEARCH happens more than N times where N is the number ** of rows in the table that is being considered for the Bloom -** filter. (TO DO: Make this condition more precise.) -** (2) Most searches are expected to find zero rows +** filter. +** (2) Some searches are expected to find zero rows. (This is determined +** by the WHERE_SELFCULL flag on the term.) ** (3) The table being searched is not the right table of a LEFT JOIN -** (4) Bloom-filter processing is not disabled +** (4) Bloom-filter processing is not disabled. (Checked by the +** caller.) +** (5) The size of the table being searched is known by ANALYZE. ** ** This block of code merely checks to see if a Bloom filter would be ** appropriate, and if so sets the WHERE_BLOOMFILTER flag on the @@ -4995,7 +4998,7 @@ static SQLITE_NOINLINE void whereCheckIfBloomFilterIsUseful( nSearch = pWInfo->a[0].pWLoop->nOut; for(i=1; inLevel; i++){ WhereLoop *pLoop = pWInfo->a[i].pWLoop; - const int reqFlags = (WHERE_CULLED|WHERE_COLUMN_EQ); + const int reqFlags = (WHERE_SELFCULL|WHERE_COLUMN_EQ); if( (pLoop->wsFlags & reqFlags)==reqFlags && (pLoop->wsFlags & (WHERE_IPK|WHERE_INDEXED))!=0 ){ @@ -5004,6 +5007,7 @@ static SQLITE_NOINLINE void whereCheckIfBloomFilterIsUseful( pTab->tabFlags |= TF_StatsUsed; if( nSearch > pTab->nRowLogEst && (pItem->fg.jointype & JT_LEFT)==0 + && (pTab->tabFlags & TF_HasStat1)!=0 ){ pLoop->wsFlags |= WHERE_BLOOMFILTER; pLoop->wsFlags &= ~WHERE_IDX_ONLY; diff --git a/src/whereInt.h b/src/whereInt.h index d790653a71..8051b78a02 100644 --- a/src/whereInt.h +++ b/src/whereInt.h @@ -608,6 +608,6 @@ void sqlite3WhereTabFuncArgs(Parse*, SrcItem*, WhereClause*); #define WHERE_IN_SEEKSCAN 0x00100000 /* Seek-scan 
optimization for IN */ #define WHERE_TRANSCONS 0x00200000 /* Uses a transitive constraint */ #define WHERE_BLOOMFILTER 0x00400000 /* Consider using a Bloom-filter */ -#define WHERE_CULLED 0x00800000 /* nOut reduced by extra WHERE terms */ +#define WHERE_SELFCULL 0x00800000 /* nOut reduced by extra WHERE terms */ #endif /* !defined(SQLITE_WHEREINT_H) */ diff --git a/src/wherecode.c b/src/wherecode.c index 42cfcc9eac..05d1f12ced 100644 --- a/src/wherecode.c +++ b/src/wherecode.c @@ -1393,13 +1393,12 @@ static SQLITE_NOINLINE void filterPullDown( if( pLoop->prereq & notReady ) continue; if( pLoop->wsFlags & WHERE_IPK ){ WhereTerm *pTerm = pLoop->aLTerm[0]; - int r1, regRowid; + int regRowid; assert( pTerm!=0 ); assert( pTerm->pExpr!=0 ); testcase( pTerm->wtFlags & TERM_VIRTUAL ); - r1 = sqlite3GetTempReg(pParse); - regRowid = codeEqualityTerm(pParse, pTerm, pLevel, 0, 0, r1); - if( regRowid!=r1 ) sqlite3ReleaseTempReg(pParse, r1); + regRowid = sqlite3GetTempReg(pParse); + regRowid = codeEqualityTerm(pParse, pTerm, pLevel, 0, 0, regRowid); sqlite3VdbeAddOp4Int(pParse->pVdbe, OP_Filter, pLevel->regFilter, addrNxt, regRowid, 1); VdbeCoverage(pParse->pVdbe); diff --git a/tool/logest.c b/tool/logest.c index e936e02cbe..580a3a6f96 100644 --- a/tool/logest.c +++ b/tool/logest.c @@ -75,6 +75,7 @@ static sqlite3_uint64 logEstToInt(LogEst x){ x /= 10; if( n>=5 ) n -= 2; else if( n>=1 ) n -= 1; + if( x>60 ) return (((sqlite3_uint64)0xffffffff)<<32) + (sqlite3_uint64)0xffffffff; /* clamp to the all-ones 64-bit value; parentheses required because + binds tighter than << */ if( x>=3 ) return (n+8)<<(x-3); return (n+8)>>(3-x); } @@ -149,7 +150,7 @@ int main(int argc, char **argv){ }else if( z[0]=='^' ){ a[n++] = (LogEst)atoi(z+1); }else if( isInteger(z) ){ - a[n++] = logEstFromInteger(atoi(z)); + a[n++] = logEstFromInteger(atoll(z)); }else if( isFloat(z) && z[0]!='-' ){ a[n++] = logEstFromDouble(atof(z)); }else{ @@ -161,6 +162,8 @@ int main(int argc, char **argv){ printf("%5d (%f)\n", a[i], 1.0/(double)logEstToInt(-a[i])); }else if( a[i]<10 ){ printf("%5d (%f)\n", a[i], 
logEstToInt(a[i]+100)/1024.0); + }else if( a[i]>100 ){ + printf("%5d (%llu)\n", a[i], logEstToInt(a[i])); /* logEstToInt() returns an unsigned 64-bit value, so print it unsigned */ }else{ sqlite3_uint64 x = logEstToInt(a[i]+100)*100/1024; printf("%5d (%lld.%02lld)\n", a[i], x/100, x%100);