Attempt to vary the size of Bloom filters based on an estimate of how many

keys the filter will hold.

FossilOrigin-Name: a7adcf69088cba4b86cc5731a45c9a5263af4355bc0a38f5225cab421c915f7f
This commit is contained in:
drh 2021-12-06 13:07:28 +00:00
parent 6ae49e67cc
commit 5baaf40af1
6 changed files with 95 additions and 31 deletions

View File

@ -1,5 +1,5 @@
C Run\sas\smany\sBloom\sfilters\sas\spossible\sbefore\sindex\slookups.
D 2021-12-05T20:19:47.744
C Attempt\sto\svary\sthe\ssize\sof\sBloom\sfilters\sbased\son\san\sestimate\sof\show\smany\nkeys\sthe\sfilter\swill\shold.
D 2021-12-06T13:07:28.112
F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1
F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea
F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724
@ -515,7 +515,7 @@ F src/insert.c e0293a6f686e18cb2c9dd0619a731518e0109d7e1f1db1932974659e7843cfd1
F src/legacy.c d7874bc885906868cd51e6c2156698f2754f02d9eee1bae2d687323c3ca8e5aa
F src/loadext.c e1dcff1c916bf6834e150b492eddda5d9792453182d2ad64294d2266b6e93c4c
F src/main.c 7bd4fdc41ef53535271a1816ff043ba153cda03842b444b6e2f57b27b2cb9090
F src/malloc.c ef796bcc0e81d845d59a469f1cf235056caf9024172fd524e32136e65593647b
F src/malloc.c 183c2bf45cee1589254e4047e220f1ffbcc0a3bc8e4fe46fe64ba5db447a79af
F src/mem0.c 6a55ebe57c46ca1a7d98da93aaa07f99f1059645
F src/mem1.c c12a42539b1ba105e3707d0e628ad70e611040d8f5e38cf942cee30c867083de
F src/mem2.c c8bfc9446fd0798bddd495eb5d9dbafa7d4b7287d8c22d50a83ac9daa26d8a75
@ -555,7 +555,7 @@ F src/shell.c.in e7ee6517544d075d9f06ee2571567026b89cf9fbeef16a74918019b1cb42576
F src/sqlite.h.in 5cd209ac7dc4180f0e19292846f40440b8488015849ca0110c70b906b57d68f0
F src/sqlite3.rc 5121c9e10c3964d5755191c80dd1180c122fc3a8
F src/sqlite3ext.h 8ff2fd2c166150b2e48639f5e506fb44e29f1a3f65031710b9e89d1c126ac839
F src/sqliteInt.h 178eb899c1edc08dcddf37e79dfaa39404a1f5d44a1d512509cd5d41867aa836
F src/sqliteInt.h ab40ea9c294c656e0d6ab14e67d58f10b015a77e962dd075fdbe3ea3cc1a976b
F src/sqliteLimit.h d7323ffea5208c6af2734574bae933ca8ed2ab728083caa117c9738581a31657
F src/status.c 4b8bc2a6905163a38b739854a35b826c737333fab5b1f8e03fa7eb9a4799c4c1
F src/table.c 0f141b58a16de7e2fbe81c308379e7279f4c6b50eb08efeec5892794a0ba30d1
@ -622,7 +622,7 @@ F src/upsert.c 8789047a8f0a601ea42fa0256d1ba3190c13746b6ba940fe2d25643a7e991937
F src/utf.c ee39565f0843775cc2c81135751ddd93eceb91a673ea2c57f61c76f288b041a0
F src/util.c 30df8356e231dad33be10bb27897655002668343280004ba28c734489414a167
F src/vacuum.c 6c38ddc52f0619865c91dae9c441d4d48bf3040d7dc1bc5b22da1e45547ed0b3
F src/vdbe.c 94af4eba93ad9ca7dd929cd19792ce2a5feb4797a7a64ec3cb3b2277e1467a8b
F src/vdbe.c 6176125ea038f593597b5897898328142b5253201d321369df74e187b2b1abaa
F src/vdbe.h 25dabb25c7e157b84e59260cfb5b466c3ac103ede9f36f4db371332c47601abe
F src/vdbeInt.h fd1103c7ecec8c84164038c8eacaa4a633cb3c10a2f725aae7bd865d4a4fcceb
F src/vdbeapi.c 22c79072ae7d8a01e9bcae8ba16e918d60d202eaa9553b5fda38f99f7464d99a
@ -637,7 +637,7 @@ F src/vxworks.h d2988f4e5a61a4dfe82c6524dd3d6e4f2ce3cdb9
F src/wal.c ed0398a7adf02c31e34aada42cc86c58f413a7afe5f741a5d373ad087abde028
F src/wal.h c3aa7825bfa2fe0d85bef2db94655f99870a285778baa36307c0a16da32b226a
F src/walker.c f890a3298418d7cba3b69b8803594fdc484ea241206a8dfa99db6dd36f8cbb3b
F src/where.c b07c5eefecffa1b69b91c366a83c69d01a83f1c900b9d9b1ffb6eb5ab59902a1
F src/where.c 04ead529a272341a4cae3ef0dcd2f7675d433627acc5fb87fed1407e7b3d8614
F src/whereInt.h 5c6601d6d0b7b8936482506d2d835981cc6efcd8e106a829893a27a14cfb10b8
F src/wherecode.c fa667db48db1077b42731bfd97e9181b39409ffdc7051162ecae6895ca71ad2c
F src/whereexpr.c 19394cb463003e9cc9305730b1508b8817a22bb7247170d81234b691a7f05b89
@ -1933,7 +1933,7 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93
F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc
F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e
F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0
P 5be2470f8755ef454f813c880e659bdbf82f2396be9320cf3079cd4ca8e81a19
R da3cb867f9ab99abba060de93457c8d9
P 06f6fefd67086896bc49272c6319545ff6c6792f18babe23aced27b60b032119
R 5f90145148ed31800619d7b25d77beb7
U drh
Z d87a509afa829d1cf21b5a6dcadef441
Z 3013773c2d9a03770ac02d1fdd22abd3

View File

@ -1 +1 @@
06f6fefd67086896bc49272c6319545ff6c6792f18babe23aced27b60b032119
a7adcf69088cba4b86cc5731a45c9a5263af4355bc0a38f5225cab421c915f7f

View File

@ -211,6 +211,25 @@ sqlite3_int64 sqlite3_memory_highwater(int resetFlag){
return mx;
}
/*
** Return an estimate of how many more bytes of heap memory could still
** be allocated.
**
** The ceiling used is mem0.alarmThreshold when that value is positive,
** otherwise mem0.hardLimit.  If neither limit is positive, the ceiling
** is LARGEST_INT64 (effectively "no limit").  The result is that ceiling
** minus the value reported by sqlite3_memory_used(), clamped so that it
** is never negative.
**
** This is only an estimate.  It might not be possible to allocate this
** much memory all at once.
*/
sqlite3_int64 sqlite3EstMemoryAvailable(void){
  sqlite3_int64 nLimit;    /* Ceiling on heap usage */
  sqlite3_int64 nAvail;    /* Ceiling less the memory currently in use */

  /* Read both limits while holding the allocator mutex */
  sqlite3_mutex_enter(mem0.mutex);
  nLimit = mem0.alarmThreshold>0 ? mem0.alarmThreshold : mem0.hardLimit;
  sqlite3_mutex_leave(mem0.mutex);

  if( nLimit<=0 ){
    nLimit = LARGEST_INT64;   /* No limit configured */
  }
  nAvail = nLimit - sqlite3_memory_used();
  return nAvail<0 ? 0 : nAvail;
}
/*
** Trigger the alarm
*/

View File

@ -4293,6 +4293,7 @@ void sqlite3MemSetDefault(void);
void sqlite3BenignMallocHooks(void (*)(void), void (*)(void));
#endif
int sqlite3HeapNearlyFull(void);
sqlite3_int64 sqlite3EstMemoryAvailable(void);
/*
** On systems with ample stack space and that support alloca(), make

View File

@ -672,17 +672,29 @@ static Mem *out2Prerelease(Vdbe *p, VdbeOp *pOp){
}
/*
** Default size of a bloom filter, in bytes
** The minimum size (in bytes) for a Bloom filter.
**
** No Bloom filter will be smaller than this many bytes. But they
** may be larger.
*/
#define SQLITE_BLOOM_SZ 10000
#ifndef SQLITE_BLOOM_MIN
# define SQLITE_BLOOM_MIN 10000
#endif
/*
** The maximum size (in bytes) for a Bloom filter.
*/
#ifndef SQLITE_BLOOM_MAX
# define SQLITE_BLOOM_MAX 1000000
#endif
/*
** Compute a bloom filter hash using pOp->p4.i registers from aMem[] beginning
** with pOp->p3. Return the hash.
*/
static unsigned int filterHash(const Mem *aMem, const Op *pOp){
static u64 filterHash(const Mem *aMem, const Op *pOp){
int i, mx;
u32 h = 0;
u64 h = 0;
i = pOp->p3;
assert( pOp->p4type==P4_INT32 );
@ -690,15 +702,15 @@ static unsigned int filterHash(const Mem *aMem, const Op *pOp){
for(i=pOp->p3, mx=i+pOp->p4.i; i<mx; i++){
const Mem *p = &aMem[i];
if( p->flags & (MEM_Int|MEM_IntReal) ){
h += (u32)(p->u.i&0xffffffff);
h += p->u.i;
}else if( p->flags & MEM_Real ){
h += (u32)(sqlite3VdbeIntValue(p)&0xffffffff);
h += sqlite3VdbeIntValue(p);
}else if( p->flags & (MEM_Str|MEM_Blob) ){
h += p->n;
if( p->flags & MEM_Zero ) h += p->u.nZero;
}
}
return h % (SQLITE_BLOOM_SZ*8);
return h;
}
/*
@ -8157,15 +8169,44 @@ case OP_Function: { /* group */
break;
}
/* Opcode: FilterInit P1 * * * *
** Synopsis: filter(P1) = empty
/* Opcode: FilterInit P1 P2 * * *
**
** Initialize register P1 so that it is an empty bloom filter.
**
** If P2 is positive, it is a register that holds an estimate of
** the number of entries to be added to the Bloom filter. The
** Bloom filter is sized accordingly. If P2 is zero or negative,
** then a default-size Bloom filter is created.
**
** It is ok for P1 and P2 to be the same register. In that case the
** integer value originally in that register will be overwritten
** with the new empty bloom filter.
*/
case OP_FilterInit: {
i64 n, mx;
assert( pOp->p1>0 && pOp->p1<=(p->nMem+1 - p->nCursor) );
pIn1 = &aMem[pOp->p1];
sqlite3VdbeMemSetZeroBlob(pIn1, SQLITE_BLOOM_SZ);
if( pOp->p2>0 ){
assert( pOp->p2<=(p->nMem+1 - p->nCursor) );
n = sqlite3VdbeIntValue(&aMem[pOp->p2]);
if( n<SQLITE_BLOOM_MIN ){
n = SQLITE_BLOOM_MIN;
}else if( n>SQLITE_BLOOM_MAX ){
n = SQLITE_BLOOM_MAX;
}
}else{
n = SQLITE_BLOOM_MIN;
}
mx = sqlite3EstMemoryAvailable()/2;
if( n>mx && mx>SQLITE_BLOOM_MIN ){
n = mx;
}
#ifdef SQLITE_DEBUG
if( db->flags&SQLITE_VdbeTrace ){
printf("Bloom-filter size: %llu bytes\n", n);
}
#endif
sqlite3VdbeMemSetZeroBlob(pIn1, n);
if( sqlite3VdbeMemExpandBlob(pIn1) ) goto no_mem;
break;
}
@ -8177,12 +8218,12 @@ case OP_FilterInit: {
** add that hash to the bloom filter contained in r[P1].
*/
case OP_FilterAdd: {
u32 h;
u64 h;
assert( pOp->p1>0 && pOp->p1<=(p->nMem+1 - p->nCursor) );
pIn1 = &aMem[pOp->p1];
assert( pIn1->flags & MEM_Blob );
assert( pIn1->n==SQLITE_BLOOM_SZ );
assert( pIn1->n>0 );
h = filterHash(aMem, pOp);
#ifdef SQLITE_DEBUG
if( db->flags&SQLITE_VdbeTrace ){
@ -8190,10 +8231,10 @@ case OP_FilterAdd: {
for(ii=pOp->p3; ii<pOp->p3+pOp->p4.i; ii++){
registerTrace(ii, &aMem[ii]);
}
printf("hash = %u\n", h);
printf("hash: %llu modulo %d -> %u\n", h, pIn1->n, (int)(h%pIn1->n));
}
#endif
assert( h>=0 && h<SQLITE_BLOOM_SZ*8 );
h %= pIn1->n;
pIn1->z[h/8] |= 1<<(h&7);
break;
}
@ -8213,12 +8254,14 @@ case OP_FilterAdd: {
** false positive - if the jump is taken when it should fall through.
*/
case OP_Filter: { /* jump */
u32 h;
u64 h;
assert( pOp->p1>0 && pOp->p1<=(p->nMem+1 - p->nCursor) );
pIn1 = &aMem[pOp->p1];
assert( pIn1->flags & MEM_Blob );
assert( pIn1->n==SQLITE_BLOOM_SZ );
if( (pIn1->flags & MEM_Blob)==0 || NEVER(pIn1->n<=0) ){
VdbeBranchTaken(0, 2);
break;
}
h = filterHash(aMem, pOp);
#ifdef SQLITE_DEBUG
if( db->flags&SQLITE_VdbeTrace ){
@ -8226,10 +8269,10 @@ case OP_Filter: { /* jump */
for(ii=pOp->p3; ii<pOp->p3+pOp->p4.i; ii++){
registerTrace(ii, &aMem[ii]);
}
printf("hash = %u\n", h);
printf("hash: %llu modulo %d -> %u\n", h, pIn1->n, (int)(h%pIn1->n));
}
#endif
assert( h>=0 && h<SQLITE_BLOOM_SZ*8 );
h %= pIn1->n;
if( (pIn1->z[h/8] & (1<<(h&7)))==0 ){
VdbeBranchTaken(1, 2);
goto jump_to_p2;

View File

@ -1010,8 +1010,9 @@ static SQLITE_NOINLINE void constructBloomFilter(
addrCont = sqlite3VdbeMakeLabel(pParse);
iCur = pLevel->iTabCur;
pLevel->regFilter = ++pParse->nMem;
sqlite3VdbeAddOp1(v, OP_FilterInit, pLevel->regFilter);
addrTop = sqlite3VdbeAddOp1(v, OP_Rewind, iCur); VdbeCoverage(v);
sqlite3VdbeAddOp3(v, OP_Count, iCur, pLevel->regFilter, 1);
sqlite3VdbeAddOp2(v, OP_FilterInit, pLevel->regFilter, pLevel->regFilter);
pWCEnd = &pWInfo->sWC.a[pWInfo->sWC.nTerm];
for(pTerm=pWInfo->sWC.a; pTerm<pWCEnd; pTerm++){
Expr *pExpr = pTerm->pExpr;
@ -1039,7 +1040,7 @@ static SQLITE_NOINLINE void constructBloomFilter(
sqlite3ReleaseTempRange(pParse, r1, n);
}
sqlite3VdbeResolveLabel(v, addrCont);
sqlite3VdbeAddOp2(v, OP_Next, pLevel->iTabCur, addrTop+1);
sqlite3VdbeAddOp2(v, OP_Next, pLevel->iTabCur, addrTop+3);
VdbeCoverage(v);
sqlite3VdbeJumpHere(v, addrTop);
pLoop->wsFlags &= ~WHERE_BLOOMFILTER;