diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index e86da1c06e..3ae709f622 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -583,6 +583,41 @@ static u16 fts5GetU16(const u8 *aIn){ return ((u16)aIn[0] << 8) + aIn[1]; } +static u64 fts5GetU64(u8 *a){ + return ((u64)a[0] << 56) + + ((u64)a[1] << 48) + + ((u64)a[2] << 40) + + ((u64)a[3] << 32) + + ((u64)a[4] << 24) + + ((u64)a[5] << 16) + + ((u64)a[6] << 8) + + ((u64)a[7] << 0); +} + +static void fts5PutU64(u8 *a, u64 iVal){ + a[0] = ((iVal >> 56) & 0xFF); + a[1] = ((iVal >> 48) & 0xFF); + a[2] = ((iVal >> 40) & 0xFF); + a[3] = ((iVal >> 32) & 0xFF); + a[4] = ((iVal >> 24) & 0xFF); + a[5] = ((iVal >> 16) & 0xFF); + a[6] = ((iVal >> 8) & 0xFF); + a[7] = ((iVal >> 0) & 0xFF); +} + +static u32 fts5GetU32(const u8 *a){ + return ((u32)a[0] << 24) + + ((u32)a[1] << 16) + + ((u32)a[2] << 8) + + ((u32)a[3] << 0); +} +static void fts5PutU32(u8 *a, u32 iVal){ + a[0] = ((iVal >> 24) & 0xFF); + a[1] = ((iVal >> 16) & 0xFF); + a[2] = ((iVal >> 8) & 0xFF); + a[3] = ((iVal >> 0) & 0xFF); +} + /* ** Allocate and return a buffer at least nByte bytes in size. ** @@ -2987,26 +3022,26 @@ static void fts5MultiIterSetEof(Fts5Iter *pIter){ pIter->iSwitchRowid = pSeg->iRowid; } -static u64 fts5GetU64(u8 *a){ - return ((u64)a[0] << 56) - + ((u64)a[1] << 48) - + ((u64)a[2] << 40) - + ((u64)a[3] << 32) - + ((u64)a[4] << 24) - + ((u64)a[5] << 16) - + ((u64)a[6] << 8) - + ((u64)a[7] << 0); -} +static int fts5IndexTombstoneQuery(u8 *aHash, int nHash, u64 iRowid){ + int szKey = aHash[3] ? 8 : 4; + int nSlot = (nHash - 8) / szKey; + int iSlot = iRowid % nSlot; -static void fts5PutU64(u8 *a, u64 iVal){ - a[0] = ((iVal >> 56) & 0xFF); - a[1] = ((iVal >> 48) & 0xFF); - a[2] = ((iVal >> 40) & 0xFF); - a[3] = ((iVal >> 32) & 0xFF); - a[4] = ((iVal >> 24) & 0xFF); - a[5] = ((iVal >> 16) & 0xFF); - a[6] = ((iVal >> 8) & 0xFF); - a[7] = ((iVal >> 0) & 0xFF); + if( szKey==4 ){ + u32 *aSlot = (u32*)&aHash[8]; + while( aSlot[iSlot] ){ + if( fts5GetU32((u8*)&aSlot[iSlot])==iRowid ) return 1; + iSlot = (iSlot+1)%nSlot; + } + }else{ + u64 *aSlot = (u64*)&aHash[8]; + while( aSlot[iSlot] ){ + if( fts5GetU64((u8*)&aSlot[iSlot])==iRowid ) return 1; + iSlot = (iSlot+1)%nSlot; + } + } + + return 0; } static int fts5MultiIterIsDeleted(Fts5Iter *pIter){ @@ -3014,11 +3049,9 @@ static int fts5MultiIterIsDeleted(Fts5Iter *pIter){ Fts5SegIter *pSeg = &pIter->aSeg[iFirst]; if( pSeg->pTombstone ){ - int ii; - for(ii=0; iipTombstone->nn; ii+=8){ - i64 iVal = (i64)fts5GetU64(&pSeg->pTombstone->p[ii]); - if( iVal==pSeg->iRowid ) return 1; - } + return fts5IndexTombstoneQuery( + pSeg->pTombstone->p, pSeg->pTombstone->nn, pSeg->iRowid + ); } return 0; @@ -6380,35 +6413,103 @@ int sqlite3Fts5IndexGetLocation(Fts5Index *p, i64 *piLoc){ return fts5IndexReturn(p); } +/* +** Add a tombstone for rowid iRowid to segment pSeg. +** +** All tombstones for a single segment are stored in a blob formatted to +** contain a hash table. The format is: +** +** * 32-bit integer. 1 for 64-bit unsigned keys, 0 for 32-bit unsigned keys. +** * 32-bit integer. The number of entries currently in the hash table. +** +** Then an array of entries. The number of entries can be calculated based +** on the size of the blob in the database and the size of the keys as +** specified by the first 32-bit field of the hash table header. +** +** All values in the hash table are stored as big-endian integers. +*/ static void fts5IndexTombstoneAdd( Fts5Index *p, Fts5StructureSegment *pSeg, - i64 iRowid + u64 iRowid ){ Fts5Data *pHash = 0; + u8 *aFree = 0; u8 *aNew = 0; int nNew = 0; + int szKey = 0; + int nSlot = 0; + int bKey64 = (iRowid>0xFFFFFFFF); + u32 nHash = 0; + /* Load the current hash table, if any */ pHash = fts5DataReadOpt(p, FTS5_TOMBSTONE_ROWID(pSeg->iSegid)); if( p->rc ) return; if( pHash ){ - nNew = 8 + pHash->nn; - }else{ - nNew = 8; + szKey = pHash->p[3] ? 8 : 4; + nSlot = (pHash->nn - 8) / szKey; + nHash = fts5GetU32(&pHash->p[4]); } - aNew = sqlite3_malloc(nNew); - if( aNew==0 ){ - p->rc = SQLITE_NOMEM; - }else{ - if( pHash ){ - memcpy(aNew, pHash->p, pHash->nn); + + /* Check if the current hash table needs to be rebuilt. Either because + ** (a) it does not yet exist, (b) it is full, or (c) it is using the + ** wrong sized keys. */ + if( pHash==0 || nSlot<=(nHash*2) || (bKey64 && szKey==4) ){ + int szNewKey = (bKey64 || szKey==8) ? 8 : 4; + int nNewSlot = (nSlot ? nSlot*2 : 16); + + nNew = 8 + (nNewSlot * szNewKey); + aFree = aNew = (u8*)sqlite3Fts5MallocZero(&p->rc, nNew); + if( aNew ){ + int iSlot = 0; + int ii; + fts5PutU32(aNew, (szNewKey==8 ? 1 : 0)); + for(ii=0; iip[8 + ii*szKey]); + }else{ + iVal = fts5GetU64(&pHash->p[8 + ii*szKey]); + } + + iSlot = iVal % nNewSlot; + if( szNewKey==4 ){ + u32 *aSlot = (u32*)&aNew[8]; + while( aSlot[iSlot]!=0 ) iSlot = (iSlot+1) % nNewSlot; + fts5PutU32((u8*)&aSlot[iSlot], (u32)iVal); + }else{ + u64 *aSlot = (u64*)&aNew[8]; + while( aSlot[iSlot]!=0 ) iSlot = (iSlot+1) % nNewSlot; + fts5PutU64((u8*)&aSlot[iSlot], iRowid); + } + } } - fts5PutU64(&aNew[nNew-8], iRowid); + szKey = szNewKey; + nSlot = nNewSlot; + }else{ + aNew = pHash->p; + nNew = pHash->nn; + } + + if( aNew ){ + int iSlot = (iRowid % nSlot); + if( szKey==4 ){ + u32 *aSlot = (u32*)&aNew[8]; + while( aSlot[iSlot]!=0 ) iSlot = (iSlot+1) % nSlot; + fts5PutU32((u8*)&aSlot[iSlot], (u32)iRowid); + }else{ + u64 *aSlot = (u64*)&aNew[8]; + while( aSlot[iSlot]!=0 ) iSlot = (iSlot+1) % nSlot; + fts5PutU64((u8*)&aSlot[iSlot], iRowid); + } + + fts5PutU32((u8*)&aNew[4], nHash+1); + assert( nNew>8 ); fts5DataWrite(p, FTS5_TOMBSTONE_ROWID(pSeg->iSegid), aNew, nNew); } - sqlite3_free(aNew); + sqlite3_free(aFree); fts5DataRelease(pHash); } diff --git a/ext/fts5/test/fts5contentless.test b/ext/fts5/test/fts5contentless.test index 9fc5667589..b9b7a2a4a1 100644 --- a/ext/fts5/test/fts5contentless.test +++ b/ext/fts5/test/fts5contentless.test @@ -164,11 +164,21 @@ foreach v {A B C D E F G H I J K L M N O P Q R S T U V W X Y Z} { do_test 4.6.$v { set L1 } $L2 } -execsql_pp { - SELECT fts5_decode(id, block) FROM ft_data +#execsql_pp { SELECT fts5_decode(id, block) FROM ft_data } + +#------------------------------------------------------------------------- +reset_db +do_execsql_test 5.0 { + CREATE VIRTUAL TABLE ft USING fts5(x, content='', contentless_delete=1); + INSERT INTO ft(rowid, x) VALUES(1, 'one two three'); + INSERT INTO ft(rowid, x) VALUES(2, 'one two four'); + INSERT INTO ft(rowid, x) VALUES(3, 'one two five'); } - +breakpoint +do_execsql_test 5.1 { + INSERT INTO ft(ft, rowid) VALUES('delete', 2); +} finish_test diff --git a/manifest b/manifest index 389da73618..8f2c159d4e 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Begin\sadding\ssupport\sfor\sdeleting\srows\sfrom\scontentless\sfts5\stables. -D 2023-07-10T20:44:09.251 +C Use\sa\shash-table\sinstead\sof\sa\sflat\slist\sto\sstore\stombstone\srowids. +D 2023-07-11T18:55:19.003 F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724 @@ -92,7 +92,7 @@ F ext/fts5/fts5_buffer.c 3001fbabb585d6de52947b44b455235072b741038391f830d6b7292 F ext/fts5/fts5_config.c 010fabcc0aaa0dfa76b19146e8bddf7de368933eeac01e294af6607447500caa F ext/fts5/fts5_expr.c 58fb8ceddfb1cefcd54510f9f2f33c220ef9d1b3fa77462111f5ae2a825ab7b1 F ext/fts5/fts5_hash.c d4fb70940359f2120ccd1de7ffe64cc3efe65de9e8995b822cd536ff64c96982 -F ext/fts5/fts5_index.c 80fdc17d423f0b881109b397bbfb167830e3c2dc06a8399aded75beba7ef3903 +F ext/fts5/fts5_index.c 60c815859589d279ea237a4fdb88386cd5e154288c1c7963e2834ff1edf24915 F ext/fts5/fts5_main.c 0f4d21152f23fb5182310d1cb2565bbdf2a8085888185a0f1f9117d2c265cc10 F ext/fts5/fts5_storage.c beff4be2a53c530676d59355b408733ab28202ae351a0840fa211df17b103c4a F ext/fts5/fts5_tcl.c b1445cbe69908c411df8084a10b2485500ac70a9c747cdc8cda175a3da59d8ae @@ -132,7 +132,7 @@ F ext/fts5/test/fts5config.test 60094712debc59286c59aef0e6cf511c37d866802776a825 F ext/fts5/test/fts5conflict.test 655925678e630d3cdf145d18725a558971806416f453ac8410ca8c04d934238d F ext/fts5/test/fts5connect.test 08030168fc96fc278fa81f28654fb7e90566f33aff269c073e19b3ae9126b2f4 F ext/fts5/test/fts5content.test 213506436fb2c87567b8e31f6d43ab30aab99354cec74ed679f22aad0cdbf283 -F ext/fts5/test/fts5contentless.test e3cee6bac3681707031d2cd5f957178fa43c0d856e90c0ea6fcb3c1bb2fff154 +F ext/fts5/test/fts5contentless.test b807a15020dfae84f215370f08d7270aa01bbcc5abdb54a42ec2dee8998e4842 F ext/fts5/test/fts5corrupt.test 77ae6f41a7eba10620efb921cf7dbe218b0ef232b04519deb43581cb17a57ebe F ext/fts5/test/fts5corrupt2.test 7453752ba12ce91690c469a6449d412561cc604b1dec994e16ab132952e7805f F ext/fts5/test/fts5corrupt3.test 7da9895dafa404efd20728f66ff4b94399788bdc042c36fe2689801bba2ccd78 @@ -2044,11 +2044,8 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0 -P 07d95ed60f0a17ea13b4bc19c2ab2ec9052fedd27c9e1e57a1ec6e3a6470e5b7 -R 04413d3e9bd70379e2afd8eeb162f8d3 -T *branch * fts5-contentless-delete -T *sym-fts5-contentless-delete * -T -sym-trunk * +P e513bea84dfaf2280f7429c9a528b3a1354a46c36e58ab178ca45478975634e0 +R e240ab2da08f49c28fa1cdca40ac2a12 U dan -Z 17a9272f129a5cd1a6eee3f0f514cf63 +Z d2566e3569908687c69b9bc52ae7c980 # Remove this line to create a well-formed Fossil manifest. diff --git a/manifest.uuid b/manifest.uuid index 0166f4e233..1aeb8a479d 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -e513bea84dfaf2280f7429c9a528b3a1354a46c36e58ab178ca45478975634e0 \ No newline at end of file +948267b066d0dbe667881b3d26a007fa24576da6e57c112676fadeb846c13f0b \ No newline at end of file