Use a hash-table instead of a flat list to store tombstone rowids.

FossilOrigin-Name: 948267b066d0dbe667881b3d26a007fa24576da6e57c112676fadeb846c13f0b
This commit is contained in:
dan 2023-07-11 18:55:19 +00:00
parent 6788c7b7c0
commit d23f210377
4 changed files with 158 additions and 50 deletions

View File

@ -583,6 +583,41 @@ static u16 fts5GetU16(const u8 *aIn){
return ((u16)aIn[0] << 8) + aIn[1];
}
/*
** Decode and return the 64-bit unsigned integer stored in big-endian
** (most-significant byte first) order in the 8 bytes starting at a[0].
**
** The buffer is only read, so the parameter is const-qualified in the
** same way as for fts5GetU16() and fts5GetU32().
*/
static u64 fts5GetU64(const u8 *a){
  return ((u64)a[0] << 56)
       + ((u64)a[1] << 48)
       + ((u64)a[2] << 40)
       + ((u64)a[3] << 32)
       + ((u64)a[4] << 24)
       + ((u64)a[5] << 16)
       + ((u64)a[6] << 8)
       + ((u64)a[7] << 0);
}
/*
** Store the 64-bit unsigned integer iVal in the 8 bytes starting at a[0],
** most-significant byte first (big-endian).
*/
static void fts5PutU64(u8 *a, u64 iVal){
  int ii;
  /* Fill from the last byte backwards, peeling off the low 8 bits of
  ** the value on each pass. */
  for(ii=7; ii>=0; ii--){
    a[ii] = (u8)(iVal & 0xFF);
    iVal >>= 8;
  }
}
/*
** Decode and return the 32-bit unsigned integer stored in big-endian
** (most-significant byte first) order in the 4 bytes starting at a[0].
*/
static u32 fts5GetU32(const u8 *a){
  u32 iRet = 0;
  int ii;
  /* Shift in one byte at a time, most-significant byte first. */
  for(ii=0; ii<4; ii++){
    iRet = (iRet << 8) | (u32)a[ii];
  }
  return iRet;
}
/*
** Store the 32-bit unsigned integer iVal in the 4 bytes starting at a[0],
** most-significant byte first (big-endian).
*/
static void fts5PutU32(u8 *a, u32 iVal){
  int ii;
  /* Fill from the last byte backwards, peeling off the low 8 bits of
  ** the value on each pass. */
  for(ii=3; ii>=0; ii--){
    a[ii] = (u8)(iVal & 0xFF);
    iVal >>= 8;
  }
}
/*
** Allocate and return a buffer at least nByte bytes in size.
**
@ -2987,26 +3022,26 @@ static void fts5MultiIterSetEof(Fts5Iter *pIter){
pIter->iSwitchRowid = pSeg->iRowid;
}
/*
** Query the tombstone hash table stored in the nHash byte buffer aHash
** for rowid iRowid. Return 1 if the rowid is present, or 0 otherwise.
**
** The buffer begins with an 8 byte header. Byte aHash[3] is non-zero if
** the table uses 8 byte keys and zero if it uses 4 byte keys. The rest of
** the buffer is an array of fixed-size slots, each holding a big-endian
** key. A slot that contains 0 is empty. Collisions are resolved by linear
** probing, starting at slot (iRowid % nSlot).
**
** A buffer with no room for any slot is treated as an empty table rather
** than dividing by zero, and the probe sequence is limited to nSlot steps
** so that a table with no empty slot cannot cause an infinite loop.
**
** Keys are decoded one byte at a time, so no alignment of aHash is
** required.
*/
static int fts5IndexTombstoneQuery(u8 *aHash, int nHash, u64 iRowid){
  int szKey;                      /* Size of each key in bytes: 4 or 8 */
  int nSlot;                      /* Number of slots in the hash table */
  int iSlot;                      /* Slot currently being examined */
  int nProbe;                     /* Number of slots examined so far */

  if( nHash<=8 ) return 0;
  szKey = aHash[3] ? 8 : 4;
  nSlot = (nHash - 8) / szKey;
  if( nSlot<=0 ) return 0;

  iSlot = (int)(iRowid % (u64)nSlot);
  for(nProbe=0; nProbe<nSlot; nProbe++){
    const u8 *aKey = &aHash[8 + iSlot*szKey];
    u64 iKey = 0;
    int ii;
    for(ii=0; ii<szKey; ii++){
      iKey = (iKey << 8) | aKey[ii];
    }
    if( iKey==0 ) break;          /* Empty slot: iRowid is not in the table */
    if( iKey==iRowid ) return 1;
    iSlot = (iSlot+1) % nSlot;
  }

  return 0;
}
static int fts5MultiIterIsDeleted(Fts5Iter *pIter){
@ -3014,11 +3049,9 @@ static int fts5MultiIterIsDeleted(Fts5Iter *pIter){
Fts5SegIter *pSeg = &pIter->aSeg[iFirst];
if( pSeg->pTombstone ){
int ii;
for(ii=0; ii<pSeg->pTombstone->nn; ii+=8){
i64 iVal = (i64)fts5GetU64(&pSeg->pTombstone->p[ii]);
if( iVal==pSeg->iRowid ) return 1;
}
return fts5IndexTombstoneQuery(
pSeg->pTombstone->p, pSeg->pTombstone->nn, pSeg->iRowid
);
}
return 0;
@ -6380,35 +6413,103 @@ int sqlite3Fts5IndexGetLocation(Fts5Index *p, i64 *piLoc){
return fts5IndexReturn(p);
}
/*
** Insert key iKey into the open-addressed hash table that consists of
** nSlot slots of szKey (4 or 8) bytes each, starting at aSlot. Keys are
** stored as big-endian integers and a slot containing 0 is empty. The
** first empty slot at or after (iKey % nSlot), wrapping around, is used.
**
** The caller must ensure that nSlot is greater than zero and that the
** table contains at least one empty slot.
*/
static void fts5TombstoneHashInsert(u8 *aSlot, int szKey, int nSlot, u64 iKey){
  int iSlot = (int)(iKey % (u64)nSlot);
  if( szKey==4 ){
    while( fts5GetU32(&aSlot[iSlot*4])!=0 ) iSlot = (iSlot+1) % nSlot;
    fts5PutU32(&aSlot[iSlot*4], (u32)iKey);
  }else{
    while( fts5GetU64(&aSlot[iSlot*8])!=0 ) iSlot = (iSlot+1) % nSlot;
    fts5PutU64(&aSlot[iSlot*8], iKey);
  }
}

/*
** Add a tombstone for rowid iRowid to segment pSeg.
**
** All tombstones for a single segment are stored in a blob formatted to
** contain a hash table. The format is:
**
**   * 32-bit integer. 1 for 64-bit unsigned keys, 0 for 32-bit unsigned keys.
**   * 32-bit integer. The number of entries currently in the hash table.
**
** Then an array of entries. The number of entries can be calculated based
** on the size of the blob in the database and the size of the keys as
** specified by the first 32-bit field of the hash table header.
**
** All values in the hash table are stored as big-endian integers.
**
** If an error has occurred by the time the existing blob has been read
** (p->rc is non-zero), this function returns without writing anything.
**
** NOTE(review): a slot containing 0 means "empty", so a rowid of 0 cannot
** be represented in the table. No check is made for iRowid already being
** present either - confirm that callers never add the same rowid twice.
*/
static void fts5IndexTombstoneAdd(
  Fts5Index *p,
  Fts5StructureSegment *pSeg,
  u64 iRowid
){
  Fts5Data *pHash = 0;            /* Existing hash table blob, if any */
  u8 *aFree = 0;                  /* Rebuilt table to free before returning */
  u8 *aNew = 0;                   /* Table that iRowid is inserted into */
  int nNew = 0;                   /* Size of aNew[] in bytes */
  int szKey = 0;                  /* Key size in bytes (4 or 8) */
  int nSlot = 0;                  /* Number of slots in the table */
  int bKey64 = (iRowid>0xFFFFFFFF);   /* True if iRowid needs 64-bit keys */
  u32 nHash = 0;                  /* Entries in the table before this call */

  /* Load the current hash table, if any */
  pHash = fts5DataReadOpt(p, FTS5_TOMBSTONE_ROWID(pSeg->iSegid));
  if( p->rc ) return;

  if( pHash ){
    szKey = pHash->p[3] ? 8 : 4;
    nSlot = (pHash->nn - 8) / szKey;
    nHash = fts5GetU32(&pHash->p[4]);
  }

  /* Check if the current hash table needs to be rebuilt. Either because
  ** (a) it does not yet exist, (b) it is full, or (c) it is using the
  ** wrong sized keys. */
  if( pHash==0 || nSlot<=(nHash*2) || (bKey64 && szKey==4) ){
    int szNewKey = (bKey64 || szKey==8) ? 8 : 4;
    int nNewSlot = (nSlot ? nSlot*2 : 16);

    nNew = 8 + (nNewSlot * szNewKey);
    aFree = aNew = (u8*)sqlite3Fts5MallocZero(&p->rc, nNew);
    if( aNew ){
      int ii;
      fts5PutU32(aNew, (szNewKey==8 ? 1 : 0));

      /* Copy each existing key into the new table. The key copied is the
      ** one read from the old table (iVal), not the rowid being added.
      ** Empty slots in the old table are skipped. */
      for(ii=0; ii<nSlot; ii++){
        u64 iVal;
        if( szKey==4 ){
          iVal = (u64)fts5GetU32(&pHash->p[8 + ii*szKey]);
        }else{
          iVal = fts5GetU64(&pHash->p[8 + ii*szKey]);
        }
        if( iVal ){
          fts5TombstoneHashInsert(&aNew[8], szNewKey, nNewSlot, iVal);
        }
      }
    }
    szKey = szNewKey;
    nSlot = nNewSlot;
  }else{
    /* The existing table has room: update the loaded buffer in place */
    aNew = pHash->p;
    nNew = pHash->nn;
  }

  if( aNew ){
    fts5TombstoneHashInsert(&aNew[8], szKey, nSlot, iRowid);
    fts5PutU32(&aNew[4], nHash+1);
    assert( nNew>8 );
    fts5DataWrite(p, FTS5_TOMBSTONE_ROWID(pSeg->iSegid), aNew, nNew);
  }

  /* aFree is NULL unless the table was rebuilt. pHash may also be NULL. */
  sqlite3_free(aFree);
  fts5DataRelease(pHash);
}

View File

@ -164,11 +164,21 @@ foreach v {A B C D E F G H I J K L M N O P Q R S T U V W X Y Z} {
do_test 4.6.$v { set L1 } $L2
}
execsql_pp {
SELECT fts5_decode(id, block) FROM ft_data
#execsql_pp { SELECT fts5_decode(id, block) FROM ft_data }
#-------------------------------------------------------------------------
reset_db

# Create a contentless table with contentless_delete=1 and add three rows.
do_execsql_test 5.0 {
  CREATE VIRTUAL TABLE ft USING fts5(x, content='', contentless_delete=1);
  INSERT INTO ft(rowid, x) VALUES(1, 'one two three');
  INSERT INTO ft(rowid, x) VALUES(2, 'one two four');
  INSERT INTO ft(rowid, x) VALUES(3, 'one two five');
}

# Issue the 'delete' command for rowid 2. The statement is expected to
# succeed and return no rows.
do_execsql_test 5.1 {
  INSERT INTO ft(ft, rowid) VALUES('delete', 2);
}
finish_test

View File

@ -1,5 +1,5 @@
C Begin\sadding\ssupport\sfor\sdeleting\srows\sfrom\scontentless\sfts5\stables.
D 2023-07-10T20:44:09.251
C Use\sa\shash-table\sinstead\sof\sa\sflat\slist\sto\sstore\stombstone\srowids.
D 2023-07-11T18:55:19.003
F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1
F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea
F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724
@ -92,7 +92,7 @@ F ext/fts5/fts5_buffer.c 3001fbabb585d6de52947b44b455235072b741038391f830d6b7292
F ext/fts5/fts5_config.c 010fabcc0aaa0dfa76b19146e8bddf7de368933eeac01e294af6607447500caa
F ext/fts5/fts5_expr.c 58fb8ceddfb1cefcd54510f9f2f33c220ef9d1b3fa77462111f5ae2a825ab7b1
F ext/fts5/fts5_hash.c d4fb70940359f2120ccd1de7ffe64cc3efe65de9e8995b822cd536ff64c96982
F ext/fts5/fts5_index.c 80fdc17d423f0b881109b397bbfb167830e3c2dc06a8399aded75beba7ef3903
F ext/fts5/fts5_index.c 60c815859589d279ea237a4fdb88386cd5e154288c1c7963e2834ff1edf24915
F ext/fts5/fts5_main.c 0f4d21152f23fb5182310d1cb2565bbdf2a8085888185a0f1f9117d2c265cc10
F ext/fts5/fts5_storage.c beff4be2a53c530676d59355b408733ab28202ae351a0840fa211df17b103c4a
F ext/fts5/fts5_tcl.c b1445cbe69908c411df8084a10b2485500ac70a9c747cdc8cda175a3da59d8ae
@ -132,7 +132,7 @@ F ext/fts5/test/fts5config.test 60094712debc59286c59aef0e6cf511c37d866802776a825
F ext/fts5/test/fts5conflict.test 655925678e630d3cdf145d18725a558971806416f453ac8410ca8c04d934238d
F ext/fts5/test/fts5connect.test 08030168fc96fc278fa81f28654fb7e90566f33aff269c073e19b3ae9126b2f4
F ext/fts5/test/fts5content.test 213506436fb2c87567b8e31f6d43ab30aab99354cec74ed679f22aad0cdbf283
F ext/fts5/test/fts5contentless.test e3cee6bac3681707031d2cd5f957178fa43c0d856e90c0ea6fcb3c1bb2fff154
F ext/fts5/test/fts5contentless.test b807a15020dfae84f215370f08d7270aa01bbcc5abdb54a42ec2dee8998e4842
F ext/fts5/test/fts5corrupt.test 77ae6f41a7eba10620efb921cf7dbe218b0ef232b04519deb43581cb17a57ebe
F ext/fts5/test/fts5corrupt2.test 7453752ba12ce91690c469a6449d412561cc604b1dec994e16ab132952e7805f
F ext/fts5/test/fts5corrupt3.test 7da9895dafa404efd20728f66ff4b94399788bdc042c36fe2689801bba2ccd78
@ -2044,11 +2044,8 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93
F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc
F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e
F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0
P 07d95ed60f0a17ea13b4bc19c2ab2ec9052fedd27c9e1e57a1ec6e3a6470e5b7
R 04413d3e9bd70379e2afd8eeb162f8d3
T *branch * fts5-contentless-delete
T *sym-fts5-contentless-delete *
T -sym-trunk *
P e513bea84dfaf2280f7429c9a528b3a1354a46c36e58ab178ca45478975634e0
R e240ab2da08f49c28fa1cdca40ac2a12
U dan
Z 17a9272f129a5cd1a6eee3f0f514cf63
Z d2566e3569908687c69b9bc52ae7c980
# Remove this line to create a well-formed Fossil manifest.

View File

@ -1 +1 @@
e513bea84dfaf2280f7429c9a528b3a1354a46c36e58ab178ca45478975634e0
948267b066d0dbe667881b3d26a007fa24576da6e57c112676fadeb846c13f0b