Avoid writing delete markers to the oldest segment in an FTS index.

FossilOrigin-Name: 1baeb1cee61d9c56c718b50af034a24f1018a322
This commit is contained in:
dan 2014-08-06 20:04:14 +00:00
parent 7b71fbaf93
commit d90aab8f94
6 changed files with 161 additions and 53 deletions

View File

@ -640,7 +640,7 @@ static void fts5DebugRowid(int *pRc, Fts5Buffer *pBuf, i64 iKey){
sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "(averages) ");
}else{
sqlite3Fts5BufferAppendPrintf(pRc, pBuf,
"(structure idx=%d)", (int)(iKey-10)
"{structure idx=%d}", (int)(iKey-10)
);
}
}
@ -1066,6 +1066,7 @@ static void fts5StructureWrite(Fts5Index *p, int iIdx, Fts5Structure *pStruct){
Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
fts5BufferAppendVarint(&p->rc, &buf, pLvl->nMerge);
fts5BufferAppendVarint(&p->rc, &buf, pLvl->nSeg);
assert( pLvl->nMerge<=pLvl->nSeg );
for(iSeg=0; iSeg<pLvl->nSeg; iSeg++){
fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].iSegid);
@ -1228,7 +1229,9 @@ static int fts5DlidxIterPrev(Fts5DlidxIter *pIter){
pIter->iRowid += iVal;
pIter->iLeafPgno--;
while( a[iOff-1]==0x00 ){
while( iOff>pIter->iFirstOff
&& a[iOff-1]==0x00 && (a[iOff-2] & 0x80)==0
){
iOff--;
pIter->iLeafPgno--;
}
@ -2835,19 +2838,26 @@ static void fts5WriteFinish(
){
int i;
*pnLeaf = pWriter->aWriter[0].pgno;
fts5WriteFlushLeaf(p, pWriter);
if( pWriter->nWriter==1 && pWriter->nEmpty>=FTS5_MIN_DLIDX_SIZE ){
fts5WriteBtreeGrow(p, pWriter);
}
if( pWriter->nWriter>1 ){
fts5WriteBtreeNEmpty(p, pWriter);
}
*pnHeight = pWriter->nWriter;
if( *pnLeaf==1 && pWriter->aWriter[0].buf.n==0 ){
*pnLeaf = 0;
*pnHeight = 0;
}else{
fts5WriteFlushLeaf(p, pWriter);
if( pWriter->nWriter==1 && pWriter->nEmpty>=FTS5_MIN_DLIDX_SIZE ){
fts5WriteBtreeGrow(p, pWriter);
}
if( pWriter->nWriter>1 ){
fts5WriteBtreeNEmpty(p, pWriter);
}
*pnHeight = pWriter->nWriter;
for(i=1; i<pWriter->nWriter; i++){
Fts5PageWriter *pPg = &pWriter->aWriter[i];
i64 iRow = FTS5_SEGMENT_ROWID(pWriter->iIdx, pWriter->iSegid, i, pPg->pgno);
fts5DataWrite(p, iRow, pPg->buf.p, pPg->buf.n);
for(i=1; i<pWriter->nWriter; i++){
Fts5PageWriter *pPg = &pWriter->aWriter[i];
fts5DataWrite(p,
FTS5_SEGMENT_ROWID(pWriter->iIdx, pWriter->iSegid, i, pPg->pgno),
pPg->buf.p, pPg->buf.n
);
}
}
for(i=0; i<pWriter->nWriter; i++){
Fts5PageWriter *pPg = &pWriter->aWriter[i];
@ -2970,7 +2980,8 @@ static void fts5IndexMergeLevel(
Fts5SegWriter writer; /* Writer object */
Fts5StructureSegment *pSeg; /* Output segment */
Fts5Buffer term;
int bRequireDoclistTerm = 0;
int bRequireDoclistTerm = 0; /* Doclist terminator (0x00) required */
int bOldest; /* True if the output segment is the oldest */
assert( iLvl<pStruct->nLevel );
assert( pLvl->nMerge<=pLvl->nSeg );
@ -2997,6 +3008,8 @@ static void fts5IndexMergeLevel(
/* Read input from all segments in the input level */
nInput = pLvl->nSeg;
}
bOldest = (pLvlOut->nSeg==1 && pStruct->nLevel==iLvl+2);
#if 0
fprintf(stdout, "merging %d segments from level %d!", nInput, iLvl);
fflush(stdout);
@ -3008,35 +3021,45 @@ fflush(stdout);
){
Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1] ];
Fts5ChunkIter sPos; /* Used to iterate through position list */
int nTerm;
const u8 *pTerm = fts5MultiIterTerm(pIter, &nTerm);
if( nTerm!=term.n || memcmp(pTerm, term.p, nTerm) ){
if( writer.nLeafWritten>nRem ) break;
/* This is a new term. Append a term to the output segment. */
if( bRequireDoclistTerm ){
fts5WriteAppendZerobyte(p, &writer);
}
fts5WriteAppendTerm(p, &writer, nTerm, pTerm);
fts5BufferSet(&p->rc, &term, nTerm, pTerm);
bRequireDoclistTerm = 1;
}
/* Append the rowid to the output */
fts5WriteAppendRowid(p, &writer, fts5MultiIterRowid(pIter));
/* Copy the position list from input to output */
/* If the segment being written is the oldest in the entire index and
** the position list is empty (i.e. the entry is a delete marker), no
** entry need be written to the output. */
fts5ChunkIterInit(p, pSeg, &sPos);
fts5WriteAppendPoslistInt(p, &writer, sPos.nRem);
for(/* noop */; fts5ChunkIterEof(p, &sPos)==0; fts5ChunkIterNext(p, &sPos)){
int iOff = 0;
while( iOff<sPos.n ){
int iVal;
iOff += getVarint32(&sPos.p[iOff], iVal);
fts5WriteAppendPoslistInt(p, &writer, iVal);
if( bOldest==0 || sPos.nRem>0 ){
int nTerm;
const u8 *pTerm = fts5MultiIterTerm(pIter, &nTerm);
if( nTerm!=term.n || memcmp(pTerm, term.p, nTerm) ){
if( writer.nLeafWritten>nRem ){
fts5ChunkIterRelease(&sPos);
break;
}
/* This is a new term. Append a term to the output segment. */
if( bRequireDoclistTerm ){
fts5WriteAppendZerobyte(p, &writer);
}
fts5WriteAppendTerm(p, &writer, nTerm, pTerm);
fts5BufferSet(&p->rc, &term, nTerm, pTerm);
bRequireDoclistTerm = 1;
}
/* Append the rowid to the output */
fts5WriteAppendRowid(p, &writer, fts5MultiIterRowid(pIter));
/* Copy the position list from input to output */
fts5WriteAppendPoslistInt(p, &writer, sPos.nRem);
for(/* noop */; !fts5ChunkIterEof(p, &sPos); fts5ChunkIterNext(p, &sPos)){
int iOff = 0;
while( iOff<sPos.n ){
int iVal;
iOff += getVarint32(&sPos.p[iOff], iVal);
fts5WriteAppendPoslistInt(p, &writer, iVal);
}
}
}
fts5ChunkIterRelease(&sPos);
}
/* Flush the last leaf page to disk. Set the output segment b-tree height
@ -3058,7 +3081,11 @@ fflush(stdout);
}
pLvl->nSeg -= nInput;
pLvl->nMerge = 0;
if( pSeg->pgnoLast==0 ){
pLvlOut->nSeg--;
}
}else{
assert( pSeg->nHeight>0 && pSeg->pgnoLast>0 );
fts5TrimSegments(p, pIter);
pLvl->nMerge = nInput;
}
@ -3095,10 +3122,11 @@ static void fts5IndexWork(
while( nRem>0 ){
int iLvl; /* To iterate through levels */
int iBestLvl = -1; /* Level offering the most input segments */
int iBestLvl = 0; /* Level offering the most input segments */
int nBest = 0; /* Number of input segments on best level */
/* Set iBestLvl to the level to read input segments from. */
assert( pStruct->nLevel>0 );
for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
if( pLvl->nMerge ){
@ -3113,7 +3141,13 @@ static void fts5IndexWork(
iBestLvl = iLvl;
}
}
assert( iBestLvl>=0 && nBest>0 );
/* If nBest is still 0, then the index must be empty. */
#ifdef SQLITE_DEBUG
for(iLvl=0; nBest==0 && iLvl<pStruct->nLevel; iLvl++){
assert( pStruct->aLevel[iLvl].nSeg==0 );
}
#endif
if( nBest<p->nMinMerge && pStruct->aLevel[iBestLvl].nMerge==0 ) break;
fts5IndexMergeLevel(p, iIdx, pStruct, iBestLvl, &nRem);
@ -3444,6 +3478,7 @@ static void fts5DlidxIterTestReverse(
fts5DlidxIterEof(p, pDlidx)==0;
fts5DlidxIterNext(pDlidx)
){
assert( pDlidx->iLeafPgno>iLeaf );
cksum1 = (cksum1 ^ ( (i64)(pDlidx->iLeafPgno) << 32 ));
cksum1 = (cksum1 ^ pDlidx->iRowid);
}
@ -3454,6 +3489,7 @@ static void fts5DlidxIterTestReverse(
fts5DlidxIterEof(p, pDlidx)==0;
fts5DlidxIterPrev(pDlidx)
){
assert( pDlidx->iLeafPgno>iLeaf );
cksum2 = (cksum2 ^ ( (i64)(pDlidx->iLeafPgno) << 32 ));
cksum2 = (cksum2 ^ pDlidx->iRowid);
}

View File

@ -1,5 +1,5 @@
C Add\ssupport\sfor\ssavepoints\sto\sfts5.
D 2014-08-06T16:30:21.057
C Avoid\swriting\sdelete\smarkers\sto\sthe\soldest\ssegment\sin\san\sFTS\sindex.
D 2014-08-06T20:04:14.831
F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f
F Makefile.in b03432313a3aad96c706f8164fb9f5307eaf19f5
F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23
@ -110,7 +110,7 @@ F ext/fts5/fts5_aux.c 31e581413ecab0962ce2b37468f9f658f36f4b0e
F ext/fts5/fts5_buffer.c 248c61ac9fec001602efc72a45704f3b8d367c00
F ext/fts5/fts5_config.c f4ebf143e141b8c77355e3b15aba81b7be51d710
F ext/fts5/fts5_expr.c 7b8e380233176053841904a86006696ee8f6cd24
F ext/fts5/fts5_index.c 6a9f851490562d8843edc4d54b27eb9472c62d68
F ext/fts5/fts5_index.c dab399c67cb6bdd23009d2f1280ea60a9585b47c
F ext/fts5/fts5_storage.c 2866e7e1de9dc851756c3a9c76b6e1d75e0facb7
F ext/fts5/fts5parse.y 777da8e5819f75c217982c79c29d014c293acac9
F ext/icu/README.txt d9fbbad0c2f647c3fdf715fc9fd64af53aedfc43
@ -595,7 +595,7 @@ F test/fts4merge3.test aab02a09f50fe6baaddc2e159c3eabc116d45fc7
F test/fts4merge4.test d895b1057a7798b67e03455d0fa50e9ea836c47b
F test/fts4noti.test 524807f0c36d49deea7920cdd4cd687408b58849
F test/fts4unicode.test 01ec3fe2a7c3cfff3b4c0581b83caa11b33efa36
F test/fts5aa.test 2d136b61c4523ec018699e59b35c005313569b9e
F test/fts5aa.test 4c7cbf1d38d30e7aaa8febf44958dd13bbb53bf8
F test/fts5ab.test dc04ed48cf93ca957d174406e6c192f2ff4f3397
F test/fts5ac.test 399533fe52b7383053368ab8ba01ae182391e5d7
F test/fts5ad.test 2ed38bbc865678cb2905247120d02ebba7f20e07
@ -604,6 +604,7 @@ F test/fts5af.test 9ebe23aa3875896076952c7bc6e8308813a63c74
F test/fts5ag.test 0747bf3bade16d5165810cf891f875933b28b420
F test/fts5ah.test 009b993a9b7ebc43f84c10e53bd778b1dc8ffbe7
F test/fts5ai.test 4dee71c23ddbcf2b0fc5d5586f241002b883c10e
F test/fts5aj.test d16f44bd1f7da9714ef99bd8b1996c5867aee8f5
F test/fts5ea.test ff43b40f8879ba50b82def70f2ab67c195d1a1d4
F test/full.test 6b3c8fb43c6beab6b95438c1675374b95fab245d
F test/func.test ae97561957aba6ca9e3a7b8a13aac41830d701ef
@ -771,7 +772,7 @@ F test/pagesize.test 1dd51367e752e742f58e861e65ed7390603827a0
F test/pcache.test b09104b03160aca0d968d99e8cd2c5b1921a993d
F test/pcache2.test a83efe2dec0d392f814bfc998def1d1833942025
F test/percentile.test b98fc868d71eb5619d42a1702e9ab91718cbed54
F test/permutations.test 9875e7bacd0ab0cf78525e4b2d287840f284599b
F test/permutations.test 4f71bc5c9ce9a249cc94ad415cda809ce7f2360b
F test/pragma.test adb21a90875bc54a880fa939c4d7c46598905aa0
F test/pragma2.test aea7b3d82c76034a2df2b38a13745172ddc0bc13
F test/printf.test ec9870c4dce8686a37818e0bf1aba6e6a1863552
@ -1200,7 +1201,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1
F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4
F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32
F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f
P 5d38e6edc40ef188fbf96505073797036aa6783a
R e4475b88a5ed55985c58ff03d579a6b6
P 3b19eba042bb2eeb1be60f8d58ebaa0a045d6a5c
R 26f7f3d5f6581939b65c2ce6e063db0c
U dan
Z 1737719499cd3c712a0cb60804b1642b
Z 78bed80e24eecafbae841451888b9b1c

View File

@ -1 +1 @@
3b19eba042bb2eeb1be60f8d58ebaa0a045d6a5c
1baeb1cee61d9c56c718b50af034a24f1018a322

View File

@ -50,7 +50,7 @@ do_execsql_test 2.1 {
do_execsql_test 2.2 {
SELECT fts5_decode(id, block) FROM t1_data WHERE id==10
} {
{(structure idx=0) {lvl=0 nMerge=0 {id=27723 h=1 leaves=1..1}}}
{{structure idx=0} {lvl=0 nMerge=0 {id=27723 h=1 leaves=1..1}}}
}
do_execsql_test 2.3 {
INSERT INTO t1(t1) VALUES('integrity-check');

71
test/fts5aj.test Normal file
View File

@ -0,0 +1,71 @@
# 2014 June 17
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#*************************************************************************
# This file implements regression tests for the SQLite library. The
# focus of this script is testing the FTS5 module.
#
# Specifically, this tests that, provided the amount of data remains
# constant, the FTS index does not grow indefinitely as rows are inserted
# and deleted.
#
# Locate the test directory from the script path and load the shared test
# harness first; the commands used below (ifcapable, finish_test,
# do_execsql_test, execsql, do_test, db) are not defined in this file and
# are presumably supplied by tester.tcl.
set testdir [file dirname $argv0]
source $testdir/tester.tcl
set testprefix fts5aj
# If SQLITE_ENABLE_FTS3 is not defined (the "fts3" capability is absent),
# omit this file.
# NOTE(review): this gates an FTS5 test on the fts3 capability - confirm
# that is the intended build switch.
ifcapable !fts3 {
finish_test
return
}
# Return a random document for insertion into the FTS table: a Tcl list
# of 20 tokens, each a single lowercase letter (a-z) chosen independently
# with rand(). The sequence of documents is repeatable when the caller
# seeds the generator with srand() beforehand.
proc doc {} {
  set alphabet [list a b c d e f g h i j k l m n o p q r s t u v w x y z]
  set tokens [list]
  for {set i 0} {$i < 20} {incr i} {
    # The expression is braced so that it is parsed once and byte-compiled
    # instead of being re-substituted and re-parsed on every iteration.
    lappend tokens [lindex $alphabet [expr {int(rand() * 26)}]]
  }
  return $tokens
}
# Return a list holding one integer per level of the index structure for
# index 0: the number of segments currently on that level.
#
# The structure record (rowid 10 of t1_data) is decoded with fts5_decode()
# and read as a Tcl list. The first element is the record header; every
# following element describes one level as two leading fields (lvl=...,
# nMerge=...) followed by one sub-list per segment, so the segment count
# is the element's length minus 2.
#
# An empty list is returned when the record lists no levels (previously
# this case raised "can't read res: no such variable").
#
# Relies on the database handle "db" and the FTS5 table "t1" created by
# the enclosing test script.
proc structure {} {
  set decoded [db one {SELECT fts5_decode(rowid,block) FROM t1_data WHERE rowid=10}]
  set counts [list]
  foreach level [lrange $decoded 1 end] {
    lappend counts [expr {[llength $level] - 2}]
  }
  return $counts
}
# Seed Tcl's random number generator with a fixed value so that [doc]
# produces the same sequence of documents on every run.
expr srand(0)
# Create the FTS5 table and set its 'pgsz' option to 64.
# NOTE(review): presumably a small page size is chosen so that this modest
# data set spreads over many leaf pages and triggers merges - confirm.
do_execsql_test 1.0 {
CREATE VIRTUAL TABLE t1 USING fts5(x);
INSERT INTO t1(t1) VALUES('pgsz=64');
}
# Main loop: 50000 iterations, each inserting one random document. Once
# iTest exceeds 1000, each iteration first deletes the row whose rowid is
# iTest-1000, so from then on every insert is paired with a delete.
# NOTE(review): assumes rowids are assigned sequentially from 1, so the
# deleted row always exists and the live row count stays constant - confirm.
for {set iTest 0} {$iTest < 50000} {incr iTest} {
if {$iTest > 1000} { execsql { DELETE FROM t1 WHERE rowid=($iTest-1000) } }
set new [doc]
execsql { INSERT INTO t1 VALUES($new) }
# NOTE(review): sz1 is assigned here but never read anywhere in this file;
# it looks like a baseline size intended for a later comparison.
if {$iTest==10000} { set sz1 [db one {SELECT count(*) FROM t1_data}] }
# Every 1000 iterations, sample the number of rows in t1_data and the
# per-level segment counts. Both values are embedded in the name of a
# do_test whose script and expected result are both empty, so they are
# only reported through the test name; no size limit is asserted here.
if {0==($iTest % 1000)} {
set sz [db one {SELECT count(*) FROM t1_data}]
set s [structure]
do_test 1.$iTest.$sz.{$s} {} {}
}
}
# Debugging aid (disabled): dump every decoded record of t1_data.
#db eval { SELECT rowid, fts5_decode(rowid, block) aS r FROM t1_data} {puts $r}
# Finally, run the FTS5 'integrity-check' command against the index.
do_execsql_test 2.0 { INSERT INTO t1(t1) VALUES('integrity-check') }
finish_test

View File

@ -226,7 +226,7 @@ test_suite "fts5" -prefix "" -description {
All FTS5 tests.
} -files {
fts5aa.test fts5ab.test fts5ac.test fts5ad.test fts5ae.test fts5ea.test
fts5af.test fts5ag.test fts5ah.test fts5ai.test
fts5af.test fts5ag.test fts5ah.test fts5ai.test fts5aj.test
}
test_suite "nofaultsim" -prefix "" -description {