Fix various problems to do with segment promotion. Add test file fts4growth2.test, containing tests to check that the FTS index does not grow indefinitely as the table is updated.

FossilOrigin-Name: 21491a9bc686e63bec32f1a67103622f1160a2f9
This commit is contained in:
dan 2014-05-14 15:58:47 +00:00
parent c0caea21de
commit 9e83e465eb
8 changed files with 262 additions and 40 deletions

View File

@ -1333,7 +1333,7 @@ static int fts3InitVtab(
p->bHasStat = isFts4;
p->bFts4 = isFts4;
p->bDescIdx = bDescIdx;
p->bAutoincrmerge = 0xff; /* 0xff means setting unknown */
p->nAutoincrmerge = 0xff; /* 0xff means setting unknown */
p->zContentTbl = zContent;
p->zLanguageid = zLanguageid;
zContent = 0;
@ -3302,7 +3302,10 @@ static int fts3SyncMethod(sqlite3_vtab *pVtab){
Fts3Table *p = (Fts3Table*)pVtab;
int rc = sqlite3Fts3PendingTermsFlush(p);
if( rc==SQLITE_OK && p->bAutoincrmerge==1 && p->nLeafAdd>(nMinMerge/16) ){
if( rc==SQLITE_OK
&& p->nLeafAdd>(nMinMerge/16)
&& p->nAutoincrmerge && p->nAutoincrmerge!=0xff
){
int mxLevel = 0; /* Maximum relative level value in db */
int A; /* Incr-merge parameter A */
@ -3310,7 +3313,7 @@ static int fts3SyncMethod(sqlite3_vtab *pVtab){
assert( rc==SQLITE_OK || mxLevel==0 );
A = p->nLeafAdd * mxLevel;
A += (A/2);
if( A>(int)nMinMerge ) rc = sqlite3Fts3Incrmerge(p, A, 8);
if( A>(int)nMinMerge ) rc = sqlite3Fts3Incrmerge(p, A, p->nAutoincrmerge);
}
sqlite3Fts3SegmentsClose(p);
return rc;

View File

@ -210,7 +210,7 @@ struct Fts3Table {
sqlite3_tokenizer *pTokenizer; /* tokenizer for inserts and queries */
char *zContentTbl; /* content=xxx option, or NULL */
char *zLanguageid; /* languageid=xxx option, or NULL */
u8 bAutoincrmerge; /* True if automerge=1 */
int nAutoincrmerge; /* Value configured by 'automerge' */
u32 nLeafAdd; /* Number of leaf blocks added this trans */
/* Precompiled statements used by the implementation. Each of these

View File

@ -2971,16 +2971,23 @@ void sqlite3Fts3SegReaderFinish(
/*
** Decode the "end_block" field, selected by column iCol of the SELECT
** statement passed as the first argument.
**
** The "end_block" field may contain either an integer, or a text field
** containing the text representation of two non-negative integers separated
** by one or more space (0x20) characters. In the first case, set *piEndBlock
** to the integer value and *pnByte to zero before returning. In the second,
** set *piEndBlock to the first value and *pnByte to the second.
*/
static void fts3ReadEndBlockField(
sqlite3_stmt *pStmt,
int iCol,
i64 *piEndBlock,
i64 *piEndBlock,
i64 *pnByte
){
const unsigned char *zText = sqlite3_column_text(pStmt, iCol);
if( zText ){
int i;
int iMul = 1;
i64 iVal = 0;
for(i=0; zText[i]>='0' && zText[i]<='9'; i++){
iVal = iVal*10 + (zText[i] - '0');
@ -2988,10 +2995,14 @@ static void fts3ReadEndBlockField(
*piEndBlock = iVal;
while( zText[i]==' ' ) i++;
iVal = 0;
if( zText[i]=='-' ){
i++;
iMul = -1;
}
for(/* no-op */; zText[i]>='0' && zText[i]<='9'; i++){
iVal = iVal*10 + (zText[i] - '0');
}
*pnByte = iVal;
*pnByte = (iVal * (i64)iMul);
}
}
@ -3011,18 +3022,30 @@ static int fts3PromoteSegments(
rc = fts3SqlStmt(p, SQL_SELECT_LEVEL_RANGE2, &pRange, 0);
if( rc==SQLITE_OK ){
int bOk = 1;
int bOk = 0;
int iLast = (iAbsLevel/FTS3_SEGDIR_MAXLEVEL + 1) * FTS3_SEGDIR_MAXLEVEL - 1;
i64 nLimit = (nByte*3)/2;
/* Loop through all entries in the %_segdir table corresponding to
** segments in this index on levels greater than iAbsLevel. If there is
** at least one such segment, and it is possible to determine that all
** such segments are smaller than nLimit bytes in size, they will be
** promoted to level iAbsLevel. */
sqlite3_bind_int(pRange, 1, iAbsLevel+1);
sqlite3_bind_int(pRange, 2, iLast);
while( SQLITE_ROW==sqlite3_step(pRange) ){
i64 nSize, dummy;
fts3ReadEndBlockField(pRange, 2, &dummy, &nSize);
if( nSize>nByte ){
if( nSize<=0 || nSize>nLimit ){
/* If nSize==0, then the %_segdir.end_block field does not
** contain a size value. This happens if it was written by an
** old version of FTS. In this case it is not possible to determine
** the size of the segment, and so segment promotion does not
** take place. */
bOk = 0;
break;
}
bOk = 1;
}
rc = sqlite3_reset(pRange);
@ -3039,6 +3062,16 @@ static int fts3PromoteSegments(
}
if( rc==SQLITE_OK ){
/* Loop through all %_segdir entries for segments in this index with
** levels equal to or greater than iAbsLevel. As each entry is visited,
** update it to set (level = -1) and (idx = N), where N is 0 for the
** oldest segment in the range, 1 for the next oldest, and so on.
**
** In other words, move all segments being promoted to level -1,
** setting the "idx" fields as appropriate to keep them in the same
** order. The contents of level -1 (which is never used, except
** transiently here), will be moved back to level iAbsLevel below. */
sqlite3_bind_int(pRange, 1, iAbsLevel);
while( SQLITE_ROW==sqlite3_step(pRange) ){
sqlite3_bind_int(pUpdate1, 1, iIdx++);
@ -3056,6 +3089,7 @@ static int fts3PromoteSegments(
rc = sqlite3_reset(pRange);
}
/* Move level -1 to level iAbsLevel */
if( rc==SQLITE_OK ){
sqlite3_bind_int(pUpdate2, 1, iAbsLevel);
sqlite3_step(pUpdate2);
@ -3188,14 +3222,15 @@ int sqlite3Fts3PendingTermsFlush(Fts3Table *p){
** estimate the number of leaf blocks of content to be written
*/
if( rc==SQLITE_OK && p->bHasStat
&& p->bAutoincrmerge==0xff && p->nLeafAdd>0
&& p->nAutoincrmerge==0xff && p->nLeafAdd>0
){
sqlite3_stmt *pStmt = 0;
rc = fts3SqlStmt(p, SQL_SELECT_STAT, &pStmt, 0);
if( rc==SQLITE_OK ){
sqlite3_bind_int(pStmt, 1, FTS_STAT_AUTOINCRMERGE);
rc = sqlite3_step(pStmt);
p->bAutoincrmerge = (rc==SQLITE_ROW && sqlite3_column_int(pStmt, 0));
p->nAutoincrmerge = (rc==SQLITE_ROW && sqlite3_column_int(pStmt, 0));
if( p->nAutoincrmerge==1 ) p->nAutoincrmerge = 8;
rc = sqlite3_reset(pStmt);
}
}
@ -4105,6 +4140,9 @@ static int fts3IncrmergeLoad(
iStart = sqlite3_column_int64(pSelect, 1);
iLeafEnd = sqlite3_column_int64(pSelect, 2);
fts3ReadEndBlockField(pSelect, 3, &iEnd, &pWriter->nLeafData);
if( pWriter->nLeafData<0 ){
pWriter->nLeafData = pWriter->nLeafData * -1;
}
nRoot = sqlite3_column_bytes(pSelect, 4);
aRoot = sqlite3_column_blob(pSelect, 4);
}else{
@ -4828,6 +4866,9 @@ int sqlite3Fts3Incrmerge(Fts3Table *p, int nMerge, int nMin){
}
}
if( nSeg!=0 ){
pWriter->nLeafData = pWriter->nLeafData * -1;
}
fts3IncrmergeRelease(p, pWriter, &rc);
if( nSeg==0 ){
fts3PromoteSegments(p, iAbsLevel+1, pWriter->nLeafData);
@ -4918,7 +4959,10 @@ static int fts3DoAutoincrmerge(
){
int rc = SQLITE_OK;
sqlite3_stmt *pStmt = 0;
p->bAutoincrmerge = fts3Getint(&zParam)!=0;
p->nAutoincrmerge = fts3Getint(&zParam);
if( p->nAutoincrmerge==1 || p->nAutoincrmerge>FTS3_MERGE_COUNT ){
p->nAutoincrmerge = 8;
}
if( !p->bHasStat ){
assert( p->bFts4==0 );
sqlite3Fts3CreateStatTable(&rc, p);
@ -4927,7 +4971,7 @@ static int fts3DoAutoincrmerge(
rc = fts3SqlStmt(p, SQL_REPLACE_STAT, &pStmt, 0);
if( rc ) return rc;
sqlite3_bind_int(pStmt, 1, FTS_STAT_AUTOINCRMERGE);
sqlite3_bind_int(pStmt, 2, p->bAutoincrmerge);
sqlite3_bind_int(pStmt, 2, p->nAutoincrmerge);
sqlite3_step(pStmt);
rc = sqlite3_reset(pStmt);
return rc;

View File

@ -1,5 +1,5 @@
C Fix\sa\sproblem\spreventing\sdelete\smarkers\sfrom\sever\sbeing\sremoved\sfrom\sthe\sFTS\sindex.
D 2014-05-13T20:11:37.423
C Fix\svarious\sproblems\sto\sdo\swith\ssegment\spromotion.\sAdd\stest\sfile\sfts4growth2.test,\scontaining\stests\sto\scheck\sthat\sthe\sFTS\sindex\sdoes\snot\sgrow\sindefinitely\sas\sthe\stable\sis\supdated.
D 2014-05-14T15:58:47.565
F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f
F Makefile.in dd2b1aba364ff9b05de41086f74407f285c57670
F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23
@ -78,9 +78,9 @@ F ext/fts3/README.content fdc666a70d5257a64fee209f97cf89e0e6e32b51
F ext/fts3/README.syntax a19711dc5458c20734b8e485e75fb1981ec2427a
F ext/fts3/README.tokenizers e0a8b81383ea60d0334d274fadf305ea14a8c314
F ext/fts3/README.txt 8c18f41574404623b76917b9da66fcb0ab38328d
F ext/fts3/fts3.c 41b1920b9a8657963f09cb93b208c2671c5568db
F ext/fts3/fts3.c e83f894cf1adaf8decd6b1de76bfdcdb79b25507
F ext/fts3/fts3.h 3a10a0af180d502cecc50df77b1b22df142817fe
F ext/fts3/fts3Int.h b250b154764b86be4ee540f8c233a59858e88f1d
F ext/fts3/fts3Int.h 16cddf2d7b0e5f3681615ae1d8ca0e45fca44918
F ext/fts3/fts3_aux.c 5c211e17a64885faeb16b9ba7772f9d5445c2365
F ext/fts3/fts3_expr.c 2ac35bda474f00c14c19608e49a02c8c7ceb9970
F ext/fts3/fts3_hash.c 29b986e43f4e9dd40110eafa377dc0d63c422c60
@ -96,7 +96,7 @@ F ext/fts3/fts3_tokenizer.h 64c6ef6c5272c51ebe60fc607a896e84288fcbc3
F ext/fts3/fts3_tokenizer1.c 5c98225a53705e5ee34824087478cf477bdb7004
F ext/fts3/fts3_unicode.c 92391b4b4fb043564c6539ea9b8661e3bcba47b9
F ext/fts3/fts3_unicode2.c 0113d3acf13429e6dc38e0647d1bc71211c31a4d
F ext/fts3/fts3_write.c 283b24477729129a210d91b48f7c53181583a848
F ext/fts3/fts3_write.c 5e13a15dad44bf516181e9d6251c7d6a27861dee
F ext/fts3/fts3speed.tcl b54caf6a18d38174f1a6e84219950d85e98bb1e9
F ext/fts3/mkfts3amal.tcl 252ecb7fe6467854f2aa237bf2c390b74e71f100
F ext/fts3/tool/fts3view.c 6cfc5b67a5f0e09c0d698f9fd012c784bfaa9197
@ -570,7 +570,8 @@ F test/fts4aa.test 0c3152322c7f0b548cc942ad763eaba0da87ccca
F test/fts4check.test 66fa274cab2b615f2fb338b257713aba8fad88a8
F test/fts4content.test 2e7252557d6d24afa101d9ba1de710d6140e6d06
F test/fts4docid.test e33c383cfbdff0284685604d256f347a18fdbf01
F test/fts4growth.test 3b1f8c98b603b38dc9fe4a266f4f5ddb0c73f092
F test/fts4growth.test 50befaa001a6dd4046d32e5d7b3cb75df8360ca8
F test/fts4growth2.test 19fda88600266629b63a4cece512c2dc6f7c9fc5
F test/fts4incr.test 361960ed3550e781f3f313e17e2182ef9cefc0e9
F test/fts4langid.test 24a6e41063b416bbdf371ff6b4476fa41c194aa7
F test/fts4merge.test c424309743fdd203f8e56a1f1cd7872cd66cc0ee
@ -745,7 +746,7 @@ F test/pagesize.test 1dd51367e752e742f58e861e65ed7390603827a0
F test/pcache.test b09104b03160aca0d968d99e8cd2c5b1921a993d
F test/pcache2.test a83efe2dec0d392f814bfc998def1d1833942025
F test/percentile.test b98fc868d71eb5619d42a1702e9ab91718cbed54
F test/permutations.test 40add071ba71aefe1c04f5845308cf46f7de8d04
F test/permutations.test 975f65293a46cf8deb896ecdaf1ba563ccaca24a
F test/pragma.test adb21a90875bc54a880fa939c4d7c46598905aa0
F test/pragma2.test aea7b3d82c76034a2df2b38a13745172ddc0bc13
F test/printf.test ec9870c4dce8686a37818e0bf1aba6e6a1863552
@ -1171,7 +1172,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1
F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4
F tool/warnings.sh d1a6de74685f360ab718efda6265994b99bbea01
F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f
P b3b505a4dd0c679437a4272109f1188175088cd1
R d1a058fc899c4ecbd09fd7fceab935e7
P 7f47ae5c5ddb1227484ddae7c6960183932a052a
R a0acf9231899e84a47fa0a8ff85a8e05
U dan
Z f678f48fb654227badac5ce98e23d62c
Z 4a0ad803ddf94719a771980af32709c7

View File

@ -1 +1 @@
7f47ae5c5ddb1227484ddae7c6960183932a052a
21491a9bc686e63bec32f1a67103622f1160a2f9

View File

@ -131,7 +131,7 @@ do_execsql_test 2.5 {
SELECT end_block FROM x2_segdir WHERE level=3;
INSERT INTO x2(x2) VALUES('merge=4,4');
SELECT end_block FROM x2_segdir WHERE level=3;
} {{3828 3430} {3828 10191} {3828 14109}}
} {{3828 -3430} {3828 -10191} {3828 -14109}}
do_execsql_test 2.6 {
SELECT sum(length(block)) FROM x2_segdir, x2_segments WHERE
@ -235,6 +235,9 @@ do_test 3.3.2 {
} {1 0 412}
#--------------------------------------------------------------------------
# Check a theory on a bug in fts4 - that segments with idx==0 were not
# being incrementally merged correctly. Theory turned out to be false.
#
do_execsql_test 4.1 {
DROP TABLE IF EXISTS x4;
DROP TABLE IF EXISTS t1;
@ -256,14 +259,23 @@ do_execsql_test 4.4 {
SELECT count(*) FROM x4_segdir;
} {3}
breakpoint
do_execsql_test 4.5 {
INSERT INTO x4(x4) VALUES('merge=10,2');
SELECT count(*) FROM x4_segdir;
} {3}
if 0 {
do_execsql_test 3.1 {
do_execsql_test 4.6 {
INSERT INTO x4(x4) VALUES('merge=1000,2');
SELECT count(*) FROM x4_segdir;
} {1}
#--------------------------------------------------------------------------
# Check that segments are not promoted if the "end_block" field does not
# contain a size.
#
do_execsql_test 5.1 {
DROP TABLE IF EXISTS x2;
DROP TABLE IF EXISTS t1;
CREATE TABLE t1(docid, words);
@ -271,6 +283,41 @@ do_execsql_test 3.1 {
}
fts_kjv_genesis
proc first {L} {lindex $L 0}
db func first first
do_test 5.2 {
foreach r [db eval { SELECT rowid FROM t1 }] {
execsql {
INSERT INTO x2(docid, content) SELECT docid, words FROM t1 WHERE rowid=$r
}
}
foreach d [db eval { SELECT docid FROM t1 LIMIT -1 OFFSET 20 }] {
execsql { DELETE FROM x2 WHERE docid = $d }
}
execsql {
INSERT INTO x2(x2) VALUES('optimize');
SELECT level, idx, end_block FROM x2_segdir
}
} {2 0 {752 1926}}
do_execsql_test 5.3 {
UPDATE x2_segdir SET end_block = CAST( first(end_block) AS INTEGER );
SELECT end_block, typeof(end_block) FROM x2_segdir;
} {752 integer}
do_execsql_test 5.4 {
INSERT INTO x2 SELECT words FROM t1 LIMIT 50;
SELECT level, idx, end_block FROM x2_segdir
} {2 0 752 0 0 {758 5174}}
do_execsql_test 5.5 {
UPDATE x2_segdir SET end_block = end_block || ' 1926' WHERE level=2;
INSERT INTO x2 SELECT words FROM t1 LIMIT 40;
SELECT level, idx, end_block FROM x2_segdir
} {0 0 {752 1926} 0 1 {758 5174} 0 2 {763 4170}}
proc t1_to_x2 {} {
foreach id [db eval {SELECT docid FROM t1 LIMIT 2}] {
execsql {
@ -280,24 +327,60 @@ proc t1_to_x2 {} {
}
}
#--------------------------------------------------------------------------
# Check that segments created by auto-merge are not promoted until they
# are completed.
#
do_execsql_test 6.1 {
CREATE VIRTUAL TABLE x5 USING fts4;
INSERT INTO x5 SELECT words FROM t1 LIMIT 100 OFFSET 0;
INSERT INTO x5 SELECT words FROM t1 LIMIT 100 OFFSET 25;
INSERT INTO x5 SELECT words FROM t1 LIMIT 100 OFFSET 50;
INSERT INTO x5 SELECT words FROM t1 LIMIT 100 OFFSET 75;
SELECT count(*) FROM x5_segdir
} {4}
do_execsql_test 6.2 {
INSERT INTO x5(x5) VALUES('merge=2,4');
SELECT level, idx, end_block FROM x5_segdir;
} {0 0 {10 9216} 0 1 {21 9330} 0 2 {31 8850} 0 3 {40 8689} 1 0 {1320 -3117}}
do_execsql_test 6.3 {
INSERT INTO x5 SELECT words FROM t1 LIMIT 100 OFFSET 100;
SELECT level, idx, end_block FROM x5_segdir;
} {
0 0 {10 9216} 0 1 {21 9330} 0 2 {31 8850}
0 3 {40 8689} 1 0 {1320 -3117} 0 4 {1329 8297}
}
do_execsql_test 6.4 {
INSERT INTO x5(x5) VALUES('merge=200,4');
SELECT level, idx, end_block FROM x5_segdir;
} {0 0 {1329 8297} 1 0 {1320 28009}}
do_execsql_test 6.5 {
INSERT INTO x5 SELECT words FROM t1;
SELECT level, idx, end_block FROM x5_segdir;
} {
0 1 {1329 8297} 0 0 {1320 28009} 0 2 {1449 118006}
}
#do_test 3.2 {
#t1_to_x2
#execsql {SELECT level, count(*) FROM x2_segdir GROUP BY level}
#} {0 13 1 15 2 5}
proc second {x} { lindex $x 1 }
db func second second
for {set i 0} {$i <1000} {incr i} {
t1_to_x2
db eval {
SELECT level, group_concat( second(end_block), ' ' ) AS c FROM x2_segdir GROUP BY level;
} {
puts "$i.$level: $c"
}
}
}
#proc second {x} { lindex $x 1 }
#db func second second
#for {set i 0} {$i <1000} {incr i} {
# t1_to_x2
# db eval {
# SELECT level, group_concat( second(end_block), ' ' ) AS c FROM x2_segdir GROUP BY level;
# } {
# puts "$i.$level: $c"
# }
#}
finish_test

89
test/fts4growth2.test Normal file
View File

@ -0,0 +1,89 @@
# 2014 May 12
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#*************************************************************************
# This file implements regression tests for SQLite library. The
# focus of this script is testing the FTS4 module.
#
#
# Locate the test directory from the script path and load the shared
# test harness.
set testdir [file dirname $argv0]
source $testdir/tester.tcl
# Use a prefix that matches this file's name so that reported test names
# (e.g. "fts4growth2-1.2") cannot be confused with those produced by
# fts4growth.test.
set testprefix fts4growth2
# If SQLITE_ENABLE_FTS3 is not defined (the "fts3" capability is absent),
# omit this file: finish immediately without running any tests.
ifcapable !fts3 {
finish_test
return
}
# Load genesis.tcl, create the plain table t1(docid, words) and invoke
# fts_kjv_genesis. NOTE(review): fts_kjv_genesis is presumably defined by
# genesis.tcl and populates t1 - confirm. The tests below read the "docid"
# and "words" columns of t1 as their source data.
source $testdir/genesis.tcl
do_execsql_test 1.0 { CREATE TABLE t1(docid, words); }
fts_kjv_genesis
# Run one update cycle against FTS table x1 inside a single transaction:
#
#   * delete from x1 every docid taken from the t1 rows for which
#     ((rowid-1) % 4) equals $val, then
#   * insert into x1 the (docid, words) pairs of the t1 rows for which
#     (rowid % 4) equals $val.
#
# Table x1 must already exist when this proc is called.
# NOTE(review): the "+0" after $val presumably forces the bound value to
# be compared as a number rather than as text - confirm.
proc tt {val} {
execsql {
BEGIN;
DELETE FROM x1
WHERE docid IN (SELECT docid FROM t1 WHERE (rowid-1)%4==$val+0);
INSERT INTO x1(docid, content)
SELECT docid, words FROM t1 WHERE (rowid%4)==$val+0;
COMMIT;
}
}
# Create FTS4 table x1 and set its 'automerge' option to 2.
do_execsql_test 1.1 {
CREATE VIRTUAL TABLE x1 USING fts4;
INSERT INTO x1(x1) VALUES('automerge=2');
}
# Run 100 rounds of the four tt update cycles, then check that the highest
# segment level in x1_segdir is 4 and that level 4 holds exactly one segment.
do_test 1.2 {
for {set i 0} {$i < 100} {incr i} {
tt 0 ; tt 1 ; tt 2 ; tt 3
}
execsql {
SELECT max(level) FROM x1_segdir;
SELECT count(*) FROM x1_segdir WHERE level=4;
}
} {4 1}
# Repeat the same 100 rounds. The expected result is unchanged from test
# 1.2, i.e. the additional updates must not add levels to the index.
do_test 1.3 {
for {set i 0} {$i < 100} {incr i} {
tt 0 ; tt 1 ; tt 2 ; tt 3
}
execsql {
SELECT max(level) FROM x1_segdir;
SELECT count(*) FROM x1_segdir WHERE level=4;
}
} {4 1}
#-------------------------------------------------------------------------
# Remove all t1 rows with rowid greater than 16 and recreate x1 without
# issuing any 'automerge' command. Each of the 40 tests below then runs
# 100 rounds of the four tt update cycles and checks that the maximum
# segment level in x1_segdir is still 1.
#
do_execsql_test 2.1 {
DELETE FROM t1 WHERE rowid>16;
DROP TABLE IF EXISTS x1;
CREATE VIRTUAL TABLE x1 USING fts4;
}
# Register SQL function "second", which returns element 1 of a Tcl list.
# NOTE(review): no statement visible in this file calls second() - it may
# be left over from debugging; confirm before removing.
db func second second
proc second {L} {lindex $L 1}
for {set tn 0} {$tn < 40} {incr tn} {
do_test 2.2.$tn {
for {set i 0} {$i < 100} {incr i} {
tt 0 ; tt 1 ; tt 2 ; tt 3
}
execsql { SELECT max(level) FROM x1_segdir }
} {1}
}
finish_test

View File

@ -112,6 +112,7 @@ set allquicktests [test_set $alltests -exclude {
incrvacuum_ioerr.test autovacuum_crash.test btree8.test shared_err.test
vtab_err.test walslow.test walcrash.test walcrash3.test
walthread.test rtree3.test indexfault.test securedel2.test
fts4growth2.test
}]
if {[info exists ::env(QUICKTEST_INCLUDE)]} {
set allquicktests [concat $allquicktests $::env(QUICKTEST_INCLUDE)]
@ -196,6 +197,7 @@ test_suite "fts3" -prefix "" -description {
fts3corrupt2.test fts3first.test fts4langid.test fts4merge.test
fts4check.test fts4unicode.test fts4noti.test
fts3varint.test
fts4growth.test fts4growth2.test
}
test_suite "nofaultsim" -prefix "" -description {