Experimental code to prevent FTS indexes from growing indefinitely as the table is updated.

FossilOrigin-Name: b3b505a4dd0c679437a4272109f1188175088cd1
This commit is contained in:
dan 2014-05-12 20:04:48 +00:00
parent 1273614191
commit 6bb9889ef0
5 changed files with 352 additions and 24 deletions

View File

@ -216,7 +216,7 @@ struct Fts3Table {
/* Precompiled statements used by the implementation. Each of these
** statements is run and reset within a single virtual table API call.
*/
sqlite3_stmt *aStmt[37];
sqlite3_stmt *aStmt[40];
char *zReadExprlist;
char *zWriteExprlist;

View File

@ -193,6 +193,7 @@ struct SegmentWriter {
int nSize; /* Size of allocation at aData */
int nData; /* Bytes of data in aData */
char *aData; /* Pointer to block from malloc() */
i64 nLeafData; /* Number of bytes of leaf data written */
};
/*
@ -268,6 +269,10 @@ struct SegmentNode {
#define SQL_SELECT_INDEXES 35
#define SQL_SELECT_MXLEVEL 36
#define SQL_SELECT_LEVEL_RANGE2 37
#define SQL_UPDATE_LEVEL_IDX 38
#define SQL_UPDATE_LEVEL 39
/*
** This function is used to obtain an SQLite prepared statement handle
** for the statement identified by the second argument. If successful,
@ -369,7 +374,17 @@ static int fts3SqlStmt(
/* SQL_SELECT_MXLEVEL
** Return the largest relative level in the FTS index or indexes. */
/* 36 */ "SELECT max( level %% 1024 ) FROM %Q.'%q_segdir'"
/* 36 */ "SELECT max( level %% 1024 ) FROM %Q.'%q_segdir'",
/* Return segments in order from oldest to newest.*/
/* 37 */ "SELECT level, idx, end_block "
"FROM %Q.'%q_segdir' WHERE level BETWEEN ? AND ?"
"ORDER BY level DESC, idx ASC",
/* Update statements used while promoting segments */
/* 38 */ "UPDATE %Q.'%q_segdir' SET level=-1,idx=? WHERE level=? AND idx=?",
/* 39 */ "UPDATE %Q.'%q_segdir' SET level=? WHERE level=-1"
};
int rc = SQLITE_OK;
sqlite3_stmt *pStmt;
@ -1910,6 +1925,7 @@ static int fts3WriteSegdir(
sqlite3_int64 iStartBlock, /* Value for "start_block" field */
sqlite3_int64 iLeafEndBlock, /* Value for "leaves_end_block" field */
sqlite3_int64 iEndBlock, /* Value for "end_block" field */
sqlite3_int64 nLeafData, /* Bytes of leaf data in segment */
char *zRoot, /* Blob value for "root" field */
int nRoot /* Number of bytes in buffer zRoot */
){
@ -1920,7 +1936,13 @@ static int fts3WriteSegdir(
sqlite3_bind_int(pStmt, 2, iIdx);
sqlite3_bind_int64(pStmt, 3, iStartBlock);
sqlite3_bind_int64(pStmt, 4, iLeafEndBlock);
sqlite3_bind_int64(pStmt, 5, iEndBlock);
if( nLeafData==0 ){
sqlite3_bind_int64(pStmt, 5, iEndBlock);
}else{
char *zEnd = sqlite3_mprintf("%lld %lld", iEndBlock, nLeafData);
if( !zEnd ) return SQLITE_NOMEM;
sqlite3_bind_text(pStmt, 5, zEnd, -1, sqlite3_free);
}
sqlite3_bind_blob(pStmt, 6, zRoot, nRoot, SQLITE_STATIC);
sqlite3_step(pStmt);
rc = sqlite3_reset(pStmt);
@ -2246,6 +2268,9 @@ static int fts3SegWriterAdd(
nDoclist; /* Doclist data */
}
/* Increase the total number of bytes written to account for the new entry. */
pWriter->nLeafData += nReq;
/* If the buffer currently allocated is too small for this entry, realloc
** the buffer to make it large enough.
*/
@ -2317,13 +2342,13 @@ static int fts3SegWriterFlush(
pWriter->iFirst, pWriter->iFree, &iLast, &zRoot, &nRoot);
}
if( rc==SQLITE_OK ){
rc = fts3WriteSegdir(
p, iLevel, iIdx, pWriter->iFirst, iLastLeaf, iLast, zRoot, nRoot);
rc = fts3WriteSegdir(p, iLevel, iIdx,
pWriter->iFirst, iLastLeaf, iLast, pWriter->nLeafData, zRoot, nRoot);
}
}else{
/* The entire tree fits on the root node. Write it to the segdir table. */
rc = fts3WriteSegdir(
p, iLevel, iIdx, 0, 0, 0, pWriter->aData, pWriter->nData);
rc = fts3WriteSegdir(p, iLevel, iIdx,
0, 0, 0, pWriter->nLeafData, pWriter->aData, pWriter->nData);
}
p->nLeafAdd++;
return rc;
@ -2942,6 +2967,106 @@ void sqlite3Fts3SegReaderFinish(
}
}
/*
** Decode the "end_block" field, selected by column iCol of the SELECT
** statement passed as the first argument.
**
** The field is read as text. It is expected to hold a decimal integer,
** optionally followed by one or more spaces and a second decimal integer.
** The first integer is written to *piEndBlock and the second to *pnByte.
** If the second integer is missing, *pnByte is set to 0. Parsing stops at
** the first character that does not fit this layout.
**
** If sqlite3_column_text() returns NULL, neither *piEndBlock nor *pnByte
** is modified. Callers must therefore initialize both variables before
** calling this function.
**
** Digits are accumulated in an unsigned 64-bit integer. The text comes
** from the database file, so an overlong run of digits (for example in a
** corrupt record) must wrap around rather than trigger signed integer
** overflow, which is undefined behaviour in C.
*/
static void fts3ReadEndBlockField(
  sqlite3_stmt *pStmt,            /* SELECT statement to read the field from */
  int iCol,                       /* Column of pStmt containing "end_block" */
  i64 *piEndBlock,                /* OUT: First integer stored in the field */
  i64 *pnByte                     /* OUT: Second integer in the field, or 0 */
){
  const unsigned char *zText = sqlite3_column_text(pStmt, iCol);
  if( zText ){
    int i;
    sqlite3_uint64 iVal = 0;

    /* First integer: the end_block value itself. */
    for(i=0; zText[i]>='0' && zText[i]<='9'; i++){
      iVal = iVal*10 + (sqlite3_uint64)(zText[i] - '0');
    }
    *piEndBlock = (i64)iVal;

    /* Skip the separator, then read the optional second integer. */
    while( zText[i]==' ' ) i++;
    iVal = 0;
    for(/* no-op */; zText[i]>='0' && zText[i]<='9'; i++){
      iVal = iVal*10 + (sqlite3_uint64)(zText[i] - '0');
    }
    *pnByte = (i64)iVal;
  }
}
/*
** A segment of size nByte bytes has just been written to absolute level
** iAbsLevel. Promote any segments that should be promoted as a result.
**
** The %_segdir rows for levels iAbsLevel+1 up to the last level that
** belongs to the same index are scanned. If any of them records a size
** (the second integer of its "end_block" field) larger than nByte, no
** promotion takes place. Otherwise every segment on levels iAbsLevel
** through the last level is moved to level iAbsLevel, with "idx" values
** reassigned from 0 upwards in the order returned by
** SQL_SELECT_LEVEL_RANGE2.
**
** The move is done in two steps. Each row is first parked on level -1
** with its new idx value (SQL_UPDATE_LEVEL_IDX), and then all parked rows
** are moved to iAbsLevel with a single statement (SQL_UPDATE_LEVEL).
**
** Return SQLITE_OK if successful, or an SQLite error code otherwise.
*/
static int fts3PromoteSegments(
  Fts3Table *p,                   /* FTS table handle */
  int iAbsLevel,                  /* Absolute level just updated */
  sqlite3_int64 nByte             /* Size of new segment at iAbsLevel */
){
  int rc = SQLITE_OK;
  sqlite3_stmt *pRange;

  rc = fts3SqlStmt(p, SQL_SELECT_LEVEL_RANGE2, &pRange, 0);
  if( rc==SQLITE_OK ){
    int bOk = 1;                  /* True if promotion should go ahead */
    int iLast = (iAbsLevel/FTS3_SEGDIR_MAXLEVEL + 1) * FTS3_SEGDIR_MAXLEVEL - 1;

    /* Check the recorded size of each segment above iAbsLevel. */
    sqlite3_bind_int(pRange, 1, iAbsLevel+1);
    sqlite3_bind_int(pRange, 2, iLast);
    while( SQLITE_ROW==sqlite3_step(pRange) ){
      /* fts3ReadEndBlockField() leaves its outputs untouched if the column
      ** text cannot be read, so both variables must start out defined. A
      ** size of 0 is also what a field with no size component decodes to. */
      i64 nSize = 0;
      i64 dummy = 0;
      fts3ReadEndBlockField(pRange, 2, &dummy, &nSize);
      if( nSize>nByte ){
        bOk = 0;
        break;
      }
    }
    rc = sqlite3_reset(pRange);

    if( bOk ){
      int iIdx = 0;
      sqlite3_stmt *pUpdate1 = 0;
      sqlite3_stmt *pUpdate2 = 0;

      if( rc==SQLITE_OK ){
        rc = fts3SqlStmt(p, SQL_UPDATE_LEVEL_IDX, &pUpdate1, 0);
      }
      if( rc==SQLITE_OK ){
        rc = fts3SqlStmt(p, SQL_UPDATE_LEVEL, &pUpdate2, 0);
      }

      if( rc==SQLITE_OK ){
        /* Re-run the range query with the lower bound moved down to
        ** iAbsLevel so that the new segment is included. The upper bound
        ** bound earlier (iLast) is still in place. */
        sqlite3_bind_int(pRange, 1, iAbsLevel);
        while( SQLITE_ROW==sqlite3_step(pRange) ){
          sqlite3_bind_int(pUpdate1, 1, iIdx++);
          sqlite3_bind_int(pUpdate1, 2, sqlite3_column_int(pRange, 0));
          sqlite3_bind_int(pUpdate1, 3, sqlite3_column_int(pRange, 1));
          sqlite3_step(pUpdate1);
          rc = sqlite3_reset(pUpdate1);
          if( rc!=SQLITE_OK ){
            /* Reset pRange without overwriting the error code in rc. */
            sqlite3_reset(pRange);
            break;
          }
        }
      }
      if( rc==SQLITE_OK ){
        rc = sqlite3_reset(pRange);
      }

      /* Move every row parked on level -1 to its final level. */
      if( rc==SQLITE_OK ){
        sqlite3_bind_int(pUpdate2, 1, iAbsLevel);
        sqlite3_step(pUpdate2);
        rc = sqlite3_reset(pUpdate2);
      }
    }
  }

  return rc;
}
/*
** Merge all level iLevel segments in the database into a single
** iLevel+1 segment. Or, if iLevel<0, merge all segments into a
@ -3026,6 +3151,9 @@ static int fts3SegmentMerge(
if( rc!=SQLITE_OK ) goto finished;
}
rc = fts3SegWriterFlush(p, pWriter, iNewLevel, iIdx);
if( rc==SQLITE_OK ){
rc = fts3PromoteSegments(p, iNewLevel, pWriter->nLeafData);
}
finished:
fts3SegWriterFree(pWriter);
@ -3035,7 +3163,7 @@ static int fts3SegmentMerge(
/*
** Flush the contents of pendingTerms to level 0 segments.
** Flush the contents of pendingTerms to level 0 segments.
*/
int sqlite3Fts3PendingTermsFlush(Fts3Table *p){
int rc = SQLITE_OK;
@ -3426,6 +3554,7 @@ struct IncrmergeWriter {
int iIdx; /* Index of *output* segment in iAbsLevel+1 */
sqlite3_int64 iStart; /* Block number of first allocated block */
sqlite3_int64 iEnd; /* Block number of last allocated block */
sqlite3_int64 nLeafData; /* Bytes of leaf page data so far */
NodeWriter aNodeWriter[FTS_MAX_APPENDABLE_HEIGHT];
};
@ -3764,8 +3893,8 @@ static int fts3IncrmergeAppend(
nSpace += sqlite3Fts3VarintLen(nDoclist) + nDoclist;
}
pWriter->nLeafData += nSpace;
blobGrowBuffer(&pLeaf->block, pLeaf->block.n + nSpace, &rc);
if( rc==SQLITE_OK ){
if( pLeaf->block.n==0 ){
pLeaf->block.n = 1;
@ -3864,6 +3993,7 @@ static void fts3IncrmergeRelease(
pWriter->iStart, /* start_block */
pWriter->aNodeWriter[0].iBlock, /* leaves_end_block */
pWriter->iEnd, /* end_block */
pWriter->nLeafData, /* bytes of leaf data */
pRoot->block.a, pRoot->block.n /* root */
);
}
@ -3965,7 +4095,7 @@ static int fts3IncrmergeLoad(
if( sqlite3_step(pSelect)==SQLITE_ROW ){
iStart = sqlite3_column_int64(pSelect, 1);
iLeafEnd = sqlite3_column_int64(pSelect, 2);
iEnd = sqlite3_column_int64(pSelect, 3);
fts3ReadEndBlockField(pSelect, 3, &iEnd, &pWriter->nLeafData);
nRoot = sqlite3_column_bytes(pSelect, 4);
aRoot = sqlite3_column_blob(pSelect, 4);
}else{
@ -4566,11 +4696,11 @@ static int fts3IncrmergeHintPop(Blob *pHint, i64 *piAbsLevel, int *pnInput){
/*
** Attempt an incremental merge that writes nMerge leaf blocks.
**
** Incremental merges happen nMin segments at a time. The two
** segments to be merged are the nMin oldest segments (the ones with
** the smallest indexes) in the highest level that contains at least
** nMin segments. Multiple merges might occur in an attempt to write the
** quota of nMerge leaf blocks.
** Incremental merges happen nMin segments at a time. The segments
** to be merged are the nMin oldest segments (the ones with the smallest
** values for the _segdir.idx field) in the highest level that contains
** at least nMin segments. Multiple merges might occur in an attempt to
** write the quota of nMerge leaf blocks.
*/
int sqlite3Fts3Incrmerge(Fts3Table *p, int nMerge, int nMin){
int rc; /* Return code */
@ -4687,6 +4817,9 @@ int sqlite3Fts3Incrmerge(Fts3Table *p, int nMerge, int nMin){
}
fts3IncrmergeRelease(p, pWriter, &rc);
if( nSeg==0 ){
fts3PromoteSegments(p, iAbsLevel+1, pWriter->nLeafData);
}
}
sqlite3Fts3SegReaderFinish(pCsr);

View File

@ -1,5 +1,5 @@
C When\susing\sVisual\sStudio\s2013,\sadd\sthe\sappropriate\sMaxPlatformVersion\sattribute\sto\sthe\sVSIX\sSDK\smanifest.
D 2014-05-10T17:28:45.158
C Experimental\scode\sto\sprevent\sFTS\sindexes\sfrom\sgrowing\sindefinitely\sas\sthe\stable\sis\supdated.
D 2014-05-12T20:04:48.571
F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f
F Makefile.in dd2b1aba364ff9b05de41086f74407f285c57670
F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23
@ -80,7 +80,7 @@ F ext/fts3/README.tokenizers e0a8b81383ea60d0334d274fadf305ea14a8c314
F ext/fts3/README.txt 8c18f41574404623b76917b9da66fcb0ab38328d
F ext/fts3/fts3.c 41b1920b9a8657963f09cb93b208c2671c5568db
F ext/fts3/fts3.h 3a10a0af180d502cecc50df77b1b22df142817fe
F ext/fts3/fts3Int.h bdeb9015405e8facffb8fc7e09174521a2a780f4
F ext/fts3/fts3Int.h b250b154764b86be4ee540f8c233a59858e88f1d
F ext/fts3/fts3_aux.c 5c211e17a64885faeb16b9ba7772f9d5445c2365
F ext/fts3/fts3_expr.c 2ac35bda474f00c14c19608e49a02c8c7ceb9970
F ext/fts3/fts3_hash.c 29b986e43f4e9dd40110eafa377dc0d63c422c60
@ -96,7 +96,7 @@ F ext/fts3/fts3_tokenizer.h 64c6ef6c5272c51ebe60fc607a896e84288fcbc3
F ext/fts3/fts3_tokenizer1.c 5c98225a53705e5ee34824087478cf477bdb7004
F ext/fts3/fts3_unicode.c 92391b4b4fb043564c6539ea9b8661e3bcba47b9
F ext/fts3/fts3_unicode2.c 0113d3acf13429e6dc38e0647d1bc71211c31a4d
F ext/fts3/fts3_write.c 74c00329006c3ed6325ba4e5ab7c9b5fc99c8934
F ext/fts3/fts3_write.c 5fd2aa9d1812387c6254304e20d9ac2b29e16700
F ext/fts3/fts3speed.tcl b54caf6a18d38174f1a6e84219950d85e98bb1e9
F ext/fts3/mkfts3amal.tcl 252ecb7fe6467854f2aa237bf2c390b74e71f100
F ext/fts3/tool/fts3view.c 6cfc5b67a5f0e09c0d698f9fd012c784bfaa9197
@ -570,6 +570,7 @@ F test/fts4aa.test 0c3152322c7f0b548cc942ad763eaba0da87ccca
F test/fts4check.test 66fa274cab2b615f2fb338b257713aba8fad88a8
F test/fts4content.test 2e7252557d6d24afa101d9ba1de710d6140e6d06
F test/fts4docid.test e33c383cfbdff0284685604d256f347a18fdbf01
F test/fts4growth.test f7eac9fadfe67765c4a0d6202c85f7272766fb9e
F test/fts4incr.test 361960ed3550e781f3f313e17e2182ef9cefc0e9
F test/fts4langid.test 24a6e41063b416bbdf371ff6b4476fa41c194aa7
F test/fts4merge.test c424309743fdd203f8e56a1f1cd7872cd66cc0ee
@ -1170,7 +1171,10 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1
F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4
F tool/warnings.sh d1a6de74685f360ab718efda6265994b99bbea01
F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f
P c3dce2e7390eec3a337be1b99f80ad5f721cc647
R 7cb8e1b3df65a486b753c19d33da3975
U mistachkin
Z f2874f77f50d8a6763121ff9dede6833
P 0a4f59676bd0ab33b2c86c9a35a2ebbdbaf09ee7
R de45a14af3a90f6b390273a9a2d8cfa9
T *branch * fts4-experimental
T *sym-fts4-experimental *
T -sym-trunk *
U dan
Z a03d85f9bfa278d813164b1e97a88ff7

View File

@ -1 +1 @@
0a4f59676bd0ab33b2c86c9a35a2ebbdbaf09ee7
b3b505a4dd0c679437a4272109f1188175088cd1

191
test/fts4growth.test Normal file
View File

@ -0,0 +1,191 @@
# 2014 May 12
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#*************************************************************************
# This file implements regression tests for SQLite library. The
# focus of this script is testing the FTS4 module.
#
#
set testdir [file dirname $argv0]
source $testdir/tester.tcl
set testprefix fts4growth
# If SQLITE_ENABLE_FTS3 is not defined, omit this file.
ifcapable !fts3 {
finish_test
return
}
source $testdir/genesis.tcl
do_execsql_test 1.1 { CREATE VIRTUAL TABLE x1 USING fts3; }
do_test 1.2 {
foreach L {
{"See here, young man," said Mulga Bill, "from Walgett to the sea,}
{From Conroy's Gap to Castlereagh, there's none can ride like me.}
{I'm good all round at everything as everybody knows,}
{Although I'm not the one to talk -- I hate a man that blows.}
} {
execsql { INSERT INTO x1 VALUES($L) }
}
execsql { SELECT end_block, length(root) FROM x1_segdir }
} {{0 114} 114 {0 118} 118 {0 95} 95 {0 115} 115}
do_execsql_test 1.3 {
INSERT INTO x1(x1) VALUES('optimize');
SELECT level, end_block, length(root) FROM x1_segdir;
} {0 {0 394} 394}
do_test 1.4 {
foreach L {
{But riding is my special gift, my chiefest, sole delight;}
{Just ask a wild duck can it swim, a wildcat can it fight.}
{There's nothing clothed in hair or hide, or built of flesh or steel,}
{There's nothing walks or jumps, or runs, on axle, hoof, or wheel,}
{But what I'll sit, while hide will hold and girths and straps are tight:}
{I'll ride this here two-wheeled concern right straight away at sight."}
} {
execsql { INSERT INTO x1 VALUES($L) }
}
execsql {
INSERT INTO x1(x1) VALUES('merge=4,4');
SELECT level, end_block, length(root) FROM x1_segdir;
}
} {0 {0 110} 110 0 {0 132} 132 0 {0 129} 129 1 {128 658} 2}
do_execsql_test 1.5 {
SELECT length(block) FROM x1_segments;
} {658 {}}
do_test 1.6 {
foreach L {
{'Twas Mulga Bill, from Eaglehawk, that sought his own abode,}
{That perched above Dead Man's Creek, beside the mountain road.}
{He turned the cycle down the hill and mounted for the fray,}
{But 'ere he'd gone a dozen yards it bolted clean away.}
{It left the track, and through the trees, just like a silver steak,}
{It whistled down the awful slope towards the Dead Man's Creek.}
{It shaved a stump by half an inch, it dodged a big white-box:}
{The very wallaroos in fright went scrambling up the rocks,}
{The wombats hiding in their caves dug deeper underground,}
{As Mulga Bill, as white as chalk, sat tight to every bound.}
{It struck a stone and gave a spring that cleared a fallen tree,}
{It raced beside a precipice as close as close could be;}
{And then as Mulga Bill let out one last despairing shriek}
{It made a leap of twenty feet into the Dead Man's Creek.}
} {
execsql { INSERT INTO x1 VALUES($L) }
}
execsql {
SELECT level, end_block, length(root) FROM x1_segdir;
}
} {1 {128 658} 2 1 {130 1377} 6 0 {0 117} 117}
do_execsql_test 1.7 {
SELECT sum(length(block)) FROM x1_segments WHERE blockid IN (129, 130);
} {1377}
#-------------------------------------------------------------------------
#
do_execsql_test 2.1 {
CREATE TABLE t1(docid, words);
CREATE VIRTUAL TABLE x2 USING fts4;
}
fts_kjv_genesis
# Populate x2 from t1 in two passes. The first pass inserts every row of
# t1 into x2 under its own docid. The second pass inserts the same text
# again with a NULL docid, and stops as soon as the number of x2_segdir
# rows with level<2 is exactly 2. The test script itself is expected to
# return an empty result.
do_test 2.2 {
foreach id [db eval {SELECT docid FROM t1}] {
execsql {
INSERT INTO x2(docid, content) SELECT $id, words FROM t1 WHERE docid=$id
}
}
foreach id [db eval {SELECT docid FROM t1}] {
execsql {
INSERT INTO x2(docid, content) SELECT NULL, words FROM t1 WHERE docid=$id
}
if {[db one {SELECT count(*) FROM x2_segdir WHERE level<2}]==2} break
}
} {}
do_execsql_test 2.3 {
SELECT count(*) FROM x2_segdir WHERE level=2;
SELECT count(*) FROM x2_segdir WHERE level=3;
} {6 0}
do_execsql_test 2.4 {
INSERT INTO x2(x2) VALUES('merge=4,4');
SELECT count(*) FROM x2_segdir WHERE level=2;
SELECT count(*) FROM x2_segdir WHERE level=3;
} {6 1}
do_execsql_test 2.5 {
SELECT end_block FROM x2_segdir WHERE level=3;
INSERT INTO x2(x2) VALUES('merge=4,4');
SELECT end_block FROM x2_segdir WHERE level=3;
INSERT INTO x2(x2) VALUES('merge=4,4');
SELECT end_block FROM x2_segdir WHERE level=3;
} {{3828 3430} {3828 10191} {3828 14109}}
do_execsql_test 2.6 {
SELECT sum(length(block)) FROM x2_segdir, x2_segments WHERE
blockid BETWEEN start_block AND leaves_end_block
AND level=3
} {14109}
do_execsql_test 2.7 {
INSERT INTO x2(x2) VALUES('merge=1000,4');
SELECT end_block FROM x2_segdir WHERE level=3;
} {{3828 86120}}
do_execsql_test 2.8 {
SELECT sum(length(block)) FROM x2_segdir, x2_segments WHERE
blockid BETWEEN start_block AND leaves_end_block
AND level=3
} {86120}
#--------------------------------------------------------------------------
do_execsql_test 3.1 {
DROP TABLE IF EXISTS x2;
DROP TABLE IF EXISTS t1;
CREATE TABLE t1(docid, words);
CREATE VIRTUAL TABLE x2 USING fts4;
}
fts_kjv_genesis
# For each of the first two docids returned from t1 (LIMIT 2), delete the
# row with that docid from x2 and then re-insert it from t1. Takes no
# arguments.
proc t1_to_x2 {} {
foreach id [db eval {SELECT docid FROM t1 LIMIT 2}] {
execsql {
DELETE FROM x2 WHERE docid=$id;
INSERT INTO x2(docid, content) SELECT $id, words FROM t1 WHERE docid=$id;
}
}
}
#do_test 3.2 {
#t1_to_x2
#execsql {SELECT level, count(*) FROM x2_segdir GROUP BY level}
#} {0 13 1 15 2 5}
#proc second {x} { lindex $x 1 }
#db func second second
#for {set i 0} {$i <1000} {incr i} {
# t1_to_x2
# db eval {
# SELECT level, group_concat( second(end_block), ' ' ) AS c FROM x2_segdir GROUP BY level;
# } {
# puts "$i.$level: $c"
# }
#}
finish_test