diff --git a/ext/fts3/fts3Int.h b/ext/fts3/fts3Int.h index a5bb2f0041..171a75d124 100644 --- a/ext/fts3/fts3Int.h +++ b/ext/fts3/fts3Int.h @@ -216,7 +216,7 @@ struct Fts3Table { /* Precompiled statements used by the implementation. Each of these ** statements is run and reset within a single virtual table API call. */ - sqlite3_stmt *aStmt[37]; + sqlite3_stmt *aStmt[40]; char *zReadExprlist; char *zWriteExprlist; diff --git a/ext/fts3/fts3_write.c b/ext/fts3/fts3_write.c index 90d1609226..b54d61390a 100644 --- a/ext/fts3/fts3_write.c +++ b/ext/fts3/fts3_write.c @@ -193,6 +193,7 @@ struct SegmentWriter { int nSize; /* Size of allocation at aData */ int nData; /* Bytes of data in aData */ char *aData; /* Pointer to block from malloc() */ + i64 nLeafData; /* Number of bytes of leaf data written */ }; /* @@ -268,6 +269,10 @@ struct SegmentNode { #define SQL_SELECT_INDEXES 35 #define SQL_SELECT_MXLEVEL 36 +#define SQL_SELECT_LEVEL_RANGE2 37 +#define SQL_UPDATE_LEVEL_IDX 38 +#define SQL_UPDATE_LEVEL 39 + /* ** This function is used to obtain an SQLite prepared statement handle ** for the statement identified by the second argument. If successful, @@ -369,7 +374,17 @@ static int fts3SqlStmt( /* SQL_SELECT_MXLEVEL ** Return the largest relative level in the FTS index or indexes. */ -/* 36 */ "SELECT max( level %% 1024 ) FROM %Q.'%q_segdir'" +/* 36 */ "SELECT max( level %% 1024 ) FROM %Q.'%q_segdir'", + + /* Return segments with level in [?1,?2], in order from oldest to newest. */ +/* 37 */ "SELECT level, idx, end_block " + "FROM %Q.'%q_segdir' WHERE level BETWEEN ? AND ? " + "ORDER BY level DESC, idx ASC", + + /* Update statements used while promoting segments */ +/* 38 */ "UPDATE %Q.'%q_segdir' SET level=-1,idx=? WHERE level=? AND idx=?", +/* 39 */ "UPDATE %Q.'%q_segdir' SET level=? 
WHERE level=-1" + }; int rc = SQLITE_OK; sqlite3_stmt *pStmt; @@ -1910,6 +1925,7 @@ static int fts3WriteSegdir( sqlite3_int64 iStartBlock, /* Value for "start_block" field */ sqlite3_int64 iLeafEndBlock, /* Value for "leaves_end_block" field */ sqlite3_int64 iEndBlock, /* Value for "end_block" field */ + sqlite3_int64 nLeafData, /* Bytes of leaf data in segment */ char *zRoot, /* Blob value for "root" field */ int nRoot /* Number of bytes in buffer zRoot */ ){ @@ -1920,7 +1936,13 @@ static int fts3WriteSegdir( sqlite3_bind_int(pStmt, 2, iIdx); sqlite3_bind_int64(pStmt, 3, iStartBlock); sqlite3_bind_int64(pStmt, 4, iLeafEndBlock); - sqlite3_bind_int64(pStmt, 5, iEndBlock); + if( nLeafData==0 ){ + sqlite3_bind_int64(pStmt, 5, iEndBlock); + }else{ + char *zEnd = sqlite3_mprintf("%lld %lld", iEndBlock, nLeafData); + if( !zEnd ) return SQLITE_NOMEM; + sqlite3_bind_text(pStmt, 5, zEnd, -1, sqlite3_free); + } sqlite3_bind_blob(pStmt, 6, zRoot, nRoot, SQLITE_STATIC); sqlite3_step(pStmt); rc = sqlite3_reset(pStmt); @@ -2246,6 +2268,9 @@ static int fts3SegWriterAdd( nDoclist; /* Doclist data */ } + /* Increase the total number of bytes written to account for the new entry. */ + pWriter->nLeafData += nReq; + /* If the buffer currently allocated is too small for this entry, realloc ** the buffer to make it large enough. */ @@ -2317,13 +2342,13 @@ static int fts3SegWriterFlush( pWriter->iFirst, pWriter->iFree, &iLast, &zRoot, &nRoot); } if( rc==SQLITE_OK ){ - rc = fts3WriteSegdir( - p, iLevel, iIdx, pWriter->iFirst, iLastLeaf, iLast, zRoot, nRoot); + rc = fts3WriteSegdir(p, iLevel, iIdx, + pWriter->iFirst, iLastLeaf, iLast, pWriter->nLeafData, zRoot, nRoot); } }else{ /* The entire tree fits on the root node. Write it to the segdir table. 
*/ - rc = fts3WriteSegdir( - p, iLevel, iIdx, 0, 0, 0, pWriter->aData, pWriter->nData); + rc = fts3WriteSegdir(p, iLevel, iIdx, + 0, 0, 0, pWriter->nLeafData, pWriter->aData, pWriter->nData); } p->nLeafAdd++; return rc; @@ -2942,6 +2967,106 @@ void sqlite3Fts3SegReaderFinish( } } +/* +** Decode the "end_block" field in column iCol of SELECT statement pStmt: +** text "<iEndBlock> <nByte>". Both outputs are left unset if it is NULL. +*/ +static void fts3ReadEndBlockField( + sqlite3_stmt *pStmt, + int iCol, + i64 *piEndBlock, + i64 *pnByte +){ + const unsigned char *zText = sqlite3_column_text(pStmt, iCol); + if( zText ){ + int i; + sqlite3_uint64 iVal = 0; /* Unsigned: overlong digit runs wrap, no UB */ + for(i=0; zText[i]>='0' && zText[i]<='9'; i++){ + iVal = iVal*10 + (zText[i] - '0'); + } + *piEndBlock = (i64)iVal; + while( zText[i]==' ' ) i++; + iVal = 0; /* Stays 0 when there is no second number */ + for(/* no-op */; zText[i]>='0' && zText[i]<='9'; i++){ + iVal = iVal*10 + (zText[i] - '0'); + } + *pnByte = (i64)iVal; + } +} + + +/* +** A segment of size nByte bytes has just been written to absolute level +** iAbsLevel. Promote any segments that should be promoted as a result. 
+*/ +static int fts3PromoteSegments( + Fts3Table *p, /* FTS table handle */ + int iAbsLevel, /* Absolute level just updated */ + sqlite3_int64 nByte /* Size of new segment at iAbsLevel */ +){ + int rc = SQLITE_OK; + sqlite3_stmt *pRange; + + rc = fts3SqlStmt(p, SQL_SELECT_LEVEL_RANGE2, &pRange, 0); + + if( rc==SQLITE_OK ){ + int bOk = 1; /* Cleared if a segment above iAbsLevel is larger than nByte */ + int iLast = (iAbsLevel/FTS3_SEGDIR_MAXLEVEL + 1) * FTS3_SEGDIR_MAXLEVEL - 1; + + sqlite3_bind_int(pRange, 1, iAbsLevel+1); + sqlite3_bind_int(pRange, 2, iLast); + while( SQLITE_ROW==sqlite3_step(pRange) ){ + i64 nSize = 0, dummy; /* nSize must be 0 if end_block is NULL (reader skips it) */ + fts3ReadEndBlockField(pRange, 2, &dummy, &nSize); + if( nSize>nByte ){ + bOk = 0; + break; + } + } + rc = sqlite3_reset(pRange); + + if( bOk ){ + int iIdx = 0; /* Next idx value to assign to a moved segment */ + sqlite3_stmt *pUpdate1; + sqlite3_stmt *pUpdate2; + + if( rc==SQLITE_OK ){ + rc = fts3SqlStmt(p, SQL_UPDATE_LEVEL_IDX, &pUpdate1, 0); + } + if( rc==SQLITE_OK ){ + rc = fts3SqlStmt(p, SQL_UPDATE_LEVEL, &pUpdate2, 0); + } + + if( rc==SQLITE_OK ){ + sqlite3_bind_int(pRange, 1, iAbsLevel); /* Rescan from iAbsLevel; ?2 is still iLast */ + while( SQLITE_ROW==sqlite3_step(pRange) ){ + sqlite3_bind_int(pUpdate1, 1, iIdx++); + sqlite3_bind_int(pUpdate1, 2, sqlite3_column_int(pRange, 0)); + sqlite3_bind_int(pUpdate1, 3, sqlite3_column_int(pRange, 1)); + sqlite3_step(pUpdate1); + rc = sqlite3_reset(pUpdate1); + if( rc!=SQLITE_OK ){ + sqlite3_reset(pRange); + break; + } + } + } + if( rc==SQLITE_OK ){ + rc = sqlite3_reset(pRange); + } + + if( rc==SQLITE_OK ){ + sqlite3_bind_int(pUpdate2, 1, iAbsLevel); /* Move the level -1 rows to iAbsLevel */ + sqlite3_step(pUpdate2); + rc = sqlite3_reset(pUpdate2); + } + } + } + + + return rc; +} + /* ** Merge all level iLevel segments in the database into a single ** iLevel+1 segment. 
Or, if iLevel<0, merge all segments into a @@ -3026,6 +3151,9 @@ static int fts3SegmentMerge( if( rc!=SQLITE_OK ) goto finished; } rc = fts3SegWriterFlush(p, pWriter, iNewLevel, iIdx); + if( rc==SQLITE_OK ){ + rc = fts3PromoteSegments(p, iNewLevel, pWriter->nLeafData); + } finished: fts3SegWriterFree(pWriter); @@ -3035,7 +3163,7 @@ static int fts3SegmentMerge( /* -** Flush the contents of pendingTerms to level 0 segments. +** Flush the contents of pendingTerms to level 0 segments. */ int sqlite3Fts3PendingTermsFlush(Fts3Table *p){ int rc = SQLITE_OK; @@ -3426,6 +3554,7 @@ struct IncrmergeWriter { int iIdx; /* Index of *output* segment in iAbsLevel+1 */ sqlite3_int64 iStart; /* Block number of first allocated block */ sqlite3_int64 iEnd; /* Block number of last allocated block */ + sqlite3_int64 nLeafData; /* Bytes of leaf page data so far */ NodeWriter aNodeWriter[FTS_MAX_APPENDABLE_HEIGHT]; }; @@ -3764,8 +3893,8 @@ static int fts3IncrmergeAppend( nSpace += sqlite3Fts3VarintLen(nDoclist) + nDoclist; } + pWriter->nLeafData += nSpace; blobGrowBuffer(&pLeaf->block, pLeaf->block.n + nSpace, &rc); - if( rc==SQLITE_OK ){ if( pLeaf->block.n==0 ){ pLeaf->block.n = 1; @@ -3864,6 +3993,7 @@ static void fts3IncrmergeRelease( pWriter->iStart, /* start_block */ pWriter->aNodeWriter[0].iBlock, /* leaves_end_block */ pWriter->iEnd, /* end_block */ + pWriter->nLeafData, /* end_block */ pRoot->block.a, pRoot->block.n /* root */ ); } @@ -3965,7 +4095,7 @@ static int fts3IncrmergeLoad( if( sqlite3_step(pSelect)==SQLITE_ROW ){ iStart = sqlite3_column_int64(pSelect, 1); iLeafEnd = sqlite3_column_int64(pSelect, 2); - iEnd = sqlite3_column_int64(pSelect, 3); + fts3ReadEndBlockField(pSelect, 3, &iEnd, &pWriter->nLeafData); nRoot = sqlite3_column_bytes(pSelect, 4); aRoot = sqlite3_column_blob(pSelect, 4); }else{ @@ -4566,11 +4696,11 @@ static int fts3IncrmergeHintPop(Blob *pHint, i64 *piAbsLevel, int *pnInput){ /* ** Attempt an incremental merge that writes nMerge leaf blocks. 
** -** Incremental merges happen nMin segments at a time. The two -** segments to be merged are the nMin oldest segments (the ones with -** the smallest indexes) in the highest level that contains at least -** nMin segments. Multiple merges might occur in an attempt to write the -** quota of nMerge leaf blocks. +** Incremental merges happen nMin segments at a time. The segments +** to be merged are the nMin oldest segments (the ones with the smallest +** values for the _segdir.idx field) in the highest level that contains +** at least nMin segments. Multiple merges might occur in an attempt to +** write the quota of nMerge leaf blocks. */ int sqlite3Fts3Incrmerge(Fts3Table *p, int nMerge, int nMin){ int rc; /* Return code */ @@ -4687,6 +4817,9 @@ int sqlite3Fts3Incrmerge(Fts3Table *p, int nMerge, int nMin){ } fts3IncrmergeRelease(p, pWriter, &rc); + if( nSeg==0 ){ + fts3PromoteSegments(p, iAbsLevel+1, pWriter->nLeafData); + } } sqlite3Fts3SegReaderFinish(pCsr); diff --git a/manifest b/manifest index 00a1115ee6..1ff6b237e6 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C When\susing\sVisual\sStudio\s2013,\sadd\sthe\sappropriate\sMaxPlatformVersion\sattribute\sto\sthe\sVSIX\sSDK\smanifest. -D 2014-05-10T17:28:45.158 +C Experimental\scode\sto\sprevent\sFTS\sindexes\sfrom\sgrowing\sindefinitely\sas\sthe\stable\sis\supdated. 
+D 2014-05-12T20:04:48.571 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in dd2b1aba364ff9b05de41086f74407f285c57670 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -80,7 +80,7 @@ F ext/fts3/README.tokenizers e0a8b81383ea60d0334d274fadf305ea14a8c314 F ext/fts3/README.txt 8c18f41574404623b76917b9da66fcb0ab38328d F ext/fts3/fts3.c 41b1920b9a8657963f09cb93b208c2671c5568db F ext/fts3/fts3.h 3a10a0af180d502cecc50df77b1b22df142817fe -F ext/fts3/fts3Int.h bdeb9015405e8facffb8fc7e09174521a2a780f4 +F ext/fts3/fts3Int.h b250b154764b86be4ee540f8c233a59858e88f1d F ext/fts3/fts3_aux.c 5c211e17a64885faeb16b9ba7772f9d5445c2365 F ext/fts3/fts3_expr.c 2ac35bda474f00c14c19608e49a02c8c7ceb9970 F ext/fts3/fts3_hash.c 29b986e43f4e9dd40110eafa377dc0d63c422c60 @@ -96,7 +96,7 @@ F ext/fts3/fts3_tokenizer.h 64c6ef6c5272c51ebe60fc607a896e84288fcbc3 F ext/fts3/fts3_tokenizer1.c 5c98225a53705e5ee34824087478cf477bdb7004 F ext/fts3/fts3_unicode.c 92391b4b4fb043564c6539ea9b8661e3bcba47b9 F ext/fts3/fts3_unicode2.c 0113d3acf13429e6dc38e0647d1bc71211c31a4d -F ext/fts3/fts3_write.c 74c00329006c3ed6325ba4e5ab7c9b5fc99c8934 +F ext/fts3/fts3_write.c 5fd2aa9d1812387c6254304e20d9ac2b29e16700 F ext/fts3/fts3speed.tcl b54caf6a18d38174f1a6e84219950d85e98bb1e9 F ext/fts3/mkfts3amal.tcl 252ecb7fe6467854f2aa237bf2c390b74e71f100 F ext/fts3/tool/fts3view.c 6cfc5b67a5f0e09c0d698f9fd012c784bfaa9197 @@ -570,6 +570,7 @@ F test/fts4aa.test 0c3152322c7f0b548cc942ad763eaba0da87ccca F test/fts4check.test 66fa274cab2b615f2fb338b257713aba8fad88a8 F test/fts4content.test 2e7252557d6d24afa101d9ba1de710d6140e6d06 F test/fts4docid.test e33c383cfbdff0284685604d256f347a18fdbf01 +F test/fts4growth.test f7eac9fadfe67765c4a0d6202c85f7272766fb9e F test/fts4incr.test 361960ed3550e781f3f313e17e2182ef9cefc0e9 F test/fts4langid.test 24a6e41063b416bbdf371ff6b4476fa41c194aa7 F test/fts4merge.test c424309743fdd203f8e56a1f1cd7872cd66cc0ee @@ -1170,7 +1171,10 @@ F 
tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh d1a6de74685f360ab718efda6265994b99bbea01 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P c3dce2e7390eec3a337be1b99f80ad5f721cc647 -R 7cb8e1b3df65a486b753c19d33da3975 -U mistachkin -Z f2874f77f50d8a6763121ff9dede6833 +P 0a4f59676bd0ab33b2c86c9a35a2ebbdbaf09ee7 +R de45a14af3a90f6b390273a9a2d8cfa9 +T *branch * fts4-experimental +T *sym-fts4-experimental * +T -sym-trunk * +U dan +Z a03d85f9bfa278d813164b1e97a88ff7 diff --git a/manifest.uuid b/manifest.uuid index ff047bf0ce..a9facd1d8e 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -0a4f59676bd0ab33b2c86c9a35a2ebbdbaf09ee7 \ No newline at end of file +b3b505a4dd0c679437a4272109f1188175088cd1 \ No newline at end of file diff --git a/test/fts4growth.test b/test/fts4growth.test new file mode 100644 index 0000000000..ab2ad62142 --- /dev/null +++ b/test/fts4growth.test @@ -0,0 +1,191 @@ +# 2014 May 12 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#************************************************************************* +# This file implements regression tests for SQLite library. The +# focus of this script is testing the FTS4 module. +# +# + +set testdir [file dirname $argv0] +source $testdir/tester.tcl +set testprefix fts4growth + +# If SQLITE_ENABLE_FTS3 is not defined, omit this file. 
+ifcapable !fts3 { + finish_test + return +} + +source $testdir/genesis.tcl + +do_execsql_test 1.1 { CREATE VIRTUAL TABLE x1 USING fts3; } + +do_test 1.2 { + foreach L { + {"See here, young man," said Mulga Bill, "from Walgett to the sea,} + {From Conroy's Gap to Castlereagh, there's none can ride like me.} + {I'm good all round at everything as everybody knows,} + {Although I'm not the one to talk -- I hate a man that blows.} + } { + execsql { INSERT INTO x1 VALUES($L) } + } + execsql { SELECT end_block, length(root) FROM x1_segdir } +} {{0 114} 114 {0 118} 118 {0 95} 95 {0 115} 115} + +do_execsql_test 1.3 { + INSERT INTO x1(x1) VALUES('optimize'); + SELECT level, end_block, length(root) FROM x1_segdir; +} {0 {0 394} 394} + +do_test 1.4 { + foreach L { + {But riding is my special gift, my chiefest, sole delight;} + {Just ask a wild duck can it swim, a wildcat can it fight.} + {There's nothing clothed in hair or hide, or built of flesh or steel,} + {There's nothing walks or jumps, or runs, on axle, hoof, or wheel,} + {But what I'll sit, while hide will hold and girths and straps are tight:} + {I'll ride this here two-wheeled concern right straight away at sight."} + } { + execsql { INSERT INTO x1 VALUES($L) } + } + execsql { + INSERT INTO x1(x1) VALUES('merge=4,4'); + SELECT level, end_block, length(root) FROM x1_segdir; + } +} {0 {0 110} 110 0 {0 132} 132 0 {0 129} 129 1 {128 658} 2} + +do_execsql_test 1.5 { + SELECT length(block) FROM x1_segments; +} {658 {}} + +do_test 1.6 { + foreach L { + {'Twas Mulga Bill, from Eaglehawk, that sought his own abode,} + {That perched above Dead Man's Creek, beside the mountain road.} + {He turned the cycle down the hill and mounted for the fray,} + {But 'ere he'd gone a dozen yards it bolted clean away.} + {It left the track, and through the trees, just like a silver steak,} + {It whistled down the awful slope towards the Dead Man's Creek.} + {It shaved a stump by half an inch, it dodged a big white-box:} + {The very wallaroos 
in fright went scrambling up the rocks,} + {The wombats hiding in their caves dug deeper underground,} + {As Mulga Bill, as white as chalk, sat tight to every bound.} + {It struck a stone and gave a spring that cleared a fallen tree,} + {It raced beside a precipice as close as close could be;} + {And then as Mulga Bill let out one last despairing shriek} + {It made a leap of twenty feet into the Dead Man's Creek.} + } { + execsql { INSERT INTO x1 VALUES($L) } + } + execsql { + SELECT level, end_block, length(root) FROM x1_segdir; + } +} {1 {128 658} 2 1 {130 1377} 6 0 {0 117} 117} + +do_execsql_test 1.7 { + SELECT sum(length(block)) FROM x1_segments WHERE blockid IN (129, 130); +} {1377} + +#------------------------------------------------------------------------- +# +do_execsql_test 2.1 { + CREATE TABLE t1(docid, words); + CREATE VIRTUAL TABLE x2 USING fts4; +} +fts_kjv_genesis +do_test 2.2 { + foreach id [db eval {SELECT docid FROM t1}] { + execsql { + INSERT INTO x2(docid, content) SELECT $id, words FROM t1 WHERE docid=$id + } + } + foreach id [db eval {SELECT docid FROM t1}] { + execsql { + INSERT INTO x2(docid, content) SELECT NULL, words FROM t1 WHERE docid=$id + } + if {[db one {SELECT count(*) FROM x2_segdir WHERE level<2}]==2} break + } +} {} + +do_execsql_test 2.3 { + SELECT count(*) FROM x2_segdir WHERE level=2; + SELECT count(*) FROM x2_segdir WHERE level=3; +} {6 0} + +do_execsql_test 2.4 { + INSERT INTO x2(x2) VALUES('merge=4,4'); + SELECT count(*) FROM x2_segdir WHERE level=2; + SELECT count(*) FROM x2_segdir WHERE level=3; +} {6 1} + +do_execsql_test 2.5 { + SELECT end_block FROM x2_segdir WHERE level=3; + INSERT INTO x2(x2) VALUES('merge=4,4'); + SELECT end_block FROM x2_segdir WHERE level=3; + INSERT INTO x2(x2) VALUES('merge=4,4'); + SELECT end_block FROM x2_segdir WHERE level=3; +} {{3828 3430} {3828 10191} {3828 14109}} + +do_execsql_test 2.6 { + SELECT sum(length(block)) FROM x2_segdir, x2_segments WHERE + blockid BETWEEN start_block AND 
leaves_end_block + AND level=3 +} {14109} + +do_execsql_test 2.7 { + INSERT INTO x2(x2) VALUES('merge=1000,4'); + SELECT end_block FROM x2_segdir WHERE level=3; +} {{3828 86120}} + +do_execsql_test 2.8 { + SELECT sum(length(block)) FROM x2_segdir, x2_segments WHERE + blockid BETWEEN start_block AND leaves_end_block + AND level=3 +} {86120} + +#-------------------------------------------------------------------------- + +do_execsql_test 3.1 { + DROP TABLE IF EXISTS x2; + DROP TABLE IF EXISTS t1; + CREATE TABLE t1(docid, words); + CREATE VIRTUAL TABLE x2 USING fts4; +} +fts_kjv_genesis + +proc t1_to_x2 {} { + foreach id [db eval {SELECT docid FROM t1 LIMIT 2}] { + execsql { + DELETE FROM x2 WHERE docid=$id; + INSERT INTO x2(docid, content) SELECT $id, words FROM t1 WHERE docid=$id; + } + } +} + +#do_test 3.2 { + #t1_to_x2 + #execsql {SELECT level, count(*) FROM x2_segdir GROUP BY level} +#} {0 13 1 15 2 5} + +#proc second {x} { lindex $x 1 } +#db func second second +#for {set i 0} {$i <1000} {incr i} { +# t1_to_x2 +# db eval { +# SELECT level, group_concat( second(end_block), ' ' ) AS c FROM x2_segdir GROUP BY level; +# } { +# puts "$i.$level: $c" +# } +#} + + +finish_test + +