diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index 961206f2b3..c4e7506fb1 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -47,6 +47,10 @@ typedef sqlite3_uint64 u64; #endif +/* Truncate very long tokens to this many bytes. Hard limit is +** (65536-1-1-4-9)==65521 bytes. The limiting factor is the 16-bit offset +** field that occurs at the start of each leaf page (see fts5_index.c). */ +#define FTS5_MAX_TOKEN_SIZE 32768 /* ** Maximum number of prefix indexes on single FTS5 table. This must be diff --git a/ext/fts5/fts5_expr.c b/ext/fts5/fts5_expr.c index 0bc61414b0..eada84f70e 100644 --- a/ext/fts5/fts5_expr.c +++ b/ext/fts5/fts5_expr.c @@ -1493,6 +1493,7 @@ static int fts5ParseTokenize( /* If an error has already occurred, this is a no-op */ if( pCtx->rc!=SQLITE_OK ) return pCtx->rc; + if( nToken>FTS5_MAX_TOKEN_SIZE ) nToken = FTS5_MAX_TOKEN_SIZE; if( pPhrase && pPhrase->nTerm>0 && (tflags & FTS5_TOKEN_COLOCATED) ){ Fts5ExprTerm *pSyn; @@ -2495,6 +2496,7 @@ static int fts5ExprPopulatePoslistsCb( UNUSED_PARAM2(iUnused1, iUnused2); + if( nToken>FTS5_MAX_TOKEN_SIZE ) nToken = FTS5_MAX_TOKEN_SIZE; if( (tflags & FTS5_TOKEN_COLOCATED)==0 ) p->iOff++; for(i=0; inPhrase; i++){ Fts5ExprTerm *pTerm; diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index d08e65bfe6..c477ea6ca6 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -2321,6 +2321,18 @@ static void fts5LeafSeek( fts5SegIterLoadNPos(p, pIter); } +static sqlite3_stmt *fts5IdxSelectStmt(Fts5Index *p){ + if( p->pIdxSelect==0 ){ + Fts5Config *pConfig = p->pConfig; + fts5IndexPrepareStmt(p, &p->pIdxSelect, sqlite3_mprintf( + "SELECT pgno FROM '%q'.'%q_idx' WHERE " + "segid=? AND term<=? ORDER BY term DESC LIMIT 1", + pConfig->zDb, pConfig->zName + )); + } + return p->pIdxSelect; +} + /* ** Initialize the object pIter to point to term pTerm/nTerm within segment ** pSeg. If there is no such term in the index, the iterator is set to EOF. @@ -2338,6 +2350,7 @@ static void fts5SegIterSeekInit( int iPg = 1; int bGe = (flags & FTS5INDEX_QUERY_SCAN); int bDlidx = 0; /* True if there is a doclist-index */ + sqlite3_stmt *pIdxSelect = 0; assert( bGe==0 || (flags & FTS5INDEX_QUERY_DESC)==0 ); assert( pTerm && nTerm ); @@ -2346,23 +2359,16 @@ static void fts5SegIterSeekInit( /* This block sets stack variable iPg to the leaf page number that may ** contain term (pTerm/nTerm), if it is present in the segment. */ - if( p->pIdxSelect==0 ){ - Fts5Config *pConfig = p->pConfig; - fts5IndexPrepareStmt(p, &p->pIdxSelect, sqlite3_mprintf( - "SELECT pgno FROM '%q'.'%q_idx' WHERE " - "segid=? AND term<=? ORDER BY term DESC LIMIT 1", - pConfig->zDb, pConfig->zName - )); - } + pIdxSelect = fts5IdxSelectStmt(p); if( p->rc ) return; - sqlite3_bind_int(p->pIdxSelect, 1, pSeg->iSegid); - sqlite3_bind_blob(p->pIdxSelect, 2, pTerm, nTerm, SQLITE_STATIC); - if( SQLITE_ROW==sqlite3_step(p->pIdxSelect) ){ - i64 val = sqlite3_column_int(p->pIdxSelect, 0); + sqlite3_bind_int(pIdxSelect, 1, pSeg->iSegid); + sqlite3_bind_blob(pIdxSelect, 2, pTerm, nTerm, SQLITE_STATIC); + if( SQLITE_ROW==sqlite3_step(pIdxSelect) ){ + i64 val = sqlite3_column_int(pIdxSelect, 0); iPg = (int)(val>>1); bDlidx = (val & 0x0001); } - p->rc = sqlite3_reset(p->pIdxSelect); + p->rc = sqlite3_reset(pIdxSelect); if( iPgpgnoFirst ){ iPg = pSeg->pgnoFirst; @@ -3552,6 +3558,18 @@ static int fts5AllocateSegid(Fts5Index *p, Fts5Structure *pStruct){ } } assert( iSegid>0 && iSegid<=FTS5_MAX_SEGMENT ); + + { + sqlite3_stmt *pIdxSelect = fts5IdxSelectStmt(p); + if( p->rc==SQLITE_OK ){ + int rc; + u8 aBlob[2] = {0xff, 0xff}; + sqlite3_bind_int(pIdxSelect, 1, iSegid); + sqlite3_bind_blob(pIdxSelect, 2, aBlob, 2, SQLITE_STATIC); + assert( sqlite3_step(pIdxSelect)!=SQLITE_ROW ); + p->rc = sqlite3_reset(pIdxSelect); + } + } #endif } } @@ -3798,6 +3816,9 @@ static void fts5WriteFlushLeaf(Fts5Index *p, Fts5SegWriter *pWriter){ Fts5PageWriter *pPage = &pWriter->writer; i64 iRowid; +static int nCall = 0; +nCall++; + assert( (pPage->pgidx.n==0)==(pWriter->bFirstTermInPage) ); /* Set the szLeaf header field. */ diff --git a/ext/fts5/fts5_storage.c b/ext/fts5/fts5_storage.c index 3cca990ed5..90df3396c3 100644 --- a/ext/fts5/fts5_storage.c +++ b/ext/fts5/fts5_storage.c @@ -369,6 +369,7 @@ static int fts5StorageInsertCallback( Fts5InsertCtx *pCtx = (Fts5InsertCtx*)pContext; Fts5Index *pIdx = pCtx->pStorage->pIndex; UNUSED_PARAM2(iUnused1, iUnused2); + if( nToken>FTS5_MAX_TOKEN_SIZE ) nToken = FTS5_MAX_TOKEN_SIZE; if( (tflags & FTS5_TOKEN_COLOCATED)==0 || pCtx->szCol==0 ){ pCtx->szCol++; } @@ -815,6 +816,7 @@ static int fts5StorageIntegrityCallback( int iCol; UNUSED_PARAM2(iUnused1, iUnused2); + if( nToken>FTS5_MAX_TOKEN_SIZE ) nToken = FTS5_MAX_TOKEN_SIZE; if( (tflags & FTS5_TOKEN_COLOCATED)==0 || pCtx->szCol==0 ){ pCtx->szCol++; diff --git a/ext/fts5/test/fts5simple.test b/ext/fts5/test/fts5simple.test index 3483e40ba5..2bc02cf49a 100644 --- a/ext/fts5/test/fts5simple.test +++ b/ext/fts5/test/fts5simple.test @@ -448,4 +448,24 @@ do_execsql_test 20.2 { INSERT INTO x1(x1) VALUES('integrity-check'); } {} +#------------------------------------------------------------------------- +reset_db +set doc "a b [string repeat x 100000]" +do_execsql_test 21.0 { + CREATE VIRTUAL TABLE x1 USING fts5(x); + INSERT INTO x1(rowid, x) VALUES(11111, $doc); + INSERT INTO x1(rowid, x) VALUES(11112, $doc); +} +do_execsql_test 21.1 { + INSERT INTO x1(x1) VALUES('integrity-check'); +} +do_execsql_test 21.2 { + SELECT rowid FROM x1($doc); +} {11111 11112} +do_execsql_test 21.3 { + DELETE FROM x1 WHERE rowid=11111; + INSERT INTO x1(x1) VALUES('integrity-check'); + SELECT rowid FROM x1($doc); +} {11112} + finish_test diff --git a/manifest b/manifest index 187f8d0c9b..79eb4b3013 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Update\sa\srequirement\smark.\s\sNo\schanges\sto\scode. -D 2016-03-23T13:46:05.453 +C Explicitly\slimit\sthe\ssize\sof\sfts5\stokens\sto\s32768\sbytes. +D 2016-03-23T15:04:00.239 F Makefile.in f53429fb2f313c099283659d0df6f20f932c861f F Makefile.linux-gcc 7bc79876b875010e8c8f9502eb935ca92aa3c434 F Makefile.msc df0bf9ff7f8b3f4dd9fb4cc43f92fe58f6ec5c66 @@ -98,15 +98,15 @@ F ext/fts3/unicode/mkunicode.tcl 2debed3f582d77b3fdd0b8830880250021571fd8 F ext/fts3/unicode/parseunicode.tcl da577d1384810fb4e2b209bf3313074353193e95 F ext/fts5/extract_api_docs.tcl a36e54ec777172ddd3f9a88daf593b00848368e0 F ext/fts5/fts5.h ff9c2782e8ed890b0de2f697a8d63971939e70c7 -F ext/fts5/fts5Int.h 4e507abebae0d7d3ac9b8daebf049d5153d00961 +F ext/fts5/fts5Int.h 3677076aecbf645a7f2a019115c6a4ec3272dd78 F ext/fts5/fts5_aux.c daa57fb45216491814520bbb587e97bf81ced458 F ext/fts5/fts5_buffer.c 4c1502d4c956cd092c89ce4480867f9d8bf325cd F ext/fts5/fts5_config.c 5af9c360e99669d29f06492c370892394aba0857 -F ext/fts5/fts5_expr.c 35e9d92c89e7c7ea0759b73d24da1ecb7630a24b +F ext/fts5/fts5_expr.c 5ca4bafe29aa3d27683c90e836192e4aefd20a3f F ext/fts5/fts5_hash.c f3a7217c86eb8f272871be5f6aa1b6798960a337 -F ext/fts5/fts5_index.c 19df86d29d24cc56bfb01a6a07dcaac227d2fcdf +F ext/fts5/fts5_index.c b271b19dd28d3501772c3a9317272add4751af95 F ext/fts5/fts5_main.c b4a0fc5bf17f2f1f056ee76cdd7d2af08b360f55 -F ext/fts5/fts5_storage.c 98e3129047d250fc5acc4a4ba7ba4fde9b0ae030 +F ext/fts5/fts5_storage.c 3309c6a8e34b974513016fd1ef47c83f5898f94c F ext/fts5/fts5_tcl.c f8731e0508299bd43f1a2eff7dbeaac870768966 F ext/fts5/fts5_test_mi.c 783b86697ebf773c18fc109992426c0173a055bc F ext/fts5/fts5_test_tok.c db08af63673c3a7d39f053b36fd6e065017706be @@ -180,7 +180,7 @@ F ext/fts5/test/fts5rank.test 7e9e64eac7245637f6f2033aec4b292aaf611aab F ext/fts5/test/fts5rebuild.test 03935f617ace91ed23a6099c7c74d905227ff29b F ext/fts5/test/fts5restart.test c17728fdea26e7d0f617d22ad5b4b2862b994c17 F ext/fts5/test/fts5rowid.test 16908a99d6efc9ba21081b4f2b86b3fc699839a6 -F ext/fts5/test/fts5simple.test 5b7b05bcc89bcb718a0fc7f473092d3513cc8e19 +F ext/fts5/test/fts5simple.test cd23d4072ea095d652c9b6db12284cc642e49c98 F ext/fts5/test/fts5simple2.test 98377ae1ff7749a42c21fe1a139c1ed312522c46 F ext/fts5/test/fts5simple3.test 8e71733b3d1b0e695011d02c68ebc5ca40b6124e F ext/fts5/test/fts5synonym.test 6475d189c2e20d60795808f83e36bf9318708d48 @@ -1459,7 +1459,7 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0 -P 605eba4a756e7185119088e2242f82691d078b01 -R 0cef5aac6dc3fd4662874a0245bd6fe2 -U drh -Z 79401c6ddf17e634854d1f69b3d42451 +P 412984642af40578ec611d8c0b7c0508cb5cf9c9 +R 7ff8c5a33cf0f5a9bc31dcb4123a3529 +U dan +Z a8f8abdf00b8979540d6caa40226ffc8 diff --git a/manifest.uuid b/manifest.uuid index 48f9df63e0..efdf00d8e6 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -412984642af40578ec611d8c0b7c0508cb5cf9c9 \ No newline at end of file +70fc69eed9b09159899d7cbd1416a59d04210a63 \ No newline at end of file