From cbcd9f5357e4f480104f6dc39471663e3cc392da Mon Sep 17 00:00:00 2001 From: dan Date: Mon, 26 Mar 2012 10:36:55 +0000 Subject: [PATCH] Add an experimental integrity-check function to FTS. FossilOrigin-Name: 40fc8804743dfb005991e9c5ef7b0ebcb3c2e731 --- ext/fts3/fts3.c | 8 +- ext/fts3/fts3Int.h | 1 + ext/fts3/fts3_write.c | 207 +++++++++++++++++++++++++++++++++++++++++ manifest | 21 +++-- manifest.uuid | 2 +- test/fts4check.test | 157 +++++++++++++++++++++++++++++++ test/fts4merge.test | 5 + test/permutations.test | 1 + 8 files changed, 385 insertions(+), 17 deletions(-) create mode 100644 test/fts4check.test diff --git a/ext/fts3/fts3.c b/ext/fts3/fts3.c index 63661393a5..9b7510eead 100644 --- a/ext/fts3/fts3.c +++ b/ext/fts3/fts3.c @@ -2691,7 +2691,7 @@ static int fts3SegReaderCursor( */ int sqlite3Fts3SegReaderCursor( Fts3Table *p, /* FTS3 table handle */ - int iLangid, + int iLangid, /* Language-id to search */ int iIndex, /* Index to search (from 0 to p->nIndex-1) */ int iLevel, /* Level of segments to scan */ const char *zTerm, /* Term to query for */ @@ -2709,12 +2709,7 @@ int sqlite3Fts3SegReaderCursor( assert( FTS3_SEGCURSOR_ALL<0 && FTS3_SEGCURSOR_PENDING<0 ); assert( isPrefix==0 || isScan==0 ); - /* "isScan" is only set to true by the ft4aux module, an ordinary - ** full-text tables. */ - assert( isScan==0 || p->aIndex==0 ); - memset(pCsr, 0, sizeof(Fts3MultiSegReader)); - return fts3SegReaderCursor( p, iLangid, iIndex, iLevel, zTerm, nTerm, isPrefix, isScan, pCsr ); @@ -5206,6 +5201,7 @@ void sqlite3Fts3EvalPhraseCleanup(Fts3Phrase *pPhrase){ } } + /* ** Return SQLITE_CORRUPT_VTAB. 
*/ diff --git a/ext/fts3/fts3Int.h b/ext/fts3/fts3Int.h index 0ce67a2d10..d0ee847ede 100644 --- a/ext/fts3/fts3Int.h +++ b/ext/fts3/fts3Int.h @@ -134,6 +134,7 @@ typedef unsigned char u8; /* 1-byte (or larger) unsigned integer */ typedef short int i16; /* 2-byte (or larger) signed integer */ typedef unsigned int u32; /* 4-byte unsigned integer */ typedef sqlite3_uint64 u64; /* 8-byte unsigned integer */ +typedef sqlite3_int64 i64; /* 8-byte signed integer */ /* ** Macro used to suppress compiler warnings for unused parameters. diff --git a/ext/fts3/fts3_write.c b/ext/fts3/fts3_write.c index 2d3a91d87e..872b1f0d60 100644 --- a/ext/fts3/fts3_write.c +++ b/ext/fts3/fts3_write.c @@ -4706,6 +4706,211 @@ static int fts3DoAutoincrmerge( return rc; } +/* +** Return a 64-bit checksum for the FTS index entry specified by the +** arguments to this function. +*/ +static i64 fts3ChecksumEntry( + const char *zTerm, /* Pointer to buffer containing term */ + int nTerm, /* Size of zTerm in bytes */ + int iLangid, /* Language id for current row */ + int iIndex, /* Index (0..Fts3Table.nIndex-1) */ + i64 iDocid, /* Docid for current row. 
*/ + int iCol, /* Column number */ + int iPos /* Position */ +){ + int i; + i64 ret = iDocid; + + ret += (ret<<3) + iLangid; + ret += (ret<<3) + iIndex; + ret += (ret<<3) + iCol; + ret += (ret<<3) + iPos; + for(i=0; inIndex-1) */ + int *pRc /* OUT: Return code */ +){ + Fts3SegFilter filter; + Fts3MultiSegReader csr; + int rc; + i64 cksum = 0; + + assert( *pRc==SQLITE_OK ); + + memset(&filter, 0, sizeof(filter)); + memset(&csr, 0, sizeof(csr)); + filter.flags = FTS3_SEGMENT_REQUIRE_POS|FTS3_SEGMENT_IGNORE_EMPTY; + filter.flags |= FTS3_SEGMENT_SCAN; + + rc = sqlite3Fts3SegReaderCursor( + p, iLangid, iIndex, FTS3_SEGCURSOR_ALL, 0, 0, 0, 1,&csr + ); + if( rc==SQLITE_OK ){ + rc = sqlite3Fts3SegReaderStart(p, &csr, &filter); + } + + if( rc==SQLITE_OK ){ + while( SQLITE_ROW==(rc = sqlite3Fts3SegReaderStep(p, &csr)) ){ + char *pCsr = csr.aDoclist; + char *pEnd = &pCsr[csr.nDoclist]; + + i64 iDocid = 0; + i64 iCol = 0; + i64 iPos = 0; + + pCsr += sqlite3Fts3GetVarint(pCsr, &iDocid); + while( pCsrnIndex); + while( rc==SQLITE_OK && sqlite3_step(pAllLangid)==SQLITE_ROW ){ + int iLangid = sqlite3_column_int(pAllLangid, 0); + int i; + for(i=0; inIndex; i++){ + cksum1 = cksum1 ^ fts3ChecksumIndex(p, iLangid, i, &rc); + } + } + rc2 = sqlite3_reset(pAllLangid); + if( rc==SQLITE_OK ) rc = rc2; + } + + /* This block calculates the checksum according to the %_content table */ + rc = fts3SqlStmt(p, SQL_SELECT_ALL_LANGID, &pAllLangid, 0); + if( rc==SQLITE_OK ){ + sqlite3_tokenizer_module const *pModule = p->pTokenizer->pModule; + sqlite3_stmt *pStmt = 0; + char *zSql; + + zSql = sqlite3_mprintf("SELECT %s" , p->zReadExprlist); + if( !zSql ){ + rc = SQLITE_NOMEM; + }else{ + rc = sqlite3_prepare_v2(p->db, zSql, -1, &pStmt, 0); + sqlite3_free(zSql); + } + + while( rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pStmt) ){ + i64 iDocid = sqlite3_column_int64(pStmt, 0); + int iLang = langidFromSelect(p, pStmt); + int iCol; + + for(iCol=0; rc==SQLITE_OK && iColnColumn; iCol++){ + const char *zText = 
(const char *)sqlite3_column_text(pStmt, iCol+1); + int nText = sqlite3_column_bytes(pStmt, iCol+1); + sqlite3_tokenizer_cursor *pT = 0; + + rc = sqlite3Fts3OpenTokenizer(p->pTokenizer, iLang, zText, nText, &pT); + while( rc==SQLITE_OK ){ + char const *zToken; /* Buffer containing token */ + int nToken; /* Number of bytes in token */ + int iDum1, iDum2; /* Dummy variables */ + int iPos; /* Position of token in zText */ + + rc = pModule->xNext(pT, &zToken, &nToken, &iDum1, &iDum2, &iPos); + if( rc==SQLITE_OK ){ + int i; + cksum2 = cksum2 ^ fts3ChecksumEntry( + zToken, nToken, iLang, 0, iDocid, iCol, iPos + ); + for(i=1; inIndex; i++){ + if( p->aIndex[i].nPrefix<=nToken ){ + cksum2 = cksum2 ^ fts3ChecksumEntry( + zToken, p->aIndex[i].nPrefix, iLang, i, iDocid, iCol, iPos + ); + } + } + } + } + if( pT ) pModule->xClose(pT); + if( rc==SQLITE_DONE ) rc = SQLITE_OK; + } + } + + sqlite3_finalize(pStmt); + } + + *pbOk = (cksum1==cksum2); + return rc; +} + +/* +** Run the integrity-check. If no error occurs and the current contents of +** the FTS index are correct, return SQLITE_OK. Or, if the contents of the +** FTS index are incorrect, return SQLITE_CORRUPT_VTAB. +** +** Or, if an error (e.g. an OOM or IO error) occurs, return an SQLite +** error code. 
+*/ +static int fts3DoIntegrityCheck( + Fts3Table *p /* FTS3 table handle */ +){ + int rc; + int bOk = 0; + rc = fts3IntegrityCheck(p, &bOk); + if( rc==SQLITE_OK && bOk==0 ) rc = SQLITE_CORRUPT_VTAB; + return rc; +} /* ** Handle a 'special' INSERT of the form: @@ -4726,6 +4931,8 @@ static int fts3SpecialInsert(Fts3Table *p, sqlite3_value *pVal){ rc = fts3DoOptimize(p, 0); }else if( nVal==7 && 0==sqlite3_strnicmp(zVal, "rebuild", 7) ){ rc = fts3DoRebuild(p); + }else if( nVal==15 && 0==sqlite3_strnicmp(zVal, "integrity-check", 15) ){ + rc = fts3DoIntegrityCheck(p); }else if( nVal>6 && 0==sqlite3_strnicmp(zVal, "merge=", 6) ){ rc = fts3DoIncrmerge(p, &zVal[6]); }else if( nVal>10 && 0==sqlite3_strnicmp(zVal, "automerge=", 10) ){ diff --git a/manifest b/manifest index 7fcb982eb0..418d5baf3b 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Add\sSQLITE_DBSTATUS_CACHE_WRITE.\sUsed\sto\squery\sa\sdatabase\sconnection\sfor\sthe\scumulative\snumber\sof\sdatabase\spages\swritten. -D 2012-03-24T19:44:56.637 +C Add\san\sexperimental\sintegrity-check\sfunction\sto\sFTS. 
+D 2012-03-26T10:36:55.434 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 2f37e468503dbe79d35c9f6dffcf3fae1ae9ec20 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -63,9 +63,9 @@ F ext/fts3/README.content fdc666a70d5257a64fee209f97cf89e0e6e32b51 F ext/fts3/README.syntax a19711dc5458c20734b8e485e75fb1981ec2427a F ext/fts3/README.tokenizers 998756696647400de63d5ba60e9655036cb966e9 F ext/fts3/README.txt 8c18f41574404623b76917b9da66fcb0ab38328d -F ext/fts3/fts3.c 95409b49801ee7736755d7e307e606571b754a58 +F ext/fts3/fts3.c a36f2add4c795b9e1ca1e1a16bd1e45c697a1f37 F ext/fts3/fts3.h 3a10a0af180d502cecc50df77b1b22df142817fe -F ext/fts3/fts3Int.h eb749124db7c94b6f89d793cdd4d993a52c46646 +F ext/fts3/fts3Int.h 133e5c613ac6920be5b914d43acc1478df1332e1 F ext/fts3/fts3_aux.c 5205182bd8f372782597888156404766edf5781e F ext/fts3/fts3_expr.c dbc7ba4c3a6061adde0f38ed8e9b349568299551 F ext/fts3/fts3_hash.c 8dd2d06b66c72c628c2732555a32bc0943114914 @@ -78,7 +78,7 @@ F ext/fts3/fts3_test.c 6b7cc68aef4efb084e1449f7d20c4b20d3bdf6b4 F ext/fts3/fts3_tokenizer.c 3da7254a9881f7e270ab28e2004e0d22b3212bce F ext/fts3/fts3_tokenizer.h 66dec98e365854b6cd2d54f1a96bb6d428fc5a68 F ext/fts3/fts3_tokenizer1.c 5c98225a53705e5ee34824087478cf477bdb7004 -F ext/fts3/fts3_write.c 6a092ee27198716969bfbaa2194aa67eabeb2ff6 +F ext/fts3/fts3_write.c a95e0f29a438bbba69ef686c75f03fbdf7ac79ac F ext/fts3/fts3speed.tcl b54caf6a18d38174f1a6e84219950d85e98bb1e9 F ext/fts3/mkfts3amal.tcl 252ecb7fe6467854f2aa237bf2c390b74e71f100 F ext/icu/README.txt bf8461d8cdc6b8f514c080e4e10dc3b2bbdfefa9 @@ -497,9 +497,10 @@ F test/fts3shared.test 8bb266521d7c5495c0ae522bb4d376ad5387d4a2 F test/fts3snippet.test 8e956051221a34c7daeb504f023cb54d5fa5a8b2 F test/fts3sort.test 95be0b19d7e41c44b29014f13ea8bddd495fd659 F test/fts4aa.test 6e7f90420b837b2c685f3bcbe84c868492d40a68 +F test/fts4check.test 72134071f4e9f8bed76af1f2375fd5aff0c5ea48 F test/fts4content.test 
17b2360f7d1a9a7e5aa8022783f5c5731b6dfd4f F test/fts4langid.test 24a6e41063b416bbdf371ff6b4476fa41c194aa7 -F test/fts4merge.test 120e0baf17a01f0cb696d6f6f9b6506e1587ef90 +F test/fts4merge.test 16ba38960dc06ffd0c47c5487ec1060b5130661f F test/fts4merge2.test 5faa558d1b672f82b847d2a337465fa745e46891 F test/fts4merge3.test e0e21332f592fc003fcab112928ea891407d83cb F test/func.test 6c5ce11e3a0021ca3c0649234e2d4454c89110ca @@ -641,7 +642,7 @@ F test/pageropt.test 9191867ed19a2b3db6c42d1b36b6fbc657cd1ab0 F test/pagesize.test 1dd51367e752e742f58e861e65ed7390603827a0 F test/pcache.test 065aa286e722ab24f2e51792c1f093bf60656b16 F test/pcache2.test a83efe2dec0d392f814bfc998def1d1833942025 -F test/permutations.test 0ab1e7748de5d29c4c648ba5ce3b983ab80653d1 +F test/permutations.test dbda172249564f43ec556108a704581044c57dbd F test/pragma.test c51c148defe32bf4a419a522f95d26838d5cf677 F test/pragma2.test 3a55f82b954242c642f8342b17dffc8b47472947 F test/printf.test ec9870c4dce8686a37818e0bf1aba6e6a1863552 @@ -998,7 +999,7 @@ F tool/tostr.awk e75472c2f98dd76e06b8c9c1367f4ab07e122d06 F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f F tool/warnings-clang.sh 9f406d66e750e8ac031c63a9ef3248aaa347ef2a F tool/warnings.sh fbc018d67fd7395f440c28f33ef0f94420226381 -P cc051fc0b2d89603b27b94cf2afdbda417ee9d94 -R 9ca09f80a0e10e381b2361a47e7d01bf +P 3cb6a879f1220db03a66429d63330e27e8ca6e49 +R a8a5e3d4a755c2fc6211bbe12afb7dcc U dan -Z e2a698085c147deaded5cb32de63b51c +Z 32d9f92fc095c5c84155b7497aaefe53 diff --git a/manifest.uuid b/manifest.uuid index 2f98caaa3d..afcb8fab6b 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -3cb6a879f1220db03a66429d63330e27e8ca6e49 \ No newline at end of file +40fc8804743dfb005991e9c5ef7b0ebcb3c2e731 \ No newline at end of file diff --git a/test/fts4check.test b/test/fts4check.test new file mode 100644 index 0000000000..77815b2ab7 --- /dev/null +++ b/test/fts4check.test @@ -0,0 +1,157 @@ +# 2012 March 26 +# +# The author disclaims copyright to 
this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#************************************************************************* +# This file implements regression tests for SQLite library. The +# focus of this script is testing the FTS 'integrity-check' function, +# used to check if the current FTS index accurately reflects the content +# of the table. +# + +set testdir [file dirname $argv0] +source $testdir/tester.tcl +source $testdir/fts3_common.tcl +set ::testprefix fts4check + +# If SQLITE_ENABLE_FTS3 is not defined, omit this file. +ifcapable !fts3 { + finish_test + return +} + +# Run the integrity-check on FTS table $tbl using database handle $db. If +# the integrity-check passes, return "ok". Otherwise, throw an exception. +# +proc fts_integrity {db tbl} { + $db eval "INSERT INTO $tbl ($tbl) VALUES('integrity-check')" + return "ok" +} + +#------------------------------------------------------------------------- +# Test cases 1.* +# +# 1.0: Build a reasonably sized FTS table (5000 rows). +# +# 1.1: Run the integrity check code to check it passes. +# +# 1.2: Make a series of minor changes to the underlying FTS data structures +# (e.g. delete or insert a row from the %_content table). Check that +# this causes the integrity-check code to fail. +# + +# Build an FTS table and check the integrity-check passes. +# +do_test 1.0 { fts3_build_db_1 5000 } {} +do_test 1.1 { fts_integrity db t1 } {ok} + +# Mess around with the underlying tables. Check that this causes the +# integrity-check test to fail. 
+# +foreach {tn disruption} { + 1 { + INSERT INTO t1_content(docid, c0x, c1y) VALUES(NULL, 'a', 'b'); + } + 2 { + DELETE FROM t1_content WHERE docid = (SELECT max(docid) FROM t1_content); + } + 3 { + DELETE FROM t1_segdir WHERE level=0 AND idx=( + SELECT max(idx) FROM t1_segdir WHERE level=0 + ); + } +} { + do_execsql_test 1.2.1.$tn "BEGIN; $disruption" + do_catchsql_test 1.2.2.$tn { + INSERT INTO t1 (t1) VALUES('integrity-check') + } {1 {database disk image is malformed}} + do_execsql_test 1.2.3.$tn "ROLLBACK" +} + +do_test 1.3 { fts_integrity db t1 } {ok} + +#------------------------------------------------------------------------- +# Test cases 2.* +# +# 2.0: Build a reasonably sized FTS table (20000 rows) that includes +# prefix indexes. +# +# 2.1: Run the integrity check code to check it passes. +# +# 2.2: Make a series of minor changes to the underlying FTS data structures +# (e.g. delete or insert a row from the %_content table). Check that +# this causes the integrity-check code to fail. +# + +do_test 2.0 { fts3_build_db_2 20000 {prefix="3,1"} } {} +do_test 2.1 { fts_integrity db t2 } {ok} +foreach {tn disruption} { + 1 { + INSERT INTO t2_content VALUES(NULL, 'xyz') + } + 3 { + DELETE FROM t2_segdir WHERE level=0 AND idx=( + SELECT max(idx) FROM t2_segdir WHERE level=1024 + ); + } +} { + do_execsql_test 2.2.1.$tn "BEGIN; $disruption" + do_catchsql_test 2.2.2.$tn { + INSERT INTO t2 (t2) VALUES('integrity-check') + } {1 {database disk image is malformed}} + do_execsql_test 2.2.3.$tn "ROLLBACK" +} + + +#------------------------------------------------------------------------- +# Test cases 3.* +# +# 3.0: Build a reasonably sized FTS table (5000 rows) that includes +# prefix indexes and uses the languageid= feature. +# +# 3.1: Run the integrity check code to check it passes. +# +# 3.2: Make a series of minor changes to the underlying FTS data structures +# (e.g. delete or insert a row from the %_content table). 
Check that +# this causes the integrity-check code to fail. +# +do_test 3.0 { + reset_db + fts3_build_db_1 5000 + execsql { + CREATE VIRTUAL TABLE t3 USING fts4(x, y, prefix="2,3", languageid=langid); + } + foreach docid [execsql {SELECT docid FROM t1 ORDER BY 1 ASC}] { + execsql { + INSERT INTO t3(x, y, langid) + SELECT x, y, (docid%9)*4 FROM t1 WHERE docid=$docid; + } + } +} {} +do_test 3.1 { fts_integrity db t3 } {ok} + +foreach {tn disruption} { + 1 { + INSERT INTO t3_content(c0x, c1y, langid) VALUES(NULL, 'a', 0); + } + 2 { + UPDATE t3_content SET langid=langid+1 WHERE rowid = ( + SELECT max(rowid) FROM t3_content + ) + } +} { + do_execsql_test 3.2.1.$tn "BEGIN; $disruption" + do_catchsql_test 3.2.2.$tn { + INSERT INTO t3 (t3) VALUES('integrity-check') + } {1 {database disk image is malformed}} + do_execsql_test 3.2.3.$tn "ROLLBACK" +} + + + +finish_test diff --git a/test/fts4merge.test b/test/fts4merge.test index 422d585cbe..95910ea654 100644 --- a/test/fts4merge.test +++ b/test/fts4merge.test @@ -23,6 +23,11 @@ ifcapable !fts3 { return } +proc fts3_integrity_check {tbl} { + db eval "INSERT INTO $tbl ($tbl) VALUES('integrity-check')" + return "ok" +} + #------------------------------------------------------------------------- # Test cases 1.* # diff --git a/test/permutations.test b/test/permutations.test index 6f66e85e94..3165ea3eb9 100644 --- a/test/permutations.test +++ b/test/permutations.test @@ -185,6 +185,7 @@ test_suite "fts3" -prefix "" -description { fts4aa.test fts4content.test fts3conf.test fts3prefix.test fts3fault2.test fts3corrupt.test fts3corrupt2.test fts3first.test fts4langid.test fts4merge.test + fts4check.test }