Add an experimental integrity-check function to FTS.

FossilOrigin-Name: 40fc8804743dfb005991e9c5ef7b0ebcb3c2e731
This commit is contained in:
dan 2012-03-26 10:36:55 +00:00
parent bde1a0b196
commit cbcd9f5357
8 changed files with 385 additions and 17 deletions

View File

@ -2691,7 +2691,7 @@ static int fts3SegReaderCursor(
*/
int sqlite3Fts3SegReaderCursor(
Fts3Table *p, /* FTS3 table handle */
int iLangid,
int iLangid, /* Language-id to search */
int iIndex, /* Index to search (from 0 to p->nIndex-1) */
int iLevel, /* Level of segments to scan */
const char *zTerm, /* Term to query for */
@ -2709,12 +2709,7 @@ int sqlite3Fts3SegReaderCursor(
assert( FTS3_SEGCURSOR_ALL<0 && FTS3_SEGCURSOR_PENDING<0 );
assert( isPrefix==0 || isScan==0 );
/* "isScan" is only set to true by the ft4aux module, an ordinary
** full-text tables. */
assert( isScan==0 || p->aIndex==0 );
memset(pCsr, 0, sizeof(Fts3MultiSegReader));
return fts3SegReaderCursor(
p, iLangid, iIndex, iLevel, zTerm, nTerm, isPrefix, isScan, pCsr
);
@ -5206,6 +5201,7 @@ void sqlite3Fts3EvalPhraseCleanup(Fts3Phrase *pPhrase){
}
}
/*
** Return SQLITE_CORRUPT_VTAB.
*/

View File

@ -134,6 +134,7 @@ typedef unsigned char u8; /* 1-byte (or larger) unsigned integer */
typedef short int i16; /* 2-byte (or larger) signed integer */
typedef unsigned int u32; /* 4-byte unsigned integer */
typedef sqlite3_uint64 u64; /* 8-byte unsigned integer */
typedef sqlite3_int64 i64; /* 8-byte signed integer */
/*
** Macro used to suppress compiler warnings for unused parameters.

View File

@ -4706,6 +4706,211 @@ static int fts3DoAutoincrmerge(
return rc;
}
/*
** Return a 64-bit checksum for the FTS index entry specified by the
** arguments to this function.
**
** The checksum is built by folding each field into an accumulator using
** "acc = acc*9 + field" (coded as acc += (acc<<3) + field). The accumulator
** starts out as the docid; the language id, index number, column number,
** token position and finally each of the nTerm bytes of zTerm are then
** folded in, in that order.
**
** All arithmetic is done on an unsigned 64-bit accumulator so that
** wrap-around is well defined. Doing the same shifts and additions on a
** signed i64 is undefined behaviour once the value overflows or goes
** negative, which happens for large docids and for long terms. The result
** is converted back to i64 on return so the interface is unchanged.
*/
static i64 fts3ChecksumEntry(
  const char *zTerm,              /* Pointer to buffer containing term */
  int nTerm,                      /* Size of zTerm in bytes */
  int iLangid,                    /* Language id for current row */
  int iIndex,                     /* Index (0..Fts3Table.nIndex-1) */
  i64 iDocid,                     /* Docid for current row. */
  int iCol,                       /* Column number */
  int iPos                        /* Position */
){
  int i;
  u64 ret = (u64)iDocid;

  /* Converting a (possibly negative) int or char to u64 is defined as
  ** reduction modulo 2^64, so each step below yields the same bit pattern
  ** that two's-complement signed arithmetic would have produced. */
  ret += (ret<<3) + (u64)iLangid;
  ret += (ret<<3) + (u64)iIndex;
  ret += (ret<<3) + (u64)iCol;
  ret += (ret<<3) + (u64)iPos;
  for(i=0; i<nTerm; i++) ret += (ret<<3) + (u64)zTerm[i];

  return (i64)ret;
}
/*
** Return a checksum of all entries in index iIndex of the FTS index that
** correspond to language id iLangid. The checksum is calculated by XORing
** the checksums of each individual entry (see fts3ChecksumEntry()) together.
**
** If *pRc is anything other than SQLITE_OK when this function is called, it
** is a no-op: 0 is returned and *pRc is left untouched. This allows a caller
** to invoke the function repeatedly with the same pRc without an earlier
** error code being overwritten by a later, successful, call.
**
** If successful, the checksum value is returned and *pRc set to SQLITE_OK.
** Otherwise, if an error occurs, *pRc is set to an SQLite error code. The
** return value is undefined in this case.
*/
static i64 fts3ChecksumIndex(
  Fts3Table *p,                   /* FTS3 table handle */
  int iLangid,                    /* Language id to return cksum for */
  int iIndex,                     /* Index to cksum (0..p->nIndex-1) */
  int *pRc                        /* IN/OUT: Return code */
){
  Fts3SegFilter filter;
  Fts3MultiSegReader csr;
  int rc;
  i64 cksum = 0;

  /* Preserve any error reported by an earlier call. */
  if( *pRc!=SQLITE_OK ) return 0;

  memset(&filter, 0, sizeof(filter));
  memset(&csr, 0, sizeof(csr));
  filter.flags =  FTS3_SEGMENT_REQUIRE_POS|FTS3_SEGMENT_IGNORE_EMPTY;
  filter.flags |= FTS3_SEGMENT_SCAN;

  /* Open a cursor over all segments of the index. No term is specified
  ** (zTerm==0, nTerm==0), isPrefix is 0 and isScan is 1. */
  rc = sqlite3Fts3SegReaderCursor(
      p, iLangid, iIndex, FTS3_SEGCURSOR_ALL, 0, 0, 0, 1,&csr
  );
  if( rc==SQLITE_OK ){
    rc = sqlite3Fts3SegReaderStart(p, &csr, &filter);
  }

  if( rc==SQLITE_OK ){
    /* Each SQLITE_ROW exposes one term (csr.zTerm/nTerm) together with its
    ** doclist (csr.aDoclist/nDoclist). */
    while( SQLITE_ROW==(rc = sqlite3Fts3SegReaderStep(p, &csr)) ){
      char *pCsr = csr.aDoclist;
      char *pEnd = &pCsr[csr.nDoclist];

      i64 iDocid = 0;
      i64 iCol = 0;
      i64 iPos = 0;

      /* The doclist begins with a docid varint. After that, each varint is
      ** decoded as follows:
      **
      **   0:   reset column and position; the next varint is a delta that
      **        is added to the current docid.
      **   1:   reset column and position; the next varint is the new
      **        column number.
      **   N:   any other value advances the position by N-2 and identifies
      **        one index entry, which is folded into the checksum.
      */
      pCsr += sqlite3Fts3GetVarint(pCsr, &iDocid);
      while( pCsr<pEnd ){
        i64 iVal = 0;
        pCsr += sqlite3Fts3GetVarint(pCsr, &iVal);
        if( pCsr<pEnd ){
          if( iVal==0 || iVal==1 ){
            iCol = 0;
            iPos = 0;
            if( iVal ){
              pCsr += sqlite3Fts3GetVarint(pCsr, &iCol);
            }else{
              pCsr += sqlite3Fts3GetVarint(pCsr, &iVal);
              /* The delta comes straight from the (possibly corrupt) index.
              ** Add it using unsigned arithmetic so that overflow wraps
              ** instead of being undefined behaviour. */
              iDocid = (i64)((u64)iDocid + (u64)iVal);
            }
          }else{
            /* As above: wrap rather than overflow on corrupt input. */
            iPos = (i64)((u64)iPos + ((u64)iVal - 2));
            cksum = cksum ^ fts3ChecksumEntry(
                csr.zTerm, csr.nTerm, iLangid, iIndex, iDocid, iCol, iPos
            );
          }
        }
      }
    }
  }
  /* Release the cursor on both the success and the error paths. */
  sqlite3Fts3SegReaderFinish(&csr);

  *pRc = rc;
  return cksum;
}
/*
** Check if the contents of the FTS index match the current contents of the
** content table. If no error occurs and the contents do match, set *pbOk
** to true and return SQLITE_OK. Or if the contents do not match, set *pbOk
** to false before returning.
**
** Two checksums are computed and compared:
**
**   cksum1:  XOR of fts3ChecksumIndex() over every language id returned by
**            the SQL_SELECT_ALL_LANGID statement and every index
**            0..p->nIndex-1.
**
**   cksum2:  XOR of fts3ChecksumEntry() over every token produced by
**            running the table's tokenizer over each column of each row
**            returned by "SELECT <p->zReadExprlist>". Each token is also
**            checksummed once for every prefix index whose prefix length
**            does not exceed the token length.
**
** If an error occurs (e.g. an OOM or IO error), return an SQLite error
** code. The final value of *pbOk is undefined in this case.
*/
static int fts3IntegrityCheck(Fts3Table *p, int *pbOk){
  int rc = SQLITE_OK;             /* Return code */
  i64 cksum1 = 0;                 /* Checksum based on FTS index contents */
  i64 cksum2 = 0;                 /* Checksum based on %_content contents */
  sqlite3_stmt *pAllLangid = 0;   /* Statement to return all language-ids */

  /* This block calculates the checksum according to the FTS index. */
  rc = fts3SqlStmt(p, SQL_SELECT_ALL_LANGID, &pAllLangid, 0);
  if( rc==SQLITE_OK ){
    int rc2;
    sqlite3_bind_int(pAllLangid, 1, p->nIndex);
    while( rc==SQLITE_OK && sqlite3_step(pAllLangid)==SQLITE_ROW ){
      int iLangid = sqlite3_column_int(pAllLangid, 0);
      int i;
      /* Stop at the first error so that it is neither handed to, nor
      ** overwritten by, a later fts3ChecksumIndex() call. */
      for(i=0; rc==SQLITE_OK && i<p->nIndex; i++){
        cksum1 = cksum1 ^ fts3ChecksumIndex(p, iLangid, i, &rc);
      }
    }
    /* sqlite3_reset() reports any error hit by the last sqlite3_step(). */
    rc2 = sqlite3_reset(pAllLangid);
    if( rc==SQLITE_OK ) rc = rc2;
  }

  /* This block calculates the checksum according to the %_content table.
  ** It is skipped if the block above failed, so that the error code from
  ** the index scan is returned to the caller rather than discarded. */
  if( rc==SQLITE_OK ){
    sqlite3_tokenizer_module const *pModule = p->pTokenizer->pModule;
    sqlite3_stmt *pStmt = 0;
    char *zSql;
    int rc2;

    zSql = sqlite3_mprintf("SELECT %s" , p->zReadExprlist);
    if( !zSql ){
      rc = SQLITE_NOMEM;
    }else{
      rc = sqlite3_prepare_v2(p->db, zSql, -1, &pStmt, 0);
      sqlite3_free(zSql);
    }

    while( rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pStmt) ){
      i64 iDocid = sqlite3_column_int64(pStmt, 0);
      int iLang = langidFromSelect(p, pStmt);
      int iCol;

      /* Result column 0 is the docid; user column iCol is read from result
      ** column iCol+1. */
      for(iCol=0; rc==SQLITE_OK && iCol<p->nColumn; iCol++){
        const char *zText = (const char *)sqlite3_column_text(pStmt, iCol+1);
        int nText = sqlite3_column_bytes(pStmt, iCol+1);
        sqlite3_tokenizer_cursor *pT = 0;

        rc = sqlite3Fts3OpenTokenizer(p->pTokenizer, iLang, zText, nText, &pT);
        while( rc==SQLITE_OK ){
          char const *zToken;       /* Buffer containing token */
          int nToken;               /* Number of bytes in token */
          int iDum1, iDum2;         /* Dummy variables */
          int iPos;                 /* Position of token in zText */

          rc = pModule->xNext(pT, &zToken, &nToken, &iDum1, &iDum2, &iPos);
          if( rc==SQLITE_OK ){
            int i;
            /* Index 0 holds the complete token. */
            cksum2 = cksum2 ^ fts3ChecksumEntry(
                zToken, nToken, iLang, 0, iDocid, iCol, iPos
            );
            /* Indexes 1..nIndex-1 hold the first nPrefix bytes of each
            ** token that is at least nPrefix bytes long. */
            for(i=1; i<p->nIndex; i++){
              if( p->aIndex[i].nPrefix<=nToken ){
                cksum2 = cksum2 ^ fts3ChecksumEntry(
                  zToken, p->aIndex[i].nPrefix, iLang, i, iDocid, iCol, iPos
                );
              }
            }
          }
        }
        if( pT ) pModule->xClose(pT);
        /* xNext() leaves rc==SQLITE_DONE at the end of the text; that is
        ** not an error. */
        if( rc==SQLITE_DONE ) rc = SQLITE_OK;
      }
    }

    /* sqlite3_finalize() returns the error code of a failed sqlite3_step().
    ** Propagate it so that e.g. an IO error while reading the content table
    ** is not misreported as a checksum mismatch. Calling it with a NULL
    ** statement is a harmless no-op. */
    rc2 = sqlite3_finalize(pStmt);
    if( rc==SQLITE_OK ) rc = rc2;
  }

  *pbOk = (cksum1==cksum2);
  return rc;
}
/*
** Run the integrity-check on FTS table p and translate the outcome into a
** single result code:
**
**   * the error code from fts3IntegrityCheck(), if it reported one (e.g.
**     an OOM or IO error);
**   * SQLITE_CORRUPT_VTAB if the check ran but the FTS index does not match
**     the table content;
**   * SQLITE_OK if the index contents are correct.
*/
static int fts3DoIntegrityCheck(
  Fts3Table *p                    /* FTS3 table handle */
){
  int bMatch = 0;                 /* Set to true if index matches content */
  int rc = fts3IntegrityCheck(p, &bMatch);

  if( rc!=SQLITE_OK ){
    return rc;
  }
  return bMatch ? SQLITE_OK : SQLITE_CORRUPT_VTAB;
}
/*
** Handle a 'special' INSERT of the form:
@ -4726,6 +4931,8 @@ static int fts3SpecialInsert(Fts3Table *p, sqlite3_value *pVal){
rc = fts3DoOptimize(p, 0);
}else if( nVal==7 && 0==sqlite3_strnicmp(zVal, "rebuild", 7) ){
rc = fts3DoRebuild(p);
}else if( nVal==15 && 0==sqlite3_strnicmp(zVal, "integrity-check", 15) ){
rc = fts3DoIntegrityCheck(p);
}else if( nVal>6 && 0==sqlite3_strnicmp(zVal, "merge=", 6) ){
rc = fts3DoIncrmerge(p, &zVal[6]);
}else if( nVal>10 && 0==sqlite3_strnicmp(zVal, "automerge=", 10) ){

View File

@ -1,5 +1,5 @@
C Add\sSQLITE_DBSTATUS_CACHE_WRITE.\sUsed\sto\squery\sa\sdatabase\sconnection\sfor\sthe\scumulative\snumber\sof\sdatabase\spages\swritten.
D 2012-03-24T19:44:56.637
C Add\san\sexperimental\sintegrity-check\sfunction\sto\sFTS.
D 2012-03-26T10:36:55.434
F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f
F Makefile.in 2f37e468503dbe79d35c9f6dffcf3fae1ae9ec20
F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23
@ -63,9 +63,9 @@ F ext/fts3/README.content fdc666a70d5257a64fee209f97cf89e0e6e32b51
F ext/fts3/README.syntax a19711dc5458c20734b8e485e75fb1981ec2427a
F ext/fts3/README.tokenizers 998756696647400de63d5ba60e9655036cb966e9
F ext/fts3/README.txt 8c18f41574404623b76917b9da66fcb0ab38328d
F ext/fts3/fts3.c 95409b49801ee7736755d7e307e606571b754a58
F ext/fts3/fts3.c a36f2add4c795b9e1ca1e1a16bd1e45c697a1f37
F ext/fts3/fts3.h 3a10a0af180d502cecc50df77b1b22df142817fe
F ext/fts3/fts3Int.h eb749124db7c94b6f89d793cdd4d993a52c46646
F ext/fts3/fts3Int.h 133e5c613ac6920be5b914d43acc1478df1332e1
F ext/fts3/fts3_aux.c 5205182bd8f372782597888156404766edf5781e
F ext/fts3/fts3_expr.c dbc7ba4c3a6061adde0f38ed8e9b349568299551
F ext/fts3/fts3_hash.c 8dd2d06b66c72c628c2732555a32bc0943114914
@ -78,7 +78,7 @@ F ext/fts3/fts3_test.c 6b7cc68aef4efb084e1449f7d20c4b20d3bdf6b4
F ext/fts3/fts3_tokenizer.c 3da7254a9881f7e270ab28e2004e0d22b3212bce
F ext/fts3/fts3_tokenizer.h 66dec98e365854b6cd2d54f1a96bb6d428fc5a68
F ext/fts3/fts3_tokenizer1.c 5c98225a53705e5ee34824087478cf477bdb7004
F ext/fts3/fts3_write.c 6a092ee27198716969bfbaa2194aa67eabeb2ff6
F ext/fts3/fts3_write.c a95e0f29a438bbba69ef686c75f03fbdf7ac79ac
F ext/fts3/fts3speed.tcl b54caf6a18d38174f1a6e84219950d85e98bb1e9
F ext/fts3/mkfts3amal.tcl 252ecb7fe6467854f2aa237bf2c390b74e71f100
F ext/icu/README.txt bf8461d8cdc6b8f514c080e4e10dc3b2bbdfefa9
@ -497,9 +497,10 @@ F test/fts3shared.test 8bb266521d7c5495c0ae522bb4d376ad5387d4a2
F test/fts3snippet.test 8e956051221a34c7daeb504f023cb54d5fa5a8b2
F test/fts3sort.test 95be0b19d7e41c44b29014f13ea8bddd495fd659
F test/fts4aa.test 6e7f90420b837b2c685f3bcbe84c868492d40a68
F test/fts4check.test 72134071f4e9f8bed76af1f2375fd5aff0c5ea48
F test/fts4content.test 17b2360f7d1a9a7e5aa8022783f5c5731b6dfd4f
F test/fts4langid.test 24a6e41063b416bbdf371ff6b4476fa41c194aa7
F test/fts4merge.test 120e0baf17a01f0cb696d6f6f9b6506e1587ef90
F test/fts4merge.test 16ba38960dc06ffd0c47c5487ec1060b5130661f
F test/fts4merge2.test 5faa558d1b672f82b847d2a337465fa745e46891
F test/fts4merge3.test e0e21332f592fc003fcab112928ea891407d83cb
F test/func.test 6c5ce11e3a0021ca3c0649234e2d4454c89110ca
@ -641,7 +642,7 @@ F test/pageropt.test 9191867ed19a2b3db6c42d1b36b6fbc657cd1ab0
F test/pagesize.test 1dd51367e752e742f58e861e65ed7390603827a0
F test/pcache.test 065aa286e722ab24f2e51792c1f093bf60656b16
F test/pcache2.test a83efe2dec0d392f814bfc998def1d1833942025
F test/permutations.test 0ab1e7748de5d29c4c648ba5ce3b983ab80653d1
F test/permutations.test dbda172249564f43ec556108a704581044c57dbd
F test/pragma.test c51c148defe32bf4a419a522f95d26838d5cf677
F test/pragma2.test 3a55f82b954242c642f8342b17dffc8b47472947
F test/printf.test ec9870c4dce8686a37818e0bf1aba6e6a1863552
@ -998,7 +999,7 @@ F tool/tostr.awk e75472c2f98dd76e06b8c9c1367f4ab07e122d06
F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f
F tool/warnings-clang.sh 9f406d66e750e8ac031c63a9ef3248aaa347ef2a
F tool/warnings.sh fbc018d67fd7395f440c28f33ef0f94420226381
P cc051fc0b2d89603b27b94cf2afdbda417ee9d94
R 9ca09f80a0e10e381b2361a47e7d01bf
P 3cb6a879f1220db03a66429d63330e27e8ca6e49
R a8a5e3d4a755c2fc6211bbe12afb7dcc
U dan
Z e2a698085c147deaded5cb32de63b51c
Z 32d9f92fc095c5c84155b7497aaefe53

View File

@ -1 +1 @@
3cb6a879f1220db03a66429d63330e27e8ca6e49
40fc8804743dfb005991e9c5ef7b0ebcb3c2e731

157
test/fts4check.test Normal file
View File

@ -0,0 +1,157 @@
# 2012 March 26
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#*************************************************************************
# This file implements regression tests for SQLite library. The
# focus of this script is testing the FTS 'integrity-check' function,
# used to check if the current FTS index accurately reflects the content
# of the table.
#
set testdir [file dirname $argv0]
source $testdir/tester.tcl
source $testdir/fts3_common.tcl
set ::testprefix fts4check
# If SQLITE_ENABLE_FTS3 is not defined, omit this file.
ifcapable !fts3 {
finish_test
return
}
# Run the integrity-check on FTS table $tbl using database handle $db. If
# the integrity-check passes, return "ok". Otherwise, throw an exception.
#
proc fts_integrity {db tbl} {
  # Build the special 'integrity-check' INSERT for table $tbl, then run it
  # on handle $db. If the statement raises an error it propagates to the
  # caller; otherwise the literal string "ok" is returned.
  set sql "INSERT INTO $tbl ($tbl) VALUES('integrity-check')"
  $db eval $sql
  return ok
}
#-------------------------------------------------------------------------
# Test cases 1.*
#
# 1.0: Build a reasonably sized FTS table (5000 rows).
#
# 1.1: Run the integrity check code to check it passes.
#
# 1.2: Make a series of minor changes to the underlying FTS data structures
# (e.g. delete or insert a row from the %_content table). Check that
# this causes the integrity-check code to fail.
#
# Build an FTS table and check the integrity-check passes.
#
# 1.0 populates the database using fts3_build_db_1 (presumably creating the
# FTS table t1 used below - see fts3_common.tcl). 1.1 then checks that the
# integrity-check passes on the freshly built table.
do_test 1.0 { fts3_build_db_1 5000 } {}
do_test 1.1 { fts_integrity db t1 } {ok}
# Mess around with the underlying tables. Check that this causes the
# integrity-check test to fail.
#
# Each numbered entry in the list below is a snippet of SQL that modifies
# one of the tables underlying t1 (t1_content or t1_segdir) directly.
#
foreach {tn disruption} {
1 {
INSERT INTO t1_content(docid, c0x, c1y) VALUES(NULL, 'a', 'b');
}
2 {
DELETE FROM t1_content WHERE docid = (SELECT max(docid) FROM t1_content);
}
3 {
DELETE FROM t1_segdir WHERE level=0 AND idx=(
SELECT max(idx) FROM t1_segdir WHERE level=0
);
}
} {
# Apply the disruption inside an open transaction so it can be undone.
do_execsql_test 1.2.1.$tn "BEGIN; $disruption"
# With the disruption in place the integrity-check must report an error.
do_catchsql_test 1.2.2.$tn {
INSERT INTO t1 (t1) VALUES('integrity-check')
} {1 {database disk image is malformed}}
# Undo the disruption before moving on to the next one.
do_execsql_test 1.2.3.$tn "ROLLBACK"
}
# Once every disruption has been rolled back the check passes again.
do_test 1.3 { fts_integrity db t1 } {ok}
#-------------------------------------------------------------------------
# Test cases 2.*
#
# 2.0: Build a reasonably sized FTS table (20000 rows) that includes
# prefix indexes.
#
# 2.1: Run the integrity check code to check it passes.
#
# 2.2: Make a series of minor changes to the underlying FTS data structures
# (e.g. delete or insert a row from the %_content table). Check that
# this causes the integrity-check code to fail.
#
# 2.0 populates the database using fts3_build_db_2, passing prefix="3,1" as
# an extra argument (presumably creating the FTS table t2 used below - see
# fts3_common.tcl). 2.1 checks that the integrity-check passes on it.
do_test 2.0 { fts3_build_db_2 20000 {prefix="3,1"} } {}
do_test 2.1 { fts_integrity db t2 } {ok}
# NOTE(review): the cases below are numbered 1 and 3 - there is no case 2.
# NOTE(review): case 3 filters the DELETE on level=0 but takes max(idx) from
# the rows with level=1024 - confirm that this mix of levels is intentional.
foreach {tn disruption} {
1 {
INSERT INTO t2_content VALUES(NULL, 'xyz')
}
3 {
DELETE FROM t2_segdir WHERE level=0 AND idx=(
SELECT max(idx) FROM t2_segdir WHERE level=1024
);
}
} {
# Apply the disruption inside an open transaction so it can be undone.
do_execsql_test 2.2.1.$tn "BEGIN; $disruption"
# With the disruption in place the integrity-check must report an error.
do_catchsql_test 2.2.2.$tn {
INSERT INTO t2 (t2) VALUES('integrity-check')
} {1 {database disk image is malformed}}
# Undo the disruption before moving on to the next one.
do_execsql_test 2.2.3.$tn "ROLLBACK"
}
#-------------------------------------------------------------------------
# Test cases 3.*
#
# 3.0: Build a reasonably sized FTS table (5000 rows) that includes
# prefix indexes and uses the languageid= feature.
#
# 3.1: Run the integrity check code to check it passes.
#
# 3.2: Make a series of minor changes to the underlying FTS data structures
# (e.g. delete or insert a row from the %_content table). Check that
# this causes the integrity-check code to fail.
#
# 3.0 starts from a fresh database, rebuilds the data set with
# fts3_build_db_1, then creates a second FTS4 table t3 with two prefix
# indexes and a languageid column named "langid".
do_test 3.0 {
reset_db
fts3_build_db_1 5000
execsql {
CREATE VIRTUAL TABLE t3 USING fts4(x, y, prefix="2,3", languageid=langid);
}
# Copy the rows of t1 into t3 one docid at a time, in ascending docid
# order. Each row is given the language id (docid%9)*4, so nine different
# language ids are used.
foreach docid [execsql {SELECT docid FROM t1 ORDER BY 1 ASC}] {
execsql {
INSERT INTO t3(x, y, langid)
SELECT x, y, (docid%9)*4 FROM t1 WHERE docid=$docid;
}
}
} {}
# 3.1: the integrity-check passes on the freshly populated table.
do_test 3.1 { fts_integrity db t3 } {ok}
# Each numbered entry in the list below is a snippet of SQL that modifies
# t3_content directly: case 1 adds a row, case 2 changes the language id
# stored for the row with the largest rowid.
foreach {tn disruption} {
1 {
INSERT INTO t3_content(c0x, c1y, langid) VALUES(NULL, 'a', 0);
}
2 {
UPDATE t3_content SET langid=langid+1 WHERE rowid = (
SELECT max(rowid) FROM t3_content
)
}
} {
# Apply the disruption inside an open transaction so it can be undone.
do_execsql_test 3.2.1.$tn "BEGIN; $disruption"
# With the disruption in place the integrity-check must report an error.
do_catchsql_test 3.2.2.$tn {
INSERT INTO t3 (t3) VALUES('integrity-check')
} {1 {database disk image is malformed}}
# Undo the disruption before moving on to the next one.
do_execsql_test 3.2.3.$tn "ROLLBACK"
}
finish_test

View File

@ -23,6 +23,11 @@ ifcapable !fts3 {
return
}
proc fts3_integrity_check {tbl} {
  # Build the special 'integrity-check' INSERT for FTS table $tbl, then run
  # it using the "db" command. If the statement raises an error it propagates
  # to the caller; otherwise the literal string "ok" is returned.
  set sql "INSERT INTO $tbl ($tbl) VALUES('integrity-check')"
  db eval $sql
  return ok
}
#-------------------------------------------------------------------------
# Test cases 1.*
#

View File

@ -185,6 +185,7 @@ test_suite "fts3" -prefix "" -description {
fts4aa.test fts4content.test
fts3conf.test fts3prefix.test fts3fault2.test fts3corrupt.test
fts3corrupt2.test fts3first.test fts4langid.test fts4merge.test
fts4check.test
}