Have the fts5 integrity-check verify that doclist indexes match the contents of the leaf pages that they index.

FossilOrigin-Name: 37a7d3035eb4bbad7e32fe550321ac9fae611a57
This commit is contained in:
dan 2014-08-01 19:27:07 +00:00
parent a29284d65f
commit d34742901d
5 changed files with 108 additions and 30 deletions

View File

@ -255,6 +255,12 @@ static int fts5Corrupt() { return SQLITE_CORRUPT_VTAB; }
# define FTS5_CORRUPT SQLITE_CORRUPT_VTAB
#endif
#ifdef SQLITE_DEBUG
/*
** Stub that does nothing except return 0.
** NOTE(review): presumably a convenient place to set a breakpoint when a
** data record cannot be read - confirm against the call sites.
*/
static int fts5MissingData(){
  return 0;
}
#else
# define fts5MissingData()
#endif
typedef struct Fts5BtreeIter Fts5BtreeIter;
typedef struct Fts5BtreeIterLevel Fts5BtreeIterLevel;
@ -530,6 +536,7 @@ struct Fts5NodeIter {
Fts5Buffer term;
int nEmpty;
int iChild;
int bDlidx;
};
/*
@ -566,6 +573,7 @@ struct Fts5BtreeIter {
int iLeaf; /* Leaf containing terms >= current term */
int nEmpty; /* Number of "empty" leaves following iLeaf */
int bEof; /* Set to true at EOF */
int bDlidx; /* True if there exists a dlidx */
};
static void fts5PutU16(u8 *aOut, u16 iVal){
@ -670,6 +678,8 @@ static Fts5Data *fts5DataReadOrBuffer(
rc = sqlite3_blob_reopen(p->pReader, iRowid);
}
if( rc ) fts5MissingData();
if( rc==SQLITE_OK ){
int nByte = sqlite3_blob_bytes(p->pReader);
if( pBuf ){
@ -980,10 +990,12 @@ static void fts5StructureWrite(Fts5Index *p, int iIdx, Fts5Structure *pStruct){
*/
static void fts5NodeIterGobbleNEmpty(Fts5NodeIter *pIter){
  /* If the byte at the current offset has no bits set other than bit 0
  ** (i.e. it is 0x00 or 0x01), consume it: bit 0 is stored in bDlidx and
  ** the varint that follows is read into nEmpty. Otherwise, leave the
  ** offset untouched and clear both nEmpty and bDlidx. */
  const int iStart = pIter->iOff;

  if( iStart>=pIter->nData || (pIter->aData[iStart] & 0xfe)!=0 ){
    pIter->nEmpty = 0;
    pIter->bDlidx = 0;
    return;
  }

  pIter->bDlidx = pIter->aData[iStart] & 0x01;
  pIter->iOff = iStart + 1;
  pIter->iOff += getVarint32(&pIter->aData[pIter->iOff], pIter->nEmpty);
}
@ -2082,13 +2094,15 @@ static int fts5PrefixCompress(
*/
static void fts5WriteBtreeNEmpty(Fts5Index *p, Fts5SegWriter *pWriter){
if( pWriter->nEmpty ){
Fts5PageWriter *pPg = &pWriter->aWriter[1];
int bFlag = 0;
Fts5PageWriter *pPg;
pPg = &pWriter->aWriter[1];
if( pWriter->nEmpty>=FTS5_MIN_DLIDX_SIZE ){
i64 iKey = FTS5_DOCLIST_IDX_ROWID(
pWriter->iIdx, pWriter->iSegid,
pWriter->aWriter[0].pgno - 1 - pWriter->nEmpty
);
assert( pWriter->dlidx.n>0 );
fts5DataWrite(p, iKey, pWriter->dlidx.p, pWriter->dlidx.n);
bFlag = 1;
}
@ -2103,6 +2117,22 @@ static void fts5WriteBtreeNEmpty(Fts5Index *p, Fts5SegWriter *pWriter){
pWriter->bDlidxPrevValid = 0;
}
/*
** Extend the pWriter->aWriter[] array by one zeroed Fts5PageWriter entry.
** The new entry has pgno set to 1 and the varint 1 appended to its buffer.
**
** If the array cannot be reallocated, pWriter is left unmodified and
** p->rc is set to SQLITE_NOMEM (unless an earlier error code is already
** stored there). Callers detect failure by testing p->rc after this
** function returns, so the error must be recorded here - otherwise they
** would go on to index past the end of the (un-grown) array.
*/
static void fts5WriteBtreeGrow(Fts5Index *p, Fts5SegWriter *pWriter){
  Fts5PageWriter *aNew;
  Fts5PageWriter *pNew;
  int nNew = sizeof(Fts5PageWriter) * (pWriter->nWriter+1);

  aNew = (Fts5PageWriter*)sqlite3_realloc(pWriter->aWriter, nNew);
  if( aNew==0 ){
    /* Allocation failed. The original array is still owned by pWriter. */
    if( p->rc==SQLITE_OK ) p->rc = SQLITE_NOMEM;
    return;
  }

  /* Initialize the newly added slot at the end of the array. */
  pNew = &aNew[pWriter->nWriter];
  memset(pNew, 0, sizeof(Fts5PageWriter));
  pNew->pgno = 1;
  fts5BufferAppendVarint(&p->rc, &pNew->buf, 1);

  pWriter->nWriter++;
  pWriter->aWriter = aNew;
}
/*
** This is called once for each leaf page except the first that contains
@ -2123,19 +2153,8 @@ static void fts5WriteBtreeTerm(
Fts5PageWriter *pPage;
if( iHeight>=pWriter->nWriter ){
Fts5PageWriter *aNew;
Fts5PageWriter *pNew;
int nNew = sizeof(Fts5PageWriter) * (pWriter->nWriter+1);
aNew = (Fts5PageWriter*)sqlite3_realloc(pWriter->aWriter, nNew);
if( aNew==0 ) return;
pNew = &aNew[pWriter->nWriter];
memset(pNew, 0, sizeof(Fts5PageWriter));
pNew->pgno = 1;
fts5BufferAppendVarint(&p->rc, &pNew->buf, 1);
pWriter->nWriter++;
pWriter->aWriter = aNew;
fts5WriteBtreeGrow(p, pWriter);
if( p->rc ) return;
}
pPage = &pWriter->aWriter[iHeight];
@ -2202,6 +2221,7 @@ static void fts5WriteFlushLeaf(Fts5Index *p, Fts5SegWriter *pWriter){
if( pPage->term.n==0 ){
/* No term was written to this page. */
assert( 0==fts5GetU16(&pPage->buf.p[2]) );
fts5WriteBtreeNoTerm(p, pWriter);
}
@ -2379,11 +2399,15 @@ static void fts5WriteFinish(
){
int i;
*pnLeaf = pWriter->aWriter[0].pgno;
*pnHeight = pWriter->nWriter;
fts5WriteFlushLeaf(p, pWriter);
if( pWriter->nWriter==1 && pWriter->nEmpty>=FTS5_MIN_DLIDX_SIZE ){
fts5WriteBtreeGrow(p, pWriter);
}
if( pWriter->nWriter>1 ){
fts5WriteBtreeNEmpty(p, pWriter);
}
*pnHeight = pWriter->nWriter;
for(i=1; i<pWriter->nWriter; i++){
Fts5PageWriter *pPg = &pWriter->aWriter[i];
i64 iRow = FTS5_SEGMENT_ROWID(pWriter->iIdx, pWriter->iSegid, i, pPg->pgno);
@ -2905,6 +2929,7 @@ static void fts5BtreeIterInit(
}else{
pIter->nEmpty = pIter->aLvl[0].s.nEmpty;
pIter->iLeaf = pIter->aLvl[0].s.iChild;
pIter->bDlidx = pIter->aLvl[0].s.bDlidx;
}
}
@ -2940,6 +2965,7 @@ static void fts5BtreeIterNext(Fts5BtreeIter *pIter){
}
pIter->nEmpty = pIter->aLvl[0].s.nEmpty;
pIter->bDlidx = pIter->aLvl[0].s.bDlidx;
pIter->iLeaf = pIter->aLvl[0].s.iChild;
assert( p->rc==SQLITE_OK || pIter->bEof );
}
@ -2958,6 +2984,37 @@ static void fts5BtreeIterFree(Fts5BtreeIter *pIter){
fts5BufferFree(&pIter->term);
}
/*
** State for iterating through the entries of a doclist index record.
** fts5IndexDoclistIterNext() reads one varint from pDlidx per call and
** updates the remaining fields to describe the entry just read.
*/
typedef struct DoclistIdxIter DoclistIdxIter;
struct DoclistIdxIter {
Fts5Data *pDlidx; /* Data for doclist index, if any */
int iOff; /* Current offset into pDlidx */
int bRowidValid; /* True once a non-zero entry has been read (iRowid valid) */
int bZero; /* True if entry just read was 0 (current leaf has no rowid) */
i64 iRowid; /* If bZero==0, first rowid on leaf */
};
/*
** Advance the doclist-index iterator by reading a single varint from the
** record at the current offset.
**
** A value of zero sets bZero and leaves iRowid untouched. The first
** non-zero value read is stored in iRowid as is; each subsequent non-zero
** value is subtracted from iRowid.
**
** Return non-zero if EOF is reached (the offset is already at or past the
** end of the record, in which case nothing is modified), or zero otherwise.
*/
static int fts5IndexDoclistIterNext(DoclistIdxIter *pIter){
  Fts5Data *pData = pIter->pDlidx;
  i64 iVal;

  if( pIter->iOff>=pData->n ){
    return 1;
  }
  pIter->iOff += getVarint(&pData->p[pIter->iOff], (u64*)&iVal);

  pIter->bZero = (iVal==0);
  if( iVal!=0 ){
    if( pIter->bRowidValid ){
      pIter->iRowid -= iVal;
    }else{
      pIter->iRowid = iVal;
      pIter->bRowidValid = 1;
    }
  }
  return 0;
}
static void fts5IndexIntegrityCheckSegment(
Fts5Index *p, /* FTS5 backend object */
int iIdx, /* Index that pSeg is a part of */
@ -2974,6 +3031,7 @@ static void fts5IndexIntegrityCheckSegment(
Fts5Data *pLeaf; /* Data for this leaf */
int iOff; /* Offset of first term on leaf */
int i; /* Used to iterate through empty leaves */
DoclistIdxIter dliter; /* For iterating through any doclist index */
/* If the leaf in question has already been trimmed from the segment,
** ignore this b-tree entry. Otherwise, load it into memory. */
@ -3000,6 +3058,12 @@ static void fts5IndexIntegrityCheckSegment(
fts5DataRelease(pLeaf);
if( p->rc ) break;
memset(&dliter, 0, sizeof(DoclistIdxIter));
if( iter.bDlidx ){
i64 iDlidxRowid = FTS5_DOCLIST_IDX_ROWID(iIdx, pSeg->iSegid, iter.iLeaf);
dliter.pDlidx = fts5DataRead(p, iDlidxRowid);
}
/* Now check that the iter.nEmpty leaves following the current leaf
** (a) exist and (b) contain no terms. */
for(i=1; i<=iter.nEmpty; i++){
@ -3007,8 +3071,23 @@ static void fts5IndexIntegrityCheckSegment(
if( pLeaf && 0!=fts5GetU16(&pLeaf->p[2]) ){
p->rc = FTS5_CORRUPT;
}
if( pLeaf && dliter.pDlidx ){
if( fts5IndexDoclistIterNext(&dliter) ){
p->rc = FTS5_CORRUPT;
}else{
int iRowidOff = fts5GetU16(&pLeaf->p[0]);
if( dliter.bZero ){
if( iRowidOff!=0 ) p->rc = FTS5_CORRUPT;
}else{
i64 iRowid;
getVarint(&pLeaf->p[iRowidOff], (u64*)&iRowid);
if( iRowid!=dliter.iRowid ) p->rc = FTS5_CORRUPT;
}
}
}
fts5DataRelease(pLeaf);
}
fts5DataRelease(dliter.pDlidx);
}
if( p->rc==SQLITE_OK && iter.iLeaf!=pSeg->pgnoLast ){
@ -3218,7 +3297,7 @@ static void fts5DecodeFunction(
int i = 0;
i64 iPrev;
sqlite3Fts5BufferAppendPrintf(&rc, &s, "(dlidx idx=%d segid=%d pgno=%d)",
iIdx, iSegid, iHeight, iPgno
iIdx, iSegid, iPgno
);
if( n>0 ){
i = getVarint(&a[i], (u64*)&iPrev);
@ -3305,7 +3384,9 @@ static void fts5DecodeFunction(
);
}
if( ss.nEmpty ){
sqlite3Fts5BufferAppendPrintf(&rc, &s, " empty=%d", ss.nEmpty);
sqlite3Fts5BufferAppendPrintf(&rc, &s, " empty=%d%s", ss.nEmpty,
ss.bDlidx ? "*" : ""
);
}
}
fts5NodeIterFree(&ss);

View File

@ -1,5 +1,5 @@
C Add\s"doclist\sindex"\srecords\sto\sthe\sdatabase.\sThese\sare\sto\smake\snavigating\swithin\svery\slarge\sdoclists\sfaster.\sThey\sare\snot\syet\sused\sby\squeries.
D 2014-08-01T11:16:25.207
C Have\sthe\sfts5\sintegrity-check\sverify\sthat\sdoclist\sindexes\smatch\sthe\scontents\sof\sthe\sleaf\spages\sthat\sthey\sindex.
D 2014-08-01T19:27:07.492
F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f
F Makefile.in b03432313a3aad96c706f8164fb9f5307eaf19f5
F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23
@ -110,7 +110,7 @@ F ext/fts5/fts5_aux.c 366057c7186bc3615deb5ecc0ff61de50b6d2dbc
F ext/fts5/fts5_buffer.c 248c61ac9fec001602efc72a45704f3b8d367c00
F ext/fts5/fts5_config.c f4ebf143e141b8c77355e3b15aba81b7be51d710
F ext/fts5/fts5_expr.c e764d75c58a3accda795f1da1b45960ac87dc77a
F ext/fts5/fts5_index.c 618d54ecf41887b6db59491b71e654ae3315f8c9
F ext/fts5/fts5_index.c 3e33e3b86f026fc5b2cb3c573ba05375c8e4de0b
F ext/fts5/fts5_storage.c 2866e7e1de9dc851756c3a9c76b6e1d75e0facb7
F ext/fts5/fts5parse.y 777da8e5819f75c217982c79c29d014c293acac9
F ext/icu/README.txt d9fbbad0c2f647c3fdf715fc9fd64af53aedfc43
@ -602,7 +602,7 @@ F test/fts5ad.test 2ed38bbc865678cb2905247120d02ebba7f20e07
F test/fts5ae.test cb37b3135a00d3afd5492ec534ecf654be5ff69e
F test/fts5af.test 9ebe23aa3875896076952c7bc6e8308813a63c74
F test/fts5ag.test 0747bf3bade16d5165810cf891f875933b28b420
F test/fts5ah.test bfa6ebd7ee87f73c4146b9e316a105fd0e43d01a
F test/fts5ah.test dfb54897c470e2dcf88912fc4f5b1ca4ac8307f7
F test/fts5ea.test ff43b40f8879ba50b82def70f2ab67c195d1a1d4
F test/full.test 6b3c8fb43c6beab6b95438c1675374b95fab245d
F test/func.test ae97561957aba6ca9e3a7b8a13aac41830d701ef
@ -770,7 +770,7 @@ F test/pagesize.test 1dd51367e752e742f58e861e65ed7390603827a0
F test/pcache.test b09104b03160aca0d968d99e8cd2c5b1921a993d
F test/pcache2.test a83efe2dec0d392f814bfc998def1d1833942025
F test/percentile.test b98fc868d71eb5619d42a1702e9ab91718cbed54
F test/permutations.test 5f1f942bae4139b33626b82627aa262c0f72d936
F test/permutations.test 542edb965245565d06b9284e708f17bb93d70691
F test/pragma.test adb21a90875bc54a880fa939c4d7c46598905aa0
F test/pragma2.test aea7b3d82c76034a2df2b38a13745172ddc0bc13
F test/printf.test ec9870c4dce8686a37818e0bf1aba6e6a1863552
@ -1199,7 +1199,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1
F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4
F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32
F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f
P e6af3b7a3cf331210f4c87848e2af007dbd5ef30
R a017a4de54c141d4f4f840978af83e33
P 89377421ff69f2450364987afe781b6d8bcbf087
R 49a5d37abb265ab7fa662e06ee8ea874
U dan
Z 90f2786a7e9f28e43c6798f77c65d6dc
Z 90844fe42071f9a2a3f80f69e16c73d8

View File

@ -1 +1 @@
89377421ff69f2450364987afe781b6d8bcbf087
37a7d3035eb4bbad7e32fe550321ac9fae611a57

View File

@ -49,9 +49,6 @@ do_execsql_test 1.3 {
INSERT INTO t1(t1) VALUES('integrity-check');
}
do_execsql_test 1.4 {
SELECT count(*) FROM t1_data
}
finish_test

View File

@ -226,7 +226,7 @@ test_suite "fts5" -prefix "" -description {
All FTS5 tests.
} -files {
fts5aa.test fts5ab.test fts5ac.test fts5ad.test fts5ae.test fts5ea.test
fts5af.test fts5ag.test
fts5af.test fts5ag.test fts5ah.test
}
test_suite "nofaultsim" -prefix "" -description {