diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index 1443751046..685492d0b7 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -255,6 +255,12 @@ static int fts5Corrupt() { return SQLITE_CORRUPT_VTAB; } # define FTS5_CORRUPT SQLITE_CORRUPT_VTAB #endif +#ifdef SQLITE_DEBUG +static int fts5MissingData() { return 0; } +#else +# define fts5MissingData() +#endif + typedef struct Fts5BtreeIter Fts5BtreeIter; typedef struct Fts5BtreeIterLevel Fts5BtreeIterLevel; @@ -530,6 +536,7 @@ struct Fts5NodeIter { Fts5Buffer term; int nEmpty; int iChild; + int bDlidx; }; /* @@ -566,6 +573,7 @@ struct Fts5BtreeIter { int iLeaf; /* Leaf containing terms >= current term */ int nEmpty; /* Number of "empty" leaves following iLeaf */ int bEof; /* Set to true at EOF */ + int bDlidx; /* True if there exists a dlidx */ }; static void fts5PutU16(u8 *aOut, u16 iVal){ @@ -670,6 +678,8 @@ static Fts5Data *fts5DataReadOrBuffer( rc = sqlite3_blob_reopen(p->pReader, iRowid); } + if( rc ) fts5MissingData(); + if( rc==SQLITE_OK ){ int nByte = sqlite3_blob_bytes(p->pReader); if( pBuf ){ @@ -980,10 +990,12 @@ static void fts5StructureWrite(Fts5Index *p, int iIdx, Fts5Structure *pStruct){ */ static void fts5NodeIterGobbleNEmpty(Fts5NodeIter *pIter){ if( pIter->iOffnData && 0==(pIter->aData[pIter->iOff] & 0xfe) ){ + pIter->bDlidx = pIter->aData[pIter->iOff] & 0x01; pIter->iOff++; pIter->iOff += getVarint32(&pIter->aData[pIter->iOff], pIter->nEmpty); }else{ pIter->nEmpty = 0; + pIter->bDlidx = 0; } } @@ -2082,13 +2094,15 @@ static int fts5PrefixCompress( */ static void fts5WriteBtreeNEmpty(Fts5Index *p, Fts5SegWriter *pWriter){ if( pWriter->nEmpty ){ - Fts5PageWriter *pPg = &pWriter->aWriter[1]; int bFlag = 0; + Fts5PageWriter *pPg; + pPg = &pWriter->aWriter[1]; if( pWriter->nEmpty>=FTS5_MIN_DLIDX_SIZE ){ i64 iKey = FTS5_DOCLIST_IDX_ROWID( pWriter->iIdx, pWriter->iSegid, pWriter->aWriter[0].pgno - 1 - pWriter->nEmpty ); + assert( pWriter->dlidx.n>0 ); fts5DataWrite(p, iKey, pWriter->dlidx.p, pWriter->dlidx.n); bFlag = 1; } @@ -2103,6 +2117,22 @@ static void fts5WriteBtreeNEmpty(Fts5Index *p, Fts5SegWriter *pWriter){ pWriter->bDlidxPrevValid = 0; } +static void fts5WriteBtreeGrow(Fts5Index *p, Fts5SegWriter *pWriter){ + Fts5PageWriter *aNew; + Fts5PageWriter *pNew; + int nNew = sizeof(Fts5PageWriter) * (pWriter->nWriter+1); + + aNew = (Fts5PageWriter*)sqlite3_realloc(pWriter->aWriter, nNew); + if( aNew==0 ) return; + + pNew = &aNew[pWriter->nWriter]; + memset(pNew, 0, sizeof(Fts5PageWriter)); + pNew->pgno = 1; + fts5BufferAppendVarint(&p->rc, &pNew->buf, 1); + + pWriter->nWriter++; + pWriter->aWriter = aNew; +} /* ** This is called once for each leaf page except the first that contains @@ -2123,19 +2153,8 @@ static void fts5WriteBtreeTerm( Fts5PageWriter *pPage; if( iHeight>=pWriter->nWriter ){ - Fts5PageWriter *aNew; - Fts5PageWriter *pNew; - int nNew = sizeof(Fts5PageWriter) * (pWriter->nWriter+1); - aNew = (Fts5PageWriter*)sqlite3_realloc(pWriter->aWriter, nNew); - if( aNew==0 ) return; - - pNew = &aNew[pWriter->nWriter]; - memset(pNew, 0, sizeof(Fts5PageWriter)); - pNew->pgno = 1; - fts5BufferAppendVarint(&p->rc, &pNew->buf, 1); - - pWriter->nWriter++; - pWriter->aWriter = aNew; + fts5WriteBtreeGrow(p, pWriter); + if( p->rc ) return; } pPage = &pWriter->aWriter[iHeight]; @@ -2202,6 +2221,7 @@ static void fts5WriteFlushLeaf(Fts5Index *p, Fts5SegWriter *pWriter){ if( pPage->term.n==0 ){ /* No term was written to this page. */ + assert( 0==fts5GetU16(&pPage->buf.p[2]) ); fts5WriteBtreeNoTerm(p, pWriter); } @@ -2379,11 +2399,15 @@ static void fts5WriteFinish( ){ int i; *pnLeaf = pWriter->aWriter[0].pgno; - *pnHeight = pWriter->nWriter; fts5WriteFlushLeaf(p, pWriter); + if( pWriter->nWriter==1 && pWriter->nEmpty>=FTS5_MIN_DLIDX_SIZE ){ + fts5WriteBtreeGrow(p, pWriter); + } if( pWriter->nWriter>1 ){ fts5WriteBtreeNEmpty(p, pWriter); } + *pnHeight = pWriter->nWriter; + for(i=1; inWriter; i++){ Fts5PageWriter *pPg = &pWriter->aWriter[i]; i64 iRow = FTS5_SEGMENT_ROWID(pWriter->iIdx, pWriter->iSegid, i, pPg->pgno); @@ -2905,6 +2929,7 @@ static void fts5BtreeIterInit( }else{ pIter->nEmpty = pIter->aLvl[0].s.nEmpty; pIter->iLeaf = pIter->aLvl[0].s.iChild; + pIter->bDlidx = pIter->aLvl[0].s.bDlidx; } } @@ -2940,6 +2965,7 @@ static void fts5BtreeIterNext(Fts5BtreeIter *pIter){ } pIter->nEmpty = pIter->aLvl[0].s.nEmpty; + pIter->bDlidx = pIter->aLvl[0].s.bDlidx; pIter->iLeaf = pIter->aLvl[0].s.iChild; assert( p->rc==SQLITE_OK || pIter->bEof ); } @@ -2958,6 +2984,37 @@ static void fts5BtreeIterFree(Fts5BtreeIter *pIter){ fts5BufferFree(&pIter->term); } +typedef struct DoclistIdxIter DoclistIdxIter; +struct DoclistIdxIter { + Fts5Data *pDlidx; /* Data for doclist index, if any */ + int iOff; /* Current offset into pDlidx */ + int bRowidValid; /* iRowid is valid */ + + int bZero; /* True if current leaf has no rowid */ + i64 iRowid; /* If bZero==0, first rowid on leaf */ +}; + +/* +** Return non-zero if EOF is reached. +*/ +static int fts5IndexDoclistIterNext(DoclistIdxIter *pIter){ + i64 iVal; + if( pIter->iOff>=pIter->pDlidx->n ) return 1; + pIter->iOff += getVarint(&pIter->pDlidx->p[pIter->iOff], (u64*)&iVal); + if( iVal==0 ){ + pIter->bZero = 1; + }else{ + pIter->bZero = 0; + if( pIter->bRowidValid ){ + pIter->iRowid -= iVal; + }else{ + pIter->bRowidValid = 1; + pIter->iRowid = iVal; + } + } + return 0; +} + static void fts5IndexIntegrityCheckSegment( Fts5Index *p, /* FTS5 backend object */ int iIdx, /* Index that pSeg is a part of */ @@ -2974,6 +3031,7 @@ static void fts5IndexIntegrityCheckSegment( Fts5Data *pLeaf; /* Data for this leaf */ int iOff; /* Offset of first term on leaf */ int i; /* Used to iterate through empty leaves */ + DoclistIdxIter dliter; /* For iterating through any doclist index */ /* If the leaf in question has already been trimmed from the segment, ** ignore this b-tree entry. Otherwise, load it into memory. */ @@ -3000,6 +3058,12 @@ static void fts5IndexIntegrityCheckSegment( fts5DataRelease(pLeaf); if( p->rc ) break; + memset(&dliter, 0, sizeof(DoclistIdxIter)); + if( iter.bDlidx ){ + i64 iDlidxRowid = FTS5_DOCLIST_IDX_ROWID(iIdx, pSeg->iSegid, iter.iLeaf); + dliter.pDlidx = fts5DataRead(p, iDlidxRowid); + } + /* Now check that the iter.nEmpty leaves following the current leaf ** (a) exist and (b) contain no terms. */ for(i=1; i<=iter.nEmpty; i++){ @@ -3007,8 +3071,23 @@ static void fts5IndexIntegrityCheckSegment( if( pLeaf && 0!=fts5GetU16(&pLeaf->p[2]) ){ p->rc = FTS5_CORRUPT; } + if( pLeaf && dliter.pDlidx ){ + if( fts5IndexDoclistIterNext(&dliter) ){ + p->rc = FTS5_CORRUPT; + }else{ + int iRowidOff = fts5GetU16(&pLeaf->p[0]); + if( dliter.bZero ){ + if( iRowidOff!=0 ) p->rc = FTS5_CORRUPT; + }else{ + i64 iRowid; + getVarint(&pLeaf->p[iRowidOff], (u64*)&iRowid); + if( iRowid!=dliter.iRowid ) p->rc = FTS5_CORRUPT; + } + } + } fts5DataRelease(pLeaf); } + fts5DataRelease(dliter.pDlidx); } if( p->rc==SQLITE_OK && iter.iLeaf!=pSeg->pgnoLast ){ @@ -3218,7 +3297,7 @@ static void fts5DecodeFunction( int i = 0; i64 iPrev; sqlite3Fts5BufferAppendPrintf(&rc, &s, "(dlidx idx=%d segid=%d pgno=%d)", - iIdx, iSegid, iHeight, iPgno + iIdx, iSegid, iPgno ); if( n>0 ){ i = getVarint(&a[i], (u64*)&iPrev); @@ -3305,7 +3384,9 @@ static void fts5DecodeFunction( ); } if( ss.nEmpty ){ - sqlite3Fts5BufferAppendPrintf(&rc, &s, " empty=%d", ss.nEmpty); + sqlite3Fts5BufferAppendPrintf(&rc, &s, " empty=%d%s", ss.nEmpty, + ss.bDlidx ? "*" : "" + ); } } fts5NodeIterFree(&ss); diff --git a/manifest b/manifest index f006bde39e..ec7f134bbc 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Add\s"doclist\sindex"\srecords\sto\sthe\sdatabase.\sThese\sare\sto\smake\snavigating\swithin\svery\slarge\sdoclists\sfaster.\sThey\sare\snot\syet\sused\sby\squeries. -D 2014-08-01T11:16:25.207 +C Have\sthe\sfts5\sintegrity-check\sverify\sthat\sdoclist\sindexes\smatch\sthe\scontents\sof\sthe\sleaf\spages\sthat\sthey\sindex. +D 2014-08-01T19:27:07.492 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in b03432313a3aad96c706f8164fb9f5307eaf19f5 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -110,7 +110,7 @@ F ext/fts5/fts5_aux.c 366057c7186bc3615deb5ecc0ff61de50b6d2dbc F ext/fts5/fts5_buffer.c 248c61ac9fec001602efc72a45704f3b8d367c00 F ext/fts5/fts5_config.c f4ebf143e141b8c77355e3b15aba81b7be51d710 F ext/fts5/fts5_expr.c e764d75c58a3accda795f1da1b45960ac87dc77a -F ext/fts5/fts5_index.c 618d54ecf41887b6db59491b71e654ae3315f8c9 +F ext/fts5/fts5_index.c 3e33e3b86f026fc5b2cb3c573ba05375c8e4de0b F ext/fts5/fts5_storage.c 2866e7e1de9dc851756c3a9c76b6e1d75e0facb7 F ext/fts5/fts5parse.y 777da8e5819f75c217982c79c29d014c293acac9 F ext/icu/README.txt d9fbbad0c2f647c3fdf715fc9fd64af53aedfc43 @@ -602,7 +602,7 @@ F test/fts5ad.test 2ed38bbc865678cb2905247120d02ebba7f20e07 F test/fts5ae.test cb37b3135a00d3afd5492ec534ecf654be5ff69e F test/fts5af.test 9ebe23aa3875896076952c7bc6e8308813a63c74 F test/fts5ag.test 0747bf3bade16d5165810cf891f875933b28b420 -F test/fts5ah.test bfa6ebd7ee87f73c4146b9e316a105fd0e43d01a +F test/fts5ah.test dfb54897c470e2dcf88912fc4f5b1ca4ac8307f7 F test/fts5ea.test ff43b40f8879ba50b82def70f2ab67c195d1a1d4 F test/full.test 6b3c8fb43c6beab6b95438c1675374b95fab245d F test/func.test ae97561957aba6ca9e3a7b8a13aac41830d701ef @@ -770,7 +770,7 @@ F test/pagesize.test 1dd51367e752e742f58e861e65ed7390603827a0 F test/pcache.test b09104b03160aca0d968d99e8cd2c5b1921a993d F test/pcache2.test a83efe2dec0d392f814bfc998def1d1833942025 F test/percentile.test b98fc868d71eb5619d42a1702e9ab91718cbed54 -F test/permutations.test 5f1f942bae4139b33626b82627aa262c0f72d936 +F test/permutations.test 542edb965245565d06b9284e708f17bb93d70691 F test/pragma.test adb21a90875bc54a880fa939c4d7c46598905aa0 F test/pragma2.test aea7b3d82c76034a2df2b38a13745172ddc0bc13 F test/printf.test ec9870c4dce8686a37818e0bf1aba6e6a1863552 @@ -1199,7 +1199,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P e6af3b7a3cf331210f4c87848e2af007dbd5ef30 -R a017a4de54c141d4f4f840978af83e33 +P 89377421ff69f2450364987afe781b6d8bcbf087 +R 49a5d37abb265ab7fa662e06ee8ea874 U dan -Z 90f2786a7e9f28e43c6798f77c65d6dc +Z 90844fe42071f9a2a3f80f69e16c73d8 diff --git a/manifest.uuid b/manifest.uuid index 5a6a2d5b54..e25707dfd1 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -89377421ff69f2450364987afe781b6d8bcbf087 \ No newline at end of file +37a7d3035eb4bbad7e32fe550321ac9fae611a57 \ No newline at end of file diff --git a/test/fts5ah.test b/test/fts5ah.test index 88fd524eb9..f5d1eee1b9 100644 --- a/test/fts5ah.test +++ b/test/fts5ah.test @@ -49,9 +49,6 @@ do_execsql_test 1.3 { INSERT INTO t1(t1) VALUES('integrity-check'); } -do_execsql_test 1.4 { - SELECT count(*) FROM t1_data -} finish_test diff --git a/test/permutations.test b/test/permutations.test index 9587d3bef0..41659ef898 100644 --- a/test/permutations.test +++ b/test/permutations.test @@ -226,7 +226,7 @@ test_suite "fts5" -prefix "" -description { All FTS5 tests. } -files { fts5aa.test fts5ab.test fts5ac.test fts5ad.test fts5ae.test fts5ea.test - fts5af.test fts5ag.test + fts5af.test fts5ag.test fts5ah.test } test_suite "nofaultsim" -prefix "" -description {