Fix an fts5 integrity-check problem that affects offsets=0 tables with prefix indexes.

FossilOrigin-Name: 609a0bc7f34e6dae74ce756aff920f3df78fe828
This commit is contained in:
dan 2015-12-21 18:45:09 +00:00
parent c58b9eeaaa
commit 159fd77e0f
10 changed files with 133 additions and 89 deletions

View File

@ -297,7 +297,7 @@ int sqlite3Fts5IsBareword(char t);
/* Bucket of terms object used by the integrity-check in offsets=0 mode. */
typedef struct Fts5Termset Fts5Termset;
int sqlite3Fts5TermsetNew(Fts5Termset**);
int sqlite3Fts5TermsetAdd(Fts5Termset*, const char*, int, int *pbPresent);
int sqlite3Fts5TermsetAdd(Fts5Termset*, int, const char*, int, int *pbPresent);
void sqlite3Fts5TermsetFree(Fts5Termset*);
/*

View File

@ -298,6 +298,7 @@ typedef struct Fts5TermsetEntry Fts5TermsetEntry;
/*
** A single entry in an Fts5Termset hash table. Each entry records one
** (index, term) pair that has already been added to the set. Two entries
** are considered the same only if iIdx, nTerm and the nTerm bytes of the
** term all match.
*/
struct Fts5TermsetEntry {
char *pTerm; /* Term bytes; points just past this struct in the same allocation */
int nTerm; /* Size of pTerm in bytes */
int iIdx; /* Index (main or aPrefix[] entry) */
Fts5TermsetEntry *pNext; /* Next entry in the same apHash[] bucket */
};
@ -313,36 +314,44 @@ int sqlite3Fts5TermsetNew(Fts5Termset **pp){
int sqlite3Fts5TermsetAdd(
Fts5Termset *p,
int iIdx,
const char *pTerm, int nTerm,
int *pbPresent
){
int rc = SQLITE_OK;
int i;
int hash = 13;
Fts5TermsetEntry *pEntry;
/* Calculate a hash value for this term */
for(i=0; i<nTerm; i++){
hash += (hash << 3) + (int)pTerm[i];
}
hash = hash % ArraySize(p->apHash);
*pbPresent = 0;
for(pEntry=p->apHash[hash]; pEntry; pEntry=pEntry->pNext){
if( pEntry->nTerm==nTerm && memcmp(pEntry->pTerm, pTerm, nTerm)==0 ){
*pbPresent = 1;
break;
}
}
if( p ){
int i;
int hash;
Fts5TermsetEntry *pEntry;
if( pEntry==0 ){
pEntry = sqlite3Fts5MallocZero(&rc, sizeof(Fts5TermsetEntry) + nTerm);
if( pEntry ){
pEntry->pTerm = (char*)&pEntry[1];
pEntry->nTerm = nTerm;
memcpy(pEntry->pTerm, pTerm, nTerm);
pEntry->pNext = p->apHash[hash];
p->apHash[hash] = pEntry;
/* Calculate a hash value for this term */
hash = 104 + iIdx;
for(i=0; i<nTerm; i++){
hash += (hash << 3) + (int)pTerm[i];
}
hash = hash % ArraySize(p->apHash);
for(pEntry=p->apHash[hash]; pEntry; pEntry=pEntry->pNext){
if( pEntry->iIdx==iIdx
&& pEntry->nTerm==nTerm
&& memcmp(pEntry->pTerm, pTerm, nTerm)==0
){
*pbPresent = 1;
break;
}
}
if( pEntry==0 ){
pEntry = sqlite3Fts5MallocZero(&rc, sizeof(Fts5TermsetEntry) + nTerm);
if( pEntry ){
pEntry->pTerm = (char*)&pEntry[1];
pEntry->nTerm = nTerm;
pEntry->iIdx = iIdx;
memcpy(pEntry->pTerm, pTerm, nTerm);
pEntry->pNext = p->apHash[hash];
p->apHash[hash] = pEntry;
}
}
}

View File

@ -4373,7 +4373,9 @@ static void fts5MergePrefixLists(
sqlite3Fts5PoslistNext64(a1, i1.nPoslist, &iOff1,&iPos1);
}
}
p->rc = sqlite3Fts5PoslistWriterAppend(&tmp, &writer, iNew);
if( iNew!=writer.iPrev || tmp.n==0 ){
p->rc = sqlite3Fts5PoslistWriterAppend(&tmp, &writer, iNew);
}
}
/* WRITEPOSLISTSIZE */
@ -4608,7 +4610,11 @@ int sqlite3Fts5IndexClose(Fts5Index *p){
** size. Return the number of bytes in the nChar character prefix of the
** buffer, or 0 if there are fewer than nChar characters in total.
*/
static int fts5IndexCharlenToBytelen(const char *p, int nByte, int nChar){
static int sqlite3Fts5IndexCharlenToBytelen(
const char *p,
int nByte,
int nChar
){
int n = 0;
int i;
for(i=0; i<nChar; i++){
@ -4665,7 +4671,8 @@ int sqlite3Fts5IndexWrite(
);
for(i=0; i<pConfig->nPrefix && rc==SQLITE_OK; i++){
int nByte = fts5IndexCharlenToBytelen(pToken, nToken, pConfig->aPrefix[i]);
const int nChar = pConfig->aPrefix[i];
int nByte = sqlite3Fts5IndexCharlenToBytelen(pToken, nToken, nChar);
if( nByte ){
rc = sqlite3Fts5HashWrite(p->pHash,
p->iWriteRowid, iCol, iPos, (char)(FTS5_MAIN_PREFIX+i+1), pToken,
@ -4983,7 +4990,7 @@ int sqlite3Fts5IndexLoadConfig(Fts5Index *p){
/*
** Return a simple checksum value based on the arguments.
*/
static u64 fts5IndexEntryCksum(
u64 sqlite3Fts5IndexEntryCksum(
i64 iRowid,
int iCol,
int iPos,
@ -5071,7 +5078,7 @@ static int fts5QueryCksum(
){
int iCol = FTS5_POS2COLUMN(sReader.iPos);
int iOff = FTS5_POS2OFFSET(sReader.iPos);
cksum ^= fts5IndexEntryCksum(rowid, iCol, iOff, iIdx, z, n);
cksum ^= sqlite3Fts5IndexEntryCksum(rowid, iCol, iOff, iIdx, z, n);
}
rc = sqlite3Fts5IterNext(pIdxIter);
}
@ -5370,7 +5377,7 @@ static void fts5IndexIntegrityCheckSegment(
/*
** Run internal checks to ensure that the FTS index (a) is internally
** consistent and (b) contains entries for which the XOR of the checksums
** as calculated by fts5IndexEntryCksum() is cksum.
** as calculated by sqlite3Fts5IndexEntryCksum() is cksum.
**
** Return SQLITE_CORRUPT if any of the internal checks fail, or if the
** checksum does not match. Return SQLITE_OK if all checks pass without
@ -5434,7 +5441,7 @@ int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum){
while( 0==sqlite3Fts5PoslistNext64(poslist.p, poslist.n, &iOff, &iPos) ){
int iCol = FTS5_POS2COLUMN(iPos);
int iTokOff = FTS5_POS2OFFSET(iPos);
cksum2 ^= fts5IndexEntryCksum(iRowid, iCol, iTokOff, -1, z, n);
cksum2 ^= sqlite3Fts5IndexEntryCksum(iRowid, iCol, iTokOff, -1, z, n);
}
}
fts5TestTerm(p, &term, 0, 0, cksum2, &cksum3);
@ -5450,34 +5457,6 @@ int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum){
return fts5IndexReturn(p);
}
/*
** Return the combined checksum for a single token occurrence. The result
** is the XOR of the entry checksum for the main index (index 0) and the
** entry checksum for each configured prefix index that the token
** contributes an entry to. A prefix index is passed over when the
** byte-length helper reports 0 for it.
*/
u64 sqlite3Fts5IndexCksum(
  Fts5Config *pConfig,            /* Configuration object */
  i64 iRowid,                     /* Document term appears in */
  int iCol,                       /* Column term appears in */
  int iPos,                       /* Position term appears in */
  const char *pTerm, int nTerm    /* Term at iPos */
){
  int iPrefix;                    /* 1-based prefix index number */

  /* Entry in the main index: the whole term, index number 0. */
  u64 cksum = fts5IndexEntryCksum(iRowid, iCol, iPos, 0, pTerm, nTerm);

  /* Prefix index number N uses aPrefix[N-1] as its length in characters. */
  for(iPrefix=1; iPrefix<=pConfig->nPrefix; iPrefix++){
    int nPrefixByte = fts5IndexCharlenToBytelen(
        pTerm, nTerm, pConfig->aPrefix[iPrefix-1]
    );
    if( nPrefixByte!=0 ){
      cksum ^= fts5IndexEntryCksum(
          iRowid, iCol, iPos, iPrefix, pTerm, nPrefixByte
      );
    }
  }

  return cksum;
}
/*************************************************************************
**************************************************************************
** Below this point is the implementation of the fts5_decode() scalar

View File

@ -829,6 +829,7 @@ struct Fts5IntegrityCtx {
Fts5Config *pConfig;
};
/*
** Tokenization callback used by integrity check.
*/
@ -840,25 +841,41 @@ static int fts5StorageIntegrityCallback(
int iStart, /* Start offset of token */
int iEnd /* End offset of token */
){
int rc = SQLITE_OK;
Fts5IntegrityCtx *pCtx = (Fts5IntegrityCtx*)pContext;
Fts5Termset *pTermset = pCtx->pTermset;
int bPresent;
int ii;
int rc = SQLITE_OK;
int iPos;
int iCol;
if( (tflags & FTS5_TOKEN_COLOCATED)==0 || pCtx->szCol==0 ){
pCtx->szCol++;
}
if( pCtx->pTermset ){
int bPresent = 0;
rc = sqlite3Fts5TermsetAdd(pCtx->pTermset, pToken, nToken, &bPresent);
if( rc==SQLITE_OK && bPresent==0 ){
pCtx->cksum ^= sqlite3Fts5IndexCksum(
pCtx->pConfig, pCtx->iRowid, 0, pCtx->iCol, pToken, nToken
);
}
}else{
pCtx->cksum ^= sqlite3Fts5IndexCksum(
pCtx->pConfig, pCtx->iRowid, pCtx->iCol, pCtx->szCol-1, pToken, nToken
iPos = pTermset ? pCtx->iCol : pCtx->szCol-1;
iCol = pTermset ? 0 : pCtx->iCol;
rc = sqlite3Fts5TermsetAdd(pTermset, 0, pToken, nToken, &bPresent);
if( rc==SQLITE_OK && bPresent==0 ){
pCtx->cksum ^= sqlite3Fts5IndexEntryCksum(
pCtx->iRowid, iCol, iPos, 0, pToken, nToken
);
}
for(ii=0; rc==SQLITE_OK && ii<pCtx->pConfig->nPrefix; ii++){
const int nChar = pCtx->pConfig->aPrefix[ii];
int nByte = sqlite3Fts5IndexCharlenToBytelen(pToken, nToken, nChar);
if( nByte ){
rc = sqlite3Fts5TermsetAdd(pTermset, ii+1, pToken, nByte, &bPresent);
if( bPresent==0 ){
pCtx->cksum ^= sqlite3Fts5IndexEntryCksum(
pCtx->iRowid, iCol, iPos, ii+1, pToken, nByte
);
}
}
}
return rc;
}

View File

@ -74,6 +74,22 @@ foreach {T create} {
BEGIN;
}
6 {
CREATE VIRTUAL TABLE t1 USING fts5(a, b, offsets=0);
INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
}
7 {
CREATE VIRTUAL TABLE t1 USING fts5(a, b, offsets=0, prefix="1,2,3,4,5");
INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
}
8 {
CREATE VIRTUAL TABLE t1 USING fts5(a, b, offsets=0, prefix="1,2,3,4,5");
INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
BEGIN;
}
} {
do_test $T.1 {

View File

@ -369,7 +369,6 @@ foreach {tn expr} {
} {
breakpoint
do_auto_test 4.$tn yy {c1 c2 c3} $expr
}

View File

@ -21,7 +21,6 @@ ifcapable !fts5 {
return
}
#--------------------------------------------------------------------------
# Simple tests.
#
@ -65,5 +64,20 @@ do_catchsql_test 1.3.2 {
SELECT rowid FROM t1('NEAR(h d)');
} {1 {fts5: NEAR queries are not supported (offsets=0)}}
#-------------------------------------------------------------------------
# integrity-check with both offsets= and prefix= options.
#
# Create an offsets=0 table with a one-character prefix index and insert a
# row whose two tokens ("aa" and "ab") begin with the same character.
do_execsql_test 2.0 {
  CREATE VIRTUAL TABLE t2 USING fts5(a, offsets=0, prefix="1");
  INSERT INTO t2(a) VALUES('aa ab');
}

# The integrity-check must complete without raising an error.
do_execsql_test 2.1 {
  INSERT INTO t2(t2) VALUES('integrity-check');
}
finish_test

View File

@ -18,7 +18,7 @@ ifcapable !fts5 {
finish_test
return
}
#-------------------------------------------------------------------------
#
set doc "x x [string repeat {y } 50]z z"
@ -350,6 +350,16 @@ do_execsql_test 4.1 {
SELECT rowid, x, x1 FROM x1 WHERE x1 MATCH '*reads'
} {0 {} 4}
#-------------------------------------------------------------------------
# Start from a fresh database, create a table with a one-character prefix
# index, and insert a single two-character token.
reset_db
do_execsql_test 15.0 {
CREATE VIRTUAL TABLE x2 USING fts5(x, prefix=1);
INSERT INTO x2 VALUES('ab');
}
# The integrity-check is expected to run without raising an error.
do_execsql_test 15.1 {
INSERT INTO x2(x2) VALUES('integrity-check');
}
finish_test

View File

@ -1,5 +1,5 @@
C Fix\sa\sproblem\swith\sprefix\squeries\son\sfts5\soffsets=0\stables.
D 2015-12-18T19:07:14.984
C Fix\san\sfts5\sintegrity-check\sproblem\sthat\saffects\soffsets=0\stables\swith\sprefix\sindexes.
D 2015-12-21T18:45:09.329
F Makefile.in 28bcd6149e050dff35d4dcfd97e890cd387a499d
F Makefile.linux-gcc 7bc79876b875010e8c8f9502eb935ca92aa3c434
F Makefile.msc 5fff077fcc46de7714ed6eebb6159a4c00eab751
@ -97,15 +97,15 @@ F ext/fts3/unicode/mkunicode.tcl 95cf7ec186e48d4985e433ff8a1c89090a774252
F ext/fts3/unicode/parseunicode.tcl da577d1384810fb4e2b209bf3313074353193e95
F ext/fts5/extract_api_docs.tcl a36e54ec777172ddd3f9a88daf593b00848368e0
F ext/fts5/fts5.h 8b9a13b309b180e9fb88ea5666c0d8d73c6102d9
F ext/fts5/fts5Int.h 4e1bb66d8e607bf38e881eb455cdf36cc3fa9e42
F ext/fts5/fts5Int.h e71739ed0b816758f1f77baaccca1b4a9064c4dc
F ext/fts5/fts5_aux.c 1f384972d606375b8fa078319f25ab4b5feb1b35
F ext/fts5/fts5_buffer.c 389d377d04f6e622644c3343ab5e511f6646de36
F ext/fts5/fts5_buffer.c 87204c8b3b8bc62b27376eab09b74d6d5acc41f1
F ext/fts5/fts5_config.c ba5248a05c28ec6a6fdf2599a86e9fd67e5c61e2
F ext/fts5/fts5_expr.c 3b2c7ac54e6c03e732751a6a4bf9ced8f408e2a2
F ext/fts5/fts5_hash.c d4a6b52faca0134cc7bcc880f03a257a0dec2636
F ext/fts5/fts5_index.c d862dc84c54133d902893a6a7992699cd5272c48
F ext/fts5/fts5_index.c 1d1939afbc434907993d7e9a0c631be630d5b0dc
F ext/fts5/fts5_main.c ef04699949ab8e42d590ae30188afef7ad58776e
F ext/fts5/fts5_storage.c 0dc37a6183e1061e255f23971198d8878159d4ef
F ext/fts5/fts5_storage.c 14e0bb6549a66da54adf4fa1564edbf24647cb22
F ext/fts5/fts5_tcl.c 3bf445e66de32137d4693694ff7b1fd6074e32bd
F ext/fts5/fts5_test_mi.c e96be827aa8f571031e65e481251dc1981d608bf
F ext/fts5/fts5_tokenize.c 618efe033bceb80c521b1e9ddfd9fee85fb5946e
@ -118,7 +118,7 @@ F ext/fts5/test/fts5_common.tcl 51f7ef3af444b89c6f6ce3896a0ac349ff4e996d
F ext/fts5/test/fts5aa.test 2c553eea4dab4bc5a75928f56729277c7bc1d206
F ext/fts5/test/fts5ab.test 6fe3a56731d15978afbb74ae51b355fc9310f2ad
F ext/fts5/test/fts5ac.test 9737992d08c56bfd4803e933744d2d764e23795c
F ext/fts5/test/fts5ad.test e3dfb150fce971b4fd832498c29f56924d451b63
F ext/fts5/test/fts5ad.test 21d87b12c7ec83b4ec48816d24503443dffb10a1
F ext/fts5/test/fts5ae.test 0a9984fc3479f89f8c63d9848d6ed0c465dfcebe
F ext/fts5/test/fts5af.test c2501ec2b61d6b179c305f5d2b8782ab3d4f832a
F ext/fts5/test/fts5ag.test ec3e119b728196620a31507ef503c455a7a73505
@ -128,7 +128,7 @@ F ext/fts5/test/fts5aj.test 05b569f5c16ea3098fb1984eec5cf50dbdaae5d8
F ext/fts5/test/fts5ak.test 7b8c5df96df599293f920b7e5521ebc79f647592
F ext/fts5/test/fts5al.test a1b7b6393376bc2adc216527a28f5ae5594069df
F ext/fts5/test/fts5alter.test 6022c61467a82aa11c70822ccad22b328dcf0d04
F ext/fts5/test/fts5auto.test caa5bcf917db11944655a2a9bd38c67c520376ca
F ext/fts5/test/fts5auto.test 2a6241673657b340427f521528f7809ddaa02a9e
F ext/fts5/test/fts5aux.test 8c687c948cc98e9a94be014df7d518acc1b3b74f
F ext/fts5/test/fts5auxdata.test 141a7cbffcceb1bd2799b4b29c183ff8780d586e
F ext/fts5/test/fts5bigpl.test 04ee0d7eebbebf17c31f5a0b5c5f9494eac3a0cb
@ -156,7 +156,7 @@ F ext/fts5/test/fts5integrity.test 87db5d4e7da0ce04a1dcba5ba91658673c997a65
F ext/fts5/test/fts5matchinfo.test 2163b0013e824bba65499da9e34ea4da41349cc2
F ext/fts5/test/fts5merge.test 8f3cdba2ec9c5e7e568246e81b700ad37f764367
F ext/fts5/test/fts5near.test b214cddb1c1f1bddf45c75af768f20145f7e71cc
F ext/fts5/test/fts5offsets.test 8410ec485d652da168c138b3d39e7418ff8406bf
F ext/fts5/test/fts5offsets.test 1a2d53c34a896d2038b839df2178410c45977671
F ext/fts5/test/fts5onepass.test 7ed9608e258132cb8d55e7c479b08676ad68810c
F ext/fts5/test/fts5optimize.test 42741e7c085ee0a1276140a752d4407d97c2c9f5
F ext/fts5/test/fts5phrase.test f6d1d464da5beb25dc56277aa4f1d6102f0d9a2f
@ -169,7 +169,7 @@ F ext/fts5/test/fts5rank.test 7e9e64eac7245637f6f2033aec4b292aaf611aab
F ext/fts5/test/fts5rebuild.test 03935f617ace91ed23a6099c7c74d905227ff29b
F ext/fts5/test/fts5restart.test c17728fdea26e7d0f617d22ad5b4b2862b994c17
F ext/fts5/test/fts5rowid.test 400384798349d658eaf06aefa1e364957d5d4821
F ext/fts5/test/fts5simple.test 9bded45827b4ab8933c87b7b3bcc3cd47f7378a4
F ext/fts5/test/fts5simple.test a599b7577bc3827a9a678add3b43d8b818b93456
F ext/fts5/test/fts5synonym.test cf88c0a56d5ea9591e3939ef1f6e294f7f2d0671
F ext/fts5/test/fts5tokenizer.test ea4df698b35cc427ebf2ba22829d0e28386d8c89
F ext/fts5/test/fts5unicode.test fbef8d8a3b4b88470536cc57604a82ca52e51841
@ -1406,7 +1406,7 @@ F tool/vdbe_profile.tcl 246d0da094856d72d2c12efec03250d71639d19f
F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4
F tool/warnings.sh 48bd54594752d5be3337f12c72f28d2080cb630b
F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f
P 40b5bbf02a824ca73b33aa4ae1c7d5f65b7cda10
R 3f45bfddac5bb4e5875c9115c2804e36
P ad0987d83c252dd8d6a69321893629d7be805c28
R f519655f5c64b0aca45be5b21174cea1
U dan
Z 1ef5385fc9e0304197471eb155fb346c
Z 77d1a35391efdb0fe1eff3fc97431cfb

View File

@ -1 +1 @@
ad0987d83c252dd8d6a69321893629d7be805c28
609a0bc7f34e6dae74ce756aff920f3df78fe828