Fix a problem with fts5 doclist-indexes that occurred if the first rowid of the first non-term page of a doclist is zero.

FossilOrigin-Name: f704bc059e06b01f1d68fa7dad89e33eace6c389
This commit is contained in:
dan 2015-01-27 20:41:00 +00:00
parent 8ac3025419
commit 900f1922ac
6 changed files with 109 additions and 76 deletions

View File

@ -44,7 +44,7 @@
#define FTS5_OPT_WORK_UNIT 1000 /* Number of leaf pages per optimize step */
#define FTS5_WORK_UNIT 64 /* Number of leaf pages in unit of work */
#define FTS5_MIN_DLIDX_SIZE 4000 /* Add dlidx if this many empty pages */
#define FTS5_MIN_DLIDX_SIZE 4 /* Add dlidx if this many empty pages */
/*
** Details:
@ -192,11 +192,15 @@
**
** 5. Segment doclist indexes:
**
** A list of varints - the first docid on each page (starting with the
** first termless page) of the doclist. First element in the list is a
** literal docid. Each docid thereafter is a (negative) delta. If there
** are no docids at all on a page, a 0x00 byte takes the place of the
** delta value.
** A list of varints. If the first termless page contains at least one
** docid, the list begins with that docid as a varint followed by the
** value 1 (0x01). Or, if the first termless page contains no docids,
** a varint containing the last docid stored on the term page followed
** by a 0 (0x00) value.
**
** For each subsequent page in the doclist, either a 0x00 byte if the
** page contains no docids, or a delta-encoded docid (always +ve)
** representing the first docid on the page otherwise.
*/
/*
@ -373,7 +377,7 @@ struct Fts5SegWriter {
u8 bFirstTermInPage; /* True if next term will be first in leaf */
int nLeafWritten; /* Number of leaf pages written */
int nEmpty; /* Number of contiguous term-less nodes */
Fts5Buffer dlidx; /* Doclist index */
Fts5Buffer cdlidx; /* Doclist index */
i64 iDlidxPrev; /* Previous rowid appended to dlidx */
int bDlidxPrevValid; /* True if iDlidxPrev is valid */
};
@ -1335,28 +1339,42 @@ static void fts5NodeIterFree(Fts5NodeIter *pIter){
**
** pData: pointer to doclist-index record,
** iLeafPgno: page number that this doclist-index is associated with.
**
** When this function is called pIter->iLeafPgno is the page number the
** doclist is associated with (the one featuring the term).
*/
static int fts5DlidxIterFirst(Fts5DlidxIter *pIter){
Fts5Data *pData = pIter->pData;
int i;
int bPresent;
assert( pIter->pData );
assert( pIter->iLeafPgno>0 );
/* Count the number of leading 0x00 bytes. Then set iLeafPgno. */
for(i=0; i<pData->n; i++){
if( pData->p[i] ) break;
/* Read the first rowid value. And the "present" flag that follows it. */
pIter->iOff += getVarint(&pData->p[0], (u64*)&pIter->iRowid);
bPresent = pData->p[pIter->iOff++];
if( bPresent ){
i = 0;
}else{
/* Count the number of leading 0x00 bytes. */
for(i=1; pIter->iOff<pData->n; i++){
if( pData->p[pIter->iOff] ) break;
pIter->iOff++;
}
/* Unless we are already at the end of the doclist-index, load the first
** rowid value. */
if( pIter->iOff<pData->n ){
i64 iVal;
pIter->iOff += getVarint(&pData->p[pIter->iOff], (u64*)&iVal);
pIter->iRowid += iVal;
}else{
pIter->bEof = 1;
}
}
pIter->iLeafPgno += (i+1);
pIter->iOff = i;
/* Unless we are already at the end of the doclist-index, load the first
** rowid value. */
if( pIter->iOff<pData->n ){
pIter->iOff += getVarint(&pData->p[pIter->iOff], (u64*)&pIter->iRowid);
}else{
pIter->bEof = 1;
}
pIter->iFirstOff = pIter->iOff;
return pIter->bEof;
}
@ -1376,7 +1394,7 @@ static int fts5DlidxIterNext(Fts5DlidxIter *pIter){
i64 iVal;
pIter->iLeafPgno += (iOff - pIter->iOff) + 1;
iOff += getVarint(&pData->p[iOff], (u64*)&iVal);
pIter->iRowid -= iVal;
pIter->iRowid += iVal;
pIter->iOff = iOff;
}else{
pIter->bEof = 1;
@ -1417,7 +1435,7 @@ static int fts5DlidxIterPrev(Fts5DlidxIter *pIter){
}
getVarint(&a[iOff], (u64*)&iVal);
pIter->iRowid += iVal;
pIter->iRowid -= iVal;
pIter->iLeafPgno--;
while( iOff>pIter->iFirstOff
@ -1432,18 +1450,15 @@ static int fts5DlidxIterPrev(Fts5DlidxIter *pIter){
return pIter->bEof;
}
static void fts5DlidxIterInit(
static void fts5DlidxIterInitFromData(
Fts5Index *p, /* Fts5 Backend to iterate within */
int bRev, /* True for ORDER BY ASC */
int iIdx, int iSegid, /* Segment iSegid within index iIdx */
int iLeafPgno, /* Leaf page number to load dlidx for */
int iLeafPgno, /* Leaf page number dlidx is for */
Fts5Data *pDlidx, /* Leaf index data */
Fts5DlidxIter **ppIter /* OUT: Populated iterator */
){
Fts5DlidxIter *pIter = *ppIter;
Fts5Data *pDlidx;
pDlidx = fts5DataRead(p, FTS5_DOCLIST_IDX_ROWID(iIdx, iSegid, iLeafPgno));
if( pDlidx==0 ) return;
if( pIter==0 ){
*ppIter = pIter = (Fts5DlidxIter*)fts5IdxMalloc(p, sizeof(Fts5DlidxIter));
if( pIter==0 ){
@ -1463,6 +1478,19 @@ static void fts5DlidxIterInit(
}
}
/*
** Read the doclist-index record identified by
** FTS5_DOCLIST_IDX_ROWID(iIdx, iSegid, iLeafPgno) using fts5DataRead() and
** pass it to fts5DlidxIterInitFromData() to populate *ppIter.
**
** If fts5DataRead() returns 0, this function returns immediately without
** calling fts5DlidxIterInitFromData(); *ppIter is left exactly as the
** caller supplied it.
*/
static void fts5DlidxIterInit(
Fts5Index *p, /* Fts5 Backend to iterate within */
int bRev, /* True for ORDER BY ASC */
int iIdx, int iSegid, /* Segment iSegid within index iIdx */
int iLeafPgno, /* Leaf page number to load dlidx for */
Fts5DlidxIter **ppIter /* OUT: Populated iterator */
){
Fts5Data *pDlidx;
/* Fetch the doclist-index record associated with leaf page iLeafPgno. */
pDlidx = fts5DataRead(p, FTS5_DOCLIST_IDX_ROWID(iIdx, iSegid, iLeafPgno));
/* No record was returned, so there is nothing to build an iterator from. */
if( pDlidx==0 ) return;
/* The iterator itself is set up by the shared helper. */
fts5DlidxIterInitFromData(p, bRev, iLeafPgno, pDlidx, ppIter);
}
/*
** Free a doclist-index iterator object allocated by fts5DlidxIterInit().
*/
@ -2104,7 +2132,7 @@ static void fts5SegIterNextFrom(
assert( pIter->pLeaf );
if( bRev==0 ){
while( fts5DlidxIterEof(p, pDlidx)==0 && iMatch<pDlidx->iRowid ){
while( fts5DlidxIterEof(p, pDlidx)==0 && iMatch>pDlidx->iRowid ){
iLeafPgno = pDlidx->iLeafPgno;
fts5DlidxIterNext(pDlidx);
}
@ -2114,8 +2142,8 @@ static void fts5SegIterNextFrom(
bMove = 0;
}
}else{
assert( iMatch>pIter->iRowid );
while( fts5DlidxIterEof(p, pDlidx)==0 && iMatch>pDlidx->iRowid ){
assert( iMatch<pIter->iRowid );
while( fts5DlidxIterEof(p, pDlidx)==0 && iMatch<pDlidx->iRowid ){
fts5DlidxIterPrev(pDlidx);
}
iLeafPgno = pDlidx->iLeafPgno;
@ -2132,8 +2160,8 @@ static void fts5SegIterNextFrom(
while( 1 ){
if( bMove ) fts5SegIterNext(p, pIter);
if( pIter->pLeaf==0 ) break;
if( bRev==0 && pIter->iRowid<=iMatch ) break;
if( bRev!=0 && pIter->iRowid>=iMatch ) break;
if( bRev==0 && pIter->iRowid>=iMatch ) break;
if( bRev!=0 && pIter->iRowid<=iMatch ) break;
bMove = 1;
}
}
@ -2551,8 +2579,8 @@ static void fts5WriteBtreeNEmpty(Fts5Index *p, Fts5SegWriter *pWriter){
pWriter->iIdx, pWriter->iSegid,
pWriter->aWriter[0].pgno - 1 - pWriter->nEmpty
);
assert( pWriter->dlidx.n>0 );
fts5DataWrite(p, iKey, pWriter->dlidx.p, pWriter->dlidx.n);
assert( pWriter->cdlidx.n>0 );
fts5DataWrite(p, iKey, pWriter->cdlidx.p, pWriter->cdlidx.n);
bFlag = 1;
}
fts5BufferAppendVarint(&p->rc, &pPg->buf, bFlag);
@ -2562,7 +2590,7 @@ static void fts5WriteBtreeNEmpty(Fts5Index *p, Fts5SegWriter *pWriter){
/* Whether or not it was written to disk, zero the doclist index at this
** point */
sqlite3Fts5BufferZero(&pWriter->dlidx);
sqlite3Fts5BufferZero(&pWriter->cdlidx);
pWriter->bDlidxPrevValid = 0;
}
@ -2643,7 +2671,13 @@ static void fts5WriteBtreeNoTerm(
if( pWriter->bFirstRowidInPage ){
/* No rowids on this page. Append an 0x00 byte to the current
** doclist-index */
sqlite3Fts5BufferAppendVarint(&p->rc, &pWriter->dlidx, 0);
if( pWriter->bDlidxPrevValid==0 ){
i64 iRowid = pWriter->iPrevRowid;
sqlite3Fts5BufferAppendVarint(&p->rc, &pWriter->cdlidx, iRowid);
pWriter->bDlidxPrevValid = 1;
pWriter->iDlidxPrev = iRowid;
}
sqlite3Fts5BufferAppendVarint(&p->rc, &pWriter->cdlidx, 0);
}
pWriter->nEmpty++;
}
@ -2659,11 +2693,12 @@ static void fts5WriteDlidxAppend(
){
i64 iVal;
if( pWriter->bDlidxPrevValid ){
iVal = pWriter->iDlidxPrev - iRowid;
iVal = iRowid - pWriter->iDlidxPrev;
}else{
iVal = iRowid;
sqlite3Fts5BufferAppendVarint(&p->rc, &pWriter->cdlidx, iRowid);
iVal = 1;
}
sqlite3Fts5BufferAppendVarint(&p->rc, &pWriter->dlidx, iVal);
sqlite3Fts5BufferAppendVarint(&p->rc, &pWriter->cdlidx, iVal);
pWriter->bDlidxPrevValid = 1;
pWriter->iDlidxPrev = iRowid;
}
@ -2898,7 +2933,7 @@ static void fts5WriteFinish(
}
}
sqlite3_free(pWriter->aWriter);
sqlite3Fts5BufferFree(&pWriter->dlidx);
sqlite3Fts5BufferFree(&pWriter->cdlidx);
}
static void fts5WriteInit(
@ -3661,7 +3696,7 @@ static void fts5IndexIntegrityCheckSegment(
}
fts5DlidxIterFree(pDlidx);
fts5DlidxIterTestReverse(p, iIdx, iSegid, iter.iLeaf);
// fts5DlidxIterTestReverse(p, iIdx, iSegid, iter.iLeaf);
}
}
@ -4638,25 +4673,23 @@ static void fts5DecodeFunction(
fts5DebugRowid(&rc, &s, iRowid);
if( iHeight==FTS5_SEGMENT_MAX_HEIGHT ){
int i = 0;
i64 iPrev;
if( n>0 ){
i = getVarint(&a[i], (u64*)&iPrev);
sqlite3Fts5BufferAppendPrintf(&rc, &s, " %lld", iPrev);
}
while( i<n ){
i64 iVal;
i += getVarint(&a[i], (u64*)&iVal);
if( iVal==0 ){
sqlite3Fts5BufferAppendPrintf(&rc, &s, " x");
}else{
iPrev = iPrev - iVal;
sqlite3Fts5BufferAppendPrintf(&rc, &s, " %lld", iPrev);
}
}
Fts5Data dlidx;
Fts5DlidxIter iter;
}else
if( iSegid==0 ){
dlidx.p = a;
dlidx.n = n;
dlidx.nRef = 2;
memset(&iter, 0, sizeof(Fts5DlidxIter));
iter.pData = &dlidx;
iter.iLeafPgno = iPgno;
for(fts5DlidxIterFirst(&iter); iter.bEof==0; fts5DlidxIterNext(&iter)){
sqlite3Fts5BufferAppendPrintf(&rc, &s,
" %d(%lld)", iter.iLeafPgno, iter.iRowid
);
}
}else if( iSegid==0 ){
if( iRowid==FTS5_AVERAGES_ROWID ){
/* todo */
}else{

View File

@ -185,9 +185,10 @@ for {set i 1} {$i <= 10} {incr i} {
}
execsql { INSERT INTO t1(t1) VALUES('integrity-check'); }
} {}
if {$i==2} break
# if {$i==1} break
}
#db eval {SELECT rowid, fts5_decode(rowid, block) aS r FROM t1_data} {puts $r}
#exit
#-------------------------------------------------------------------------
#

View File

@ -304,12 +304,11 @@ proc fts5_test_poslist {cmd} {
sqlite3_fts5_create_function db fts5_test_poslist fts5_test_poslist
#-------------------------------------------------------------------------
# Test phrase queries.
#
foreach {tn phrase} {
8 "c"
1 "o"
2 "b q"
3 "e a e"

View File

@ -42,15 +42,15 @@ do_test 1.0 {
do_execsql_test 1.1.1 {
SELECT rowid FROM t1 WHERE t1 MATCH 'x AND w'
} [lsort -integer -decr $W]
} [lsort -integer -incr $W]
do_execsql_test 1.1.2 {
SELECT rowid FROM t1 WHERE t1 MATCH 'x* AND w*'
} [lsort -integer -decr $W]
} [lsort -integer -incr $W]
do_execsql_test 1.2 {
SELECT rowid FROM t1 WHERE t1 MATCH 'y AND x'
} [lsort -integer -decr $Y]
} [lsort -integer -incr $Y]
do_execsql_test 1.3 {
INSERT INTO t1(t1) VALUES('integrity-check');
@ -98,8 +98,8 @@ foreach {tn q res} "
expr {$n < ($nReadX / 10)}
} {1}
do_execsql_test 1.6.$tn.3 $q [lsort -int -decr $res]
do_execsql_test 1.6.$tn.4 "$q ORDER BY rowid ASC" [lsort -int -incr $res]
do_execsql_test 1.6.$tn.3 $q [lsort -int -incr $res]
do_execsql_test 1.6.$tn.4 "$q ORDER BY rowid DESC" [lsort -int -decr $res]
}
#db eval {SELECT rowid, fts5_decode(rowid, block) aS r FROM t1_data} {puts $r}

View File

@ -1,5 +1,5 @@
C Have\sfts5\sstore\srowids\sin\sascending\sorder.\sQuery\sspeed\sis\svirtually\sthe\ssame\sregardless\sof\srowid\sorder,\sand\sascending\sorder\smakes\ssome\sinsert\soptimizations\seasier.
D 2015-01-24T19:57:03.097
C Fix\sa\sproblem\swith\sfts5\sdoclist-indexes\sthat\soccured\sif\sthe\sfirst\srowid\sof\sthe\sfirst\snon-term\spage\sof\sa\sdoclist\sis\szero.
D 2015-01-27T20:41:00.681
F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f
F Makefile.in 5407a688f4d77a05c18a8142be8ae5a2829dd610
F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23
@ -112,7 +112,7 @@ F ext/fts5/fts5_buffer.c 32dd3c950392346ca69a0f1803501766c5c954f9
F ext/fts5/fts5_config.c e3421a76c2abd33a05ac09df0c97c64952d1e700
F ext/fts5/fts5_expr.c 473e3428a9a637fa6e61d64d8ca3796ec57a58e9
F ext/fts5/fts5_hash.c 4ab952b75f27d5ed3ef0f3b4f7fa1464744483e8
F ext/fts5/fts5_index.c b3e8e38c70178a638f4b0a183694db60ecde5366
F ext/fts5/fts5_index.c ef6c7764a9f4968465936839c8f7e7423d8458c2
F ext/fts5/fts5_storage.c f7c12c9f454b2a525827b3d85fd222789236f548
F ext/fts5/fts5_tcl.c 1293fac2bb26903fd3d5cdee59c5885ba7e620d5
F ext/fts5/fts5_tokenize.c 7c61d5c35c3449597bdeaa54dd48afe26852c7b0
@ -120,14 +120,14 @@ F ext/fts5/fts5_unicode2.c 9c7dd640d1f014bf5c3ee029759adfbb4d7e95a9
F ext/fts5/fts5parse.y 777da8e5819f75c217982c79c29d014c293acac9
F ext/fts5/mkportersteps.tcl 5acf962d2e0074f701620bb5308155fa1e4a63ba
F ext/fts5/test/fts5_common.tcl d9ea79fdbc9ecbb3541bf89d13ee0e03a8dc3d32
F ext/fts5/test/fts5aa.test e77e28ac85c70891fc2603ff4b15de571eca628f
F ext/fts5/test/fts5aa.test 065767c60ad301f77ad95f24369305e13347aa00
F ext/fts5/test/fts5ab.test 127769288519ed549c57d7e11628dbe5b9952ad5
F ext/fts5/test/fts5ac.test 1dfa0751bcf32fd9cfaad1557b7729950e5cc930
F ext/fts5/test/fts5ac.test cc39f7debda6f10ca2422e17163f9b6f078d5560
F ext/fts5/test/fts5ad.test 6c970531caf865b65f4e1dd9d6d43bd6ea37d754
F ext/fts5/test/fts5ae.test 347c96db06aab23ff00cf6a6b4064a8dbb182e42
F ext/fts5/test/fts5af.test c2501ec2b61d6b179c305f5d2b8782ab3d4f832a
F ext/fts5/test/fts5ag.test ec3e119b728196620a31507ef503c455a7a73505
F ext/fts5/test/fts5ah.test 17ba8e197a781ca10548b7260e39ed8269d24b93
F ext/fts5/test/fts5ah.test 56b5a2599707621bf2fd1b5a00ddc0c0c1ffbf06
F ext/fts5/test/fts5ai.test f20e53bbf0c55bc596f1fd47f2740dae028b8f37
F ext/fts5/test/fts5aj.test 05b569f5c16ea3098fb1984eec5cf50dbdaae5d8
F ext/fts5/test/fts5ak.test 7b8c5df96df599293f920b7e5521ebc79f647592
@ -1283,7 +1283,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1
F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4
F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32
F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f
P 51444f67c0cc58a3023eb1cd78e7cf889da6c80f
R bde0099a6ffad2afb653ac6add38295f
P 5206ca6005bfa9dfc7346d4b89430c9748d32c10
R dc023966ceb63b949d8070662e553f89
U dan
Z 8b04510bfa3b18ba6ca879f4b4c9a36e
Z 99344f3fa1c5e2c02514e48da6c76a56

View File

@ -1 +1 @@
5206ca6005bfa9dfc7346d4b89430c9748d32c10
f704bc059e06b01f1d68fa7dad89e33eace6c389