From 494016a424ebf8c2b1457f0bcd4bf0cd60838a2f Mon Sep 17 00:00:00 2001 From: dan Date: Mon, 25 May 2015 11:46:33 +0000 Subject: [PATCH] Avoid redundant loads from the %_data table in the fts5 code. FossilOrigin-Name: 02069782f8b7896a582582c79185b50418622736 --- ext/fts5/fts5Int.h | 2 +- ext/fts5/fts5_expr.c | 6 +- ext/fts5/fts5_index.c | 188 ++++++++++++++++---------------------- ext/fts5/fts5_vocab.c | 3 +- ext/fts5/test/fts5ah.test | 6 +- manifest | 20 ++-- manifest.uuid | 2 +- 7 files changed, 100 insertions(+), 127 deletions(-) diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index 7221a979dd..b0e9484c79 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -284,7 +284,7 @@ int sqlite3Fts5IterEof(Fts5IndexIter*); int sqlite3Fts5IterNext(Fts5IndexIter*); int sqlite3Fts5IterNextFrom(Fts5IndexIter*, i64 iMatch); i64 sqlite3Fts5IterRowid(Fts5IndexIter*); -int sqlite3Fts5IterPoslist(Fts5IndexIter*, const u8 **pp, int *pn); +int sqlite3Fts5IterPoslist(Fts5IndexIter*, const u8 **pp, int *pn, i64 *pi); int sqlite3Fts5IterPoslistBuffer(Fts5IndexIter *pIter, Fts5Buffer *pBuf); /* diff --git a/ext/fts5/fts5_expr.c b/ext/fts5/fts5_expr.c index 1d1e359b63..945bb637f8 100644 --- a/ext/fts5/fts5_expr.c +++ b/ext/fts5/fts5_expr.c @@ -357,9 +357,10 @@ static int fts5ExprPhraseIsMatch( /* Initialize a term iterator for each term in the phrase */ for(i=0; inTerm; i++){ + i64 dummy; int n; const u8 *a; - rc = sqlite3Fts5IterPoslist(pPhrase->aTerm[i].pIter, &a, &n); + rc = sqlite3Fts5IterPoslist(pPhrase->aTerm[i].pIter, &a, &n, &dummy); if( rc || sqlite3Fts5PoslistReaderInit(iCol, a, n, &aIter[i]) ){ goto ismatch_out; } @@ -685,9 +686,8 @@ static int fts5ExprNearNextMatch( Fts5ExprPhrase *pPhrase = pNear->apPhrase[0]; Fts5IndexIter *pIter = pPhrase->aTerm[0].pIter; assert( pPhrase->poslist.nSpace==0 ); - pNode->iRowid = sqlite3Fts5IterRowid(pIter); return sqlite3Fts5IterPoslist(pIter, - (const u8**)&pPhrase->poslist.p, &pPhrase->poslist.n + (const u8**)&pPhrase->poslist.p, &pPhrase->poslist.n, &pNode->iRowid ); }else{ int rc = SQLITE_OK; diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index 8759cf5901..8ed53190ed 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -309,7 +309,6 @@ int sqlite3Fts5Corrupt() { return SQLITE_CORRUPT_VTAB; } typedef struct Fts5BtreeIter Fts5BtreeIter; typedef struct Fts5BtreeIterLevel Fts5BtreeIterLevel; -typedef struct Fts5ChunkIter Fts5ChunkIter; typedef struct Fts5Data Fts5Data; typedef struct Fts5DlidxIter Fts5DlidxIter; typedef struct Fts5DlidxLvl Fts5DlidxLvl; @@ -317,7 +316,6 @@ typedef struct Fts5DlidxWriter Fts5DlidxWriter; typedef struct Fts5MultiSegIter Fts5MultiSegIter; typedef struct Fts5NodeIter Fts5NodeIter; typedef struct Fts5PageWriter Fts5PageWriter; -typedef struct Fts5PosIter Fts5PosIter; typedef struct Fts5SegIter Fts5SegIter; typedef struct Fts5DoclistIter Fts5DoclistIter; typedef struct Fts5SegWriter Fts5SegWriter; @@ -516,6 +514,7 @@ struct Fts5SegIter { int flags; /* Mask of configuration flags */ int iLeafPgno; /* Current leaf page number */ Fts5Data *pLeaf; /* Current leaf data */ + Fts5Data *pNextLeaf; /* Leaf page (iLeafPgno+1) */ int iLeafOffset; /* Byte offset within current leaf */ /* The page and offset from which the current term was read. The offset @@ -541,30 +540,6 @@ struct Fts5SegIter { #define FTS5_SEGITER_REVERSE 0x02 -/* -** Object for iterating through paginated data. -*/ -struct Fts5ChunkIter { - Fts5Data *pLeaf; /* Current leaf data. NULL -> EOF. */ - i64 iLeafRowid; /* Absolute rowid of current leaf */ - int nRem; /* Remaining bytes of data to read */ - - /* Output parameters */ - u8 *p; /* Pointer to chunk of data */ - int n; /* Size of buffer p in bytes */ -}; - -/* -** Object for iterating through a single position list on disk. -*/ -struct Fts5PosIter { - Fts5ChunkIter chunk; /* Current chunk of data */ - int iOff; /* Offset within chunk data */ - - int iCol; - int iPos; -}; - /* ** Object for iterating through the conents of a single internal node in ** memory. @@ -1713,7 +1688,11 @@ static void fts5SegIterNextPage( Fts5StructureSegment *pSeg = pIter->pSeg; fts5DataRelease(pIter->pLeaf); pIter->iLeafPgno++; - if( pIter->iLeafPgno<=pSeg->pgnoLast ){ + if( pIter->pNextLeaf ){ + assert( pIter->iLeafPgno<=pSeg->pgnoLast ); + pIter->pLeaf = pIter->pNextLeaf; + pIter->pNextLeaf = 0; + }else if( pIter->iLeafPgno<=pSeg->pgnoLast ){ pIter->pLeaf = fts5DataRead(p, FTS5_SEGMENT_ROWID(pSeg->iSegid, 0, pIter->iLeafPgno) ); @@ -1958,7 +1937,7 @@ static void fts5SegIterNext( assert( pbNewTerm==0 || *pbNewTerm==0 ); if( p->rc==SQLITE_OK ){ if( pIter->flags & FTS5_SEGITER_REVERSE ){ - + assert( pIter->pNextLeaf==0 ); if( pIter->iRowidOffset>0 ){ u8 *a = pIter->pLeaf->p; int iOff; @@ -2337,6 +2316,7 @@ static void fts5SegIterHashInit( static void fts5SegIterClear(Fts5SegIter *pIter){ fts5BufferFree(&pIter->term); fts5DataRelease(pIter->pLeaf); + fts5DataRelease(pIter->pNextLeaf); fts5DlidxIterFree(pIter->pDlidx); sqlite3_free(pIter->aRowidOffset); memset(pIter, 0, sizeof(Fts5SegIter)); @@ -2483,9 +2463,12 @@ static void fts5SegIterGotoPage( int iLeafPgno ){ assert( iLeafPgno>pIter->iLeafPgno ); + if( iLeafPgno>pIter->pSeg->pgnoLast ){ p->rc = FTS5_CORRUPT; }else{ + fts5DataRelease(pIter->pNextLeaf); + pIter->pNextLeaf = 0; pIter->iLeafPgno = iLeafPgno-1; fts5SegIterNextPage(p, pIter); assert( p->rc!=SQLITE_OK || pIter->iLeafPgno==iLeafPgno ); @@ -2537,6 +2520,7 @@ static void fts5SegIterNextFrom( bMove = 0; } }else{ + assert( pIter->pNextLeaf==0 ); assert( iMatchiRowid ); while( !fts5DlidxIterEof(p, pDlidx) && iMatchterm.p; } -/* -** Return true if the chunk iterator passed as the second argument is -** at EOF. Or if an error has already occurred. Otherwise, return false. -*/ -static int fts5ChunkIterEof(Fts5Index *p, Fts5ChunkIter *pIter){ - return (p->rc || pIter->pLeaf==0); -} +static void fts5ChunkIterate( + Fts5Index *p, /* Index object */ + Fts5SegIter *pSeg, /* Poslist of this iterator */ + void *pCtx, /* Context pointer for xChunk callback */ + void (*xChunk)(Fts5Index*, void*, const u8*, int) +){ + int nRem = pSeg->nPos; /* Number of bytes still to come */ + Fts5Data *pData = 0; + u8 *pChunk = &pSeg->pLeaf->p[pSeg->iLeafOffset]; + int nChunk = MIN(nRem, pSeg->pLeaf->n - pSeg->iLeafOffset); + int pgno = pSeg->iLeafPgno; + int pgnoSave = 0; -/* -** Advance the chunk-iterator to the next chunk of data to read. -*/ -static void fts5ChunkIterNext(Fts5Index *p, Fts5ChunkIter *pIter){ - assert( pIter->nRem>=pIter->n ); - pIter->nRem -= pIter->n; - fts5DataRelease(pIter->pLeaf); - pIter->pLeaf = 0; - pIter->p = 0; - if( pIter->nRem>0 ){ - Fts5Data *pLeaf; - pIter->iLeafRowid++; - pLeaf = pIter->pLeaf = fts5DataRead(p, pIter->iLeafRowid); - if( pLeaf ){ - pIter->n = MIN(pIter->nRem, pLeaf->n-4); - pIter->p = pLeaf->p+4; + if( (pSeg->flags & FTS5_SEGITER_REVERSE)==0 ){ + pgnoSave = pgno+1; + } + + while( 1 ){ + xChunk(p, pCtx, pChunk, nChunk); + nRem -= nChunk; + fts5DataRelease(pData); + if( nRem<=0 ){ + break; + }else{ + pgno++; + pData = fts5DataRead(p, FTS5_SEGMENT_ROWID(pSeg->pSeg->iSegid, 0, pgno)); + if( pData==0 ) break; + pChunk = &pData->p[4]; + nChunk = MIN(nRem, pData->n - 4); + if( pgno==pgnoSave ){ + assert( pSeg->pNextLeaf==0 ); + pSeg->pNextLeaf = pData; + pData = 0; + } } } } -/* -** Intialize the chunk iterator to read the position list data for which -** the size field is at offset iOff of leaf pLeaf. -*/ -static void fts5ChunkIterInit( - Fts5Index *p, /* FTS5 backend object */ - Fts5SegIter *pSeg, /* Segment iterator to read poslist from */ - Fts5ChunkIter *pIter /* Initialize this object */ -){ - Fts5Data *pLeaf = pSeg->pLeaf; - int iOff = pSeg->iLeafOffset; - - memset(pIter, 0, sizeof(*pIter)); - /* If Fts5SegIter.pSeg is NULL, then this iterator iterates through data - ** currently stored in a hash table. In this case there is no leaf-rowid - ** to calculate. */ - if( pSeg->pSeg ){ - i64 rowid = FTS5_SEGMENT_ROWID(pSeg->pSeg->iSegid, 0, pSeg->iLeafPgno); - pIter->iLeafRowid = rowid; - } - - fts5DataReference(pLeaf); - pIter->pLeaf = pLeaf; - pIter->nRem = pSeg->nPos; - pIter->n = MIN(pLeaf->n - iOff, pIter->nRem); - pIter->p = pLeaf->p + iOff; - if( pIter->n==0 ){ - fts5ChunkIterNext(p, pIter); - } -} - -static void fts5ChunkIterRelease(Fts5ChunkIter *pIter){ - fts5DataRelease(pIter->pLeaf); - pIter->pLeaf = 0; -} /* @@ -3512,6 +3471,15 @@ static void fts5TrimSegments(Fts5Index *p, Fts5MultiSegIter *pIter){ fts5BufferFree(&buf); } +static void fts5MergeChunkCallback( + Fts5Index *p, + void *pCtx, + const u8 *pChunk, int nChunk +){ + Fts5SegWriter *pWriter = (Fts5SegWriter*)pCtx; + fts5WriteAppendPoslistData(p, pWriter, pChunk, nChunk); +} + /* ** */ @@ -3583,7 +3551,6 @@ fflush(stdout); fts5MultiIterNext(p, pIter, 0, 0) ){ Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1].iFirst ]; - Fts5ChunkIter sPos; /* Used to iterate through position list */ int nPos; /* position-list size field value */ int nTerm; const u8 *pTerm; @@ -3591,12 +3558,9 @@ fflush(stdout); /* Check for key annihilation. */ if( pSeg->nPos==0 && (bOldest || pSeg->bDel==0) ) continue; - fts5ChunkIterInit(p, pSeg, &sPos); - pTerm = fts5MultiIterTerm(pIter, &nTerm); if( nTerm!=term.n || memcmp(pTerm, term.p, nTerm) ){ if( pnRem && writer.nLeafWritten>nRem ){ - fts5ChunkIterRelease(&sPos); break; } @@ -3614,11 +3578,8 @@ fflush(stdout); nPos = pSeg->nPos*2 + pSeg->bDel; fts5WriteAppendRowid(p, &writer, fts5MultiIterRowid(pIter), nPos); - for(/* noop */; !fts5ChunkIterEof(p, &sPos); fts5ChunkIterNext(p, &sPos)){ - fts5WriteAppendPoslistData(p, &writer, sPos.p, sPos.n); - } - - fts5ChunkIterRelease(&sPos); + /* Append the position-list data to the output */ + fts5ChunkIterate(p, pSeg, (void*)&writer, fts5MergeChunkCallback); } /* Flush the last leaf page to disk. Set the output segment b-tree height @@ -4057,6 +4018,14 @@ int sqlite3Fts5IndexMerge(Fts5Index *p, int nMerge){ return fts5IndexReturn(p); } +static void fts5PoslistCallback( + Fts5Index *p, + void *pCtx, + const u8 *pChunk, int nChunk +){ + fts5BufferAppendBlob(&p->rc, (Fts5Buffer*)pCtx, nChunk, pChunk); +} + /* ** Iterator pIter currently points to a valid entry (not EOF). This ** function appends the position list data for the current entry to @@ -4068,15 +4037,7 @@ static void fts5SegiterPoslist( Fts5SegIter *pSeg, Fts5Buffer *pBuf ){ - if( p->rc==SQLITE_OK ){ - Fts5ChunkIter iter; - fts5ChunkIterInit(p, pSeg, &iter); - while( fts5ChunkIterEof(p, &iter)==0 ){ - fts5BufferAppendBlob(&p->rc, pBuf, iter.n, iter.p); - fts5ChunkIterNext(p, &iter); - } - fts5ChunkIterRelease(&iter); - } + fts5ChunkIterate(p, pSeg, (void*)pBuf, fts5PoslistCallback); } /* @@ -4692,14 +4653,22 @@ const char *sqlite3Fts5IterTerm(Fts5IndexIter *pIter, int *pn){ ** The returned position list does not include the "number of bytes" varint ** field that starts the position list on disk. */ -int sqlite3Fts5IterPoslist(Fts5IndexIter *pIter, const u8 **pp, int *pn){ +int sqlite3Fts5IterPoslist( + Fts5IndexIter *pIter, + const u8 **pp, /* OUT: Pointer to position-list data */ + int *pn, /* OUT: Size of position-list in bytes */ + i64 *piRowid /* OUT: Current rowid */ +){ + Fts5DoclistIter *pDoclist = pIter->pDoclist; assert( pIter->pIndex->rc==SQLITE_OK ); - if( pIter->pDoclist ){ - *pn = pIter->pDoclist->nPoslist; - *pp = pIter->pDoclist->aPoslist; + if( pDoclist ){ + *pn = pDoclist->nPoslist; + *pp = pDoclist->aPoslist; + *piRowid = pDoclist->iRowid; }else{ Fts5MultiSegIter *pMulti = pIter->pMulti; Fts5SegIter *pSeg = &pMulti->aSeg[ pMulti->aFirst[1].iFirst ]; + *piRowid = pSeg->iRowid; *pn = pSeg->nPos; if( pSeg->iLeafOffset+pSeg->nPos <= pSeg->pLeaf->n ){ *pp = &pSeg->pLeaf->p[pSeg->iLeafOffset]; @@ -4983,10 +4952,11 @@ static int fts5QueryCksum( int rc = sqlite3Fts5IndexQuery(p, z, n, flags, &pIdxIter); while( rc==SQLITE_OK && 0==sqlite3Fts5IterEof(pIdxIter) ){ + i64 dummy; const u8 *pPos; int nPos; i64 rowid = sqlite3Fts5IterRowid(pIdxIter); - rc = sqlite3Fts5IterPoslist(pIdxIter, &pPos, &nPos); + rc = sqlite3Fts5IterPoslist(pIdxIter, &pPos, &nPos, &dummy); if( rc==SQLITE_OK ){ Fts5PoslistReader sReader; for(sqlite3Fts5PoslistReaderInit(-1, pPos, nPos, &sReader); diff --git a/ext/fts5/fts5_vocab.c b/ext/fts5/fts5_vocab.c index 715811b1ca..c21ec2effe 100644 --- a/ext/fts5/fts5_vocab.c +++ b/ext/fts5/fts5_vocab.c @@ -347,11 +347,12 @@ static int fts5VocabNextMethod(sqlite3_vtab_cursor *pCursor){ assert( pTab->eType==FTS5_VOCAB_COL || pTab->eType==FTS5_VOCAB_ROW ); while( rc==SQLITE_OK ){ + i64 dummy; const u8 *pPos; int nPos; /* Position list */ i64 iPos = 0; /* 64-bit position read from poslist */ int iOff = 0; /* Current offset within position list */ - rc = sqlite3Fts5IterPoslist(pCsr->pIter, &pPos, &nPos); + rc = sqlite3Fts5IterPoslist(pCsr->pIter, &pPos, &nPos, &dummy); if( rc==SQLITE_OK ){ if( pTab->eType==FTS5_VOCAB_ROW ){ while( 0==sqlite3Fts5PoslistNext64(pPos, nPos, &iOff, &iPos) ){ diff --git a/ext/fts5/test/fts5ah.test b/ext/fts5/test/fts5ah.test index 1ee4ab123e..ed2940763d 100644 --- a/ext/fts5/test/fts5ah.test +++ b/ext/fts5/test/fts5ah.test @@ -90,12 +90,14 @@ foreach {tn q res} " do_test 1.6.$tn.1 { set n [execsql_reads $q] - expr {$n < ($nReadX / 10)} + puts -nonewline "(n=$n nReadX=$nReadX)" + expr {$n < ($nReadX / 8)} } {1} do_test 1.6.$tn.2 { set n [execsql_reads "$q ORDER BY rowid DESC"] - expr {$n < ($nReadX / 10)} + puts -nonewline "(n=$n nReadX=$nReadX)" + expr {$n < ($nReadX / 8)} } {1} do_execsql_test 1.6.$tn.3 $q [lsort -int -incr $res] diff --git a/manifest b/manifest index 3367ab84de..8ad943d33d 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Avoid\smaking\sredundant\scopies\sof\sposition-lists\swithin\sthe\sfts5\scode. -D 2015-05-23T15:43:05.567 +C Avoid\sredundant\sloads\sfrom\sthe\s%_data\stable\sin\sthe\sfts5\scode. +D 2015-05-25T11:46:33.325 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 2c28e557780395095c307a6e5cb539419027eb5e F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -107,18 +107,18 @@ F ext/fts3/unicode/parseunicode.tcl da577d1384810fb4e2b209bf3313074353193e95 F ext/fts5/extract_api_docs.tcl 55a6d648d516f35d9a1e580ac00de27154e1904a F ext/fts5/fts5.c 74d18b4dc7518c7cd85609f1541e83bc564619a2 F ext/fts5/fts5.h 4266c6231094005b051dbfc8dd85d2bc57243d34 -F ext/fts5/fts5Int.h 271d2197ac32049adf3c947d671b6e682d8432b6 +F ext/fts5/fts5Int.h 2ce5c5e68852dd16de404b7a9a2a78f4f4588eb4 F ext/fts5/fts5_aux.c d53f00f31ad615ca4f139dd8751f9041afa00971 F ext/fts5/fts5_buffer.c 861599a0abe2383f0cd0352c57001140a26b0930 F ext/fts5/fts5_config.c 11f969ed711a0a8b611d47431d74c372ad78c713 -F ext/fts5/fts5_expr.c 638df4962683986e8c6e627d06934ee87ed68da2 +F ext/fts5/fts5_expr.c a8b31d363c02108dae01e13948661859f449ebb9 F ext/fts5/fts5_hash.c 54dd25348a46ea62ea96322c572e08cd1fb37304 -F ext/fts5/fts5_index.c 985bfa5ab258918b34b4c44866ce9f9a0f2a6b0e +F ext/fts5/fts5_index.c 79b8fcf40bee484dc62a7a0ba1f3d8de0a662812 F ext/fts5/fts5_storage.c 5d2b51adb304643d8f825ba89283d628418b20c2 F ext/fts5/fts5_tcl.c 7ea165878e4ae3598e89acd470a0ee1b5a00e33c F ext/fts5/fts5_tokenize.c 24649425adfea2c4877d8f69f2754b70374940ec F ext/fts5/fts5_unicode2.c da3cf712f05cd8347c8c5bc00964cc0361c88da9 -F ext/fts5/fts5_vocab.c 3d06e4306660fcd92a596c1e57c8be58dcc779dd +F ext/fts5/fts5_vocab.c 1f8543b2c1ae4427f127a911bc8e60873fcd7bf9 F ext/fts5/fts5parse.y 777da8e5819f75c217982c79c29d014c293acac9 F ext/fts5/mkportersteps.tcl 5acf962d2e0074f701620bb5308155fa1e4a63ba F ext/fts5/test/fts5_common.tcl 6d663e8c3d8409857363f66560df96b8ca813e79 @@ -129,7 +129,7 @@ F ext/fts5/test/fts5ad.test 2141b0360dc4397bfed30f0b0d700fa64b44835d F ext/fts5/test/fts5ae.test 9175201baf8c885fc1cbb2da11a0c61fd11224db F ext/fts5/test/fts5af.test c2501ec2b61d6b179c305f5d2b8782ab3d4f832a F ext/fts5/test/fts5ag.test ec3e119b728196620a31507ef503c455a7a73505 -F ext/fts5/test/fts5ah.test d74cf8b7de5b8424f732acef69fe12122a12f2bf +F ext/fts5/test/fts5ah.test dbc37d736886e1e38cfa5cd523812db1ad8d0a31 F ext/fts5/test/fts5ai.test f20e53bbf0c55bc596f1fd47f2740dae028b8f37 F ext/fts5/test/fts5aj.test 05b569f5c16ea3098fb1984eec5cf50dbdaae5d8 F ext/fts5/test/fts5ak.test 7b8c5df96df599293f920b7e5521ebc79f647592 @@ -1331,7 +1331,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 065ab83a6ce36e16d3b95a61505aa3cff0bfea84 -R c8769c201431bb53a20a3f0848ead683 +P 5165de548b84825cb000d33e5d3de12b0ef112c0 +R 0a35d34e585361d7ce3301bdbbc300de U dan -Z a11fb9d59a1c2f9d5ef19052d7f0a43f +Z 478e4cabed12f1b6c85b518dcaf965c8 diff --git a/manifest.uuid b/manifest.uuid index 9d1be15fc6..139c411caa 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -5165de548b84825cb000d33e5d3de12b0ef112c0 \ No newline at end of file +02069782f8b7896a582582c79185b50418622736 \ No newline at end of file