Allow multi-token phrases to load doclists from the database incrementally. This allows queries that feature such phrases to benefit from the "docid<?" optimization.

FossilOrigin-Name: ea543f081d93ed1bf66c21ce2108ec94e349f4c5
This commit is contained in:
dan 2013-10-01 20:02:32 +00:00
parent 73632e37c9
commit ff574f4c76
3 changed files with 174 additions and 38 deletions

View File

@ -4015,6 +4015,12 @@ static int fts3EvalDeferredPhrase(Fts3Cursor *pCsr, Fts3Phrase *pPhrase){
return SQLITE_OK;
}
/*
** Maximum number of tokens a phrase may have to be considered for the
** incremental doclists strategy.
**
** fts3EvalPhraseStart() only selects the incremental strategy for phrases
** that have at least one and at most this many tokens, and
** fts3EvalIncrPhraseNext() uses this value as the size of its on-stack
** array of per-token doclist entries.
*/
#define MAX_INCR_PHRASE_TOKENS 4
/*
** This function is called for each Fts3Phrase in a full-text query
** expression to initialize the mechanism for returning rows. Once this
@ -4028,28 +4034,38 @@ static int fts3EvalDeferredPhrase(Fts3Cursor *pCsr, Fts3Phrase *pPhrase){
** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code.
*/
static int fts3EvalPhraseStart(Fts3Cursor *pCsr, int bOptOk, Fts3Phrase *p){
int rc; /* Error code */
Fts3PhraseToken *pFirst = &p->aToken[0];
Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
int rc = SQLITE_OK; /* Error code */
int i;
if( pCsr->bDesc==pTab->bDescIdx
&& bOptOk==1
&& p->nToken==1
&& pFirst->pSegcsr
&& pFirst->pSegcsr->bLookup
&& pFirst->bFirst==0
){
/* Determine if doclists may be loaded from disk incrementally. This is
** possible if the bOptOk argument is true, the FTS doclists will be
** scanned in forward order, and the phrase consists of
** MAX_INCR_PHRASE_TOKENS or fewer tokens, none of which are "^first"
** tokens or prefix tokens that cannot use a prefix-index. */
int bIncrOk = (bOptOk
&& pCsr->bDesc==pTab->bDescIdx
&& p->nToken<=MAX_INCR_PHRASE_TOKENS && p->nToken>0
);
for(i=0; bIncrOk==1 && i<p->nToken; i++){
Fts3PhraseToken *pToken = &p->aToken[i];
if( pToken->bFirst || !pToken->pSegcsr || !pToken->pSegcsr->bLookup ){
bIncrOk = 0;
}
}
if( bIncrOk ){
/* Use the incremental approach. */
int iCol = (p->iColumn >= pTab->nColumn ? -1 : p->iColumn);
rc = sqlite3Fts3MsrIncrStart(
pTab, pFirst->pSegcsr, iCol, pFirst->z, pFirst->n);
p->bIncr = 1;
for(i=0; rc==SQLITE_OK && i<p->nToken; i++){
Fts3PhraseToken *pTok = &p->aToken[i];
rc = sqlite3Fts3MsrIncrStart(pTab, pTok->pSegcsr, iCol, pTok->z, pTok->n);
}
}else{
/* Load the full doclist for the phrase into memory. */
rc = fts3EvalPhraseLoad(pCsr, p);
p->bIncr = 0;
}
p->bIncr = bIncrOk;
assert( rc!=SQLITE_OK || p->nToken<1 || p->aToken[0].pSegcsr==0 || p->bIncr );
return rc;
@ -4153,6 +4169,133 @@ void sqlite3Fts3DoclistNext(
*ppIter = p;
}
/*
** Helper type used by fts3EvalIncrPhraseNext() and incrPhraseTokenNext().
** Each instance records the entry that one token's incremental iterator
** currently points at.
*/
typedef struct TokenDoclist {
  sqlite3_int64 iDocid;     /* Docid of the current entry */
  char *pList;              /* List data for iDocid; NULL is treated as EOF */
  int nList;                /* Size of pList in bytes */
} TokenDoclist;
/*
** pToken is one token of a multi-token phrase that is being loaded
** incrementally. Step its segment cursor to the next entry and store the
** docid, list pointer and list size of that entry in *p.
**
** *pbEof is set to 1 if the list pointer stored in *p is NULL after the
** step. It is never cleared here, so the caller must initialize it.
**
** Returns the result code of sqlite3Fts3MsrIncrNext() unchanged.
**
** NOTE(review): the EOF test looks at p->pList even when the step failed.
** Whether sqlite3Fts3MsrIncrNext() always writes that output on error is
** not visible from here - confirm before relying on *pbEof after an error.
*/
static int incrPhraseTokenNext(
  Fts3Table *pTab,                /* Virtual table handle */
  Fts3PhraseToken *pToken,        /* Advance the iterator for this token */
  TokenDoclist *p,                /* OUT: Docid and doclist for new entry */
  int *pbEof                      /* OUT: True if iterator is at EOF */
){
  int rcStep;                     /* Result of advancing the segment cursor */

  /* This path is only valid for tokens that are not deferred. */
  assert( pToken->pDeferred==0 );

  rcStep = sqlite3Fts3MsrIncrNext(
      pTab, pToken->pSegcsr, &p->iDocid, &p->pList, &p->nList
  );
  if( !p->pList ){
    *pbEof = 1;
  }
  return rcStep;
}
/*
** The phrase iterator passed as the second argument uses the incremental
** doclist strategy. Advance it to the next matching document in the
** database. If an error occurs, return an SQLite error code. Otherwise,
** return SQLITE_OK.
**
** If there is no "next" entry and no error occurs, then *pbEof is set to
** 1 before returning. Otherwise, if no error occurs and the iterator is
** successfully advanced, *pbEof is set to 0.
**
** A single-token phrase is advanced by stepping its segment cursor directly
** into p->doclist. For a multi-token phrase, each iteration of the main
** loop:
**
**   1. steps every token iterator once and records the largest docid,
**   2. keeps stepping lagging iterators until all point at the same docid,
**   3. merges the per-token lists with fts3PoslistPhraseMerge() to check
**      that the tokens really form the phrase in that document.
**
** On a match the merged list is stored in p->doclist in a buffer obtained
** from sqlite3_malloc(), and doclist.bFreeList is set to 1.
**
** If the allocation fails, SQLITE_NOMEM is returned immediately and *pbEof
** is left unmodified.
*/
static int fts3EvalIncrPhraseNext(
  Fts3Cursor *pCsr,               /* FTS Cursor handle */
  Fts3Phrase *p,                  /* Phrase object to advance to next docid */
  u8 *pbEof                       /* OUT: Set to 1 if EOF */
){
  int rc = SQLITE_OK;
  Fts3Doclist *pDL = &p->doclist;
  Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
  int bEof = 0;

  assert( p->bIncr==1 );
  assert( pDL->pNextDocid==0 );

  if( p->nToken==1 ){
    rc = sqlite3Fts3MsrIncrNext(pTab, p->aToken[0].pSegcsr,
        &pDL->iDocid, &pDL->pList, &pDL->nList
    );
    if( pDL->pList==0 ) bEof = 1;
  }else{
    /* NOTE(review): this local looks unused, but is presumably referenced
    ** by the DOCID_CMP() macro - do not remove without checking the macro
    ** definition. */
    int bDescDoclist = pCsr->bDesc;
    struct TokenDoclist a[MAX_INCR_PHRASE_TOKENS];

    assert( p->nToken<=MAX_INCR_PHRASE_TOKENS );

    /* Stop as soon as an error occurs. Without the rc test, a failure while
    ** advancing a token would leave bEof==0 and the loop would keep
    ** re-examining stale (or never populated) entries of a[]. */
    while( rc==SQLITE_OK && bEof==0 ){
      sqlite3_int64 iMax = 0;     /* Largest docid for all iterators */
      int i;                      /* Used to iterate through tokens */

      /* Advance the iterator for each token in the phrase once. The docid
      ** in a[i] is only consulted when the advance succeeded and produced
      ** an entry. */
      for(i=0; rc==SQLITE_OK && i<p->nToken; i++){
        rc = incrPhraseTokenNext(pTab, &p->aToken[i], &a[i], &bEof);
        if( rc==SQLITE_OK && bEof==0
         && (i==0 || DOCID_CMP(iMax, a[i].iDocid)<0)
        ){
          iMax = a[i].iDocid;
        }
      }

      /* Keep advancing iterators until they all point to the same document */
      if( bEof==0 && rc==SQLITE_OK ){
        for(i=0; i<p->nToken; i++){
          while( DOCID_CMP(a[i].iDocid, iMax)<0 && rc==SQLITE_OK && bEof==0 ){
            rc = incrPhraseTokenNext(pTab, &p->aToken[i], &a[i], &bEof);
            if( rc==SQLITE_OK && bEof==0
             && DOCID_CMP(a[i].iDocid, iMax)>0
            ){
              /* This iterator overshot the target. Adopt its docid as the
              ** new target and restart the scan over the tokens. */
              iMax = a[i].iDocid;
              i = 0;
            }
          }
        }
      }

      /* Check if the current entries really are a phrase match */
      if( rc==SQLITE_OK && bEof==0 ){
        int nList = 0;            /* Size in bytes of the merged list */
        int nByte = a[p->nToken-1].nList;
        char *aDoclist = sqlite3_malloc(nByte+1);
        if( !aDoclist ) return SQLITE_NOMEM;

        /* Start from a private copy of the last token's list and merge the
        ** list of each earlier token into it in place.
        ** NOTE(review): nByte+1 bytes are copied, presumably to include a
        ** terminator byte that follows the list - confirm. */
        memcpy(aDoclist, a[p->nToken-1].pList, nByte+1);

        for(i=0; i<(p->nToken-1); i++){
          char *pLeft = a[i].pList;
          char *pRight = aDoclist;
          char *pOut = aDoclist;
          int nDist = p->nToken-1-i;
          int res = fts3PoslistPhraseMerge(&pOut, nDist, 0, 1, &pLeft, &pRight);
          if( res==0 ) break;     /* Token i does not line up: no match */
          nList = (int)(pOut - aDoclist);
        }
        if( i==(p->nToken-1) ){
          /* Every token merged successfully. Publish the result. */
          pDL->iDocid = a[0].iDocid;
          pDL->pList = aDoclist;
          pDL->nList = nList;
          pDL->bFreeList = 1;
          break;
        }
        sqlite3_free(aDoclist);
      }
    }
  }

  *pbEof = bEof;
  return rc;
}
/*
** Attempt to move the phrase iterator to point to the next matching docid.
** If an error occurs, return an SQLite error code. Otherwise, return
@ -4172,14 +4315,7 @@ static int fts3EvalPhraseNext(
Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
if( p->bIncr ){
assert( p->nToken==1 );
assert( pDL->pNextDocid==0 );
rc = sqlite3Fts3MsrIncrNext(pTab, p->aToken[0].pSegcsr,
&pDL->iDocid, &pDL->pList, &pDL->nList
);
if( rc==SQLITE_OK && !pDL->pList ){
*pbEof = 1;
}
rc = fts3EvalIncrPhraseNext(pCsr, p, pbEof);
}else if( pCsr->bDesc!=pTab->bDescIdx && pDL->nAll ){
sqlite3Fts3DoclistPrev(pTab->bDescIdx, pDL->aAll, pDL->nAll,
&pDL->pNextDocid, &pDL->iDocid, &pDL->nList, pbEof
@ -4245,7 +4381,6 @@ static int fts3EvalPhraseNext(
static void fts3EvalStartReaders(
Fts3Cursor *pCsr, /* FTS Cursor handle */
Fts3Expr *pExpr, /* Expression to initialize phrases in */
int bOptOk, /* True to enable incremental loading */
int *pRc /* IN/OUT: Error code */
){
if( pExpr && SQLITE_OK==*pRc ){
@ -4256,10 +4391,10 @@ static void fts3EvalStartReaders(
if( pExpr->pPhrase->aToken[i].pDeferred==0 ) break;
}
pExpr->bDeferred = (i==nToken);
*pRc = fts3EvalPhraseStart(pCsr, bOptOk, pExpr->pPhrase);
*pRc = fts3EvalPhraseStart(pCsr, 1, pExpr->pPhrase);
}else{
fts3EvalStartReaders(pCsr, pExpr->pLeft, bOptOk, pRc);
fts3EvalStartReaders(pCsr, pExpr->pRight, bOptOk, pRc);
fts3EvalStartReaders(pCsr, pExpr->pLeft, pRc);
fts3EvalStartReaders(pCsr, pExpr->pRight, pRc);
pExpr->bDeferred = (pExpr->pLeft->bDeferred && pExpr->pRight->bDeferred);
}
}
@ -4581,7 +4716,7 @@ static int fts3EvalStart(Fts3Cursor *pCsr){
}
#endif
fts3EvalStartReaders(pCsr, pCsr->pExpr, 1, &rc);
fts3EvalStartReaders(pCsr, pCsr->pExpr, &rc);
return rc;
}
@ -5097,12 +5232,13 @@ static void fts3EvalRestart(
if( pPhrase ){
fts3EvalInvalidatePoslist(pPhrase);
if( pPhrase->bIncr ){
assert( pPhrase->nToken==1 );
assert( pPhrase->aToken[0].pSegcsr );
sqlite3Fts3MsrIncrRestart(pPhrase->aToken[0].pSegcsr);
int i;
for(i=0; i<pPhrase->nToken; i++){
assert( pPhrase->aToken[i].pSegcsr );
sqlite3Fts3MsrIncrRestart(pPhrase->aToken[i].pSegcsr);
}
*pRc = fts3EvalPhraseStart(pCsr, 0, pPhrase);
}
pPhrase->doclist.pNextDocid = 0;
pPhrase->doclist.iDocid = 0;
}

View File

@ -1,5 +1,5 @@
C Merge\strunk\schanges\swith\sthis\sbranch.
D 2013-09-30T18:16:07.269
C Allow\smulti-token\sphrases\sto\sload\sdoclists\sfrom\sthe\sdatabase\sincrementally.\sThis\sallows\squeries\sthat\sfeature\ssuch\sphrases\sto\sbenefit\sfrom\sthe\s"docid<?"\soptimization.
D 2013-10-01T20:02:32.683
F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f
F Makefile.in 5e41da95d92656a5004b03d3576e8b226858a28e
F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23
@ -78,7 +78,7 @@ F ext/fts3/README.content fdc666a70d5257a64fee209f97cf89e0e6e32b51
F ext/fts3/README.syntax a19711dc5458c20734b8e485e75fb1981ec2427a
F ext/fts3/README.tokenizers e0a8b81383ea60d0334d274fadf305ea14a8c314
F ext/fts3/README.txt 8c18f41574404623b76917b9da66fcb0ab38328d
F ext/fts3/fts3.c 00a56dc5ce0527e8d1428831d460ff32e1ae45b9
F ext/fts3/fts3.c e781a0147f26b4055c650861060b24792f4f0fb1
F ext/fts3/fts3.h 3a10a0af180d502cecc50df77b1b22df142817fe
F ext/fts3/fts3Int.h 0634f768e7f6d5767972014e1ca83055ad2e09e3
F ext/fts3/fts3_aux.c b02632f6dd0e375ce97870206d914ea6d8df5ccd
@ -1118,7 +1118,7 @@ F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4
F tool/warnings.sh fbc018d67fd7395f440c28f33ef0f94420226381
F tool/wherecosttest.c f407dc4c79786982a475261866a161cd007947ae
F tool/win/sqlite.vsix 030f3eeaf2cb811a3692ab9c14d021a75ce41fff
P 6622424a3a149edd35ba2ba0881aa41b4536417b fa0f2f0e3e79ae653118b901e1cca7725dfaf249
R 5cf6afd99182ff167821268a5e701ab6
P e294a9c7c525d1da698259c2fef089579bfd1c0b
R a99fa9f8f9b9179941beba7f84a38d11
U dan
Z 325d6a36551be1798f7d76b70cf41307
Z 1790cd325eb740a839ef9b2fc7e8e452

View File

@ -1 +1 @@
e294a9c7c525d1da698259c2fef089579bfd1c0b
ea543f081d93ed1bf66c21ce2108ec94e349f4c5