Avoid loading doclists twice for infrequent terms that are part of phrases.

FossilOrigin-Name: 8f939723f742329cedba8930f71dff42004f3d0d
This commit is contained in:
dan 2011-06-17 17:37:31 +00:00
parent 10fc4d8c45
commit c5f21892a8
4 changed files with 85 additions and 56 deletions

View File

@ -3182,6 +3182,8 @@ static void fts3EvalAllocateReaders(
return;
}
}
assert( pExpr->pPhrase->iDoclistToken==0 );
pExpr->pPhrase->iDoclistToken = -1;
}else{
*pnOr += (pExpr->eType==FTSQUERY_OR);
fts3EvalAllocateReaders(pCsr, pExpr->pLeft, pnToken, pnOr, pRc);
@ -3190,6 +3192,60 @@ static void fts3EvalAllocateReaders(
}
}
/*
** Fold the doclist pList/nList, loaded for token iToken of phrase p, into
** the doclist cached in p->doclist.aAll/nAll.
**
** This function takes over the pList buffer: on every path it is either
** stored in p->doclist.aAll or released with sqlite3_free().
**
** p->iDoclistToken is the largest token index handed to this function so
** far for phrase p. A negative value is treated as "nothing cached yet"
** (presumably set by the code that allocates the phrase's readers - confirm
** against the caller).
**
** Cases handled, in order:
**
**   * pList==0: the cached doclist is discarded and left empty.
**   * No token merged yet: pList simply becomes the cached doclist.
**   * Cached doclist already empty: pList is discarded.
**   * Otherwise: the two lists are phrase-merged, with whichever list
**     belongs to the lower token index passed as the left-hand operand
**     and the distance between the two token indexes passed as nDiff.
*/
static void fts3EvalPhraseMergeToken(
  Fts3Table *pTab,
  Fts3Phrase *p,
  int iToken,
  char *pList,
  int nList
){
  int iPrevToken = p->iDoclistToken;

  assert( iToken!=iPrevToken );

  if( pList==0 ){
    sqlite3_free(p->doclist.aAll);
    p->doclist.aAll = 0;
    p->doclist.nAll = 0;
  }else if( iPrevToken<0 ){
    p->doclist.aAll = pList;
    p->doclist.nAll = nList;
  }else if( p->doclist.aAll==0 ){
    sqlite3_free(pList);
  }else{
    /* True if the cached doclist belongs to the earlier token, in which
    ** case it is the left-hand input to the merge. */
    int bCachedFirst = (iPrevToken<iToken);
    char *aLeft  = bCachedFirst ? p->doclist.aAll : pList;
    int nLeft    = bCachedFirst ? p->doclist.nAll : nList;
    char *aRight = bCachedFirst ? pList : p->doclist.aAll;
    int nRight   = bCachedFirst ? nList : p->doclist.nAll;
    int nDist    = bCachedFirst ? (iToken-iPrevToken) : (iPrevToken-iToken);

    /* NOTE(review): the merge result appears to be left in the right-hand
    ** buffer, with nRight updated to its new size - confirm against
    ** fts3DoclistPhraseMerge(). The left-hand buffer is no longer needed
    ** afterwards. */
    fts3DoclistPhraseMerge(
        pTab->bDescIdx, nDist, aLeft, nLeft, aRight, &nRight
    );
    sqlite3_free(aLeft);
    p->doclist.aAll = aRight;
    p->doclist.nAll = nRight;
  }

  /* Record the highest token index merged so far. */
  if( iToken>iPrevToken ){
    p->iDoclistToken = iToken;
  }
}
static int fts3EvalPhraseLoad(
Fts3Cursor *pCsr,
Fts3Phrase *p
@ -3198,47 +3254,21 @@ static int fts3EvalPhraseLoad(
int iToken;
int rc = SQLITE_OK;
char *aDoclist = 0;
int nDoclist = 0;
int iPrev = -1;
for(iToken=0; rc==SQLITE_OK && iToken<p->nToken; iToken++){
Fts3PhraseToken *pToken = &p->aToken[iToken];
assert( pToken->pSegcsr || pToken->pDeferred );
assert( pToken->pDeferred==0 || pToken->pSegcsr==0 );
if( pToken->pDeferred==0 ){
if( pToken->pSegcsr ){
int nThis = 0;
char *pThis = 0;
rc = fts3TermSelect(pTab, pToken, p->iColumn, 1, &nThis, &pThis);
if( rc==SQLITE_OK ){
if( pThis==0 ){
sqlite3_free(aDoclist);
aDoclist = 0;
nDoclist = 0;
break;
}else if( aDoclist==0 ){
aDoclist = pThis;
nDoclist = nThis;
}else{
assert( iPrev>=0 );
fts3DoclistPhraseMerge(pTab->bDescIdx,
iToken-iPrev, aDoclist, nDoclist, pThis, &nThis
);
sqlite3_free(aDoclist);
aDoclist = pThis;
nDoclist = nThis;
}
iPrev = iToken;
fts3EvalPhraseMergeToken(pTab, p, iToken, pThis, nThis);
}
}
assert( pToken->pSegcsr==0 );
}
if( rc==SQLITE_OK ){
p->doclist.aAll = aDoclist;
p->doclist.nAll = nDoclist;
}else{
sqlite3_free(aDoclist);
}
return rc;
}
@ -3246,7 +3276,7 @@ static int fts3EvalDeferredPhrase(Fts3Cursor *pCsr, Fts3Phrase *pPhrase){
int iToken;
int rc = SQLITE_OK;
int nMaxUndeferred = -1;
int nMaxUndeferred = pPhrase->iDoclistToken;
char *aPoslist = 0;
int nPoslist = 0;
int iPrev = -1;
@ -3291,8 +3321,6 @@ static int fts3EvalDeferredPhrase(Fts3Cursor *pCsr, Fts3Phrase *pPhrase){
}
}
iPrev = iToken;
}else{
nMaxUndeferred = iToken;
}
}
@ -3351,9 +3379,11 @@ static int fts3EvalPhraseStart(Fts3Cursor *pCsr, int bOptOk, Fts3Phrase *p){
Fts3PhraseToken *pFirst = &p->aToken[0];
Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
assert( p->doclist.aAll==0 );
if( pCsr->bDesc==pTab->bDescIdx && bOptOk==1 && p->nToken==1
&& pFirst->pSegcsr && pFirst->pSegcsr->bLookup
if( pCsr->bDesc==pTab->bDescIdx
&& bOptOk==1
&& p->nToken==1
&& pFirst->pSegcsr
&& pFirst->pSegcsr->bLookup
){
/* Use the incremental approach. */
int iCol = (p->iColumn >= pTab->nColumn ? -1 : p->iColumn);
@ -3524,13 +3554,14 @@ static void fts3EvalStartReaders(
}
}
typedef struct Fts3TokenAndCost Fts3TokenAndCost;
struct Fts3TokenAndCost {
Fts3PhraseToken *pToken;
Fts3Expr *pRoot;
Fts3Phrase *pPhrase; /* The phrase the token belongs to */
int iToken; /* Position of token in phrase */
Fts3PhraseToken *pToken; /* The token itself */
Fts3Expr *pRoot;
int nOvfl;
int iCol;
int iCol; /* The column the token must match */
};
static void fts3EvalTokenCosts(
@ -3547,6 +3578,8 @@ static void fts3EvalTokenCosts(
int i;
for(i=0; *pRc==SQLITE_OK && i<pPhrase->nToken; i++){
Fts3TokenAndCost *pTC = (*ppTC)++;
pTC->pPhrase = pPhrase;
pTC->iToken = i;
pTC->pRoot = pRoot;
pTC->pToken = &pPhrase->aToken[i];
pTC->iCol = pPhrase->iColumn;
@ -3659,19 +3692,15 @@ static int fts3EvalSelectDeferred(
if( pTC->nOvfl ){
nDocEst = (pTC->nOvfl * pTab->nPgsz + pTab->nPgsz) / 10;
}else{
/* TODO: Fix this so that the doclist need not be read twice. */
Fts3PhraseToken *pToken = pTC->pToken;
int nList = 0;
char *pList = 0;
rc = fts3TermSelect(pTab, pToken, pTC->iCol, 1, &nList, &pList);
assert( rc==SQLITE_OK || pList==0 );
if( rc==SQLITE_OK ){
nDocEst = fts3DoclistCountDocids(1, pList, nList);
}
sqlite3_free(pList);
if( rc==SQLITE_OK ){
rc = sqlite3Fts3TermSegReaderCursor(pCsr,
pToken->z, pToken->n, pToken->isPrefix, &pToken->pSegcsr
);
fts3EvalPhraseMergeToken(pTab, pTC->pPhrase, pTC->iToken,pList,nList);
}
}
}else{

View File

@ -306,7 +306,6 @@ struct Fts3PhraseToken {
/* Variables above this point are populated when the expression is
** parsed (by code in fts3_expr.c). Below this point the variables are
** used when evaluating the expression. */
int bFulltext; /* True if full-text index was used */
Fts3DeferredToken *pDeferred; /* Deferred token object for this token */
Fts3MultiSegReader *pSegcsr; /* Segment-reader for this token */
};
@ -315,6 +314,7 @@ struct Fts3Phrase {
/* Cache of doclist for this phrase. */
Fts3Doclist doclist;
int bIncr; /* True if doclist is loaded incrementally */
int iDoclistToken;
/* Variables below this point are populated by fts3_expr.c when parsing
** a MATCH expression. Everything above is part of the evaluation phase.

View File

@ -1,5 +1,5 @@
C Add\sa\smissing\sdeclaration\sto\sfts3Int.h.
D 2011-06-17T16:04:39.347
C Avoid\sloading\sdoclists\sfor\sinfrequent\sterms\sthat\sare\spart\sof\sphrases\stwice.
D 2011-06-17T17:37:31.284
F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f
F Makefile.in c1d7a7f4fd8da6b1815032efca950e3d5125407e
F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23
@ -62,9 +62,9 @@ F ext/fts2/mkfts2amal.tcl 974d5d438cb3f7c4a652639262f82418c1e4cff0
F ext/fts3/README.syntax a19711dc5458c20734b8e485e75fb1981ec2427a
F ext/fts3/README.tokenizers 998756696647400de63d5ba60e9655036cb966e9
F ext/fts3/README.txt 8c18f41574404623b76917b9da66fcb0ab38328d
F ext/fts3/fts3.c 78b02b5f0195e397c4239ef9213e5506b7d3fa97
F ext/fts3/fts3.c f919a7966426e539b3f39f696bc94269e3726033
F ext/fts3/fts3.h 3a10a0af180d502cecc50df77b1b22df142817fe
F ext/fts3/fts3Int.h 974cf471fae5fed8ad87069cd86c1fe5a9bf6f9c
F ext/fts3/fts3Int.h 8ece4390eb44e7179bb05c59d40f447663f5c077
F ext/fts3/fts3_aux.c 0ebfa7b86cf8ff6a0861605fcc63b83ec1b70691
F ext/fts3/fts3_expr.c 23791de01b3a5d313d76e02befd2601d4096bc2b
F ext/fts3/fts3_hash.c aad95afa01cf2a5ffaa448e4b0ab043880cd1efb
@ -946,7 +946,7 @@ F tool/split-sqlite3c.tcl d9be87f1c340285a3e081eb19b4a247981ed290c
F tool/symbols.sh bc2a3709940d47c8ac8e0a1fdf17ec801f015a00
F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f
F tool/warnings.sh 347d974d143cf132f953b565fbc03026f19fcb4d
P a117005f502482c4529661616cbb26eee1fe75d1
R faf357353d86a944921df5db701e63d3
P 3bfd4466f50711eb71d1a13231025ff4e1e76246
R cd3d6544b7cd834ce139927827bd3fe8
U dan
Z d361a7bc828cbb05e3ac27317d622fe4
Z fa9494c386a1e1a2787b09d7f28a7b96

View File

@ -1 +1 @@
3bfd4466f50711eb71d1a13231025ff4e1e76246
8f939723f742329cedba8930f71dff42004f3d0d