Have FTS take advantage of "docid<?" constraints when they are present. Extend the FTS "incremental doclist" optimization so that it is used for tokens within multi-token phrases.

FossilOrigin-Name: baf8ce5916ea9baf0ec557263cb9c7ecf716431f
This commit is contained in:
dan 2013-10-03 20:41:18 +00:00
commit 5db2605a80
7 changed files with 565 additions and 94 deletions

View File

@ -1457,7 +1457,11 @@ static int fts3BestIndexMethod(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){
Fts3Table *p = (Fts3Table *)pVTab;
int i; /* Iterator variable */
int iCons = -1; /* Index of constraint to use */
int iLangidCons = -1; /* Index of langid=x constraint, if present */
int iDocidGe = -1; /* Index of docid>=x constraint, if present */
int iDocidLe = -1; /* Index of docid<=x constraint, if present */
int iIdx;
/* By default use a full table scan. This is an expensive option,
** so search through the constraints to see if a more efficient
@ -1466,14 +1470,14 @@ static int fts3BestIndexMethod(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){
pInfo->idxNum = FTS3_FULLSCAN_SEARCH;
pInfo->estimatedCost = 5000000;
for(i=0; i<pInfo->nConstraint; i++){
int bDocid; /* True if this constraint is on docid */
struct sqlite3_index_constraint *pCons = &pInfo->aConstraint[i];
if( pCons->usable==0 ) continue;
bDocid = (pCons->iColumn<0 || pCons->iColumn==p->nColumn+1);
/* A direct lookup on the rowid or docid column. Assign a cost of 1.0. */
if( iCons<0
&& pCons->op==SQLITE_INDEX_CONSTRAINT_EQ
&& (pCons->iColumn<0 || pCons->iColumn==p->nColumn+1 )
){
if( iCons<0 && pCons->op==SQLITE_INDEX_CONSTRAINT_EQ && bDocid ){
pInfo->idxNum = FTS3_DOCID_SEARCH;
pInfo->estimatedCost = 1.0;
iCons = i;
@ -1502,14 +1506,38 @@ static int fts3BestIndexMethod(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){
){
iLangidCons = i;
}
if( bDocid ){
switch( pCons->op ){
case SQLITE_INDEX_CONSTRAINT_GE:
case SQLITE_INDEX_CONSTRAINT_GT:
iDocidGe = i;
break;
case SQLITE_INDEX_CONSTRAINT_LE:
case SQLITE_INDEX_CONSTRAINT_LT:
iDocidLe = i;
break;
}
}
}
iIdx = 1;
if( iCons>=0 ){
pInfo->aConstraintUsage[iCons].argvIndex = 1;
pInfo->aConstraintUsage[iCons].argvIndex = iIdx++;
pInfo->aConstraintUsage[iCons].omit = 1;
}
if( iLangidCons>=0 ){
pInfo->aConstraintUsage[iLangidCons].argvIndex = 2;
pInfo->idxNum |= FTS3_HAVE_LANGID;
pInfo->aConstraintUsage[iLangidCons].argvIndex = iIdx++;
}
if( iDocidGe>=0 ){
pInfo->idxNum |= FTS3_HAVE_DOCID_GE;
pInfo->aConstraintUsage[iDocidGe].argvIndex = iIdx++;
}
if( iDocidLe>=0 ){
pInfo->idxNum |= FTS3_HAVE_DOCID_LE;
pInfo->aConstraintUsage[iDocidLe].argvIndex = iIdx++;
}
/* Regardless of the strategy selected, FTS can deliver rows in rowid (or
@ -2956,6 +2984,33 @@ static int fts3NextMethod(sqlite3_vtab_cursor *pCursor){
return rc;
}
/*
** The following are copied from sqliteInt.h.
**
** Constants for the largest and smallest possible 64-bit signed integers.
** These macros are designed to work correctly on both 32-bit and 64-bit
** compilers.
**
** LARGEST_INT64 is assembled from two 32-bit halves (0x7fffffff shifted
** into the upper word, 0xffffffff in the lower word), giving
** 0x7fffffffffffffff without needing a 64-bit integer literal.
** SMALLEST_INT64 is derived from it as (-1 - LARGEST_INT64), which avoids
** negating a value that has no positive counterpart.
**
** NOTE(review): the definitions are skipped when SQLITE_AMALGAMATION is
** defined, presumably because the amalgamation already provides them via
** sqliteInt.h - confirm before changing the guard.
*/
#ifndef SQLITE_AMALGAMATION
# define LARGEST_INT64 (0xffffffff|(((sqlite3_int64)0x7fffffff)<<32))
# define SMALLEST_INT64 (((sqlite3_int64)-1) - LARGEST_INT64)
#endif
/*
** Interpret pVal as one end of a docid range.
**
** If pVal is not NULL and its numeric type is "integer", return its value
** as a 64-bit signed integer. In every other case (no value supplied, or
** a value whose numeric type is not integer) return iDefault unchanged.
*/
static sqlite3_int64 fts3DocidRange(sqlite3_value *pVal, i64 iDefault){
  /* No constraint value was supplied: fall back to the default bound. */
  if( pVal==0 ) return iDefault;

  /* Only values with integer numeric type can bound the docid range. */
  if( sqlite3_value_numeric_type(pVal)!=SQLITE_INTEGER ) return iDefault;

  return sqlite3_value_int64(pVal);
}
/*
** This is the xFilter interface for the virtual table. See
** the virtual table xFilter method documentation for additional
@ -2981,40 +3036,58 @@ static int fts3FilterMethod(
){
int rc;
char *zSql; /* SQL statement used to access %_content */
int eSearch;;
Fts3Table *p = (Fts3Table *)pCursor->pVtab;
Fts3Cursor *pCsr = (Fts3Cursor *)pCursor;
sqlite3_value *pCons = 0; /* The MATCH or rowid constraint, if any */
sqlite3_value *pLangid = 0; /* The "langid = ?" constraint, if any */
sqlite3_value *pDocidGe = 0; /* The "docid >= ?" constraint, if any */
sqlite3_value *pDocidLe = 0; /* The "docid <= ?" constraint, if any */
int iIdx;
UNUSED_PARAMETER(idxStr);
UNUSED_PARAMETER(nVal);
assert( idxNum>=0 && idxNum<=(FTS3_FULLTEXT_SEARCH+p->nColumn) );
assert( nVal==0 || nVal==1 || nVal==2 );
assert( (nVal==0)==(idxNum==FTS3_FULLSCAN_SEARCH) );
eSearch = (idxNum & 0x0000FFFF);
assert( eSearch>=0 && eSearch<=(FTS3_FULLTEXT_SEARCH+p->nColumn) );
assert( p->pSegments==0 );
/* Collect arguments into local variables */
iIdx = 0;
if( eSearch!=FTS3_FULLSCAN_SEARCH ) pCons = apVal[iIdx++];
if( idxNum & FTS3_HAVE_LANGID ) pLangid = apVal[iIdx++];
if( idxNum & FTS3_HAVE_DOCID_GE ) pDocidGe = apVal[iIdx++];
if( idxNum & FTS3_HAVE_DOCID_LE ) pDocidLe = apVal[iIdx++];
assert( iIdx==nVal );
/* In case the cursor has been used before, clear it now. */
sqlite3_finalize(pCsr->pStmt);
sqlite3_free(pCsr->aDoclist);
sqlite3Fts3ExprFree(pCsr->pExpr);
memset(&pCursor[1], 0, sizeof(Fts3Cursor)-sizeof(sqlite3_vtab_cursor));
/* Set the lower and upper bounds on docids to return */
pCsr->iMinDocid = fts3DocidRange(pDocidGe, SMALLEST_INT64);
pCsr->iMaxDocid = fts3DocidRange(pDocidLe, LARGEST_INT64);
if( idxStr ){
pCsr->bDesc = (idxStr[0]=='D');
}else{
pCsr->bDesc = p->bDescIdx;
}
pCsr->eSearch = (i16)idxNum;
pCsr->eSearch = (i16)eSearch;
if( idxNum!=FTS3_DOCID_SEARCH && idxNum!=FTS3_FULLSCAN_SEARCH ){
int iCol = idxNum-FTS3_FULLTEXT_SEARCH;
const char *zQuery = (const char *)sqlite3_value_text(apVal[0]);
if( eSearch!=FTS3_DOCID_SEARCH && eSearch!=FTS3_FULLSCAN_SEARCH ){
int iCol = eSearch-FTS3_FULLTEXT_SEARCH;
const char *zQuery = (const char *)sqlite3_value_text(pCons);
if( zQuery==0 && sqlite3_value_type(apVal[0])!=SQLITE_NULL ){
if( zQuery==0 && sqlite3_value_type(pCons)!=SQLITE_NULL ){
return SQLITE_NOMEM;
}
pCsr->iLangid = 0;
if( nVal==2 ) pCsr->iLangid = sqlite3_value_int(apVal[1]);
if( pLangid ) pCsr->iLangid = sqlite3_value_int(pLangid);
assert( p->base.zErrMsg==0 );
rc = sqlite3Fts3ExprParse(p->pTokenizer, pCsr->iLangid,
@ -3037,7 +3110,7 @@ static int fts3FilterMethod(
** full-text query or docid lookup, the statement retrieves a single
** row by docid.
*/
if( idxNum==FTS3_FULLSCAN_SEARCH ){
if( eSearch==FTS3_FULLSCAN_SEARCH ){
zSql = sqlite3_mprintf(
"SELECT %s ORDER BY rowid %s",
p->zReadExprlist, (pCsr->bDesc ? "DESC" : "ASC")
@ -3048,10 +3121,10 @@ static int fts3FilterMethod(
}else{
rc = SQLITE_NOMEM;
}
}else if( idxNum==FTS3_DOCID_SEARCH ){
}else if( eSearch==FTS3_DOCID_SEARCH ){
rc = fts3CursorSeekStmt(pCsr, &pCsr->pStmt);
if( rc==SQLITE_OK ){
rc = sqlite3_bind_value(pCsr->pStmt, 1, apVal[0]);
rc = sqlite3_bind_value(pCsr->pStmt, 1, pCons);
}
}
if( rc!=SQLITE_OK ) return rc;
@ -3942,6 +4015,12 @@ static int fts3EvalDeferredPhrase(Fts3Cursor *pCsr, Fts3Phrase *pPhrase){
return SQLITE_OK;
}
/*
** Maximum number of tokens a phrase may have to be considered for the
** incremental doclists strategy.
*/
#define MAX_INCR_PHRASE_TOKENS 4
/*
** This function is called for each Fts3Phrase in a full-text query
** expression to initialize the mechanism for returning rows. Once this
@ -3955,23 +4034,43 @@ static int fts3EvalDeferredPhrase(Fts3Cursor *pCsr, Fts3Phrase *pPhrase){
** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code.
*/
static int fts3EvalPhraseStart(Fts3Cursor *pCsr, int bOptOk, Fts3Phrase *p){
int rc; /* Error code */
Fts3PhraseToken *pFirst = &p->aToken[0];
Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
int rc = SQLITE_OK; /* Error code */
int i;
if( pCsr->bDesc==pTab->bDescIdx
&& bOptOk==1
&& p->nToken==1
&& pFirst->pSegcsr
&& pFirst->pSegcsr->bLookup
&& pFirst->bFirst==0
){
/* Determine if doclists may be loaded from disk incrementally. This is
** possible if the bOptOk argument is true, the FTS doclists will be
** scanned in forward order, and the phrase consists of
** MAX_INCR_PHRASE_TOKENS or fewer tokens, none of which are "^first"
** tokens or prefix tokens that cannot use a prefix-index. */
int bHaveIncr = 0;
int bIncrOk = (bOptOk
&& pCsr->bDesc==pTab->bDescIdx
&& p->nToken<=MAX_INCR_PHRASE_TOKENS && p->nToken>0
&& p->nToken<=MAX_INCR_PHRASE_TOKENS && p->nToken>0
#ifdef SQLITE_TEST
&& pTab->bNoIncrDoclist==0
#endif
);
for(i=0; bIncrOk==1 && i<p->nToken; i++){
Fts3PhraseToken *pToken = &p->aToken[i];
if( pToken->bFirst || (pToken->pSegcsr!=0 && !pToken->pSegcsr->bLookup) ){
bIncrOk = 0;
}
if( pToken->pSegcsr ) bHaveIncr = 1;
}
if( bIncrOk && bHaveIncr ){
/* Use the incremental approach. */
int iCol = (p->iColumn >= pTab->nColumn ? -1 : p->iColumn);
rc = sqlite3Fts3MsrIncrStart(
pTab, pFirst->pSegcsr, iCol, pFirst->z, pFirst->n);
for(i=0; rc==SQLITE_OK && i<p->nToken; i++){
Fts3PhraseToken *pToken = &p->aToken[i];
Fts3MultiSegReader *pSegcsr = pToken->pSegcsr;
if( pSegcsr ){
rc = sqlite3Fts3MsrIncrStart(pTab, pSegcsr, iCol, pToken->z, pToken->n);
}
}
p->bIncr = 1;
}else{
/* Load the full doclist for the phrase into memory. */
rc = fts3EvalPhraseLoad(pCsr, p);
@ -4080,6 +4179,216 @@ void sqlite3Fts3DoclistNext(
*ppIter = p;
}
/*
** Step the doclist iterator pDL forward by one entry within the in-memory
** doclist pDL->aAll/nAll.
**
** If the iterator is already positioned at (or past) the end of the
** buffer, *pbEof is set to 1 and pDL is left untouched. Otherwise
** pDL->iDocid, pDL->pList, pDL->nList and pDL->pNextDocid are updated to
** describe the new entry and *pbEof is set to 0.
*/
static void fts3EvalDlPhraseNext(
  Fts3Table *pTab,
  Fts3Doclist *pDL,
  u8 *pbEof
){
  char *pEnd = &pDL->aAll[pDL->nAll];    /* 1 byte past end of aAll */
  char *pIter;                           /* Read cursor within aAll */
  sqlite3_int64 iDelta;                  /* Varint read from the doclist */

  /* Resume from the saved position, or start from the top of the buffer
  ** if this is the first call since the iterator was reset. */
  pIter = (pDL->pNextDocid ? pDL->pNextDocid : pDL->aAll);

  if( pIter>=pEnd ){
    /* We have already reached the end of this doclist. EOF. */
    *pbEof = 1;
    return;
  }

  /* The value is subtracted only for a descending index once the first
  ** entry has been consumed; in every other case it is added. */
  pIter += sqlite3Fts3GetVarint(pIter, &iDelta);
  if( pTab->bDescIdx && pDL->pNextDocid ){
    pDL->iDocid -= iDelta;
  }else{
    pDL->iDocid += iDelta;
  }

  /* Record the position-list that follows the docid and measure it. */
  pDL->pList = pIter;
  fts3PoslistCopy(0, &pIter);
  pDL->nList = (int)(pIter - pDL->pList);

  /* pIter now points just past the 0x00 that terminates the position-
  ** list for document pDL->iDocid. However, if this position-list was
  ** edited in place by fts3EvalNearTrim(), then pIter may not actually
  ** point to the start of the next docid value. Skip over any
  ** zero-padding that fts3EvalNearTrim() left behind. */
  while( pIter<pEnd && *pIter==0 ){
    pIter++;
  }
  pDL->pNextDocid = pIter;
  assert( pIter>=&pDL->aAll[pDL->nAll] || *pIter );
  *pbEof = 0;
}
/*
** Helper type used by fts3EvalIncrPhraseNext() and incrPhraseTokenNext().
**
** Each instance holds the current doclist entry of one token of a
** multi-token phrase while the tokens are being advanced in lock-step.
*/
typedef struct TokenDoclist TokenDoclist;
struct TokenDoclist {
int bIgnore;            /* Set to 1 by incrPhraseTokenNext() for a token that
                        ** has no segment cursor and is not the phrase's
                        ** iDoclistToken; such entries are skipped when
                        ** docids are compared and position lists merged */
sqlite3_int64 iDocid;   /* Docid of the current entry */
char *pList;            /* Position list of the current entry; left as 0
                        ** when the incremental reader has no more entries */
int nList;              /* Size of pList in bytes */
};
/*
** Advance token iToken of the multi-token phrase pPhrase to its next
** entry and describe that entry in *p.
**
** Three cases are handled:
**
**   * iToken is the phrase's iDoclistToken: the entry is read from the
**     in-memory doclist pPhrase->doclist, and *pbEof is set to 0 or 1 by
**     fts3EvalDlPhraseNext().
**
**   * The token has a segment cursor: the entry is read incrementally
**     via sqlite3Fts3MsrIncrNext(). *pbEof is set to 1 if no position
**     list is available afterwards, and is otherwise left unchanged.
**
**   * Neither of the above: p->bIgnore is set to 1 and nothing else is
**     modified.
**
** If an error occurs, return an SQLite error code. Otherwise, return
** SQLITE_OK.
*/
static int incrPhraseTokenNext(
  Fts3Table *pTab,                /* Virtual table handle */
  Fts3Phrase *pPhrase,            /* Phrase to advance token of */
  int iToken,                     /* Specific token to advance */
  TokenDoclist *p,                /* OUT: Docid and doclist for new entry */
  u8 *pbEof                       /* OUT: True if iterator is at EOF */
){
  Fts3PhraseToken *pTok;          /* Token being advanced */
  int rc;                         /* Return code of the incremental read */

  if( pPhrase->iDoclistToken==iToken ){
    /* This token's entries come from the phrase's in-memory doclist. */
    Fts3Doclist *pDoclist = &pPhrase->doclist;
    assert( p->bIgnore==0 );
    assert( pPhrase->aToken[iToken].pSegcsr==0 );
    fts3EvalDlPhraseNext(pTab, pDoclist, pbEof);
    p->pList = pDoclist->pList;
    p->nList = pDoclist->nList;
    p->iDocid = pDoclist->iDocid;
    return SQLITE_OK;
  }

  pTok = &pPhrase->aToken[iToken];
  assert( pTok->pDeferred==0 );
  assert( pTok->pSegcsr || pPhrase->iDoclistToken>=0 );

  if( pTok->pSegcsr==0 ){
    /* Nothing to read for this token: flag it so callers skip it. */
    p->bIgnore = 1;
    return SQLITE_OK;
  }

  /* Incrementally loaded token: pull the next entry from its cursor. */
  assert( p->bIgnore==0 );
  rc = sqlite3Fts3MsrIncrNext(
      pTab, pTok->pSegcsr, &p->iDocid, &p->pList, &p->nList
  );
  if( p->pList==0 ) *pbEof = 1;
  return rc;
}
/*
** The phrase iterator passed as the second argument:
**
**   * features at least one token that uses an incremental doclist, and
**
**   * does not contain any deferred tokens.
**
** Advance it to the next matching document in the database and populate
** the Fts3Doclist.pList and nList fields.
**
** If there is no "next" entry and no error occurs, then *pbEof is set to
** 1 before returning. Otherwise, if no error occurs and the iterator is
** successfully advanced, *pbEof is set to 0.
**
** If an error occurs, return an SQLite error code. Otherwise, return
** SQLITE_OK. As soon as any token iterator reports an error or EOF, no
** further token iterators are advanced and no phrase-match test is run
** against the (possibly stale) token entries.
*/
static int fts3EvalIncrPhraseNext(
  Fts3Cursor *pCsr,               /* FTS Cursor handle */
  Fts3Phrase *p,                  /* Phrase object to advance to next docid */
  u8 *pbEof                       /* OUT: Set to 1 if EOF */
){
  int rc = SQLITE_OK;
  Fts3Doclist *pDL = &p->doclist;
  Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
  u8 bEof = 0;

  /* This is only called if it is guaranteed that the phrase has at least
  ** one incremental token. In which case the bIncr flag is set. */
  assert( p->bIncr==1 );

  if( p->nToken==1 && p->bIncr ){
    /* Single-token phrase: the token's incremental doclist is the answer. */
    rc = sqlite3Fts3MsrIncrNext(pTab, p->aToken[0].pSegcsr,
        &pDL->iDocid, &pDL->pList, &pDL->nList
    );
    if( pDL->pList==0 ) bEof = 1;
  }else{
    /* Not referenced by name below. NOTE(review): presumably consumed by
    ** the DOCID_CMP() macro - confirm before renaming or removing. */
    int bDescDoclist = pCsr->bDesc;
    struct TokenDoclist a[MAX_INCR_PHRASE_TOKENS];

    memset(a, 0, sizeof(a));
    assert( p->nToken<=MAX_INCR_PHRASE_TOKENS );
    assert( p->iDoclistToken<MAX_INCR_PHRASE_TOKENS );

    /* Stop on error as well as on EOF. Otherwise an error returned while
    ** bEof==0 would leave this loop re-testing the same stale entries
    ** without ever advancing an iterator. */
    while( rc==SQLITE_OK && bEof==0 ){
      int bMaxSet = 0;
      sqlite3_int64 iMax = 0;     /* Largest docid for all iterators */
      int i;                      /* Used to iterate through tokens */

      /* Advance the iterator for each token in the phrase once. Stop as
      ** soon as one token hits EOF: advancing a later token could reset
      ** bEof to 0 and cause a token with no position list to be merged. */
      for(i=0; rc==SQLITE_OK && i<p->nToken && bEof==0; i++){
        rc = incrPhraseTokenNext(pTab, p, i, &a[i], &bEof);
        if( a[i].bIgnore==0 && (bMaxSet==0 || DOCID_CMP(iMax, a[i].iDocid)<0) ){
          iMax = a[i].iDocid;
          bMaxSet = 1;
        }
      }
      assert( rc!=SQLITE_OK || a[p->nToken-1].bIgnore==0 );
      assert( rc!=SQLITE_OK || bMaxSet );

      /* Keep advancing iterators until they all point to the same document */
      for(i=0; i<p->nToken; i++){
        while( rc==SQLITE_OK && bEof==0
            && a[i].bIgnore==0 && DOCID_CMP(a[i].iDocid, iMax)<0
        ){
          rc = incrPhraseTokenNext(pTab, p, i, &a[i], &bEof);
          if( DOCID_CMP(a[i].iDocid, iMax)>0 ){
            /* A new maximum: rescan from the first token. The enclosing
            ** while() re-evaluates with i==0 before the for() increments. */
            iMax = a[i].iDocid;
            i = 0;
          }
        }
      }

      /* Check if the current entries really are a phrase match */
      if( rc==SQLITE_OK && bEof==0 ){
        int nList = 0;
        int nByte = a[p->nToken-1].nList;
        char *aDoclist = sqlite3_malloc(nByte+1);
        if( !aDoclist ) return SQLITE_NOMEM;

        /* Start from a private copy of the last token's position list and
        ** merge each earlier token into it at its distance from the end. */
        memcpy(aDoclist, a[p->nToken-1].pList, nByte+1);

        for(i=0; i<(p->nToken-1); i++){
          if( a[i].bIgnore==0 ){
            char *pL = a[i].pList;
            char *pR = aDoclist;
            char *pOut = aDoclist;
            int nDist = p->nToken-1-i;
            int res = fts3PoslistPhraseMerge(&pOut, nDist, 0, 1, &pL, &pR);
            if( res==0 ) break;
            nList = (int)(pOut - aDoclist);
          }
        }
        if( i==(p->nToken-1) ){
          /* Every token merged successfully: hand the buffer to pDL, which
          ** is flagged (bFreeList) as owning it. */
          pDL->iDocid = iMax;
          pDL->pList = aDoclist;
          pDL->nList = nList;
          pDL->bFreeList = 1;
          break;
        }
        sqlite3_free(aDoclist);
      }
    }
  }

  *pbEof = bEof;
  return rc;
}
/*
** Attempt to move the phrase iterator to point to the next matching docid.
** If an error occurs, return an SQLite error code. Otherwise, return
@ -4099,55 +4408,14 @@ static int fts3EvalPhraseNext(
Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
if( p->bIncr ){
assert( p->nToken==1 );
assert( pDL->pNextDocid==0 );
rc = sqlite3Fts3MsrIncrNext(pTab, p->aToken[0].pSegcsr,
&pDL->iDocid, &pDL->pList, &pDL->nList
);
if( rc==SQLITE_OK && !pDL->pList ){
*pbEof = 1;
}
rc = fts3EvalIncrPhraseNext(pCsr, p, pbEof);
}else if( pCsr->bDesc!=pTab->bDescIdx && pDL->nAll ){
sqlite3Fts3DoclistPrev(pTab->bDescIdx, pDL->aAll, pDL->nAll,
&pDL->pNextDocid, &pDL->iDocid, &pDL->nList, pbEof
);
pDL->pList = pDL->pNextDocid;
}else{
char *pIter; /* Used to iterate through aAll */
char *pEnd = &pDL->aAll[pDL->nAll]; /* 1 byte past end of aAll */
if( pDL->pNextDocid ){
pIter = pDL->pNextDocid;
}else{
pIter = pDL->aAll;
}
if( pIter>=pEnd ){
/* We have already reached the end of this doclist. EOF. */
*pbEof = 1;
}else{
sqlite3_int64 iDelta;
pIter += sqlite3Fts3GetVarint(pIter, &iDelta);
if( pTab->bDescIdx==0 || pDL->pNextDocid==0 ){
pDL->iDocid += iDelta;
}else{
pDL->iDocid -= iDelta;
}
pDL->pList = pIter;
fts3PoslistCopy(0, &pIter);
pDL->nList = (int)(pIter - pDL->pList);
/* pIter now points just past the 0x00 that terminates the position-
** list for document pDL->iDocid. However, if this position-list was
** edited in place by fts3EvalNearTrim(), then pIter may not actually
** point to the start of the next docid value. The following line deals
** with this case by advancing pIter past the zero-padding added by
** fts3EvalNearTrim(). */
while( pIter<pEnd && *pIter==0 ) pIter++;
pDL->pNextDocid = pIter;
assert( pIter>=&pDL->aAll[pDL->nAll] || *pIter );
*pbEof = 0;
}
fts3EvalDlPhraseNext(pTab, pDL, pbEof);
}
return rc;
@ -4172,7 +4440,6 @@ static int fts3EvalPhraseNext(
static void fts3EvalStartReaders(
Fts3Cursor *pCsr, /* FTS Cursor handle */
Fts3Expr *pExpr, /* Expression to initialize phrases in */
int bOptOk, /* True to enable incremental loading */
int *pRc /* IN/OUT: Error code */
){
if( pExpr && SQLITE_OK==*pRc ){
@ -4183,10 +4450,10 @@ static void fts3EvalStartReaders(
if( pExpr->pPhrase->aToken[i].pDeferred==0 ) break;
}
pExpr->bDeferred = (i==nToken);
*pRc = fts3EvalPhraseStart(pCsr, bOptOk, pExpr->pPhrase);
*pRc = fts3EvalPhraseStart(pCsr, 1, pExpr->pPhrase);
}else{
fts3EvalStartReaders(pCsr, pExpr->pLeft, bOptOk, pRc);
fts3EvalStartReaders(pCsr, pExpr->pRight, bOptOk, pRc);
fts3EvalStartReaders(pCsr, pExpr->pLeft, pRc);
fts3EvalStartReaders(pCsr, pExpr->pRight, pRc);
pExpr->bDeferred = (pExpr->pLeft->bDeferred && pExpr->pRight->bDeferred);
}
}
@ -4428,7 +4695,7 @@ static int fts3EvalSelectDeferred(
** overflowing the 32-bit integer it is stored in. */
if( ii<12 ) nLoad4 = nLoad4*4;
if( ii==0 || pTC->pPhrase->nToken>1 ){
if( ii==0 || (pTC->pPhrase->nToken>1 && ii!=nToken-1) ){
/* Either this is the cheapest token in the entire query, or it is
** part of a multi-token phrase. Either way, the entire doclist will
** (eventually) be loaded into memory. It may as well be now. */
@ -4508,7 +4775,7 @@ static int fts3EvalStart(Fts3Cursor *pCsr){
}
#endif
fts3EvalStartReaders(pCsr, pCsr->pExpr, 1, &rc);
fts3EvalStartReaders(pCsr, pCsr->pExpr, &rc);
return rc;
}
@ -4991,6 +5258,16 @@ static int fts3EvalNext(Fts3Cursor *pCsr){
pCsr->iPrevId = pExpr->iDocid;
}while( pCsr->isEof==0 && fts3EvalTestDeferredAndNear(pCsr, &rc) );
}
/* Check if the cursor is past the end of the docid range specified
** by Fts3Cursor.iMinDocid/iMaxDocid. If so, set the EOF flag. */
if( rc==SQLITE_OK && (
(pCsr->bDesc==0 && pCsr->iPrevId>pCsr->iMaxDocid)
|| (pCsr->bDesc!=0 && pCsr->iPrevId<pCsr->iMinDocid)
)){
pCsr->isEof = 1;
}
return rc;
}
@ -5014,12 +5291,16 @@ static void fts3EvalRestart(
if( pPhrase ){
fts3EvalInvalidatePoslist(pPhrase);
if( pPhrase->bIncr ){
assert( pPhrase->nToken==1 );
assert( pPhrase->aToken[0].pSegcsr );
sqlite3Fts3MsrIncrRestart(pPhrase->aToken[0].pSegcsr);
int i;
for(i=0; i<pPhrase->nToken; i++){
Fts3PhraseToken *pToken = &pPhrase->aToken[i];
assert( pToken->pDeferred==0 );
if( pToken->pSegcsr ){
sqlite3Fts3MsrIncrRestart(pToken->pSegcsr);
}
}
*pRc = fts3EvalPhraseStart(pCsr, 0, pPhrase);
}
pPhrase->doclist.pNextDocid = 0;
pPhrase->doclist.iDocid = 0;
}

View File

@ -267,6 +267,12 @@ struct Fts3Table {
int inTransaction; /* True after xBegin but before xCommit/xRollback */
int mxSavepoint; /* Largest valid xSavepoint integer */
#endif
#ifdef SQLITE_TEST
/* True to disable the incremental doclist optimization. This is controlled
** by special insert command 'test-no-incr-doclist'. */
int bNoIncrDoclist;
#endif
};
/*
@ -292,7 +298,8 @@ struct Fts3Cursor {
int eEvalmode; /* An FTS3_EVAL_XX constant */
int nRowAvg; /* Average size of database rows, in pages */
sqlite3_int64 nDoc; /* Documents in table */
i64 iMinDocid; /* Minimum docid to return */
i64 iMaxDocid; /* Maximum docid to return */
int isMatchinfoNeeded; /* True when aMatchinfo[] needs filling in */
u32 *aMatchinfo; /* Information about most recent match */
int nMatchinfo; /* Number of elements in aMatchinfo[] */
@ -322,6 +329,15 @@ struct Fts3Cursor {
#define FTS3_DOCID_SEARCH 1 /* Lookup by rowid on %_content table */
#define FTS3_FULLTEXT_SEARCH 2 /* Full-text index search */
/*
** The lower 16-bits of the sqlite3_index_info.idxNum value set by
** the xBestIndex() method contains the Fts3Cursor.eSearch value described
** above. The upper 16-bits contain a combination of the following
** bits, used to describe extra constraints on full-text searches.
*/
#define FTS3_HAVE_LANGID 0x00010000 /* languageid=? */
#define FTS3_HAVE_DOCID_GE 0x00020000 /* docid>=? */
#define FTS3_HAVE_DOCID_LE 0x00040000 /* docid<=? */
struct Fts3Doclist {
char *aAll; /* Array containing doclist (or NULL) */

View File

@ -5050,6 +5050,9 @@ static int fts3SpecialInsert(Fts3Table *p, sqlite3_value *pVal){
}else if( nVal>11 && 0==sqlite3_strnicmp(zVal, "maxpending=", 9) ){
p->nMaxPendingData = atoi(&zVal[11]);
rc = SQLITE_OK;
}else if( nVal>21 && 0==sqlite3_strnicmp(zVal, "test-no-incr-doclist=", 21) ){
p->bNoIncrDoclist = atoi(&zVal[21]);
rc = SQLITE_OK;
#endif
}else{
rc = SQLITE_ERROR;

View File

@ -1,5 +1,5 @@
C The\ssqlite3FixInit()\sroutine\scannot\sfail.\s\sSo\schange\sthe\sreturn\stype\sfrom\s"int"\nto\s"void".
D 2013-10-03T15:39:44.505
C Have\sFTS\stake\sadvantage\sof\s"docid<?"\sconstraints\swhen\sthey\sare\spresent.\sExtend\sthe\sFTS\s"incremental\sdoclist"\soptimization\sso\sthat\sit\sis\sused\sfor\stokens\swithin\smulti-token\sphrases.
D 2013-10-03T20:41:18.513
F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f
F Makefile.in 5e41da95d92656a5004b03d3576e8b226858a28e
F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23
@ -78,9 +78,9 @@ F ext/fts3/README.content fdc666a70d5257a64fee209f97cf89e0e6e32b51
F ext/fts3/README.syntax a19711dc5458c20734b8e485e75fb1981ec2427a
F ext/fts3/README.tokenizers e0a8b81383ea60d0334d274fadf305ea14a8c314
F ext/fts3/README.txt 8c18f41574404623b76917b9da66fcb0ab38328d
F ext/fts3/fts3.c e1240ab6f5999174309a41ffac63b94ed1233098
F ext/fts3/fts3.c 6d277a3ff6b20ff815184395407c5a4bd7787f9c
F ext/fts3/fts3.h 3a10a0af180d502cecc50df77b1b22df142817fe
F ext/fts3/fts3Int.h c7a451661c2d9b2440b2008c3f63ce06f13181d6
F ext/fts3/fts3Int.h 8689f7cf85020e7f88d1e761eeac480c3b0ea7ad
F ext/fts3/fts3_aux.c b02632f6dd0e375ce97870206d914ea6d8df5ccd
F ext/fts3/fts3_expr.c f8eb1046063ba342c7114eba175cabb31c4a64e7
F ext/fts3/fts3_hash.c 8dd2d06b66c72c628c2732555a32bc0943114914
@ -96,7 +96,7 @@ F ext/fts3/fts3_tokenizer.h 64c6ef6c5272c51ebe60fc607a896e84288fcbc3
F ext/fts3/fts3_tokenizer1.c 5c98225a53705e5ee34824087478cf477bdb7004
F ext/fts3/fts3_unicode.c 92391b4b4fb043564c6539ea9b8661e3bcba47b9
F ext/fts3/fts3_unicode2.c 0113d3acf13429e6dc38e0647d1bc71211c31a4d
F ext/fts3/fts3_write.c ce45c3ea578464f26b0293ea8e54a39694f18b64
F ext/fts3/fts3_write.c 851e65f413576055f2e86a63d45b7d67c829db56
F ext/fts3/fts3speed.tcl b54caf6a18d38174f1a6e84219950d85e98bb1e9
F ext/fts3/mkfts3amal.tcl 252ecb7fe6467854f2aa237bf2c390b74e71f100
F ext/fts3/tool/fts3view.c 6cfc5b67a5f0e09c0d698f9fd012c784bfaa9197
@ -555,6 +555,8 @@ F test/fts3tok_err.test 52273cd193b9036282f7bacb43da78c6be87418d
F test/fts4aa.test 0c3152322c7f0b548cc942ad763eaba0da87ccca
F test/fts4check.test 66fa274cab2b615f2fb338b257713aba8fad88a8
F test/fts4content.test 2e7252557d6d24afa101d9ba1de710d6140e6d06
F test/fts4docid.test e33c383cfbdff0284685604d256f347a18fdbf01
F test/fts4incr.test 2fae04582c2329a038b2b1f985e702478fb94888
F test/fts4langid.test 24a6e41063b416bbdf371ff6b4476fa41c194aa7
F test/fts4merge.test c424309743fdd203f8e56a1f1cd7872cd66cc0ee
F test/fts4merge2.test 5faa558d1b672f82b847d2a337465fa745e46891
@ -1119,7 +1121,7 @@ F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4
F tool/warnings.sh fbc018d67fd7395f440c28f33ef0f94420226381
F tool/wherecosttest.c f407dc4c79786982a475261866a161cd007947ae
F tool/win/sqlite.vsix 030f3eeaf2cb811a3692ab9c14d021a75ce41fff
P 8338232a111be16d6c2ab57176d0a23a001f02ad
R b1cce97e822326c63df11398c4318c43
U drh
Z da47fe5fd9aed60cdbc3a07080f59327
P 500c5932fe3f5fcd0940522f7839d581c555e0eb 24aa20da222a9cc181473bc41d0f8791be91fa97
R 699a595349579dae27a85bc7620bf39e
U dan
Z 13c7e5229c05389938219ac34fe1504f

View File

@ -1 +1 @@
500c5932fe3f5fcd0940522f7839d581c555e0eb
baf8ce5916ea9baf0ec557263cb9c7ecf716431f

116
test/fts4docid.test Normal file
View File

@ -0,0 +1,116 @@
# 2012 March 26
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#*************************************************************************
#
set testdir [file dirname $argv0]
source $testdir/tester.tcl
source $testdir/fts3_common.tcl
set ::testprefix fts4docid
# If SQLITE_ENABLE_FTS3 is not defined, omit this file.
ifcapable !fts3 {
finish_test
return
}
# Initialize a table with pseudo-randomly generated data.
#
do_execsql_test 1.0 { CREATE VIRTUAL TABLE t1 USING fts4; }
do_test 1.1 {
foreach {docid content} {
0 {F N K B T I K V B A} 1 {D M J E S P H E L O}
2 {W U T Q T Q T L H G} 3 {D W H M B R S Z B K}
4 {F Q I N P Q J L Z D} 5 {J O Q E Y A O E L B}
6 {O V R A C R K C Y H} 7 {Z J H T Q Q O R A G}
8 {L K J W G D Y W B M} 9 {K E Y I A Q R Q T S}
10 {N P H Y Z M R T I C} 11 {E X H O I S E S Z F}
12 {B Y Q T J X C L L J} 13 {Q D C U U A Q E Z U}
14 {S I T C J R X S J M} 15 {M X M K E X L H Q Y}
16 {O W E I C H U Y S Y} 17 {P V V E M T H C C S}
18 {L Y A M I E N M X O} 19 {S Y R U L S Q Y F P}
20 {U J S T T J J S V X} 21 {T E I W P O V A A P}
22 {W D K H D H F G O J} 23 {T X Y P G M J U I L}
24 {F V X E B C N B K W} 25 {E B A Y N N T Z I C}
26 {G E E B C P U D H G} 27 {J D J K N S B Q T M}
28 {Q T G M D O D Y V G} 29 {P X W I W V P W Z G}
} {
execsql { INSERT INTO t1(docid, content) VALUES($docid, $content) }
}
} {}
# Quick test regarding affinities and the docid/rowid column.
do_execsql_test 2.1.1 { SELECT docid FROM t1 WHERE docid = 5 } {5}
do_execsql_test 2.1.2 { SELECT docid FROM t1 WHERE docid = '5' } {5}
do_execsql_test 2.1.3 { SELECT docid FROM t1 WHERE docid = +5 } {5}
do_execsql_test 2.1.4 { SELECT docid FROM t1 WHERE docid = +'5' } {5}
do_execsql_test 2.1.5 { SELECT docid FROM t1 WHERE docid < 5 } {0 1 2 3 4}
do_execsql_test 2.1.6 { SELECT docid FROM t1 WHERE docid < '5' } {0 1 2 3 4}
do_execsql_test 2.2.1 { SELECT rowid FROM t1 WHERE rowid = 5 } {5}
do_execsql_test 2.2.2 { SELECT rowid FROM t1 WHERE rowid = '5' } {5}
do_execsql_test 2.2.3 { SELECT rowid FROM t1 WHERE rowid = +5 } {5}
do_execsql_test 2.2.4 { SELECT rowid FROM t1 WHERE rowid = +'5' } {5}
do_execsql_test 2.2.5 { SELECT rowid FROM t1 WHERE rowid < 5 } {0 1 2 3 4}
do_execsql_test 2.2.6 { SELECT rowid FROM t1 WHERE rowid < '5' } {0 1 2 3 4}
#-------------------------------------------------------------------------
# Now test a bunch of full-text queries featuring range constraints on
# the docid field. Each query is run so that the range constraint:
#
# * is on the docid field,
# * is on the docid field with a unary +,
# * is on the rowid field,
# * is on the rowid field with a unary +.
#
# Queries are run with both "ORDER BY docid DESC" and "ORDER BY docid ASC"
# clauses.
#
foreach {tn where result} {
1 {WHERE t1 MATCH 'O' AND xxx < 17} {1 5 6 7 11 16}
2 {WHERE t1 MATCH 'O' AND xxx < 4123456789123456} {1 5 6 7 11 16 18 21 22 28}
3 {WHERE t1 MATCH 'O' AND xxx < 1} {}
4 {WHERE t1 MATCH 'O' AND xxx < -4123456789123456} {}
5 {WHERE t1 MATCH 'O' AND xxx > 17} {18 21 22 28}
6 {WHERE t1 MATCH 'O' AND xxx > 4123456789123456} {}
7 {WHERE t1 MATCH 'O' AND xxx > 1} {5 6 7 11 16 18 21 22 28}
8 {WHERE t1 MATCH 'O' AND xxx > -4123456789123456} {1 5 6 7 11 16 18 21 22 28}
9 {WHERE t1 MATCH '"Q T"' AND xxx < 27} {2 9 12}
10 {WHERE t1 MATCH '"Q T"' AND xxx <= 27} {2 9 12 27}
11 {WHERE t1 MATCH '"Q T"' AND xxx > 27} {28}
12 {WHERE t1 MATCH '"Q T"' AND xxx >= 27} {27 28}
} {
foreach {tn2 ref order} {
1 docid "ORDER BY docid ASC"
2 +docid "ORDER BY docid ASC"
3 rowid "ORDER BY docid ASC"
4 +rowid "ORDER BY docid ASC"
5 docid "ORDER BY docid DESC"
6 +docid "ORDER BY docid DESC"
7 rowid "ORDER BY docid DESC"
8 +rowid "ORDER BY docid DESC"
} {
set w [string map "xxx $ref" $where]
set q "SELECT docid FROM t1 $w $order"
if {$tn2<5} {
set r [lsort -integer -increasing $result]
} else {
set r [lsort -integer -decreasing $result]
}
do_execsql_test 3.$tn.$tn2 $q $r
}
}
finish_test

53
test/fts4incr.test Normal file
View File

@ -0,0 +1,53 @@
# 2012 March 26
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#*************************************************************************
#
set testdir [file dirname $argv0]
source $testdir/tester.tcl
source $testdir/fts3_common.tcl
set ::testprefix fts4incr
# If SQLITE_ENABLE_FTS3 is not defined, omit this file.
ifcapable !fts3 {
finish_test
return
}
# Create the fts_kjv_genesis procedure which fills an FTS3/4 table
# with the complete text of the Book of Genesis.
#
source $testdir/genesis.tcl
do_test 1.0 {
execsql { CREATE VIRTUAL TABLE t1 USING fts4(words) }
fts_kjv_genesis
} {}
do_execsql_test 1.1 {
SELECT min(docid), max(docid) FROM t1;
} {1001001 1050026}
foreach {tn q res} {
1 { SELECT count(*) FROM t1 WHERE t1 MATCH 'and' AND docid < 1010000} 224
2 { SELECT count(*) FROM t1 WHERE t1 MATCH '"in the"' AND docid < 1010000} 47
3 { SELECT count(*) FROM t1 WHERE t1 MATCH '"And God"' AND docid < 1010000} 33
4 { SELECT count(*) FROM t1 WHERE t1
MATCH '"land of canaan"' AND docid < 1030000 } 7
} {
foreach s {0 1} {
execsql "INSERT INTO t1(t1) VALUES('test-no-incr-doclist=$s')"
do_execsql_test 2.$tn.$s $q $res
set t($s) [lindex [time [list execsql $q] 100] 0]
}
puts "with optimization: $t(0) without: $t(1)"
}
finish_test