From d17f70a624dd40670b547ebb50ac35015b35f7b7 Mon Sep 17 00:00:00 2001 From: dan Date: Wed, 19 Oct 2011 11:57:13 +0000 Subject: [PATCH] Change the way ^ tokens work in FTS so that the filtering is done as part of reading the FTS index instead of waiting until an entire doclist has been retrieved and then filtering it. FossilOrigin-Name: 9b58c59eb4efaa38ce50a3ce1b52f9ba578c71d6 --- ext/fts3/fts3.c | 97 ++++++++++++++++++------------------------- ext/fts3/fts3Int.h | 3 +- ext/fts3/fts3_write.c | 26 +++++++++--- manifest | 16 +++---- manifest.uuid | 2 +- 5 files changed, 72 insertions(+), 72 deletions(-) diff --git a/ext/fts3/fts3.c b/ext/fts3/fts3.c index e82d1f5e89..e1330004a2 100644 --- a/ext/fts3/fts3.c +++ b/ext/fts3/fts3.c @@ -2348,65 +2348,53 @@ static void fts3DoclistPhraseMerge( } /* -** When this function is called, pList points to a doclist containing position -** data, length *pnList bytes. This removes all entries from the doclist that -** do not correspond to the first token in a column and overwrites pList -** with the result. *pnList is set to the length of the new doclist before -** returning. -** -** If bDescDoclist is true, then both the input and output are in descending -** order. Otherwise, ascending. +** Argument pList points to a position list nList bytes in size. This +** function checks to see if the position list contains any entries for +** a token in position 0 (of any column). If so, it writes argument iDelta +** to the output buffer pOut, followed by a position list consisting only +** of the entries from pList at position 0, and terminated by an 0x00 byte. +** The value returned is the number of bytes written to pOut (if any). */ -static void fts3DoclistFirstFilter( - int bDescDoclist, /* True if pList is a descending doclist */ - char *pList, /* Buffer containing doclist */ - int *pnList /* IN/OUT: Size of doclist */ +int sqlite3Fts3FirstFilter( + sqlite3_int64 iDelta, /* Varint that may be written to pOut */ + char *pList, /* Position list (no 0x00 term) */ + int nList, /* Size of pList in bytes */ + char *pOut /* Write output here */ ){ + int nOut = 0; + int bWritten = 0; /* True once iDelta has been written */ char *p = pList; - char *pOut = pList; - char *pEnd = &pList[*pnList]; + char *pEnd = &pList[nList]; - sqlite3_int64 iDoc; - sqlite3_int64 iPrev; - int bFirstOut = 0; - - fts3GetDeltaVarint3(&p, pEnd, 0, &iDoc); - while( p ){ - int bWritten = 0; - if( *p!=0x01 ){ - if( *p==0x02 ){ - fts3PutDeltaVarint3(&pOut, bDescDoclist, &iPrev, &bFirstOut, iDoc); - *pOut++ = 0x02; - bWritten = 1; - } - fts3ColumnlistCopy(0, &p); + if( *p!=0x01 ){ + if( *p==0x02 ){ + nOut += sqlite3Fts3PutVarint(&pOut[nOut], iDelta); + pOut[nOut++] = 0x02; + bWritten = 1; } - - while( *p==0x01 ){ - sqlite3_int64 iCol; - p++; - p += sqlite3Fts3GetVarint(p, &iCol); - if( *p==0x02 ){ - if( bWritten==0 ){ - fts3PutDeltaVarint3(&pOut, bDescDoclist, &iPrev, &bFirstOut, iDoc); - bWritten = 1; - } - *pOut++ = 0x01; - pOut += sqlite3Fts3PutVarint(pOut, iCol); - *pOut++ = 0x02; - } - fts3ColumnlistCopy(0, &p); - } - if( bWritten ){ - *pOut++ = 0x00; - } - - assert( *p==0x00 ); - p++; - fts3GetDeltaVarint3(&p, pEnd, bDescDoclist, &iDoc); + fts3ColumnlistCopy(0, &p); } - *pnList = (pOut - pList); + while( pisPrefix ? FTS3_SEGMENT_PREFIX : 0) + | (pTok->bFirst ? FTS3_SEGMENT_FIRST : 0) | (iColumnnColumn ? FTS3_SEGMENT_COLUMN_FILTER : 0); filter.iCol = iColumn; filter.zTerm = pTok->z; @@ -3580,10 +3569,6 @@ static void fts3EvalPhraseMergeToken( ){ assert( iToken!=p->iDoclistToken ); - if( p->aToken[iToken].bFirst ){ - fts3DoclistFirstFilter(pTab->bDescIdx, pList, &nList); - } - if( pList==0 ){ sqlite3_free(p->doclist.aAll); p->doclist.aAll = 0; diff --git a/ext/fts3/fts3Int.h b/ext/fts3/fts3Int.h index c9b291c6cc..5f0f8dea01 100644 --- a/ext/fts3/fts3Int.h +++ b/ext/fts3/fts3Int.h @@ -429,6 +429,7 @@ int sqlite3Fts3SegReaderCursor( #define FTS3_SEGMENT_COLUMN_FILTER 0x00000004 #define FTS3_SEGMENT_PREFIX 0x00000008 #define FTS3_SEGMENT_SCAN 0x00000010 +#define FTS3_SEGMENT_FIRST 0x00000020 /* Type passed as 4th argument to SegmentReaderIterate() */ struct Fts3SegFilter { @@ -468,8 +469,8 @@ int sqlite3Fts3GetVarint32(const char *, int *); int sqlite3Fts3VarintLen(sqlite3_uint64); void sqlite3Fts3Dequote(char *); void sqlite3Fts3DoclistPrev(int,char*,int,char**,sqlite3_int64*,int*,u8*); - int sqlite3Fts3EvalPhraseStats(Fts3Cursor *, Fts3Expr *, u32 *); +int sqlite3Fts3FirstFilter(sqlite3_int64, char *, int, char *); /* fts3_tokenizer.c */ const char *sqlite3Fts3NextToken(const char *, int *); diff --git a/ext/fts3/fts3_write.c b/ext/fts3/fts3_write.c index 40c8e2f9ad..855e97764e 100644 --- a/ext/fts3/fts3_write.c +++ b/ext/fts3/fts3_write.c @@ -2509,6 +2509,7 @@ int sqlite3Fts3SegReaderStep( int isColFilter = (pCsr->pFilter->flags & FTS3_SEGMENT_COLUMN_FILTER); int isPrefix = (pCsr->pFilter->flags & FTS3_SEGMENT_PREFIX); int isScan = (pCsr->pFilter->flags & FTS3_SEGMENT_SCAN); + int isFirst = (pCsr->pFilter->flags & FTS3_SEGMENT_FIRST); Fts3SegReader **apSegment = pCsr->apSegment; int nSegment = pCsr->nSegment; @@ -2568,6 +2569,7 @@ int sqlite3Fts3SegReaderStep( assert( isIgnoreEmpty || (isRequirePos && !isColFilter) ); if( nMerge==1 && !isIgnoreEmpty + && !isFirst && (p->bDescIdx==0 || fts3SegReaderIsPending(apSegment[0])==0) ){ pCsr->nDoclist = apSegment[0]->nDoclist; @@ -2633,12 +2635,24 @@ int sqlite3Fts3SegReaderStep( } pCsr->aBuffer = aNew; } - nDoclist += sqlite3Fts3PutVarint(&pCsr->aBuffer[nDoclist], iDelta); - iPrev = iDocid; - if( isRequirePos ){ - memcpy(&pCsr->aBuffer[nDoclist], pList, nList); - nDoclist += nList; - pCsr->aBuffer[nDoclist++] = '\0'; + + if( isFirst ){ + char *a = &pCsr->aBuffer[nDoclist]; + int nWrite; + + nWrite = sqlite3Fts3FirstFilter(iDelta, pList, nList, a); + if( nWrite ){ + iPrev = iDocid; + nDoclist += nWrite; + } + }else{ + nDoclist += sqlite3Fts3PutVarint(&pCsr->aBuffer[nDoclist], iDelta); + iPrev = iDocid; + if( isRequirePos ){ + memcpy(&pCsr->aBuffer[nDoclist], pList, nList); + nDoclist += nList; + pCsr->aBuffer[nDoclist++] = '\0'; + } } } diff --git a/manifest b/manifest index f155d09e14..df0a118c08 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Add\stests\sfor\sFTS\s^\ssearches\sand\smatchinfo(). -D 2011-10-19T10:18:01.912 +C Change\sthe\sway\s^\stokens\swork\sin\sFTS\sso\sthat\sthe\sfiltering\sis\sdone\sas\spart\sof\sreading\sthe\sFTS\sindex\sinstead\sof\swaiting\suntil\san\sentire\sdoclist\shas\sbeen\sretrieved\sand\sthen\sfiltering\sit. +D 2011-10-19T11:57:13.985 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in a162fe39e249b8ed4a65ee947c30152786cfe897 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -62,9 +62,9 @@ F ext/fts2/mkfts2amal.tcl 974d5d438cb3f7c4a652639262f82418c1e4cff0 F ext/fts3/README.syntax a19711dc5458c20734b8e485e75fb1981ec2427a F ext/fts3/README.tokenizers 998756696647400de63d5ba60e9655036cb966e9 F ext/fts3/README.txt 8c18f41574404623b76917b9da66fcb0ab38328d -F ext/fts3/fts3.c 708122f0ed7b7b0aa9813fe302eb40a238956276 +F ext/fts3/fts3.c 064b660a11ae29651b647fa7c3e9954d901ab58a F ext/fts3/fts3.h 3a10a0af180d502cecc50df77b1b22df142817fe -F ext/fts3/fts3Int.h bc27eebe2c5919115aa1858fdd308a230af6a359 +F ext/fts3/fts3Int.h 7a0deb219371d29b8d385fb5e929ede2bdc7c239 F ext/fts3/fts3_aux.c 0ebfa7b86cf8ff6a0861605fcc63b83ec1b70691 F ext/fts3/fts3_expr.c dd0facbede8fd7d1376670cc6154f1fef3a4c5bc F ext/fts3/fts3_hash.c 8dd2d06b66c72c628c2732555a32bc0943114914 @@ -77,7 +77,7 @@ F ext/fts3/fts3_test.c 24fa13f330db011500acb95590da9eee24951894 F ext/fts3/fts3_tokenizer.c 9ff7ec66ae3c5c0340fa081958e64f395c71a106 F ext/fts3/fts3_tokenizer.h 13ffd9fcb397fec32a05ef5cd9e0fa659bf3dbd3 F ext/fts3/fts3_tokenizer1.c 0dde8f307b8045565cf63797ba9acfaff1c50c68 -F ext/fts3/fts3_write.c 567380f2d6671df16cfbb56324b321c71d5ab0d3 +F ext/fts3/fts3_write.c aaf0885fd5d37c6869071ee58b5aa3ba07cc0d87 F ext/fts3/fts3speed.tcl b54caf6a18d38174f1a6e84219950d85e98bb1e9 F ext/fts3/mkfts3amal.tcl 252ecb7fe6467854f2aa237bf2c390b74e71f100 F ext/icu/README.txt bf8461d8cdc6b8f514c080e4e10dc3b2bbdfefa9 @@ -967,7 +967,7 @@ F tool/symbols.sh caaf6ccc7300fd43353318b44524853e222557d5 F tool/tostr.awk e75472c2f98dd76e06b8c9c1367f4ab07e122d06 F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f F tool/warnings.sh b7fdb2cc525f5ef4fa43c80e771636dd3690f9d2 -P 2c03b24f4cc6f2c28c9d5b9984320d41b8486c32 -R 402b5e477b47b6aa728a3149c74db091 +P 92618c1463fb304cf8057d082b2c7096152dff27 +R c2d7bdd9838ac956262a194e0ae43b40 U dan -Z 87cae0b85a25fbf32c49680729e400c4 +Z 8d1b6b02c37e3947f3ee71176bf9a674 diff --git a/manifest.uuid b/manifest.uuid index 4e40bb93ac..4b15a85377 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -92618c1463fb304cf8057d082b2c7096152dff27 \ No newline at end of file +9b58c59eb4efaa38ce50a3ce1b52f9ba578c71d6 \ No newline at end of file