Optimizations for fts5 expressions that filter on column. More still to come.

FossilOrigin-Name: bf1607ac155018573ca40fb58aca62c5fea7e60b
This commit is contained in:
dan 2015-10-06 20:53:26 +00:00
parent 9f1ef45f6a
commit a2507137f3
8 changed files with 249 additions and 40 deletions

View File

@ -81,6 +81,20 @@ extern int sqlite3_fts5_may_be_corrupt;
#endif
typedef struct Fts5Global Fts5Global;
typedef struct Fts5ExprColset Fts5ExprColset;
/* If a NEAR() clump or phrase may only match a specific set of columns,
** then an object of the following type is used to record the set of columns.
** Each entry in the aiCol[] array is a column that may be matched.
**
** This object is used by fts5_expr.c and fts5_index.c.
*/
struct Fts5ExprColset {
int nCol; /* Number of valid entries in aiCol[] */
/* Column numbers that may be matched. Declared with one element but
** indexed up to nCol-1 by users of this object - presumably the object is
** over-allocated by whoever creates it (TODO confirm against allocator). */
int aiCol[1];
};
/**************************************************************************
** Interface to code in fts5_config.c. fts5_config.c contains code
@ -305,7 +319,7 @@ int sqlite3Fts5IndexClose(Fts5Index *p);
/*
** for(
** pIter = sqlite3Fts5IndexQuery(p, "token", 5, 0);
** sqlite3Fts5IndexQuery(p, "token", 5, 0, 0, &pIter);
** 0==sqlite3Fts5IterEof(pIter);
** sqlite3Fts5IterNext(pIter)
** ){
@ -321,7 +335,8 @@ int sqlite3Fts5IndexQuery(
Fts5Index *p, /* FTS index to query */
const char *pToken, int nToken, /* Token (or prefix) to query for */
int flags, /* Mask of FTS5INDEX_QUERY_X flags */
Fts5IndexIter **ppIter
Fts5ExprColset *pColset, /* Match these columns only */
Fts5IndexIter **ppIter /* OUT: New iterator object */
);
/*
@ -567,7 +582,6 @@ typedef struct Fts5Parse Fts5Parse;
typedef struct Fts5Token Fts5Token;
typedef struct Fts5ExprPhrase Fts5ExprPhrase;
typedef struct Fts5ExprNearset Fts5ExprNearset;
typedef struct Fts5ExprColset Fts5ExprColset;
struct Fts5Token {
const char *p; /* Token text (not NULL terminated) */

View File

@ -89,16 +89,6 @@ struct Fts5ExprPhrase {
Fts5ExprTerm aTerm[1]; /* Terms that make up this phrase */
};
/*
** If a NEAR() clump may only match a specific set of columns, then
** Fts5ExprNearset.pColset points to an object of the following type.
** Each entry in the aiCol[] array
*/
struct Fts5ExprColset {
int nCol;
int aiCol[1];
};
/*
** One or more phrases that must appear within a certain token distance of
** each other within each matching document.
@ -1002,6 +992,7 @@ static int fts5ExprNearInitAll(
pExpr->pIndex, p->zTerm, strlen(p->zTerm),
(pTerm->bPrefix ? FTS5INDEX_QUERY_PREFIX : 0) |
(pExpr->bDesc ? FTS5INDEX_QUERY_DESC : 0),
pNear->pColset,
&p->pIter
);
assert( rc==SQLITE_OK || p->pIter==0 );

View File

@ -3942,12 +3942,81 @@ int sqlite3Fts5IndexMerge(Fts5Index *p, int nMerge){
/*
** Chunk callback that performs no filtering. The context argument is an
** Fts5Buffer; the nChunk bytes at pChunk are appended to it unchanged.
** Nothing is done for an empty chunk. The address of p->rc is handed to the
** append routine (presumably so that an allocation failure is recorded
** there - confirm against fts5BufferAppendBlob()).
*/
static void fts5PoslistCallback(
Fts5Index *p,
void *pCtx,
void *pContext,
const u8 *pChunk, int nChunk
){
assert_nc( nChunk>=0 );
if( nChunk>0 ){
fts5BufferAppendBlob(&p->rc, (Fts5Buffer*)pCtx, nChunk, pChunk);
fts5BufferAppendBlob(&p->rc, (Fts5Buffer*)pContext, nChunk, pChunk);
}
}
typedef struct PoslistCallbackCtx PoslistCallbackCtx;
/*
** Context object passed (as the void* argument) to
** fts5PoslistFilterCallback(). It carries the filter state from one chunk
** of position-list data to the next.
**
** eState takes one of the following values:
**
**   0 - The column currently being scanned is not in pColset. Its data
**       is skipped.
**   1 - The column currently being scanned is in pColset. Its data is
**       appended to pBuf.
**   2 - The previous chunk ended immediately after a 0x01 byte that
**       introduces a new column, so the column number is the first varint
**       of the next chunk.
*/
struct PoslistCallbackCtx {
Fts5Buffer *pBuf; /* Append to this buffer */
Fts5ExprColset *pColset; /* Restrict matches to these columns */
int eState; /* See above */
};
/*
** Return 1 if column number iCol is one of the pColset->nCol entries in
** the pColset->aiCol[] array, or 0 if it is not. The array is searched
** linearly from the first entry.
**
** TODO: Make this more efficient!
*/
static int fts5IndexColsetTest(Fts5ExprColset *pColset, int iCol){
  const int *pCol = pColset->aiCol;
  const int *pEnd = &pCol[pColset->nCol];
  while( pCol<pEnd ){
    if( *pCol==iCol ){
      return 1;
    }
    pCol++;
  }
  return 0;
}
/*
** Chunk callback used by fts5SegiterPoslist() when a column filter is in
** effect. pContext points to a PoslistCallbackCtx. Each call receives one
** chunk (pChunk/nChunk) of position-list data and appends to pCtx->pBuf
** only the parts that belong to columns accepted by fts5IndexColsetTest().
**
** Within the data, a varint with value 1 (a single 0x01 byte) marks the
** start of a new column's hits and is followed by a varint holding the
** column number. pCtx->eState records whether the column being scanned is
** accepted (1), rejected (0), or still unknown because the previous chunk
** ended directly after the 0x01 marker (2).
*/
static void fts5PoslistFilterCallback(
Fts5Index *p,
void *pContext,
const u8 *pChunk, int nChunk
){
PoslistCallbackCtx *pCtx = (PoslistCallbackCtx*)pContext;
assert_nc( nChunk>=0 );
if( nChunk>0 ){
/* Search through to find the first varint with value 1. This is the
** start of the next column's hits. */
int i = 0;
int iStart = 0;
/* The previous chunk ended right after a 0x01 marker, which was not
** copied at that point. This chunk therefore begins with the column
** number. If the column is accepted, emit the withheld marker now; the
** column-number bytes themselves are copied by the loop below because
** iStart is still 0. */
if( pCtx->eState==2 ){
int iCol;
fts5IndexGetVarint32(pChunk, i, iCol);
if( fts5IndexColsetTest(pCtx->pColset, iCol) ){
pCtx->eState = 1;
fts5BufferAppendVarint(&p->rc, pCtx->pBuf, 1);
}else{
pCtx->eState = 0;
}
}
do {
/* Advance i to the next 0x01 marker or to the end of the chunk. The
** inner loop steps over the continuation bytes (high bit set) of a
** multi-byte varint so that a 0x01 inside such a varint is not taken
** for a marker.
** NOTE(review): the inner loop does not test i<nChunk; it appears to
** assume a varint never straddles the end of a chunk - confirm. */
while( i<nChunk && pChunk[i]!=0x01 ){
while( pChunk[i] & 0x80 ) i++;
i++;
}
/* Copy the run [iStart, i) if the current column is accepted. */
if( pCtx->eState ){
fts5BufferAppendBlob(&p->rc, pCtx->pBuf, i-iStart, &pChunk[iStart]);
}
if( i<nChunk ){
/* pChunk[i] is a 0x01 marker. Step over it and decide whether the
** column that follows is wanted. */
int iCol;
iStart = i;
i++;
if( i>=nChunk ){
/* The marker is the last byte of this chunk. The column number
** arrives with the next chunk. */
pCtx->eState = 2;
}else{
fts5IndexGetVarint32(pChunk, i, iCol);
pCtx->eState = fts5IndexColsetTest(pCtx->pColset, iCol);
if( pCtx->eState ){
/* Accepted: copy the marker and column number now, then move
** iStart forward so they are not copied a second time. */
fts5BufferAppendBlob(&p->rc, pCtx->pBuf, i-iStart, &pChunk[iStart]);
iStart = i;
}
}
}
}while( i<nChunk );
}
}
@ -3960,9 +4029,19 @@ static void fts5PoslistCallback(
/*
** Append position-list data for the current entry of pSeg to pBuf by
** running fts5ChunkIterate() over it. If pColset is NULL every chunk is
** copied verbatim via fts5PoslistCallback(); otherwise the chunks are passed
** through fts5PoslistFilterCallback() so that only columns in pColset are
** kept.
*/
static void fts5SegiterPoslist(
Fts5Index *p,
Fts5SegIter *pSeg,
Fts5ExprColset *pColset,
Fts5Buffer *pBuf
){
fts5ChunkIterate(p, pSeg, (void*)pBuf, fts5PoslistCallback);
if( pColset==0 ){
fts5ChunkIterate(p, pSeg, (void*)pBuf, fts5PoslistCallback);
}else{
PoslistCallbackCtx sCtx;
sCtx.pBuf = pBuf;
sCtx.pColset = pColset;
/* The initial state is whether column 0 is accepted - presumably because
** a position list starts with column 0 data that has no explicit marker
** (TODO confirm against the poslist format). pColset is always non-NULL
** on this branch, so the first arm of the ternary is always taken. */
sCtx.eState = pColset ? fts5IndexColsetTest(pColset, 0) : 1;
assert( sCtx.eState==0 || sCtx.eState==1 );
fts5ChunkIterate(p, pSeg, (void*)&sCtx, fts5PoslistFilterCallback);
}
}
/*
@ -3973,22 +4052,45 @@ static void fts5SegiterPoslist(
** If an error occurs, an error code is left in p->rc. It is assumed
** no error has already occurred when this function is called.
*/
static void fts5MultiIterPoslist(
static int fts5MultiIterPoslist(
Fts5Index *p,
Fts5IndexIter *pMulti,
Fts5ExprColset *pColset,
int bSz, /* Append a size field before the data */
Fts5Buffer *pBuf
){
/* The int-returning form returns 1 only when a size field was requested
** (bSz), a column filter is in use (pColset) and the filtered position
** list turned out to be empty. In every other case, including when
** p->rc is already set on entry, it returns 0. */
if( p->rc==SQLITE_OK ){
int iSz;
int iData;
Fts5SegIter *pSeg = &pMulti->aSeg[ pMulti->aFirst[1].iFirst ];
assert( fts5MultiIterEof(p, pMulti)==0 );
if( bSz ){
/* WRITEPOSLISTSIZE */
/* Write a provisional size field based on the unfiltered size
** (pSeg->nPos*2). Remember where the field starts (iSz) and where the
** data starts (iData) so the field can be rewritten below. */
iSz = pBuf->n;
fts5BufferAppendVarint(&p->rc, pBuf, pSeg->nPos*2);
iData = pBuf->n;
}
fts5SegiterPoslist(p, pSeg, pColset, pBuf);
if( bSz && pColset ){
/* Filtering may have dropped data. If so, the size field written
** above is wrong and is corrected in place. */
int nActual = pBuf->n - iData;
if( nActual!=pSeg->nPos ){
/* WRITEPOSLISTSIZE */
if( nActual==0 ){
return 1;
}else{
/* The new value needs nReq bytes. Any leading bytes of the
** original field that are no longer needed are overwritten with
** 0x80 so that the field keeps its original width (presumably these
** decode as leading zero bits of the varint - confirm against the
** varint format). */
int nReq = sqlite3Fts5GetVarintLen((u32)(nActual*2));
while( iSz<(iData-nReq) ){ pBuf->p[iSz++] = 0x80; }
sqlite3Fts5PutVarint(&pBuf->p[iSz], nActual*2);
}
}
}
fts5SegiterPoslist(p, pSeg, pBuf);
}
return 0;
}
static void fts5DoclistIterNext(Fts5DoclistIter *pIter){
@ -4149,7 +4251,8 @@ static void fts5SetupPrefixIter(
int bDesc, /* True for "ORDER BY rowid DESC" */
const u8 *pToken, /* Buffer containing prefix to match */
int nToken, /* Size of buffer pToken in bytes */
Fts5IndexIter **ppIter /* OUT: New iterator */
Fts5ExprColset *pColset, /* Restrict matches to these columns */
Fts5IndexIter **ppIter /* OUT: New iterator */
){
Fts5Structure *pStruct;
Fts5Buffer *aBuf;
@ -4192,8 +4295,14 @@ static void fts5SetupPrefixIter(
}
if( 0==sqlite3Fts5BufferGrow(&p->rc, &doclist, 9) ){
fts5MergeAppendDocid(&doclist, iLastRowid, iRowid);
fts5MultiIterPoslist(p, p1, 1, &doclist);
int iSave = doclist.n;
assert( doclist.n!=0 || iLastRowid==0 );
fts5BufferSafeAppendVarint(&doclist, iRowid - iLastRowid);
if( fts5MultiIterPoslist(p, p1, pColset, 1, &doclist) ){
doclist.n = iSave;
}else{
iLastRowid = iRowid;
}
}
}
@ -4427,6 +4536,7 @@ int sqlite3Fts5IndexQuery(
Fts5Index *p, /* FTS index to query */
const char *pToken, int nToken, /* Token (or prefix) to query for */
int flags, /* Mask of FTS5INDEX_QUERY_X flags */
Fts5ExprColset *pColset, /* Match these columns only */
Fts5IndexIter **ppIter /* OUT: New iterator object */
){
Fts5Config *pConfig = p->pConfig;
@ -4470,7 +4580,7 @@ int sqlite3Fts5IndexQuery(
}else{
int bDesc = (flags & FTS5INDEX_QUERY_DESC)!=0;
buf.p[0] = FTS5_MAIN_PREFIX;
fts5SetupPrefixIter(p, bDesc, buf.p, nToken+1, &pRet);
fts5SetupPrefixIter(p, bDesc, buf.p, nToken+1, pColset, &pRet);
}
if( p->rc ){
@ -4572,7 +4682,7 @@ int sqlite3Fts5IterPoslist(
*pp = &pSeg->pLeaf->p[pSeg->iLeafOffset];
}else{
fts5BufferZero(&pIter->poslist);
fts5SegiterPoslist(pIter->pIndex, pSeg, &pIter->poslist);
fts5SegiterPoslist(pIter->pIndex, pSeg, 0, &pIter->poslist);
*pp = pIter->poslist.p;
}
return fts5IndexReturn(pIter->pIndex);
@ -4588,7 +4698,7 @@ int sqlite3Fts5IterPoslistBuffer(Fts5IndexIter *pIter, Fts5Buffer *pBuf){
assert( p->rc==SQLITE_OK );
fts5BufferZero(pBuf);
fts5MultiIterPoslist(p, pIter, 0, pBuf);
fts5MultiIterPoslist(p, pIter, 0, 0, pBuf);
return fts5IndexReturn(p);
}
@ -4763,7 +4873,7 @@ static int fts5QueryCksum(
){
u64 cksum = *pCksum;
Fts5IndexIter *pIdxIter = 0;
int rc = sqlite3Fts5IndexQuery(p, z, n, flags, &pIdxIter);
int rc = sqlite3Fts5IndexQuery(p, z, n, flags, 0, &pIdxIter);
while( rc==SQLITE_OK && 0==sqlite3Fts5IterEof(pIdxIter) ){
i64 dummy;
@ -5137,7 +5247,7 @@ int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum){
fts5TestTerm(p, &term, z, n, cksum2, &cksum3);
poslist.n = 0;
fts5MultiIterPoslist(p, pIter, 0, &poslist);
fts5MultiIterPoslist(p, pIter, 0, 0, &poslist);
while( 0==sqlite3Fts5PoslistNext64(poslist.p, poslist.n, &iOff, &iPos) ){
int iCol = FTS5_POS2COLUMN(iPos);
int iTokOff = FTS5_POS2OFFSET(iPos);

View File

@ -402,7 +402,7 @@ static int fts5VocabFilterMethod(
const int flags = FTS5INDEX_QUERY_SCAN;
fts5VocabResetCursor(pCsr);
rc = sqlite3Fts5IndexQuery(pCsr->pIndex, 0, 0, flags, &pCsr->pIter);
rc = sqlite3Fts5IndexQuery(pCsr->pIndex, 0, 0, flags, 0, &pCsr->pIter);
if( rc==SQLITE_OK ){
rc = fts5VocabNextMethod(pCursor);
}

View File

@ -62,6 +62,89 @@ foreach {tn q res} {
do_execsql_test 2.3.$tn $q $res
}
#-------------------------------------------------------------------------
# Check that prefix queries with:
#
# * a column filter, and
# * no prefix index.
#
# work Ok.
#
do_execsql_test 3.0 {
CREATE VIRTUAL TABLE t3 USING fts5(a, b, c);
INSERT INTO t3(t3, rank) VALUES('pgsz', 32);
BEGIN;
INSERT INTO t3 VALUES('acb ccc bba', 'cca bba bca', 'bbc ccc bca'); -- 1
INSERT INTO t3 VALUES('cbb cac cab', 'abb aac bba', 'aab ccc cac'); -- 2
INSERT INTO t3 VALUES('aac bcb aac', 'acb bcb caa', 'aca bab bca'); -- 3
INSERT INTO t3 VALUES('aab ccb ccc', 'aca cba cca', 'aca aac cbb'); -- 4
INSERT INTO t3 VALUES('bac aab bab', 'ccb bac cba', 'acb aba abb'); -- 5
INSERT INTO t3 VALUES('bab abc ccb', 'acb cba abb', 'cbb aaa cab'); -- 6
INSERT INTO t3 VALUES('cbb bbc baa', 'aab aca baa', 'bcc cca aca'); -- 7
INSERT INTO t3 VALUES('abc bba abb', 'cac abc cba', 'acc aac cac'); -- 8
INSERT INTO t3 VALUES('bbc bbc cab', 'bcb ccb cba', 'bcc cac acb'); -- 9
COMMIT;
}
foreach {tn match res} {
1 "a : c*" {1 2 4 6 7 9}
2 "b : c*" {1 3 4 5 6 8 9}
3 "c : c*" {1 2 4 6 7 8 9}
4 "a : b*" {1 3 5 6 7 8 9}
5 "b : b*" {1 2 3 5 7 9}
6 "c : b*" {1 3 7 9}
7 "a : a*" {1 3 4 5 6 8}
8 "b : a*" {2 3 4 6 7 8}
9 "c : a*" {2 3 4 5 6 7 8 9}
} {
do_execsql_test 3.1.$tn {
SELECT rowid FROM t3($match)
} $res
}
# Replace the contents of t3 with 1000 rows of generated text, then run an
# integrity-check. The random number generator is seeded with a fixed value
# first. fts5_rnddoc is defined outside this file - presumably "fts5_rnddoc N"
# returns a random document of N tokens (confirm in fts5_common.tcl).
do_test 3.2 {
expr srand(0)
execsql { DELETE FROM t3 }
for {set i 0} {$i < 1000} {incr i} {
set a [fts5_rnddoc 3]
set b [fts5_rnddoc 8]
set c [fts5_rnddoc 20]
execsql { INSERT INTO t3 VALUES($a, $b, $c) }
}
execsql { INSERT INTO t3(t3) VALUES('integrity-check') }
} {}
# Treat $col as a Tcl list and return 1 if at least one of its elements
# matches the glob pattern $pattern, or 0 otherwise.
proc gmatch {col pattern} {
expr {[lsearch -glob $col $pattern]>=0}
}
db func gmatch gmatch
# For each prefix pattern, compute the expected rowids with the gmatch SQL
# function applied to column b, then check that the column-filtered fts5
# prefix query "b : <pattern>" returns the same rowids. The whole set is run
# twice: the 'optimize' and 'integrity-check' commands are issued at the end
# of each pass, so the second pass queries the table after 'optimize'.
for {set x 0} {$x<2} {incr x} {
foreach {tn pattern} {
1 {xa*}
2 {xb*}
3 {xc*}
4 {xd*}
5 {xe*}
6 {xf*}
7 {xg*}
8 {xh*}
9 {xi*}
10 {xj*}
} {
foreach col {b} {
set res [db eval "SELECT rowid FROM t3 WHERE gmatch($col, '$pattern')"]
set query "$col : $pattern"
do_execsql_test 3.3.$x.$tn.$col {
SELECT rowid FROM t3($query);
} $res
}
}
execsql { INSERT INTO t3(t3) VALUES('optimize') }
execsql { INSERT INTO t3(t3) VALUES('integrity-check') }
}
finish_test

View File

@ -239,6 +239,17 @@ do_execsql_test 9.3 {
SELECT rowid FROM ft2('b AND c');
} {2}
#-------------------------------------------------------------------------
#
do_execsql_test 10.0 {
CREATE VIRTUAL TABLE t3 USING fts5(a, b, c);
INSERT INTO t3 VALUES('bac aab bab', 'c bac c', 'acb aba abb'); -- 1
INSERT INTO t3 VALUES('bab abc c', 'acb c abb', 'c aaa c'); -- 2
}
do_execsql_test 10.1 {
SELECT rowid FROM t3('c: c*');
} {2}
finish_test

View File

@ -1,5 +1,5 @@
C Fix\sthe\sLIMIT\sand\sOFFSET\shandling\sfor\sUNION\sALL\squeries\sthat\scontain\sa\s\nsubquery\swith\sORDER\sBY\son\sthe\sright-hand\sside.\s\sFix\sfor\sticket\n[b65cb2c8d91f668584].
D 2015-10-06T17:27:18.737
C Optimizations\sfor\sfts5\sexpressions\sthat\sfilter\son\scolumn.\sMore\sstill\sto\scome.
D 2015-10-06T20:53:26.753
F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f
F Makefile.in 2143eeef6d0cc26006ae5fc4bb242a4a8b973412
F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23
@ -106,13 +106,13 @@ F ext/fts3/unicode/mkunicode.tcl 95cf7ec186e48d4985e433ff8a1c89090a774252
F ext/fts3/unicode/parseunicode.tcl da577d1384810fb4e2b209bf3313074353193e95
F ext/fts5/extract_api_docs.tcl a36e54ec777172ddd3f9a88daf593b00848368e0
F ext/fts5/fts5.h 98f802fe41481f9d797fce496f0fefcad72c7782
F ext/fts5/fts5Int.h ff78a77d819a7fc04a7f8b08b0e1ce361a3395e4
F ext/fts5/fts5Int.h eba5b20f1049a8908f867ff1b59299f49bb392a4
F ext/fts5/fts5_aux.c 7a307760a9c57c750d043188ec0bad59f5b5ec7e
F ext/fts5/fts5_buffer.c 54b18497395a19dfe1d00f63a3b403e5f93d4fd1
F ext/fts5/fts5_config.c 57ee5fe71578cb494574fc0e6e51acb9a22a8695
F ext/fts5/fts5_expr.c 667faaf14a69a5683ac383acdc8d942cf32c3f93
F ext/fts5/fts5_expr.c bd2618ceaaadadbc8a4792ba977b393d2d1d3a08
F ext/fts5/fts5_hash.c 4bf4b99708848357b8a2b5819e509eb6d3df9246
F ext/fts5/fts5_index.c ca3912a44ef5a173ef098f3454465519bd4b8e88
F ext/fts5/fts5_index.c 11687c48902238e1fedb0bb8e1e8b5b8f6d82e1c
F ext/fts5/fts5_main.c fe5243d6bbb79217394f0ec7f4f5199ddbc9e7e8
F ext/fts5/fts5_storage.c df061a5caf9e50fbbd43113009b5b248362f4995
F ext/fts5/fts5_tcl.c 6da58d6e8f42a93c4486b5ba9b187a7f995dee37
@ -120,7 +120,7 @@ F ext/fts5/fts5_test_mi.c e96be827aa8f571031e65e481251dc1981d608bf
F ext/fts5/fts5_tokenize.c f380f46f341af9c9a9908e1aade685ba1eaa157a
F ext/fts5/fts5_unicode2.c 78273fbd588d1d9bd0a7e4e0ccc9207348bae33c
F ext/fts5/fts5_varint.c 3f86ce09cab152e3d45490d7586b7ed2e40c13f1
F ext/fts5/fts5_vocab.c 4622e0b7d84a488a1585aaa56eb214ee67a988bc
F ext/fts5/fts5_vocab.c 17320c476a5296ee475ab616d95fd10515bacfec
F ext/fts5/fts5parse.y 833db1101b78c0c47686ab1b84918e38c36e9452
F ext/fts5/mkportersteps.tcl 5acf962d2e0074f701620bb5308155fa1e4a63ba
F ext/fts5/test/fts5_common.tcl b6e6a40ef5d069c8e86ca4fbad491e1195485dbc
@ -169,12 +169,12 @@ F ext/fts5/test/fts5optimize.test 42741e7c085ee0a1276140a752d4407d97c2c9f5
F ext/fts5/test/fts5plan.test 6a55ecbac9890765b0e16f8c421c7e0888cfe436
F ext/fts5/test/fts5porter.test 7cdc07bef301d70eebbfa75dcaf45c3680e1d0e1
F ext/fts5/test/fts5porter2.test 2e65633d58a1c525d5af0f6c01e5a59155bb3487
F ext/fts5/test/fts5prefix.test 552a462f0e8595676611f41643de217fb4ac2808
F ext/fts5/test/fts5prefix.test 5d4fd42696789843ff98a62f4b84e3f66ecad9d6
F ext/fts5/test/fts5rank.test 11dcebba31d822f7e99685b4ea2c2ae3ec0b16f1
F ext/fts5/test/fts5rebuild.test 03935f617ace91ed23a6099c7c74d905227ff29b
F ext/fts5/test/fts5restart.test c17728fdea26e7d0f617d22ad5b4b2862b994c17
F ext/fts5/test/fts5rowid.test 400384798349d658eaf06aefa1e364957d5d4821
F ext/fts5/test/fts5simple.test 06d4afbecc37f6f490a58ece4f2f7324cf2b2024
F ext/fts5/test/fts5simple.test 84d22123e0a7584f1ffb6efcd37eee46f317ab90
F ext/fts5/test/fts5synonym.test cf88c0a56d5ea9591e3939ef1f6e294f7f2d0671
F ext/fts5/test/fts5tokenizer.test ea4df698b35cc427ebf2ba22829d0e28386d8c89
F ext/fts5/test/fts5unicode.test fbef8d8a3b4b88470536cc57604a82ca52e51841
@ -1392,7 +1392,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1
F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4
F tool/warnings.sh 48bd54594752d5be3337f12c72f28d2080cb630b
F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f
P 3168326ebfa1c961d8fc6435453b02be23d910cc
R 3e47dd30d61b7e1fff555418e15c6a1d
U drh
Z e6b33681aca5812f13faee0e253c4fc3
P 4b631364354068af95a01630469cb6fbfe8b52fd
R 8d25a208807a117d422df371476abd9e
U dan
Z f7ba9c7b6fd75653c93e200489a4e473

View File

@ -1 +1 @@
4b631364354068af95a01630469cb6fbfe8b52fd
bf1607ac155018573ca40fb58aca62c5fea7e60b