Allow fts5 trigram tables created with detail=column or detail=none to optimize LIKE and GLOB queries. Allow case-insensitive tables to optimize GLOB as well as LIKE.

FossilOrigin-Name: 64782463be62b72b5cd0bfaa7c9b69aa487d807c5fe0e65a272080b7739fd21b
This commit is contained in:
dan 2020-10-05 16:41:56 +00:00
parent a344ad47f3
commit f46be6a1b9
7 changed files with 250 additions and 68 deletions

View File

@ -686,11 +686,19 @@ struct Fts5Token {
/*
** Parse a MATCH expression.
**
** *ppNew and *pzErr are both cleared before parsing begins. Returns an
** SQLite result code; SQLITE_NOMEM is returned if the parser engine
** cannot be allocated.
*/
int sqlite3Fts5ExprNew(
Fts5Config *pConfig, /* FTS5 configuration */
int bPhraseToAnd, /* True to convert "a+b" to "a AND b" */
int iCol, /* Column on LHS of MATCH operator */
const char *zExpr, /* Expression text */
Fts5Expr **ppNew, /* OUT: new expression object */
char **pzErr /* OUT: error message, if any */
);
/*
** Build an expression object from the LIKE or GLOB pattern zText.
** If the pattern yields no expression text, *pp is set to 0.
*/
int sqlite3Fts5ExprPattern(
Fts5Config *pConfig, /* FTS5 configuration */
int bGlob, /* Non-zero for a GLOB pattern, zero for LIKE */
int iCol, /* Column the pattern constraint applies to */
const char *zText, /* Pattern text */
Fts5Expr **pp /* OUT: new expression object, or 0 */
);
/*
** for(rc = sqlite3Fts5ExprFirst(pExpr, pIdx, bDesc);

View File

@ -128,6 +128,7 @@ struct Fts5Parse {
int nPhrase; /* Size of apPhrase array */
Fts5ExprPhrase **apPhrase; /* Array of all phrases */
Fts5ExprNode *pExpr; /* Result of a successful parse */
int bPhraseToAnd; /* Convert "a+b" to "a AND b" */
};
void sqlite3Fts5ParseError(Fts5Parse *pParse, const char *zFmt, ...){
@ -216,6 +217,7 @@ static void fts5ParseFree(void *p){ sqlite3_free(p); }
int sqlite3Fts5ExprNew(
Fts5Config *pConfig, /* FTS5 Configuration */
int bPhraseToAnd,
int iCol,
const char *zExpr, /* Expression text */
Fts5Expr **ppNew,
@ -231,6 +233,7 @@ int sqlite3Fts5ExprNew(
*ppNew = 0;
*pzErr = 0;
memset(&sParse, 0, sizeof(sParse));
sParse.bPhraseToAnd = bPhraseToAnd;
pEngine = sqlite3Fts5ParserAlloc(fts5ParseAlloc);
if( pEngine==0 ){ return SQLITE_NOMEM; }
sParse.pConfig = pConfig;
@ -273,6 +276,7 @@ int sqlite3Fts5ExprNew(
pNew->pConfig = pConfig;
pNew->apExprPhrase = sParse.apPhrase;
pNew->nPhrase = sParse.nPhrase;
pNew->bDesc = 0;
sParse.apPhrase = 0;
}
}else{
@ -293,7 +297,7 @@ int sqlite3Fts5ExprNew(
** code.
*/
int sqlite3Fts5ExprPattern(
Fts5Config *pConfig, int iCol, const char *zText, Fts5Expr **pp
Fts5Config *pConfig, int bGlob, int iCol, const char *zText, Fts5Expr **pp
){
i64 nText = strlen(zText);
char *zExpr = (char*)sqlite3_malloc64(nText*4 + 1);
@ -307,7 +311,7 @@ int sqlite3Fts5ExprPattern(
int i = 0;
int iFirst = 0;
if( pConfig->ePattern==FTS5_PATTERN_LIKE ){
if( bGlob==0 ){
aSpec[0] = '_';
aSpec[1] = '%';
aSpec[2] = 0;
@ -341,8 +345,15 @@ int sqlite3Fts5ExprPattern(
i++;
}
if( iOut>0 ){
int bAnd = 0;
if( pConfig->eDetail!=FTS5_DETAIL_FULL ){
bAnd = 1;
if( pConfig->eDetail==FTS5_DETAIL_NONE ){
iCol = pConfig->nCol;
}
}
zExpr[iOut] = '\0';
rc = sqlite3Fts5ExprNew(pConfig, iCol, zExpr, pp, pConfig->pzErrmsg);
rc = sqlite3Fts5ExprNew(pConfig, bAnd, iCol, zExpr, pp,pConfig->pzErrmsg);
}else{
*pp = 0;
}
@ -1729,6 +1740,20 @@ void sqlite3Fts5ParseFinished(Fts5Parse *pParse, Fts5ExprNode *p){
pParse->pExpr = p;
}
/*
** Grow the pParse->apPhrase[] array by 8 slots whenever the current
** phrase count (pParse->nPhrase) is an exact multiple of 8. Otherwise
** this function is a no-op.
**
** Returns SQLITE_OK on success. If the reallocation fails, pParse->rc is
** set to SQLITE_NOMEM, pParse->apPhrase is left unchanged and
** SQLITE_NOMEM is returned.
*/
static int parseGrowPhraseArray(Fts5Parse *pParse){
  Fts5ExprPhrase **apGrown;
  sqlite3_int64 nAlloc;

  /* Not on an 8-entry boundary - no reallocation is attempted. */
  if( (pParse->nPhrase % 8)!=0 ) return SQLITE_OK;

  nAlloc = sizeof(Fts5ExprPhrase*) * (pParse->nPhrase + 8);
  apGrown = (Fts5ExprPhrase**)sqlite3_realloc64(pParse->apPhrase, nAlloc);
  if( apGrown==0 ){
    pParse->rc = SQLITE_NOMEM;
    return SQLITE_NOMEM;
  }

  pParse->apPhrase = apGrown;
  return SQLITE_OK;
}
/*
** This function is called by the parser to process a string token. The
** string may or may not be quoted. In any case it is tokenized and a
@ -1764,17 +1789,10 @@ Fts5ExprPhrase *sqlite3Fts5ParseTerm(
}else{
if( pAppend==0 ){
if( (pParse->nPhrase % 8)==0 ){
sqlite3_int64 nByte = sizeof(Fts5ExprPhrase*) * (pParse->nPhrase + 8);
Fts5ExprPhrase **apNew;
apNew = (Fts5ExprPhrase**)sqlite3_realloc64(pParse->apPhrase, nByte);
if( apNew==0 ){
pParse->rc = SQLITE_NOMEM;
if( parseGrowPhraseArray(pParse) ){
fts5ExprPhraseFree(sCtx.pPhrase);
return 0;
}
pParse->apPhrase = apNew;
}
pParse->nPhrase++;
}
@ -2180,6 +2198,67 @@ static void fts5ExprAddChildren(Fts5ExprNode *p, Fts5ExprNode *pSub){
}
}
/*
** Helper for LIKE and GLOB pattern queries against trigram indexes that
** were created with detail=column or detail=none. The nearset passed as
** the second argument must contain exactly one phrase. That phrase:
**
**     abc + def + ghi
**
** is rewritten as an AND node with one single-term phrase per child:
**
**     abc AND def AND ghi
**
** On success the new FTS5_AND node is returned and pNear is freed. If the
** node cannot be allocated, or if pParse->rc is set once the children
** have been built, NULL is returned, any partially built node is freed
** and pNear is not freed by this function.
*/
static Fts5ExprNode *fts5ParsePhraseToAnd(
  Fts5Parse *pParse,
  Fts5ExprNearset *pNear
){
  Fts5ExprPhrase *pOrig = pNear->apPhrase[0];   /* The multi-term phrase */
  int nTerm = pOrig->nTerm;                     /* Number of AND children */
  int iTerm;                                    /* Current term index */
  int nByte;                                    /* Size of AND node in bytes */
  Fts5ExprNode *pAnd;                           /* Node under construction */

  assert( pNear->nPhrase==1 );
  assert( pParse->bPhraseToAnd );

  nByte = sizeof(Fts5ExprNode) + nTerm*sizeof(Fts5ExprNode*);
  pAnd = (Fts5ExprNode*)sqlite3Fts5MallocZero(&pParse->rc, nByte);
  if( pAnd==0 ) return 0;

  pAnd->eType = FTS5_AND;
  pAnd->nChild = nTerm;
  fts5ExprAssignXNext(pAnd);

  /* Drop one entry from the parser's phrase count. The single-term
  ** phrases created below are appended starting at that position
  ** (presumably the slot of the original multi-term phrase). */
  pParse->nPhrase--;

  for(iTerm=0; iTerm<nTerm; iTerm++){
    Fts5ExprNearset *pSet;
    Fts5ExprPhrase *pNew = (Fts5ExprPhrase*)sqlite3Fts5MallocZero(
        &pParse->rc, sizeof(Fts5ExprPhrase)
    );

    /* Allocation failures do not abort the loop; pParse->rc is checked
    ** after all terms have been visited. */
    if( pNew==0 ) continue;
    if( parseGrowPhraseArray(pParse) ){
      fts5ExprPhraseFree(pNew);
      continue;
    }

    pParse->apPhrase[pParse->nPhrase++] = pNew;
    pNew->nTerm = 1;
    pNew->aTerm[0].zTerm = sqlite3Fts5Strndup(
        &pParse->rc, pOrig->aTerm[iTerm].zTerm, -1
    );

    /* Wrap the new phrase in its own nearset and FTS5_STRING node. */
    pSet = sqlite3Fts5ParseNearset(pParse, 0, pNew);
    pAnd->apChild[iTerm] = sqlite3Fts5ParseNode(
        pParse, FTS5_STRING, 0, 0, pSet
    );
  }

  if( pParse->rc ){
    sqlite3Fts5ParseNodeFree(pAnd);
    return 0;
  }

  /* Success: the original nearset is no longer required. */
  sqlite3Fts5ParseNearsetFree(pNear);
  return pAnd;
}
/*
** Allocate and return a new expression object. If anything goes wrong (i.e.
** OOM error), leave an error code in pParse and return NULL.
@ -2204,6 +2283,12 @@ Fts5ExprNode *sqlite3Fts5ParseNode(
if( eType!=FTS5_STRING && pLeft==0 ) return pRight;
if( eType!=FTS5_STRING && pRight==0 ) return pLeft;
if( eType==FTS5_STRING
&& pParse->bPhraseToAnd
&& pNear->apPhrase[0]->nTerm>1
){
pRet = fts5ParsePhraseToAnd(pParse, pNear);
}else{
if( eType==FTS5_NOT ){
nChild = 2;
}else if( eType==FTS5_AND || eType==FTS5_OR ){
@ -2252,6 +2337,7 @@ Fts5ExprNode *sqlite3Fts5ParseNode(
}
}
}
}
if( pRet==0 ){
assert( pParse->rc!=SQLITE_OK );
@ -2602,7 +2688,7 @@ static void fts5ExprFunction(
rc = sqlite3Fts5ConfigParse(pGlobal, db, nConfig, azConfig, &pConfig, &zErr);
if( rc==SQLITE_OK ){
rc = sqlite3Fts5ExprNew(pConfig, pConfig->nCol, zExpr, &pExpr, &zErr);
rc = sqlite3Fts5ExprNew(pConfig, 0, pConfig->nCol, zExpr, &pExpr, &zErr);
}
if( rc==SQLITE_OK ){
char *zText;

View File

@ -464,6 +464,23 @@ static void fts5SetUniqueFlag(sqlite3_index_info *pIdxInfo){
#endif
}
/*
** Return 1 if the constraint p may be handled as a pattern match by a
** table with configuration pConfig, or 0 otherwise:
**
**   * If pConfig->ePattern is FTS5_PATTERN_GLOB, only GLOB constraints
**     qualify.
**   * If pConfig->ePattern is FTS5_PATTERN_LIKE, both LIKE and GLOB
**     constraints qualify.
**
** The FTS5_PATTERN_* values are compared directly against p->op, so they
** must match the corresponding SQLITE_INDEX_CONSTRAINT_* values.
*/
static int fts5UsePatternMatch(
  Fts5Config *pConfig,
  struct sqlite3_index_constraint *p
){
  int bOpGlob;                    /* True if p is a GLOB constraint */
  int bOpLike;                    /* True if p is a LIKE constraint */

  assert( FTS5_PATTERN_GLOB==SQLITE_INDEX_CONSTRAINT_GLOB );
  assert( FTS5_PATTERN_LIKE==SQLITE_INDEX_CONSTRAINT_LIKE );

  bOpGlob = (p->op==FTS5_PATTERN_GLOB);
  bOpLike = (p->op==FTS5_PATTERN_LIKE);

  if( pConfig->ePattern==FTS5_PATTERN_GLOB ) return bOpGlob;
  if( pConfig->ePattern==FTS5_PATTERN_LIKE ) return (bOpLike || bOpGlob);
  return 0;
}
/*
** Implementation of the xBestIndex method for FTS5 tables. Within the
** WHERE constraint, it searches for the following:
@ -591,7 +608,7 @@ static int fts5BestIndexMethod(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){
pInfo->aConstraintUsage[i].omit = 1;
}
}else if( p->usable ){
if( iCol>=0 && iCol<nCol && pConfig->ePattern==p->op ){
if( iCol>=0 && iCol<nCol && fts5UsePatternMatch(pConfig, p) ){
assert( p->op==FTS5_PATTERN_LIKE || p->op==FTS5_PATTERN_GLOB );
idxStr[iIdxStr++] = p->op==FTS5_PATTERN_LIKE ? 'L' : 'G';
sqlite3_snprintf(6, &idxStr[iIdxStr], "%d", iCol);
@ -1252,7 +1269,7 @@ static int fts5FilterMethod(
goto filter_out;
}else{
char **pzErr = &pTab->p.base.zErrMsg;
rc = sqlite3Fts5ExprNew(pConfig, iCol, zText, &pExpr, pzErr);
rc = sqlite3Fts5ExprNew(pConfig, 0, iCol, zText, &pExpr, pzErr);
if( rc==SQLITE_OK ){
rc = sqlite3Fts5ExprAnd(&pCsr->pExpr, pExpr);
pExpr = 0;
@ -1264,6 +1281,7 @@ static int fts5FilterMethod(
}
case 'L':
case 'G': {
int bGlob = (idxStr[iIdxStr-1]=='G');
const char *zText = (const char*)sqlite3_value_text(apVal[i]);
iCol = 0;
do{
@ -1271,7 +1289,7 @@ static int fts5FilterMethod(
iIdxStr++;
}while( idxStr[iIdxStr]>='0' && idxStr[iIdxStr]<='9' );
if( zText ){
rc = sqlite3Fts5ExprPattern(pConfig, iCol, zText, &pExpr);
rc = sqlite3Fts5ExprPattern(pConfig, bGlob, iCol, zText, &pExpr);
}
if( rc==SQLITE_OK ){
rc = sqlite3Fts5ExprAnd(&pCsr->pExpr, pExpr);

View File

@ -37,7 +37,7 @@ do_execsql_test 2.0 {
}
faultsim_save_and_close
do_faultsim_test 2 -faults ioerr-t* -prep {
do_faultsim_test 2 -prep {
faultsim_restore_and_reopen
} -body {
execsql {
@ -48,6 +48,24 @@ do_faultsim_test 2 -faults ioerr-t* -prep {
faultsim_test_result {0 1} {1 {vtable constructor failed: t1}}
}
# Tests 3.*: inject faults into a query that combines a LIKE constraint
# with a MATCH constraint on a trigram table created with detail=none.
# Each run must either return a count of 1 or fail with the
# "vtable constructor failed: t1" error.
#
reset_db
do_execsql_test 3.0 {
CREATE VIRTUAL TABLE t1 USING fts5(x, y, tokenize=trigram, detail=none);
INSERT INTO t1 VALUES('abcdefghijklmnopqrstuvwxyz', NULL);
}
faultsim_save_and_close
do_faultsim_test 3 -prep {
faultsim_restore_and_reopen
} -body {
execsql {
SELECT count(*) FROM t1 WHERE x LIKE '%mnopqrs%' AND t1 MATCH 'abc'
}
} -test {
faultsim_test_result {0 1} {1 {vtable constructor failed: t1}}
}
finish_test

View File

@ -145,5 +145,57 @@ do_execsql_test 4.2 {
INSERT INTO t0(t0) VALUES('integrity-check');
}
#-------------------------------------------------------------------------
# Tests 5.*: for each detail mode supplied by foreach_detail_mode, and with
# the trigram tokenizer option case_sensitive set to 0 and then to 1, run
# LIKE queries against a two-row table and check the rowids returned. The
# expected results are the same for both case_sensitive settings.
#
reset_db
foreach_detail_mode $::testprefix {
foreach {ci} {0 1} {
reset_db
do_execsql_test 5.cs=$ci.0.1 "
CREATE VIRTUAL TABLE t1 USING fts5(
y, tokenize=\"trigram case_sensitive $ci\", detail=%DETAIL%
);
"
do_execsql_test 5.cs=$ci.0.2 {
INSERT INTO t1 VALUES('abcdefghijklm');
INSERT INTO t1 VALUES('กรุงเทพมหานคร');
}
# Each entry is: test number, LIKE pattern, expected rowids.
foreach {tn like res} {
1 {%cDef%} 1
2 {cDef%} {}
3 {%f%} 1
4 {%f_h%} 1
5 {%f_g%} {}
6 {abc%klm} 1
7 {ABCDEFG%} 1
8 {%รุงเ%} 2
} {
do_execsql_test 5.cs=$ci.1.$tn {
SELECT rowid FROM t1 WHERE y LIKE $like
} $res
}
}
}
# Tests 6.*: use the query plan to check which of LIKE and GLOB are handed
# to the fts5 index. An index string of "L0" or "G0" means the LIKE or GLOB
# constraint on column 0 is used by fts5; an empty index string means the
# constraint is not used.
#
do_execsql_test 6.0 {
CREATE VIRTUAL TABLE ci0 USING fts5(x, tokenize="trigram");
CREATE VIRTUAL TABLE ci1 USING fts5(x, tokenize="trigram case_sensitive 1");
}
# LIKE and GLOB both work with case-insensitive tokenizers. Only GLOB works
# with case-sensitive.
do_eqp_test 6.1 {
SELECT * FROM ci0 WHERE x LIKE ?
} {VIRTUAL TABLE INDEX 0:L0}
do_eqp_test 6.2 {
SELECT * FROM ci0 WHERE x GLOB ?
} {VIRTUAL TABLE INDEX 0:G0}
do_eqp_test 6.3 {
SELECT * FROM ci1 WHERE x LIKE ?
} {{SCAN TABLE ci1 VIRTUAL TABLE INDEX 0:}}
do_eqp_test 6.4 {
SELECT * FROM ci1 WHERE x GLOB ?
} {VIRTUAL TABLE INDEX 0:G0}
finish_test

View File

@ -1,5 +1,5 @@
C Do\snot\sapply\saffinities\swhen\screating\sentries\sfor\sautomatic\sindexes\son\sviews.\sFix\sfor\s[95302bdb].
D 2020-10-03T19:16:36.025
C Allow\sfts5\strigram\stables\screated\swith\sdetail=column\sor\sdetail=none\sto\soptimize\sLIKE\sand\sGLOB\squeries.\sAllow\scase-insensitive\stables\sto\soptimize\sGLOB\sas\swell\sas\sLIKE.
D 2020-10-05T16:41:56.128
F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1
F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea
F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724
@ -112,14 +112,14 @@ F ext/fts3/unicode/mkunicode.tcl bf7fcaa6d68e6d38223467983785d054f1cff4d9e3905dd
F ext/fts3/unicode/parseunicode.tcl a981bd6466d12dd17967515801c3ff23f74a281be1a03cf1e6f52a6959fc77eb
F ext/fts5/extract_api_docs.tcl a36e54ec777172ddd3f9a88daf593b00848368e0
F ext/fts5/fts5.h c132a9323f22a972c4c93a8d5a3d901113a6e612faf30ca8e695788438c5ca2a
F ext/fts5/fts5Int.h 928aed51dbeb4acc0d2e3ceeebb5f6918d64c9ad5c4e7634a238895abea40350
F ext/fts5/fts5Int.h 26c74dd5776f798436fbf604a0bf0e8de263b35b5060b05c15f9085845d9fda2
F ext/fts5/fts5_aux.c dcc627d8b6e3fc773db528ff67b39955dab7b51628f9dba8e15849e5bedfd7fa
F ext/fts5/fts5_buffer.c 5a5fe0159752c0fb0a5a93c722e9db2662822709490769d482b76a6dc8aaca70
F ext/fts5/fts5_config.c be54f44fca491e96c6923a4b9a736f2da2b13811600eb6e38d1bcc91c4ea2e61
F ext/fts5/fts5_expr.c 7eba8fed2a8f154413814f63c21b34d7562b7d80a62614b9301a5ba6a700f4f0
F ext/fts5/fts5_expr.c e527e3a7410393075598cec544e3831798a8c88b3e8878e2cfb7cb147113e925
F ext/fts5/fts5_hash.c 15bffa734fbdca013b2289c6f8827a3b935ef14bd4dde5837d31a75434c00627
F ext/fts5/fts5_index.c 255d3ce3fec28be11c533451e5b23bd79e71a13a1b120f3658b34fff6b097816
F ext/fts5/fts5_main.c 8b99e0ba99fd3414566b3afa26f891b53349db801f4b0bea69eec05cc00487d5
F ext/fts5/fts5_main.c b4e4931c7fcc9acfa0c3b8b5e5e80b5b424b8d9207aae3a22b674bd35ccf149d
F ext/fts5/fts5_storage.c 58ba71e6cd3d43a5735815e7956ee167babb4d2cbfe206905174792af4d09d75
F ext/fts5/fts5_tcl.c 39bcbae507f594aad778172fa914cad0f585bf92fd3b078c686e249282db0d95
F ext/fts5/fts5_test_mi.c 08c11ec968148d4cb4119d96d819f8c1f329812c568bac3684f5464be177d3ee
@ -180,7 +180,7 @@ F ext/fts5/test/fts5fault9.test 098e6b894bbdf9b2192f994a30f4043673fb3f338b6b8ab1
F ext/fts5/test/fts5faultA.test be4487576bff8c22cee6597d1893b312f306504a8c6ccd3c53ca85af12290c8c
F ext/fts5/test/fts5faultB.test d606bdb8e81aaeb6f41de3fc9fc7ae315733f0903fbff05cf54f5b045b729ab5
F ext/fts5/test/fts5faultD.test e7ed7895abfe6bc98a5e853826f6b74956e7ba7f594f1860bbf9e504b9647996
F ext/fts5/test/fts5faultE.test aa7caab3597390b753e0755c087f118f775804a070bd0960f5a4bb6246ed6a29
F ext/fts5/test/fts5faultE.test 844586ce71dab4be85bb86880e87b624d089f851654cd22e4710c77eb8ce7075
F ext/fts5/test/fts5first.test 3fcf2365c00a15fc9704233674789a3b95131d12de18a9b996159f6909dc8079
F ext/fts5/test/fts5full.test e1701a112354e0ff9a1fdffb0c940c576530c33732ee20ac5e8361777070d717
F ext/fts5/test/fts5fuzz1.test 238d8c45f3b81342aa384de3e581ff2fa330bf922a7b69e484bbc06051a1080e
@ -217,7 +217,7 @@ F ext/fts5/test/fts5synonym2.test b54cce5c34ec08ed616f646635538ae82e34a0e28f947e
F ext/fts5/test/fts5tok1.test ce6551e41ff56f30b69963577324624733bed0d1753589f06120d664d9cd45c9
F ext/fts5/test/fts5tok2.test dcacb32d4a2a3f0dd3215d4a3987f78ae4be21a2
F ext/fts5/test/fts5tokenizer.test ac3c9112b263a639fb0508ae73a3ee886bf4866d2153771a8e8a20c721305a43
F ext/fts5/test/fts5trigram.test d96062318c184ad74185c29aac95fe60a10a3d93d63d17b09c45d27672e3236a
F ext/fts5/test/fts5trigram.test e2ce256fd0ccd6707e740aa3596206aedb0d5834b100c0cb49e344dcd1d8463c
F ext/fts5/test/fts5umlaut.test a42fe2fe6387c40c49ab27ccbd070e1ae38e07f38d05926482cc0bccac9ad602
F ext/fts5/test/fts5unicode.test 17056f4efe6b0a5d4f41fdf7a7dc9af2873004562eaa899d40633b93dc95f5a9
F ext/fts5/test/fts5unicode2.test 9b3df486de05fb4bde4aa7ee8de2e6dae1df6eb90e3f2e242c9383b95d314e3e
@ -1882,7 +1882,7 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93
F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc
F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e
F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0
P 6e72a08de764077f2bba6f7e3b99ea29001941671a971f2ccf7ceeb9c682fb1a
R b5c2d2603dfe08fa61d71e85b04bd8e5
P df12f097224ebc9473d9e2a8933bd7efed6490d09af2c012c9b59a7892369b7f
R 9f99bc6eea7328683a4833687f438fc2
U dan
Z 7b521d3697cf8721db6deda0bbf532cb
Z ee7ba08673592ab83794033954bbc420

View File

@ -1 +1 @@
df12f097224ebc9473d9e2a8933bd7efed6490d09af2c012c9b59a7892369b7f
64782463be62b72b5cd0bfaa7c9b69aa487d807c5fe0e65a272080b7739fd21b