From 64f1ef6abd699f929f9b32a5761a088667f65498 Mon Sep 17 00:00:00 2001 From: dan Date: Wed, 2 Dec 2020 19:08:15 +0000 Subject: [PATCH] Allow a search for an N character prefix in fts5 to use a prefix index of size N+1, if no prefix index of size N exists. FossilOrigin-Name: 78a7801d8fc9e58a62e5168e35b52b7440340549123fc6a537e2abd571f6fe7b --- ext/fts5/fts5_index.c | 32 ++++++++++++++++--- ext/fts5/test/fts5prefix2.test | 57 ++++++++++++++++++++++++++++++++++ manifest | 15 ++++----- manifest.uuid | 2 +- 4 files changed, 94 insertions(+), 12 deletions(-) create mode 100644 ext/fts5/test/fts5prefix2.test diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index 0b35eee597..60dc03c3b4 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -5082,7 +5082,8 @@ static void fts5MergePrefixLists( static void fts5SetupPrefixIter( Fts5Index *p, /* Index to read from */ int bDesc, /* True for "ORDER BY rowid DESC" */ - const u8 *pToken, /* Buffer containing prefix to match */ + int iIdx, /* Index to scan for data */ + u8 *pToken, /* Buffer containing prefix to match */ int nToken, /* Size of buffer pToken in bytes */ Fts5Colset *pColset, /* Restrict matches to these columns */ Fts5Iter **ppIter /* OUT: New iterator */ @@ -5116,6 +5117,27 @@ static void fts5SetupPrefixIter( int bNewTerm = 1; memset(&doclist, 0, sizeof(doclist)); + if( iIdx!=0 ){ + int dummy = 0; + const int f2 = FTS5INDEX_QUERY_SKIPEMPTY|FTS5INDEX_QUERY_NOOUTPUT; + pToken[0] = FTS5_MAIN_PREFIX; + fts5MultiIterNew(p, pStruct, f2, pColset, pToken, nToken, -1, 0, &p1); + fts5IterSetOutputCb(&p->rc, p1); + for(; + fts5MultiIterEof(p, p1)==0; + fts5MultiIterNext2(p, p1, &dummy) + ){ + Fts5SegIter *pSeg = &p1->aSeg[ p1->aFirst[1].iFirst ]; + p1->xSetOutputs(p1, pSeg); + if( p1->base.nData ){ + xAppend(p, p1->base.iRowid-iLastRowid, p1, &doclist); + iLastRowid = p1->base.iRowid; + } + } + fts5MultiIterFree(p1); + } + + pToken[0] = FTS5_MAIN_PREFIX + iIdx; fts5MultiIterNew(p, pStruct, flags, pColset, pToken, nToken, -1, 0, &p1); fts5IterSetOutputCb(&p->rc, p1); for( /* no-op */ ; @@ -5411,6 +5433,7 @@ int sqlite3Fts5IndexQuery( if( sqlite3Fts5BufferSize(&p->rc, &buf, nToken+1)==0 ){ int iIdx = 0; /* Index to search */ + int iPrefixIdx = 0; /* +1 prefix index */ if( nToken ) memcpy(&buf.p[1], pToken, nToken); /* Figure out which index to search and set iIdx accordingly. If this @@ -5432,7 +5455,9 @@ int sqlite3Fts5IndexQuery( if( flags & FTS5INDEX_QUERY_PREFIX ){ int nChar = fts5IndexCharlen(pToken, nToken); for(iIdx=1; iIdx<=pConfig->nPrefix; iIdx++){ - if( pConfig->aPrefix[iIdx-1]==nChar ) break; + int nIdxChar = pConfig->aPrefix[iIdx-1]; + if( nIdxChar==nChar ) break; + if( nIdxChar==nChar+1 ) iPrefixIdx = iIdx; } } @@ -5449,8 +5474,7 @@ int sqlite3Fts5IndexQuery( }else{ /* Scan multiple terms in the main index */ int bDesc = (flags & FTS5INDEX_QUERY_DESC)!=0; - buf.p[0] = FTS5_MAIN_PREFIX; - fts5SetupPrefixIter(p, bDesc, buf.p, nToken+1, pColset, &pRet); + fts5SetupPrefixIter(p, bDesc, iPrefixIdx, buf.p, nToken+1, pColset,&pRet); assert( p->rc!=SQLITE_OK || pRet->pColset==0 ); fts5IterSetOutputCb(&p->rc, pRet); if( p->rc==SQLITE_OK ){ diff --git a/ext/fts5/test/fts5prefix2.test b/ext/fts5/test/fts5prefix2.test new file mode 100644 index 0000000000..bf16e81a73 --- /dev/null +++ b/ext/fts5/test/fts5prefix2.test @@ -0,0 +1,57 @@ +# 2020 Dec 3 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# +# This file contains tests focused on prefix indexes. +# + +source [file join [file dirname [info script]] fts5_common.tcl] +set testprefix fts5prefix2 + +# If SQLITE_ENABLE_FTS5 is defined, omit this file. +ifcapable !fts5 { + finish_test + return +} + +foreach p {3 2 1} { + reset_db + do_execsql_test 1.$p.0 " + CREATE VIRTUAL TABLE t1 USING fts5(xyz, prefix=$p); + " + do_execsql_test 1.$p.1 { + INSERT INTO t1 VALUES + ('May you do good and not evil.'), + ('May you find forgiveness for yourself and forgive others.'), + ('May you share freely, never taking more than you give f.'); + } + + do_execsql_test 1.$p.2 { + SELECT highlight(t1, 0, '[', ']') FROM t1('f*'); + } { + {May you [find] [forgiveness] [for] yourself and [forgive] others.} + {May you share [freely], never taking more than you give [f].} + } +} + +do_execsql_test 2.0 { + CREATE VIRTUAL TABLE t2 USING fts5(one, prefix=3); + INSERT INTO t2 VALUES('top'); + INSERT INTO t2 VALUES('to'); + INSERT INTO t2 VALUES('tommy'); +} + +do_execsql_test 2.1 { + SELECT * FROM t2('to*'); +} {top to tommy} + + + +finish_test diff --git a/manifest b/manifest index 41892ddfab..6daed48465 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Fix\sthe\s".open"\scommand\sin\sthe\sCLI\sso\sthat\sit\saccepts\scommand-line\soptions\nboth\sbefore\sand\safter\sthe\sfilename. -D 2020-12-02T18:27:48.742 +C Allow\sa\ssearch\sfor\san\sN\scharacter\sprefix\sin\sfts5\sto\suse\sa\sprefix\sindex\sof\ssize\sN+1,\sif\sno\sprefix\sindex\sof\ssize\sN\sexists. +D 2020-12-02T19:08:15.960 F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724 @@ -119,7 +119,7 @@ F ext/fts5/fts5_buffer.c 5a5fe0159752c0fb0a5a93c722e9db2662822709490769d482b76a6 F ext/fts5/fts5_config.c be54f44fca491e96c6923a4b9a736f2da2b13811600eb6e38d1bcc91c4ea2e61 F ext/fts5/fts5_expr.c e527e3a7410393075598cec544e3831798a8c88b3e8878e2cfb7cb147113e925 F ext/fts5/fts5_hash.c 1aa93c9b5f461afba66701ee226297dc78402b3bdde81e90a10de5fe3df14959 -F ext/fts5/fts5_index.c 728cb3b5dd5dffec055185ac89bdd441c97cd16de72c6dba8a85c7762cafa68f +F ext/fts5/fts5_index.c e4650549c755acadc3974c69952f40867181d6cb7583ab24e9da72354e0f58ad F ext/fts5/fts5_main.c b4e4931c7fcc9acfa0c3b8b5e5e80b5b424b8d9207aae3a22b674bd35ccf149d F ext/fts5/fts5_storage.c 58ba71e6cd3d43a5735815e7956ee167babb4d2cbfe206905174792af4d09d75 F ext/fts5/fts5_tcl.c 39bcbae507f594aad778172fa914cad0f585bf92fd3b078c686e249282db0d95 @@ -204,6 +204,7 @@ F ext/fts5/test/fts5plan.test 79d35b5e83bbdcba48d946a7f008df161f6b0ede1a966892d0 F ext/fts5/test/fts5porter.test 8d08010c28527db66bc3feebd2b8767504aaeb9b101a986342fa7833d49d0d15 F ext/fts5/test/fts5porter2.test 0d251a673f02fa13ca7f011654873b3add20745f7402f108600a23e52d8c7457 F ext/fts5/test/fts5prefix.test a0fa67b06650f2deaa7bf27745899d94e0fb547ad9ecbd08bfad98c04912c056 +F ext/fts5/test/fts5prefix2.test 3847ce46f70b82d61c6095103a9d7c53f2952c40a4704157bc079c04d9c8b18b F ext/fts5/test/fts5query.test ac363b17a442620bb0780e93c24f16a5f963dfe2f23dc85647b869efcfada728 F ext/fts5/test/fts5rank.test c9fd4a1e36b4fa92d572ec13d846469b97da249d1c2f7fd3ee7e017ce46f2416 F ext/fts5/test/fts5rebuild.test 55d6f17715cddbf825680dd6551efbc72ed916d8cf1cde40a46fc5d785b451e7 @@ -1886,7 +1887,7 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0 -P d8de2f236d43a88fac7550a0451951dd5a945eb304e32f82e662479cea7c2684 -R 514e397878ccd7b216cdd7d01f670ef0 -U drh -Z 0bdc6b19b24e33aa04196376633bb49f +P d330bf0c02e67f70f49496e4b1e484bb4e876622becc6a062b2aefbd585d0117 +R f01d1880d7623e5e268039319f8ab01a +U dan +Z c03fc1ff589c028c9b59c035c598373d diff --git a/manifest.uuid b/manifest.uuid index 935ed2e4cf..341a8fec64 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -d330bf0c02e67f70f49496e4b1e484bb4e876622becc6a062b2aefbd585d0117 \ No newline at end of file +78a7801d8fc9e58a62e5168e35b52b7440340549123fc6a537e2abd571f6fe7b \ No newline at end of file