Re-introduce the prefix-search optimization of [feef1b15d6], which was lost in a reorganization of FTS3 code.

FossilOrigin-Name: d692434b4935e8e7858230af1c126b0be8203077
This commit is contained in:
dan 2010-07-19 11:16:36 +00:00
parent bd0c001405
commit a69f7d5aa5
6 changed files with 157 additions and 96 deletions

View File

@ -1621,10 +1621,58 @@ static int fts3DoclistMerge(
typedef struct TermSelect TermSelect;
struct TermSelect {
int isReqPos; /* Non-zero: merge using MERGE_POS_OR, otherwise MERGE_OR */
char *aOutput; /* Malloc'd output buffer */
int nOutput; /* Size of output in bytes */
char *aaOutput[16]; /* Malloc'd doclist buffers; a slot is 0 when unused */
int anOutput[16]; /* Size in bytes of the matching aaOutput[] entry */
};
/*
** Merge all doclists in the TermSelect.aaOutput[] array into a single
** doclist stored in TermSelect.aaOutput[0]. If successful, delete all
** other doclists (except the aaOutput[0] one) and return SQLITE_OK.
**
** If an OOM error occurs, return SQLITE_NOMEM. In this case it is
** the responsibility of the caller to free any doclists left in the
** TermSelect.aaOutput[] array. Any buffer this function has taken out
** of the array has already been released here, and its slot has been
** cleared, so the caller may safely sqlite3_free() every slot.
*/
static int fts3TermSelectMerge(TermSelect *pTS){
  int mergetype = (pTS->isReqPos ? MERGE_POS_OR : MERGE_OR);
  char *aOut = 0;                 /* Doclist accumulated so far */
  int nOut = 0;                   /* Size of aOut in bytes */
  int i;

  /* Loop through the doclists in the aaOutput[] array. Merge them all
  ** into a single doclist.
  */
  for(i=0; i<SizeofArray(pTS->aaOutput); i++){
    if( pTS->aaOutput[i] ){
      if( !aOut ){
        /* First non-empty slot: take ownership of its buffer. The slot
        ** that must be cleared is slot i, which is not necessarily slot 0.
        ** Leaving the pointer in the array would let it be freed twice:
        ** once below (or on the OOM path) and once more by the caller.
        */
        aOut = pTS->aaOutput[i];
        nOut = pTS->anOutput[i];
        pTS->aaOutput[i] = 0;
      }else{
        /* The merged doclist is given a buffer as large as both inputs
        ** combined; fts3DoclistMerge() writes the actual size to nNew.
        */
        int nNew = nOut + pTS->anOutput[i];
        char *aNew = sqlite3_malloc(nNew);
        if( !aNew ){
          sqlite3_free(aOut);
          return SQLITE_NOMEM;
        }
        fts3DoclistMerge(mergetype, 0, 0,
            aNew, &nNew, pTS->aaOutput[i], pTS->anOutput[i], aOut, nOut
        );
        sqlite3_free(pTS->aaOutput[i]);
        sqlite3_free(aOut);
        pTS->aaOutput[i] = 0;
        aOut = aNew;
        nOut = nNew;
      }
    }
  }

  /* Every slot is now empty; publish the merged result in slot 0. */
  pTS->aaOutput[0] = aOut;
  pTS->anOutput[0] = nOut;
  return SQLITE_OK;
}
/*
** This function is used as the sqlite3Fts3SegReaderIterate() callback when
** querying the full-text index for a doclist associated with a term or
@ -1639,38 +1687,63 @@ static int fts3TermSelectCb(
int nDoclist
){
TermSelect *pTS = (TermSelect *)pContext;
int nNew = pTS->nOutput + nDoclist;
char *aNew = sqlite3_malloc(nNew);
UNUSED_PARAMETER(p);
UNUSED_PARAMETER(zTerm);
UNUSED_PARAMETER(nTerm);
if( !aNew ){
return SQLITE_NOMEM;
}
if( pTS->nOutput==0 ){
if( pTS->aaOutput[0]==0 ){
/* If this is the first term selected, copy the doclist to the output
** buffer using memcpy(). TODO: Add a way to transfer control of the
** aDoclist buffer from the caller so as to avoid the memcpy().
*/
memcpy(aNew, aDoclist, nDoclist);
pTS->aaOutput[0] = sqlite3_malloc(nDoclist);
pTS->anOutput[0] = nDoclist;
if( pTS->aaOutput[0] ){
memcpy(pTS->aaOutput[0], aDoclist, nDoclist);
}else{
return SQLITE_NOMEM;
}
}else{
/* The output buffer is not empty. Merge doclist aDoclist with the
** existing output. This can only happen with prefix-searches (as
** searches for exact terms return exactly one doclist).
*/
int mergetype = (pTS->isReqPos ? MERGE_POS_OR : MERGE_OR);
fts3DoclistMerge(mergetype, 0, 0,
aNew, &nNew, pTS->aOutput, pTS->nOutput, aDoclist, nDoclist
);
char *aMerge = aDoclist;
int nMerge = nDoclist;
int iOut;
for(iOut=0; iOut<SizeofArray(pTS->aaOutput); iOut++){
char *aNew;
int nNew;
if( pTS->aaOutput[iOut]==0 ){
assert( iOut>0 );
pTS->aaOutput[iOut] = aMerge;
pTS->anOutput[iOut] = nMerge;
break;
}
nNew = nMerge + pTS->anOutput[iOut];
aNew = sqlite3_malloc(nNew);
if( !aNew ){
if( aMerge!=aDoclist ){
sqlite3_free(aMerge);
}
return SQLITE_NOMEM;
}
fts3DoclistMerge(mergetype, 0, 0,
aNew, &nNew, pTS->aaOutput[iOut], pTS->anOutput[iOut], aMerge, nMerge
);
if( iOut>0 ) sqlite3_free(aMerge);
sqlite3_free(pTS->aaOutput[iOut]);
pTS->aaOutput[iOut] = 0;
aMerge = aNew;
nMerge = nNew;
if( (iOut+1)==SizeofArray(pTS->aaOutput) ){
pTS->aaOutput[iOut] = aMerge;
pTS->anOutput[iOut] = nMerge;
}
}
}
sqlite3_free(pTS->aOutput);
pTS->aOutput = aNew;
pTS->nOutput = nNew;
return SQLITE_OK;
}
@ -1794,12 +1867,17 @@ static int fts3TermSelect(
rc = sqlite3Fts3SegReaderIterate(p, apSegment, nSegment, &filter,
fts3TermSelectCb, (void *)&tsc
);
if( rc==SQLITE_OK ){
rc = fts3TermSelectMerge(&tsc);
}
if( rc==SQLITE_OK ){
*ppOut = tsc.aOutput;
*pnOut = tsc.nOutput;
*ppOut = tsc.aaOutput[0];
*pnOut = tsc.anOutput[0];
}else{
sqlite3_free(tsc.aOutput);
for(i=0; i<SizeofArray(tsc.aaOutput); i++){
sqlite3_free(tsc.aaOutput[i]);
}
}
finished:

View File

@ -1,5 +1,5 @@
C Enable\spreviously\sfailing\stests\sin\se_expr.test\sthat\spass\sfollowing\s[3e5975aa3b].
D 2010-07-19T05:27:18
C Re-introduce\sthe\sprefix-search\soptimization\sof\s[feef1b15d6],\swhich\swas\slost\sin\sa\sreorganization\sof\sFTS3\scode.
D 2010-07-19T11:16:37
F Makefile.arm-wince-mingw32ce-gcc fcd5e9cd67fe88836360bb4f9ef4cb7f8e2fb5a0
F Makefile.in ec08dc838fd8110fe24c92e5130bcd91cbb1ff2e
F Makefile.linux-gcc d53183f4aa6a9192d249731c90dbdffbd2c68654
@ -60,7 +60,7 @@ F ext/fts2/mkfts2amal.tcl 974d5d438cb3f7c4a652639262f82418c1e4cff0
F ext/fts3/README.syntax a19711dc5458c20734b8e485e75fb1981ec2427a
F ext/fts3/README.tokenizers 998756696647400de63d5ba60e9655036cb966e9
F ext/fts3/README.txt 8c18f41574404623b76917b9da66fcb0ab38328d
F ext/fts3/fts3.c 51948505e018316cf0b76d249cdd87e409254e8f
F ext/fts3/fts3.c 9dec342fa1cf0914da679a3b7c0d4b53a27883ba
F ext/fts3/fts3.h 3a10a0af180d502cecc50df77b1b22df142817fe
F ext/fts3/fts3Int.h 70528ba8c33991699f96ecc64112122833cdbdb5
F ext/fts3/fts3_expr.c f4ff02ebe854e97ac03ff00b38b728a9ab57fd4b
@ -392,7 +392,7 @@ F test/fts2p.test 4b48c35c91e6a7dbf5ac8d1e5691823cc999aafb
F test/fts2q.test b2fbbe038b7a31a52a6079b215e71226d8c6a682
F test/fts2r.test b154c30b63061d8725e320fba1a39e2201cadd5e
F test/fts2token.test d8070b241a15ff13592a9ae4a8b7c171af6f445a
F test/fts3.test 471611ee89e815accf5514fe4a00bcec569e373d
F test/fts3.test 672a040ea57036fb4b6fdc09027c18d7d24ab654
F test/fts3_common.tcl 4d8eec9db565fed9098f45c378f28e1657802011
F test/fts3aa.test 5327d4c1d9b6c61021696746cc9a6cdc5bf159c0
F test/fts3ab.test 09aeaa162aee6513d9ff336b6932211008b9d1f9
@ -407,7 +407,7 @@ F test/fts3aj.test 584facbc9ac4381a7ec624bfde677340ffc2a5a4
F test/fts3ak.test bd14deafe9d1586e8e9bf032411026ac4f8c925d
F test/fts3al.test 07d64326e79bbdbab20ee87fc3328fbf01641c9f
F test/fts3am.test 218aa6ba0dfc50c7c16b2022aac5c6be593d08d8
F test/fts3an.test 931fa21bd80641ca594bfa32e105250a8a07918b
F test/fts3an.test a49ccadc07a2f7d646ec1b81bc09da2d85a85b18
F test/fts3ao.test 0aa29dd4fc1c8d46b1f7cfe5926f7ac97551bea9
F test/fts3atoken.test 25c2070e1e8755d414bf9c8200427b277a9f99fa
F test/fts3b.test e93bbb653e52afde110ad53bbd793f14fe7a8984
@ -546,7 +546,7 @@ F test/pageropt.test 8146bf448cf09e87bb1867c2217b921fb5857806
F test/pagesize.test 76aa9f23ecb0741a4ed9d2e16c5fa82671f28efb
F test/pcache.test 4118a183908ecaed343a06fcef3ba82e87e0129d
F test/pcache2.test 0d85f2ab6963aee28c671d4c71bec038c00a1d16
F test/permutations.test 15683aaa9b41fb8d19f8a3c616b409cd77e13bef
F test/permutations.test 3fe47c21c32b294b2354e702a25bfbff65747bb1
F test/pragma.test ed78d200f65c6998df51196cb8c39d5300570f24
F test/pragma2.test 5364893491b9231dd170e3459bfc2e2342658b47
F test/printf.test 05970cde31b1a9f54bd75af60597be75a5c54fea
@ -837,7 +837,7 @@ F tool/speedtest2.tcl ee2149167303ba8e95af97873c575c3e0fab58ff
F tool/speedtest8.c 2902c46588c40b55661e471d7a86e4dd71a18224
F tool/speedtest8inst1.c 293327bc76823f473684d589a8160bde1f52c14e
F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f
P 3e5975aa3bb9df9e1f954bcce99384e0f13cb453
R 70e259e7c9124a680075accbca8e9ac0
P 3d59c54a39c4c3149bf01063a91c3db35ec84b31
R d482059365bac67fc30275f964ac7b3d
U dan
Z 4442afa27e00d12db3352013b9fcc5ac
Z e89aa551deb152bbb2478c332f32e64d

View File

@ -1 +1 @@
3d59c54a39c4c3149bf01063a91c3db35ec84b31
d692434b4935e8e7858230af1c126b0be8203077

View File

@ -9,61 +9,11 @@
#
# $Id: fts3.test,v 1.2 2008/07/23 18:17:32 drh Exp $
proc lshift {lvar} {
upvar $lvar l
set ret [lindex $l 0]
set l [lrange $l 1 end]
return $ret
}
while {[set arg [lshift argv]] != ""} {
switch -- $arg {
-sharedpagercache {
sqlite3_enable_shared_cache 1
}
-soak {
set G(issoak) 1
}
default {
set argv [linsert $argv 0 $arg]
break
}
}
}
set testdir [file dirname $argv0]
source $testdir/tester.tcl
# If SQLITE_ENABLE_FTS3 is defined, omit this file.
ifcapable !fts3 {
return
}
rename finish_test really_finish_test
proc finish_test {} {}
set G(isquick) 1
source $testdir/permutations.test
set EXCLUDE {
fts3.test
fts3malloc.test
fts3rnd.test
ifcapable fts3 {
run_test_suite fts3
}
# Files to include in the test. If this list is empty then everything
# that is not in the EXCLUDE list is run.
#
set INCLUDE {
}
foreach testfile [lsort -dictionary [glob $testdir/fts3*.test]] {
set tail [file tail $testfile]
if {[lsearch -exact $EXCLUDE $tail]>=0} continue
if {[llength $INCLUDE]>0 && [lsearch -exact $INCLUDE $tail]<0} continue
source $testfile
catch {db close}
if {$sqlite_open_file_count>0} {
puts "$tail did not close all files: $sqlite_open_file_count"
fail_test $tail
set sqlite_open_file_count 0
}
}
set sqlite_open_file_count 0
really_finish_test
finish_test

View File

@ -185,12 +185,34 @@ do_test fts3an-3.1 {
set t
} $ret
# TODO(shess) It would be useful to test a couple edge cases, but I
# don't know if we have the precision to manage it from here at this
# time. Prefix hits can cross leaves, which the code above _should_
# hit by virtue of size. There are two variations on this. If the
# tree is 2 levels high, the code will find the leaf-node extent
# directly, but if it is higher, the code will have to follow two
# separate interior branches down the tree. Both should be tested.
# Test a boundary condition: More than 2^16 terms that match a searched for
# prefix in a single segment.
#
puts "This next test can take a little while (~ 30 seconds)..."
do_test fts3an-4.1 {
  execsql { CREATE VIRTUAL TABLE ft USING fts3(x) }
  execsql BEGIN

  # Seed the table with one row, then double its size 17 times so that it
  # ends up holding 2^17 = 131072 rows.
  execsql { INSERT INTO ft VALUES(NULL) }
  for {set nDoubling 0} {$nDoubling < 17} {incr nDoubling} {
    execsql { INSERT INTO ft SELECT * FROM ft }
  }
  unset nDoubling
  execsql COMMIT

  # Give every row a distinct term sharing the prefix abc, then count the
  # rows matched by a prefix query on it.
  execsql { UPDATE ft SET x = 'abc' || rowid }
  execsql { SELECT count(*) FROM ft WHERE x MATCH 'abc*' }
} {131072}
finish_test

View File

@ -153,6 +153,17 @@ test_suite "threads" -prefix "" -description {
thread004.test thread005.test walthread.test
}
# Suite "fts3": the FTS3 test scripts listed under -files, run as one
# group. fts3.test invokes it with "run_test_suite fts3".
test_suite "fts3" -prefix "" -description {
All FTS3 tests except fts3malloc.test and fts3rnd.test.
} -files {
fts3aa.test fts3ab.test fts3ac.test fts3ad.test fts3ae.test
fts3af.test fts3ag.test fts3ah.test fts3ai.test fts3aj.test
fts3ak.test fts3al.test fts3am.test fts3an.test fts3ao.test
fts3atoken.test fts3b.test fts3c.test fts3cov.test fts3d.test
fts3e.test fts3expr.test fts3expr2.test fts3near.test
fts3query.test fts3snippet.test
}
lappend ::testsuitelist xxx
#-------------------------------------------------------------------------