Performance enhancement for fts5 column filter queries on detail=full tables.

FossilOrigin-Name: 13fb4aa6a87c5c6258979953da82eedc1a7bf14f
This commit is contained in:
dan 2016-01-30 19:16:11 +00:00
parent 784c1b93fb
commit 52f5d1ba6b
5 changed files with 170 additions and 147 deletions

View File

@ -4333,26 +4333,32 @@ static int fts5IndexExtractCol(
int iCurrent = 0; /* Anything before the first 0x01 is col 0 */
const u8 *p = *pa;
const u8 *pEnd = &p[n]; /* One byte past end of position list */
u8 prev = 0;
while( iCol>iCurrent ){
/* Advance pointer p until it points to pEnd or an 0x01 byte that is
** not part of a varint */
while( (prev & 0x80) || *p!=0x01 ){
prev = *p++;
if( p==pEnd ) return 0;
** not part of a varint. Note that it is not possible for a negative
** or extremely large varint to occur within an uncorrupted position
** list. So the last byte of each varint may be assumed to have a clear
** 0x80 bit. */
while( *p!=0x01 ){
while( *p++ & 0x80 );
if( p>=pEnd ) return 0;
}
*pa = p++;
p += fts5GetVarint32(p, iCurrent);
iCurrent = *p++;
if( iCurrent & 0x80 ){
p--;
p += fts5GetVarint32(p, iCurrent);
}
}
if( iCol!=iCurrent ) return 0;
/* Advance pointer p until it points to pEnd or an 0x01 byte that is
** not part of a varint */
assert( (prev & 0x80)==0 );
while( p<pEnd && ((prev & 0x80) || *p!=0x01) ){
prev = *p++;
while( p<pEnd && *p!=0x01 ){
while( *p++ & 0x80 );
}
return p - (*pa);
}

View File

@ -11,7 +11,6 @@ set Q {
{1 "SELECT count(*) FROM t1 WHERE t1 MATCH 'c:t*'"}
{1 "SELECT count(*) FROM t1 WHERE t1 MATCH 'a:t* OR b:t* OR c:t* OR d:t* OR e:t* OR f:t* OR g:t*'"}
{1 "SELECT count(*) FROM t1 WHERE t1 MATCH 'a:t*'"}
{2 "SELECT count(*) FROM t1 WHERE t1 MATCH 'c:the'"}
}

View File

@ -1,142 +1,159 @@
#-------------------------------------------------------------------------
# Command line options processor.
##########################################################################
# 2016 Jan 27
#
proc command_line_error {O E {msg ""}} {
if {$msg != ""} {
puts stderr "Error: $msg"
puts stderr ""
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
proc process_cmdline {} {
cmdline::process ::A $::argv {
{fts5 "use fts5 (this is the default)"}
{fts4 "use fts4"}
{colsize "10 10 10" "list of column sizes"}
{tblname "t1" "table name to create"}
{detail "full" "Fts5 detail mode to use"}
{repeat 1 "Load each file this many times"}
{prefix "" "Fts prefix= option"}
database
file...
} {
This script is designed to create fts4/5 tables with more than one column.
The -colsize option should be set to a Tcl list of integer values, one for
each column in the table. Each value is the number of tokens that will be
inserted into the column value for each row. For example, setting the -colsize
option to "5 10" creates an FTS table with 2 columns, with roughly 5 and 10
tokens per row in each, respectively.
Each "FILE" argument should be a text file. The contents of these text files
is split on whitespace characters to form a list of tokens. The first N1
tokens are used for the first column of the first row, where N1 is the first
element of the -colsize list. The next N2 are used for the second column of
the first row, and so on. Rows are added to the table until the entire list
of tokens is exhausted.
}
set L [list]
foreach o $O {
if {[llength $o]==1} {
lappend L [string toupper $o]
}
}
puts stderr "Usage: $::argv0 ?SWITCHES? $L"
puts stderr ""
puts stderr "Switches are:"
foreach o $O {
if {[llength $o]==3} {
foreach {a b c} $o {}
puts stderr [format " -%-15s %s (default \"%s\")" "$a VAL" $c $b]
} elseif {[llength $o]==2} {
foreach {a b} $o {}
puts stderr [format " -%-15s %s" $a $b]
}
}
puts stderr ""
puts stderr $E
exit -1
}
proc process_command_line {avar lArgs O E} {
upvar $avar A
set zTrailing "" ;# True if ... is present in $O
set lPosargs [list]
# Populate A() with default values. Also, for each switch in the command
# line spec, set an entry in the idx() array as follows:
#
# {tblname t1 "table name to use"}
# -> [set idx(-tblname) {tblname t1 "table name to use"}
#
# For each position parameter, append its name to $lPosargs. If the ...
# specifier is present, set $zTrailing to the name of the prefix.
#
foreach o $O {
set nm [lindex $o 0]
set nArg [llength $o]
switch -- $nArg {
1 {
if {[string range $nm end-2 end]=="..."} {
set zTrailing [string range $nm 0 end-3]
} else {
lappend lPosargs $nm
###########################################################################
###########################################################################
# Command line options processor. This is generic code that can be copied
# between scripts.
#
namespace eval cmdline {
# Report a command-line usage error and terminate the script.
#
#   O    - command line specification: a list of entries. A one-element
#          entry names a positional argument, a two-element entry is
#          {switch help-text} and a three-element entry is
#          {switch default help-text}.
#   E    - free-form help text, printed after the list of switches.
#   msg  - optional error message, printed first as "Error: ..." if not
#          empty.
#
# All output goes to stderr. This proc does not return: it calls
# [exit -1].
proc cmdline_error {O E {msg ""}} {
  if {[string length $msg] > 0} {
    puts stderr "Error: $msg"
    puts stderr ""
  }

  # One-element spec entries are positional arguments; they appear
  # upper-cased on the "Usage:" line.
  set lPositional [list]
  foreach spec $O {
    if {[llength $spec] != 1} continue
    lappend lPositional [string toupper $spec]
  }
  puts stderr "Usage: $::argv0 ?SWITCHES? $lPositional"
  puts stderr ""
  puts stderr "Switches are:"

  # Entries of any other length (including positional ones) print nothing
  # in the switch list.
  foreach spec $O {
    switch -- [llength $spec] {
      3 {
        set zName    [lindex $spec 0]
        set zDefault [lindex $spec 1]
        set zHelp    [lindex $spec 2]
        puts stderr [format " -%-15s %s (default \"%s\")" "$zName VAL" $zHelp $zDefault]
      }
      2 {
        set zName [lindex $spec 0]
        set zHelp [lindex $spec 1]
        puts stderr [format " -%-15s %s" $zName $zHelp]
      }
    }
  }

  puts stderr ""
  puts stderr $E
  exit -1
}
proc process {avar lArgs O E} {
upvar $avar A
set zTrailing "" ;# True if ... is present in $O
set lPosargs [list]
# Populate A() with default values. Also, for each switch in the command
# line spec, set an entry in the idx() array as follows:
#
# {tblname t1 "table name to use"}
# -> [set idx(-tblname) {tblname t1 "table name to use"}]
#
# For each positional parameter, append its name to $lPosargs. If the ...
# specifier is present, set $zTrailing to that parameter's name with the
# trailing "..." removed.
#
foreach o $O {
set nm [lindex $o 0]
set nArg [llength $o]
switch -- $nArg {
1 {
if {[string range $nm end-2 end]=="..."} {
set zTrailing [string range $nm 0 end-3]
} else {
lappend lPosargs $nm
}
}
2 {
set A($nm) 0
set idx(-$nm) $o
}
3 {
set A($nm) [lindex $o 1]
set idx(-$nm) $o
}
default {
error "Error in command line specification"
}
}
2 {
set A($nm) 0
set idx(-$nm) $o
}
3 {
set A($nm) [lindex $o 1]
set idx(-$nm) $o
}
default {
error "Error in command line specification"
}
# Set explicitly specified option values
#
set nArg [llength $lArgs]
for {set i 0} {$i < $nArg} {incr i} {
set opt [lindex $lArgs $i]
if {[string range $opt 0 0]!="-" || $opt=="--"} break
set c [array names idx "${opt}*"]
if {[llength $c]==0} { cmdline_error $O $E "Unrecognized option: $opt"}
if {[llength $c]>1} { cmdline_error $O $E "Ambiguous option: $opt"}
if {[llength $idx($c)]==3} {
if {$i==[llength $lArgs]-1} {
cmdline_error $O $E "Option requires argument: $c"
}
incr i
set A([lindex $idx($c) 0]) [lindex $lArgs $i]
} else {
set A([lindex $idx($c) 0]) 1
}
}
}
# Set explicitly specified option values
#
set nArg [llength $lArgs]
for {set i 0} {$i < $nArg} {incr i} {
set opt [lindex $lArgs $i]
if {[string range $opt 0 0]!="-" || $opt=="--"} break
set c [array names idx "${opt}*"]
if {[llength $c]==0} { command_line_error $O $E "Unrecognized option: $opt"}
if {[llength $c]>1} { command_line_error $O $E "Ambiguous option: $opt"}
if {[llength $idx($c)]==3} {
if {$i==[llength $lArgs]-1} {
command_line_error $O $E "Option requires argument: $c"
}
incr i
set A([lindex $idx($c) 0]) [lindex $lArgs $i]
} else {
set A([lindex $idx($c) 0]) 1
# Deal with position arguments.
#
set nPosarg [llength $lPosargs]
set nRem [expr $nArg - $i]
if {$nRem < $nPosarg || ($zTrailing=="" && $nRem > $nPosarg)} {
cmdline_error $O $E
}
for {set j 0} {$j < $nPosarg} {incr j} {
set A([lindex $lPosargs $j]) [lindex $lArgs [expr $j+$i]]
}
if {$zTrailing!=""} {
set A($zTrailing) [lrange $lArgs [expr $j+$i] end]
}
}
# Deal with position arguments.
#
set nPosarg [llength $lPosargs]
set nRem [expr $nArg - $i]
if {$nRem < $nPosarg || ($zTrailing=="" && $nRem > $nPosarg)} {
command_line_error $O $E
}
for {set j 0} {$j < $nPosarg} {incr j} {
set A([lindex $lPosargs $j]) [lindex $lArgs [expr $j+$i]]
}
if {$zTrailing!=""} {
set A($zTrailing) [lrange $lArgs [expr $j+$i] end]
}
}
} ;# namespace eval cmdline
# End of command line options processor.
#-------------------------------------------------------------------------
###########################################################################
###########################################################################
process_cmdline
process_command_line A $argv {
{fts5 "use fts5"}
{fts4 "use fts4"}
{colsize "10 10 10" "list of column sizes"}
{tblname "t1" "table name to create"}
{detail "full" "Fts5 detail mode to use"}
{repeat 1 "Load each file this many times"}
database
file...
} {
This script is designed to create fts4/5 tables with more than one column.
The -colsize option should be set to a Tcl list of integer values, one for
each column in the table. Each value is the number of tokens that will be
inserted into the column value for each row. For example, setting the -colsize
option to "5 10" creates an FTS table with 2 columns, with roughly 5 and 10
tokens per row in each, respectively.
Each "FILE" argument should be a text file. The contents of these text files is
split on whitespace characters to form a list of tokens. The first N1 tokens
are used for the first column of the first row, where N1 is the first element
of the -colsize list. The next N2 are used for the second column of the first
row, and so on. Rows are added to the table until the entire list of tokens
is exhausted.
}
# If -fts4 was specified, use fts4. Otherwise, fts5.
if {$A(fts4)} {
set A(fts) fts4
} else {
@ -156,7 +173,8 @@ proc create_table {} {
set sql "CREATE VIRTUAL TABLE IF NOT EXISTS $A(tblname) USING $A(fts) ("
append sql [join $cols ,]
if {$A(fts)=="fts5"} { append sql ",detail=$A(detail));" }
if {$A(fts)=="fts5"} { append sql ",detail=$A(detail)" }
append sql ", prefix='$A(prefix)');"
db eval $sql
return $cols

View File

@ -1,5 +1,5 @@
C Merge\sthe\simplementation\sof\sOP_IdxRowid\sand\sOP_Seek\sso\sthat\sOP_Seek\sno\slonger\nrequires\sthe\srowid\sregister\sand\sa\sseparate\sOP_IdxRowid\scall.\s\sShorter\sand\nfaster\sprepared\sstatements\sresult.
D 2016-01-30T16:59:56.592
C Performance\senhancement\sfor\sfts5\scolumn\sfilter\squeries\son\sdetail=full\stables.
D 2016-01-30T19:16:11.820
F Makefile.in 027c1603f255390c43a426671055a31c0a65fdb4
F Makefile.linux-gcc 7bc79876b875010e8c8f9502eb935ca92aa3c434
F Makefile.msc 72b7858f02017611c3ac1ddc965251017fed0845
@ -104,7 +104,7 @@ F ext/fts5/fts5_buffer.c f6e0c6018ffc8e39fc0b333b5daa8b8d528ae6e4
F ext/fts5/fts5_config.c 0c384ebdd23fd055e2e50a93277b8d59da538238
F ext/fts5/fts5_expr.c a66b9694519d9c336d9bdbd46ea22e7e14aef412
F ext/fts5/fts5_hash.c 1b113977296cf4212c6ec667d5e3f2bd18036955
F ext/fts5/fts5_index.c 5558bfbeaf364cc67f937e25753ceed8757cb6d1
F ext/fts5/fts5_index.c cd1e4faca8b9adc2d89b367075bf93a7f50c406b
F ext/fts5/fts5_main.c 3886bbfc5ac1d9df29979823ddf2b68241e1127e
F ext/fts5/fts5_storage.c 2a1f44deae090cd711f02cec0c2af8e660360d24
F ext/fts5/fts5_tcl.c f8731e0508299bd43f1a2eff7dbeaac870768966
@ -190,8 +190,8 @@ F ext/fts5/test/fts5unindexed.test e9539d5b78c677315e7ed8ea911d4fd25437c680
F ext/fts5/test/fts5update.test 57c7012a7919889048947addae10e0613df45529
F ext/fts5/test/fts5version.test 978f59541d8cef7e8591f8be2115ec5ccb863e2e
F ext/fts5/test/fts5vocab.test 480d780aa6b699816c5066225fbd86f3a0239477
F ext/fts5/tool/fts5speed.tcl 47f0031e6ac564964f4f4805e439ea665e848df2
F ext/fts5/tool/fts5txt2db.tcl ae308338b2da1646dea456ab66706acdde8c714e
F ext/fts5/tool/fts5speed.tcl f9944a9abb9b7685cfbee8101a3dd772ededca66
F ext/fts5/tool/fts5txt2db.tcl 1343745b89ca2a1e975c23f836d0cee410052975
F ext/fts5/tool/loadfts5.tcl 95b03429ee6b138645703c6ca192c3ac96eaf093
F ext/fts5/tool/mkfts5c.tcl d1c2a9ab8e0ec690a52316f33dd9b1d379942f45
F ext/fts5/tool/showfts5.tcl d54da0e067306663e2d5d523965ca487698e722c
@ -1422,7 +1422,7 @@ F tool/vdbe_profile.tcl 246d0da094856d72d2c12efec03250d71639d19f
F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4
F tool/warnings.sh 48bd54594752d5be3337f12c72f28d2080cb630b
F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f
P 1f4c667f37d63fc3ef2e8f2581ecd3a66c054426
R c807c9436e91d8aabdffdea0ecd68c4e
U drh
Z a3f05d9c2a4721d817f24f533822e83f
P 9bec50a1e7796a6e038db9b1cc7cc1e7e350bf74
R bc0069e7525e32617bb065c15a24c73f
U dan
Z 0739ff39c83f4f590353add9c7d562e8

View File

@ -1 +1 @@
9bec50a1e7796a6e038db9b1cc7cc1e7e350bf74
13fb4aa6a87c5c6258979953da82eedc1a7bf14f