Performance enhancement for fts5 column filter queries on detail=full tables.
FossilOrigin-Name: 13fb4aa6a87c5c6258979953da82eedc1a7bf14f
This commit is contained in:
parent
784c1b93fb
commit
52f5d1ba6b
@ -4333,26 +4333,32 @@ static int fts5IndexExtractCol(
|
||||
int iCurrent = 0; /* Anything before the first 0x01 is col 0 */
|
||||
const u8 *p = *pa;
|
||||
const u8 *pEnd = &p[n]; /* One byte past end of position list */
|
||||
u8 prev = 0;
|
||||
|
||||
while( iCol>iCurrent ){
|
||||
/* Advance pointer p until it points to pEnd or an 0x01 byte that is
|
||||
** not part of a varint */
|
||||
while( (prev & 0x80) || *p!=0x01 ){
|
||||
prev = *p++;
|
||||
if( p==pEnd ) return 0;
|
||||
** not part of a varint. Note that it is not possible for a negative
|
||||
** or extremely large varint to occur within an uncorrupted position
|
||||
** list. So the last byte of each varint may be assumed to have a clear
|
||||
** 0x80 bit. */
|
||||
while( *p!=0x01 ){
|
||||
while( *p++ & 0x80 );
|
||||
if( p>=pEnd ) return 0;
|
||||
}
|
||||
*pa = p++;
|
||||
p += fts5GetVarint32(p, iCurrent);
|
||||
iCurrent = *p++;
|
||||
if( iCurrent & 0x80 ){
|
||||
p--;
|
||||
p += fts5GetVarint32(p, iCurrent);
|
||||
}
|
||||
}
|
||||
if( iCol!=iCurrent ) return 0;
|
||||
|
||||
/* Advance pointer p until it points to pEnd or an 0x01 byte that is
|
||||
** not part of a varint */
|
||||
assert( (prev & 0x80)==0 );
|
||||
while( p<pEnd && ((prev & 0x80) || *p!=0x01) ){
|
||||
prev = *p++;
|
||||
while( p<pEnd && *p!=0x01 ){
|
||||
while( *p++ & 0x80 );
|
||||
}
|
||||
|
||||
return p - (*pa);
|
||||
}
|
||||
|
||||
|
@ -11,7 +11,6 @@ set Q {
|
||||
{1 "SELECT count(*) FROM t1 WHERE t1 MATCH 'c:t*'"}
|
||||
{1 "SELECT count(*) FROM t1 WHERE t1 MATCH 'a:t* OR b:t* OR c:t* OR d:t* OR e:t* OR f:t* OR g:t*'"}
|
||||
{1 "SELECT count(*) FROM t1 WHERE t1 MATCH 'a:t*'"}
|
||||
|
||||
{2 "SELECT count(*) FROM t1 WHERE t1 MATCH 'c:the'"}
|
||||
}
|
||||
|
||||
|
@ -1,142 +1,159 @@
|
||||
|
||||
|
||||
#-------------------------------------------------------------------------
|
||||
# Command line options processor.
|
||||
##########################################################################
|
||||
# 2016 Jan 27
|
||||
#
|
||||
proc command_line_error {O E {msg ""}} {
|
||||
if {$msg != ""} {
|
||||
puts stderr "Error: $msg"
|
||||
puts stderr ""
|
||||
# The author disclaims copyright to this source code. In place of
|
||||
# a legal notice, here is a blessing:
|
||||
#
|
||||
# May you do good and not evil.
|
||||
# May you find forgiveness for yourself and forgive others.
|
||||
# May you share freely, never taking more than you give.
|
||||
#
|
||||
proc process_cmdline {} {
|
||||
cmdline::process ::A $::argv {
|
||||
{fts5 "use fts5 (this is the default)"}
|
||||
{fts4 "use fts4"}
|
||||
{colsize "10 10 10" "list of column sizes"}
|
||||
{tblname "t1" "table name to create"}
|
||||
{detail "full" "Fts5 detail mode to use"}
|
||||
{repeat 1 "Load each file this many times"}
|
||||
{prefix "" "Fts prefix= option"}
|
||||
database
|
||||
file...
|
||||
} {
|
||||
This script is designed to create fts4/5 tables with more than one column.
|
||||
The -colsize option should be set to a Tcl list of integer values, one for
|
||||
each column in the table. Each value is the number of tokens that will be
|
||||
inserted into the column value for each row. For example, setting the -colsize
|
||||
option to "5 10" creates an FTS table with 2 columns, with roughly 5 and 10
|
||||
tokens per row in each, respectively.
|
||||
|
||||
Each "FILE" argument should be a text file. The contents of these text files
|
||||
is split on whitespace characters to form a list of tokens. The first N1
|
||||
tokens are used for the first column of the first row, where N1 is the first
|
||||
element of the -colsize list. The next N2 are used for the second column of
|
||||
the first row, and so on. Rows are added to the table until the entire list
|
||||
of tokens is exhausted.
|
||||
}
|
||||
|
||||
set L [list]
|
||||
foreach o $O {
|
||||
if {[llength $o]==1} {
|
||||
lappend L [string toupper $o]
|
||||
}
|
||||
}
|
||||
|
||||
puts stderr "Usage: $::argv0 ?SWITCHES? $L"
|
||||
puts stderr ""
|
||||
puts stderr "Switches are:"
|
||||
foreach o $O {
|
||||
if {[llength $o]==3} {
|
||||
foreach {a b c} $o {}
|
||||
puts stderr [format " -%-15s %s (default \"%s\")" "$a VAL" $c $b]
|
||||
} elseif {[llength $o]==2} {
|
||||
foreach {a b} $o {}
|
||||
puts stderr [format " -%-15s %s" $a $b]
|
||||
}
|
||||
}
|
||||
puts stderr ""
|
||||
puts stderr $E
|
||||
exit -1
|
||||
}
|
||||
|
||||
proc process_command_line {avar lArgs O E} {
|
||||
|
||||
upvar $avar A
|
||||
set zTrailing "" ;# True if ... is present in $O
|
||||
set lPosargs [list]
|
||||
|
||||
# Populate A() with default values. Also, for each switch in the command
|
||||
# line spec, set an entry in the idx() array as follows:
|
||||
#
|
||||
# {tblname t1 "table name to use"}
|
||||
# -> [set idx(-tblname) {tblname t1 "table name to use"}
|
||||
#
|
||||
# For each position parameter, append its name to $lPosargs. If the ...
|
||||
# specifier is present, set $zTrailing to the name of the prefix.
|
||||
#
|
||||
foreach o $O {
|
||||
set nm [lindex $o 0]
|
||||
set nArg [llength $o]
|
||||
switch -- $nArg {
|
||||
1 {
|
||||
if {[string range $nm end-2 end]=="..."} {
|
||||
set zTrailing [string range $nm 0 end-3]
|
||||
} else {
|
||||
lappend lPosargs $nm
|
||||
###########################################################################
|
||||
###########################################################################
|
||||
# Command line options processor. This is generic code that can be copied
|
||||
# between scripts.
|
||||
#
|
||||
namespace eval cmdline {
|
||||
proc cmdline_error {O E {msg ""}} {
|
||||
if {$msg != ""} {
|
||||
puts stderr "Error: $msg"
|
||||
puts stderr ""
|
||||
}
|
||||
|
||||
set L [list]
|
||||
foreach o $O {
|
||||
if {[llength $o]==1} {
|
||||
lappend L [string toupper $o]
|
||||
}
|
||||
}
|
||||
|
||||
puts stderr "Usage: $::argv0 ?SWITCHES? $L"
|
||||
puts stderr ""
|
||||
puts stderr "Switches are:"
|
||||
foreach o $O {
|
||||
if {[llength $o]==3} {
|
||||
foreach {a b c} $o {}
|
||||
puts stderr [format " -%-15s %s (default \"%s\")" "$a VAL" $c $b]
|
||||
} elseif {[llength $o]==2} {
|
||||
foreach {a b} $o {}
|
||||
puts stderr [format " -%-15s %s" $a $b]
|
||||
}
|
||||
}
|
||||
puts stderr ""
|
||||
puts stderr $E
|
||||
exit -1
|
||||
}
|
||||
|
||||
proc process {avar lArgs O E} {
|
||||
upvar $avar A
|
||||
set zTrailing "" ;# True if ... is present in $O
|
||||
set lPosargs [list]
|
||||
|
||||
# Populate A() with default values. Also, for each switch in the command
|
||||
# line spec, set an entry in the idx() array as follows:
|
||||
#
|
||||
# {tblname t1 "table name to use"}
|
||||
# -> [set idx(-tblname) {tblname t1 "table name to use"}
|
||||
#
|
||||
# For each position parameter, append its name to $lPosargs. If the ...
|
||||
# specifier is present, set $zTrailing to the name of the prefix.
|
||||
#
|
||||
foreach o $O {
|
||||
set nm [lindex $o 0]
|
||||
set nArg [llength $o]
|
||||
switch -- $nArg {
|
||||
1 {
|
||||
if {[string range $nm end-2 end]=="..."} {
|
||||
set zTrailing [string range $nm 0 end-3]
|
||||
} else {
|
||||
lappend lPosargs $nm
|
||||
}
|
||||
}
|
||||
2 {
|
||||
set A($nm) 0
|
||||
set idx(-$nm) $o
|
||||
}
|
||||
3 {
|
||||
set A($nm) [lindex $o 1]
|
||||
set idx(-$nm) $o
|
||||
}
|
||||
default {
|
||||
error "Error in command line specification"
|
||||
}
|
||||
}
|
||||
2 {
|
||||
set A($nm) 0
|
||||
set idx(-$nm) $o
|
||||
}
|
||||
3 {
|
||||
set A($nm) [lindex $o 1]
|
||||
set idx(-$nm) $o
|
||||
}
|
||||
default {
|
||||
error "Error in command line specification"
|
||||
}
|
||||
|
||||
# Set explicitly specified option values
|
||||
#
|
||||
set nArg [llength $lArgs]
|
||||
for {set i 0} {$i < $nArg} {incr i} {
|
||||
set opt [lindex $lArgs $i]
|
||||
if {[string range $opt 0 0]!="-" || $opt=="--"} break
|
||||
set c [array names idx "${opt}*"]
|
||||
if {[llength $c]==0} { cmdline_error $O $E "Unrecognized option: $opt"}
|
||||
if {[llength $c]>1} { cmdline_error $O $E "Ambiguous option: $opt"}
|
||||
|
||||
if {[llength $idx($c)]==3} {
|
||||
if {$i==[llength $lArgs]-1} {
|
||||
cmdline_error $O $E "Option requires argument: $c"
|
||||
}
|
||||
incr i
|
||||
set A([lindex $idx($c) 0]) [lindex $lArgs $i]
|
||||
} else {
|
||||
set A([lindex $idx($c) 0]) 1
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Set explicitly specified option values
|
||||
#
|
||||
set nArg [llength $lArgs]
|
||||
for {set i 0} {$i < $nArg} {incr i} {
|
||||
set opt [lindex $lArgs $i]
|
||||
if {[string range $opt 0 0]!="-" || $opt=="--"} break
|
||||
set c [array names idx "${opt}*"]
|
||||
if {[llength $c]==0} { command_line_error $O $E "Unrecognized option: $opt"}
|
||||
if {[llength $c]>1} { command_line_error $O $E "Ambiguous option: $opt"}
|
||||
|
||||
if {[llength $idx($c)]==3} {
|
||||
if {$i==[llength $lArgs]-1} {
|
||||
command_line_error $O $E "Option requires argument: $c"
|
||||
}
|
||||
incr i
|
||||
set A([lindex $idx($c) 0]) [lindex $lArgs $i]
|
||||
} else {
|
||||
set A([lindex $idx($c) 0]) 1
|
||||
|
||||
# Deal with position arguments.
|
||||
#
|
||||
set nPosarg [llength $lPosargs]
|
||||
set nRem [expr $nArg - $i]
|
||||
if {$nRem < $nPosarg || ($zTrailing=="" && $nRem > $nPosarg)} {
|
||||
cmdline_error $O $E
|
||||
}
|
||||
for {set j 0} {$j < $nPosarg} {incr j} {
|
||||
set A([lindex $lPosargs $j]) [lindex $lArgs [expr $j+$i]]
|
||||
}
|
||||
if {$zTrailing!=""} {
|
||||
set A($zTrailing) [lrange $lArgs [expr $j+$i] end]
|
||||
}
|
||||
}
|
||||
|
||||
# Deal with position arguments.
|
||||
#
|
||||
set nPosarg [llength $lPosargs]
|
||||
set nRem [expr $nArg - $i]
|
||||
if {$nRem < $nPosarg || ($zTrailing=="" && $nRem > $nPosarg)} {
|
||||
command_line_error $O $E
|
||||
}
|
||||
for {set j 0} {$j < $nPosarg} {incr j} {
|
||||
set A([lindex $lPosargs $j]) [lindex $lArgs [expr $j+$i]]
|
||||
}
|
||||
if {$zTrailing!=""} {
|
||||
set A($zTrailing) [lrange $lArgs [expr $j+$i] end]
|
||||
}
|
||||
}
|
||||
} ;# namespace eval cmdline
|
||||
# End of command line options processor.
|
||||
#-------------------------------------------------------------------------
|
||||
###########################################################################
|
||||
###########################################################################
|
||||
|
||||
process_cmdline
|
||||
|
||||
process_command_line A $argv {
|
||||
{fts5 "use fts5"}
|
||||
{fts4 "use fts4"}
|
||||
{colsize "10 10 10" "list of column sizes"}
|
||||
{tblname "t1" "table name to create"}
|
||||
{detail "full" "Fts5 detail mode to use"}
|
||||
{repeat 1 "Load each file this many times"}
|
||||
database
|
||||
file...
|
||||
} {
|
||||
This script is designed to create fts4/5 tables with more than one column.
|
||||
The -colsize option should be set to a Tcl list of integer values, one for
|
||||
each column in the table. Each value is the number of tokens that will be
|
||||
inserted into the column value for each row. For example, setting the -colsize
|
||||
option to "5 10" creates an FTS table with 2 columns, with roughly 5 and 10
|
||||
tokens per row in each, respectively.
|
||||
|
||||
Each "FILE" argument should be a text file. The contents of these text files is
|
||||
split on whitespace characters to form a list of tokens. The first N1 tokens
|
||||
are used for the first column of the first row, where N1 is the first element
|
||||
of the -colsize list. The next N2 are used for the second column of the first
|
||||
row, and so on. Rows are added to the table until the entire list of tokens
|
||||
is exhausted.
|
||||
}
|
||||
|
||||
# If -fts4 was specified, use fts4. Otherwise, fts5.
|
||||
if {$A(fts4)} {
|
||||
set A(fts) fts4
|
||||
} else {
|
||||
@ -156,7 +173,8 @@ proc create_table {} {
|
||||
|
||||
set sql "CREATE VIRTUAL TABLE IF NOT EXISTS $A(tblname) USING $A(fts) ("
|
||||
append sql [join $cols ,]
|
||||
if {$A(fts)=="fts5"} { append sql ",detail=$A(detail));" }
|
||||
if {$A(fts)=="fts5"} { append sql ",detail=$A(detail)" }
|
||||
append sql ", prefix='$A(prefix)');"
|
||||
|
||||
db eval $sql
|
||||
return $cols
|
||||
|
18
manifest
18
manifest
@ -1,5 +1,5 @@
|
||||
C Merge\sthe\simplementation\sof\sOP_IdxRowid\sand\sOP_Seek\sso\sthat\sOP_Seek\sno\slonger\nrequires\sthe\srowid\sregister\sand\sa\sseparate\sOP_IdxRowid\scall.\s\sShorter\sand\nfaster\sprepared\sstatements\sresult.
|
||||
D 2016-01-30T16:59:56.592
|
||||
C Performance\senhancement\sfor\sfts5\scolumn\sfilter\squeries\son\sdetail=full\stables.
|
||||
D 2016-01-30T19:16:11.820
|
||||
F Makefile.in 027c1603f255390c43a426671055a31c0a65fdb4
|
||||
F Makefile.linux-gcc 7bc79876b875010e8c8f9502eb935ca92aa3c434
|
||||
F Makefile.msc 72b7858f02017611c3ac1ddc965251017fed0845
|
||||
@ -104,7 +104,7 @@ F ext/fts5/fts5_buffer.c f6e0c6018ffc8e39fc0b333b5daa8b8d528ae6e4
|
||||
F ext/fts5/fts5_config.c 0c384ebdd23fd055e2e50a93277b8d59da538238
|
||||
F ext/fts5/fts5_expr.c a66b9694519d9c336d9bdbd46ea22e7e14aef412
|
||||
F ext/fts5/fts5_hash.c 1b113977296cf4212c6ec667d5e3f2bd18036955
|
||||
F ext/fts5/fts5_index.c 5558bfbeaf364cc67f937e25753ceed8757cb6d1
|
||||
F ext/fts5/fts5_index.c cd1e4faca8b9adc2d89b367075bf93a7f50c406b
|
||||
F ext/fts5/fts5_main.c 3886bbfc5ac1d9df29979823ddf2b68241e1127e
|
||||
F ext/fts5/fts5_storage.c 2a1f44deae090cd711f02cec0c2af8e660360d24
|
||||
F ext/fts5/fts5_tcl.c f8731e0508299bd43f1a2eff7dbeaac870768966
|
||||
@ -190,8 +190,8 @@ F ext/fts5/test/fts5unindexed.test e9539d5b78c677315e7ed8ea911d4fd25437c680
|
||||
F ext/fts5/test/fts5update.test 57c7012a7919889048947addae10e0613df45529
|
||||
F ext/fts5/test/fts5version.test 978f59541d8cef7e8591f8be2115ec5ccb863e2e
|
||||
F ext/fts5/test/fts5vocab.test 480d780aa6b699816c5066225fbd86f3a0239477
|
||||
F ext/fts5/tool/fts5speed.tcl 47f0031e6ac564964f4f4805e439ea665e848df2
|
||||
F ext/fts5/tool/fts5txt2db.tcl ae308338b2da1646dea456ab66706acdde8c714e
|
||||
F ext/fts5/tool/fts5speed.tcl f9944a9abb9b7685cfbee8101a3dd772ededca66
|
||||
F ext/fts5/tool/fts5txt2db.tcl 1343745b89ca2a1e975c23f836d0cee410052975
|
||||
F ext/fts5/tool/loadfts5.tcl 95b03429ee6b138645703c6ca192c3ac96eaf093
|
||||
F ext/fts5/tool/mkfts5c.tcl d1c2a9ab8e0ec690a52316f33dd9b1d379942f45
|
||||
F ext/fts5/tool/showfts5.tcl d54da0e067306663e2d5d523965ca487698e722c
|
||||
@ -1422,7 +1422,7 @@ F tool/vdbe_profile.tcl 246d0da094856d72d2c12efec03250d71639d19f
|
||||
F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4
|
||||
F tool/warnings.sh 48bd54594752d5be3337f12c72f28d2080cb630b
|
||||
F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f
|
||||
P 1f4c667f37d63fc3ef2e8f2581ecd3a66c054426
|
||||
R c807c9436e91d8aabdffdea0ecd68c4e
|
||||
U drh
|
||||
Z a3f05d9c2a4721d817f24f533822e83f
|
||||
P 9bec50a1e7796a6e038db9b1cc7cc1e7e350bf74
|
||||
R bc0069e7525e32617bb065c15a24c73f
|
||||
U dan
|
||||
Z 0739ff39c83f4f590353add9c7d562e8
|
||||
|
@ -1 +1 @@
|
||||
9bec50a1e7796a6e038db9b1cc7cc1e7e350bf74
|
||||
13fb4aa6a87c5c6258979953da82eedc1a7bf14f
|
Loading…
x
Reference in New Issue
Block a user