From 52f5d1ba6be88ca71cf8bda71ed519ebe84a93e0 Mon Sep 17 00:00:00 2001 From: dan Date: Sat, 30 Jan 2016 19:16:11 +0000 Subject: [PATCH] Performance enhancement for fts5 column filter queries on detail=full tables. FossilOrigin-Name: 13fb4aa6a87c5c6258979953da82eedc1a7bf14f --- ext/fts5/fts5_index.c | 24 ++-- ext/fts5/tool/fts5speed.tcl | 1 - ext/fts5/tool/fts5txt2db.tcl | 272 +++++++++++++++++++---------------- manifest | 18 +-- manifest.uuid | 2 +- 5 files changed, 170 insertions(+), 147 deletions(-) diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index ca3feb86f7..f30c38d91a 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -4333,26 +4333,32 @@ static int fts5IndexExtractCol( int iCurrent = 0; /* Anything before the first 0x01 is col 0 */ const u8 *p = *pa; const u8 *pEnd = &p[n]; /* One byte past end of position list */ - u8 prev = 0; while( iCol>iCurrent ){ /* Advance pointer p until it points to pEnd or an 0x01 byte that is - ** not part of a varint */ - while( (prev & 0x80) || *p!=0x01 ){ - prev = *p++; - if( p==pEnd ) return 0; + ** not part of a varint. Note that it is not possible for a negative + ** or extremely large varint to occur within an uncorrupted position + ** list. So the last byte of each varint may be assumed to have a clear + ** 0x80 bit. */ + while( *p!=0x01 ){ + while( *p++ & 0x80 ); + if( p>=pEnd ) return 0; } *pa = p++; - p += fts5GetVarint32(p, iCurrent); + iCurrent = *p++; + if( iCurrent & 0x80 ){ + p--; + p += fts5GetVarint32(p, iCurrent); + } } if( iCol!=iCurrent ) return 0; /* Advance pointer p until it points to pEnd or an 0x01 byte that is ** not part of a varint */ - assert( (prev & 0x80)==0 ); - while( p [set idx(-tblname) {tblname t1 "table name to use"} - # - # For each position parameter, append its name to $lPosargs. If the ... - # specifier is present, set $zTrailing to the name of the prefix. - # - foreach o $O { - set nm [lindex $o 0] - set nArg [llength $o] - switch -- $nArg { - 1 { - if {[string range $nm end-2 end]=="..."} { - set zTrailing [string range $nm 0 end-3] - } else { - lappend lPosargs $nm +########################################################################### +########################################################################### +# Command line options processor. This is generic code that can be copied +# between scripts. +# +namespace eval cmdline { + proc cmdline_error {O E {msg ""}} { + if {$msg != ""} { + puts stderr "Error: $msg" + puts stderr "" + } + + set L [list] + foreach o $O { + if {[llength $o]==1} { + lappend L [string toupper $o] + } + } + + puts stderr "Usage: $::argv0 ?SWITCHES? $L" + puts stderr "" + puts stderr "Switches are:" + foreach o $O { + if {[llength $o]==3} { + foreach {a b c} $o {} + puts stderr [format " -%-15s %s (default \"%s\")" "$a VAL" $c $b] + } elseif {[llength $o]==2} { + foreach {a b} $o {} + puts stderr [format " -%-15s %s" $a $b] + } + } + puts stderr "" + puts stderr $E + exit -1 + } + + proc process {avar lArgs O E} { + upvar $avar A + set zTrailing "" ;# True if ... is present in $O + set lPosargs [list] + + # Populate A() with default values. Also, for each switch in the command + # line spec, set an entry in the idx() array as follows: + # + # {tblname t1 "table name to use"} + # -> [set idx(-tblname) {tblname t1 "table name to use"} + # + # For each position parameter, append its name to $lPosargs. If the ... + # specifier is present, set $zTrailing to the name of the prefix. + # + foreach o $O { + set nm [lindex $o 0] + set nArg [llength $o] + switch -- $nArg { + 1 { + if {[string range $nm end-2 end]=="..."} { + set zTrailing [string range $nm 0 end-3] + } else { + lappend lPosargs $nm + } + } + 2 { + set A($nm) 0 + set idx(-$nm) $o + } + 3 { + set A($nm) [lindex $o 1] + set idx(-$nm) $o + } + default { + error "Error in command line specification" } } - 2 { - set A($nm) 0 - set idx(-$nm) $o - } - 3 { - set A($nm) [lindex $o 1] - set idx(-$nm) $o - } - default { - error "Error in command line specification" + } + + # Set explicitly specified option values + # + set nArg [llength $lArgs] + for {set i 0} {$i < $nArg} {incr i} { + set opt [lindex $lArgs $i] + if {[string range $opt 0 0]!="-" || $opt=="--"} break + set c [array names idx "${opt}*"] + if {[llength $c]==0} { cmdline_error $O $E "Unrecognized option: $opt"} + if {[llength $c]>1} { cmdline_error $O $E "Ambiguous option: $opt"} + + if {[llength $idx($c)]==3} { + if {$i==[llength $lArgs]-1} { + cmdline_error $O $E "Option requires argument: $c" + } + incr i + set A([lindex $idx($c) 0]) [lindex $lArgs $i] + } else { + set A([lindex $idx($c) 0]) 1 } } - } - - # Set explicitly specified option values - # - set nArg [llength $lArgs] - for {set i 0} {$i < $nArg} {incr i} { - set opt [lindex $lArgs $i] - if {[string range $opt 0 0]!="-" || $opt=="--"} break - set c [array names idx "${opt}*"] - if {[llength $c]==0} { command_line_error $O $E "Unrecognized option: $opt"} - if {[llength $c]>1} { command_line_error $O $E "Ambiguous option: $opt"} - - if {[llength $idx($c)]==3} { - if {$i==[llength $lArgs]-1} { - command_line_error $O $E "Option requires argument: $c" - } - incr i - set A([lindex $idx($c) 0]) [lindex $lArgs $i] - } else { - set A([lindex $idx($c) 0]) 1 + + # Deal with position arguments. + # + set nPosarg [llength $lPosargs] + set nRem [expr $nArg - $i] + if {$nRem < $nPosarg || ($zTrailing=="" && $nRem > $nPosarg)} { + cmdline_error $O $E + } + for {set j 0} {$j < $nPosarg} {incr j} { + set A([lindex $lPosargs $j]) [lindex $lArgs [expr $j+$i]] + } + if {$zTrailing!=""} { + set A($zTrailing) [lrange $lArgs [expr $j+$i] end] } } - - # Deal with position arguments. - # - set nPosarg [llength $lPosargs] - set nRem [expr $nArg - $i] - if {$nRem < $nPosarg || ($zTrailing=="" && $nRem > $nPosarg)} { - command_line_error $O $E - } - for {set j 0} {$j < $nPosarg} {incr j} { - set A([lindex $lPosargs $j]) [lindex $lArgs [expr $j+$i]] - } - if {$zTrailing!=""} { - set A($zTrailing) [lrange $lArgs [expr $j+$i] end] - } -} +} ;# namespace eval cmdline # End of command line options processor. -#------------------------------------------------------------------------- +########################################################################### +########################################################################### +process_cmdline -process_command_line A $argv { - {fts5 "use fts5"} - {fts4 "use fts4"} - {colsize "10 10 10" "list of column sizes"} - {tblname "t1" "table name to create"} - {detail "full" "Fts5 detail mode to use"} - {repeat 1 "Load each file this many times"} - database - file... -} { -This script is designed to create fts4/5 tables with more than one column. -The -colsize option should be set to a Tcl list of integer values, one for -each column in the table. Each value is the number of tokens that will be -inserted into the column value for each row. For example, setting the -colsize -option to "5 10" creates an FTS table with 2 columns, with roughly 5 and 10 -tokens per row in each, respectively. - -Each "FILE" argument should be a text file. The contents of these text files is -split on whitespace characters to form a list of tokens. The first N1 tokens -are used for the first column of the first row, where N1 is the first element -of the -colsize list. The next N2 are used for the second column of the first -row, and so on. Rows are added to the table until the entire list of tokens -is exhausted. -} - +# If -fts4 was specified, use fts4. Otherwise, fts5. if {$A(fts4)} { set A(fts) fts4 } else { @@ -156,7 +173,8 @@ proc create_table {} { set sql "CREATE VIRTUAL TABLE IF NOT EXISTS $A(tblname) USING $A(fts) (" append sql [join $cols ,] - if {$A(fts)=="fts5"} { append sql ",detail=$A(detail));" } + if {$A(fts)=="fts5"} { append sql ",detail=$A(detail)" } + append sql ", prefix='$A(prefix)');" db eval $sql return $cols diff --git a/manifest b/manifest index d850d9282e..75f2023494 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Merge\sthe\simplementation\sof\sOP_IdxRowid\sand\sOP_Seek\sso\sthat\sOP_Seek\sno\slonger\nrequires\sthe\srowid\sregister\sand\sa\sseparate\sOP_IdxRowid\scall.\s\sShorter\sand\nfaster\sprepared\sstatements\sresult. -D 2016-01-30T16:59:56.592 +C Performance\senhancement\sfor\sfts5\scolumn\sfilter\squeries\son\sdetail=full\stables. +D 2016-01-30T19:16:11.820 F Makefile.in 027c1603f255390c43a426671055a31c0a65fdb4 F Makefile.linux-gcc 7bc79876b875010e8c8f9502eb935ca92aa3c434 F Makefile.msc 72b7858f02017611c3ac1ddc965251017fed0845 @@ -104,7 +104,7 @@ F ext/fts5/fts5_buffer.c f6e0c6018ffc8e39fc0b333b5daa8b8d528ae6e4 F ext/fts5/fts5_config.c 0c384ebdd23fd055e2e50a93277b8d59da538238 F ext/fts5/fts5_expr.c a66b9694519d9c336d9bdbd46ea22e7e14aef412 F ext/fts5/fts5_hash.c 1b113977296cf4212c6ec667d5e3f2bd18036955 -F ext/fts5/fts5_index.c 5558bfbeaf364cc67f937e25753ceed8757cb6d1 +F ext/fts5/fts5_index.c cd1e4faca8b9adc2d89b367075bf93a7f50c406b F ext/fts5/fts5_main.c 3886bbfc5ac1d9df29979823ddf2b68241e1127e F ext/fts5/fts5_storage.c 2a1f44deae090cd711f02cec0c2af8e660360d24 F ext/fts5/fts5_tcl.c f8731e0508299bd43f1a2eff7dbeaac870768966 @@ -190,8 +190,8 @@ F ext/fts5/test/fts5unindexed.test e9539d5b78c677315e7ed8ea911d4fd25437c680 F ext/fts5/test/fts5update.test 57c7012a7919889048947addae10e0613df45529 F ext/fts5/test/fts5version.test 978f59541d8cef7e8591f8be2115ec5ccb863e2e F ext/fts5/test/fts5vocab.test 480d780aa6b699816c5066225fbd86f3a0239477 -F ext/fts5/tool/fts5speed.tcl 47f0031e6ac564964f4f4805e439ea665e848df2 -F ext/fts5/tool/fts5txt2db.tcl ae308338b2da1646dea456ab66706acdde8c714e +F ext/fts5/tool/fts5speed.tcl f9944a9abb9b7685cfbee8101a3dd772ededca66 +F ext/fts5/tool/fts5txt2db.tcl 1343745b89ca2a1e975c23f836d0cee410052975 F ext/fts5/tool/loadfts5.tcl 95b03429ee6b138645703c6ca192c3ac96eaf093 F ext/fts5/tool/mkfts5c.tcl d1c2a9ab8e0ec690a52316f33dd9b1d379942f45 F ext/fts5/tool/showfts5.tcl d54da0e067306663e2d5d523965ca487698e722c @@ -1422,7 +1422,7 @@ F tool/vdbe_profile.tcl 246d0da094856d72d2c12efec03250d71639d19f F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 48bd54594752d5be3337f12c72f28d2080cb630b F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 1f4c667f37d63fc3ef2e8f2581ecd3a66c054426 -R c807c9436e91d8aabdffdea0ecd68c4e -U drh -Z a3f05d9c2a4721d817f24f533822e83f +P 9bec50a1e7796a6e038db9b1cc7cc1e7e350bf74 +R bc0069e7525e32617bb065c15a24c73f +U dan +Z 0739ff39c83f4f590353add9c7d562e8 diff --git a/manifest.uuid b/manifest.uuid index 01040f34b0..6f555b4e1d 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -9bec50a1e7796a6e038db9b1cc7cc1e7e350bf74 \ No newline at end of file +13fb4aa6a87c5c6258979953da82eedc1a7bf14f \ No newline at end of file