sqlite/test/fts3auto.test

452 lines
14 KiB
Plaintext
Raw Normal View History

# 2011 June 10
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#***********************************************************************
#
# Locate the directory containing this script and load the shared test
# harness from it.
set testdir [file dirname $argv0]
source $testdir/tester.tcl
# If this build does not include FTS3, skip the tests in this file.
#
ifcapable !fts3 { finish_test ; return }
# Load further helper scripts from the test directory.
source $testdir/fts3_common.tcl
source $testdir/malloc_common.tcl
set testprefix fts3auto
# Save the current value of sqlite_fts3_enable_parentheses in sfep so that it
# can be restored at the end of this file, then set it to 1 for these tests.
set sfep $sqlite_fts3_enable_parentheses
set sqlite_fts3_enable_parentheses 1
#--------------------------------------------------------------------------
# Start of Tcl infrastructure used by tests. The entry point is
# [do_fts3query_test] (described below).
#
# do_fts3query_test TESTNAME TABLE MATCHEXPR
#
# This proc runs several test cases on FTS3/4 table $TABLE using match
# expression $MATCHEXPR. All documents in $TABLE must be formatted so that
# they can be "tokenized" using the Tcl list commands (llength, lindex etc.).
# The name and column names used by $TABLE must not require any quoting or
# escaping when used in SQL statements.
#
# $MATCHEXPR may be any expression accepted by the FTS4 MATCH operator,
# except that the "<column-name>:token" syntax is not supported. Tcl list
# commands are used to tokenize the expression. Any parentheses must appear
# either as separate list elements, or as the first (for opening) or last
# (for closing) character of a list element. For example, the expression
# "(a OR b)c" will not be parsed correctly, but "( a OR b) c" will.
#
# Set sqlite_fts3_enable_parentheses to 1. The same assignment is already
# made during the file setup above, so this repeat does not change the value.
set sqlite_fts3_enable_parentheses 1
# do_fts3query_test TESTNAME TABLE MATCHEXPR
#
# Compute the expected results of "TABLE MATCH MATCHEXPR" in Tcl using
# [get_near_results], then run four test cases that compare them with what
# the database returns:
#
#   TESTNAME(...).1   matching docids, ORDER BY docid ASC
#   TESTNAME(...).2   matching docids, ORDER BY docid DESC
#   TESTNAME(...).3   docid and decoded matchinfo('x'), ORDER BY docid DESC
#   TESTNAME(...).4   docid and decoded matchinfo('x'), ORDER BY docid ASC
#
# The "(...)" part of each test name holds the expression and the number of
# rows it is expected to match. The SQL function mit() must be registered on
# the database handle before this proc is called.
#
proc do_fts3query_test {tn tbl expr} {
  # aMatchinfo(<docid>) is set to the expected matchinfo list for each docid
  # that is expected to match.
  get_near_results $tbl $expr aMatchinfo

  # Sort the expected docids once for each direction and reuse the lists.
  set docids_asc  [lsort -integer -increasing [array names aMatchinfo]]
  set docids_desc [lsort -integer -decreasing [array names aMatchinfo]]

  set matchinfo_asc [list]
  foreach docid $docids_asc {
    lappend matchinfo_asc $docid $aMatchinfo($docid)
  }
  set matchinfo_desc [list]
  foreach docid $docids_desc {
    lappend matchinfo_desc $docid $aMatchinfo($docid)
  }

  # The test title shows the expression exactly as supplied by the caller.
  set title "(\"$expr\" -> [llength $docids_asc] rows)"

  # The expression is embedded in a single-quoted SQL string literal below,
  # so double any single quotes it contains to keep the SQL well formed.
  set match [string map {' ''} $expr]

  do_execsql_test $tn$title.1 "
    SELECT docid FROM $tbl WHERE $tbl MATCH '$match' ORDER BY docid ASC
  " $docids_asc

  do_execsql_test $tn$title.2 "
    SELECT docid FROM $tbl WHERE $tbl MATCH '$match' ORDER BY docid DESC
  " $docids_desc

  do_execsql_test $tn$title.3 "
    SELECT docid, mit(matchinfo($tbl, 'x')) FROM $tbl
    WHERE $tbl MATCH '$match' ORDER BY docid DESC
  " $matchinfo_desc

  do_execsql_test $tn$title.4 "
    SELECT docid, mit(matchinfo($tbl, 'x')) FROM $tbl
    WHERE $tbl MATCH '$match' ORDER BY docid ASC
  " $matchinfo_asc
}
# mit BLOB
#
# Decode BLOB as a sequence of 32-bit integers stored in the byte order of
# the host platform, and return the integers as a Tcl list.
#
proc mit {blob} {
  # Pick the [binary scan] format that matches the host byte order.
  switch -exact -- $::tcl_platform(byteOrder) {
    littleEndian { set fmt i* }
    bigEndian    { set fmt I* }
  }
  binary scan $blob $fmt values
  return $values
}
# Register the Tcl proc [mit] on database handle [db] as the SQL function
# mit(), which the queries built by do_fts3query_test apply to matchinfo().
db func mit mit
# fix_near_expr EXPR
#
# EXPR is a list of the form "PHRASE ?OP PHRASE?...". Return a copy in which
# each operator element is rewritten as follows (case-insensitively):
#
#   NEAR       ->  10
#   NEAR/<n>   ->  <n>
#
# Any other operator element, and every phrase, is copied unchanged.
#
proc fix_near_expr {expr} {
  set fixed [list [lindex $expr 0]]
  foreach {op phrase} [lrange $expr 1 end] {
    if {[string match -nocase near $op]} {
      set op 10
    } elseif {[string match -nocase near/* $op]} {
      # Drop the five-character "NEAR/" prefix, leaving only the distance.
      set op [string range $op 5 end]
    }
    lappend fixed $op $phrase
  }
  return $fixed
}
# get_single_near_results TABLE EXPR ARRAYVAR NULLVAR
#
# Compute the expected matchinfo('x') output for an expression that is a
# single phrase, or several phrases joined by NEAR operators (no AND, OR or
# NOT). Every row of TABLE is read and evaluated with [fts3_near_match].
#
#   ARRAYVAR  Name of an array in the caller. It is cleared, and then for
#             each matching docid, element ARRAYVAR(<docid>) is set to the
#             expected matchinfo list for that row.
#
#   NULLVAR   Name of a variable in the caller. It is set to the matchinfo
#             list expected for a row that does not match this expression
#             (the per-row hit count is 0, the table-wide totals are kept).
#
# If the global ::fts3_deferred exists and EXPR is a single phrase, the
# tokens listed in ::fts3_deferred are handled specially (see below).
#
proc get_single_near_results {tbl expr arrayvar nullvar} {
  upvar $arrayvar aMatchinfo
  upvar $nullvar nullentry
  catch {array unset aMatchinfo}

  # Replace NEAR and NEAR/<n> operators with plain integer distances.
  set expr [fix_near_expr $expr]

  # Calculate the expected results using [fts3_near_match]. The following
  # loop populates the "hits" and "counts" arrays as follows:
  #
  #   1. For each document in the table that matches the NEAR expression,
  #      hits($docid) is set to 1. The set of docids that match the expression
  #      can therefore be found using [array names hits].
  #
  #   2. For each column of each document in the table, counts($docid,$iCol)
  #      is set to the -phrasecountvar output.
  #
  catch { array unset hits }

  # Start iCol at zero so that nCol below is still defined (as 0) when the
  # table contains no rows and the loop body never runs.
  set iCol 0
  db eval "SELECT docid, * FROM $tbl" d {
    set iCol 0
    # d(*) lists the result column names; the first one is docid, the rest
    # are the columns of the table itself.
    foreach col [lrange $d(*) 1 end] {
      set docid $d(docid)
      set hit [fts3_near_match $d($col) $expr -p counts($docid,$iCol)]
      if {$hit} { set hits($docid) 1 }
      incr iCol
    }
  }

  # The fixed expression alternates phrases and distances, so a list of
  # length 2N-1 contains N phrases.
  set nPhrase [expr {([llength $expr]+1)/2}]
  set nCol $iCol

  # This block populates the nHit and nDoc arrays. For each phrase/column
  # in the query/table, array elements are set as follows:
  #
  #   nHit($iPhrase,$iCol) - Total number of hits for phrase $iPhrase in
  #                          column $iCol.
  #
  #   nDoc($iPhrase,$iCol) - Number of documents with at least one hit for
  #                          phrase $iPhrase in column $iCol.
  #
  for {set iPhrase 0} {$iPhrase < $nPhrase} {incr iPhrase} {
    for {set iCol 0} {$iCol < $nCol} {incr iCol} {
      set nHit($iPhrase,$iCol) 0
      set nDoc($iPhrase,$iCol) 0
    }
  }
  foreach key [array names counts] {
    # Keys have the form "<docid>,<iCol>"; the value holds one count per
    # phrase, in phrase order.
    set iCol [lindex [split $key ,] 1]
    set iPhrase 0
    foreach c $counts($key) {
      if {$c>0} { incr nDoc($iPhrase,$iCol) 1 }
      incr nHit($iPhrase,$iCol) $c
      incr iPhrase
    }
  }

  # Special handling for single-phrase expressions when ::fts3_deferred is
  # set. Each phrase token found in ::fts3_deferred is replaced by "*".
  # NOTE(review): presumably this mirrors how FTS reports matchinfo for
  # tokens it does not load from the index - confirm against the FTS docs.
  if {[info exists ::fts3_deferred] && [llength $expr]==1} {
    set phrase [lindex $expr 0]
    set rewritten [list]
    set partial 0
    foreach tok $phrase {
      if {[lsearch $::fts3_deferred $tok]>=0} {
        lappend rewritten *
      } else {
        lappend rewritten $tok
        set partial 1
      }
    }
    if {$partial==0} {
      # Every token of the phrase is deferred: the totals for phrase 0 are
      # expected to equal the number of rows in the table.
      set tblsize [db one "SELECT count(*) FROM $tbl"]
      for {set iCol 0} {$iCol < $nCol} {incr iCol} {
        set nHit(0,$iCol) $tblsize
        set nDoc(0,$iCol) $tblsize
      }
    } elseif {$rewritten != $phrase} {
      # Only some tokens are deferred. Strip leading and trailing "*"
      # elements, recompute the results for the rewritten phrase, and use
      # those values for the documents that matched the original phrase.
      while {[lindex $rewritten end] == "*"} {
        set rewritten [lrange $rewritten 0 end-1]
      }
      while {[lindex $rewritten 0] == "*"} {
        set rewritten [lrange $rewritten 1 end]
      }
      get_single_near_results $tbl [list $rewritten] aRewrite nullentry
      foreach docid [array names hits] {
        set aMatchinfo($docid) $aRewrite($docid)
      }
      return
    }
  }

  # Set up the aMatchinfo array. For each document, set aMatchinfo($docid) to
  # contain the output of matchinfo('x') for the document.
  #
  foreach docid [array names hits] {
    set mi [list]
    for {set iPhrase 0} {$iPhrase<$nPhrase} {incr iPhrase} {
      for {set iCol 0} {$iCol<$nCol} {incr iCol} {
        lappend mi [lindex $counts($docid,$iCol) $iPhrase]
        lappend mi $nHit($iPhrase,$iCol)
        lappend mi $nDoc($iPhrase,$iCol)
      }
    }
    set aMatchinfo($docid) $mi
  }

  # Set up the nullentry output.
  #
  set nullentry [list]
  for {set iPhrase 0} {$iPhrase<$nPhrase} {incr iPhrase} {
    for {set iCol 0} {$iCol<$nCol} {incr iCol} {
      lappend nullentry 0 $nHit($iPhrase,$iCol) $nDoc($iPhrase,$iCol)
    }
  }
}
# matching_brackets EXPR
#
# Return 1 if the first character of EXPR is "(", the last character is ")"
# and those two brackets are a matching pair, i.e. the opening bracket is not
# closed before the final character. Return 0 otherwise.
#
proc matching_brackets {expr} {
  if {[string index $expr 0] ne "(" || [string index $expr end] ne ")"} {
    return 0
  }

  # Walk the string from the second character, tracking bracket depth, and
  # stop as soon as the initial "(" has been closed.
  set depth 1
  set len [string length $expr]
  set pos 1
  while {$depth && $pos < $len} {
    switch -exact -- [string index $expr $pos] {
      "(" { incr depth }
      ")" { incr depth -1 }
    }
    incr pos
  }

  # The pair matches only if the depth reached zero exactly at the end.
  return [expr {$depth==0 && $pos==$len}]
}
# get_near_results TABLE EXPR ARRAYVAR ?NULLVAR?
#
# Compute the expected matchinfo('x') output for match expression EXPR,
# which may contain the binary operators NOT, AND and OR as well as
# brackets. The expression is split at one top-level operator, each side is
# evaluated recursively, and the two results are combined.
#
#   ARRAYVAR  Name of an array in the caller. On return it contains one
#             element per matching docid, holding the expected matchinfo
#             list for that row. Any previous contents are discarded.
#
#   NULLVAR   Optional name of a variable in the caller. If supplied, it is
#             set to the matchinfo list expected for a non-matching row.
#
# An error is raised if the brackets in EXPR are not balanced.
#
proc get_near_results {tbl expr arrayvar {nullvar ""}} {
  upvar $arrayvar aMatchinfo
  if {$nullvar != ""} { upvar $nullvar nullentry }

  # Strip surrounding whitespace and any brackets that enclose the entire
  # expression.
  set expr [string trim $expr]
  while { [matching_brackets $expr] } {
    set expr [string trim [string range $expr 1 end-1]]
  }

  # Find the operator to split on. Among operators that are not inside
  # brackets, the one with the largest prec() value is chosen; because the
  # comparison is ">=", the last such operator in the expression wins.
  set prec(NOT) 1
  set prec(AND) 2
  set prec(OR)  3

  set currentprec 0
  set iBracket 0
  set expr_length [llength $expr]
  for {set i 0} {$i < $expr_length} {incr i} {
    set op [lindex $expr $i]
    if {$iBracket==0 && [info exists prec($op)] && $prec($op)>=$currentprec } {
      set opidx $i
      set currentprec $prec($op)
    } else {
      # Not a usable operator: update the bracket depth from this element.
      foreach c [split $op ""] {
        if {$c == "("} { incr iBracket +1 }
        if {$c == ")"} { incr iBracket -1 }
      }
    }
  }
  if {$iBracket!=0} { error "mismatched brackets in: $expr" }

  if {[info exists opidx]==0} {
    # No operator found: a plain phrase or NEAR expression.
    get_single_near_results $tbl $expr aMatchinfo nullentry
  } else {
    set eLeft  [lrange $expr 0 [expr {$opidx-1}]]
    set eRight [lrange $expr [expr {$opidx+1}] end]

    get_near_results $tbl $eLeft  aLeft  nullleft
    get_near_results $tbl $eRight aRight nullright

    # Discard anything already stored in the output array, as the
    # single-expression path does, so that a reused array cannot keep docids
    # from an earlier call.
    catch { array unset aMatchinfo }

    switch -- [lindex $expr $opidx] {
      "NOT" {
        # Rows matched by the left side only; matchinfo comes from the left.
        foreach hit [array names aLeft] {
          if {0==[info exists aRight($hit)]} {
            set aMatchinfo($hit) $aLeft($hit)
          }
        }
        set nullentry $nullleft
      }

      "AND" {
        # Rows matched by both sides; matchinfo is left followed by right.
        foreach hit [array names aLeft] {
          if {[info exists aRight($hit)]} {
            set aMatchinfo($hit) [concat $aLeft($hit) $aRight($hit)]
          }
        }
        set nullentry [concat $nullleft $nullright]
      }

      "OR" {
        # Rows matched by either side. The non-matching side contributes
        # its null entry. Entries consumed from aRight are removed so that
        # the second loop only sees rows matched by the right side alone.
        foreach hit [array names aLeft] {
          if {[info exists aRight($hit)]} {
            set aMatchinfo($hit) [concat $aLeft($hit) $aRight($hit)]
            unset aRight($hit)
          } else {
            set aMatchinfo($hit) [concat $aLeft($hit) $nullright]
          }
        }
        foreach hit [array names aRight] {
          set aMatchinfo($hit) [concat $nullleft $aRight($hit)]
        }
        set nullentry [concat $nullleft $nullright]
      }
    }
  }
}
# End of test procs. Actual tests are below this line.
#--------------------------------------------------------------------------
#--------------------------------------------------------------------------
# The following test cases - fts3auto-1.* - focus on testing the Tcl
# command [fts3_near_match], which is used by other tests in this file.
#
# test_fts3_near_match TESTNAME DOC EXPR RESULT
#
# Run [fts3_near_match] on document DOC and expression EXPR, then issue a
# do_test case named TESTNAME in the caller's context that checks the
# -phrasecountvar output against RESULT.
#
proc test_fts3_near_match {tn doc expr res} {
  fts3_near_match $doc $expr -phrasecountvar counts

  # The test body simply yields the value captured above.
  set script [list set {} $counts]
  uplevel 1 [list do_test $tn $script $res]
}
# 1.1.*: expected phrase counts in the document "a b c a b" for a single
# term, for terms joined by NEAR distances, and for quoted phrases.
test_fts3_near_match 1.1.1 {a b c a b} a {2}
test_fts3_near_match 1.1.2 {a b c a b} {a 5 b 6 c} {2 2 1}
test_fts3_near_match 1.1.3 {a b c a b} {"a b"} {2}
test_fts3_near_match 1.1.4 {a b c a b} {"b c"} {1}
test_fts3_near_match 1.1.5 {a b c a b} {"c c"} {0}
# 1.2.*: in "a b c d e f g" the expected counts are 0 with a distance of 2
# and 1 with a distance of 3, in either operand order, for both single terms
# and two-word phrases.
test_fts3_near_match 1.2.1 "a b c d e f g" {b 2 f} {0 0}
test_fts3_near_match 1.2.2 "a b c d e f g" {b 3 f} {1 1}
test_fts3_near_match 1.2.3 "a b c d e f g" {f 2 b} {0 0}
test_fts3_near_match 1.2.4 "a b c d e f g" {f 3 b} {1 1}
test_fts3_near_match 1.2.5 "a b c d e f g" {"a b" 2 "f g"} {0 0}
test_fts3_near_match 1.2.6 "a b c d e f g" {"a b" 3 "f g"} {1 1}
# 1.3.*: three phrases chained with two distances over the 26-token
# document A; only the second distance differs between the two cases.
set A "a b c d e f g h i j k l m n o p q r s t u v w x y z"
test_fts3_near_match 1.3.1 $A {"c d" 5 "i j" 1 "e f"} {0 0 0}
test_fts3_near_match 1.3.2 $A {"c d" 5 "i j" 2 "e f"} {1 1 1}
#--------------------------------------------------------------------------
# Test cases fts3auto-2.* run some simple tests using the
# [do_fts3query_test] proc.
#
# Repeat the same set of queries for several FTS4 table definitions: the
# default, explicit order=DESC and order=ASC, and each of those with prefix=1.
foreach {tn create} {
1 "fts4(a, b)"
2 "fts4(a, b, order=DESC)"
3 "fts4(a, b, order=ASC)"
4 "fts4(a, b, prefix=1)"
5 "fts4(a, b, order=DESC, prefix=1)"
6 "fts4(a, b, order=ASC, prefix=1)"
} {
# Recreate table t1 and insert 32 rows. For row i, column a holds the words
# selected by the bits of i (bit 0 -> one, bit 1 -> two, bit 2 -> three,
# bit 3 -> four, bit 4 -> five), so each subset of the five words occurs in
# exactly one row, including the empty document for i = 0. Column b is
# always NULL.
do_test 2.$tn.1 {
# The DROP is allowed to fail when t1 does not exist yet.
catchsql { DROP TABLE t1 }
execsql "CREATE VIRTUAL TABLE t1 USING $create"
for {set i 0} {$i<32} {incr i} {
set doc [list]
if {$i&0x01} {lappend doc one}
if {$i&0x02} {lappend doc two}
if {$i&0x04} {lappend doc three}
if {$i&0x08} {lappend doc four}
if {$i&0x10} {lappend doc five}
execsql { INSERT INTO t1 VALUES($doc, null) }
}
} {}
# Queries covering single terms, prefix terms, NEAR chains, the boolean
# operators with and without brackets, and quoted phrases.
foreach {tn2 expr} {
1 {one}
2 {one NEAR/1 five}
3 {t*}
4 {t* NEAR/0 five}
5 {o* NEAR/1 f*}
6 {one NEAR five NEAR two NEAR four NEAR three}
7 {one NEAR xyz}
8 {one OR two}
9 {one AND two}
10 {one NOT two}
11 {one AND two OR three}
12 {three OR one AND two}
13 {(three OR one) AND two}
14 {(three OR one) AND two NOT (five NOT four)}
15 {"one two"}
16 {"one two" NOT "three four"}
} {
do_fts3query_test 2.$tn.2.$tn2 t1 $expr
}
}
#--------------------------------------------------------------------------
# Some test cases involving deferred tokens.
#
# make_token_deferrable TABLE TOKEN
#
# Bulk-load FTS table TABLE with rows that each consist of 100 copies of
# TOKEN. The number of rows inserted is (row-count * page-size * 1.2) / 100,
# using the row count and page size read before any rows are added. One
# further row, 'aaaaaaa <TOKEN>aaaaa', is inserted last. All rows are added
# inside a single transaction.
#
# NOTE(review): going by the proc name, the intent is to make the index
# entry for TOKEN large enough that FTS treats it as a deferred token -
# confirm the sizing against the FTS implementation.
#
proc make_token_deferrable {tbl token} {
  set nRow [db one "SELECT count(*) FROM $tbl"]
  set pgsz [db one "PRAGMA page_size"]

  # FTS special command, issued by inserting into the column named after
  # the table. NOTE(review): presumably raises the pending-data limit so the
  # inserts below are buffered rather than flushed early - confirm.
  execsql "INSERT INTO $tbl ($tbl) VALUES('maxpending=100000000')"

  # The INSERT statement in the loop reads this variable by name, so it
  # must keep the name "doc".
  set doc [string repeat "$token " 100]
  set nInsert [expr {($nRow * $pgsz * 1.2)/100}]

  execsql BEGIN
  set i 0
  while {$i < $nInsert} {
    execsql "INSERT INTO $tbl VALUES(\$doc)"
    incr i
  }
  execsql "INSERT INTO $tbl VALUES('aaaaaaa ${token}aaaaa')"
  execsql COMMIT
}
# Run the deferred-token tests against a default FTS4 table and against one
# created with order=DESC.
foreach {tn create} {
1 "fts4(x)"
2 "fts4(x, order=DESC)"
} {
# Start each iteration with a fresh t1. The DROP is allowed to fail when the
# table does not exist yet.
catchsql { DROP TABLE t1 }
execsql "CREATE VIRTUAL TABLE t1 USING $create"
# Nine short documents with explicit docids -2 to 6. The first six are
# rotations of "a b c d e f g h i j k".
do_execsql_test 3.$tn.1 {
INSERT INTO t1(docid, x) VALUES(-2, 'a b c d e f g h i j k');
INSERT INTO t1(docid, x) VALUES(-1, 'b c d e f g h i j k a');
INSERT INTO t1(docid, x) VALUES(0, 'c d e f g h i j k a b');
INSERT INTO t1(docid, x) VALUES(1, 'd e f g h i j k a b c');
INSERT INTO t1(docid, x) VALUES(2, 'e f g h i j k a b c d');
INSERT INTO t1(docid, x) VALUES(3, 'f g h i j k a b c d e');
INSERT INTO t1(docid, x) VALUES(4, 'a c e g i k');
INSERT INTO t1(docid, x) VALUES(5, 'a d g j');
INSERT INTO t1(docid, x) VALUES(6, 'c a b');
}
# Add a large number of rows made up of token c.
make_token_deferrable t1 c
# With an empty ::fts3_deferred list the expected-result code applies no
# special handling to any token.
set ::fts3_deferred [list]
foreach {tn2 expr} {
1 {a OR c}
} {
do_fts3query_test 3.$tn.2.$tn2 t1 $expr
}
# From here on the expected-result code handles token c as deferred when a
# query consists of a single phrase.
set ::fts3_deferred [list c]
# NOTE(review): the trailing "AND 0" makes this WHERE clause always false,
# so the UPDATE modifies no rows. It looks deliberately disabled - confirm
# before relying on the segment blocks being zeroed.
execsql {
UPDATE t1_segments
SET block = zeroblob(length(block))
WHERE length(block)>10000 AND 0
}
# NOTE(review): these cases reuse the 3.$tn.2.$tn2 prefix of the loop above.
# The full test names stay distinct only because do_fts3query_test appends
# the match expression to the name.
foreach {tn2 expr} {
1 {a NEAR c}
2 {a AND c}
3 {"a c"}
4 {"c a"}
5 {"a c" NEAR/1 g}
6 {"a c" NEAR/0 g}
} {
do_fts3query_test 3.$tn.2.$tn2 t1 $expr
}
}
# Restore sqlite_fts3_enable_parentheses to the value saved in sfep at the
# top of this file, then finish the test script.
set sqlite_fts3_enable_parentheses $sfep
finish_test