Fix a bug in merging FTS language tables for languages other than language 0.
FossilOrigin-Name: d281cb8984c911a4c0cce2ec299e1351d8e580e4
This commit is contained in:
parent
7fcb214b93
commit
e3ddd1ac42
@ -488,7 +488,7 @@ int sqlite3Fts3AllSegdirs(
|
||||
/* "SELECT * FROM %_segdir WHERE level = ? ORDER BY ..." */
|
||||
rc = fts3SqlStmt(p, SQL_SELECT_LEVEL, &pStmt, 0);
|
||||
if( rc==SQLITE_OK ){
|
||||
sqlite3_bind_int(pStmt, 1, iLevel+iIndex*FTS3_SEGDIR_MAXLEVEL);
|
||||
sqlite3_bind_int(pStmt, 1, getAbsoluteLevel(p, iLangid, iIndex, iLevel));
|
||||
}
|
||||
}
|
||||
*ppStmt = pStmt;
|
||||
|
17
manifest
17
manifest
@ -1,5 +1,5 @@
|
||||
C Add\sthe\s"languageid="\soption\sto\sfts4.\sThis\scode\sis\sstill\slargely\suntested\sand\salsmost\scertainly\sbuggy.
|
||||
D 2012-03-01T19:44:20.362
|
||||
C Fix\sa\sbug\sin\smerging\sFTS\slanguage\stables\sfor\slanguages\sother\sthan\slanguage\s0.
|
||||
D 2012-03-02T11:48:50.564
|
||||
F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f
|
||||
F Makefile.in 3f79a373e57c3b92dabf76f40b065e719d31ac34
|
||||
F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23
|
||||
@ -78,7 +78,7 @@ F ext/fts3/fts3_test.c 24fa13f330db011500acb95590da9eee24951894
|
||||
F ext/fts3/fts3_tokenizer.c 9ff7ec66ae3c5c0340fa081958e64f395c71a106
|
||||
F ext/fts3/fts3_tokenizer.h 13ffd9fcb397fec32a05ef5cd9e0fa659bf3dbd3
|
||||
F ext/fts3/fts3_tokenizer1.c 0dde8f307b8045565cf63797ba9acfaff1c50c68
|
||||
F ext/fts3/fts3_write.c 489d262b1ee9ab1dbb4da48bd8737fac15d0f58f
|
||||
F ext/fts3/fts3_write.c 36fc2e3a28f51ee135a344877c1e4be0a9f45e6e
|
||||
F ext/fts3/fts3speed.tcl b54caf6a18d38174f1a6e84219950d85e98bb1e9
|
||||
F ext/fts3/mkfts3amal.tcl 252ecb7fe6467854f2aa237bf2c390b74e71f100
|
||||
F ext/icu/README.txt bf8461d8cdc6b8f514c080e4e10dc3b2bbdfefa9
|
||||
@ -496,7 +496,7 @@ F test/fts3snippet.test 8e956051221a34c7daeb504f023cb54d5fa5a8b2
|
||||
F test/fts3sort.test 95be0b19d7e41c44b29014f13ea8bddd495fd659
|
||||
F test/fts4aa.test 6e7f90420b837b2c685f3bcbe84c868492d40a68
|
||||
F test/fts4content.test 17b2360f7d1a9a7e5aa8022783f5c5731b6dfd4f
|
||||
F test/fts4langid.test 3d968b7c0afb8be1609794267f34b89d378a81ea
|
||||
F test/fts4langid.test 7ab7be619d3acb3727e4bef3230ba3dbcf2e0556
|
||||
F test/func.test 6c5ce11e3a0021ca3c0649234e2d4454c89110ca
|
||||
F test/func2.test 772d66227e4e6684b86053302e2d74a2500e1e0f
|
||||
F test/func3.test 001021e5b88bd02a3b365a5c5fd8f6f49d39744a
|
||||
@ -992,10 +992,7 @@ F tool/tostr.awk e75472c2f98dd76e06b8c9c1367f4ab07e122d06
|
||||
F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f
|
||||
F tool/warnings-clang.sh 9f406d66e750e8ac031c63a9ef3248aaa347ef2a
|
||||
F tool/warnings.sh fbc018d67fd7395f440c28f33ef0f94420226381
|
||||
P 16330a2f7262173a32ae48a72c0ee2522b6dc554
|
||||
R 86036df8ba11902f17395620671e5794
|
||||
T *branch * fts4-languageid
|
||||
T *sym-fts4-languageid *
|
||||
T -sym-trunk *
|
||||
P bea257f70f10dd1111d79cabd1e1462dc651704d
|
||||
R a3a9247d2c76c9d90f9fc486f3311f0d
|
||||
U dan
|
||||
Z 6902c01b6e8a000d5e06f8fe8778490f
|
||||
Z f1e998b56e58f712fe6da1411961b8ef
|
||||
|
@ -1 +1 @@
|
||||
bea257f70f10dd1111d79cabd1e1462dc651704d
|
||||
d281cb8984c911a4c0cce2ec299e1351d8e580e4
|
@ -24,6 +24,27 @@ ifcapable !fts3 {
|
||||
|
||||
set ::testprefix fts4langid
|
||||
|
||||
#---------------------------------------------------------------------------
|
||||
# Test plan:
|
||||
#
|
||||
# 1.* - Warm-body tests created for specific purposes during development.
|
||||
# Passing these doesn't really prove much.
|
||||
#
|
||||
# 2.* - Test that FTS queries only ever return rows associated with
|
||||
# the requested language.
|
||||
#
|
||||
# 3.* - Test that the 'optimize' and 'rebuild' commands work correctly.
|
||||
#
|
||||
# 4.* - Test that if one is provided, the tokenizer xLanguage method
|
||||
# is called to configure the tokenizer before tokenizing query
|
||||
# or document text.
|
||||
#
|
||||
# 5.* - Test the fts4aux table when the associated FTS4 table contains
|
||||
# multiple languages.
|
||||
#
|
||||
# 6.* - Tests with content= tables. Both where there is a real
|
||||
# underlying content table and where there is not.
|
||||
#
|
||||
|
||||
|
||||
do_execsql_test 1.1 {
|
||||
@ -74,5 +95,165 @@ do_catchsql_test 1.17 {
|
||||
INSERT INTO t1(content, lang_id) VALUES('123', -1);
|
||||
} {1 {constraint failed}}
|
||||
|
||||
# Test 1.18: recreate t1 with a languageid column and insert 19 single-token
# documents ('A' through 'S'), every one of them tagged with lang_id 13.
# The final MATCH query does not constrain lang_id and is expected to
# return no rows at all (expected result {}).
# NOTE(review): presumably the 19 separate inserts are there to force an
# automatic segment merge for a non-zero language, which is what this
# commit fixes - confirm against the FTS merge threshold.
do_execsql_test 1.18 {
|
||||
DROP TABLE t1;
|
||||
CREATE VIRTUAL TABLE t1 USING fts4(languageid=lang_id);
|
||||
INSERT INTO t1(content, lang_id) VALUES('A', 13);
|
||||
INSERT INTO t1(content, lang_id) VALUES('B', 13);
|
||||
INSERT INTO t1(content, lang_id) VALUES('C', 13);
|
||||
INSERT INTO t1(content, lang_id) VALUES('D', 13);
|
||||
INSERT INTO t1(content, lang_id) VALUES('E', 13);
|
||||
INSERT INTO t1(content, lang_id) VALUES('F', 13);
|
||||
INSERT INTO t1(content, lang_id) VALUES('G', 13);
|
||||
INSERT INTO t1(content, lang_id) VALUES('H', 13);
|
||||
INSERT INTO t1(content, lang_id) VALUES('I', 13);
|
||||
INSERT INTO t1(content, lang_id) VALUES('J', 13);
|
||||
INSERT INTO t1(content, lang_id) VALUES('K', 13);
|
||||
INSERT INTO t1(content, lang_id) VALUES('L', 13);
|
||||
INSERT INTO t1(content, lang_id) VALUES('M', 13);
|
||||
INSERT INTO t1(content, lang_id) VALUES('N', 13);
|
||||
INSERT INTO t1(content, lang_id) VALUES('O', 13);
|
||||
INSERT INTO t1(content, lang_id) VALUES('P', 13);
|
||||
INSERT INTO t1(content, lang_id) VALUES('Q', 13);
|
||||
INSERT INTO t1(content, lang_id) VALUES('R', 13);
|
||||
INSERT INTO t1(content, lang_id) VALUES('S', 13);
|
||||
SELECT rowid FROM t1 WHERE t1 MATCH 'A';
|
||||
} {}
|
||||
|
||||
|
||||
#-------------------------------------------------------------------------
|
||||
# Test cases 2.*
|
||||
#
|
||||
|
||||
# build_multilingual_db_1 DB
#
# Create FTS4 table t2(x, y, languageid=l) using database handle DB and
# populate it with 1000 rows, docid 0..999.
#
# For each docid the thousands, hundreds, tens and units digits (in that
# order) are spelled out twice: column x uses the number words from
# $xwords and column y uses the Greek-letter words from $ywords. The
# language id of a row is (docid % 9), so languages 0..8 are populated.
#
# The variable names i, x, y and iLangid are referenced by name from the
# INSERT statement and must not be renamed.
#
proc build_multilingual_db_1 {db} {
  $db eval { CREATE VIRTUAL TABLE t2 USING fts4(x, y, languageid=l) }

  set xwords [list zero one two three four five six seven eight nine ten]
  set ywords [list alpha beta gamma delta epsilon zeta eta theta iota kappa]

  for {set i 0} {$i < 1000} {incr i} {
    # Braced expressions avoid double substitution and get byte-compiled.
    set iLangid [expr {$i % 9}]

    # One word per decimal digit, most significant digit first.
    set x [list]
    set y [list]
    foreach divisor {1000 100 10 1} {
      set digit [expr {($i / $divisor) % 10}]
      lappend x [lindex $xwords $digit]
      lappend y [lindex $ywords $digit]
    }

    $db eval { INSERT INTO t2(docid, x, y, l) VALUES($i, $x, $y, $iLangid) }
  }
}
|
||||
|
||||
# rowid_list_set_langid LANGID
#
# Record LANGID in the global variable rowid_list_langid, which the
# [rowid_list] command reads to decide which language to scan.
#
proc rowid_list_set_langid {langid} {
  global rowid_list_langid
  set rowid_list_langid $langid
}
|
||||
# rowid_list PATTERN
#
# Scan table t2 directly (no FTS query) and return the docids, in ascending
# order, of all rows in the language last selected with
# [rowid_list_set_langid] whose x or y column contains PATTERN as a
# substring. PATTERN is embedded in a [string match] glob, so glob
# meta-characters in it are interpreted as such.
#
# The variable langid is referenced by name from the SQL text, and docid,
# x and y are set by [db eval] for each row.
#
proc rowid_list {pattern} {
  global rowid_list_langid
  set langid $rowid_list_langid
  set glob "*$pattern*"

  set matches [list]
  db eval {SELECT docid, x, y FROM t2 WHERE l = $langid ORDER BY docid ASC} {
    set hit [expr {[string match $glob $x] || [string match $glob $y]}]
    if {$hit} { lappend matches $docid }
  }
  return $matches
}
|
||||
|
||||
# or_merge_list LIST1 LIST2
#
# Merge two ascending lists into a single ascending list containing every
# value that appears in either input. A value present at the current
# position of both lists is emitted only once. Whatever remains of either
# list once the other is exhausted is appended unchanged.
#
proc or_merge_list {list1 list2} {
  set merged [list]
  set len1 [llength $list1]
  set len2 [llength $list2]

  set pos1 0
  set pos2 0
  while {$pos1 < $len1 && $pos2 < $len2} {
    set head1 [lindex $list1 $pos1]
    set head2 [lindex $list2 $pos2]

    if {$head1 <= $head2} {
      # Take from list1; on a tie also consume the duplicate in list2.
      lappend merged $head1
      incr pos1
      if {$head1 == $head2} { incr pos2 }
    } else {
      lappend merged $head2
      incr pos2
    }
  }

  set tail1 [lrange $list1 $pos1 end]
  set tail2 [lrange $list2 $pos2 end]
  return [concat $merged $tail1 $tail2]
}
|
||||
|
||||
# or_merge_lists LIST ?LIST ...?
#
# Fold any number of lists together with [or_merge_list], left to right,
# and return the result. With a single argument that list is returned
# as-is; with no arguments the result is the empty string.
#
proc or_merge_lists {args} {
  set merged [lindex $args 0]
  foreach next [lrange $args 1 end] {
    set merged [or_merge_list $merged $next]
  }
  return $merged
}
|
||||
|
||||
# and_merge_list LIST1 LIST2
#
# Return the elements of LIST1 that also occur in LIST2, preserving the
# order (and any repetition) found in LIST1.
#
proc and_merge_list {list1 list2} {
  # Index LIST2 by value so each membership test is an array lookup.
  foreach elem $list2 { set present($elem) 1 }

  set common [list]
  foreach elem $list1 {
    if {![info exists present($elem)]} continue
    lappend common $elem
  }
  return $common
}
|
||||
|
||||
|
||||
# and_merge_lists LIST ?LIST ...?
#
# Fold any number of lists together with [and_merge_list], left to right,
# and return the result. With a single argument that list is returned
# as-is; with no arguments the result is the empty string.
#
proc and_merge_lists {args} {
  set common [lindex $args 0]
  foreach next [lrange $args 1 end] {
    set common [and_merge_list $common $next]
  }
  return $common
}
|
||||
|
||||
# filter_list LIST LANGID
#
# Return the elements of LIST whose value modulo 9 equals LANGID, in their
# original order. The modulus 9 matches the rule build_multilingual_db_1
# uses to assign a language id to each docid.
#
proc filter_list {list langid} {
  set kept [list]
  foreach docid $list {
    if {($docid % 9) != $langid} continue
    lappend kept $docid
  }
  return $kept
}
|
||||
|
||||
# Test 2.0: set up the fixture for the 2.* tests by calling reset_db and
# then populating table t2 through build_multilingual_db_1 using the "db"
# handle. The expected result is the empty string.
# NOTE(review): reset_db is a test-harness helper not shown here;
# presumably it replaces "db" with a fresh, empty database - confirm.
do_test 2.0 {
|
||||
reset_db
|
||||
build_multilingual_db_1 db
|
||||
} {}
|
||||
|
||||
# do_test_2.1 TN QUERY RES_SCRIPT
#
# For each language id 0 through 9, run FTS expression QUERY against t2
# restricted to that language and check that the docids returned equal the
# value produced by evaluating RES_SCRIPT. Before RES_SCRIPT runs, the
# language is published via [rowid_list_set_langid] so that any
# [rowid_list] calls inside the script scan the same language.
#
# build_multilingual_db_1 assigns languages as (docid % 9), so language 9
# holds no rows; that iteration checks that both sides come back empty.
#
# Each comparison is reported as test case 2.1.TN.LANGID. The variables
# query and langid are referenced by name from the SQL text.
#
proc do_test_2.1 {tn query res_script} {
  for {set langid 0} {$langid < 10} {incr langid} {
    # Reference result, computed without using the FTS index.
    rowid_list_set_langid $langid
    set expected [eval $res_script]

    # Result of the real FTS query for the same language.
    set sql {SELECT docid FROM t2 WHERE t2 MATCH $query AND l = $langid}
    set got [execsql $sql]

    do_test 2.1.$tn.$langid [list set {} $got] $expected
  }
}
|
||||
|
||||
# 2.1.1: single-term query; expected docids are the rows whose x or y
# text contains "delta".
do_test_2.1 1 {delta} { rowid_list delta }
|
||||
# 2.1.2: phrase query; expected docids are the rows whose x or y text
# contains the substring "zero one two".
do_test_2.1 2 {"zero one two"} { rowid_list "zero one two" }
|
||||
# 2.1.3: three terms with no explicit operator; expected docids are the
# intersection of the per-term row lists.
do_test_2.1 3 {zero one two} {
|
||||
and_merge_lists [rowid_list zero] [rowid_list one] [rowid_list two]
|
||||
}
|
||||
# 2.1.4: OR of two phrases; expected docids are the union of the row
# lists for each phrase.
do_test_2.1 4 {"zero one" OR "one two"} {
|
||||
or_merge_lists [rowid_list "zero one"] [rowid_list "one two"]
|
||||
}
|
||||
|
||||
finish_test
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user