# 2014 Dec 20
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
#
# Tests focusing on fts5 locale support: the fts5_locale() SQL function
# and the locale=1 table option.
#

source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5locale

# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}

# Return the form of $token that the test tokenizer emits for $locale.
#
#   locale - Locale name. "reverse" reverses the characters of the token;
#            any other value leaves the token unchanged.
#   token  - The token text to transform.
#
# Returns the (possibly transformed) token.
#
proc transform_token {locale token} {
  switch -- $locale {
    reverse {
      # Build the reversed string by prepending one character at a time.
      set ret ""
      foreach c [split $token ""] {
        set ret "$c$ret"
      }
      set token $ret
    }

    default {
      # no-op
    }
  }

  set token
}

# Callback handed to [sqlite3_fts5_create_tokenizer] below. The arguments
# are ignored; the return value is the name of the proc that does the
# tokenizing.
#
proc tcl_create {args} { return "tcl_tokenize" }

# Tokenize $text, reporting each token through [sqlite3_fts5_token].
#
#   tflags - Not used by this implementation.
#   text   - The text to tokenize; it is split by [fts5_tokenize_split].
#
# The locale reported by [sqlite3_fts5_locale] changes the output:
#
#   "second"  - only every other token is emitted (the 1st, 3rd, 5th...).
#   "reverse" - each token is emitted with its characters reversed (see
#               transform_token).
#
proc tcl_tokenize {tflags text} {
  set iToken 1
  set bSkip 0
  if {[sqlite3_fts5_locale]=="second"} { set bSkip 1 }
  foreach {w iStart iEnd} [fts5_tokenize_split $text] {
    incr iToken
    # With bSkip==0 the modulus is always 0, so nothing is skipped. With
    # bSkip==1 tokens for which iToken is odd (the 2nd, 4th...) are skipped.
    if {(($iToken) % ($bSkip + 1))} continue

    set w [transform_token [sqlite3_fts5_locale] $w]
    sqlite3_fts5_token $w $iStart $iEnd
  }
}

#-------------------------------------------------------------------------
# Check that queries can have a locale attached to them.
#
reset_db
sqlite3_fts5_create_tokenizer -v2 db tcl tcl_create

do_execsql_test 1.0 {
  CREATE VIRTUAL TABLE t1 USING fts5(a, tokenize=tcl);
  INSERT INTO t1 VALUES('abc');
  INSERT INTO t1 VALUES('cba');
} {}

do_execsql_test 1.1 {
  SELECT rowid, a FROM t1( fts5_locale('en_US', 'abc') );
} {1 abc}

do_execsql_test 1.2 {
  SELECT rowid, a FROM t1( fts5_locale('reverse', 'abc') );
} {2 cba}

#-------------------------------------------------------------------------
# Test that the locale= option exists and seems to accept values. And
# that fts5_locale() values may only be inserted into an internal-content
# table if the locale=1 option was specified.
#
reset_db
sqlite3_fts5_create_tokenizer -v2 db tcl tcl_create

# b1 accepts fts5_locale() values (locale=1), b2 does not (locale=0).
do_execsql_test 2.1 {
  CREATE VIRTUAL TABLE b1 USING fts5(x, y, locale=1, tokenize=tcl);
  CREATE VIRTUAL TABLE b2 USING fts5(x, y, locale=0, tokenize=tcl);
  CREATE VIRTUAL TABLE ttt USING fts5vocab('b1', instance);
}

# Values other than 0 and 1 are rejected.
do_catchsql_test 2.2.1 {
  CREATE VIRTUAL TABLE b3 USING fts5(x, y, locale=2);
} {1 {malformed locale=... directive}}
do_catchsql_test 2.2.2 {
  CREATE VIRTUAL TABLE b3 USING fts5(x, y, locale=111);
} {1 {malformed locale=... directive}}

# 'locale' is not accepted as a special INSERT command.
do_catchsql_test 2.3 {
  INSERT INTO b1(b1, rank) VALUES('locale', 0);
} {1 {SQL logic error}}

do_execsql_test 2.4 {
  INSERT INTO b1 VALUES('abc', 'one two three');
  INSERT INTO b1 VALUES('def', fts5_locale('reverse', 'four five six'));
}

do_execsql_test 2.5 {
  INSERT INTO b2 VALUES('abc', 'one two three');
}

do_catchsql_test 2.6 {
  INSERT INTO b2 VALUES('def', fts5_locale('reverse', 'four five six'));
} {1 {fts5_locale() requires locale=1}}

# Row 2 was tokenized with the "reverse" locale, so it is indexed under the
# reversed terms. It matches either the reversed text directly, or the
# original text when the query carries the same locale.
do_execsql_test 2.7 { SELECT rowid FROM b1('one') } {1}
do_execsql_test 2.8 { SELECT rowid FROM b1('four') } {}
do_execsql_test 2.9 { SELECT rowid FROM b1('ruof') } 2
do_execsql_test 2.10 { SELECT rowid FROM b1(fts5_locale('reverse', 'five'))} 2

# Reading the column back returns the plain text, without the locale.
do_execsql_test 2.11 {
  SELECT x, quote(y) FROM b1
} {
  abc {'one two three'}
  def {'four five six'}
}

do_execsql_test 2.12 {
  SELECT quote(y) FROM b1('ruof')
} {
  {'four five six'}
}

do_execsql_test 2.13 { INSERT INTO b1(b1) VALUES('integrity-check'); }
do_execsql_test 2.14 { INSERT INTO b1(b1) VALUES('rebuild'); }
do_execsql_test 2.15 { INSERT INTO b1(b1) VALUES('integrity-check'); }

do_execsql_test 2.16 { DELETE FROM b1 WHERE rowid=2 }
do_execsql_test 2.17 { INSERT INTO b1(b1) VALUES('integrity-check'); }

#-------------------------------------------------------------------------
# Test the 'delete' command with contentless tables.
#
reset_db
sqlite3_fts5_create_tokenizer -v2 db tcl tcl_create
do_execsql_test 3.1 {
  CREATE VIRTUAL TABLE c1 USING fts5(x, content=, tokenize=tcl, locale=1);
  CREATE VIRTUAL TABLE c2 USING fts5vocab('c1', instance);

  INSERT INTO c1 VALUES('hello world');
  INSERT INTO c1 VALUES( fts5_locale('reverse', 'one two three') );
}

do_execsql_test 3.2 {
  SELECT DISTINCT term FROM c2 ORDER BY 1
} {
  eerht eno hello owt world
}

# The 'delete' command is given the same fts5_locale() value that was
# originally inserted for rowid 2.
do_execsql_test 3.3 {
  INSERT INTO c1(c1, rowid, x)
    VALUES('delete', 2, fts5_locale('reverse', 'one two three') );
}

do_execsql_test 3.4 {
  SELECT DISTINCT term FROM c2 ORDER BY 1
} {
  hello world
}

#-------------------------------------------------------------------------
# Test that an UPDATE that updates a subset of the columns does not
# magically discard the locale from those columns not updated.
#
reset_db
sqlite3_fts5_create_tokenizer -v2 db tcl tcl_create
do_execsql_test 4.1 {
  CREATE VIRTUAL TABLE d1 USING fts5(x, y, locale=1, tokenize=tcl);
  CREATE VIRTUAL TABLE d2 USING fts5vocab('d1', instance);

  INSERT INTO d1(rowid, x, y) VALUES(1, 'abc', 'def');
  INSERT INTO d1(rowid, x, y) VALUES(2, 'ghi', fts5_locale('reverse', 'hello'));
}

do_execsql_test 4.2 {
  SELECT DISTINCT term FROM d2 ORDER BY 1
} {
  abc def ghi olleh
}

do_execsql_test 4.3 {
  UPDATE d1 SET x='jkl' WHERE rowid=2;
}

# Column y of row 2 must still be indexed under the reversed term.
do_execsql_test 4.4 {
  SELECT DISTINCT term FROM d2 ORDER BY 1
} {
  abc def jkl olleh
}

do_execsql_test 4.5 {
  SELECT rowid, * FROM d1
} {
  1 abc def
  2 jkl hello
}

do_execsql_test 4.6 {
  UPDATE d1 SET rowid=4 WHERE rowid=2
}

do_execsql_test 4.7 {
  SELECT rowid, * FROM d1
} {
  1 abc def
  4 jkl hello
}

fts5_aux_test_functions db

do_execsql_test 4.8.1 {
  SELECT fts5_test_columntext(d1) FROM d1('jkl')
} {{jkl hello}}
do_execsql_test 4.8.2 {
  SELECT fts5_test_columntext(d1) FROM d1(fts5_locale('reverse', 'hello'))
} {{jkl hello}}

do_execsql_test 4.9 {
  SELECT fts5_test_columnlocale(d1) FROM d1(fts5_locale('reverse', 'hello'))
} {{{} reverse}}

do_execsql_test 4.10 {
  SELECT fts5_test_columnlocale(d1) FROM d1
} {
  {{} {}}
  {{} reverse}
}

#-------------------------------------------------------------------------
# Test that if an fts5_locale() value is written to an UNINDEXED
# column it is stored as text. This is so that blobs and other values
# can also be stored as is.
#
reset_db
sqlite3_fts5_create_tokenizer -v2 db tcl tcl_create

do_execsql_test 5.1 {
  CREATE VIRTUAL TABLE t1 USING fts5(
      x, y UNINDEXED, locale=1, tokenize=tcl
  );

  INSERT INTO t1(rowid, x, y) VALUES(111,
    fts5_locale('reverse', 'one two three'),
    fts5_locale('reverse', 'four five six')
  );
}

do_execsql_test 5.2 {
  SELECT rowid, x, y FROM t1
} {
  111 {one two three} {four five six}
}

do_execsql_test 5.3 {
  SELECT typeof(c0), typeof(c1) FROM t1_content
} {
  blob text
}

#-------------------------------------------------------------------------
# Test that the column-size, highlight() and snippet() auxiliary functions
# work with a locale that causes the tokenizer to skip tokens ("second"),
# both with and without the columnsize=0 option.
#
foreach {tn opt} {
  1 {}
  2 {, columnsize=0}
} {
  reset_db
  sqlite3_fts5_create_tokenizer -v2 db tcl tcl_create
  do_execsql_test 6.$tn.1 "
    CREATE VIRTUAL TABLE y1 USING fts5(t, locale=1, tokenize=tcl $opt);
  "

  do_execsql_test 6.$tn.2 {
    INSERT INTO y1(rowid, t) VALUES
      (1, fts5_locale('second', 'the city of London')),
      (2, fts5_locale('second', 'shall have all the old')),
      (3, fts5_locale('second', 'Liberties and Customs')),
      (4, fts5_locale('second', 'which it hath been used to have'));
  }

  fts5_aux_test_functions db

  # Sizes count only the tokens that the "second" locale emits.
  do_execsql_test 6.$tn.3 {
    SELECT fts5_test_columnsize(y1) FROM y1
  } {
    2 3 2 4
  }

  do_execsql_test 6.$tn.4 {
    SELECT rowid, fts5_test_columnsize(y1) FROM y1('shall');
  } {
    2 3
  }

  do_execsql_test 6.$tn.5 {
    SELECT rowid, fts5_test_columnsize(y1) FROM y1('shall');
  } {
    2 3
  }

  do_execsql_test 6.$tn.6 {
    SELECT rowid, fts5_test_columnsize(y1) FROM y1('have');
  } {
    4 4
  }

  do_execsql_test 6.$tn.7 {
    SELECT rowid, highlight(y1, 0, '[', ']') FROM y1('have');
  } {
    4 {which it hath been used to [have]}
  }

  do_execsql_test 6.$tn.8 {
    SELECT
      rowid,
      highlight(y1, 0, '[', ']'),
      snippet(y1, 0, '[', ']', '...', 10)
    FROM y1('Liberties + Customs');
  } {
    3 {[Liberties and Customs]}
      {[Liberties and Customs]}
  }
}

#-------------------------------------------------------------------------
# Test that fts5_locale() values are rejected by a table created without
# locale=1, and check the datatype returned by fts5_locale() when the
# locale argument is NULL or an empty string.
#
reset_db
do_execsql_test 6.0 {
  CREATE VIRTUAL TABLE x1 USING fts5(x);
}

do_catchsql_test 6.1 {
  INSERT INTO x1(rowid, x) VALUES(123, fts5_locale('en_AU', 'hello world'));
} {1 {fts5_locale() requires locale=1}}

do_execsql_test 6.2 {
  SELECT typeof( fts5_locale(NULL, 'xyz') ), typeof( fts5_locale('', 'abc') );
} {text text}

#--------------------------------------------------------------------------
# Test that fts5_locale() works with external-content tables.
#
reset_db
sqlite3_fts5_create_tokenizer -v2 db tcl tcl_create

do_execsql_test 7.1 {
  CREATE TABLE t1(ii INTEGER PRIMARY KEY, bb BLOB, tt TEXT, locale TEXT);
  CREATE VIEW v1 AS
    SELECT ii AS rowid, bb, fts5_locale(locale, tt) AS tt FROM t1;

  CREATE VIRTUAL TABLE ft USING fts5(
      bb, tt, locale=1, tokenize=tcl, content=v1
  );

  INSERT INTO t1 VALUES(1, NULL, 'one two three', NULL);
  INSERT INTO t1 VALUES(2, '7800616263', 'four five six', 'reverse');
  INSERT INTO t1 VALUES(3, '000000007800616263', 'seven eight nine', 'second');
}

do_execsql_test 7.2 {
  INSERT INTO ft(ft) VALUES('rebuild');
  INSERT INTO ft(ft) VALUES('integrity-check');
}

do_execsql_test 7.3 {
  SELECT rowid, quote(bb), quote(tt) FROM ft
} {
  1 NULL {'one two three'}
  2 '7800616263' {'four five six'}
  3 '000000007800616263' {'seven eight nine'}
}

# Row 2 is indexed under reversed terms, so a query without a locale
# finds nothing, whereas a query using the "reverse" locale matches.
do_execsql_test 7.4 { SELECT rowid FROM ft('six'); } {}
do_execsql_test 7.5 {
  SELECT rowid FROM ft(fts5_locale('reverse','six'));
} 2

fts5_aux_test_functions db

do_execsql_test 7.6 {
  SELECT fts5_test_columnlocale(ft) FROM ft;
} {
  {{} {}} {{} reverse} {{} second}
}

#-------------------------------------------------------------------------
# Test that the porter tokenizer works with locales.
#
reset_db
sqlite3_fts5_create_tokenizer -v2 db tcl tcl_create

do_execsql_test 8.1 {
  CREATE VIRTUAL TABLE ft USING fts5(tt, locale=1, tokenize="porter tcl");
  CREATE VIRTUAL TABLE vocab USING fts5vocab('ft', instance);

  INSERT INTO ft(rowid, tt) VALUES
    (111, fts5_locale('second', 'the porter tokenizer is a wrapper tokenizer')),
    (222, fts5_locale('reverse', 'This value may also be set'));
}

# Row 111 contributes every other token (locale "second") and row 222
# contributes reversed tokens (locale "reverse").
do_execsql_test 8.2 {
  SELECT DISTINCT term FROM vocab ORDER BY 1
} {
  a eb eulav osla sihT te the token yam
}

#-------------------------------------------------------------------------
# Test that position-lists (used by xInst, xPhraseFirst etc.) work with
# locales and modes other than detail=full.
#
foreach {tn detail} {
  1 detail=full
  2 detail=none
  3 detail=column
} {
  reset_db
  sqlite3_fts5_create_tokenizer -v2 db tcl tcl_create
  do_execsql_test 9.$tn.0 "
    CREATE VIRTUAL TABLE ft USING fts5(tt, locale=1, tokenize=tcl, $detail);
  "

  do_execsql_test 9.$tn.1 {
    CREATE VIRTUAL TABLE vocab USING fts5vocab('ft', instance);
    INSERT INTO ft(rowid, tt) VALUES
      (-1, fts5_locale('second', 'it is an ancient mariner'));
  }

  do_execsql_test 9.$tn.2 {
    SELECT DISTINCT term FROM vocab
  } {an it mariner}

  do_execsql_test 9.$tn.3 {
    SELECT highlight(ft, 0, '[', ']') FROM ft('mariner')
  } {{it is an ancient [mariner]}}
}

#-------------------------------------------------------------------------
# Check some corrupt fts5_locale() blob formats are detected.
#
foreach_detail_mode $::testprefix {

  reset_db
  sqlite3_fts5_create_tokenizer -v2 db tcl tcl_create
  fts5_aux_test_functions db
  do_execsql_test 10.1 {
    CREATE TABLE x1(ii INTEGER PRIMARY KEY, x);
    CREATE VIRTUAL TABLE ft USING fts5(x,
        content=x1, content_rowid=ii, locale=1, detail=%DETAIL%, columnsize=0
    );
  }

  # Each value of v is a blob literal that is stored in the content table
  # and also passed to the query via test_setsubtype().
  foreach {tn v} {
    1 X'001122'
    2 X'0011223344'
    3 X'00E0B2EB68656c6c6f'
    4 X'00E0B2EB0068656c6c6f'
  } {
    do_execsql_test 10.2.$tn.0 { INSERT INTO ft(ft) VALUES('delete-all') }
    do_execsql_test 10.2.$tn.1 { DELETE FROM x1; }
    do_execsql_test 10.2.$tn.2 " INSERT INTO x1 VALUES(NULL, $v) "

    # Rebuilding the index reads the blob from the content table.
    do_catchsql_test 10.2.$tn.3 {
      INSERT INTO ft(ft) VALUES('rebuild');
    } {1 {SQL logic error}}

    do_catchsql_test 10.2.$tn.4 "
      SELECT * FROM ft( test_setsubtype($v, 76) );
    " {1 {SQL logic error}}

    # Add an index entry for rowid 1 directly. The content table row for
    # rowid 1 still holds the blob inserted above.
    do_execsql_test 10.2.$tn.5 {
      INSERT INTO ft(rowid, x) VALUES(1, 'hello world');
    }

    if {"%DETAIL%"!="full"} {
      do_catchsql_test 10.2.$tn.6 {
        SELECT fts5_test_poslist(ft) FROM ft('world');
      } {1 SQLITE_ERROR}

      do_catchsql_test 10.2.$tn.7 {
        SELECT fts5_test_columnsize(ft) FROM ft('world');
      } {1 SQLITE_ERROR}

      do_catchsql_test 10.2.$tn.8 {
        SELECT fts5_test_columnlocale(ft) FROM ft('world');
      } {1 SQLITE_ERROR}
    }

    do_catchsql_test 10.2.$tn.9 {
      SELECT * FROM ft('hello')
    } {1 {SQL logic error}}
  }
}

#-------------------------------------------------------------------------
# Test that fts5_columnlocale() reports SQLITE_RANGE when passed a column
# index that is out of range. Table x1 has a single column (index 0).
#
reset_db
sqlite3_fts5_create_tokenizer -v2 db tcl tcl_create
fts5_aux_test_functions db

do_execsql_test 11.0 {
  CREATE VIRTUAL TABLE x1 USING fts5(abc, locale=1);
  INSERT INTO x1(rowid, abc) VALUES(123, fts5_locale('en_US', 'one two three'));
}

do_catchsql_test 11.1 {
  SELECT fts5_columnlocale(x1, -1) FROM x1('two');
} {1 SQLITE_RANGE}

do_catchsql_test 11.2 {
  SELECT fts5_columnlocale(x1, 1) FROM x1('two');
} {1 SQLITE_RANGE}

finish_test