sqlite/ext/fts5/test/fts5locale.test
dan a7384e0de2 More tests for the new code on this branch.
FossilOrigin-Name: 00792e807f1dde750e6ac9f9b0095cf60b9fa6ff7cf4b14440600a21de2ee61a
2024-08-13 21:15:43 +00:00

526 lines
13 KiB
Plaintext

# 2014 Dec 20
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#***********************************************************************
#
# Tests focusing on fts5 locale support: the fts5_locale() SQL function,
# the locale=1 table option and locale-aware tokenizers.
#
# Load fts5_common.tcl from the directory that contains this script, then
# set the prefix used for the names of the tests in this file.
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5locale

# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}
# Apply the locale-specific transformation used by the test tokenizer in
# this file to a single token, and return the transformed token.
#
#   locale - Locale name. Only "reverse" is handled specially.
#   token  - Text of the token to transform.
#
# For locale "reverse" the characters of $token are returned in reverse
# order. For every other locale the token is returned unchanged.
#
proc transform_token {locale token} {
  if {$locale eq "reverse"} {
    # Use the built-in instead of prepending one character at a time.
    return [string reverse $token]
  }
  return $token
}
# Tokenizer constructor callback registered below under the name "tcl".
# Any arguments are ignored; the return value is the name of the Tcl
# command that performs the tokenization.
#
proc tcl_create {args} {
  return "tcl_tokenize"
}
# Tokenizer callback. Splits $text using fts5_tokenize_split and reports
# tokens through sqlite3_fts5_token, after passing each one through
# transform_token with the locale returned by sqlite3_fts5_locale.
#
#   tflags - Tokenizer flags (not used by this implementation).
#   text   - The text to tokenize.
#
# If the locale is "second", only every second token is reported, starting
# with the first one (tokens 1, 3, 5, ...). For any other locale every
# token is reported.
#
proc tcl_tokenize {tflags text} {
  set zLocale [sqlite3_fts5_locale]

  # Report one token out of every $nStride.
  set nStride 1
  if {$zLocale eq "second"} { set nStride 2 }

  # The counter starts at 1 and is incremented before it is tested, so the
  # first token has counter value 2 and is always reported.
  set iCounter 1
  foreach {zToken iStart iEnd} [fts5_tokenize_split $text] {
    incr iCounter
    if {($iCounter % $nStride)==0} {
      sqlite3_fts5_token [transform_token $zLocale $zToken] $iStart $iEnd
    }
  }
}
#-------------------------------------------------------------------------
# Check that queries can have a locale attached to them.
#
# Both rows are inserted without a locale, so the index contains the tokens
# 'abc' and 'cba' as written. A query wrapped in fts5_locale('reverse', ...)
# has its token reversed by the tcl tokenizer, so 'abc' matches the 'cba'
# row instead of the 'abc' row.
#
reset_db
sqlite3_fts5_create_tokenizer -v2 db tcl tcl_create

do_execsql_test 1.0 {
  CREATE VIRTUAL TABLE t1 USING fts5(a, tokenize=tcl);
  INSERT INTO t1 VALUES('abc');
  INSERT INTO t1 VALUES('cba');
} {}

do_execsql_test 1.1 {
  SELECT rowid, a FROM t1( fts5_locale('en_US', 'abc') );
} {1 abc}

do_execsql_test 1.2 {
  SELECT rowid, a FROM t1( fts5_locale('reverse', 'abc') );
} {2 cba}
#-------------------------------------------------------------------------
# Test that the locale= option exists and seems to accept values. And
# that fts5_locale() values may only be inserted into an internal-content
# table if the locale=1 option was specified.
#
reset_db
sqlite3_fts5_create_tokenizer -v2 db tcl tcl_create

# b1 is created with locale=1 and b2 with locale=0.
do_execsql_test 2.1 {
  CREATE VIRTUAL TABLE b1 USING fts5(x, y, locale=1, tokenize=tcl);
  CREATE VIRTUAL TABLE b2 USING fts5(x, y, locale=0, tokenize=tcl);
  CREATE VIRTUAL TABLE ttt USING fts5vocab('b1', instance);
}

# Values other than 0 and 1 are rejected.
do_catchsql_test 2.2.1 {
  CREATE VIRTUAL TABLE b3 USING fts5(x, y, locale=2);
} {1 {malformed locale=... directive}}
do_catchsql_test 2.2.2 {
  CREATE VIRTUAL TABLE b3 USING fts5(x, y, locale=111);
} {1 {malformed locale=... directive}}

# Writing 'locale' to the special column of the table is an error.
do_catchsql_test 2.3 {
  INSERT INTO b1(b1, rank) VALUES('locale', 0);
} {1 {SQL logic error}}

# The locale=1 table accepts both plain text and fts5_locale() values.
do_execsql_test 2.4 {
  INSERT INTO b1 VALUES('abc', 'one two three');
  INSERT INTO b1 VALUES('def', fts5_locale('reverse', 'four five six'));
}

# The locale=0 table accepts plain text, but not fts5_locale() values.
do_execsql_test 2.5 {
  INSERT INTO b2 VALUES('abc', 'one two three');
}
do_catchsql_test 2.6 {
  INSERT INTO b2 VALUES('def', fts5_locale('reverse', 'four five six'));
} {1 {fts5_locale() requires locale=1}}

# Row 2 was tokenized with the 'reverse' locale. So it is matched by the
# reversed token 'ruof', or by a query that itself uses the 'reverse'
# locale, but not by the plain token 'four'.
do_execsql_test 2.7  { SELECT rowid FROM b1('one') } {1}
do_execsql_test 2.8  { SELECT rowid FROM b1('four') } {}
do_execsql_test 2.9  { SELECT rowid FROM b1('ruof') } 2
do_execsql_test 2.10 { SELECT rowid FROM b1(fts5_locale('reverse', 'five'))} 2

# Reading the column back returns the original text.
do_execsql_test 2.11 {
  SELECT x, quote(y) FROM b1
} {
  abc {'one two three'}
  def {'four five six'}
}
do_execsql_test 2.12 { SELECT quote(y) FROM b1('ruof') } {
  {'four five six'}
}

# The integrity-check must pass before and after a rebuild, and after the
# row that uses a locale has been deleted.
do_execsql_test 2.13 {
  INSERT INTO b1(b1) VALUES('integrity-check');
}
do_execsql_test 2.14 {
  INSERT INTO b1(b1) VALUES('rebuild');
}
do_execsql_test 2.15 {
  INSERT INTO b1(b1) VALUES('integrity-check');
}
do_execsql_test 2.16 {
  DELETE FROM b1 WHERE rowid=2
}
do_execsql_test 2.17 {
  INSERT INTO b1(b1) VALUES('integrity-check');
}
#-------------------------------------------------------------------------
# Test the 'delete' command with contentless tables.
#
# The second row is inserted with the 'reverse' locale, so its terms appear
# reversed in the fts5vocab table. The 'delete' command is passed the same
# fts5_locale() value, after which only the terms of the first row remain.
#
reset_db
sqlite3_fts5_create_tokenizer -v2 db tcl tcl_create

do_execsql_test 3.1 {
  CREATE VIRTUAL TABLE c1 USING fts5(x, content=, tokenize=tcl, locale=1);
  CREATE VIRTUAL TABLE c2 USING fts5vocab('c1', instance);

  INSERT INTO c1 VALUES('hello world');
  INSERT INTO c1 VALUES( fts5_locale('reverse', 'one two three') );
}

do_execsql_test 3.2 {
  SELECT DISTINCT term FROM c2 ORDER BY 1
} {
  eerht eno hello owt world
}

do_execsql_test 3.3 {
  INSERT INTO c1(c1, rowid, x)
    VALUES('delete', 2, fts5_locale('reverse', 'one two three') );
}

do_execsql_test 3.4 {
  SELECT DISTINCT term FROM c2 ORDER BY 1
} {
  hello world
}
#-------------------------------------------------------------------------
# Test that an UPDATE that updates a subset of the columns does not
# magically discard the locale from those columns not updated.
#
reset_db
sqlite3_fts5_create_tokenizer -v2 db tcl tcl_create

do_execsql_test 4.1 {
  CREATE VIRTUAL TABLE d1 USING fts5(x, y, locale=1, tokenize=tcl);
  CREATE VIRTUAL TABLE d2 USING fts5vocab('d1', instance);

  INSERT INTO d1(rowid, x, y) VALUES(1, 'abc', 'def');
  INSERT INTO d1(rowid, x, y) VALUES(2, 'ghi', fts5_locale('reverse', 'hello'));
}

do_execsql_test 4.2 {
  SELECT DISTINCT term FROM d2 ORDER BY 1
} {
  abc def ghi olleh
}

# Update column x only. The term for column y of row 2 must still be the
# reversed 'olleh'.
do_execsql_test 4.3 {
  UPDATE d1 SET x='jkl' WHERE rowid=2;
}

do_execsql_test 4.4 {
  SELECT DISTINCT term FROM d2 ORDER BY 1
} {
  abc def jkl olleh
}

do_execsql_test 4.5 {
  SELECT rowid, * FROM d1
} {
  1 abc def
  2 jkl hello
}

# Change the rowid of the row that uses a locale.
do_execsql_test 4.6 {
  UPDATE d1 SET rowid=4 WHERE rowid=2
}

do_execsql_test 4.7 {
  SELECT rowid, * FROM d1
} {
  1 abc def
  4 jkl hello
}

# Check the column text and the column locale as seen by auxiliary
# functions.
fts5_aux_test_functions db

do_execsql_test 4.8.1 {
  SELECT fts5_test_columntext(d1) FROM d1('jkl')
} {{jkl hello}}
do_execsql_test 4.8.2 {
  SELECT fts5_test_columntext(d1) FROM d1(fts5_locale('reverse', 'hello'))
} {{jkl hello}}

do_execsql_test 4.9 {
  SELECT fts5_test_columnlocale(d1) FROM d1(fts5_locale('reverse', 'hello'))
} {{{} reverse}}

do_execsql_test 4.10 {
  SELECT fts5_test_columnlocale(d1) FROM d1
} {
  {{} {}}
  {{} reverse}
}
#-------------------------------------------------------------------------
# Test that if an fts5_locale() value is written to an UNINDEXED
# column it is stored as text. This is so that blobs and other values
# can also be stored as is.
#
reset_db
sqlite3_fts5_create_tokenizer -v2 db tcl tcl_create

do_execsql_test 5.1 {
  CREATE VIRTUAL TABLE t1 USING fts5(
    x, y UNINDEXED, locale=1, tokenize=tcl
  );

  INSERT INTO t1(rowid, x, y) VALUES(111,
    fts5_locale('reverse', 'one two three'),
    fts5_locale('reverse', 'four five six')
  );
}

do_execsql_test 5.2 {
  SELECT rowid, x, y FROM t1
} {
  111 {one two three} {four five six}
}

# In the %_content table the indexed column holds a blob and the UNINDEXED
# column holds text.
do_execsql_test 5.3 {
  SELECT typeof(c0), typeof(c1) FROM t1_content
} {
  blob text
}
#-------------------------------------------------------------------------
# Test column sizes, highlight() and snippet() with a locale that changes
# the number of tokens in a document. The tests are run both with and
# without the columnsize=0 option.
#
# With the 'second' locale the tcl tokenizer reports only every second
# token, starting with the first. So 'the city of London' is indexed as
# two tokens ('the' and 'of'), and so on.
#
# All tests in this loop are named 6.$tn.*, so that a failure report
# identifies this section of the file.
#
foreach {tn opt} {
  1 {}
  2 {, columnsize=0}
} {
  reset_db
  sqlite3_fts5_create_tokenizer -v2 db tcl tcl_create

  do_execsql_test 6.$tn.1 "
    CREATE VIRTUAL TABLE y1 USING fts5(t, locale=1, tokenize=tcl $opt);
  "

  do_execsql_test 6.$tn.2 {
    INSERT INTO y1(rowid, t) VALUES
      (1, fts5_locale('second', 'the city of London')),
      (2, fts5_locale('second', 'shall have all the old')),
      (3, fts5_locale('second', 'Liberties and Customs')),
      (4, fts5_locale('second', 'which it hath been used to have'));
  }

  fts5_aux_test_functions db

  # Column sizes count only the tokens that were actually reported.
  do_execsql_test 6.$tn.3 {
    SELECT fts5_test_columnsize(y1) FROM y1
  } {
    2 3 2 4
  }

  do_execsql_test 6.$tn.4 {
    SELECT rowid, fts5_test_columnsize(y1) FROM y1('shall');
  } {
    2 3
  }

  # NOTE(review): this query is identical to the one in the previous
  # test. Possibly a different query term was intended - confirm.
  do_execsql_test 6.$tn.5 {
    SELECT rowid, fts5_test_columnsize(y1) FROM y1('shall');
  } {
    2 3
  }

  # 'have' is the second token of row 2, so it was skipped there. Only
  # row 4 matches.
  do_execsql_test 6.$tn.6 {
    SELECT rowid, fts5_test_columnsize(y1) FROM y1('have');
  } {
    4 4
  }

  do_execsql_test 6.$tn.7 {
    SELECT rowid, highlight(y1, 0, '[', ']') FROM y1('have');
  } {
    4 {which it hath been used to [have]}
  }

  # 'and' was skipped in row 3, so 'Liberties' and 'Customs' are adjacent
  # tokens and match as a phrase.
  do_execsql_test 6.$tn.8 {
    SELECT rowid,
        highlight(y1, 0, '[', ']'),
        snippet(y1, 0, '[', ']', '...', 10)
    FROM y1('Liberties + Customs');
  } {
    3 {[Liberties and Customs]}
      {[Liberties and Customs]}
  }
}
#-------------------------------------------------------------------------
# Test that a table created without any locale= option rejects
# fts5_locale() values, and check the datatype that fts5_locale() returns
# when the locale argument is NULL or an empty string.
#
reset_db

do_execsql_test 6.0 {
  CREATE VIRTUAL TABLE x1 USING fts5(x);
}

do_catchsql_test 6.1 {
  INSERT INTO x1(rowid, x) VALUES(123, fts5_locale('en_AU', 'hello world'));
} {1 {fts5_locale() requires locale=1}}

do_execsql_test 6.2 {
  SELECT typeof( fts5_locale(NULL, 'xyz') ), typeof( fts5_locale('', 'abc') );
} {text text}
#--------------------------------------------------------------------------
# Test that fts5_locale() works with external-content tables.
#
# The content of table ft is view v1, which combines the text and locale
# columns of table t1 into a single fts5_locale() value.
#
reset_db
sqlite3_fts5_create_tokenizer -v2 db tcl tcl_create

do_execsql_test 7.1 {
  CREATE TABLE t1(ii INTEGER PRIMARY KEY, bb BLOB, tt TEXT, locale TEXT);
  CREATE VIEW v1 AS
    SELECT ii AS rowid, bb, fts5_locale(locale, tt) AS tt FROM t1;

  CREATE VIRTUAL TABLE ft USING fts5(
    bb, tt, locale=1, tokenize=tcl, content=v1
  );

  INSERT INTO t1 VALUES(1, NULL, 'one two three', NULL);
  INSERT INTO t1 VALUES(2, '7800616263', 'four five six', 'reverse');
  INSERT INTO t1 VALUES(3, '000000007800616263', 'seven eight nine', 'second');
}

do_execsql_test 7.2 {
  INSERT INTO ft(ft) VALUES('rebuild');
  INSERT INTO ft(ft) VALUES('integrity-check');
}

do_execsql_test 7.3 {
  SELECT rowid, quote(bb), quote(tt) FROM ft
} {
  1 NULL {'one two three'}
  2 '7800616263' {'four five six'}
  3 '000000007800616263' {'seven eight nine'}
}

# Row 2 uses the 'reverse' locale, so 'six' matches only when the query
# uses that locale as well. The plain query returns no rows.
do_execsql_test 7.4 { SELECT rowid FROM ft('six'); } {}
do_execsql_test 7.5 { SELECT rowid FROM ft(fts5_locale('reverse','six')); } 2

fts5_aux_test_functions db

do_execsql_test 7.6 {
  SELECT fts5_test_columnlocale(ft) FROM ft;
} {
  {{} {}} {{} reverse} {{} second}
}
#-------------------------------------------------------------------------
# Test that the porter tokenizer works with locales.
#
# The porter tokenizer wraps the tcl tokenizer. Row 111 uses the 'second'
# locale, so only every second token (starting with the first) is indexed.
# Row 222 uses the 'reverse' locale, so its tokens are reversed. The
# expected terms in test 8.2 are the stemmed forms of those tokens.
#
reset_db
sqlite3_fts5_create_tokenizer -v2 db tcl tcl_create

do_execsql_test 8.1 {
  CREATE VIRTUAL TABLE ft USING fts5(tt, locale=1, tokenize="porter tcl");
  CREATE VIRTUAL TABLE vocab USING fts5vocab('ft', instance);

  INSERT INTO ft(rowid, tt) VALUES
    (111, fts5_locale('second', 'the porter tokenizer is a wrapper tokenizer')),
    (222, fts5_locale('reverse', 'This value may also be set'));
}

# This test was previously also named 8.1. It is named 8.2 so that each
# test in this section has a unique name.
do_execsql_test 8.2 {
  SELECT DISTINCT term FROM vocab ORDER BY 1
} {
  a eb eulav osla sihT te the token yam
}
#-------------------------------------------------------------------------
# Test that position-lists (used by xInst, xPhraseFirst etc.) work with
# locales and modes other than detail=full.
#
# The single row uses the 'second' locale, so only the tokens 'it', 'an'
# and 'mariner' are indexed.
#
foreach {tn detail} {
  1 detail=full
  2 detail=none
  3 detail=column
} {
  reset_db
  sqlite3_fts5_create_tokenizer -v2 db tcl tcl_create

  do_execsql_test 9.$tn.0 "
    CREATE VIRTUAL TABLE ft USING fts5(tt, locale=1, tokenize=tcl, $detail);
  "

  do_execsql_test 9.$tn.1 {
    CREATE VIRTUAL TABLE vocab USING fts5vocab('ft', instance);
    INSERT INTO ft(rowid, tt) VALUES
      (-1, fts5_locale('second', 'it is an ancient mariner'));
  }

  do_execsql_test 9.$tn.2 {
    SELECT DISTINCT term FROM vocab
  } {an it mariner}

  do_execsql_test 9.$tn.3 {
    SELECT highlight(ft, 0, '[', ']') FROM ft('mariner')
  } {{it is an ancient [mariner]}}
}
#-------------------------------------------------------------------------
# Check some corrupt fts5_locale() blob formats are detected.
#
# For each blob value $v, the blob is stored in the external content table
# x1. The tests then check that errors are reported when the blob is read
# by a 'rebuild', passed as a query with its subtype set using
# test_setsubtype(), or read back from the content table by a query or an
# auxiliary function.
#
# NOTE(review): %DETAIL% is presumably replaced by foreach_detail_mode with
# the name of each detail mode in turn - confirm against fts5_common.tcl.
#
# Each test inside the inner loop has a unique name (10.2.$tn.0 to
# 10.2.$tn.9). Previously the name 10.2.$tn.7 was used twice.
#
foreach_detail_mode $::testprefix {
  reset_db
  sqlite3_fts5_create_tokenizer -v2 db tcl tcl_create
  fts5_aux_test_functions db

  do_execsql_test 10.1 {
    CREATE TABLE x1(ii INTEGER PRIMARY KEY, x);
    CREATE VIRTUAL TABLE ft USING fts5(x,
      content=x1, content_rowid=ii, locale=1, detail=%DETAIL%, columnsize=0
    );
  }

  foreach {tn v} {
    1 X'001122'
    2 X'0011223344'
    3 X'00E0B2EB68656c6c6f'
    4 X'00E0B2EB0068656c6c6f'
  } {
    # Start each case with an empty index and a content table that
    # contains only the corrupt blob.
    do_execsql_test 10.2.$tn.0 { INSERT INTO ft(ft) VALUES('delete-all') }
    do_execsql_test 10.2.$tn.1 { DELETE FROM x1; }
    do_execsql_test 10.2.$tn.2 " INSERT INTO x1 VALUES(NULL, $v) "

    do_catchsql_test 10.2.$tn.3 {
      INSERT INTO ft(ft) VALUES('rebuild');
    } {1 {SQL logic error}}

    do_catchsql_test 10.2.$tn.4 "
      SELECT * FROM ft( test_setsubtype($v, 76) );
    " {1 {SQL logic error}}

    do_execsql_test 10.2.$tn.5 {
      INSERT INTO ft(rowid, x) VALUES(1, 'hello world');
    }

    if {"%DETAIL%"!="full"} {
      do_catchsql_test 10.2.$tn.6 {
        SELECT fts5_test_poslist(ft) FROM ft('world');
      } {1 SQLITE_ERROR}

      do_catchsql_test 10.2.$tn.7 {
        SELECT fts5_test_columnsize(ft) FROM ft('world');
      } {1 SQLITE_ERROR}

      do_catchsql_test 10.2.$tn.8 {
        SELECT fts5_test_columnlocale(ft) FROM ft('world');
      } {1 SQLITE_ERROR}
    }

    do_catchsql_test 10.2.$tn.9 {
      SELECT * FROM ft('hello')
    } {1 {SQL logic error}}
  }
}
#-------------------------------------------------------------------------
# Test that fts5_columnlocale() reports SQLITE_RANGE when it is passed a
# column number that is out of range. Table x1 has a single column, so
# both -1 and 1 are invalid.
#
reset_db
sqlite3_fts5_create_tokenizer -v2 db tcl tcl_create
fts5_aux_test_functions db

do_execsql_test 11.0 {
  CREATE VIRTUAL TABLE x1 USING fts5(abc, locale=1);
  INSERT INTO x1(rowid, abc) VALUES(123, fts5_locale('en_US', 'one two three'));
}

do_catchsql_test 11.1 {
  SELECT fts5_columnlocale(x1, -1) FROM x1('two');
} {1 SQLITE_RANGE}

do_catchsql_test 11.2 {
  SELECT fts5_columnlocale(x1, 1) FROM x1('two');
} {1 SQLITE_RANGE}

finish_test