sqlite/ext/fts5/test/fts5locale.test
dan 8f14c45197 Fix xColumnText() and xColumnSize() APIs. Add xColumnLocale() and xTokenizeSetLocale().
FossilOrigin-Name: 14006711d83d098e665c540b978b0e29aa8f458da1c2c8e9c2baf2ad5ebd502c
2024-07-29 20:31:17 +00:00

309 lines
7.2 KiB
Plaintext

# 2014 Dec 20
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#***********************************************************************
#
# Tests focusing on fts5 locale support: the fts5_locale() SQL function
# and the locale= table option.
#
# Load fts5_common.tcl from the directory that contains this script.
source [file join [file dirname [info script]] fts5_common.tcl]
# Test-name prefix for this script; consumed by the test harness.
set testprefix fts5locale
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
# Return TOKEN after applying the transformation associated with LOCALE.
#
#   reverse     - TOKEN with its characters in reverse order
#   (any other) - TOKEN unchanged
#
proc transform_token {locale token} {
  switch -- $locale {
    reverse {
      # Built-in reversal instead of prepending one character at a time.
      return [string reverse $token]
    }
    default {
      # Every other locale leaves the token as it is.
      return $token
    }
  }
}
# Creation script handed to sqlite3_fts5_create_tokenizer for the "tcl"
# tokenizer. Any arguments are accepted and ignored; the result is always
# the string tcl_tokenize, which is the name of the tokenize proc defined
# in this file (presumably the harness then invokes that proc to tokenize
# text).
proc tcl_create {args} {
  set tokenizer_proc "tcl_tokenize"
  return $tokenizer_proc
}
# Tokenize TEXT. The text is split with fts5_tokenize_split, and each
# token that is kept is passed through transform_token, using the locale
# reported by sqlite3_fts5_locale, before being handed to
# sqlite3_fts5_token along with the two position values that
# fts5_tokenize_split supplied for it.
#
# When the locale is "second" only every other token is kept: the first,
# third, fifth and so on. For any other locale every token is kept.
# The TFLAGS argument is not used.
proc tcl_tokenize {tflags text} {
  # Keep one token out of every $stride.
  if {[sqlite3_fts5_locale] eq "second"} {
    set stride 2
  } else {
    set stride 1
  }

  # The counter starts at 1 and is bumped before it is tested, so the
  # first token sees the value 2 and is always kept.
  set seen 1
  foreach {term first last} [fts5_tokenize_split $text] {
    incr seen
    if {$seen % $stride == 0} {
      set term [transform_token [sqlite3_fts5_locale] $term]
      sqlite3_fts5_token $term $first $last
    }
  }
}
#-------------------------------------------------------------------------
# Check that queries can have a locale attached to them.
#
# Table t1 is created without a locale= option and holds the two rows
# 'abc' and 'cba'. Only the query text is wrapped in fts5_locale() here.
#
reset_db
sqlite3_fts5_create_tokenizer -v2 db tcl tcl_create
do_execsql_test 1.0 {
CREATE VIRTUAL TABLE t1 USING fts5(a, tokenize=tcl);
INSERT INTO t1 VALUES('abc');
INSERT INTO t1 VALUES('cba');
} {}
# transform_token leaves tokens alone for the 'en_US' locale, so the
# query matches the row containing 'abc'.
do_execsql_test 1.1 {
SELECT rowid, a FROM t1( fts5_locale('en_US', 'abc') );
} {1 abc}
# With the 'reverse' locale the query token becomes 'cba', so the same
# query text now matches the other row.
do_execsql_test 1.2 {
SELECT rowid, a FROM t1( fts5_locale('reverse', 'abc') );
} {2 cba}
#-------------------------------------------------------------------------
# Test that the locale= option exists and seems to accept values. And
# that fts5_locale() values may only be inserted into an internal-content
# table if the locale=1 option was specified.
#
reset_db
sqlite3_fts5_create_tokenizer -v2 db tcl tcl_create
# locale=1 and locale=0 are both accepted.
do_execsql_test 2.1 {
CREATE VIRTUAL TABLE b1 USING fts5(x, y, locale=1, tokenize=tcl);
CREATE VIRTUAL TABLE b2 USING fts5(x, y, locale=0, tokenize=tcl);
CREATE VIRTUAL TABLE ttt USING fts5vocab('b1', instance);
}
# locale=2 is rejected when the table is created.
do_catchsql_test 2.2 {
CREATE VIRTUAL TABLE b3 USING fts5(x, y, locale=2);
} {1 {malformed locale=... directive}}
# An attempt to set a 'locale' value through the b1(b1, rank) special
# INSERT is expected to fail.
do_catchsql_test 2.3 {
INSERT INTO b1(b1, rank) VALUES('locale', 0);
} {1 {SQL logic error}}
# b1 was created with locale=1, so it accepts both plain text and
# fts5_locale() values.
do_execsql_test 2.4 {
INSERT INTO b1 VALUES('abc', 'one two three');
INSERT INTO b1 VALUES('def', fts5_locale('reverse', 'four five six'));
}
# b2 was created with locale=0: plain text is accepted, but an
# fts5_locale() value is refused.
do_execsql_test 2.5 {
INSERT INTO b2 VALUES('abc', 'one two three');
}
do_catchsql_test 2.6 {
INSERT INTO b2 VALUES('def', fts5_locale('reverse', 'four five six'));
} {1 {fts5_locale() may not be used without locale=1}}
# Column y of row 2 was tokenized with the 'reverse' locale, so it is
# found via the reversed term 'ruof' but not via 'four'. A query that
# itself carries the 'reverse' locale finds it using the original word.
do_execsql_test 2.7 { SELECT rowid FROM b1('one') } {1}
do_execsql_test 2.8 { SELECT rowid FROM b1('four') } {}
do_execsql_test 2.9 { SELECT rowid FROM b1('ruof') } 2
do_execsql_test 2.10 { SELECT rowid FROM b1(fts5_locale('reverse', 'five'))} 2
# Reading column y back yields the original text as an ordinary string,
# both for a full scan and for a full-text query.
do_execsql_test 2.11 {
SELECT x, quote(y) FROM b1
} {
abc {'one two three'}
def {'four five six'}
}
do_execsql_test 2.12 { SELECT quote(y) FROM b1('ruof') } {
{'four five six'}
}
# The integrity-check command must succeed as the table stands, again
# after a rebuild, and again after the row that used a locale is deleted.
do_execsql_test 2.13 {
INSERT INTO b1(b1) VALUES('integrity-check');
}
do_execsql_test 2.14 {
INSERT INTO b1(b1) VALUES('rebuild');
}
do_execsql_test 2.15 {
INSERT INTO b1(b1) VALUES('integrity-check');
}
do_execsql_test 2.16 {
DELETE FROM b1 WHERE rowid=2
}
do_execsql_test 2.17 {
INSERT INTO b1(b1) VALUES('integrity-check');
}
#-------------------------------------------------------------------------
# Test the 'delete' command with contentless tables.
#
# Table c1 is contentless (content=) and is created without a locale=
# option; the fts5_locale() insert below is nevertheless expected to
# succeed.
#
reset_db
sqlite3_fts5_create_tokenizer -v2 db tcl tcl_create
do_execsql_test 3.1 {
CREATE VIRTUAL TABLE c1 USING fts5(x, content=, tokenize=tcl);
CREATE VIRTUAL TABLE c2 USING fts5vocab('c1', instance);
INSERT INTO c1 VALUES('hello world');
INSERT INTO c1 VALUES( fts5_locale('reverse', 'one two three') );
}
# The second row was indexed using reversed tokens.
do_execsql_test 3.2 {
SELECT DISTINCT term FROM c2 ORDER BY 1
} {
eerht eno hello owt world
}
# The 'delete' command is given the same fts5_locale() value that was
# originally inserted for rowid 2.
do_execsql_test 3.3 {
INSERT INTO c1(c1, rowid, x)
VALUES('delete', 2, fts5_locale('reverse', 'one two three') );
}
# Only the terms from the first row remain in the index.
do_execsql_test 3.4 {
SELECT DISTINCT term FROM c2 ORDER BY 1
} {
hello world
}
#-------------------------------------------------------------------------
# Test that an UPDATE that updates a subset of the columns does not
# magically discard the locale from those columns not updated.
#
reset_db
sqlite3_fts5_create_tokenizer -v2 db tcl tcl_create
# Row 2 stores column y with the 'reverse' locale.
do_execsql_test 4.1 {
CREATE VIRTUAL TABLE d1 USING fts5(x, y, locale=1, tokenize=tcl);
CREATE VIRTUAL TABLE d2 USING fts5vocab('d1', instance);
INSERT INTO d1(rowid, x, y) VALUES(1, 'abc', 'def');
INSERT INTO d1(rowid, x, y) VALUES(2, 'ghi', fts5_locale('reverse', 'hello'));
}
do_execsql_test 4.2 {
SELECT DISTINCT term FROM d2 ORDER BY 1
} {
abc def ghi olleh
}
# Update column x only. Column y must still be indexed as 'olleh', that
# is, still tokenized with the 'reverse' locale.
do_execsql_test 4.3 {
UPDATE d1 SET x='jkl' WHERE rowid=2;
}
do_execsql_test 4.4 {
SELECT DISTINCT term FROM d2 ORDER BY 1
} {
abc def jkl olleh
}
do_execsql_test 4.5 {
SELECT rowid, * FROM d1
} {
1 abc def
2 jkl hello
}
# Changing only the rowid leaves the column values as they were.
do_execsql_test 4.6 {
UPDATE d1 SET rowid=4 WHERE rowid=2
}
do_execsql_test 4.7 {
SELECT rowid, * FROM d1
} {
1 abc def
4 jkl hello
}
# NOTE(review): fts5_aux_test_functions presumably registers the
# fts5_test_* auxiliary functions used below - confirm in fts5_common.tcl.
fts5_aux_test_functions db
# fts5_test_columntext() returns the plain text of both columns whether
# or not the query itself carries a locale.
do_execsql_test 4.8.1 {
SELECT fts5_test_columntext(d1) FROM d1('jkl')
} {{jkl hello}}
do_execsql_test 4.8.2 {
SELECT fts5_test_columntext(d1) FROM d1(fts5_locale('reverse', 'hello'))
} {{jkl hello}}
# fts5_test_columnlocale() returns one value per column (presumably in
# the order x, y): an empty string where no locale was supplied and
# 'reverse' for the column written with fts5_locale().
do_execsql_test 4.9 {
SELECT fts5_test_columnlocale(d1) FROM d1(fts5_locale('reverse', 'hello'))
} {{{} reverse}}
do_execsql_test 4.10 {
SELECT fts5_test_columnlocale(d1) FROM d1
} {
{{} {}}
{{} reverse}
}
#-------------------------------------------------------------------------
# Test that if an fts5_locale() value is written to an UNINDEXED
# column it is stored as text. This is so that blobs and other values
# can also be stored as is.
#
reset_db
sqlite3_fts5_create_tokenizer -v2 db tcl tcl_create
# Both the indexed column x and the UNINDEXED column y are written with
# an fts5_locale() value.
do_execsql_test 5.1 {
CREATE VIRTUAL TABLE t1 USING fts5(
x, y UNINDEXED, locale=1, tokenize=tcl
);
INSERT INTO t1(rowid, x, y) VALUES(111,
fts5_locale('reverse', 'one two three'),
fts5_locale('reverse', 'four five six')
);
}
# Both columns read back as the original text.
do_execsql_test 5.2 {
SELECT rowid, x, y FROM t1
} {
111 {one two three} {four five six}
}
# Inspect the storage types in the t1_content table. c0 and c1 presumably
# correspond to x and y, so the indexed column is held as a blob and the
# UNINDEXED column as text.
do_execsql_test 5.3 {
SELECT typeof(c0), typeof(c1) FROM t1_content
} {
blob text
}
#-------------------------------------------------------------------------
# Check the values returned by fts5_test_columnsize() for rows written
# with the 'second' locale, once with the default table options and once
# with columnsize=0. Under the 'second' locale tcl_tokenize emits only
# every other token (the first, third, fifth and so on), so the four rows
# below are expected to report sizes of 2, 3, 2 and 4.
#
foreach {tn opt} {
1 {}
2 {, columnsize=0}
} {
reset_db
sqlite3_fts5_create_tokenizer -v2 db tcl tcl_create
# Double quotes are used so that $opt is substituted into the SQL.
do_execsql_test 6.$tn.1 "
CREATE VIRTUAL TABLE y1 USING fts5(t, locale=1, tokenize=tcl $opt);
"
do_execsql_test 6.$tn.2 {
INSERT INTO y1(rowid, t) VALUES
(1, fts5_locale('second', 'the city of London')),
(2, fts5_locale('second', 'shall have all the old')),
(3, fts5_locale('second', 'Liberties and Customs')),
(4, fts5_locale('second', 'which it hath been used to have'));
}
fts5_aux_test_functions db
# Named 6.$tn.3 so that it is numbered with the other tests in this loop.
do_execsql_test 6.$tn.3 {
SELECT fts5_test_columnsize(y1) FROM y1
} {
2 3 2 4
}
}
finish_test