Add tests for the fts4 unicode61 tokenchars and separators options.

FossilOrigin-Name: 9ce6f40dfb54b35cecba3cc9c1ec0d111f6e9f11
This commit is contained in:
dan 2013-09-13 12:10:09 +00:00
parent 6e1b167454
commit f1d2670d40
3 changed files with 114 additions and 8 deletions

View File

@ -1,5 +1,5 @@
C Fix\stypo\sin\sa\smacro\sname:\s\s"GlogUpperToLower"\sshould\sbe\s"GlobUpperToLower"
D 2013-09-12T23:12:08.689
C Add\stests\sfor\sthe\sfts4\sunicode61\stokenchars\sand\sseparators\soptions.
D 2013-09-13T12:10:09.872
F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f
F Makefile.in 5e41da95d92656a5004b03d3576e8b226858a28e
F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23
@ -559,7 +559,7 @@ F test/fts4merge2.test 5faa558d1b672f82b847d2a337465fa745e46891
F test/fts4merge3.test aab02a09f50fe6baaddc2e159c3eabc116d45fc7
F test/fts4merge4.test c19c85ca1faa7b6d536832b49c12e1867235f584
F test/fts4noti.test aed33ba44808852dcb24bf70fa132e7bf530f057
F test/fts4unicode.test 5fa8e0a7899d906d114345c605250ebfa9d8ed28
F test/fts4unicode.test 26a0bd304dc3ecb6231f04cbd760af66f2b42102
F test/full.test 6b3c8fb43c6beab6b95438c1675374b95fab245d
F test/func.test cd25cf605c5a345d038dc7b84232204c6a901c84
F test/func2.test 772d66227e4e6684b86053302e2d74a2500e1e0f
@ -1112,7 +1112,7 @@ F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4
F tool/warnings.sh fbc018d67fd7395f440c28f33ef0f94420226381
F tool/wherecosttest.c f407dc4c79786982a475261866a161cd007947ae
F tool/win/sqlite.vsix 97894c2790eda7b5bce3cc79cb2a8ec2fde9b3ac
P 75a8a8c1b39725d36db627536d0c69401f8e0815
R 8ff1adbf60866468163443bbb47b7ee6
U drh
Z 1420c92f404975e34e33750c19177145
P 73634ca463f46027bfa8ea23f18abaa530460e24
R 4220c723ebfba5175f5b35a4f736cee8
U dan
Z ceac464bf6cfcc5f50757139a5d63046

View File

@ -1 +1 @@
73634ca463f46027bfa8ea23f18abaa530460e24
9ce6f40dfb54b35cecba3cc9c1ec0d111f6e9f11

View File

@ -438,4 +438,110 @@ do_execsql_test 8.2.3 {
SELECT rowid FROM t4 WHERE t4 MATCH 'a';
} {2 4}
#-------------------------------------------------------------------------
#
foreach {tn sql} {
1 {
CREATE VIRTUAL TABLE t5 USING fts4(tokenize=unicode61 [tokenchars= .]);
CREATE VIRTUAL TABLE t6 USING fts4(
tokenize=unicode61 [tokenchars=="] "tokenchars=[]");
CREATE VIRTUAL TABLE t7 USING fts4(tokenize=unicode61 [separators=x\xC4]);
}
2 {
CREATE VIRTUAL TABLE t5 USING fts4(tokenize=unicode61 "tokenchars= .");
CREATE VIRTUAL TABLE t6 USING fts4(tokenize=unicode61 "tokenchars=[=""]");
CREATE VIRTUAL TABLE t7 USING fts4(tokenize=unicode61 "separators=x\xC4");
}
3 {
CREATE VIRTUAL TABLE t5 USING fts4(tokenize=unicode61 'tokenchars= .');
CREATE VIRTUAL TABLE t6 USING fts4(tokenize=unicode61 'tokenchars=="[]');
CREATE VIRTUAL TABLE t7 USING fts4(tokenize=unicode61 'separators=x\xC4');
}
4 {
CREATE VIRTUAL TABLE t5 USING fts4(tokenize=unicode61 `tokenchars= .`);
CREATE VIRTUAL TABLE t6 USING fts4(tokenize=unicode61 `tokenchars=[="]`);
CREATE VIRTUAL TABLE t7 USING fts4(tokenize=unicode61 `separators=x\xC4`);
}
} {
do_execsql_test 9.$tn.0 {
DROP TABLE IF EXISTS t5;
DROP TABLE IF EXISTS t5aux;
DROP TABLE IF EXISTS t6;
DROP TABLE IF EXISTS t6aux;
DROP TABLE IF EXISTS t7;
DROP TABLE IF EXISTS t7aux;
}
do_execsql_test 9.$tn.1 $sql
do_execsql_test 9.$tn.2 {
CREATE VIRTUAL TABLE t5aux USING fts4aux(t5);
INSERT INTO t5 VALUES('one two three/four.five.six');
SELECT * FROM t5aux;
} {
four.five.six * 1 1 four.five.six 0 1 1
{one two three} * 1 1 {one two three} 0 1 1
}
do_execsql_test 9.$tn.3 {
CREATE VIRTUAL TABLE t6aux USING fts4aux(t6);
INSERT INTO t6 VALUES('alpha=beta"gamma/delta[epsilon]zeta');
SELECT * FROM t6aux;
} {
{alpha=beta"gamma} * 1 1 {alpha=beta"gamma} 0 1 1
{delta[epsilon]zeta} * 1 1 {delta[epsilon]zeta} 0 1 1
}
do_execsql_test 9.$tn.4 {
CREATE VIRTUAL TABLE t7aux USING fts4aux(t7);
INSERT INTO t7 VALUES('alephxbeth\xC4gimel');
SELECT * FROM t7aux;
} {
aleph * 1 1 aleph 0 1 1
beth * 1 1 beth 0 1 1
gimel * 1 1 gimel 0 1 1
}
}
# Check that multiple options are handled correctly.
#
do_execsql_test 10.1 {
DROP TABLE IF EXISTS t1;
CREATE VIRTUAL TABLE t1 USING fts4(tokenize=unicode61
"tokenchars=xyz" "tokenchars=.=" "separators=.=" "separators=xy"
"separators=a" "separators=a" "tokenchars=a" "tokenchars=a"
);
INSERT INTO t1 VALUES('oneatwoxthreeyfour');
INSERT INTO t1 VALUES('a.single=word');
CREATE VIRTUAL TABLE t1aux USING fts4aux(t1);
SELECT * FROM t1aux;
} {
.single=word * 1 1 .single=word 0 1 1
four * 1 1 four 0 1 1
one * 1 1 one 0 1 1
three * 1 1 three 0 1 1
two * 1 1 two 0 1 1
}
# Test that case folding happens after tokenization, not before.
#
do_execsql_test 10.2 {
DROP TABLE IF EXISTS t2;
CREATE VIRTUAL TABLE t2 USING fts4(tokenize=unicode61 "separators=aB");
INSERT INTO t2 VALUES('oneatwoBthree');
INSERT INTO t2 VALUES('onebtwoAthree');
CREATE VIRTUAL TABLE t2aux USING fts4aux(t2);
SELECT * FROM t2aux;
} {
one * 1 1 one 0 1 1
onebtwoathree * 1 1 onebtwoathree 0 1 1
three * 1 1 three 0 1 1
two * 1 1 two 0 1 1
}
finish_test