Improve test coverage of fts5_tokenize.c.

FossilOrigin-Name: 0e91a6a520f040b8902da6a1a4d9107dc66c0ea3
This commit is contained in:
dan 2015-05-20 09:27:51 +00:00
parent 116eaee4a0
commit 8c1f46de50
6 changed files with 128 additions and 12 deletions

View File

@ -666,8 +666,8 @@ static int fts5Porter_Ostar(char *zStem, int nStem){
/* porter rule condition: (m > 1 and (*S or *T)) */
static int fts5Porter_MGt1_and_S_or_T(char *zStem, int nStem){
return nStem>0
&& (zStem[nStem-1]=='s' || zStem[nStem-1]=='t')
assert( nStem>0 );
return (zStem[nStem-1]=='s' || zStem[nStem-1]=='t')
&& fts5Porter_MGt1(zStem, nStem);
}
@ -1167,7 +1167,8 @@ static int fts5PorterCb(
fts5PorterStep4(aBuf, &nBuf);
/* Step 5a. */
if( nBuf>0 && aBuf[nBuf-1]=='e' ){
assert( nBuf>0 );
if( aBuf[nBuf-1]=='e' ){
if( fts5Porter_MGt1(aBuf, nBuf-1)
|| (fts5Porter_MEq1(aBuf, nBuf-1) && !fts5Porter_Ostar(aBuf, nBuf-1))
){

View File

@ -0,0 +1,64 @@
# 2014 Dec 20
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#***********************************************************************
#
# Tests focusing on the fts5 porter stemmer implementation.
#
# These are extra tests added to those in fts5porter.test in order to
# improve test coverage of the porter stemmer implementation.
#
# Source fts5_common.tcl from the same directory as this script, then set
# the prefix variable used for this file's tests.
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5porter2
# test_vocab is a flat list of pairs: an input word followed by the value
# expected as the first element of the porter tokenizer's output for that
# word (see the foreach loop that consumes it below the list).
#
# Do not add comments inside the braces - everything between them is list
# data, so a "#" there would become part of the vocabulary.
set test_vocab {
tion tion
ation ation
vation vation
avation avat
vion vion
ion ion
relational relat
relation relat
relate relat
zzz zzz
ii ii
iiing ii
xtional xtional
xenci xenci
xlogi xlogi
realization realiz
realize realiz
xization xizat
capitalism capit
talism talism
xiveness xive
xfulness xful
xousness xous
xical xical
xicate xicat
xicity xiciti
ies ie
eed e
eing e
s s
}
# Run one test case per {in out} pair taken from $test_vocab. Each case
# tokenizes $in with the "porter" tokenizer via sqlite3_fts5_tokenize and
# compares element 0 of the returned list against $out.
#
# The counter i is used only to build a unique test name of the form
# "1.<i>.(<in> -> <out>)"; it starts at 0 and is incremented once per pair.
set i 0
foreach {in out} $test_vocab {
do_test "1.$i.($in -> $out)" {
lindex [sqlite3_fts5_tokenize db porter $in] 0
} $out
incr i
}
finish_test

View File

@ -209,6 +209,37 @@ do_execsql_test 7.1 {SELECT rowid FROM e5 WHERE e5 MATCH $a} { 1 3 }
do_execsql_test 7.2 {SELECT rowid FROM e5 WHERE e5 MATCH $b} { 1 2 }
do_execsql_test 7.3 {SELECT rowid FROM e5 WHERE e5 MATCH $c} { 2 3 }
#-------------------------------------------------------------------------
# Test the 'separators' option with the unicode61 tokenizer.
#
# 8.1: configure every upper-case ASCII letter (A-Z) as a separator for the
# unicode61 tokenizer, then insert a single string in which the words are
# joined only by upper-case letters. The fts5vocab 'row' table e7 is queried
# to list the terms that were indexed; the expected result is the set of
# lower-case words, each listed once.
#
# All statements run between BEGIN and ROLLBACK, so tables e6 and e7 are
# discarded when the test finishes.
do_execsql_test 8.1 {
BEGIN;
CREATE VIRTUAL TABLE e6 USING fts5(x,
tokenize="unicode61 separators ABCDEFGHIJKLMNOPQRSTUVWXYZ"
);
INSERT INTO e6 VALUES('theAquickBbrownCfoxDjumpedWoverXtheYlazyZdog');
CREATE VIRTUAL TABLE e7 USING fts5vocab(e6, 'row');
SELECT term FROM e7;
ROLLBACK;
} {
brown dog fox jumped lazy over quick the
}
# 8.2: same idea as a separators test, but with non-ASCII separators
# U+0E01 through U+0E07.
#
# Both the SQL script and the expected result are wrapped in [subst {...}]
# because Tcl does not process backslash escapes inside braces; subst
# expands the \uXXXX sequences into the actual characters before the
# strings are used.
#
# The first row joins its words with U+0E01. The second row is
# U+0E08 U+0E07 U+0E09: U+0E07 is one of the separators, so the expected
# vocabulary ends with the two single-character terms U+0E08 and U+0E09.
#
# As the script is wrapped in BEGIN ... ROLLBACK, e6 and e7 are discarded
# when the test finishes.
do_execsql_test 8.2 [subst {
BEGIN;
CREATE VIRTUAL TABLE e6 USING fts5(x,
tokenize="unicode61 separators '\u0E01\u0E02\u0E03\u0E04\u0E05\u0E06\u0E07'"
);
INSERT INTO e6 VALUES('the\u0E01quick\u0E01brown\u0E01fox\u0E01'
|| 'jumped\u0E01over\u0E01the\u0E01lazy\u0E01dog'
);
INSERT INTO e6 VALUES('\u0E08\u0E07\u0E09');
CREATE VIRTUAL TABLE e7 USING fts5vocab(e6, 'row');
SELECT term FROM e7;
ROLLBACK;
}] [subst {
brown dog fox jumped lazy over quick the \u0E08 \u0E09
}]
finish_test

View File

@ -70,6 +70,12 @@ do_unicode_token_test2 1.10 "xx\u0301xx" "xxxx xx\u301xx"
# Title-case mappings work
do_unicode_token_test 1.11 "\u01c5" "\u01c6 \u01c5"
# 1.12: words that begin and end with upper-case accented Latin letters
# (U+00C1, U+00C2, U+00D1, U+00C3). Judging by the expected string, each
# word yields its lower-case form (U+00E1, U+00E2, U+00F1, U+00E3 at the
# edges) followed by the original text.
# NOTE(review): the exact meaning of the expected-output pairs is defined by
# do_unicode_token_test, which is not visible here - confirm.
do_unicode_token_test 1.12 "\u00C1abc\u00C2 \u00D1def\u00C3" \
"\u00E1abc\u00E2 \u00C1abc\u00C2 \u00F1def\u00E3 \u00D1def\u00C3"
# 1.13: words wrapped in U+00A2..U+00A5 (currency symbols). The expected
# string contains only "abc" and "def", i.e. those characters are not part
# of any token - presumably because the tokenizer treats them as separators.
do_unicode_token_test 1.13 "\u00A2abc\u00A3 \u00A4def\u00A5" \
"abc abc def def"
#-------------------------------------------------------------------------
#
set docs [list {
@ -225,6 +231,10 @@ do_test 4.1 {
INSERT INTO t1 VALUES($c);
INSERT INTO t1 VALUES($d);
}
execsql "CREATE VIRTUAL TABLE t8 USING fts5(
a, b, tokenize=\"unicode61 separators '\uFFFE\uD800\u00BF'\"
)"
} {}
do_test 4.2 {
@ -253,6 +263,15 @@ do_test 4.3 {
}
} {}
# 4.4: create an fts5 table whose unicode61 "separators" argument, and the
# text subsequently inserted, are written with %XX sequences and executed
# through sqlite3_exec_hex.
# NOTE(review): sqlite3_exec_hex is defined outside this file; presumably it
# replaces each %XX with the corresponding raw byte, which would make both
# the separators string and the inserted value contain bytes that are not
# well-formed UTF-8 - confirm against the helper's implementation.
#
# The test asserts only that the script returns {0 {}}; it does not inspect
# the tokens that were produced.
do_test 4.4 {
sqlite3_exec_hex db {
CREATE VIRTUAL TABLE t9 USING fts5(a, b,
tokenize="unicode61 separators '%C09004'"
);
INSERT INTO t9(a) VALUES('abc%88def %89ghi%90');
}
} {0 {}}
#-------------------------------------------------------------------------

View File

@ -1,5 +1,5 @@
C Add\stests\sfor\sfts5\stokenizers.
D 2015-05-19T19:37:09.304
C Improve\stest\scoverage\sof\sfts5_tokenize.c.
D 2015-05-20T09:27:51.629
F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f
F Makefile.in 2c28e557780395095c307a6e5cb539419027eb5e
F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23
@ -115,7 +115,7 @@ F ext/fts5/fts5_hash.c 54dd25348a46ea62ea96322c572e08cd1fb37304
F ext/fts5/fts5_index.c 2c4500c35072b049d1391bbb4e64e4c0e3d3dd43
F ext/fts5/fts5_storage.c 5d2b51adb304643d8f825ba89283d628418b20c2
F ext/fts5/fts5_tcl.c 7ea165878e4ae3598e89acd470a0ee1b5a00e33c
F ext/fts5/fts5_tokenize.c 4d9d50478169a8446686ab255cc723a6b4f4c20b
F ext/fts5/fts5_tokenize.c 6f4d2cbe7ed892821d1a233c7db613dafdb3877a
F ext/fts5/fts5_unicode2.c f74f53316377068812a1fa5a37819e6b8124631d
F ext/fts5/fts5_vocab.c b54301e376f59f08f662b5dde1cfaf26e86e4db6
F ext/fts5/fts5parse.y 777da8e5819f75c217982c79c29d014c293acac9
@ -159,14 +159,15 @@ F ext/fts5/test/fts5near.test d2e3343e62d438f2efd96ebcd83a0d30a16ea6dc
F ext/fts5/test/fts5optimize.test 0028c90a7817d3e576d1148fc8dff17d89054e54
F ext/fts5/test/fts5plan.test 89783f70dab89ff936ed6f21d88959b49c853a47
F ext/fts5/test/fts5porter.test 50322599823cb8080a99f0ec0c39f7d0c12bcb5e
F ext/fts5/test/fts5porter2.test c534385e88e685b354c2b2020acc0c4920042c8e
F ext/fts5/test/fts5prefix.test 7eba86fc270b110ba2b83ba286a1fd4b3b17955e
F ext/fts5/test/fts5rank.test f59a6b20ec8e08cb130d833dcece59cf9cd92890
F ext/fts5/test/fts5rebuild.test 77c6613aa048f38b4a12ddfacb2e6e1342e1b066
F ext/fts5/test/fts5restart.test cd58a5fb552ac10db549482698e503f82693bcd0
F ext/fts5/test/fts5rowid.test ca9d91ccb3a4590fc561b2d7a884361bb21e8df5
F ext/fts5/test/fts5tokenizer.test f54bbbff67ff03ce49c153c0f6a5e3f8369f986a
F ext/fts5/test/fts5tokenizer.test 668747fcb41de6fc7daebc478920b705164fccc1
F ext/fts5/test/fts5unicode.test 79b3e34eb29ce4929628aa514a40cb467fdabe4d
F ext/fts5/test/fts5unicode2.test 64a5267fd6082fcb46439892ebd0cbaa5c38acee
F ext/fts5/test/fts5unicode2.test ad38982b03dc9213445facb16e99f668a74cc4ba
F ext/fts5/test/fts5unindexed.test f388605341a476b6ab622b4c267cd168f59a5944
F ext/fts5/test/fts5version.test dc34a735af6625a1a7a4a916a38d122071343887
F ext/fts5/test/fts5vocab.test 80fb22850dd3b2c92a3896e6021605e08c0872aa
@ -1328,7 +1329,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1
F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4
F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32
F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f
P 2870a80593302e7835c5f5d167f42710d8439e7d
R 63f128b09262f76dbe78be4c38aa78c8
P 4f90ba20e2be6ec5755fe894938ac97342d6fbf6
R 43528c0613d372060fbd8256efc47909
U dan
Z e801c590b1575eb988d36c609d9907aa
Z e3c696b644b37e5798613b4f15c87656

View File

@ -1 +1 @@
4f90ba20e2be6ec5755fe894938ac97342d6fbf6
0e91a6a520f040b8902da6a1a4d9107dc66c0ea3