From 8c1f46de502ed8a91185ee1f30f0103b8e5afaab Mon Sep 17 00:00:00 2001 From: dan Date: Wed, 20 May 2015 09:27:51 +0000 Subject: [PATCH] Improve test coverage of fts5_tokenize.c. FossilOrigin-Name: 0e91a6a520f040b8902da6a1a4d9107dc66c0ea3 --- ext/fts5/fts5_tokenize.c | 7 ++-- ext/fts5/test/fts5porter2.test | 64 ++++++++++++++++++++++++++++++++ ext/fts5/test/fts5tokenizer.test | 31 ++++++++++++++++ ext/fts5/test/fts5unicode2.test | 19 ++++++++++ manifest | 17 +++++---- manifest.uuid | 2 +- 6 files changed, 128 insertions(+), 12 deletions(-) create mode 100644 ext/fts5/test/fts5porter2.test diff --git a/ext/fts5/fts5_tokenize.c b/ext/fts5/fts5_tokenize.c index 25316dd3c3..b340d45d36 100644 --- a/ext/fts5/fts5_tokenize.c +++ b/ext/fts5/fts5_tokenize.c @@ -666,8 +666,8 @@ static int fts5Porter_Ostar(char *zStem, int nStem){ /* porter rule condition: (m > 1 and (*S or *T)) */ static int fts5Porter_MGt1_and_S_or_T(char *zStem, int nStem){ - return nStem>0 - && (zStem[nStem-1]=='s' || zStem[nStem-1]=='t') + assert( nStem>0 ); + return (zStem[nStem-1]=='s' || zStem[nStem-1]=='t') && fts5Porter_MGt1(zStem, nStem); } @@ -1167,7 +1167,8 @@ static int fts5PorterCb( fts5PorterStep4(aBuf, &nBuf); /* Step 5a. */ - if( nBuf>0 && aBuf[nBuf-1]=='e' ){ + assert( nBuf>0 ); + if( aBuf[nBuf-1]=='e' ){ if( fts5Porter_MGt1(aBuf, nBuf-1) || (fts5Porter_MEq1(aBuf, nBuf-1) && !fts5Porter_Ostar(aBuf, nBuf-1)) ){ diff --git a/ext/fts5/test/fts5porter2.test b/ext/fts5/test/fts5porter2.test new file mode 100644 index 0000000000..7ea2e6994d --- /dev/null +++ b/ext/fts5/test/fts5porter2.test @@ -0,0 +1,64 @@ +# 2014 Dec 20 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# +# Tests focusing on the fts5 porter stemmer implementation. +# +# These are extra tests added to those in fts5porter.test in order to +# improve test coverage of the porter stemmer implementation. +# + +source [file join [file dirname [info script]] fts5_common.tcl] +set testprefix fts5porter2 + +set test_vocab { + tion tion + ation ation + vation vation + avation avat + vion vion + ion ion + relational relat + relation relat + relate relat + zzz zzz + ii ii + iiing ii + xtional xtional + xenci xenci + xlogi xlogi + realization realiz + realize realiz + xization xizat + capitalism capit + talism talism + xiveness xive + xfulness xful + xousness xous + xical xical + xicate xicat + xicity xiciti + ies ie + eed e + eing e + s s +} + +set i 0 +foreach {in out} $test_vocab { + do_test "1.$i.($in -> $out)" { + lindex [sqlite3_fts5_tokenize db porter $in] 0 + } $out + incr i +} + + +finish_test + diff --git a/ext/fts5/test/fts5tokenizer.test b/ext/fts5/test/fts5tokenizer.test index 83ad169188..1a3d253be8 100644 --- a/ext/fts5/test/fts5tokenizer.test +++ b/ext/fts5/test/fts5tokenizer.test @@ -209,6 +209,37 @@ do_execsql_test 7.1 {SELECT rowid FROM e5 WHERE e5 MATCH $a} { 1 3 } do_execsql_test 7.2 {SELECT rowid FROM e5 WHERE e5 MATCH $b} { 1 2 } do_execsql_test 7.3 {SELECT rowid FROM e5 WHERE e5 MATCH $c} { 2 3 } +#------------------------------------------------------------------------- +# Test the 'separators' option with the unicode61 tokenizer. +# +do_execsql_test 8.1 { + BEGIN; + CREATE VIRTUAL TABLE e6 USING fts5(x, + tokenize="unicode61 separators ABCDEFGHIJKLMNOPQRSTUVWXYZ" + ); + INSERT INTO e6 VALUES('theAquickBbrownCfoxDjumpedWoverXtheYlazyZdog'); + CREATE VIRTUAL TABLE e7 USING fts5vocab(e6, 'row'); + SELECT term FROM e7; + ROLLBACK; +} { + brown dog fox jumped lazy over quick the +} + +do_execsql_test 8.2 [subst { + BEGIN; + CREATE VIRTUAL TABLE e6 USING fts5(x, + tokenize="unicode61 separators '\u0E01\u0E02\u0E03\u0E04\u0E05\u0E06\u0E07'" + ); + INSERT INTO e6 VALUES('the\u0E01quick\u0E01brown\u0E01fox\u0E01' + || 'jumped\u0E01over\u0E01the\u0E01lazy\u0E01dog' + ); + INSERT INTO e6 VALUES('\u0E08\u0E07\u0E09'); + CREATE VIRTUAL TABLE e7 USING fts5vocab(e6, 'row'); + SELECT term FROM e7; + ROLLBACK; +}] [subst { + brown dog fox jumped lazy over quick the \u0E08 \u0E09 +}] finish_test diff --git a/ext/fts5/test/fts5unicode2.test b/ext/fts5/test/fts5unicode2.test index 056106e18e..280d045db1 100644 --- a/ext/fts5/test/fts5unicode2.test +++ b/ext/fts5/test/fts5unicode2.test @@ -70,6 +70,12 @@ do_unicode_token_test2 1.10 "xx\u0301xx" "xxxx xx\u301xx" # Title-case mappings work do_unicode_token_test 1.11 "\u01c5" "\u01c6 \u01c5" +do_unicode_token_test 1.12 "\u00C1abc\u00C2 \u00D1def\u00C3" \ + "\u00E1abc\u00E2 \u00C1abc\u00C2 \u00F1def\u00E3 \u00D1def\u00C3" + +do_unicode_token_test 1.13 "\u00A2abc\u00A3 \u00A4def\u00A5" \ + "abc abc def def" + #------------------------------------------------------------------------- # set docs [list { @@ -225,6 +231,10 @@ do_test 4.1 { INSERT INTO t1 VALUES($c); INSERT INTO t1 VALUES($d); } + + execsql "CREATE VIRTUAL TABLE t8 USING fts5( + a, b, tokenize=\"unicode61 separators '\uFFFE\uD800\u00BF'\" + )" } {} do_test 4.2 { @@ -253,6 +263,15 @@ do_test 4.3 { } } {} +do_test 4.4 { + sqlite3_exec_hex db { + CREATE VIRTUAL TABLE t9 USING fts5(a, b, + tokenize="unicode61 separators '%C09004'" + ); + INSERT INTO t9(a) VALUES('abc%88def %89ghi%90'); + } +} {0 {}} + #------------------------------------------------------------------------- diff --git a/manifest b/manifest index 90de227169..d897c6433b 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Add\stests\sfor\sfts5\stokenizers. -D 2015-05-19T19:37:09.304 +C Improve\stest\scoverage\sof\sfts5_tokenize.c. +D 2015-05-20T09:27:51.629 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 2c28e557780395095c307a6e5cb539419027eb5e F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -115,7 +115,7 @@ F ext/fts5/fts5_hash.c 54dd25348a46ea62ea96322c572e08cd1fb37304 F ext/fts5/fts5_index.c 2c4500c35072b049d1391bbb4e64e4c0e3d3dd43 F ext/fts5/fts5_storage.c 5d2b51adb304643d8f825ba89283d628418b20c2 F ext/fts5/fts5_tcl.c 7ea165878e4ae3598e89acd470a0ee1b5a00e33c -F ext/fts5/fts5_tokenize.c 4d9d50478169a8446686ab255cc723a6b4f4c20b +F ext/fts5/fts5_tokenize.c 6f4d2cbe7ed892821d1a233c7db613dafdb3877a F ext/fts5/fts5_unicode2.c f74f53316377068812a1fa5a37819e6b8124631d F ext/fts5/fts5_vocab.c b54301e376f59f08f662b5dde1cfaf26e86e4db6 F ext/fts5/fts5parse.y 777da8e5819f75c217982c79c29d014c293acac9 @@ -159,14 +159,15 @@ F ext/fts5/test/fts5near.test d2e3343e62d438f2efd96ebcd83a0d30a16ea6dc F ext/fts5/test/fts5optimize.test 0028c90a7817d3e576d1148fc8dff17d89054e54 F ext/fts5/test/fts5plan.test 89783f70dab89ff936ed6f21d88959b49c853a47 F ext/fts5/test/fts5porter.test 50322599823cb8080a99f0ec0c39f7d0c12bcb5e +F ext/fts5/test/fts5porter2.test c534385e88e685b354c2b2020acc0c4920042c8e F ext/fts5/test/fts5prefix.test 7eba86fc270b110ba2b83ba286a1fd4b3b17955e F ext/fts5/test/fts5rank.test f59a6b20ec8e08cb130d833dcece59cf9cd92890 F ext/fts5/test/fts5rebuild.test 77c6613aa048f38b4a12ddfacb2e6e1342e1b066 F ext/fts5/test/fts5restart.test cd58a5fb552ac10db549482698e503f82693bcd0 F ext/fts5/test/fts5rowid.test ca9d91ccb3a4590fc561b2d7a884361bb21e8df5 -F ext/fts5/test/fts5tokenizer.test f54bbbff67ff03ce49c153c0f6a5e3f8369f986a +F ext/fts5/test/fts5tokenizer.test 668747fcb41de6fc7daebc478920b705164fccc1 F ext/fts5/test/fts5unicode.test 79b3e34eb29ce4929628aa514a40cb467fdabe4d -F ext/fts5/test/fts5unicode2.test 64a5267fd6082fcb46439892ebd0cbaa5c38acee +F ext/fts5/test/fts5unicode2.test ad38982b03dc9213445facb16e99f668a74cc4ba F ext/fts5/test/fts5unindexed.test f388605341a476b6ab622b4c267cd168f59a5944 F ext/fts5/test/fts5version.test dc34a735af6625a1a7a4a916a38d122071343887 F ext/fts5/test/fts5vocab.test 80fb22850dd3b2c92a3896e6021605e08c0872aa @@ -1328,7 +1329,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 2870a80593302e7835c5f5d167f42710d8439e7d -R 63f128b09262f76dbe78be4c38aa78c8 +P 4f90ba20e2be6ec5755fe894938ac97342d6fbf6 +R 43528c0613d372060fbd8256efc47909 U dan -Z e801c590b1575eb988d36c609d9907aa +Z e3c696b644b37e5798613b4f15c87656 diff --git a/manifest.uuid b/manifest.uuid index 1348da3add..e111b8a77c 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -4f90ba20e2be6ec5755fe894938ac97342d6fbf6 \ No newline at end of file +0e91a6a520f040b8902da6a1a4d9107dc66c0ea3 \ No newline at end of file