From ccf578d435cce596b7f7566a98b0ad8d71df1ea3 Mon Sep 17 00:00:00 2001 From: dan Date: Thu, 1 Oct 2020 16:10:22 +0000 Subject: [PATCH] Add tests for the trigram tokenizer. Fix minor issues. FossilOrigin-Name: 897ced99b44085012aa44d3264940dcbd4c77b295a894a1b58fb2c03a0f7fee8 --- ext/fts5/fts5_expr.c | 8 +++++ ext/fts5/fts5_tokenize.c | 14 +++++++-- ext/fts5/test/fts5faultD.test | 2 +- ext/fts5/test/fts5faultE.test | 53 ++++++++++++++++++++++++++++++++++ ext/fts5/test/fts5trigram.test | 13 +++++++++ manifest | 22 +++++++------- manifest.uuid | 2 +- 7 files changed, 98 insertions(+), 16 deletions(-) create mode 100644 ext/fts5/test/fts5faultE.test diff --git a/ext/fts5/fts5_expr.c b/ext/fts5/fts5_expr.c index 68d3b6e87e..21cf06208a 100644 --- a/ext/fts5/fts5_expr.c +++ b/ext/fts5/fts5_expr.c @@ -284,6 +284,14 @@ int sqlite3Fts5ExprNew( return sParse.rc; } +/* +** This function is only called when using the special 'trigram' tokenizer. +** Argument zText contains the text of a LIKE or GLOB pattern matched +** against column iCol. This function creates and compiles an FTS5 MATCH +** expression that will match a superset of the rows matched by the LIKE or +** GLOB. If successful, SQLITE_OK is returned. Otherwise, an SQLite error +** code. +*/ int sqlite3Fts5ExprPattern( Fts5Config *pConfig, int iCol, const char *zText, Fts5Expr **pp ){ diff --git a/ext/fts5/fts5_tokenize.c b/ext/fts5/fts5_tokenize.c index b3c3995504..665c613306 100644 --- a/ext/fts5/fts5_tokenize.c +++ b/ext/fts5/fts5_tokenize.c @@ -1261,10 +1261,9 @@ static int fts5PorterTokenize( /************************************************************************** ** Start of trigram implementation. */ - typedef struct TrigramTokenizer TrigramTokenizer; struct TrigramTokenizer { - int bFold; + int bFold; /* True to fold to lower-case */ }; /* @@ -1359,6 +1358,17 @@ static int fts5TriTokenize( return rc; } +/* +** Argument xCreate is a pointer to a constructor function for a tokenizer. +** pTok is a tokenizer previously created using the same method. This function +** returns one of FTS5_PATTERN_NONE, FTS5_PATTERN_LIKE or FTS5_PATTERN_GLOB +** indicating the style of pattern matching that the tokenizer can support. +** In practice, this is: +** +** "trigram" tokenizer, case_sensitive=1 - FTS5_PATTERN_GLOB +** "trigram" tokenizer, case_sensitive=0 (the default) - FTS5_PATTERN_LIKE +** all other tokenizers - FTS5_PATTERN_NONE +*/ int sqlite3Fts5TokenizerPattern( int (*xCreate)(void*, const char**, int, Fts5Tokenizer**), Fts5Tokenizer *pTok diff --git a/ext/fts5/test/fts5faultD.test b/ext/fts5/test/fts5faultD.test index e259cbf610..33590645ce 100644 --- a/ext/fts5/test/fts5faultD.test +++ b/ext/fts5/test/fts5faultD.test @@ -14,7 +14,7 @@ source [file join [file dirname [info script]] fts5_common.tcl] source $testdir/malloc_common.tcl -set testprefix fts5faultA +set testprefix fts5faultD # If SQLITE_ENABLE_FTS3 is defined, omit this file. ifcapable !fts5 { diff --git a/ext/fts5/test/fts5faultE.test b/ext/fts5/test/fts5faultE.test new file mode 100644 index 0000000000..1ab3479901 --- /dev/null +++ b/ext/fts5/test/fts5faultE.test @@ -0,0 +1,53 @@ +# 2016 February 2 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#************************************************************************* +# +# This file is focused on OOM errors. +# + +source [file join [file dirname [info script]] fts5_common.tcl] +source $testdir/malloc_common.tcl +set testprefix fts5faultE + +# If SQLITE_ENABLE_FTS5 is not defined, omit this file. +ifcapable !fts5 { + finish_test + return +} + +faultsim_save_and_close +do_faultsim_test 1 -prep { + faultsim_restore_and_reopen +} -body { + execsql { CREATE VIRTUAL TABLE t1 USING fts5(x, y, tokenize=trigram) } +} -test { + faultsim_test_result {0 {}} {1 {vtable constructor failed: t1}} +} + +reset_db +do_execsql_test 2.0 { + CREATE VIRTUAL TABLE t1 USING fts5(x, y, tokenize=trigram); +} + +faultsim_save_and_close +do_faultsim_test 2 -faults ioerr-t* -prep { + faultsim_restore_and_reopen +} -body { + execsql { + INSERT INTO t1 VALUES('abcdefghijklmnopqrstuvwxyz', NULL); + SELECT count(*) FROM t1 WHERE x LIKE '%mnop%' AND t1 MATCH 'jkl'; + } +} -test { + faultsim_test_result {0 1} {1 {vtable constructor failed: t1}} +} + + +finish_test + diff --git a/ext/fts5/test/fts5trigram.test b/ext/fts5/test/fts5trigram.test index be2c1acece..ebdc8d30ce 100644 --- a/ext/fts5/test/fts5trigram.test +++ b/ext/fts5/test/fts5trigram.test @@ -117,5 +117,18 @@ foreach {tn like res} { } $res } +#------------------------------------------------------------------------- +reset_db +do_catchsql_test 3.1 { + CREATE VIRTUAL TABLE ttt USING fts5(c, tokenize="trigram case_sensitive 2"); +} {1 {error in tokenizer constructor}} +do_catchsql_test 3.2 { + CREATE VIRTUAL TABLE ttt USING fts5(c, tokenize="trigram case_sensitive 11"); +} {1 {error in tokenizer constructor}} +do_catchsql_test 3.3 { + CREATE VIRTUAL TABLE ttt USING fts5(c, "tokenize=trigram case_sensitive 1"); +} {0 {}} + + finish_test diff --git a/manifest b/manifest index fded953f7e..70f72c9d32 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Add\sexperimental\sunicode-aware\strigram\stokenizer\sto\sfts5.\sAnd\ssupport\sfor\sLIKE\sand\sGLOB\soptimizations\sfor\sfts5\stables\sthat\suse\ssaid\stokenizer. -D 2020-09-30T20:35:37.594 +C Add\stests\sfor\sthe\strigram\stokenizer.\sFix\sminor\sissues. +D 2020-10-01T16:10:22.243 F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724 @@ -116,7 +116,7 @@ F ext/fts5/fts5Int.h 928aed51dbeb4acc0d2e3ceeebb5f6918d64c9ad5c4e7634a238895abea F ext/fts5/fts5_aux.c dcc627d8b6e3fc773db528ff67b39955dab7b51628f9dba8e15849e5bedfd7fa F ext/fts5/fts5_buffer.c 5a5fe0159752c0fb0a5a93c722e9db2662822709490769d482b76a6dc8aaca70 F ext/fts5/fts5_config.c be54f44fca491e96c6923a4b9a736f2da2b13811600eb6e38d1bcc91c4ea2e61 -F ext/fts5/fts5_expr.c e1f548de5e7f146e55e1a34c2745d1893510c0766baa55d33aa05c0643398534 +F ext/fts5/fts5_expr.c 7eba8fed2a8f154413814f63c21b34d7562b7d80a62614b9301a5ba6a700f4f0 F ext/fts5/fts5_hash.c 15bffa734fbdca013b2289c6f8827a3b935ef14bd4dde5837d31a75434c00627 F ext/fts5/fts5_index.c 255d3ce3fec28be11c533451e5b23bd79e71a13a1b120f3658b34fff6b097816 F ext/fts5/fts5_main.c 65c5d579cabaecab478f4bd159ad5c040590f6a75e5afd4ad43c8b92ac65f7f2 @@ -124,7 +124,7 @@ F ext/fts5/fts5_storage.c 58ba71e6cd3d43a5735815e7956ee167babb4d2cbfe20690517479 F ext/fts5/fts5_tcl.c 39bcbae507f594aad778172fa914cad0f585bf92fd3b078c686e249282db0d95 F ext/fts5/fts5_test_mi.c 08c11ec968148d4cb4119d96d819f8c1f329812c568bac3684f5464be177d3ee F ext/fts5/fts5_test_tok.c f96c6e193c466711d6d7828d5f190407fe7ab897062d371426dd3036f01258e7 -F ext/fts5/fts5_tokenize.c be911fbd2f9c9ef0db2b4b492d62628820567eb83521466250fd6df27858fb74 +F ext/fts5/fts5_tokenize.c 5711f170065d23809afa97475c4adcd03387da043feb2fb2a1660fe366c01647 F ext/fts5/fts5_unicode2.c 8bd0cd07396b74c1a05590e4070d635bccfc849812c305619f109e6c0485e250 F ext/fts5/fts5_varint.c e64d2113f6e1bfee0032972cffc1207b77af63319746951bf1d09885d1dadf80 F ext/fts5/fts5_vocab.c 7a071833064dc8bca236c3c323e56aac36f583aa2c46ce916d52e31ce87462c9 @@ -179,7 +179,8 @@ F ext/fts5/test/fts5fault8.test 318238659d35f82ad215ecb57ca4c87486ea85d45dbeedae F ext/fts5/test/fts5fault9.test 098e6b894bbdf9b2192f994a30f4043673fb3f338b6b8ab1624c704422f39119 F ext/fts5/test/fts5faultA.test be4487576bff8c22cee6597d1893b312f306504a8c6ccd3c53ca85af12290c8c F ext/fts5/test/fts5faultB.test d606bdb8e81aaeb6f41de3fc9fc7ae315733f0903fbff05cf54f5b045b729ab5 -F ext/fts5/test/fts5faultD.test cc5d1225556e356615e719c612e845d41bff7d5a +F ext/fts5/test/fts5faultD.test e7ed7895abfe6bc98a5e853826f6b74956e7ba7f594f1860bbf9e504b9647996 +F ext/fts5/test/fts5faultE.test aa7caab3597390b753e0755c087f118f775804a070bd0960f5a4bb6246ed6a29 F ext/fts5/test/fts5first.test 3fcf2365c00a15fc9704233674789a3b95131d12de18a9b996159f6909dc8079 F ext/fts5/test/fts5full.test e1701a112354e0ff9a1fdffb0c940c576530c33732ee20ac5e8361777070d717 F ext/fts5/test/fts5fuzz1.test 238d8c45f3b81342aa384de3e581ff2fa330bf922a7b69e484bbc06051a1080e @@ -216,7 +217,7 @@ F ext/fts5/test/fts5synonym2.test b54cce5c34ec08ed616f646635538ae82e34a0e28f947e F ext/fts5/test/fts5tok1.test ce6551e41ff56f30b69963577324624733bed0d1753589f06120d664d9cd45c9 F ext/fts5/test/fts5tok2.test dcacb32d4a2a3f0dd3215d4a3987f78ae4be21a2 F ext/fts5/test/fts5tokenizer.test ac3c9112b263a639fb0508ae73a3ee886bf4866d2153771a8e8a20c721305a43 -F ext/fts5/test/fts5trigram.test 442b9e0c0f64838e1fad8d3d9e4ebb96f53a3033498e6e80b15d97081b320b0c +F ext/fts5/test/fts5trigram.test 0a9ade9e808c388d13e9ea925a1bf42f3fa873a90dcd5969ef6e50bdd9483873 F ext/fts5/test/fts5umlaut.test a42fe2fe6387c40c49ab27ccbd070e1ae38e07f38d05926482cc0bccac9ad602 F ext/fts5/test/fts5unicode.test 17056f4efe6b0a5d4f41fdf7a7dc9af2873004562eaa899d40633b93dc95f5a9 F ext/fts5/test/fts5unicode2.test 9b3df486de05fb4bde4aa7ee8de2e6dae1df6eb90e3f2e242c9383b95d314e3e @@ -1881,10 +1882,7 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0 -P 4a43430fd23f88352c33b29c4c105b72f6dc821f94bf362040c41a1648c402e5 -R 1f094ba3b91e26d2f277be832160094c -T *branch * fts5-trigram -T *sym-fts5-trigram * -T -sym-trunk * +P 0d7810c1aea93c0a3da1ccc4911dbce8a1b6e1dbfe1ab7e800289a0c783b5985 +R c7737548b4b859bb94163b777bb03b06 U dan -Z f75cc113cdfae84e1f09152a573517c5 +Z afc946f18e97090d1c0b09338fc98cec diff --git a/manifest.uuid b/manifest.uuid index 8d76819c2f..b9e8dd8af3 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -0d7810c1aea93c0a3da1ccc4911dbce8a1b6e1dbfe1ab7e800289a0c783b5985 \ No newline at end of file +897ced99b44085012aa44d3264940dcbd4c77b295a894a1b58fb2c03a0f7fee8 \ No newline at end of file