Add tests for the trigram tokenizer. Fix minor issues.

FossilOrigin-Name: 897ced99b44085012aa44d3264940dcbd4c77b295a894a1b58fb2c03a0f7fee8
This commit is contained in:
dan 2020-10-01 16:10:22 +00:00
parent 33a99fad08
commit ccf578d435
7 changed files with 98 additions and 16 deletions

View File

@ -284,6 +284,14 @@ int sqlite3Fts5ExprNew(
return sParse.rc;
}
/*
** This function is only called when using the special 'trigram' tokenizer.
** Argument zText contains the text of a LIKE or GLOB pattern matched
** against column iCol. This function creates and compiles an FTS5 MATCH
** expression that will match a superset of the rows matched by the LIKE or
** GLOB. If successful, SQLITE_OK is returned. Otherwise, an SQLite error
** code is returned.
*/
int sqlite3Fts5ExprPattern(
Fts5Config *pConfig, int iCol, const char *zText, Fts5Expr **pp
){

View File

@ -1261,10 +1261,9 @@ static int fts5PorterTokenize(
/**************************************************************************
** Start of trigram implementation.
*/
typedef struct TrigramTokenizer TrigramTokenizer;
struct TrigramTokenizer {
int bFold;
int bFold; /* True to fold to lower-case */
};
/*
@ -1359,6 +1358,17 @@ static int fts5TriTokenize(
return rc;
}
/*
** Argument xCreate is a pointer to a constructor function for a tokenizer.
** pTok is a tokenizer previously created using the same method. This function
** returns one of FTS5_PATTERN_NONE, FTS5_PATTERN_LIKE or FTS5_PATTERN_GLOB
** indicating the style of pattern matching that the tokenizer can support.
** In practice, this is:
**
** "trigram" tokenizer, case_sensitive=1 - FTS5_PATTERN_GLOB
** "trigram" tokenizer, case_sensitive=0 (the default) - FTS5_PATTERN_LIKE
** all other tokenizers - FTS5_PATTERN_NONE
*/
int sqlite3Fts5TokenizerPattern(
int (*xCreate)(void*, const char**, int, Fts5Tokenizer**),
Fts5Tokenizer *pTok

View File

@ -14,7 +14,7 @@
source [file join [file dirname [info script]] fts5_common.tcl]
source $testdir/malloc_common.tcl
set testprefix fts5faultA
set testprefix fts5faultD
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {

View File

@ -0,0 +1,53 @@
# 2016 February 2
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#*************************************************************************
#
# This file is focused on OOM errors.
#
# Load fts5_common.tcl from the directory containing this script, then
# malloc_common.tcl from $testdir. The faultsim_* and do_faultsim_test
# commands used below are not defined in this file - presumably they come
# from these sourced scripts (TODO confirm).
source [file join [file dirname [info script]] fts5_common.tcl]
source $testdir/malloc_common.tcl
# Prefix for this file's test cases - presumably combined by the harness
# with the test numbers used below (TODO confirm).
set testprefix fts5faultE
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
# Test 1: execute CREATE VIRTUAL TABLE for an fts5 table using the trigram
# tokenizer under fault injection. Each run starts from the state captured
# by faultsim_save_and_close. The accepted outcomes are success with an
# empty result, or the error "vtable constructor failed: t1".
faultsim_save_and_close
do_faultsim_test 1 -prep {
faultsim_restore_and_reopen
} -body {
execsql { CREATE VIRTUAL TABLE t1 USING fts5(x, y, tokenize=trigram) }
} -test {
faultsim_test_result {0 {}} {1 {vtable constructor failed: t1}}
}
# Test 2 setup: start from a fresh database and create the trigram table
# outside of any do_faultsim_test block, then save that state so each
# faultsim run below restores a database that already contains t1.
reset_db
do_execsql_test 2.0 {
CREATE VIRTUAL TABLE t1 USING fts5(x, y, tokenize=trigram);
}
# Test 2: insert one row and count the rows matching both a LIKE pattern on
# column x and a MATCH query. The -faults option restricts this test to
# faults whose names match ioerr-t* (NOTE(review): the file header says the
# file is focused on OOM errors, yet this test selects ioerr faults -
# confirm this is intended). The accepted outcomes are a count of 1, or the
# error "vtable constructor failed: t1".
faultsim_save_and_close
do_faultsim_test 2 -faults ioerr-t* -prep {
faultsim_restore_and_reopen
} -body {
execsql {
INSERT INTO t1 VALUES('abcdefghijklmnopqrstuvwxyz', NULL);
SELECT count(*) FROM t1 WHERE x LIKE '%mnop%' AND t1 MATCH 'jkl';
}
} -test {
faultsim_test_result {0 1} {1 {vtable constructor failed: t1}}
}
finish_test

View File

@ -117,5 +117,18 @@ foreach {tn like res} {
} $res
}
#-------------------------------------------------------------------------
# Tests 3.*: check how the trigram tokenizer's "case_sensitive" argument is
# validated when an fts5 table is created. Each case starts from the fresh
# database produced by reset_db and tries to create table ttt.
reset_db
# 3.1: case_sensitive value 2 is expected to fail with
# "error in tokenizer constructor".
do_catchsql_test 3.1 {
CREATE VIRTUAL TABLE ttt USING fts5(c, tokenize="trigram case_sensitive 2");
} {1 {error in tokenizer constructor}}
# 3.2: the two-character value 11 is expected to fail with the same error.
do_catchsql_test 3.2 {
CREATE VIRTUAL TABLE ttt USING fts5(c, tokenize="trigram case_sensitive 11");
} {1 {error in tokenizer constructor}}
# 3.3: case_sensitive value 1 is expected to succeed. Note that here the
# double quotes wrap the entire tokenize=... option rather than only its
# value, unlike 3.1 and 3.2.
do_catchsql_test 3.3 {
CREATE VIRTUAL TABLE ttt USING fts5(c, "tokenize=trigram case_sensitive 1");
} {0 {}}
finish_test

View File

@ -1,5 +1,5 @@
C Add\sexperimental\sunicode-aware\strigram\stokenizer\sto\sfts5.\sAnd\ssupport\sfor\sLIKE\sand\sGLOB\soptimizations\sfor\sfts5\stables\sthat\suse\ssaid\stokenizer.
D 2020-09-30T20:35:37.594
C Add\stests\sfor\sthe\strigram\stokenizer.\sFix\sminor\sissues.
D 2020-10-01T16:10:22.243
F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1
F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea
F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724
@ -116,7 +116,7 @@ F ext/fts5/fts5Int.h 928aed51dbeb4acc0d2e3ceeebb5f6918d64c9ad5c4e7634a238895abea
F ext/fts5/fts5_aux.c dcc627d8b6e3fc773db528ff67b39955dab7b51628f9dba8e15849e5bedfd7fa
F ext/fts5/fts5_buffer.c 5a5fe0159752c0fb0a5a93c722e9db2662822709490769d482b76a6dc8aaca70
F ext/fts5/fts5_config.c be54f44fca491e96c6923a4b9a736f2da2b13811600eb6e38d1bcc91c4ea2e61
F ext/fts5/fts5_expr.c e1f548de5e7f146e55e1a34c2745d1893510c0766baa55d33aa05c0643398534
F ext/fts5/fts5_expr.c 7eba8fed2a8f154413814f63c21b34d7562b7d80a62614b9301a5ba6a700f4f0
F ext/fts5/fts5_hash.c 15bffa734fbdca013b2289c6f8827a3b935ef14bd4dde5837d31a75434c00627
F ext/fts5/fts5_index.c 255d3ce3fec28be11c533451e5b23bd79e71a13a1b120f3658b34fff6b097816
F ext/fts5/fts5_main.c 65c5d579cabaecab478f4bd159ad5c040590f6a75e5afd4ad43c8b92ac65f7f2
@ -124,7 +124,7 @@ F ext/fts5/fts5_storage.c 58ba71e6cd3d43a5735815e7956ee167babb4d2cbfe20690517479
F ext/fts5/fts5_tcl.c 39bcbae507f594aad778172fa914cad0f585bf92fd3b078c686e249282db0d95
F ext/fts5/fts5_test_mi.c 08c11ec968148d4cb4119d96d819f8c1f329812c568bac3684f5464be177d3ee
F ext/fts5/fts5_test_tok.c f96c6e193c466711d6d7828d5f190407fe7ab897062d371426dd3036f01258e7
F ext/fts5/fts5_tokenize.c be911fbd2f9c9ef0db2b4b492d62628820567eb83521466250fd6df27858fb74
F ext/fts5/fts5_tokenize.c 5711f170065d23809afa97475c4adcd03387da043feb2fb2a1660fe366c01647
F ext/fts5/fts5_unicode2.c 8bd0cd07396b74c1a05590e4070d635bccfc849812c305619f109e6c0485e250
F ext/fts5/fts5_varint.c e64d2113f6e1bfee0032972cffc1207b77af63319746951bf1d09885d1dadf80
F ext/fts5/fts5_vocab.c 7a071833064dc8bca236c3c323e56aac36f583aa2c46ce916d52e31ce87462c9
@ -179,7 +179,8 @@ F ext/fts5/test/fts5fault8.test 318238659d35f82ad215ecb57ca4c87486ea85d45dbeedae
F ext/fts5/test/fts5fault9.test 098e6b894bbdf9b2192f994a30f4043673fb3f338b6b8ab1624c704422f39119
F ext/fts5/test/fts5faultA.test be4487576bff8c22cee6597d1893b312f306504a8c6ccd3c53ca85af12290c8c
F ext/fts5/test/fts5faultB.test d606bdb8e81aaeb6f41de3fc9fc7ae315733f0903fbff05cf54f5b045b729ab5
F ext/fts5/test/fts5faultD.test cc5d1225556e356615e719c612e845d41bff7d5a
F ext/fts5/test/fts5faultD.test e7ed7895abfe6bc98a5e853826f6b74956e7ba7f594f1860bbf9e504b9647996
F ext/fts5/test/fts5faultE.test aa7caab3597390b753e0755c087f118f775804a070bd0960f5a4bb6246ed6a29
F ext/fts5/test/fts5first.test 3fcf2365c00a15fc9704233674789a3b95131d12de18a9b996159f6909dc8079
F ext/fts5/test/fts5full.test e1701a112354e0ff9a1fdffb0c940c576530c33732ee20ac5e8361777070d717
F ext/fts5/test/fts5fuzz1.test 238d8c45f3b81342aa384de3e581ff2fa330bf922a7b69e484bbc06051a1080e
@ -216,7 +217,7 @@ F ext/fts5/test/fts5synonym2.test b54cce5c34ec08ed616f646635538ae82e34a0e28f947e
F ext/fts5/test/fts5tok1.test ce6551e41ff56f30b69963577324624733bed0d1753589f06120d664d9cd45c9
F ext/fts5/test/fts5tok2.test dcacb32d4a2a3f0dd3215d4a3987f78ae4be21a2
F ext/fts5/test/fts5tokenizer.test ac3c9112b263a639fb0508ae73a3ee886bf4866d2153771a8e8a20c721305a43
F ext/fts5/test/fts5trigram.test 442b9e0c0f64838e1fad8d3d9e4ebb96f53a3033498e6e80b15d97081b320b0c
F ext/fts5/test/fts5trigram.test 0a9ade9e808c388d13e9ea925a1bf42f3fa873a90dcd5969ef6e50bdd9483873
F ext/fts5/test/fts5umlaut.test a42fe2fe6387c40c49ab27ccbd070e1ae38e07f38d05926482cc0bccac9ad602
F ext/fts5/test/fts5unicode.test 17056f4efe6b0a5d4f41fdf7a7dc9af2873004562eaa899d40633b93dc95f5a9
F ext/fts5/test/fts5unicode2.test 9b3df486de05fb4bde4aa7ee8de2e6dae1df6eb90e3f2e242c9383b95d314e3e
@ -1881,10 +1882,7 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93
F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc
F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e
F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0
P 4a43430fd23f88352c33b29c4c105b72f6dc821f94bf362040c41a1648c402e5
R 1f094ba3b91e26d2f277be832160094c
T *branch * fts5-trigram
T *sym-fts5-trigram *
T -sym-trunk *
P 0d7810c1aea93c0a3da1ccc4911dbce8a1b6e1dbfe1ab7e800289a0c783b5985
R c7737548b4b859bb94163b777bb03b06
U dan
Z f75cc113cdfae84e1f09152a573517c5
Z afc946f18e97090d1c0b09338fc98cec

View File

@ -1 +1 @@
0d7810c1aea93c0a3da1ccc4911dbce8a1b6e1dbfe1ab7e800289a0c783b5985
897ced99b44085012aa44d3264940dcbd4c77b295a894a1b58fb2c03a0f7fee8