From 95dca8d0cff5d7dcdcadbc964ee6243ddbb8c499 Mon Sep 17 00:00:00 2001 From: dan Date: Sat, 3 Oct 2020 14:36:06 +0000 Subject: [PATCH] FTS5 does not handle tokens that contain embedded nul characters. Prevent the trigram tokenizer from returning such tokens. Fix for [2ba5930b2]. FossilOrigin-Name: b1d048748c054575425a4bebf0c5d09962f9329d5ce6a978cf54e508b238584c --- ext/fts5/fts5_tokenize.c | 3 +++ ext/fts5/test/fts5trigram.test | 11 +++++++++++ manifest | 14 +++++++------- manifest.uuid | 2 +- 4 files changed, 22 insertions(+), 8 deletions(-) diff --git a/ext/fts5/fts5_tokenize.c b/ext/fts5/fts5_tokenize.c index 665c613306..aa519f868d 100644 --- a/ext/fts5/fts5_tokenize.c +++ b/ext/fts5/fts5_tokenize.c @@ -1333,11 +1333,13 @@ static int fts5TriTokenize( const unsigned char *zNext; READ_UTF8(zIn, zEof, iCode); + if( iCode==0 ) break; zNext = zIn; if( zIn<zEof ){ if( p->bFold ) iCode = sqlite3Fts5UnicodeFold(iCode, 0); WRITE_UTF8(zOut, iCode); READ_UTF8(zIn, zEof, iCode); + if( iCode==0 ) break; }else{ break; } @@ -1345,6 +1347,7 @@ static int fts5TriTokenize( if( p->bFold ) iCode = sqlite3Fts5UnicodeFold(iCode, 0); WRITE_UTF8(zOut, iCode); READ_UTF8(zIn, zEof, iCode); + if( iCode==0 ) break; if( p->bFold ) iCode = sqlite3Fts5UnicodeFold(iCode, 0); WRITE_UTF8(zOut, iCode); }else{ diff --git a/ext/fts5/test/fts5trigram.test b/ext/fts5/test/fts5trigram.test index ebdc8d30ce..b5b6d2acf3 100644 --- a/ext/fts5/test/fts5trigram.test +++ b/ext/fts5/test/fts5trigram.test @@ -129,6 +129,17 @@ do_catchsql_test 3.3 { CREATE VIRTUAL TABLE ttt USING fts5(c, "tokenize=trigram case_sensitive 1"); } {0 {}} +#------------------------------------------------------------------------- +reset_db +do_execsql_test 4.0 { + CREATE VIRTUAL TABLE t0 USING fts5(b, tokenize = "trigram"); +} +do_execsql_test 4.1 { + INSERT INTO t0 VALUES (x'000b01'); +} +do_execsql_test 4.2 { + INSERT INTO t0(t0) VALUES('integrity-check'); +} finish_test diff --git a/manifest b/manifest index 027a6a9381..a592722f16 100644 --- 
a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Fix\ssome\stest\scode\sso\sthat\stestfixture\scan\sbuild\swith\sSQLITE_OMIT_WAL\sdefined. -D 2020-10-02T15:15:18.200 +C FTS5\sdoes\snot\shandle\stokens\sthat\scontain\sembedded\snul\scharacters.\sPrevent\sthe\strigram\stokenizer\sfrom\sreturning\ssuch\stokens.\sFix\sfor\s[2ba5930b2]. +D 2020-10-03T14:36:06.096 F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724 @@ -124,7 +124,7 @@ F ext/fts5/fts5_storage.c 58ba71e6cd3d43a5735815e7956ee167babb4d2cbfe20690517479 F ext/fts5/fts5_tcl.c 39bcbae507f594aad778172fa914cad0f585bf92fd3b078c686e249282db0d95 F ext/fts5/fts5_test_mi.c 08c11ec968148d4cb4119d96d819f8c1f329812c568bac3684f5464be177d3ee F ext/fts5/fts5_test_tok.c f96c6e193c466711d6d7828d5f190407fe7ab897062d371426dd3036f01258e7 -F ext/fts5/fts5_tokenize.c 5711f170065d23809afa97475c4adcd03387da043feb2fb2a1660fe366c01647 +F ext/fts5/fts5_tokenize.c 6f47244681c670ec3c1364f19b2ec0cca191249ff3543755a65e1fc1df348061 F ext/fts5/fts5_unicode2.c 8bd0cd07396b74c1a05590e4070d635bccfc849812c305619f109e6c0485e250 F ext/fts5/fts5_varint.c e64d2113f6e1bfee0032972cffc1207b77af63319746951bf1d09885d1dadf80 F ext/fts5/fts5_vocab.c 7a071833064dc8bca236c3c323e56aac36f583aa2c46ce916d52e31ce87462c9 @@ -217,7 +217,7 @@ F ext/fts5/test/fts5synonym2.test b54cce5c34ec08ed616f646635538ae82e34a0e28f947e F ext/fts5/test/fts5tok1.test ce6551e41ff56f30b69963577324624733bed0d1753589f06120d664d9cd45c9 F ext/fts5/test/fts5tok2.test dcacb32d4a2a3f0dd3215d4a3987f78ae4be21a2 F ext/fts5/test/fts5tokenizer.test ac3c9112b263a639fb0508ae73a3ee886bf4866d2153771a8e8a20c721305a43 -F ext/fts5/test/fts5trigram.test 0a9ade9e808c388d13e9ea925a1bf42f3fa873a90dcd5969ef6e50bdd9483873 +F ext/fts5/test/fts5trigram.test 
29d13f2293899c8a9db216af55f6bf0df520459ea2952df1b7866302ef0d0dea F ext/fts5/test/fts5umlaut.test a42fe2fe6387c40c49ab27ccbd070e1ae38e07f38d05926482cc0bccac9ad602 F ext/fts5/test/fts5unicode.test 17056f4efe6b0a5d4f41fdf7a7dc9af2873004562eaa899d40633b93dc95f5a9 F ext/fts5/test/fts5unicode2.test 9b3df486de05fb4bde4aa7ee8de2e6dae1df6eb90e3f2e242c9383b95d314e3e @@ -1882,7 +1882,7 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0 -P ad5ab24ebd557e7af1d92ab3fbcb3747c04da5ad4ed779fb6391dc94042687dd -R 1cdad7d96c36f2e03dd55b2f6f56718c +P dd009cd7aec3598e930806907601f4f0f9b0021ea99fa6c5e29e88f1246066ed +R 9c7acf6623596414564a0020768bc2dc U dan -Z 9709ab64200eaa0e420a4dbfbec12e7d +Z 46788d8d61b39b3ddef723b54e3710a9 diff --git a/manifest.uuid b/manifest.uuid index 1cce5d9741..eb2c30100f 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -dd009cd7aec3598e930806907601f4f0f9b0021ea99fa6c5e29e88f1246066ed \ No newline at end of file +b1d048748c054575425a4bebf0c5d09962f9329d5ce6a978cf54e508b238584c \ No newline at end of file