diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index 1cc25ae7e0..14140658b2 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -228,6 +228,8 @@ struct Fts5Config { char *zContentExprlist; Fts5TokenizerConfig t; int bLock; /* True when table is preparing statement */ + int eEnc; /* An FTS5_ENCODING_XXX constant */ + /* Values loaded from the %_config table */ int iVersion; /* fts5 file format 'version' */ diff --git a/ext/fts5/fts5_main.c b/ext/fts5/fts5_main.c index b57864d8b0..673a44d139 100644 --- a/ext/fts5/fts5_main.c +++ b/ext/fts5/fts5_main.c @@ -118,12 +118,16 @@ struct Fts5FullTable { Fts5Global *pGlobal; /* Global (connection wide) data */ Fts5Cursor *pSortCsr; /* Sort data from this cursor */ int iSavepoint; /* Successful xSavepoint()+1 */ - + #ifdef SQLITE_DEBUG struct Fts5TransactionState ts; #endif }; +#define FTS5_ENCODING_UNKNOWN 0 +#define FTS5_ENCODING_UTF8 1 +#define FTS5_ENCODING_UTF16 2 + struct Fts5MatchPhrase { Fts5Buffer *pPoslist; /* Pointer to current poslist */ int nTerm; /* Size of phrase in terms */ @@ -1252,6 +1256,30 @@ void sqlite3Fts5ClearLocale(Fts5Config *pConfig){ fts5SetLocale(pConfig, 0, 0); } +static int fts5IsUtf16(Fts5Config *pConfig, int *pbIs){ + if( pConfig->eEnc==FTS5_ENCODING_UNKNOWN ){ + sqlite3_stmt *pPragma = 0; + int rc = fts5PrepareStatement(&pPragma, pConfig, + "SELECT (encoding LIKE '%%16%%') FROM pragma_encoding" + ); + if( rc==SQLITE_OK ){ + int val; + sqlite3_step(pPragma); + val = sqlite3_column_int(pPragma, 0); + rc = sqlite3_finalize(pPragma); + if( rc!=SQLITE_OK ) return rc; + if( val ){ + pConfig->eEnc = FTS5_ENCODING_UTF16; + }else{ + pConfig->eEnc = FTS5_ENCODING_UTF8; + } + } + } + + *pbIs = (pConfig->eEnc==FTS5_ENCODING_UTF16); + return SQLITE_OK; +} + int sqlite3Fts5ExtractText( Fts5Config *pConfig, int bContent, @@ -1280,17 +1308,33 @@ int sqlite3Fts5ExtractText( int nBlob = sqlite3_value_bytes(pVal); int nLocale = 0; - for(nLocale=0; nLocale=4 && memcmp(pBlob, "\0\0\0\0", 4)==0 ){ + int bIs16 = 0; + pText = (const char*)sqlite3_value_text(pVal); + nText = sqlite3_value_bytes(pVal); + rc = fts5IsUtf16(pConfig, &bIs16); + + if( bIs16 ){ + pText += 2; + nText -= 2; + }else{ + pText += 4; + nText -= 4; + } - if( nLocale==nBlob ) return SQLITE_ERROR; - pText = (const char*)&pBlob[nLocale+1]; - nText = nBlob-nLocale-1; + }else{ + for(nLocale=0; nLocalepSavedRow ){ pVal = sqlite3_column_value(p->pSavedRow, i-1); - }else if( i>1 && pConfig->abUnindexed[i-2] - && pConfig->bLocale - && sqlite3_value_subtype(pVal)==FTS5_LOCALE_SUBTYPE - ){ - /* At attempt to insert an fts5_locale() value into an UNINDEXED - ** column. Strip the locale away and just bind the text. */ - const char *pText = 0; - int nText = 0; - rc = sqlite3Fts5ExtractText(pConfig, 0, pVal, 0, &pText, &nText); - sqlite3_bind_text(pInsert, i, pText, nText, SQLITE_TRANSIENT); + }else if( sqlite3_value_subtype(pVal)==FTS5_LOCALE_SUBTYPE ){ + if( pConfig->bLocale==0 ){ + sqlite3Fts5ConfigErrmsg(pConfig, + "fts5_locale() may not be used without locale=1" + ); + rc = SQLITE_ERROR; + break; + }else if( i>1 && pConfig->abUnindexed[i-2] ){ + /* At attempt to insert an fts5_locale() value into an UNINDEXED + ** column. Strip the locale away and just bind the text. */ + const char *pText = 0; + int nText = 0; + rc = sqlite3Fts5ExtractText(pConfig, 0, pVal, 0, &pText, &nText); + sqlite3_bind_text(pInsert, i, pText, nText, SQLITE_TRANSIENT); + continue; + } + }else if( pConfig->bLocale && sqlite3_value_type(pVal)==SQLITE_BLOB ){ + /* Inserting a blob into a normal content table with locale=1. */ + int n = sqlite3_value_bytes(pVal); + u8 *pBlob = sqlite3Fts5MallocZero(&rc, n+4); + if( pBlob ){ + memcpy(&pBlob[4], sqlite3_value_blob(pVal), n); + rc = sqlite3_bind_blob(pInsert, i, pBlob, n+4, SQLITE_TRANSIENT); + sqlite3_free(pBlob); + } continue; } diff --git a/ext/fts5/test/fts5blob.test b/ext/fts5/test/fts5blob.test new file mode 100644 index 0000000000..9c838b75a6 --- /dev/null +++ b/ext/fts5/test/fts5blob.test @@ -0,0 +1,77 @@ +# 2024 July 30 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# +# + +source [file join [file dirname [info script]] fts5_common.tcl] +set testprefix fts5blob + +# If SQLITE_ENABLE_FTS5 is not defined, omit this file. +ifcapable !fts5 { + finish_test + return +} + +foreach {tn enc locale} { + 1 utf8 0 + 2 utf8 1 + 3 utf16 0 + 4 utf16 1 +} { + reset_db + fts5_aux_test_functions db + + execsql "PRAGMA encoding = $enc" + + if {$tn==3 || $tn==4} breakpoint + + execsql " + CREATE VIRTUAL TABLE t1 USING fts5(x, y, locale=$locale); + " + do_execsql_test 1.$tn.0 { + CREATE VIRTUAL TABLE tt USING fts5vocab('t1', 'instance'); + INSERT INTO t1(rowid, x, y) VALUES(1, 555, X'0000000041424320444546'); + INSERT INTO t1(rowid, x, y) VALUES(2, 666, X'41424300444546'); + INSERT INTO t1(rowid, x, y) VALUES(3, 777, 'xyz'); + } + + do_execsql_test 1.$tn.1 { + SELECT rowid, quote(x), quote(y) FROM t1 + } { + 1 555 X'0000000041424320444546' + 2 666 X'41424300444546' + 3 777 'xyz' + } + + set T($enc,$locale) [execsql { SELECT * FROM tt }] + set U($enc,$locale) [execsql { SELECT fts5_test_columntext(t1) FROM t1 }] + + do_execsql_test 1.$tn.2 { + DELETE FROM t1 WHERE rowid=2; + DELETE FROM t1 WHERE rowid=1; + } + + do_execsql_test 1.$tn.3 { + PRAGMA integrity_check; + } {ok} +} + +do_test 1.5.1 { set T(utf8,1) } $T(utf8,0) +do_test 1.5.2 { set T(utf16,1) } $T(utf16,0) + +do_test 1.6.1 { set U(utf8,1) } $U(utf8,0) +do_test 1.6.2 { set U(utf16,1) } $U(utf16,0) + + + +finish_test + + diff --git a/ext/fts5/test/fts5locale.test b/ext/fts5/test/fts5locale.test index 1124a44436..d3fd9989c0 100644 --- a/ext/fts5/test/fts5locale.test +++ b/ext/fts5/test/fts5locale.test @@ -336,6 +336,19 @@ foreach {tn opt} { } } +#------------------------------------------------------------------------- +reset_db +do_execsql_test 6.0 { + CREATE VIRTUAL TABLE x1 USING fts5(x); +} +do_catchsql_test 6.1 { + INSERT INTO x1(rowid, x) VALUES(123, fts5_locale('en_AU', 'hello world')); +} {1 {fts5_locale() may not be used without locale=1}} + +do_execsql_test 6.2 { + SELECT typeof( fts5_locale(NULL, 'xyz') ), typeof( fts5_locale('', 'abc') ); +} {text text} + finish_test diff --git a/manifest b/manifest index 89acd2b99f..156f1a64a5 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Update\sthe\shighlight()\sand\ssnippet()\sfunctions\sto\suse\slocales\swhen\savailable. -D 2024-07-30T15:55:51.884 +C Fix\scases\ssurrounding\sblob\svalues\sbeing\sstored\sin\slocale=1\sfts5\stables. +D 2024-07-30T20:39:58.636 F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724 @@ -93,15 +93,15 @@ F ext/fts3/unicode/mkunicode.tcl d5aebf022fa4577ee8cdf27468f0d847879993959101f6d F ext/fts3/unicode/parseunicode.tcl a981bd6466d12dd17967515801c3ff23f74a281be1a03cf1e6f52a6959fc77eb F ext/fts5/extract_api_docs.tcl bc3a0ca78be7d3df08e7602c00ca48021ebae40682d75eb001bfdf6e54ffb44e F ext/fts5/fts5.h 7cd47e5dfe68d798e667caa76722374f0c909f2db05bb4d42b1ec5300d18e658 -F ext/fts5/fts5Int.h 330b1e2dad9ea9cccc9fa93817062fa21e89f00e7eac9a84be440f7e93bf7c3c +F ext/fts5/fts5Int.h 833a2fe729f926ebcde47e21e495d141b99ede9a188fc577873f24bea0f0bfa2 F ext/fts5/fts5_aux.c 652f839dc0c77431295f10b08f268631560bb5630e65fd701de7a58744428a82 F ext/fts5/fts5_buffer.c 0eec58bff585f1a44ea9147eae5da2447292080ea435957f7488c70673cb6f09 F ext/fts5/fts5_config.c 0c96490fbad746b3780174f38b2ee5e3d719f2f81ee6b58ca828772871e0f680 F ext/fts5/fts5_expr.c c7336d5f9ecc0e2b014d700be2bec0ea383b0e82c494a7c5c4ac622327c2bfad F ext/fts5/fts5_hash.c adda4272be401566a6e0ba1acbe70ee5cb97fce944bc2e04dc707152a0ec91b1 F ext/fts5/fts5_index.c eb9a0dda3bc6ef969a6be8d2746af56856e67251810ddba08622b45be8477abe -F ext/fts5/fts5_main.c ee4bba42d8d093cd870c55a342ab85a647ec39dc79985e2b1084514d990de82d -F ext/fts5/fts5_storage.c 2118a5bd2ae617367259f6cf2412b6ff26bc1cd1cc29bf64a6249edcc1572f19 +F ext/fts5/fts5_main.c d9a3fef86887e373027d48ab8216ab8caca95f2316e05d953d8885633162a9c5 +F ext/fts5/fts5_storage.c cc6173bb755d668573169c038034a9ec8deadd10b3c10c145adbdf04ab5f889c F ext/fts5/fts5_tcl.c 93b705cb87633574983161edc5234f9b91ba03f9fecfbd2c5d401a1da6f93aa5 F ext/fts5/fts5_test_mi.c 08c11ec968148d4cb4119d96d819f8c1f329812c568bac3684f5464be177d3ee F ext/fts5/fts5_test_tok.c 3cb0a9b508b30d17ef025ccddd26ae3dc8ddffbe76c057616e59a9aa85d36f3b @@ -132,6 +132,7 @@ F ext/fts5/test/fts5auxdata.test 372549088ff792655f73e62b9dfaf4863ce74f5e604c06c F ext/fts5/test/fts5bigid.test 2860854c2561a57594192b00c33a29f91cb85e25f3d6c03b5c2b8f62708f39dd F ext/fts5/test/fts5bigpl.test 8f09858aab866c33593560e6480b2b6975ae7ff29ca32ad7b77e2da61402f8ef F ext/fts5/test/fts5bigtok.test 541119e616c637caea925a8c028c37c2c29e94383e00aa2f9198d530724b6e36 +F ext/fts5/test/fts5blob.test a16160688e181a212bcb4968325c57ef2864a0bcae0794d6a1e16185007f00b9 F ext/fts5/test/fts5cat.test daba0b80659460b0cb60bd1f40b402478a761fe7ea414c3c94c2be25568cc33a F ext/fts5/test/fts5circref.test f880dfd0d99f6fb73b88ccacb0927d18e833672fd906cc47d6b4e529419eaa62 F ext/fts5/test/fts5colset.test 544f4998cdbfe06a3123887fc0221612e8aa8192cdaff152872f1aadb10e6897 @@ -184,7 +185,7 @@ F ext/fts5/test/fts5interrupt.test 09613247b273a99889808ef852898177e671406fe71fd F ext/fts5/test/fts5lastrowid.test f36298a1fb9f988bde060a274a7ce638faa9c38a31400f8d2d27ea9373e0c4a1 F ext/fts5/test/fts5leftjoin.test c0b4cafb9661379e576dc4405c0891d8fcc2782680740513c4d1fc114b43d4ad F ext/fts5/test/fts5limits.test 8ab67cf5d311c124b6ceb0062d0297767176df4572d955fce79fa43004dff01c -F ext/fts5/test/fts5locale.test 7c332f882080a69ed4be9790d99ee49747ca44cf241d45f762298e17fea6e7a7 +F ext/fts5/test/fts5locale.test bfd8704f9bea963314fcbcf810f08a357ac8035bcb80a2d6170c1e57fa6ad52a F ext/fts5/test/fts5matchinfo.test 877520582feb86bbfd95ab780099bcba4526f18ac75ee34979144cf86ba3a5a3 F ext/fts5/test/fts5merge.test 2654df0bcdb2d117c2d38b6aeb0168061be01c643f9e9194b36c43a2970e8082 F ext/fts5/test/fts5merge2.test 3ebad1a59d6ad3fb66eff6523a09e95dc6367cbefb3cd73196801dea0425c8e2 @@ -2196,8 +2197,8 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0 -P 14006711d83d098e665c540b978b0e29aa8f458da1c2c8e9c2baf2ad5ebd502c -R 8ced4a5b525d31957945a9ad2a3de4d9 +P 569ae56a673a2e101fac7003a1cb41c7a02a515adf55bd1e1f2a03c19eb2b085 +R 937929d817b727d57bac9fb974f93247 U dan -Z 395467bf290b92744f082b81bcb8dffe +Z 807f8f93a084e88be44a4f6a2009b39e # Remove this line to create a well-formed Fossil manifest. diff --git a/manifest.uuid b/manifest.uuid index 634c3661ab..dfceffa3c6 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -569ae56a673a2e101fac7003a1cb41c7a02a515adf55bd1e1f2a03c19eb2b085 +ae435aff1785d5832821c19dc88ccf6c496a7ff55be80276b31e5c9abee723db