Fix cases surrounding blob values being stored in locale=1 fts5 tables.

FossilOrigin-Name: ae435aff1785d5832821c19dc88ccf6c496a7ff55be80276b31e5c9abee723db
This commit is contained in:
dan 2024-07-30 20:39:58 +00:00
parent 8174305e17
commit 9015b8822d
7 changed files with 198 additions and 42 deletions

View File

@ -228,6 +228,8 @@ struct Fts5Config {
char *zContentExprlist;
Fts5TokenizerConfig t;
int bLock; /* True when table is preparing statement */
int eEnc; /* An FTS5_ENCODING_XXX constant */
/* Values loaded from the %_config table */
int iVersion; /* fts5 file format 'version' */

View File

@ -124,6 +124,10 @@ struct Fts5FullTable {
#endif
};
/*
** Candidate values for the Fts5Config.eEnc field, which records the text
** encoding of the database. FTS5_ENCODING_UNKNOWN (zero) means the encoding
** has not been determined yet.
*/
#define FTS5_ENCODING_UNKNOWN 0
#define FTS5_ENCODING_UTF8 1
#define FTS5_ENCODING_UTF16 2
struct Fts5MatchPhrase {
Fts5Buffer *pPoslist; /* Pointer to current poslist */
int nTerm; /* Size of phrase in terms */
@ -1252,6 +1256,30 @@ void sqlite3Fts5ClearLocale(Fts5Config *pConfig){
fts5SetLocale(pConfig, 0, 0);
}
/*
** Set (*pbIs) to true if the database text encoding is one of the UTF-16
** variants, or to false otherwise.
**
** The answer is cached in pConfig->eEnc. While that field is still
** FTS5_ENCODING_UNKNOWN, the encoding is discovered by querying
** pragma_encoding and testing whether its name contains "16".
**
** Return SQLITE_OK if successful. If the pragma statement cannot be
** prepared or run, return the SQLite error code, leaving both (*pbIs)
** and pConfig->eEnc unmodified so that a later call may retry.
*/
static int fts5IsUtf16(Fts5Config *pConfig, int *pbIs){
  if( pConfig->eEnc==FTS5_ENCODING_UNKNOWN ){
    sqlite3_stmt *pPragma = 0;
    int val = 0;
    /* NOTE(review): the doubled '%' characters suggest that
    ** fts5PrepareStatement() takes a printf-style format string - confirm
    ** against its definition. */
    int rc = fts5PrepareStatement(&pPragma, pConfig,
        "SELECT (encoding LIKE '%%16%%') FROM pragma_encoding"
    );

    /* A failure to prepare must be reported to the caller. Otherwise the
    ** encoding stays UNKNOWN and is silently treated as not-UTF-16. */
    if( rc!=SQLITE_OK ) return rc;

    if( sqlite3_step(pPragma)==SQLITE_ROW ){
      val = sqlite3_column_int(pPragma, 0);
    }

    /* sqlite3_finalize() reports any error hit by sqlite3_step() */
    rc = sqlite3_finalize(pPragma);
    if( rc!=SQLITE_OK ) return rc;

    pConfig->eEnc = val ? FTS5_ENCODING_UTF16 : FTS5_ENCODING_UTF8;
  }

  *pbIs = (pConfig->eEnc==FTS5_ENCODING_UTF16);
  return SQLITE_OK;
}
int sqlite3Fts5ExtractText(
Fts5Config *pConfig,
int bContent,
@ -1280,11 +1308,26 @@ int sqlite3Fts5ExtractText(
int nBlob = sqlite3_value_bytes(pVal);
int nLocale = 0;
if( nBlob>=4 && memcmp(pBlob, "\0\0\0\0", 4)==0 ){
int bIs16 = 0;
pText = (const char*)sqlite3_value_text(pVal);
nText = sqlite3_value_bytes(pVal);
rc = fts5IsUtf16(pConfig, &bIs16);
if( bIs16 ){
pText += 2;
nText -= 2;
}else{
pText += 4;
nText -= 4;
}
}else{
for(nLocale=0; nLocale<nBlob; nLocale++){
if( pBlob[nLocale]==0x00 ) break;
}
if( nLocale==nBlob ) return SQLITE_ERROR;
if( nLocale==nBlob || nLocale==0 ) return SQLITE_ERROR;
pText = (const char*)&pBlob[nLocale+1];
nText = nBlob-nLocale-1;
@ -1292,6 +1335,7 @@ int sqlite3Fts5ExtractText(
rc = fts5SetLocale(pConfig, (const char*)pBlob, nLocale);
*pbResetTokenizer = 1;
}
}
}else{
pText = (const char*)sqlite3_value_text(pVal);
@ -3302,6 +3346,9 @@ static void fts5LocaleFunc(
zText = (const char*)sqlite3_value_text(apArg[1]);
nText = sqlite3_value_bytes(apArg[1]);
if( zLocale==0 || zLocale[0]=='\0' ){
sqlite3_result_text(pCtx, zText, nText, SQLITE_TRANSIENT);
}else{
nBlob = nLocale + 1 + nText;
pBlob = (u8*)sqlite3_malloc(nBlob);
if( pBlob==0 ){
@ -3309,13 +3356,14 @@ static void fts5LocaleFunc(
return;
}
if( zLocale ) memcpy(pBlob, zLocale, nLocale);
memcpy(pBlob, zLocale, nLocale);
pBlob[nLocale] = 0x00;
if( zText ) memcpy(&pBlob[nLocale+1], zText, nText);
sqlite3_result_blob(pCtx, pBlob, nBlob, sqlite3_free);
sqlite3_result_subtype(pCtx, FTS5_LOCALE_SUBTYPE);
}
}
/*
** Return true if zName is the extension on one of the shadow tables used

View File

@ -839,10 +839,14 @@ int sqlite3Fts5StorageContentInsert(
sqlite3_value *pVal = apVal[i];
if( sqlite3_value_nochange(pVal) && p->pSavedRow ){
pVal = sqlite3_column_value(p->pSavedRow, i-1);
}else if( i>1 && pConfig->abUnindexed[i-2]
&& pConfig->bLocale
&& sqlite3_value_subtype(pVal)==FTS5_LOCALE_SUBTYPE
){
}else if( sqlite3_value_subtype(pVal)==FTS5_LOCALE_SUBTYPE ){
if( pConfig->bLocale==0 ){
sqlite3Fts5ConfigErrmsg(pConfig,
"fts5_locale() may not be used without locale=1"
);
rc = SQLITE_ERROR;
break;
}else if( i>1 && pConfig->abUnindexed[i-2] ){
/* An attempt to insert an fts5_locale() value into an UNINDEXED
** column. Strip the locale away and just bind the text. */
const char *pText = 0;
@ -851,6 +855,17 @@ int sqlite3Fts5StorageContentInsert(
sqlite3_bind_text(pInsert, i, pText, nText, SQLITE_TRANSIENT);
continue;
}
}else if( pConfig->bLocale && sqlite3_value_type(pVal)==SQLITE_BLOB ){
/* Inserting a blob into a normal content table with locale=1. */
int n = sqlite3_value_bytes(pVal);
u8 *pBlob = sqlite3Fts5MallocZero(&rc, n+4);
if( pBlob ){
memcpy(&pBlob[4], sqlite3_value_blob(pVal), n);
rc = sqlite3_bind_blob(pInsert, i, pBlob, n+4, SQLITE_TRANSIENT);
sqlite3_free(pBlob);
}
continue;
}
rc = sqlite3_bind_value(pInsert, i, pVal);
}

View File

@ -0,0 +1,77 @@
# 2024 July 30
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#***********************************************************************
#
#
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5blob

# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}

# Run the same scenario for each combination of database encoding and
# fts5 "locale" option. Each pass inserts two blob values and one text
# value, then checks that they read back unchanged.
foreach {tn enc locale} {
  1 utf8 0
  2 utf8 1
  3 utf16 0
  4 utf16 1
} {
  reset_db
  fts5_aux_test_functions db
  execsql "PRAGMA encoding = $enc"
  execsql "
    CREATE VIRTUAL TABLE t1 USING fts5(x, y, locale=$locale);
  "

  # Row 1 stores a blob that begins with four 0x00 bytes. Row 2 stores a
  # blob with a single embedded 0x00 byte. Row 3 stores ordinary text.
  do_execsql_test 1.$tn.0 {
    CREATE VIRTUAL TABLE tt USING fts5vocab('t1', 'instance');
    INSERT INTO t1(rowid, x, y) VALUES(1, 555, X'0000000041424320444546');
    INSERT INTO t1(rowid, x, y) VALUES(2, 666, X'41424300444546');
    INSERT INTO t1(rowid, x, y) VALUES(3, 777, 'xyz');
  }

  do_execsql_test 1.$tn.1 {
    SELECT rowid, quote(x), quote(y) FROM t1
  } {
    1 555 X'0000000041424320444546'
    2 666 X'41424300444546'
    3 777 'xyz'
  }

  # Save the fts5vocab contents and the fts5_test_columntext() output of
  # this configuration so that they can be compared once the loop is done.
  set T($enc,$locale) [execsql { SELECT * FROM tt }]
  set U($enc,$locale) [execsql { SELECT fts5_test_columntext(t1) FROM t1 }]

  do_execsql_test 1.$tn.2 {
    DELETE FROM t1 WHERE rowid=2;
    DELETE FROM t1 WHERE rowid=1;
  }

  do_execsql_test 1.$tn.3 {
    PRAGMA integrity_check;
  } {ok}
}

# For each encoding, the locale=1 table must produce the same results as
# the locale=0 table.
do_test 1.5.1 { set T(utf8,1) } $T(utf8,0)
do_test 1.5.2 { set T(utf16,1) } $T(utf16,0)

do_test 1.6.1 { set U(utf8,1) } $U(utf8,0)
do_test 1.6.2 { set U(utf16,1) } $U(utf16,0)

finish_test

View File

@ -336,6 +336,19 @@ foreach {tn opt} {
}
}
#-------------------------------------------------------------------------
# Test 6.1 checks that inserting an fts5_locale() value into a table that
# was created without locale=1 fails with an error. Test 6.2 checks that
# fts5_locale() returns a text value when the locale argument is NULL or
# an empty string.
reset_db
do_execsql_test 6.0 {
CREATE VIRTUAL TABLE x1 USING fts5(x);
}
do_catchsql_test 6.1 {
INSERT INTO x1(rowid, x) VALUES(123, fts5_locale('en_AU', 'hello world'));
} {1 {fts5_locale() may not be used without locale=1}}
do_execsql_test 6.2 {
SELECT typeof( fts5_locale(NULL, 'xyz') ), typeof( fts5_locale('', 'abc') );
} {text text}
finish_test

View File

@ -1,5 +1,5 @@
C Update\sthe\shighlight()\sand\ssnippet()\sfunctions\sto\suse\slocales\swhen\savailable.
D 2024-07-30T15:55:51.884
C Fix\scases\ssurrounding\sblob\svalues\sbeing\sstored\sin\slocale=1\sfts5\stables.
D 2024-07-30T20:39:58.636
F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1
F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea
F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724
@ -93,15 +93,15 @@ F ext/fts3/unicode/mkunicode.tcl d5aebf022fa4577ee8cdf27468f0d847879993959101f6d
F ext/fts3/unicode/parseunicode.tcl a981bd6466d12dd17967515801c3ff23f74a281be1a03cf1e6f52a6959fc77eb
F ext/fts5/extract_api_docs.tcl bc3a0ca78be7d3df08e7602c00ca48021ebae40682d75eb001bfdf6e54ffb44e
F ext/fts5/fts5.h 7cd47e5dfe68d798e667caa76722374f0c909f2db05bb4d42b1ec5300d18e658
F ext/fts5/fts5Int.h 330b1e2dad9ea9cccc9fa93817062fa21e89f00e7eac9a84be440f7e93bf7c3c
F ext/fts5/fts5Int.h 833a2fe729f926ebcde47e21e495d141b99ede9a188fc577873f24bea0f0bfa2
F ext/fts5/fts5_aux.c 652f839dc0c77431295f10b08f268631560bb5630e65fd701de7a58744428a82
F ext/fts5/fts5_buffer.c 0eec58bff585f1a44ea9147eae5da2447292080ea435957f7488c70673cb6f09
F ext/fts5/fts5_config.c 0c96490fbad746b3780174f38b2ee5e3d719f2f81ee6b58ca828772871e0f680
F ext/fts5/fts5_expr.c c7336d5f9ecc0e2b014d700be2bec0ea383b0e82c494a7c5c4ac622327c2bfad
F ext/fts5/fts5_hash.c adda4272be401566a6e0ba1acbe70ee5cb97fce944bc2e04dc707152a0ec91b1
F ext/fts5/fts5_index.c eb9a0dda3bc6ef969a6be8d2746af56856e67251810ddba08622b45be8477abe
F ext/fts5/fts5_main.c ee4bba42d8d093cd870c55a342ab85a647ec39dc79985e2b1084514d990de82d
F ext/fts5/fts5_storage.c 2118a5bd2ae617367259f6cf2412b6ff26bc1cd1cc29bf64a6249edcc1572f19
F ext/fts5/fts5_main.c d9a3fef86887e373027d48ab8216ab8caca95f2316e05d953d8885633162a9c5
F ext/fts5/fts5_storage.c cc6173bb755d668573169c038034a9ec8deadd10b3c10c145adbdf04ab5f889c
F ext/fts5/fts5_tcl.c 93b705cb87633574983161edc5234f9b91ba03f9fecfbd2c5d401a1da6f93aa5
F ext/fts5/fts5_test_mi.c 08c11ec968148d4cb4119d96d819f8c1f329812c568bac3684f5464be177d3ee
F ext/fts5/fts5_test_tok.c 3cb0a9b508b30d17ef025ccddd26ae3dc8ddffbe76c057616e59a9aa85d36f3b
@ -132,6 +132,7 @@ F ext/fts5/test/fts5auxdata.test 372549088ff792655f73e62b9dfaf4863ce74f5e604c06c
F ext/fts5/test/fts5bigid.test 2860854c2561a57594192b00c33a29f91cb85e25f3d6c03b5c2b8f62708f39dd
F ext/fts5/test/fts5bigpl.test 8f09858aab866c33593560e6480b2b6975ae7ff29ca32ad7b77e2da61402f8ef
F ext/fts5/test/fts5bigtok.test 541119e616c637caea925a8c028c37c2c29e94383e00aa2f9198d530724b6e36
F ext/fts5/test/fts5blob.test a16160688e181a212bcb4968325c57ef2864a0bcae0794d6a1e16185007f00b9
F ext/fts5/test/fts5cat.test daba0b80659460b0cb60bd1f40b402478a761fe7ea414c3c94c2be25568cc33a
F ext/fts5/test/fts5circref.test f880dfd0d99f6fb73b88ccacb0927d18e833672fd906cc47d6b4e529419eaa62
F ext/fts5/test/fts5colset.test 544f4998cdbfe06a3123887fc0221612e8aa8192cdaff152872f1aadb10e6897
@ -184,7 +185,7 @@ F ext/fts5/test/fts5interrupt.test 09613247b273a99889808ef852898177e671406fe71fd
F ext/fts5/test/fts5lastrowid.test f36298a1fb9f988bde060a274a7ce638faa9c38a31400f8d2d27ea9373e0c4a1
F ext/fts5/test/fts5leftjoin.test c0b4cafb9661379e576dc4405c0891d8fcc2782680740513c4d1fc114b43d4ad
F ext/fts5/test/fts5limits.test 8ab67cf5d311c124b6ceb0062d0297767176df4572d955fce79fa43004dff01c
F ext/fts5/test/fts5locale.test 7c332f882080a69ed4be9790d99ee49747ca44cf241d45f762298e17fea6e7a7
F ext/fts5/test/fts5locale.test bfd8704f9bea963314fcbcf810f08a357ac8035bcb80a2d6170c1e57fa6ad52a
F ext/fts5/test/fts5matchinfo.test 877520582feb86bbfd95ab780099bcba4526f18ac75ee34979144cf86ba3a5a3
F ext/fts5/test/fts5merge.test 2654df0bcdb2d117c2d38b6aeb0168061be01c643f9e9194b36c43a2970e8082
F ext/fts5/test/fts5merge2.test 3ebad1a59d6ad3fb66eff6523a09e95dc6367cbefb3cd73196801dea0425c8e2
@ -2196,8 +2197,8 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93
F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc
F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e
F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0
P 14006711d83d098e665c540b978b0e29aa8f458da1c2c8e9c2baf2ad5ebd502c
R 8ced4a5b525d31957945a9ad2a3de4d9
P 569ae56a673a2e101fac7003a1cb41c7a02a515adf55bd1e1f2a03c19eb2b085
R 937929d817b727d57bac9fb974f93247
U dan
Z 395467bf290b92744f082b81bcb8dffe
Z 807f8f93a084e88be44a4f6a2009b39e
# Remove this line to create a well-formed Fossil manifest.

View File

@ -1 +1 @@
569ae56a673a2e101fac7003a1cb41c7a02a515adf55bd1e1f2a03c19eb2b085
ae435aff1785d5832821c19dc88ccf6c496a7ff55be80276b31e5c9abee723db