From cf25c16a54f68d1ca095b91bd0ead840578b2bb4 Mon Sep 17 00:00:00 2001 From: dan Date: Sat, 7 Sep 2024 16:22:22 +0000 Subject: [PATCH] Fix a problem with fts5 locale=1 tables and UPDATE statements that may affect more than one row. FossilOrigin-Name: 70e42f941c0778a04b82655409c7caf4c1039589f7e43a8ec1e736ea8f931b26 --- ext/fts5/fts5Int.h | 2 + ext/fts5/fts5_main.c | 172 ++++++++++++++++++++-------------- ext/fts5/fts5_storage.c | 21 +++-- ext/fts5/test/fts5locale.test | 30 +++++- manifest | 22 ++--- manifest.uuid | 2 +- test/fts3corrupt4.test | 2 +- 7 files changed, 154 insertions(+), 97 deletions(-) diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index 7e41119572..0b8851d227 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -647,6 +647,8 @@ int sqlite3Fts5ExtractText( void sqlite3Fts5ClearLocale(Fts5Config *pConfig); +int sqlite3Fts5IsLocaleValue(Fts5Config *pConfig, sqlite3_value *pVal); + /* ** End of interface to code in fts5.c. **************************************************************************/ diff --git a/ext/fts5/fts5_main.c b/ext/fts5/fts5_main.c index 03c1bb83fa..cb68c1444f 100644 --- a/ext/fts5/fts5_main.c +++ b/ext/fts5/fts5_main.c @@ -83,8 +83,17 @@ struct Fts5Global { Fts5TokenizerModule *pTok; /* First in list of all tokenizer modules */ Fts5TokenizerModule *pDfltTok; /* Default tokenizer module */ Fts5Cursor *pCsr; /* First in list of all open cursors */ + u32 aLocaleHdr[4]; }; +/* +** Size of header on fts5_locale() values. And macro to access a buffer +** containing a copy of the header from an Fts5Config pointer. +*/ +#define FTS5_LOCALE_HDR_SIZE sizeof( ((Fts5Global*)0)->aLocaleHdr ) +#define FTS5_LOCALE_HDR(pConfig) ((const u8*)(pConfig->pGlobal->aLocaleHdr)) + + /* ** Each auxiliary function registered with the FTS5 module is represented ** by an object of the following type. All such objects are stored as part @@ -247,12 +256,6 @@ struct Fts5Cursor { #define BitFlagAllTest(x,y) (((x) & (y))==(y)) #define BitFlagTest(x,y) (((x) & (y))!=0) -/* -** The subtype value and header bytes used by fts5_locale(). -*/ -#define FTS5_LOCALE_SUBTYPE ((unsigned int)'L') -#define FTS5_LOCALE_HEADER "\x00\xE0\xB2\xEB" - /* ** Macros to Set(), Clear() and Test() cursor flags. @@ -1274,6 +1277,22 @@ void sqlite3Fts5ClearLocale(Fts5Config *pConfig){ fts5SetLocale(pConfig, 0, 0); } +/* +** Return true if the value passed as the only argument is an +** fts5_locale() value. +*/ +int sqlite3Fts5IsLocaleValue(Fts5Config *pConfig, sqlite3_value *pVal){ + int ret = 0; + if( sqlite3_value_type(pVal)==SQLITE_BLOB ){ + if( sqlite3_value_bytes(pVal)>FTS5_LOCALE_HDR_SIZE + && 0==memcmp(sqlite3_value_blob(pVal), FTS5_LOCALE_HDR(pConfig), 4) + ){ + ret = 1; + } + } + return ret; +} + /* ** This function is used to extract utf-8 text from an sqlite3_value. This ** is usually done in order to tokenize it. For example, when: @@ -1292,17 +1311,15 @@ void sqlite3Fts5ClearLocale(Fts5Config *pConfig){ ** 2) Combination text/locale blobs created by fts5_locale(). There ** are several cases for these: ** -** * Blobs tagged with FTS5_LOCALE_SUBTYPE. -** * Blobs read from the content table of a locale=1 external-content -** table, and +** * Blobs that have the 16-byte header, and ** * Blobs read from the content table of a locale=1 regular ** content table. ** -** The first two cases above should have the 4 byte FTS5_LOCALE_HEADER -** header. It is an error if a blob with the subtype or a blob read -** from the content table of an external content table does not have -** the required header. A blob read from the content table of a regular -** locale=1 table does not have the header. This is to save space. +** The first case above has the 16 byte FTS5_LOCALE_HDR(pConfig) +** header. It is an error if a blob read from the content table of +** an external content table does not have the required header. A blob +** read from the content table of a regular locale=1 table does not +** have the header. This is to save space. ** ** If successful, SQLITE_OK is returned and output parameters (*ppText) ** and (*pnText) are set to point to a buffer containing the extracted utf-8 @@ -1330,53 +1347,54 @@ int sqlite3Fts5ExtractText( const char *pText = 0; int nText = 0; int rc = SQLITE_OK; - int bDecodeBlob = 0; + + /* 0: Do not decode blob + ** 1: Decode blob, expect fts5_locale() header + ** 2: Decode blob, expect no fts5_locale() header + */ + int eDecodeBlob = 0; assert( pbResetTokenizer==0 || *pbResetTokenizer==0 ); assert( bContent==0 || pConfig->eContent!=FTS5_CONTENT_NONE ); - assert( bContent==0 || sqlite3_value_subtype(pVal)==0 ); if( sqlite3_value_type(pVal)==SQLITE_BLOB ){ - if( sqlite3_value_subtype(pVal)==FTS5_LOCALE_SUBTYPE - || (bContent && pConfig->bLocale) + if( bContent + && pConfig->bLocale + && pConfig->eContent==FTS5_CONTENT_NORMAL ){ - bDecodeBlob = 1; + eDecodeBlob = 2; + }else if( sqlite3Fts5IsLocaleValue(pConfig, pVal) ){ + eDecodeBlob = 1; + }else if( bContent && pConfig->bLocale ){ + return SQLITE_ERROR; } } - if( bDecodeBlob ){ - const int SZHDR = sizeof(FTS5_LOCALE_HEADER)-1; + if( eDecodeBlob ){ const u8 *pBlob = sqlite3_value_blob(pVal); int nBlob = sqlite3_value_bytes(pVal); + int nLocale = 0; /* Unless this blob was read from the %_content table of an ** FTS5_CONTENT_NORMAL table, it should have the 4 byte fts5_locale() ** header. Check for this. If it is not found, return an error. */ - if( (!bContent || pConfig->eContent!=FTS5_CONTENT_NORMAL) ){ - if( nBlobnCol; ii++){ - if( sqlite3_value_type(apVal[ii+2])==SQLITE_BLOB ){ - int bSub = (sqlite3_value_subtype(apVal[ii+2])==FTS5_LOCALE_SUBTYPE); - if( (pConfig->bLocale && !bSub && pConfig->abUnindexed[ii]==0) - || (pConfig->bLocale==0 && bSub) - ){ - if( pConfig->bLocale==0 ){ - fts5SetVtabError(pTab, "fts5_locale() requires locale=1"); + sqlite3_value *pVal = apVal[ii+2]; + if( sqlite3_value_type(pVal)==SQLITE_BLOB ){ + int isLocale = sqlite3Fts5IsLocaleValue(pConfig, pVal); + if( pConfig->bLocale ){ + if( isLocale==0 && pConfig->abUnindexed[ii]==0 ){ + rc = SQLITE_MISMATCH; + goto update_out; + } + }else{ + if( isLocale ){ + fts5SetVtabError(pTab, "fts5_locale() requires locale=1"); + rc = SQLITE_MISMATCH; + goto update_out; } - rc = SQLITE_MISMATCH; - goto update_out; } } } @@ -2716,21 +2735,21 @@ static int fts5ApiColumnLocale( /* Load the value into pVal. pVal is a locale/text pair iff: ** ** 1) It is an SQLITE_BLOB, and - ** 2) Either the subtype is FTS5_LOCALE_SUBTYPE, or else the - ** value was loaded from an FTS5_CONTENT_NORMAL table, and - ** 3) It does not begin with an 0x00 byte. + ** 2) Either the FTS5_LOCALE_HDR header is present, or else the + ** value was loaded from an FTS5_CONTENT_NORMAL table. + ** + ** If condition (1) is met but condition (2) is not, it is an error. */ sqlite3_value *pVal = sqlite3_column_value(pCsr->pStmt, iCol+1); if( sqlite3_value_type(pVal)==SQLITE_BLOB ){ const u8 *pBlob = (const u8*)sqlite3_value_blob(pVal); int nBlob = sqlite3_value_bytes(pVal); if( pConfig->eContent==FTS5_CONTENT_EXTERNAL ){ - const int SZHDR = sizeof(FTS5_LOCALE_HEADER)-1; - if( nBlobabUnindexed[iCol]==0 ){ - const int SZHDR = sizeof(FTS5_LOCALE_HEADER)-1; const u8 *pBlob = sqlite3_value_blob(pVal); int nBlob = sqlite3_value_bytes(pVal); int ii; if( pConfig->eContent==FTS5_CONTENT_EXTERNAL ){ - if( nBlobaLocaleHdr, FTS5_LOCALE_HDR_SIZE); + pCsr += FTS5_LOCALE_HDR_SIZE; memcpy(pCsr, zLocale, nLocale); pCsr += nLocale; (*pCsr++) = 0x00; @@ -3657,7 +3676,6 @@ static void fts5LocaleFunc( assert( &pCsr[nText]==&pBlob[nBlob] ); sqlite3_result_blob(pCtx, pBlob, nBlob, sqlite3_free); - sqlite3_result_subtype(pCtx, FTS5_LOCALE_SUBTYPE); } } @@ -3759,6 +3777,16 @@ static int fts5Init(sqlite3 *db){ pGlobal->api.xFindTokenizer = fts5FindTokenizer; pGlobal->api.xCreateTokenizer_v2 = fts5CreateTokenizer_v2; pGlobal->api.xFindTokenizer_v2 = fts5FindTokenizer_v2; + + /* Initialize pGlobal->aLocaleHdr[] to a 128-bit pseudo-random vector. + ** The constants below were generated randomly. */ + sqlite3_randomness(sizeof(pGlobal->aLocaleHdr), pGlobal->aLocaleHdr); + pGlobal->aLocaleHdr[0] ^= 0xF924976D; + pGlobal->aLocaleHdr[1] ^= 0x16596E13; + pGlobal->aLocaleHdr[2] ^= 0x7C80BEAA; + pGlobal->aLocaleHdr[3] ^= 0x9B03A67F; + assert( sizeof(pGlobal->aLocaleHdr)==16 ); + rc = sqlite3_create_module_v2(db, "fts5", &fts5Mod, p, fts5ModuleDestroy); if( rc==SQLITE_OK ) rc = sqlite3Fts5IndexInit(db); if( rc==SQLITE_OK ) rc = sqlite3Fts5ExprInit(pGlobal, db); diff --git a/ext/fts5/fts5_storage.c b/ext/fts5/fts5_storage.c index cf25eb361e..3bf20b3390 100644 --- a/ext/fts5/fts5_storage.c +++ b/ext/fts5/fts5_storage.c @@ -891,23 +891,26 @@ int sqlite3Fts5StorageContentInsert( /* This is an UPDATE statement, and column (i-2) was not modified. ** Retrieve the value from Fts5Storage.pSavedRow instead. */ pVal = sqlite3_column_value(p->pSavedRow, i-1); - }else if( sqlite3_value_subtype(pVal)==FTS5_LOCALE_SUBTYPE ){ + }else if( sqlite3_value_type(pVal)==SQLITE_BLOB && pConfig->bLocale ){ assert( pConfig->bLocale ); assert( i>1 ); if( pConfig->abUnindexed[i-2] ){ - /* At attempt to insert an fts5_locale() value into an UNINDEXED - ** column. Strip the locale away and just bind the text. */ - const char *pText = 0; - int nText = 0; - rc = sqlite3Fts5ExtractText(pConfig, pVal, 0, 0, &pText, &nText); - sqlite3_bind_text(pInsert, i, pText, nText, SQLITE_TRANSIENT); + if( sqlite3Fts5IsLocaleValue(pConfig, pVal) ){ + /* At attempt to insert an fts5_locale() value into an UNINDEXED + ** column. Strip the locale away and just bind the text. */ + const char *pText = 0; + int nText = 0; + rc = sqlite3Fts5ExtractText(pConfig, pVal, 0, 0, &pText, &nText); + sqlite3_bind_text(pInsert, i, pText, nText, SQLITE_TRANSIENT); + continue; + } }else{ const u8 *pBlob = (const u8*)sqlite3_value_blob(pVal); int nBlob = sqlite3_value_bytes(pVal); assert( nBlob>4 ); - sqlite3_bind_blob(pInsert, i, pBlob+4, nBlob-4, SQLITE_TRANSIENT); + sqlite3_bind_blob(pInsert, i, pBlob+16, nBlob-16, SQLITE_TRANSIENT); + continue; } - continue; } rc = sqlite3_bind_value(pInsert, i, pVal); diff --git a/ext/fts5/test/fts5locale.test b/ext/fts5/test/fts5locale.test index 684dcecd80..f0df4969dc 100644 --- a/ext/fts5/test/fts5locale.test +++ b/ext/fts5/test/fts5locale.test @@ -488,7 +488,7 @@ foreach_detail_mode $::testprefix { do_catchsql_test 10.2.$tn.4 " SELECT * FROM ft( test_setsubtype($v, 76) ); - " {1 {SQL logic error}} + " {1 {fts5: syntax error near ""}} do_execsql_test 10.2.$tn.5 { INSERT INTO ft(rowid, x) VALUES(1, 'hello world'); @@ -523,11 +523,11 @@ foreach_detail_mode $::testprefix { do_catchsql_test 10.2.$tn.11 " INSERT INTO ft(ft, rowid, x) VALUES('delete', 1, test_setsubtype($v,76) ) - " {1 {SQL logic error}} + " {0 {}} do_catchsql_test 10.2.$tn.12 " INSERT INTO ft(rowid, x) VALUES(2, test_setsubtype($v,76) ) - " {1 {SQL logic error}} + " {1 {datatype mismatch}} do_execsql_test 10.2.$tn.13 { INSERT INTO ft2(rowid, x) VALUES(1, 'hello world'); @@ -663,5 +663,29 @@ do_catchsql_test 13.2.7 { FROM ft('one AND three') ORDER BY rowid } {1 {non-integer argument passed to function fts5_get_locale()}} +#------------------------------------------------------------------------- +# Check that UPDATE statements that may affect more than one row work. +# +reset_db +do_execsql_test 14.1 { + CREATE VIRTUAL TABLE ft USING fts5(a, b, locale=1); +} + +do_execsql_test 14.2 { + INSERT INTO ft VALUES('hello', 'world'); +} + +do_execsql_test 14.3 { + UPDATE ft SET b = fts5_locale('en_AU', 'world'); +} + +do_catchsql_test 14.4 { + INSERT INTO ft VALUES(X'abcd', X'1234'); +} {1 {datatype mismatch}} + +do_execsql_test 14.4 { + SELECT * FROM ft +} {hello world} + finish_test diff --git a/manifest b/manifest index 6d4099c51d..037c1b19a0 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Fix\san\soff-by-one\serror\sin\sthe\sroutines\sthat\sbind\sthe\sspecial\s$test_TTT\sand\n$int_NNN\sparameters\sfor\sfuzz\stesting.\s\sFix\sto\stesting\slogic\sonly\s-\sno\schanges\nto\sthe\sSQLite\score. -D 2024-09-07T16:04:04.674 +C Fix\sa\sproblem\swith\sfts5\slocale=1\stables\sand\sUPDATE\sstatements\sthat\smay\saffect\smore\sthan\sone\srow. +D 2024-09-07T16:22:22.943 F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724 @@ -93,15 +93,15 @@ F ext/fts3/unicode/mkunicode.tcl 63db9624ccf70d4887836c320eda93ab552f21008f3be7e F ext/fts3/unicode/parseunicode.tcl a981bd6466d12dd17967515801c3ff23f74a281be1a03cf1e6f52a6959fc77eb F ext/fts5/extract_api_docs.tcl 009cf59c77afa86d137b0cca3e3b1a5efbe2264faa2df233f9a7aa8563926d15 F ext/fts5/fts5.h efaaac0df3d3bc740383044c144b582f47921aafa21d7b10eb98f42c24c740b0 -F ext/fts5/fts5Int.h 26a71a09cefa4ef6b4516b204ed48da3e1380970a19b3482eea7c5d805655360 +F ext/fts5/fts5Int.h 7ab1d838adc4f22fdad5e1ba19182d6899ebded1d3ecadbe995322b0f0de7b9f F ext/fts5/fts5_aux.c 65a0468dd177d6093aa9ae1622e6d86b0136b8d267c62c0ad6493ad1e9a3d759 F ext/fts5/fts5_buffer.c 0eec58bff585f1a44ea9147eae5da2447292080ea435957f7488c70673cb6f09 F ext/fts5/fts5_config.c 353d2a0d12678cae6ab5b9ce54aed8dac0825667b69248b5a4ed81cbefc109ea F ext/fts5/fts5_expr.c 9a56f53700d1860f0ee2f373c2b9074eaf2a7aa0637d0e27a6476de26a3fee33 F ext/fts5/fts5_hash.c adda4272be401566a6e0ba1acbe70ee5cb97fce944bc2e04dc707152a0ec91b1 F ext/fts5/fts5_index.c 571483823193f09439356741669aa8c81da838ae6f5e1bfa7517f7ee2fb3addd -F ext/fts5/fts5_main.c 1fddb53f495425d9314c74b30c5848a9dd254be0e5f445bfe38292d5ab21c288 -F ext/fts5/fts5_storage.c 9a9b880be12901f1962ae2a5a7e1b74348b3099a1e728764e419f75d98e3e612 +F ext/fts5/fts5_main.c c9c5fcce73ad05ef6abc4f69b9ade54093b2a6cb8ceb3ef647bd2e0d5f93b628 +F ext/fts5/fts5_storage.c 42cde97eb7d8506a8d2c7ea80b292fc3017b1f5469e1acb0035a69c345e6cf71 F ext/fts5/fts5_tcl.c 4db9258a7882c5eac0da4433042132aaf15b87dd1e1636c7a6ca203abd2c8bfe F ext/fts5/fts5_test_mi.c 08c11ec968148d4cb4119d96d819f8c1f329812c568bac3684f5464be177d3ee F ext/fts5/fts5_test_tok.c 3cb0a9b508b30d17ef025ccddd26ae3dc8ddffbe76c057616e59a9aa85d36f3b @@ -189,7 +189,7 @@ F ext/fts5/test/fts5interrupt.test 20d04204d3e341b104c0c24a41596b6393a3a81eba104 F ext/fts5/test/fts5lastrowid.test f36298a1fb9f988bde060a274a7ce638faa9c38a31400f8d2d27ea9373e0c4a1 F ext/fts5/test/fts5leftjoin.test c0b4cafb9661379e576dc4405c0891d8fcc2782680740513c4d1fc114b43d4ad F ext/fts5/test/fts5limits.test 8ab67cf5d311c124b6ceb0062d0297767176df4572d955fce79fa43004dff01c -F ext/fts5/test/fts5locale.test 797cf6f5e017462ab11313ce884b9f1df8ff063811e74ef42190cd19ed6b600b +F ext/fts5/test/fts5locale.test 58ce0515c4f49cbb9905e3711168050d58fc184daf885c9ef7483e20aab63e5a F ext/fts5/test/fts5matchinfo.test 877520582feb86bbfd95ab780099bcba4526f18ac75ee34979144cf86ba3a5a3 F ext/fts5/test/fts5merge.test 2654df0bcdb2d117c2d38b6aeb0168061be01c643f9e9194b36c43a2970e8082 F ext/fts5/test/fts5merge2.test 3ebad1a59d6ad3fb66eff6523a09e95dc6367cbefb3cd73196801dea0425c8e2 @@ -1184,7 +1184,7 @@ F test/fts3conf.test c9cd45433b6787d48a43e84949aa2eb8b3b3d242bac7276731c1476290d F test/fts3corrupt.test 6732477c5ace050c5758a40a8b5706c8c0cccd416b9c558e0e15224805a40e57 F test/fts3corrupt2.test e318f0676e5e78d5a4b702637e2bb25265954c08a1b1e4aaf93c7880bb0c67d0 F test/fts3corrupt3.test 0d5b69a0998b4adf868cc301fc78f3d0707745f1d984ce044c205cdb764b491f -F test/fts3corrupt4.test 48bd57baed9654e511709a02dbef2d22ee54c012ad466e8648f0f825233faa08 +F test/fts3corrupt4.test 294684add5f235ea8a77a350b66eb74a80ac8ecee6ac38c07885348f5fb2e233 F test/fts3corrupt5.test 0549f85ec4bd22e992f645f13c59b99d652f2f5e643dac75568bfd23a6db7ed5 F test/fts3corrupt6.test f417c910254f32c0bc9ead7affa991a1d5aec35b3b32a183ffb05eea78289525 F test/fts3cov.test 7eacdbefd756cfa4dc2241974e3db2834e9b372ca215880e00032222f32194cf @@ -2212,8 +2212,8 @@ F vsixtest/vsixtest.tcl 6195aba1f12a5e10efc2b8c0009532167be5e301abe5b31385638080 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0 -P e319d43bfd5ee4ed92b93531b239af4d1be0a8215b2a06c3532122ff2c7b6a7c -R 2c4135598e1fecddc6fdbdd8727336d2 -U drh -Z 73b413ffeaf70ba5cf262762754b392b +P 6206b90a4ec3f05e3bbb4844e71569bbde7df237550569e6419ff7c3146505dc +R a6acb083cae329c41bdac6bb67941a5b +U dan +Z 2566687c7d077d4a754f89b771fd38fa # Remove this line to create a well-formed Fossil manifest. diff --git a/manifest.uuid b/manifest.uuid index 78ce5b184c..73e0954b37 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -6206b90a4ec3f05e3bbb4844e71569bbde7df237550569e6419ff7c3146505dc +70e42f941c0778a04b82655409c7caf4c1039589f7e43a8ec1e736ea8f931b26 diff --git a/test/fts3corrupt4.test b/test/fts3corrupt4.test index 433a486359..7a9ab3fbd0 100644 --- a/test/fts3corrupt4.test +++ b/test/fts3corrupt4.test @@ -4404,7 +4404,7 @@ do_catchsql_test 25.5 { do_catchsql_test 25.6 { INSERT INTO t1(t1) SELECT x FROM t2; INSERT INTO t1(t1) SELECT x FROM t2; -} {1 {database disk image is malformed}} +} {0 {}} #------------------------------------------------------------------------- reset_db