Fix a spurious report of corruption that could be made by the fts5 integrity-check in SQLITE_DEBUG builds if the fts5 index contains malformed utf text.

FossilOrigin-Name: a11b393dc2c882cf0b3c47c3405bf43ca1d6459605bd39cccce4d32da653a72d
This commit is contained in:
dan 2019-12-24 14:27:03 +00:00
parent 34da2a4806
commit c26e78d275
4 changed files with 63 additions and 11 deletions

View File

@ -5723,6 +5723,37 @@ static int fts5QueryCksum(
return rc;
}
/*
** Check if buffer z[], size n bytes, contains a series of valid utf-8
** encoded codepoints. If so, return 0. Otherwise, if the buffer does not
** contain valid utf-8, return non-zero.
**
** Only the structure of each sequence is verified: every lead byte must be
** followed, within the n bytes of the buffer, by the number of continuation
** bytes (bit pattern 10xxxxxx) implied by its high-order bits. Overlong
** encodings, surrogates and out-of-range codepoints are not rejected.
**
** The caller must pass n>0.
*/
static int fts5TestUtf8(const char *z, int n){
  int i = 0;
  assert( n>0 );
  while( i<n ){
    if( (z[i] & 0x80)==0x00 ){
      /* 0xxxxxxx: single byte (ASCII) */
      i++;
    }else
    if( (z[i] & 0xE0)==0xC0 ){
      /* 110xxxxx: lead byte of a two-byte sequence */
      if( i+1>=n || (z[i+1] & 0xC0)!=0x80 ) return 1;
      i += 2;
    }else
    if( (z[i] & 0xF0)==0xE0 ){
      /* 1110xxxx: lead byte of a three-byte sequence */
      if( i+2>=n || (z[i+1] & 0xC0)!=0x80 || (z[i+2] & 0xC0)!=0x80 ) return 1;
      i += 3;
    }else
    if( (z[i] & 0xF8)==0xF0 ){
      /* 11110xxx: lead byte of a four-byte sequence. All three trailing
      ** bytes must be continuation bytes, and the whole sequence (four
      ** bytes, not three) is consumed. */
      if( i+3>=n || (z[i+1] & 0xC0)!=0x80 || (z[i+2] & 0xC0)!=0x80 ) return 1;
      if( (z[i+3] & 0xC0)!=0x80 ) return 1;
      i += 4;
    }else{
      /* A stray continuation byte, or a byte that can never begin a
      ** utf-8 sequence (11111xxx). */
      return 1;
    }
  }

  return 0;
}
/*
** This function is also purely an internal test. It does not contribute to
@ -5763,8 +5794,14 @@ static void fts5TestTerm(
** This check may only be performed if the hash table is empty. This
** is because the hash table only supports a single scan query at
** a time, and the multi-iter loop from which this function is called
** is already performing such a scan. */
if( p->nPendingData==0 ){
** is already performing such a scan.
**
** Also only do this if buffer zTerm contains nTerm bytes of valid
** utf-8. Otherwise, the last part of the buffer contents might contain
** a non-utf-8 sequence that happens to be a prefix of a valid utf-8
** character stored in the main fts index, which will cause the
** test to fail. */
if( p->nPendingData==0 && 0==fts5TestUtf8(zTerm, nTerm) ){
if( iIdx>0 && rc==SQLITE_OK ){
int f = flags|FTS5INDEX_QUERY_TEST_NOIDX;
ck2 = 0;

View File

@ -250,6 +250,21 @@ do_execsql_test 9.2 {
-4764623217061966105 8324454597464624651
}
#-------------------------------------------------------------------------
# Store the lone byte 0xE4, which is not valid utf-8 on its own, next to a
# character whose 3-byte utf-8 encoding (E4 94 AC) begins with that same
# byte, in a table created with prefix = 1. Test 10.1 confirms both values
# are stored with exactly those bytes. Test 10.2 then runs the fts5
# integrity-check and expects it to produce no output and no error.
#
reset_db
do_execsql_test 10.0 {
CREATE VIRTUAL TABLE vt1 USING fts5(c1, c2, prefix = 1, tokenize = "ascii");
INSERT INTO vt1 VALUES (x'e4', '䔬');
}
do_execsql_test 10.1 {
SELECT quote(CAST(c1 AS blob)), quote(CAST(c2 AS blob)) FROM vt1
} {X'E4' X'E494AC'}
do_execsql_test 10.2 {
INSERT INTO vt1(vt1) VALUES('integrity-check');
}
finish_test

View File

@ -1,5 +1,5 @@
C Convert\san\sALWAYS()\sinto\san\sassert()\swith\san\sextra\serror\sterm.\nDbsqlfuzz\sfind,\swith\stest\scase\sin\sTH3.
D 2019-12-24T13:41:33.515
C Fix\sa\sspurious\sreport\sof\scorruption\sthat\scould\sbe\smade\sby\sthe\sfts5\sintegrity-check\sin\sSQLITE_DEBUG\sbuilds\sif\sthe\sfts5\sindex\scontains\smalformed\sutf\stext.
D 2019-12-24T14:27:03.948
F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1
F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea
F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724
@ -116,7 +116,7 @@ F ext/fts5/fts5_buffer.c 5a5fe0159752c0fb0a5a93c722e9db2662822709490769d482b76a6
F ext/fts5/fts5_config.c b447948f35ad3354e8fe5e242e0a7e7b5b941555400b9404259944e3aa570037
F ext/fts5/fts5_expr.c 2be456484786333d559dc2987a00f2750981fab91d52db8452a8046278c5f22e
F ext/fts5/fts5_hash.c 1cc0095646f5f3b46721aa112fb4f9bf29ae175cb5338f89dcec66ed97acfe75
F ext/fts5/fts5_index.c 99b77ae1f503978ca76985bcfff7345c822aed8bbaa8edb3747f804f614685b5
F ext/fts5/fts5_index.c d0b7e5e79c136c6e27c96c8e8b5db7db8ec750edda427008afbec07b813178d4
F ext/fts5/fts5_main.c 9db1f173d299466aeff89bd949fb1eb0a181265726fb56f11e07ea292dcc9a73
F ext/fts5/fts5_storage.c 3ecda8edadc1f62a355d6789776be0da609f8658c50d72e422674093ab7e1528
F ext/fts5/fts5_tcl.c 39bcbae507f594aad778172fa914cad0f585bf92fd3b078c686e249282db0d95
@ -189,7 +189,7 @@ F ext/fts5/test/fts5leftjoin.test c0b4cafb9661379e576dc4405c0891d8fcc27826807405
F ext/fts5/test/fts5matchinfo.test 50d86da66ec5b27603dcd90ba0227f5d9deb10351cbc52974a88e24f6fc9b076
F ext/fts5/test/fts5merge.test e92a8db28b45931e7a9c7b1bbd36101692759d00274df74d83fd29d25d53b3a6
F ext/fts5/test/fts5merge2.test 3ebad1a59d6ad3fb66eff6523a09e95dc6367cbefb3cd73196801dea0425c8e2
F ext/fts5/test/fts5misc.test a5b53328b5b79275915de8f67ae85905eb2133d8dbcc808411f67c094b1bd347
F ext/fts5/test/fts5misc.test b294b1d7ad814da30e473905a8165de1bfe137822d243f2ab8cbf20ecc37bd1e
F ext/fts5/test/fts5multi.test a15bc91cdb717492e6e1b66fec1c356cb57386b980c7ba5af1915f97fe878581
F ext/fts5/test/fts5multiclient.test 5ff811c028d6108045ffef737f1e9f05028af2458e456c0937c1d1b8dea56d45
F ext/fts5/test/fts5near.test 211477940142d733ac04fad97cb24095513ab2507073a99c2765c3ddd2ef58bd
@ -1852,7 +1852,7 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93
F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc
F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e
F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0
P 401c9d30e06191d938503aae024bc453d960fa64dc812ed86c661f94533247fd
R 7bfc5184e8e7362443b5d6dff6be172e
U drh
Z 7327fcfaeb195a694e2119a9482b57cd
P b473ad35c5ce355853e1805a5c0658bda1500775f22f59c6b6759ae990e65aca
R 88e0f620d2e9b56c3b0053f9411f1c0c
U dan
Z 0716b408ff1a9ce855db0a1c4ccdfffe

View File

@ -1 +1 @@
b473ad35c5ce355853e1805a5c0658bda1500775f22f59c6b6759ae990e65aca
a11b393dc2c882cf0b3c47c3405bf43ca1d6459605bd39cccce4d32da653a72d