Convert invalid surrogates to 0xfffd when translating UTF.
FossilOrigin-Name: 7fab1393c2b22b1f3b159b631e06e7e0d3900850ee249c38e4d3cdd0aacf637e
This commit is contained in:
parent
78d1d225d8
commit
0184a256e3
14
manifest
14
manifest
@ -1,5 +1,5 @@
|
||||
C A\sbetter\s(smaller\sand\sfaster)\ssolution\sto\sticket\s[4374860b29383380].
|
||||
D 2020-02-17T19:25:07.592
|
||||
C Convert\sinvalid\ssurrogates\sto\s0xfffd\swhen\stranslating\sUTF.
|
||||
D 2020-02-17T23:08:16.564
|
||||
F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1
|
||||
F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea
|
||||
F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724
|
||||
@ -600,7 +600,7 @@ F src/treeview.c 438c1000587b33faba35e87596bebcf7f40638d98f33781cdd9e04711b18b09
|
||||
F src/trigger.c a40d50e88bd3355f1d2a73f0a3b2d6b42eae26ca4219001b82ef0d064439badc
|
||||
F src/update.c 3eb778c42155d944377a4ee5e440b04520f07094804ed6ce63d2528f619614d9
|
||||
F src/upsert.c 2920de71b20f04fe25eb00b655d086f0ba60ea133c59d7fa3325c49838818e78
|
||||
F src/utf.c 736ff76753236ffbc8b5b939f5e0607f28aeaa7c780b3a56b419228f0a81c87b
|
||||
F src/utf.c 95fb6e03a5ca679045c5adccd05380f0addccabef5911abddcb06af069500ab7
|
||||
F src/util.c a285c1e026907b69fa2592bd05047a565a1d8a1aef2b73c924b6a8ffe772871a
|
||||
F src/vacuum.c 813b510ba887fee6492bcb11f2bf77d7eb58b232b83649136372e0a2fc17f4b9
|
||||
F src/vdbe.c 15cae95de3c1301747f7ee17a70046772741e7e630b6d5554c685b613798b8e8
|
||||
@ -1439,7 +1439,7 @@ F test/tkt-385a5b56b9.test 5204a7cba0e28c99df0acbf95af5e1af4d32965a7a14de6eccebf
|
||||
F test/tkt-38cb5df375.test f3cc8671f1eb604d4ae9cf886ed4366bec656678
|
||||
F test/tkt-3998683a16.test 6d1d04d551ed1704eb3396ca87bb9ccc8c5c1eb7
|
||||
F test/tkt-3a77c9714e.test 90e3e8455ee945a4076d4c44062b8845708af24a880355328fe7008f2047c9f0
|
||||
F test/tkt-3fe897352e.test 27e26eb0f1811aeba4d65aba43a4c52e99da5e70
|
||||
F test/tkt-3fe897352e.test 6849fde0a87165ff83f54f5047af7c743d72af26908fadb90174f3294450b3f4
|
||||
F test/tkt-4a03edc4c8.test 91c0e135888cdc3d4eea82406a44b05c8c1648d0
|
||||
F test/tkt-4c86b126f2.test cbcc611becd0396890169ab23102dd70048bbc9a
|
||||
F test/tkt-4dd95f6943.test 3d0ce415d2ee15d3d564121960016b9c7be79407
|
||||
@ -1858,7 +1858,7 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93
|
||||
F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc
|
||||
F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e
|
||||
F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0
|
||||
P 9d0d4ab95dc0c56e053c2924ed322a9ea7b25439e6f74599f706905a1994e454
|
||||
R 1c052b7cdf4947664b7043564b643ac3
|
||||
P abc473fb8fb999005dc79a360e34f97b3b25429decf1820dd2afa5c19577753d
|
||||
R 4bf2952c1b8e70d0751661faff1674f3
|
||||
U drh
|
||||
Z e960557a43b001a47933dacf8bc1d10e
|
||||
Z 268df0dffaa8fc33ed45d27c33b3bdad
|
||||
|
@ -1 +1 @@
|
||||
abc473fb8fb999005dc79a360e34f97b3b25429decf1820dd2afa5c19577753d
|
||||
7fab1393c2b22b1f3b159b631e06e7e0d3900850ee249c38e4d3cdd0aacf637e
|
97
src/utf.c
97
src/utf.c
@ -105,26 +105,6 @@ static const unsigned char sqlite3Utf8Trans1[] = {
|
||||
} \
|
||||
}
|
||||
|
||||
#define READ_UTF16LE(zIn, TERM, c){ \
|
||||
c = (*zIn++); \
|
||||
c += ((*zIn++)<<8); \
|
||||
if( c>=0xD800 && c<0xE000 && TERM ){ \
|
||||
int c2 = (*zIn++); \
|
||||
c2 += ((*zIn++)<<8); \
|
||||
c = (c2&0x03FF) + ((c&0x003F)<<10) + (((c&0x03C0)+0x0040)<<10); \
|
||||
} \
|
||||
}
|
||||
|
||||
#define READ_UTF16BE(zIn, TERM, c){ \
|
||||
c = ((*zIn++)<<8); \
|
||||
c += (*zIn++); \
|
||||
if( c>=0xD800 && c<0xE000 && TERM ){ \
|
||||
int c2 = ((*zIn++)<<8); \
|
||||
c2 += (*zIn++); \
|
||||
c = (c2&0x03FF) + ((c&0x003F)<<10) + (((c&0x03C0)+0x0040)<<10); \
|
||||
} \
|
||||
}
|
||||
|
||||
/*
|
||||
** Translate a single UTF-8 character. Return the unicode value.
|
||||
**
|
||||
@ -301,13 +281,43 @@ SQLITE_NOINLINE int sqlite3VdbeMemTranslate(Mem *pMem, u8 desiredEnc){
|
||||
if( pMem->enc==SQLITE_UTF16LE ){
|
||||
/* UTF-16 Little-endian -> UTF-8 */
|
||||
while( zIn<zTerm ){
|
||||
READ_UTF16LE(zIn, zIn<zTerm, c);
|
||||
c = *(zIn++);
|
||||
c += (*(zIn++))<<8;
|
||||
if( c>=0xd800 && c<0xe000 ){
|
||||
if( c>=0xdc00 || zIn>=zTerm ){
|
||||
c = 0xfffd;
|
||||
}else{
|
||||
int c2 = *(zIn++);
|
||||
c2 += (*(zIn++))<<8;
|
||||
if( c2<0xdc00 || c2>=0xe000 ){
|
||||
zIn -= 2;
|
||||
c = 0xfffd;
|
||||
}else{
|
||||
c = ((c&0x3ff)<<10) + (c2&0x3ff) + 0x10000;
|
||||
}
|
||||
}
|
||||
}
|
||||
WRITE_UTF8(z, c);
|
||||
}
|
||||
}else{
|
||||
/* UTF-16 Big-endian -> UTF-8 */
|
||||
while( zIn<zTerm ){
|
||||
READ_UTF16BE(zIn, zIn<zTerm, c);
|
||||
c = (*(zIn++))<<8;
|
||||
c += *(zIn++);
|
||||
if( c>=0xd800 && c<0xe000 ){
|
||||
if( c>=0xdc00 || zIn>=zTerm ){
|
||||
c = 0xfffd;
|
||||
}else{
|
||||
int c2 = (*(zIn++))<<8;
|
||||
c2 += *(zIn++);
|
||||
if( c2<0xdc00 || c2>=0xe000 ){
|
||||
zIn -= 2;
|
||||
c = 0xfffd;
|
||||
}else{
|
||||
c = ((c&0x3ff)<<10) + (c2&0x3ff) + 0x10000;
|
||||
}
|
||||
}
|
||||
}
|
||||
WRITE_UTF8(z, c);
|
||||
}
|
||||
}
|
||||
@ -466,18 +476,15 @@ int sqlite3Utf16ByteLen(const void *zIn, int nChar){
|
||||
unsigned char const *z = zIn;
|
||||
int n = 0;
|
||||
|
||||
if( SQLITE_UTF16NATIVE==SQLITE_UTF16BE ){
|
||||
while( n<nChar ){
|
||||
READ_UTF16BE(z, 1, c);
|
||||
n++;
|
||||
}
|
||||
}else{
|
||||
while( n<nChar ){
|
||||
READ_UTF16LE(z, 1, c);
|
||||
n++;
|
||||
}
|
||||
if( SQLITE_UTF16NATIVE==SQLITE_UTF16LE ) z++;
|
||||
while( n<nChar ){
|
||||
c = z[0];
|
||||
z += 2;
|
||||
if( c>=0xd8 && c<0xdc && z[0]>=0xdc && z[0]<0xe0 ) z += 2;
|
||||
n++;
|
||||
}
|
||||
return (int)(z-(unsigned char const *)zIn);
|
||||
return (int)(z-(unsigned char const *)zIn)
|
||||
- (SQLITE_UTF16NATIVE==SQLITE_UTF16LE);
|
||||
}
|
||||
|
||||
#if defined(SQLITE_TEST)
|
||||
@ -507,30 +514,6 @@ void sqlite3UtfSelfTest(void){
|
||||
assert( c==t );
|
||||
assert( (z-zBuf)==n );
|
||||
}
|
||||
for(i=0; i<0x00110000; i++){
|
||||
if( i>=0xD800 && i<0xE000 ) continue;
|
||||
z = zBuf;
|
||||
WRITE_UTF16LE(z, i);
|
||||
n = (int)(z-zBuf);
|
||||
assert( n>0 && n<=4 );
|
||||
z[0] = 0;
|
||||
z = zBuf;
|
||||
READ_UTF16LE(z, 1, c);
|
||||
assert( c==i );
|
||||
assert( (z-zBuf)==n );
|
||||
}
|
||||
for(i=0; i<0x00110000; i++){
|
||||
if( i>=0xD800 && i<0xE000 ) continue;
|
||||
z = zBuf;
|
||||
WRITE_UTF16BE(z, i);
|
||||
n = (int)(z-zBuf);
|
||||
assert( n>0 && n<=4 );
|
||||
z[0] = 0;
|
||||
z = zBuf;
|
||||
READ_UTF16BE(z, 1, c);
|
||||
assert( c==i );
|
||||
assert( (z-zBuf)==n );
|
||||
}
|
||||
}
|
||||
#endif /* SQLITE_TEST */
|
||||
#endif /* SQLITE_OMIT_UTF16 */
|
||||
|
@ -33,28 +33,28 @@ do_test tkt-3fe89-1.1 {
|
||||
INSERT INTO t1 VALUES(hex_to_utf16be('D800'));
|
||||
SELECT hex(x) FROM t1;
|
||||
}
|
||||
} {EDA080}
|
||||
} {EFBFBD}
|
||||
do_test tkt-3fe89-1.2 {
|
||||
db eval {
|
||||
DELETE FROM t1;
|
||||
INSERT INTO t1 VALUES(hex_to_utf16le('00D8'));
|
||||
SELECT hex(x) FROM t1;
|
||||
}
|
||||
} {EDA080}
|
||||
} {EFBFBD}
|
||||
do_test tkt-3fe89-1.3 {
|
||||
db eval {
|
||||
DELETE FROM t1;
|
||||
INSERT INTO t1 VALUES(hex_to_utf16be('DFFF'));
|
||||
SELECT hex(x) FROM t1;
|
||||
}
|
||||
} {EDBFBF}
|
||||
} {EFBFBD}
|
||||
do_test tkt-3fe89-1.4 {
|
||||
db eval {
|
||||
DELETE FROM t1;
|
||||
INSERT INTO t1 VALUES(hex_to_utf16le('FFDF'));
|
||||
SELECT hex(x) FROM t1;
|
||||
}
|
||||
} {EDBFBF}
|
||||
} {EFBFBD}
|
||||
|
||||
|
||||
finish_test
|
||||
|
Loading…
Reference in New Issue
Block a user