From 7c95b0f3dab6f6924826f32ee570e852dc4fc946 Mon Sep 17 00:00:00 2001 From: drh Date: Fri, 23 Oct 2009 18:15:46 +0000 Subject: [PATCH] Make sure that UTF16 to UTF8 conversions to not read past the end of the UTF16 input buffer if the last two bytes of the UTF16 happen to be the first half of a surrogate pair. Ticket [3fe897352e] FossilOrigin-Name: 19064d7cea838e1a93fe63743ed247f440679e97 --- manifest | 27 +++++++--- manifest.uuid | 2 +- src/test_func.c | 107 ++++++++++++++++++++++++++++++++++++++- src/utf.c | 21 ++++---- test/tkt-3fe897352e.test | 53 +++++++++++++++++++ 5 files changed, 189 insertions(+), 21 deletions(-) create mode 100644 test/tkt-3fe897352e.test diff --git a/manifest b/manifest index d92eca6c16..097a850edc 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,8 @@ -C In\sshell,\sensure\sthat\sdo_meta_command()\sreturns\sconsistent\serror\svalues.\s\s\nAdjusted\sthe\stext\sof\ssome\serror\smessage\sto\sbe\smore\sconsistent.\nTicket\s[beb2dd69ad]. -D 2009-10-23T01:27:39 +-----BEGIN PGP SIGNED MESSAGE----- +Hash: SHA1 + +C Make\ssure\sthat\sUTF16\sto\sUTF8\sconversions\sto\snot\sread\spast\sthe\send\sof\sthe\nUTF16\sinput\sbuffer\sif\sthe\slast\stwo\sbytes\sof\sthe\sUTF16\shappen\sto\sbe\sthe\nfirst\shalf\sof\sa\ssurrogate\spair.\s\sTicket\s[3fe897352e] +D 2009-10-23T18:15:46 F Makefile.arm-wince-mingw32ce-gcc fcd5e9cd67fe88836360bb4f9ef4cb7f8e2fb5a0 F Makefile.in a77dfde96ad86aafd3f71651a4333a104debe86a F Makefile.linux-gcc d53183f4aa6a9192d249731c90dbdffbd2c68654 @@ -184,7 +187,7 @@ F src/test_backup.c 1384a18985a5a2d275c2662e48473bf1542ebd08 F src/test_btree.c 5adbba9b138988a3cf4d3b5424dbc7c85651da02 F src/test_config.c 4ac1e6257dcf926a71b7934410b71c5c326e68f2 F src/test_devsym.c 9f4bc2551e267ce7aeda195f3897d0f30c5228f4 -F src/test_func.c 26ac62d8ed7a9f45a1e05baffb1c1e55fe2a06f2 +F src/test_func.c c6e9d7cfbd7bb0bd7c392a10d76adab4b48e813b F src/test_hexio.c 2f1122aa3f012fa0142ee3c36ce5c902a70cd12f F src/test_init.c 
f6a5dfaf2fb52d697eec1c825a641e5893c339d2 F src/test_journal.c dab49b7c47b53242f039c9563b18cafb67ebfe03 @@ -202,7 +205,7 @@ F src/test_wsd.c 3ae5101de6cbfda2720152ab659ea84079719241 F src/tokenize.c af8a56e6a50c5042fc305bfa796275e9bf26ff2b F src/trigger.c 2053afa9952f69cf451bc0e6ea88072701f2925e F src/update.c 8e8535f66c32d946199cb1caad19646a97ead3a7 -F src/utf.c 99cf927eabb104621ba889ac0dd075fc1657ad30 +F src/utf.c 7b4012e80709fa654150dee360fc8dc62c4f3e12 F src/util.c 59d4e9456bf1fe581f415a783fa0cee6115c8f35 F src/vacuum.c 48e1282bbd5eac4b461587c51658378658c00770 F src/vdbe.c f0d6e7dbd4515758c188c9dd7025eb9dfcf021e0 @@ -587,6 +590,7 @@ F test/thread_common.tcl b65e6b1d1d90dc885e10ad080896c6c56eef0819 F test/threadtest1.c 6029d9c5567db28e6dc908a0c63099c3ba6c383b F test/threadtest2.c ace893054fa134af3fc8d6e7cfecddb8e3acefb9 F test/tkt-2ea2425d34.test 1cf13e6f75d149b3209a0cb32927a82d3d79fb28 +F test/tkt-3fe897352e.test 8084dad39807eac10b10720c84193bd1a5980973 F test/tkt-4a03edc4c8.test 2865e4edbc075b954daa82f8da7cc973033ec76e F test/tkt-5ee23731f.test 3581260f2a71e51db94e1506ba6b0f7311d002a9 F test/tkt-94c04eaadb.test 40e6b1fce420fbecf8c2379d3ec3cb6889e49091 @@ -760,7 +764,14 @@ F tool/speedtest2.tcl ee2149167303ba8e95af97873c575c3e0fab58ff F tool/speedtest8.c 2902c46588c40b55661e471d7a86e4dd71a18224 F tool/speedtest8inst1.c 293327bc76823f473684d589a8160bde1f52c14e F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f -P 009efad0f4293dd08a6f2f16d8eb9e94e2f962ca -R f9d7f2aaa2e65e337a1efea04bfa77d8 -U shane -Z 4f0172d3d5c0c4efeae4e77b96e3ca4c +P 1ebac9edddd28bdbbd9815fdb64eb9129f39f94a +R 10c8b96c1dc163bdaa37780882770a38 +U drh +Z 0cd60e2c2d007317dff310d0e88c5141 +-----BEGIN PGP SIGNATURE----- +Version: GnuPG v1.4.6 (GNU/Linux) + +iD8DBQFK4fLVoxKgR168RlERAr5CAJ0bzAp+z2Fww00QL0aoSxz38K0PMACfZgQl +UXyWy3LAycOijyALRw/J7TI= +=pctl +-----END PGP SIGNATURE----- diff --git a/manifest.uuid b/manifest.uuid index 28969a3cf0..9ee5a18060 100644 --- a/manifest.uuid +++ 
b/manifest.uuid @@ -1 +1 @@ -1ebac9edddd28bdbbd9815fdb64eb9129f39f94a \ No newline at end of file +19064d7cea838e1a93fe63743ed247f440679e97 \ No newline at end of file diff --git a/src/test_func.c b/src/test_func.c index d0f47d400e..3557674619 100644 --- a/src/test_func.c +++ b/src/test_func.c @@ -313,6 +313,108 @@ static void test_eval( } +/* +** Convert one hex digit character to its binary value; any other character yields 0. +*/ +static int testHexChar(char c){ + if( c>='0' && c<='9' ){ + return c - '0'; + }else if( c>='a' && c<='f' ){ + return c - 'a' + 10; + }else if( c>='A' && c<='F' ){ + return c - 'A' + 10; + } + return 0; +} + +/* +** Convert hex to binary: each pair of characters in zIn yields one byte in zOut. +*/ +static void testHexToBin(const char *zIn, char *zOut){ + while( zIn[0] && zIn[1] ){ + *(zOut++) = (testHexChar(zIn[0])<<4) + testHexChar(zIn[1]); + zIn += 2; + } +} + +/* +** hex_to_utf16be(HEX) +** +** Convert the input string from HEX into binary. Then return the +** result using sqlite3_result_text16be(). +*/ +static void testHexToUtf16be( + sqlite3_context *pCtx, + int nArg, + sqlite3_value **argv +){ + int n; + const char *zIn; + char *zOut; + assert( nArg==1 ); + n = sqlite3_value_bytes(argv[0]); + zIn = (const char*)sqlite3_value_text(argv[0]); + zOut = sqlite3_malloc( n/2 ); + if( zOut==0 ){ + sqlite3_result_error_nomem(pCtx); + }else{ + testHexToBin(zIn, zOut); + sqlite3_result_text16be(pCtx, zOut, n/2, sqlite3_free); + } +} + +/* +** hex_to_utf8(HEX) +** +** Convert the input string from HEX into binary. Then return the +** result using sqlite3_result_text(). 
+*/ +static void testHexToUtf8( + sqlite3_context *pCtx, + int nArg, + sqlite3_value **argv +){ + int n; + const char *zIn; + char *zOut; + assert( nArg==1 ); + n = sqlite3_value_bytes(argv[0]); + zIn = (const char*)sqlite3_value_text(argv[0]); + zOut = sqlite3_malloc( n/2 ); + if( zOut==0 ){ + sqlite3_result_error_nomem(pCtx); + }else{ + testHexToBin(zIn, zOut); + sqlite3_result_text(pCtx, zOut, n/2, sqlite3_free); + } +} + +/* +** hex_to_utf16le(HEX) +** +** Convert the input string from HEX into binary. Then return the +** result using sqlite3_result_text16le(). +*/ +static void testHexToUtf16le( + sqlite3_context *pCtx, + int nArg, + sqlite3_value **argv +){ + int n; + const char *zIn; + char *zOut; + assert( nArg==1 ); + n = sqlite3_value_bytes(argv[0]); + zIn = (const char*)sqlite3_value_text(argv[0]); + zOut = sqlite3_malloc( n/2 ); + if( zOut==0 ){ + sqlite3_result_error_nomem(pCtx); + }else{ + testHexToBin(zIn, zOut); + sqlite3_result_text16le(pCtx, zOut, n/2, sqlite3_free); + } +} + static int registerTestFunctions(sqlite3 *db){ static const struct { char *zName; @@ -324,7 +426,10 @@ static int registerTestFunctions(sqlite3 *db){ { "test_destructor", 1, SQLITE_UTF8, test_destructor}, #ifndef SQLITE_OMIT_UTF16 { "test_destructor16", 1, SQLITE_UTF8, test_destructor16}, + { "hex_to_utf16be", 1, SQLITE_UTF8, testHexToUtf16be}, + { "hex_to_utf16le", 1, SQLITE_UTF8, testHexToUtf16le}, #endif + { "hex_to_utf8", 1, SQLITE_UTF8, testHexToUtf8}, { "test_destructor_count", 0, SQLITE_UTF8, test_destructor_count}, { "test_auxdata", -1, SQLITE_UTF8, test_auxdata}, { "test_error", 1, SQLITE_UTF8, test_error}, @@ -447,8 +552,6 @@ abuse_err: return TCL_ERROR; } - - /* ** Register commands with the TCL interpreter. 
*/ diff --git a/src/utf.c b/src/utf.c index c3d07be828..93ce47c481 100644 --- a/src/utf.c +++ b/src/utf.c @@ -107,20 +107,20 @@ static const unsigned char sqlite3Utf8Trans1[] = { } \ } -#define READ_UTF16LE(zIn, c){ \ +#define READ_UTF16LE(zIn, zTerm, c){ \ c = (*zIn++); \ c += ((*zIn++)<<8); \ - if( c>=0xD800 && c<0xE000 ){ \ + if( c>=0xD800 && c<0xE000 && zIn=0xD800 && c<0xE000 ){ \ + if( c>=0xD800 && c<0xE000 && zInenc==SQLITE_UTF16LE ){ /* UTF-16 Little-endian -> UTF-8 */ while( zIn UTF-8 */ while( zIn0 && n<=4 ); z[0] = 0; z = zBuf; - READ_UTF16LE(z, c); + READ_UTF16LE(z, &zBuf[n], c); assert( c==i ); assert( (z-zBuf)==n ); } @@ -559,7 +560,7 @@ void sqlite3UtfSelfTest(void){ assert( n>0 && n<=4 ); z[0] = 0; z = zBuf; - READ_UTF16BE(z, c); + READ_UTF16BE(z, &zBuf[n], c); assert( c==i ); assert( (z-zBuf)==n ); } diff --git a/test/tkt-3fe897352e.test b/test/tkt-3fe897352e.test new file mode 100644 index 0000000000..eb4f178135 --- /dev/null +++ b/test/tkt-3fe897352e.test @@ -0,0 +1,53 @@ +# 2009 October 23 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# This file implements regression tests for SQLite library. +# +# This file implements tests to verify that ticket [3fe897352e8d8] has been +# fixed. 
+# + +set testdir [file dirname $argv0] +source $testdir/tester.tcl + +do_test tkt-3fe89-1.1 { + db close + sqlite3 db :memory: + db eval { + PRAGMA encoding=UTF8; + CREATE TABLE t1(x); + INSERT INTO t1 VALUES(hex_to_utf16be('D800')); + SELECT hex(x) FROM t1; + } +} {EDA080} +do_test tkt-3fe89-1.2 { + db eval { + DELETE FROM t1; + INSERT INTO t1 VALUES(hex_to_utf16le('00D8')); + SELECT hex(x) FROM t1; + } +} {EDA080} +do_test tkt-3fe89-1.3 { + db eval { + DELETE FROM t1; + INSERT INTO t1 VALUES(hex_to_utf16be('DFFF')); + SELECT hex(x) FROM t1; + } +} {EDBFBF} +do_test tkt-3fe89-1.4 { + db eval { + DELETE FROM t1; + INSERT INTO t1 VALUES(hex_to_utf16le('FFDF')); + SELECT hex(x) FROM t1; + } +} {EDBFBF} + + +finish_test