mirror of https://github.com/sqlite/sqlite
Rework the UTF8 reader logic in order to avoid the use of malloc().
Ticket #2523. (CVS 4175) FossilOrigin-Name: 9a059cb6bced5cdc950f7816602ac92d89a899be
This commit is contained in:
parent
ad6b3159be
commit
6615095629
16
manifest
16
manifest
|
@ -1,5 +1,5 @@
|
|||
C Fix\sa\sbad\ssizeof\sin\svdbe.c.\s\sTicket\s#2522.\s(CVS\s4174)
|
||||
D 2007-07-22T19:10:21
|
||||
C Rework\sthe\sUTF8\sreader\slogic\sin\sorder\sto\savoid\sthe\suse\sof\smalloc().\nTicket\s#2523.\s(CVS\s4175)
|
||||
D 2007-07-23T19:12:42
|
||||
F Makefile.in 0c0e53720f658c7a551046442dd7afba0b72bfbe
|
||||
F Makefile.linux-gcc 65241babba6faf1152bf86574477baab19190499
|
||||
F README 9c4e2d6706bdcc3efdd773ce752a8cdab4f90028
|
||||
|
@ -78,7 +78,7 @@ F src/date.c 6049db7d5a8fdf2c677ff7d58fa31d4f6593c988
|
|||
F src/delete.c 5c0d89b3ef7d48fe1f5124bfe8341f982747fe29
|
||||
F src/experimental.c 1b2d1a6cd62ecc39610e97670332ca073c50792b
|
||||
F src/expr.c de9f55b1baed00199466028ad96967208d487798
|
||||
F src/func.c 6b45261aa2c514f642201b90493af68469c04af6
|
||||
F src/func.c dcba54fc18d2b2fd02f8b7c3dc13e27d100a4d8e
|
||||
F src/hash.c 67b23e14f0257b69a3e8aa663e4eeadc1a2b6fd5
|
||||
F src/hash.h 1b3f7e2609141fd571f62199fc38687d262e9564
|
||||
F src/insert.c 89d184422d85db0418e0f66032ccea3657078ecd
|
||||
|
@ -111,7 +111,7 @@ F src/server.c 087b92a39d883e3fa113cae259d64e4c7438bc96
|
|||
F src/shell.c e7534cce78398bc1cac4a643e931fc6221c2897e
|
||||
F src/sqlite.h.in 8164526b1658a6dad472953ea91239849f913d45
|
||||
F src/sqlite3ext.h a27bedc222df5e5f0f458ac99726d0483b953a91
|
||||
F src/sqliteInt.h 81183ae71162818bf60478e738ff68604128bb06
|
||||
F src/sqliteInt.h 358f3a29b98e1efdd840a928dec8f60a51e6a33e
|
||||
F src/sqliteLimit.h f14609c27636ebc217c9603ade26dbdd7d0f6afa
|
||||
F src/table.c a8de75bcedf84d4060d804264b067ab3b1a3561d
|
||||
F src/tclsqlite.c 0d3370e01cd3b313ed29ed6b0ba00423b4329de0
|
||||
|
@ -137,7 +137,7 @@ F src/test_tclvar.c ea4500a60d663f7fdf18fd3210efc112e0c6e7f0
|
|||
F src/tokenize.c 0f0955ef7b8ab99ba2d3099faa89b80ccba3733a
|
||||
F src/trigger.c 420192efe3e6f03addf7897c60c3c8bf913d3493
|
||||
F src/update.c 6b10becb6235ea314ed245fbfbf8b38755e3166e
|
||||
F src/utf.c 01b2aba02b10d12903e9e1ff897215c9faf6b662
|
||||
F src/utf.c c152f99ddccc5e0214a9817aa07ab1b208b43f14
|
||||
F src/util.c 9e81d417fc60bd2fe156f8f2317aa4845bc6cc90
|
||||
F src/vacuum.c 8bd895d29e7074e78d4e80f948e35ddc9cf2beef
|
||||
F src/vdbe.c a58fe70f11078deb16f6825cc99f099d2fad4a7b
|
||||
|
@ -520,7 +520,7 @@ F www/tclsqlite.tcl 8be95ee6dba05eabcd27a9d91331c803f2ce2130
|
|||
F www/vdbe.tcl 87a31ace769f20d3627a64fa1fade7fed47b90d0
|
||||
F www/version3.tcl 890248cf7b70e60c383b0e84d77d5132b3ead42b
|
||||
F www/whentouse.tcl fc46eae081251c3c181bd79c5faef8195d7991a5
|
||||
P 1924ba5207bdc8d503c17cd9460c1a9f9c357635
|
||||
R 6a3d5d19ad9da4a9718db45f3a6f4e18
|
||||
P 77ebc3feb089c28155cf20873fb4eabd26fa50c1
|
||||
R 4c6f94c5ade866798dc608d64060285b
|
||||
U drh
|
||||
Z f3b0c8bff800cc59d8eb156576c3d0e8
|
||||
Z 9a4a3510d0a6e206d28b34d524cb6b1e
|
||||
|
|
|
@ -1 +1 @@
|
|||
77ebc3feb089c28155cf20873fb4eabd26fa50c1
|
||||
9a059cb6bced5cdc950f7816602ac92d89a899be
|
127
src/func.c
127
src/func.c
|
@ -16,7 +16,7 @@
|
|||
** sqliteRegisterBuildinFunctions() found at the bottom of the file.
|
||||
** All other code has file scope.
|
||||
**
|
||||
** $Id: func.c,v 1.161 2007/06/22 15:21:16 danielk1977 Exp $
|
||||
** $Id: func.c,v 1.162 2007/07/23 19:12:42 drh Exp $
|
||||
*/
|
||||
#include "sqliteInt.h"
|
||||
#include <ctype.h>
|
||||
|
@ -26,6 +26,7 @@
|
|||
#include "vdbeInt.h"
|
||||
#include "os.h"
|
||||
|
||||
|
||||
/*
|
||||
** Return the collating function associated with a function.
|
||||
*/
|
||||
|
@ -397,15 +398,6 @@ static const struct compareInfo likeInfoNorm = { '%', '_', 0, 1 };
|
|||
** is case sensitive causing 'a' LIKE 'A' to be false */
|
||||
static const struct compareInfo likeInfoAlt = { '%', '_', 0, 0 };
|
||||
|
||||
/*
|
||||
** Read a single UTF-8 character and return its value.
|
||||
*/
|
||||
u32 sqlite3ReadUtf8(const unsigned char *z){
|
||||
u32 c;
|
||||
SQLITE_READ_UTF8(z, c);
|
||||
return c;
|
||||
}
|
||||
|
||||
/*
|
||||
** Compare two UTF-8 strings for equality where the first string can
|
||||
** potentially be a "glob" expression. Return true (1) if they
|
||||
|
@ -440,97 +432,102 @@ static int patternCompare(
|
|||
const struct compareInfo *pInfo, /* Information about how to do the compare */
|
||||
const int esc /* The escape character */
|
||||
){
|
||||
register int c;
|
||||
int c, c2;
|
||||
int invert;
|
||||
int seen;
|
||||
int c2;
|
||||
u8 matchOne = pInfo->matchOne;
|
||||
u8 matchAll = pInfo->matchAll;
|
||||
u8 matchSet = pInfo->matchSet;
|
||||
u8 noCase = pInfo->noCase;
|
||||
int prevEscape = 0; /* True if the previous character was 'escape' */
|
||||
|
||||
while( (c = *zPattern)!=0 ){
|
||||
while( (c = sqlite3Utf8Read(zPattern,0,&zPattern))!=0 ){
|
||||
if( !prevEscape && c==matchAll ){
|
||||
while( (c=zPattern[1]) == matchAll || c == matchOne ){
|
||||
if( c==matchOne ){
|
||||
if( *zString==0 ) return 0;
|
||||
SQLITE_SKIP_UTF8(zString);
|
||||
while( (c=sqlite3Utf8Read(zPattern,0,&zPattern)) == matchAll
|
||||
|| c == matchOne ){
|
||||
if( c==matchOne && sqlite3Utf8Read(zString, 0, &zString)==0 ){
|
||||
return 0;
|
||||
}
|
||||
zPattern++;
|
||||
}
|
||||
if( c && esc && sqlite3ReadUtf8(&zPattern[1])==esc ){
|
||||
u8 const *zTemp = &zPattern[1];
|
||||
SQLITE_SKIP_UTF8(zTemp);
|
||||
c = *zTemp;
|
||||
}
|
||||
if( c==0 ) return 1;
|
||||
if( c==matchSet ){
|
||||
assert( esc==0 ); /* This is GLOB, not LIKE */
|
||||
while( *zString && patternCompare(&zPattern[1],zString,pInfo,esc)==0 ){
|
||||
if( c==0 ){
|
||||
return 1;
|
||||
}else if( c==esc ){
|
||||
c = sqlite3Utf8Read(zPattern, 0, &zPattern);
|
||||
if( c==0 ){
|
||||
return 0;
|
||||
}
|
||||
}else if( c==matchSet ){
|
||||
assert( esc==0 ); /* This is GLOB, not LIKE */
|
||||
assert( matchSet<0x80 ); /* '[' is a single-byte character */
|
||||
while( *zString && patternCompare(&zPattern[-1],zString,pInfo,esc)==0 ){
|
||||
SQLITE_SKIP_UTF8(zString);
|
||||
}
|
||||
return *zString!=0;
|
||||
}else{
|
||||
while( (c2 = *zString)!=0 ){
|
||||
if( noCase ){
|
||||
c2 = sqlite3UpperToLower[c2];
|
||||
c = sqlite3UpperToLower[c];
|
||||
while( c2 != 0 && c2 != c ){ c2 = sqlite3UpperToLower[*++zString]; }
|
||||
}else{
|
||||
while( c2 != 0 && c2 != c ){ c2 = *++zString; }
|
||||
}
|
||||
while( (c2 = sqlite3Utf8Read(zString,0,&zString))!=0 ){
|
||||
if( noCase ){
|
||||
c2 = c2<0x80 ? sqlite3UpperToLower[c2] : c2;
|
||||
c = c<0x80 ? sqlite3UpperToLower[c] : c;
|
||||
while( c2 != 0 && c2 != c ){
|
||||
c2 = sqlite3Utf8Read(zString, 0, &zString);
|
||||
if( c2<0x80 ) c2 = sqlite3UpperToLower[c2];
|
||||
}
|
||||
}else{
|
||||
while( c2 != 0 && c2 != c ){
|
||||
c2 = sqlite3Utf8Read(zString, 0, &zString);
|
||||
}
|
||||
if( c2==0 ) return 0;
|
||||
if( patternCompare(&zPattern[1],zString,pInfo,esc) ) return 1;
|
||||
SQLITE_SKIP_UTF8(zString);
|
||||
}
|
||||
if( c2==0 ) return 0;
|
||||
if( patternCompare(zPattern,zString,pInfo,esc) ) return 1;
|
||||
}
|
||||
return 0;
|
||||
}else if( !prevEscape && c==matchOne ){
|
||||
if( sqlite3Utf8Read(zString, 0, &zString)==0 ){
|
||||
return 0;
|
||||
}
|
||||
}else if( !prevEscape && c==matchOne ){
|
||||
if( *zString==0 ) return 0;
|
||||
SQLITE_SKIP_UTF8(zString);
|
||||
zPattern++;
|
||||
}else if( c==matchSet ){
|
||||
int prior_c = 0;
|
||||
assert( esc==0 ); /* This only occurs for GLOB, not LIKE */
|
||||
seen = 0;
|
||||
invert = 0;
|
||||
c = sqlite3ReadUtf8(zString);
|
||||
c = sqlite3Utf8Read(zString, 0, &zString);
|
||||
if( c==0 ) return 0;
|
||||
c2 = *++zPattern;
|
||||
if( c2=='^' ){ invert = 1; c2 = *++zPattern; }
|
||||
c2 = sqlite3Utf8Read(zPattern, 0, &zPattern);
|
||||
if( c2=='^' ){
|
||||
invert = 1;
|
||||
c2 = sqlite3Utf8Read(zPattern, 0, &zPattern);
|
||||
}
|
||||
if( c2==']' ){
|
||||
if( c==']' ) seen = 1;
|
||||
c2 = *++zPattern;
|
||||
c2 = sqlite3Utf8Read(zPattern, 0, &zPattern);
|
||||
}
|
||||
while( (c2 = sqlite3ReadUtf8(zPattern))!=0 && c2!=']' ){
|
||||
if( c2=='-' && zPattern[1]!=']' && zPattern[1]!=0 && prior_c>0 ){
|
||||
zPattern++;
|
||||
c2 = sqlite3ReadUtf8(zPattern);
|
||||
while( c2 && c2!=']' ){
|
||||
if( c2=='-' && zPattern[0]!=']' && zPattern[0]!=0 && prior_c>0 ){
|
||||
c2 = sqlite3Utf8Read(zPattern, 0, &zPattern);
|
||||
if( c>=prior_c && c<=c2 ) seen = 1;
|
||||
prior_c = 0;
|
||||
}else if( c==c2 ){
|
||||
seen = 1;
|
||||
prior_c = c2;
|
||||
}else{
|
||||
if( c==c2 ){
|
||||
seen = 1;
|
||||
}
|
||||
prior_c = c2;
|
||||
}
|
||||
SQLITE_SKIP_UTF8(zPattern);
|
||||
c2 = sqlite3Utf8Read(zPattern, 0, &zPattern);
|
||||
}
|
||||
if( c2==0 || (seen ^ invert)==0 ) return 0;
|
||||
SQLITE_SKIP_UTF8(zString);
|
||||
zPattern++;
|
||||
}else if( esc && !prevEscape && sqlite3ReadUtf8(zPattern)==esc){
|
||||
if( c2==0 || (seen ^ invert)==0 ){
|
||||
return 0;
|
||||
}
|
||||
}else if( esc==c && !prevEscape ){
|
||||
prevEscape = 1;
|
||||
SQLITE_SKIP_UTF8(zPattern);
|
||||
}else{
|
||||
c2 = sqlite3Utf8Read(zString, 0, &zString);
|
||||
if( noCase ){
|
||||
if( sqlite3UpperToLower[c] != sqlite3UpperToLower[*zString] ) return 0;
|
||||
}else{
|
||||
if( c != *zString ) return 0;
|
||||
c = c<0x80 ? sqlite3UpperToLower[c] : c;
|
||||
c2 = c2<0x80 ? sqlite3UpperToLower[c2] : c2;
|
||||
}
|
||||
if( c!=c2 ){
|
||||
return 0;
|
||||
}
|
||||
zPattern++;
|
||||
zString++;
|
||||
prevEscape = 0;
|
||||
}
|
||||
}
|
||||
|
@ -590,7 +587,7 @@ static void likeFunc(
|
|||
"ESCAPE expression must be a single character", -1);
|
||||
return;
|
||||
}
|
||||
escape = sqlite3ReadUtf8(zEsc);
|
||||
escape = sqlite3Utf8Read(zEsc, 0, &zEsc);
|
||||
}
|
||||
if( zA && zB ){
|
||||
struct compareInfo *pInfo = sqlite3_user_data(context);
|
||||
|
|
|
@ -11,7 +11,7 @@
|
|||
*************************************************************************
|
||||
** Internal interface definitions for SQLite.
|
||||
**
|
||||
** @(#) $Id: sqliteInt.h,v 1.578 2007/06/26 10:38:55 danielk1977 Exp $
|
||||
** @(#) $Id: sqliteInt.h,v 1.579 2007/07/23 19:12:42 drh Exp $
|
||||
*/
|
||||
#ifndef _SQLITEINT_H_
|
||||
#define _SQLITEINT_H_
|
||||
|
@ -1556,62 +1556,15 @@ typedef struct {
|
|||
extern int sqlite3_always_code_trigger_setup;
|
||||
|
||||
/*
|
||||
** A lookup table used by the SQLITE_READ_UTF8 macro. The definition
|
||||
** is in utf.c.
|
||||
** Assuming zIn points to the first byte of a UTF-8 character,
|
||||
** advance zIn to point to the first byte of the next UTF-8 character.
|
||||
*/
|
||||
extern const unsigned char sqlite3UtfTrans1[];
|
||||
|
||||
/*
|
||||
** Macros for reading UTF8 characters.
|
||||
**
|
||||
** SQLITE_READ_UTF8(x,c) reads a single UTF8 value out of x and writes
|
||||
** that value into c. The type of x must be unsigned char*. The type
|
||||
** of c must be unsigned int.
|
||||
**
|
||||
** SQLITE_SKIP_UTF8(x) advances x forward by one character. The type of
|
||||
** x must be unsigned char*.
|
||||
**
|
||||
** Notes On Invalid UTF-8:
|
||||
**
|
||||
** * These macros never allow a 7-bit character (0x00 through 0x7f) to
|
||||
** be encoded as a multi-byte character. Any multi-byte character that
|
||||
** attempts to encode a value between 0x00 and 0x7f is rendered as 0xfffd.
|
||||
**
|
||||
** * These macros never allow a UTF16 surrogate value to be encoded.
|
||||
** If a multi-byte character attempts to encode a value between
|
||||
** 0xd800 and 0xe000 then it is rendered as 0xfffd.
|
||||
**
|
||||
** * Bytes in the range of 0x80 through 0xbf which occur as the first
|
||||
** byte of a character are interpreted as single-byte characters
|
||||
** and rendered as themselves even though they are technically
|
||||
** invalid characters.
|
||||
**
|
||||
** * These routines accept an infinite number of different UTF8 encodings
|
||||
** for unicode values 0x80 and greater. They do not change over-length
|
||||
** encodings to 0xfffd as some systems recommend.
|
||||
**
|
||||
*/
|
||||
#define SQLITE_READ_UTF8(zIn, c) { \
|
||||
c = *(zIn++); \
|
||||
if( c>=0xc0 ){ \
|
||||
c = sqlite3UtfTrans1[c-0xc0]; \
|
||||
while( (*zIn & 0xc0)==0x80 ){ \
|
||||
c = (c<<6) + (0x3f & *(zIn++)); \
|
||||
} \
|
||||
if( c<0x80 \
|
||||
|| (c&0xFFFFF800)==0xD800 \
|
||||
|| (c&0xFFFFFFFE)==0xFFFE ){ c = 0xFFFD; } \
|
||||
} \
|
||||
}
|
||||
#define SQLITE_SKIP_UTF8(zIn) { \
|
||||
if( (*(zIn++))>=0xc0 ){ \
|
||||
while( (*zIn & 0xc0)==0x80 ){ zIn++; } \
|
||||
} \
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
/*
|
||||
** The SQLITE_CORRUPT_BKPT macro can be either a constant (for production
|
||||
** builds) or a function call (for debugging). If it is a function call,
|
||||
|
@ -1830,7 +1783,7 @@ int sqlite3GetInt32(const char *, int*);
|
|||
int sqlite3FitsIn64Bits(const char *);
|
||||
int sqlite3Utf16ByteLen(const void *pData, int nChar);
|
||||
int sqlite3Utf8CharLen(const char *pData, int nByte);
|
||||
u32 sqlite3ReadUtf8(const unsigned char *);
|
||||
int sqlite3Utf8Read(const u8*, const u8*, const u8**);
|
||||
int sqlite3PutVarint(unsigned char *, u64);
|
||||
int sqlite3GetVarint(const unsigned char *, u64 *);
|
||||
int sqlite3GetVarint32(const unsigned char *, u32 *);
|
||||
|
|
129
src/utf.c
129
src/utf.c
|
@ -12,7 +12,7 @@
|
|||
** This file contains routines used to translate between UTF-8,
|
||||
** UTF-16, UTF-16BE, and UTF-16LE.
|
||||
**
|
||||
** $Id: utf.c,v 1.51 2007/05/23 16:23:09 danielk1977 Exp $
|
||||
** $Id: utf.c,v 1.52 2007/07/23 19:12:42 drh Exp $
|
||||
**
|
||||
** Notes on UTF-8:
|
||||
**
|
||||
|
@ -60,6 +60,7 @@ const unsigned char sqlite3UtfTrans1[] = {
|
|||
0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x00, 0x00,
|
||||
};
|
||||
|
||||
|
||||
#define WRITE_UTF8(zOut, c) { \
|
||||
if( c<0x00080 ){ \
|
||||
*zOut++ = (c&0xFF); \
|
||||
|
@ -126,6 +127,54 @@ const unsigned char sqlite3UtfTrans1[] = {
|
|||
} \
|
||||
}
|
||||
|
||||
/*
|
||||
** Translate a single UTF-8 character. Return the unicode value.
|
||||
**
|
||||
** During translation, assume that the byte that zTerm points to
|
||||
** is a 0x00.
|
||||
**
|
||||
** Write a pointer to the next unread byte back into *pzNext.
|
||||
**
|
||||
** Notes On Invalid UTF-8:
|
||||
**
|
||||
** * This routine never allows a 7-bit character (0x00 through 0x7f) to
|
||||
** be encoded as a multi-byte character. Any multi-byte character that
|
||||
** attempts to encode a value between 0x00 and 0x7f is rendered as 0xfffd.
|
||||
**
|
||||
** * This routine never allows a UTF16 surrogate value to be encoded.
|
||||
** If a multi-byte character attempts to encode a value between
|
||||
** 0xd800 and 0xe000 then it is rendered as 0xfffd.
|
||||
**
|
||||
** * Bytes in the range of 0x80 through 0xbf which occur as the first
|
||||
** byte of a character are interpreted as single-byte characters
|
||||
** and rendered as themselves even though they are technically
|
||||
** invalid characters.
|
||||
**
|
||||
** * This routine accepts an infinite number of different UTF8 encodings
|
||||
** for unicode values 0x80 and greater. It does not change over-length
|
||||
** encodings to 0xfffd as some systems recommend.
|
||||
*/
|
||||
int sqlite3Utf8Read(
|
||||
const unsigned char *z, /* First byte of UTF-8 character */
|
||||
const unsigned char *zTerm, /* Pretend this byte is 0x00 */
|
||||
const unsigned char **pzNext /* Write first byte past UTF-8 char here */
|
||||
){
|
||||
int c = *(z++); /* Consume the lead byte; z now points at the byte after it */
|
||||
if( c>=0xc0 ){ /* Lead byte starts a multi-byte sequence */
|
||||
c = sqlite3UtfTrans1[c-0xc0]; /* Seed value looked up by (lead byte - 0xc0) */
|
||||
while( z!=zTerm && (*z & 0xc0)==0x80 ){ /* Continuation bytes (10xxxxxx), never reading the byte at zTerm */
|
||||
c = (c<<6) + (0x3f & *(z++)); /* Append low 6 bits. NOTE(review): c is a signed int and the run length is unbounded, so this shift can overflow */
|
||||
}
|
||||
if( c<0x80 /* Multi-byte encoding of a 7-bit value */
|
||||
|| (c&0xFFFFF800)==0xD800 /* 0xD800..0xDFFF: UTF-16 surrogate range */
|
||||
|| (c&0xFFFFFFFE)==0xFFFE ){ c = 0xFFFD; } /* 0xFFFE or 0xFFFF; all three cases are returned as 0xFFFD */
|
||||
}
|
||||
*pzNext = z; /* Report the first unread byte to the caller */
|
||||
return c;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*
|
||||
** If the TRANSLATE_TRACE macro is defined, the value of each Mem is
|
||||
** printed on stderr on the way into and out of sqlite3VdbeMemTranslate().
|
||||
|
@ -219,81 +268,19 @@ int sqlite3VdbeMemTranslate(Mem *pMem, u8 desiredEnc){
|
|||
z = zOut;
|
||||
|
||||
if( pMem->enc==SQLITE_UTF8 ){
|
||||
unsigned int iExtra = 0xD800;
|
||||
|
||||
if( 0==(pMem->flags&MEM_Term) && zTerm>zIn && (zTerm[-1]&0x80) ){
|
||||
/* This UTF8 string is not nul-terminated, and the last byte is
|
||||
** not a character in the ascii range (codepoints 0..127). This
|
||||
** means the SQLITE_READ_UTF8() macro might read past the end
|
||||
** of the allocated buffer.
|
||||
**
|
||||
** There are four possibilities:
|
||||
**
|
||||
** 1. The last byte is the first byte of a non-ASCII character,
|
||||
**
|
||||
** 2. The final N bytes of the input string are continuation bytes
|
||||
** and immediately preceding them is the first byte of a
|
||||
** non-ASCII character.
|
||||
**
|
||||
** 3. The final N bytes of the input string are continuation bytes
|
||||
** and immediately preceding them is a byte that encodes a
|
||||
** character in the ASCII range.
|
||||
**
|
||||
** 4. The entire string consists of continuation characters.
|
||||
**
|
||||
** Cases (3) and (4) require no special handling. The SQLITE_READ_UTF8()
|
||||
** macro will not overread the buffer in these cases.
|
||||
*/
|
||||
unsigned char *zExtra = &zTerm[-1];
|
||||
while( zExtra>zIn && (zExtra[0]&0xC0)==0x80 ){
|
||||
zExtra--;
|
||||
}
|
||||
|
||||
if( (zExtra[0]&0xC0)==0xC0 ){
|
||||
/* Make a copy of the last character encoding in the input string.
|
||||
** Then make sure it is nul-terminated and use SQLITE_READ_UTF8()
|
||||
** to decode the codepoint. Store the codepoint in variable iExtra,
|
||||
** it will be appended to the output string later.
|
||||
*/
|
||||
unsigned char *zFree = 0;
|
||||
unsigned char zBuf[16];
|
||||
int nExtra = (pMem->n+zIn-zExtra);
|
||||
zTerm = zExtra;
|
||||
if( nExtra>15 ){
|
||||
zExtra = sqliteMallocRaw(nExtra+1);
|
||||
if( !zExtra ){
|
||||
return SQLITE_NOMEM;
|
||||
}
|
||||
zFree = zExtra;
|
||||
}else{
|
||||
zExtra = zBuf;
|
||||
}
|
||||
memcpy(zExtra, zTerm, nExtra);
|
||||
zExtra[nExtra] = '\0';
|
||||
SQLITE_READ_UTF8(zExtra, iExtra);
|
||||
sqliteFree(zFree);
|
||||
}
|
||||
}
|
||||
|
||||
if( desiredEnc==SQLITE_UTF16LE ){
|
||||
/* UTF-8 -> UTF-16 Little-endian */
|
||||
while( zIn<zTerm ){
|
||||
SQLITE_READ_UTF8(zIn, c);
|
||||
c = sqlite3Utf8Read(zIn, zTerm, (const u8**)&zIn);
|
||||
WRITE_UTF16LE(z, c);
|
||||
}
|
||||
if( iExtra!=0xD800 ){
|
||||
WRITE_UTF16LE(z, iExtra);
|
||||
}
|
||||
}else{
|
||||
assert( desiredEnc==SQLITE_UTF16BE );
|
||||
/* UTF-8 -> UTF-16 Big-endian */
|
||||
while( zIn<zTerm ){
|
||||
SQLITE_READ_UTF8(zIn, c);
|
||||
c = sqlite3Utf8Read(zIn, zTerm, (const u8**)&zIn);
|
||||
WRITE_UTF16BE(z, c);
|
||||
}
|
||||
if( iExtra!=0xD800 ){
|
||||
WRITE_UTF16BE(z, iExtra);
|
||||
}
|
||||
}
|
||||
pMem->n = z - zOut;
|
||||
*z++ = 0;
|
||||
|
@ -477,11 +464,11 @@ int sqlite3Utf16ByteLen(const void *zIn, int nChar){
|
|||
int sqlite3Utf8To8(unsigned char *zIn){
|
||||
unsigned char *zOut = zIn;
|
||||
unsigned char *zStart = zIn;
|
||||
int c;
|
||||
unsigned char *zTerm;
|
||||
u32 c;
|
||||
|
||||
while(1){
|
||||
SQLITE_READ_UTF8(zIn, c);
|
||||
if( c==0 ) break;
|
||||
while( zIn[0] ){
|
||||
c = sqlite3Utf8Read(zIn, zTerm, (const u8**)&zIn);
|
||||
if( c!=0xfffd ){
|
||||
WRITE_UTF8(zOut, c);
|
||||
}
|
||||
|
@ -501,6 +488,7 @@ void sqlite3UtfSelfTest(){
|
|||
unsigned int i, t;
|
||||
unsigned char zBuf[20];
|
||||
unsigned char *z;
|
||||
unsigned char *zTerm;
|
||||
int n;
|
||||
unsigned int c;
|
||||
|
||||
|
@ -509,8 +497,9 @@ void sqlite3UtfSelfTest(){
|
|||
WRITE_UTF8(z, i);
|
||||
n = z-zBuf;
|
||||
z[0] = 0;
|
||||
zTerm = z;
|
||||
z = zBuf;
|
||||
SQLITE_READ_UTF8(z, c);
|
||||
c = sqlite3Utf8Read(z, zTerm, (const u8**)&z);
|
||||
t = i;
|
||||
if( i>=0xD800 && i<=0xDFFF ) t = 0xFFFD;
|
||||
if( (i&0xFFFFFFFE)==0xFFFE ) t = 0xFFFD;
|
||||
|
|
Loading…
Reference in New Issue