Implement the following item in TODO:
* Reject character sequences that are not valid in their charset
This commit is contained in:
parent
d7f3cbc288
commit
e1de3e0833
@ -6,7 +6,7 @@
|
|||||||
* WIN1250 client encoding support contributed by Pavel Behal
|
* WIN1250 client encoding support contributed by Pavel Behal
|
||||||
* SJIS UDC (NEC selection IBM kanji) support contributed by Eiji Tokuya
|
* SJIS UDC (NEC selection IBM kanji) support contributed by Eiji Tokuya
|
||||||
*
|
*
|
||||||
* $Id: conv.c,v 1.27 2001/09/06 04:57:29 ishii Exp $
|
* $Id: conv.c,v 1.28 2001/09/11 04:50:36 ishii Exp $
|
||||||
*
|
*
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
@ -1633,48 +1633,48 @@ big5_to_utf(unsigned char *euc, unsigned char *utf, int len)
|
|||||||
*/
|
*/
|
||||||
pg_enconv pg_enconv_tbl[] =
|
pg_enconv pg_enconv_tbl[] =
|
||||||
{
|
{
|
||||||
{ PG_SQL_ASCII, ascii2mic, mic2ascii, ascii2utf, utf2ascii },
|
{ PG_SQL_ASCII, ascii2mic, mic2ascii, ascii2utf, utf2ascii},
|
||||||
{ PG_EUC_JP, euc_jp2mic, mic2euc_jp, euc_jp_to_utf, utf_to_euc_jp },
|
{ PG_EUC_JP, euc_jp2mic, mic2euc_jp, euc_jp_to_utf, utf_to_euc_jp},
|
||||||
{ PG_EUC_CN, euc_cn2mic, mic2euc_cn, euc_cn_to_utf, utf_to_euc_cn },
|
{ PG_EUC_CN, euc_cn2mic, mic2euc_cn, euc_cn_to_utf, utf_to_euc_cn},
|
||||||
{ PG_EUC_KR, euc_kr2mic, mic2euc_kr, euc_kr_to_utf, utf_to_euc_kr },
|
{ PG_EUC_KR, euc_kr2mic, mic2euc_kr, euc_kr_to_utf, utf_to_euc_kr},
|
||||||
{ PG_EUC_TW, euc_tw2mic, mic2euc_tw, euc_tw_to_utf, utf_to_euc_tw },
|
{ PG_EUC_TW, euc_tw2mic, mic2euc_tw, euc_tw_to_utf, utf_to_euc_tw},
|
||||||
{ PG_UTF8, 0, 0, 0, 0 },
|
{ PG_UTF8, 0, 0, 0, 0},
|
||||||
{ PG_MULE_INTERNAL, 0, 0, 0, 0 },
|
{ PG_MULE_INTERNAL, 0, 0, 0, 0},
|
||||||
{ PG_LATIN1, latin12mic, mic2latin1, latin1_to_utf, utf_to_latin1 },
|
{ PG_LATIN1, latin12mic, mic2latin1, latin1_to_utf, utf_to_latin1},
|
||||||
{ PG_LATIN2, latin22mic, mic2latin2, latin2_to_utf, utf_to_latin2 },
|
{ PG_LATIN2, latin22mic, mic2latin2, latin2_to_utf, utf_to_latin2},
|
||||||
{ PG_LATIN3, latin32mic, mic2latin3, latin3_to_utf, utf_to_latin3 },
|
{ PG_LATIN3, latin32mic, mic2latin3, latin3_to_utf, utf_to_latin3},
|
||||||
{ PG_LATIN4, latin42mic, mic2latin4, latin4_to_utf, utf_to_latin4 },
|
{ PG_LATIN4, latin42mic, mic2latin4, latin4_to_utf, utf_to_latin4},
|
||||||
{ PG_LATIN5, iso2mic, mic2iso, latin5_to_utf, utf_to_latin5 },
|
{ PG_LATIN5, iso2mic, mic2iso, latin5_to_utf, utf_to_latin5},
|
||||||
{ PG_KOI8R, koi8r2mic, mic2koi8r, KOI8R_to_utf, utf_to_KOI8R },
|
{ PG_KOI8R, koi8r2mic, mic2koi8r, KOI8R_to_utf, utf_to_KOI8R},
|
||||||
{ PG_WIN1251, win12512mic, mic2win1251, WIN1251_to_utf, utf_to_WIN1251 },
|
{ PG_WIN1251, win12512mic, mic2win1251, WIN1251_to_utf, utf_to_WIN1251},
|
||||||
{ PG_ALT, alt2mic, mic2alt, ALT_to_utf, utf_to_ALT },
|
{ PG_ALT, alt2mic, mic2alt, ALT_to_utf, utf_to_ALT},
|
||||||
{ PG_SJIS, sjis2mic, mic2sjis, sjis_to_utf, utf_to_sjis },
|
{ PG_SJIS, sjis2mic, mic2sjis, sjis_to_utf, utf_to_sjis},
|
||||||
{ PG_BIG5, big52mic, mic2big5, big5_to_utf, utf_to_big5},
|
{ PG_BIG5, big52mic, mic2big5, big5_to_utf, utf_to_big5},
|
||||||
{ PG_WIN1250, win12502mic, mic2win1250, 0, 0 },
|
{ PG_WIN1250, win12502mic, mic2win1250, 0, 0},
|
||||||
};
|
};
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
pg_enconv pg_enconv_tbl[] =
|
pg_enconv pg_enconv_tbl[] =
|
||||||
{
|
{
|
||||||
{ PG_SQL_ASCII, ascii2mic, mic2ascii, 0, 0 },
|
{ PG_SQL_ASCII, ascii2mic, mic2ascii, 0, 0},
|
||||||
{ PG_EUC_JP, euc_jp2mic, mic2euc_jp, 0, 0 },
|
{ PG_EUC_JP, euc_jp2mic, mic2euc_jp, 0, 0},
|
||||||
{ PG_EUC_CN, euc_cn2mic, mic2euc_cn, 0, 0 },
|
{ PG_EUC_CN, euc_cn2mic, mic2euc_cn, 0, 0},
|
||||||
{ PG_EUC_KR, euc_kr2mic, mic2euc_kr, 0, 0 },
|
{ PG_EUC_KR, euc_kr2mic, mic2euc_kr, 0, 0},
|
||||||
{ PG_EUC_TW, euc_tw2mic, mic2euc_tw, 0, 0 },
|
{ PG_EUC_TW, euc_tw2mic, mic2euc_tw, 0, 0},
|
||||||
{ PG_UTF8, 0, 0, 0, 0 },
|
{ PG_UTF8, 0, 0, 0, 0},
|
||||||
{ PG_MULE_INTERNAL, 0, 0, 0, 0 },
|
{ PG_MULE_INTERNAL, 0, 0, 0, 0},
|
||||||
{ PG_LATIN1, latin12mic, mic2latin1, 0, 0 },
|
{ PG_LATIN1, latin12mic, mic2latin1, 0, 0},
|
||||||
{ PG_LATIN2, latin22mic, mic2latin2, 0, 0 },
|
{ PG_LATIN2, latin22mic, mic2latin2, 0, 0},
|
||||||
{ PG_LATIN3, latin32mic, mic2latin3, 0, 0 },
|
{ PG_LATIN3, latin32mic, mic2latin3, 0, 0},
|
||||||
{ PG_LATIN4, latin42mic, mic2latin4, 0, 0 },
|
{ PG_LATIN4, latin42mic, mic2latin4, 0, 0},
|
||||||
{ PG_LATIN5, iso2mic, mic2iso, 0, 0 },
|
{ PG_LATIN5, iso2mic, mic2iso, 0, 0},
|
||||||
{ PG_KOI8R, koi8r2mic, mic2koi8r, 0, 0 },
|
{ PG_KOI8R, koi8r2mic, mic2koi8r, 0, 0},
|
||||||
{ PG_WIN1251, win12512mic, mic2win1251, 0, 0 },
|
{ PG_WIN1251, win12512mic, mic2win1251, 0, 0},
|
||||||
{ PG_ALT, alt2mic, mic2alt, 0, 0 },
|
{ PG_ALT, alt2mic, mic2alt, 0, 0},
|
||||||
{ PG_SJIS, sjis2mic, mic2sjis, 0, 0 },
|
{ PG_SJIS, sjis2mic, mic2sjis, 0, 0},
|
||||||
{ PG_BIG5, big52mic, mic2big5, 0, 0 },
|
{ PG_BIG5, big52mic, mic2big5, 0, 0},
|
||||||
{ PG_WIN1250, win12502mic, mic2win1250, 0, 0 },
|
{ PG_WIN1250, win12502mic, mic2win1250, 0, 0},
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif /* UNICODE_CONVERSION */
|
#endif /* UNICODE_CONVERSION */
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
/*
|
/*
|
||||||
* conversion functions between pg_wchar and multi-byte streams.
|
* conversion functions between pg_wchar and multi-byte streams.
|
||||||
* Tatsuo Ishii
|
* Tatsuo Ishii
|
||||||
* $Id: wchar.c,v 1.19 2001/09/06 04:57:29 ishii Exp $
|
* $Id: wchar.c,v 1.20 2001/09/11 04:50:36 ishii Exp $
|
||||||
*
|
*
|
||||||
* WIN1250 client encoding updated by Pavel Behal
|
* WIN1250 client encoding updated by Pavel Behal
|
||||||
*
|
*
|
||||||
@ -458,24 +458,24 @@ pg_big5_mblen(const unsigned char *s)
|
|||||||
}
|
}
|
||||||
|
|
||||||
pg_wchar_tbl pg_wchar_table[] = {
|
pg_wchar_tbl pg_wchar_table[] = {
|
||||||
{pg_ascii2wchar_with_len, pg_ascii_mblen}, /* 0; PG_SQL_ASCII */
|
{pg_ascii2wchar_with_len, pg_ascii_mblen, 1}, /* 0; PG_SQL_ASCII */
|
||||||
{pg_eucjp2wchar_with_len, pg_eucjp_mblen}, /* 1; PG_EUC_JP */
|
{pg_eucjp2wchar_with_len, pg_eucjp_mblen, 3}, /* 1; PG_EUC_JP */
|
||||||
{pg_euccn2wchar_with_len, pg_euccn_mblen}, /* 2; PG_EUC_CN */
|
{pg_euccn2wchar_with_len, pg_euccn_mblen, 3}, /* 2; PG_EUC_CN */
|
||||||
{pg_euckr2wchar_with_len, pg_euckr_mblen}, /* 3; PG_EUC_KR */
|
{pg_euckr2wchar_with_len, pg_euckr_mblen, 3}, /* 3; PG_EUC_KR */
|
||||||
{pg_euctw2wchar_with_len, pg_euctw_mblen}, /* 4; PG_EUC_TW */
|
{pg_euctw2wchar_with_len, pg_euctw_mblen, 3}, /* 4; PG_EUC_TW */
|
||||||
{pg_utf2wchar_with_len, pg_utf_mblen}, /* 5; PG_UNICODE */
|
{pg_utf2wchar_with_len, pg_utf_mblen, 3}, /* 5; PG_UNICODE */
|
||||||
{pg_mule2wchar_with_len, pg_mule_mblen}, /* 6; PG_MULE_INTERNAL */
|
{pg_mule2wchar_with_len, pg_mule_mblen, 3}, /* 6; PG_MULE_INTERNAL */
|
||||||
{pg_latin12wchar_with_len, pg_latin1_mblen}, /* 7; PG_LATIN1 */
|
{pg_latin12wchar_with_len, pg_latin1_mblen, 1}, /* 7; PG_LATIN1 */
|
||||||
{pg_latin12wchar_with_len, pg_latin1_mblen}, /* 8; PG_LATIN2 */
|
{pg_latin12wchar_with_len, pg_latin1_mblen, 1}, /* 8; PG_LATIN2 */
|
||||||
{pg_latin12wchar_with_len, pg_latin1_mblen}, /* 9; PG_LATIN3 */
|
{pg_latin12wchar_with_len, pg_latin1_mblen, 1}, /* 9; PG_LATIN3 */
|
||||||
{pg_latin12wchar_with_len, pg_latin1_mblen}, /* 10; PG_LATIN4 */
|
{pg_latin12wchar_with_len, pg_latin1_mblen, 1}, /* 10; PG_LATIN4 */
|
||||||
{pg_latin12wchar_with_len, pg_latin1_mblen}, /* 11; PG_LATIN5 */
|
{pg_latin12wchar_with_len, pg_latin1_mblen, 1}, /* 11; PG_LATIN5 */
|
||||||
{pg_latin12wchar_with_len, pg_latin1_mblen}, /* 12; PG_KOI8 */
|
{pg_latin12wchar_with_len, pg_latin1_mblen, 1}, /* 12; PG_KOI8 */
|
||||||
{pg_latin12wchar_with_len, pg_latin1_mblen}, /* 13; PG_WIN1251 */
|
{pg_latin12wchar_with_len, pg_latin1_mblen, 1}, /* 13; PG_WIN1251 */
|
||||||
{pg_latin12wchar_with_len, pg_latin1_mblen}, /* 14; PG_ALT */
|
{pg_latin12wchar_with_len, pg_latin1_mblen, 1}, /* 14; PG_ALT */
|
||||||
{0, pg_sjis_mblen}, /* 15; PG_SJIS */
|
{0, pg_sjis_mblen, 2}, /* 15; PG_SJIS */
|
||||||
{0, pg_big5_mblen}, /* 17; PG_BIG5 */
|
{0, pg_big5_mblen, 2}, /* 17; PG_BIG5 */
|
||||||
{pg_latin12wchar_with_len, pg_latin1_mblen} /* 18; PG_WIN1250 */
|
{pg_latin12wchar_with_len, pg_latin1_mblen, 1} /* 18; PG_WIN1250 */
|
||||||
};
|
};
|
||||||
|
|
||||||
/* returns the byte length of a word for mule internal code */
|
/* returns the byte length of a word for mule internal code */
|
||||||
@ -498,3 +498,68 @@ pg_encoding_mblen(int encoding, const unsigned char *mbstr)
|
|||||||
((*pg_wchar_table[encoding].mblen) (mbstr)) :
|
((*pg_wchar_table[encoding].mblen) (mbstr)) :
|
||||||
((*pg_wchar_table[PG_SQL_ASCII].mblen) (mbstr)));
|
((*pg_wchar_table[PG_SQL_ASCII].mblen) (mbstr)));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifndef FRONTEND
|
||||||
|
/*
|
||||||
|
* Verify mbstr to make sure that it has a valid character sequence.
|
||||||
|
* mbstr is not necessarily NULL terminated. length of mbstr is
|
||||||
|
* specified by len. If an error was found, returns an error message.
|
||||||
|
* Note that the message is kept in a static buffer, the next invocation
|
||||||
|
* might break the message.
|
||||||
|
* If no error was found, this function returns NULL.
|
||||||
|
*/
|
||||||
|
char *
|
||||||
|
pg_verifymbstr(const unsigned char *mbstr, int len)
|
||||||
|
{
|
||||||
|
int l;
|
||||||
|
int i, j;
|
||||||
|
static char buf[256];
|
||||||
|
int slen = 0;
|
||||||
|
|
||||||
|
/* we do not check single byte encodings */
|
||||||
|
if (pg_wchar_table[GetDatabaseEncoding()].maxmblen <= 1)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
while (len > 0 && *mbstr)
|
||||||
|
{
|
||||||
|
l = pg_mblen(mbstr);
|
||||||
|
|
||||||
|
/* multi-byte letter? */
|
||||||
|
if (l > 1)
|
||||||
|
{
|
||||||
|
for (i=1;i<l;i++)
|
||||||
|
{
|
||||||
|
if (i > len || *(mbstr+i) == '\0' ||
|
||||||
|
/* we assume that every muti-byte letter
|
||||||
|
* consists of bytes being the 8th bit set
|
||||||
|
*/
|
||||||
|
((*(mbstr+i) & 0x80) == 0))
|
||||||
|
{
|
||||||
|
int remains = sizeof(buf);
|
||||||
|
char *p = buf;
|
||||||
|
|
||||||
|
slen = snprintf(p, remains, "Invalid %s character sequence found (0x",
|
||||||
|
GetDatabaseEncodingName());
|
||||||
|
p += slen;
|
||||||
|
remains -= slen;
|
||||||
|
|
||||||
|
i = ((*(mbstr+i) & 0x80) == 0)?l:i;
|
||||||
|
|
||||||
|
for (j=0;j<i;j++)
|
||||||
|
{
|
||||||
|
slen = snprintf(p, remains, "%02x",
|
||||||
|
*(mbstr+j));
|
||||||
|
p += slen;
|
||||||
|
remains -= slen;
|
||||||
|
}
|
||||||
|
snprintf(p, remains, ")");
|
||||||
|
return(buf);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
len -= l;
|
||||||
|
mbstr += l;
|
||||||
|
}
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
/* $Id: pg_wchar.h,v 1.29 2001/09/06 04:57:29 ishii Exp $ */
|
/* $Id: pg_wchar.h,v 1.30 2001/09/11 04:50:36 ishii Exp $ */
|
||||||
|
|
||||||
#ifndef PG_WCHAR_H
|
#ifndef PG_WCHAR_H
|
||||||
#define PG_WCHAR_H
|
#define PG_WCHAR_H
|
||||||
@ -182,6 +182,8 @@ typedef struct
|
|||||||
int (*mb2wchar_with_len) (); /* convert a multi-byte
|
int (*mb2wchar_with_len) (); /* convert a multi-byte
|
||||||
* string to a wchar */
|
* string to a wchar */
|
||||||
int (*mblen) (); /* returns the length of a multi-byte word */
|
int (*mblen) (); /* returns the length of a multi-byte word */
|
||||||
|
int maxmblen; /* max bytes for a letter in this charset */
|
||||||
|
|
||||||
} pg_wchar_tbl;
|
} pg_wchar_tbl;
|
||||||
|
|
||||||
extern pg_wchar_tbl pg_wchar_table[];
|
extern pg_wchar_tbl pg_wchar_table[];
|
||||||
@ -240,6 +242,8 @@ extern unsigned char *pg_server_to_client(unsigned char *, int);
|
|||||||
extern unsigned short BIG5toCNS(unsigned short, unsigned char *);
|
extern unsigned short BIG5toCNS(unsigned short, unsigned char *);
|
||||||
extern unsigned short CNStoBIG5(unsigned short, unsigned char);
|
extern unsigned short CNStoBIG5(unsigned short, unsigned char);
|
||||||
|
|
||||||
|
/* returns NULL if mbstr is valid, else an error message in a static buffer */
extern char *pg_verifymbstr(const unsigned char *mbstr, int len);
|
||||||
|
|
||||||
#endif /* MULTIBYTE */
|
#endif /* MULTIBYTE */
|
||||||
|
|
||||||
#endif /* PG_WCHAR_H */
|
#endif /* PG_WCHAR_H */
|
||||||
|
Loading…
x
Reference in New Issue
Block a user