From cfe01796e6e9a9f644247b6e1810ba95bf89832d Mon Sep 17 00:00:00 2001 From: Tatsuo Ishii Date: Tue, 16 Oct 2001 10:09:17 +0000 Subject: [PATCH] Ok, here is the modified encoding table (column1 is the standard name, 2 is our "official" name, and 3 is alias). If there's no objection, I will change them. ASCII SQL_ASCII UTF-8 UNICODE UTF_8 MULE-INTERNAL MULE_INTERNAL ISO-8859-1 LATIN1 ISO_8859_1 ISO-8859-2 LATIN2 ISO_8859_2 ISO-8859-3 LATIN3 ISO_8859_3 ISO-8859-4 LATIN4 ISO_8859_4 ISO-8859-5 ISO_8859_5 ISO-8859-6 ISO_8859_6 ISO-8859-7 ISO_8859_7 ISO-8859-8 ISO_8859_8 ISO-8859-9 LATIN5 ISO_8859_9 ISO-8859-10 LATIN6 ISO_8859_10 ISO-8859-13 LATIN7 ISO_8859_13 ISO-8859-14 LATIN8 ISO_8859_14 ISO-8859-15 LATIN9 ISO_8859_15 ISO-8859-16 LATIN10 ISO_8859_16 --- configure | 12 +++++----- configure.in | 6 ++--- src/backend/utils/mb/conv.c | 23 ++++++++++--------- src/backend/utils/mb/encnames.c | 40 +++++++++++++++++---------------- src/include/mb/pg_wchar.h | 26 ++++++++++----------- 5 files changed, 55 insertions(+), 52 deletions(-) diff --git a/configure b/configure index 7ec6c61285..d650c47a4d 100755 --- a/configure +++ b/configure @@ -895,16 +895,16 @@ if test x"${enable_multibyte+set}" = xset; then enable_multibyte=yes case $enableval in - SQL_ASCII|EUC_JP|EUC_CN|EUC_KR|EUC_TW|UNICODE|MULE_INTERNAL|LATIN1|LATIN2|LATIN3|LATIN4|LATIN5|KOI8|WIN|ALT|ISO_8859_5|ISO_8859_6|ISO_8859_7|ISO_8859_8|ISO_8859_10|ISO_8859_13|ISO_8859_14|ISO_8859_15|ISO_8859_16) + SQL_ASCII|EUC_JP|EUC_CN|EUC_KR|EUC_TW|UNICODE|MULE_INTERNAL|LATIN1|LATIN2|LATIN3|LATIN4|LATIN5|LATIN6|LATIN7|LATIN8|LATIN9|LATIN10|KOI8|WIN|ALT|ISO_8859_5|ISO_8859_6|ISO_8859_7|ISO_8859_8) MULTIBYTE=$enableval;; *) { echo "configure: error: argument to --enable-multibyte must be one of: SQL_ASCII, EUC_JP, EUC_CN, EUC_KR, EUC_TW, UNICODE, MULE_INTERNAL, LATIN1, LATIN2, LATIN3, LATIN4, LATIN5, + LATIN6, LATIN7, LATIN8, LATIN9, LATIN10, KOI8, WIN, ALT, - ISO_8859_6, ISO_8859_7, ISO_8859_8, - ISO_8859_10, ISO_8859_13, 
ISO_8859_14, ISO_8859_15, ISO_8859_16 + ISO_8859_5, ISO_8859_6, ISO_8859_7, ISO_8859_8 Or do not specify an argument to the option to use the default." 1>&2; exit 1; };; esac @@ -2718,7 +2718,7 @@ echo "using CPPFLAGS=$CPPFLAGS" echo "using LDFLAGS=$LDFLAGS" -for ac_prog in mawk gawk nawk awk +for ac_prog in gawk mawk nawk awk do # Extract the first word of "$ac_prog", so it can be a program name with args. set dummy $ac_prog; ac_word=$2 @@ -5542,7 +5542,7 @@ else int main() { /* Ultrix mips cc rejects this. */ -typedef int charset[2]; const charset x; +typedef int charset[2]; const charset x={0,0}; /* SunOS 4.1.1 cc rejects this. */ char const *const *ccp; char **p; @@ -5617,7 +5617,7 @@ for ac_kw in inline __inline__ __inline; do #include "confdefs.h" int main() { -} $ac_kw foo() { +} $ac_kw int foo() { ; return 0; } EOF if { (eval echo configure:5624: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then diff --git a/configure.in b/configure.in index 2c7b9b7141..ca1e80e276 100644 --- a/configure.in +++ b/configure.in @@ -179,7 +179,7 @@ PGAC_ARG_OPTARG(enable, multibyte, [ --enable-multibyte enable multibyte c [MULTIBYTE=SQL_ASCII], [ case $enableval in - SQL_ASCII|EUC_JP|EUC_CN|EUC_KR|EUC_TW|UNICODE|MULE_INTERNAL|LATIN1|LATIN2|LATIN3|LATIN4|LATIN5|KOI8|WIN|ALT|ISO_8859_5|ISO_8859_6|ISO_8859_7|ISO_8859_8|ISO_8859_10|ISO_8859_13|ISO_8859_14|ISO_8859_15|ISO_8859_16) + SQL_ASCII|EUC_JP|EUC_CN|EUC_KR|EUC_TW|UNICODE|MULE_INTERNAL|LATIN1|LATIN2|LATIN3|LATIN4|LATIN5|LATIN6|LATIN7|LATIN8|LATIN9|LATIN10|KOI8|WIN|ALT|ISO_8859_5|ISO_8859_6|ISO_8859_7|ISO_8859_8) MULTIBYTE=$enableval;; *) AC_MSG_ERROR( @@ -187,9 +187,9 @@ PGAC_ARG_OPTARG(enable, multibyte, [ --enable-multibyte enable multibyte c SQL_ASCII, EUC_JP, EUC_CN, EUC_KR, EUC_TW, UNICODE, MULE_INTERNAL, LATIN1, LATIN2, LATIN3, LATIN4, LATIN5, + LATIN6, LATIN7, LATIN8, LATIN9, LATIN10, KOI8, WIN, ALT, - ISO_8859_6, ISO_8859_7, ISO_8859_8, - ISO_8859_10, ISO_8859_13, ISO_8859_14, ISO_8859_15, ISO_8859_16 + 
ISO_8859_5, ISO_8859_6, ISO_8859_7, ISO_8859_8 Or do not specify an argument to the option to use the default.]);; esac ], diff --git a/src/backend/utils/mb/conv.c b/src/backend/utils/mb/conv.c index f6c0ac090a..68fec7e090 100644 --- a/src/backend/utils/mb/conv.c +++ b/src/backend/utils/mb/conv.c @@ -6,7 +6,7 @@ * WIN1250 client encoding support contributed by Pavel Behal * SJIS UDC (NEC selection IBM kanji) support contributed by Eiji Tokuya * - * $Id: conv.c,v 1.31 2001/10/11 14:20:35 ishii Exp $ + * $Id: conv.c,v 1.32 2001/10/16 10:09:17 ishii Exp $ * * */ @@ -1692,6 +1692,11 @@ pg_enconv pg_enconv_tbl[] = { PG_LATIN3, latin32mic, mic2latin3, iso8859_3_to_utf, utf_to_iso8859_3}, { PG_LATIN4, latin42mic, mic2latin4, iso8859_4_to_utf, utf_to_iso8859_4}, { PG_LATIN5, iso2mic, mic2iso, iso8859_9_to_utf, utf_to_iso8859_9}, + { PG_LATIN6, 0, 0, iso8859_10_to_utf, utf_to_iso8859_10}, + { PG_LATIN7, 0, 0, iso8859_13_to_utf, utf_to_iso8859_13}, + { PG_LATIN8, 0, 0, iso8859_14_to_utf, utf_to_iso8859_14}, + { PG_LATIN9, 0, 0, iso8859_15_to_utf, utf_to_iso8859_15}, + { PG_LATIN10, 0, 0, iso8859_16_to_utf, utf_to_iso8859_16}, { PG_KOI8R, koi8r2mic, mic2koi8r, KOI8R_to_utf, utf_to_KOI8R}, { PG_WIN1251, win12512mic, mic2win1251, WIN1251_to_utf, utf_to_WIN1251}, { PG_ALT, alt2mic, mic2alt, ALT_to_utf, utf_to_ALT}, @@ -1699,11 +1704,7 @@ pg_enconv pg_enconv_tbl[] = { PG_ISO_8859_6, 0, 0, iso8859_6_to_utf, utf_to_iso8859_6}, { PG_ISO_8859_7, 0, 0, iso8859_7_to_utf, utf_to_iso8859_7}, { PG_ISO_8859_8, 0, 0, iso8859_8_to_utf, utf_to_iso8859_8}, - { PG_ISO_8859_10, 0, 0, iso8859_10_to_utf, utf_to_iso8859_10}, - { PG_ISO_8859_13, 0, 0, iso8859_13_to_utf, utf_to_iso8859_13}, - { PG_ISO_8859_14, 0, 0, iso8859_14_to_utf, utf_to_iso8859_14}, - { PG_ISO_8859_15, 0, 0, iso8859_15_to_utf, utf_to_iso8859_15}, - { PG_ISO_8859_16, 0, 0, iso8859_16_to_utf, utf_to_iso8859_16}, + { PG_SJIS, sjis2mic, mic2sjis, sjis_to_utf, utf_to_sjis}, { PG_BIG5, big52mic, mic2big5, big5_to_utf, utf_to_big5}, { 
PG_WIN1250, win12502mic, mic2win1250, 0, 0}, @@ -1725,6 +1726,11 @@ pg_enconv pg_enconv_tbl[] = { PG_LATIN3, latin32mic, mic2latin3, 0, 0}, { PG_LATIN4, latin42mic, mic2latin4, 0, 0}, { PG_LATIN5, iso2mic, mic2iso, 0, 0}, + { PG_LATIN6, 0, 0, 0, 0}, + { PG_LATIN7, 0, 0, 0, 0}, + { PG_LATIN8, 0, 0, 0, 0}, + { PG_LATIN9, 0, 0, 0, 0}, + { PG_LATIN10, 0, 0, 0, 0}, { PG_KOI8R, koi8r2mic, mic2koi8r, 0, 0}, { PG_WIN1251, win12512mic, mic2win1251, 0, 0}, { PG_ALT, alt2mic, mic2alt, 0, 0}, @@ -1732,11 +1738,6 @@ pg_enconv pg_enconv_tbl[] = { PG_ISO_8859_6, 0, 0, 0, 0}, { PG_ISO_8859_7, 0, 0, 0, 0}, { PG_ISO_8859_8, 0, 0, 0, 0}, - { PG_ISO_8859_10, 0, 0, 0, 0}, - { PG_ISO_8859_13, 0, 0, 0, 0}, - { PG_ISO_8859_14, 0, 0, 0, 0}, - { PG_ISO_8859_15, 0, 0, 0, 0}, - { PG_ISO_8859_16, 0, 0, 0, 0}, { PG_SJIS, sjis2mic, mic2sjis, 0, 0}, { PG_BIG5, big52mic, mic2big5, 0, 0}, { PG_WIN1250, win12502mic, mic2win1250, 0, 0}, diff --git a/src/backend/utils/mb/encnames.c b/src/backend/utils/mb/encnames.c index 51fc8bd61c..f2e16460eb 100644 --- a/src/backend/utils/mb/encnames.c +++ b/src/backend/utils/mb/encnames.c @@ -2,7 +2,7 @@ * Encoding names and routines for work with it. All * in this file is shared bedween FE and BE. * - * $Id: encnames.c,v 1.3 2001/10/11 14:20:35 ishii Exp $ + * $Id: encnames.c,v 1.4 2001/10/16 10:09:17 ishii Exp $ */ #ifdef FRONTEND #include "postgres_fe.h" @@ -38,16 +38,16 @@ pg_encname pg_encname_tbl[] = { { "alt", PG_ALT }, /* IBM866 */ { "big5", PG_BIG5 }, /* Big5; Chinese for Taiwan Multi-byte set */ - { "euccn", PG_EUC_CN }, /* EUC-CN; ??? */ - { "eucjp", PG_EUC_JP }, /* EUC-JP; Extended UNIX Code Fixed Width for Japanese, stdandard OSF */ + { "euccn", PG_EUC_CN }, /* EUC-CN; Extended Unix Code for simplified Chinese */ + { "eucjp", PG_EUC_JP }, /* EUC-JP; Extended UNIX Code fixed width for Japanese, standard OSF */ { "euckr", PG_EUC_KR }, /* EUC-KR; RFC1557,Choi */ - { "euctw", PG_EUC_TW }, /* EUC-TW; ??? 
*/ + { "euctw", PG_EUC_TW }, /* EUC-TW; Extended Unix Code for traditional Chinese */ { "iso88591", PG_LATIN1 }, /* ISO-8859-1; RFC1345,KXS2 */ - { "iso885910", PG_ISO_8859_10 }, /* ISO-8859-10; RFC1345,KXS2 */ - { "iso885913", PG_ISO_8859_13 }, /* ISO-8859-13; RFC1345,KXS2 */ - { "iso885914", PG_ISO_8859_14 }, /* ISO-8859-14; RFC1345,KXS2 */ - { "iso885915", PG_ISO_8859_15 }, /* ISO-8859-15; RFC1345,KXS2 */ - { "iso885916", PG_ISO_8859_16 }, /* ISO-8859-15; RFC1345,KXS2 */ + { "iso885910", PG_LATIN6 }, /* ISO-8859-10; RFC1345,KXS2 */ + { "iso885913", PG_LATIN7 }, /* ISO-8859-13; RFC1345,KXS2 */ + { "iso885914", PG_LATIN8 }, /* ISO-8859-14; RFC1345,KXS2 */ + { "iso885915", PG_LATIN9 }, /* ISO-8859-15; RFC1345,KXS2 */ + { "iso885916", PG_LATIN10 }, /* ISO-8859-16; RFC1345,KXS2 */ { "iso88592", PG_LATIN2 }, /* ISO-8859-2; RFC1345,KXS2 */ { "iso88593", PG_LATIN3 }, /* ISO-8859-3; RFC1345,KXS2 */ { "iso88594", PG_LATIN4 }, /* ISO-8859-4; RFC1345,KXS2 */ @@ -59,14 +59,15 @@ pg_encname pg_encname_tbl[] = { "koi8", PG_KOI8R }, /* _dirty_ alias for KOI8-R (backward compatibility) */ { "koi8r", PG_KOI8R }, /* KOI8-R; RFC1489 */ { "latin1", PG_LATIN1 }, /* alias for ISO-8859-1 */ + { "latin10", PG_LATIN10}, /* alias for ISO-8859-16 */ { "latin2", PG_LATIN2 }, /* alias for ISO-8859-2 */ { "latin3", PG_LATIN3 }, /* alias for ISO-8859-3 */ { "latin4", PG_LATIN4 }, /* alias for ISO-8859-4 */ { "latin5", PG_LATIN5 }, /* alias for ISO-8859-9 */ - { "latin6", PG_ISO_8859_10}, /* alias for ISO-8859-10 */ - { "latin7", PG_ISO_8859_13}, /* alias for ISO-8859-13 */ - { "latin8", PG_ISO_8859_14}, /* alias for ISO-8859-14 */ - { "latin9", PG_ISO_8859_15}, /* alias for ISO-8859-15 */ + { "latin6", PG_LATIN6}, /* alias for ISO-8859-10 */ + { "latin7", PG_LATIN7}, /* alias for ISO-8859-13 */ + { "latin8", PG_LATIN8}, /* alias for ISO-8859-14 */ + { "latin9", PG_LATIN9}, /* alias for ISO-8859-15 */ { "mskanji", PG_SJIS }, /* alias for Shift_JIS */ { "muleinternal",PG_MULE_INTERNAL }, { 
"shiftjis", PG_SJIS }, /* Shift_JIS; JIS X 0202-1991 */ @@ -87,7 +88,8 @@ unsigned int pg_encname_tbl_sz = \ sizeof(pg_encname_tbl) / sizeof(pg_encname_tbl[0]) -1; /* ---------- - * WARNING: sorted by pg_enc enum (pg_wchar.h)! + * These are "official" encoding names. + * XXX must be sorted by the same order as pg_enc type (see mb/pg_wchar.h) * ---------- */ pg_enc2name pg_enc2name_tbl[] = @@ -104,6 +106,11 @@ pg_enc2name pg_enc2name_tbl[] = { "LATIN3", PG_LATIN3 }, { "LATIN4", PG_LATIN4 }, { "LATIN5", PG_LATIN5 }, + { "LATIN6", PG_LATIN6 }, + { "LATIN7", PG_LATIN7 }, + { "LATIN8", PG_LATIN8 }, + { "LATIN9", PG_LATIN9 }, + { "LATIN10", PG_LATIN10 }, { "KOI8", PG_KOI8R }, { "WIN", PG_WIN1251 }, { "ALT", PG_ALT }, @@ -111,11 +118,6 @@ pg_enc2name pg_enc2name_tbl[] = { "ISO_8859_6", PG_ISO_8859_6 }, { "ISO_8859_7", PG_ISO_8859_7 }, { "ISO_8859_8", PG_ISO_8859_8 }, - { "ISO_8859_10", PG_ISO_8859_10 }, - { "ISO_8859_13", PG_ISO_8859_13 }, - { "ISO_8859_14", PG_ISO_8859_14 }, - { "ISO_8859_15", PG_ISO_8859_15 }, - { "ISO_8859_16", PG_ISO_8859_16 }, { "SJIS", PG_SJIS }, { "BIG5", PG_BIG5 }, { "WIN1250", PG_WIN1250 } diff --git a/src/include/mb/pg_wchar.h b/src/include/mb/pg_wchar.h index 38d06661d5..c1e60312ab 100644 --- a/src/include/mb/pg_wchar.h +++ b/src/include/mb/pg_wchar.h @@ -1,4 +1,4 @@ -/* $Id: pg_wchar.h,v 1.33 2001/10/11 14:20:35 ishii Exp $ */ +/* $Id: pg_wchar.h,v 1.34 2001/10/16 10:09:17 ishii Exp $ */ #ifndef PG_WCHAR_H #define PG_WCHAR_H @@ -141,7 +141,7 @@ typedef unsigned int pg_wchar; * Encoding numeral identificators * * WARNING: the order of this table must be same as order - * in the pg_enconv[] (mb/conv.c) and pg_enc2name[] (mb/names.c) array! + * in the pg_enconv[] (mb/conv.c) and pg_enc2name[] (mb/encnames.c) array! * * If you add some encoding don'y forget check * PG_ENCODING_[BE|FE]_LAST macros. 
@@ -157,11 +157,16 @@ typedef enum pg_enc PG_EUC_TW, /* EUC for Taiwan */ PG_UTF8, /* Unicode UTF-8 */ PG_MULE_INTERNAL, /* Mule internal code */ - PG_LATIN1, /* ISO-8859 Latin 1 */ - PG_LATIN2, /* ISO-8859 Latin 2 */ - PG_LATIN3, /* ISO-8859 Latin 3 */ - PG_LATIN4, /* ISO-8859 Latin 4 */ + PG_LATIN1, /* ISO-8859-1 Latin 1 */ + PG_LATIN2, /* ISO-8859-2 Latin 2 */ + PG_LATIN3, /* ISO-8859-3 Latin 3 */ + PG_LATIN4, /* ISO-8859-4 Latin 4 */ PG_LATIN5, /* ISO-8859-9 Latin 5 */ + PG_LATIN6, /* ISO-8859-10 Latin6 */ + PG_LATIN7, /* ISO-8859-13 Latin7 */ + PG_LATIN8, /* ISO-8859-14 Latin8 */ + PG_LATIN9, /* ISO-8859-15 Latin9 */ + PG_LATIN10, /* ISO-8859-16 Latin10 */ PG_KOI8R, /* KOI8-R */ PG_WIN1251, /* windows-1251 (was: WIN) */ PG_ALT, /* (MS-DOS CP866) */ @@ -169,12 +174,7 @@ typedef enum pg_enc PG_ISO_8859_6, /* ISO-8859-6 */ PG_ISO_8859_7, /* ISO-8859-7 */ PG_ISO_8859_8, /* ISO-8859-8 */ - PG_ISO_8859_10, /* ISO-8859-10 Latin6 */ - PG_ISO_8859_13, /* ISO-8859-13 Latin7 */ - PG_ISO_8859_14, /* ISO-8859-14 Latin8 */ - PG_ISO_8859_15, /* ISO-8859-15 Latin9 */ - PG_ISO_8859_16, /* ISO-8859-16 */ - + /* followings are for client encoding only */ PG_SJIS, /* Shift JIS */ PG_BIG5, /* Big5 */ @@ -184,7 +184,7 @@ typedef enum pg_enc } pg_enc; -#define PG_ENCODING_BE_LAST PG_ISO_8859_16 +#define PG_ENCODING_BE_LAST PG_ISO_8859_8 #define PG_ENCODING_FE_LAST PG_WIN1250