From 3c7798f068ce2f334fee95fb125f0641eb10f725 Mon Sep 17 00:00:00 2001 From: Tatsuo Ishii Date: Tue, 16 Jul 2002 09:25:06 +0000 Subject: [PATCH] Add conversion procs for CREATE CONVERSION --- .../utils/mb/conversion_procs/Makefile | 118 +++++ .../conversion_procs/euc_jp_and_sjis/Makefile | 11 + .../euc_jp_and_sjis/euc_jp_and_sjis.c | 456 ++++++++++++++++++ .../conversion_procs/euc_jp_and_sjis/sjis.map | 396 +++++++++++++++ .../conversion_procs/euc_tw_and_big5/Makefile | 14 + .../conversion_procs/euc_tw_and_big5/big5.c | 378 +++++++++++++++ .../euc_tw_and_big5/euc_tw_and_big5.c | 340 +++++++++++++ src/backend/utils/mb/conversion_procs/proc.mk | 21 + .../conversion_procs/utf8_and_ascii/Makefile | 11 + .../utf8_and_ascii/utf8_and_ascii.c | 65 +++ .../conversion_procs/utf8_and_big5/Makefile | 11 + .../utf8_and_big5/utf8_and_big5.c | 68 +++ .../conversion_procs/utf8_and_euc_cn/Makefile | 11 + .../utf8_and_euc_cn/utf8_and_euc_cn.c | 68 +++ .../conversion_procs/utf8_and_euc_jp/Makefile | 11 + .../utf8_and_euc_jp/utf8_and_euc_jp.c | 68 +++ .../conversion_procs/utf8_and_euc_kr/Makefile | 11 + .../utf8_and_euc_kr/utf8_and_euc_kr.c | 68 +++ .../conversion_procs/utf8_and_euc_tw/Makefile | 11 + .../utf8_and_euc_tw/utf8_and_euc_tw.c | 68 +++ .../utf8_and_gb18030/Makefile | 11 + .../utf8_and_gb18030/utf8_and_gb18030.c | 68 +++ .../mb/conversion_procs/utf8_and_gbk/Makefile | 11 + .../utf8_and_gbk/utf8_and_gbk.c | 68 +++ .../utf8_and_iso8859/Makefile | 11 + .../utf8_and_iso8859/utf8_and_iso8859.c | 156 ++++++ .../utf8_and_iso8859_1/Makefile | 11 + .../utf8_and_iso8859_1/utf8_and_iso8859_1.c | 97 ++++ .../conversion_procs/utf8_and_johab/Makefile | 11 + .../utf8_and_johab/utf8_and_johab.c | 68 +++ .../conversion_procs/utf8_and_sjis/Makefile | 11 + .../utf8_and_sjis/utf8_and_sjis.c | 68 +++ .../conversion_procs/utf8_and_tcvn/Makefile | 11 + .../utf8_and_tcvn/utf8_and_tcvn.c | 68 +++ .../mb/conversion_procs/utf8_and_uhc/Makefile | 11 + .../utf8_and_uhc/utf8_and_uhc.c | 68 +++ 36 files 
changed, 2954 insertions(+) create mode 100644 src/backend/utils/mb/conversion_procs/Makefile create mode 100644 src/backend/utils/mb/conversion_procs/euc_jp_and_sjis/Makefile create mode 100644 src/backend/utils/mb/conversion_procs/euc_jp_and_sjis/euc_jp_and_sjis.c create mode 100644 src/backend/utils/mb/conversion_procs/euc_jp_and_sjis/sjis.map create mode 100644 src/backend/utils/mb/conversion_procs/euc_tw_and_big5/Makefile create mode 100644 src/backend/utils/mb/conversion_procs/euc_tw_and_big5/big5.c create mode 100644 src/backend/utils/mb/conversion_procs/euc_tw_and_big5/euc_tw_and_big5.c create mode 100644 src/backend/utils/mb/conversion_procs/proc.mk create mode 100644 src/backend/utils/mb/conversion_procs/utf8_and_ascii/Makefile create mode 100644 src/backend/utils/mb/conversion_procs/utf8_and_ascii/utf8_and_ascii.c create mode 100644 src/backend/utils/mb/conversion_procs/utf8_and_big5/Makefile create mode 100644 src/backend/utils/mb/conversion_procs/utf8_and_big5/utf8_and_big5.c create mode 100644 src/backend/utils/mb/conversion_procs/utf8_and_euc_cn/Makefile create mode 100644 src/backend/utils/mb/conversion_procs/utf8_and_euc_cn/utf8_and_euc_cn.c create mode 100644 src/backend/utils/mb/conversion_procs/utf8_and_euc_jp/Makefile create mode 100644 src/backend/utils/mb/conversion_procs/utf8_and_euc_jp/utf8_and_euc_jp.c create mode 100644 src/backend/utils/mb/conversion_procs/utf8_and_euc_kr/Makefile create mode 100644 src/backend/utils/mb/conversion_procs/utf8_and_euc_kr/utf8_and_euc_kr.c create mode 100644 src/backend/utils/mb/conversion_procs/utf8_and_euc_tw/Makefile create mode 100644 src/backend/utils/mb/conversion_procs/utf8_and_euc_tw/utf8_and_euc_tw.c create mode 100644 src/backend/utils/mb/conversion_procs/utf8_and_gb18030/Makefile create mode 100644 src/backend/utils/mb/conversion_procs/utf8_and_gb18030/utf8_and_gb18030.c create mode 100644 src/backend/utils/mb/conversion_procs/utf8_and_gbk/Makefile create mode 100644 
src/backend/utils/mb/conversion_procs/utf8_and_gbk/utf8_and_gbk.c create mode 100644 src/backend/utils/mb/conversion_procs/utf8_and_iso8859/Makefile create mode 100644 src/backend/utils/mb/conversion_procs/utf8_and_iso8859/utf8_and_iso8859.c create mode 100644 src/backend/utils/mb/conversion_procs/utf8_and_iso8859_1/Makefile create mode 100644 src/backend/utils/mb/conversion_procs/utf8_and_iso8859_1/utf8_and_iso8859_1.c create mode 100644 src/backend/utils/mb/conversion_procs/utf8_and_johab/Makefile create mode 100644 src/backend/utils/mb/conversion_procs/utf8_and_johab/utf8_and_johab.c create mode 100644 src/backend/utils/mb/conversion_procs/utf8_and_sjis/Makefile create mode 100644 src/backend/utils/mb/conversion_procs/utf8_and_sjis/utf8_and_sjis.c create mode 100644 src/backend/utils/mb/conversion_procs/utf8_and_tcvn/Makefile create mode 100644 src/backend/utils/mb/conversion_procs/utf8_and_tcvn/utf8_and_tcvn.c create mode 100644 src/backend/utils/mb/conversion_procs/utf8_and_uhc/Makefile create mode 100644 src/backend/utils/mb/conversion_procs/utf8_and_uhc/utf8_and_uhc.c diff --git a/src/backend/utils/mb/conversion_procs/Makefile b/src/backend/utils/mb/conversion_procs/Makefile new file mode 100644 index 0000000000..e8335d234c --- /dev/null +++ b/src/backend/utils/mb/conversion_procs/Makefile @@ -0,0 +1,118 @@ +#------------------------------------------------------------------------- +# +# Makefile-- +# Makefile for utils/mb/conversion_procs +# +# IDENTIFICATION +# $Header: /cvsroot/pgsql/src/backend/utils/mb/conversion_procs/Makefile,v 1.1 2002/07/16 09:25:04 ishii Exp $ +# +#------------------------------------------------------------------------- + +subdir = src/backend/utils/mb/conversion_procs +top_builddir = ../../../../.. 
+include $(top_builddir)/src/Makefile.global + +SQLSCRIPT = conversion_create.sql + +DIRS = \ + utf8_and_ascii utf8_and_iso8859_1 \ + utf8_and_euc_jp utf8_and_euc_kr utf8_and_euc_cn utf8_and_euc_tw \ + utf8_and_sjis utf8_and_big5 utf8_and_gbk utf8_and_gb18030 \ + utf8_and_uhc utf8_and_johab utf8_and_tcvn utf8_and_iso8859 \ + euc_jp_and_sjis euc_tw_and_big5 + +# conversion_name source_encoding destination_encoding function object +$(SQLSCRIPT): Makefile + @set \ + utf8_to_ascii UNICODE SQL_ASCII utf8_to_ascii utf8_and_ascii \ + ascii_to_utf8 SQL_ASCII UNICODE ascii_to_utf8 utf8_and_ascii \ + utf8_to_iso8859_1 UNICODE LATIN1 utf8_to_iso8859_1 utf8_and_iso8859_1 \ + iso8859_1_to_utf8 LATIN1 UNICODE iso8859_1_to_utf8 utf8_and_iso8859_1 \ + euc_jp_to_utf8 EUC_JP UNICODE euc_jp_to_utf8 utf8_and_euc_jp \ + utf8_to_euc_jp UNICODE EUC_JP utf8_to_euc_jp utf8_and_euc_jp \ + euc_kr_to_utf8 EUC_KR UNICODE euc_kr_to_utf8 utf8_and_euc_kr \ + utf8_to_euc_kr UNICODE EUC_KR utf8_to_euc_kr utf8_and_euc_kr \ + euc_cn_to_utf8 EUC_CN UNICODE euc_cn_to_utf8 utf8_and_euc_cn \ + utf8_to_euc_cn UNICODE EUC_CN utf8_to_euc_cn utf8_and_euc_cn \ + euc_tw_to_utf8 EUC_TW UNICODE euc_tw_to_utf8 utf8_and_euc_tw \ + utf8_to_euc_tw UNICODE EUC_TW utf8_to_euc_tw utf8_and_euc_tw \ + sjis_to_utf8 SJIS UNICODE sjis_to_utf8 utf8_and_sjis \ + utf8_to_sjis UNICODE SJIS utf8_to_sjis utf8_and_sjis \ + big5_to_utf8 BIG5 UNICODE big5_to_utf8 utf8_and_big5 \ + utf8_to_big5 UNICODE BIG5 utf8_to_big5 utf8_and_big5 \ + gbk_to_utf8 GBK UNICODE gbk_to_utf8 utf8_and_gbk \ + utf8_to_gbk UNICODE GBK utf8_to_gbk utf8_and_gbk \ + gb18030_to_utf8 GB18030 UNICODE gb18030_to_utf8 utf8_and_gb18030 \ + utf8_to_gb18030 UNICODE GB18030 utf8_to_gb18030 utf8_and_gb18030 \ + uhc_to_utf8 UHC UNICODE uhc_to_utf8 utf8_and_uhc \ + utf8_to_uhc UNICODE UHC utf8_to_uhc utf8_and_uhc \ + johab_to_utf8 JOHAB UNICODE johab_to_utf8 utf8_and_johab \ + utf8_to_johab UNICODE JOHAB utf8_to_johab utf8_and_johab \ + tcvn_to_utf8 TCVN UNICODE 
tcvn_to_utf8 utf8_and_tcvn \ + utf8_to_tcvn UNICODE TCVN utf8_to_tcvn utf8_and_tcvn \ + utf8_to_iso8859_2 UNICODE LATIN2 utf8_to_iso8859 utf8_and_iso8859 \ + iso8859_2_to_utf8 LATIN2 UNICODE iso8859_to_utf8 utf8_and_iso8859 \ + utf8_to_iso8859_3 UNICODE LATIN3 utf8_to_iso8859 utf8_and_iso8859 \ + iso8859_3_to_utf8 LATIN3 UNICODE iso8859_to_utf8 utf8_and_iso8859 \ + utf8_to_iso8859_4 UNICODE LATIN4 utf8_to_iso8859 utf8_and_iso8859 \ + iso8859_4_to_utf8 LATIN4 UNICODE iso8859_to_utf8 utf8_and_iso8859 \ + utf8_to_iso8859_9 UNICODE LATIN5 utf8_to_iso8859 utf8_and_iso8859 \ + iso8859_9_to_utf8 LATIN5 UNICODE iso8859_to_utf8 utf8_and_iso8859 \ + utf8_to_iso8859_10 UNICODE LATIN6 utf8_to_iso8859 utf8_and_iso8859 \ + iso8859_10_to_utf8 LATIN6 UNICODE iso8859_to_utf8 utf8_and_iso8859 \ + utf8_to_iso8859_13 UNICODE LATIN7 utf8_to_iso8859 utf8_and_iso8859 \ + iso8859_13_to_utf8 LATIN7 UNICODE iso8859_to_utf8 utf8_and_iso8859 \ + utf8_to_iso8859_14 UNICODE LATIN8 utf8_to_iso8859 utf8_and_iso8859 \ + iso8859_14_to_utf8 LATIN8 UNICODE iso8859_to_utf8 utf8_and_iso8859 \ + utf8_to_iso8859_15 UNICODE LATIN9 utf8_to_iso8859 utf8_and_iso8859 \ + iso8859_15_to_utf8 LATIN9 UNICODE iso8859_to_utf8 utf8_and_iso8859 \ + utf8_to_iso8859_16 UNICODE LATIN10 utf8_to_iso8859 utf8_and_iso8859 \ + iso8859_16_to_utf8 LATIN10 UNICODE iso8859_to_utf8 utf8_and_iso8859 \ + utf8_to_iso8859_5 UNICODE ISO-8859-5 utf8_to_iso8859 utf8_and_iso8859 \ + iso8859_5_to_utf8 ISO-8859-5 UNICODE iso8859_to_utf8 utf8_and_iso8859 \ + utf8_to_iso8859_6 UNICODE ISO-8859-6 utf8_to_iso8859 utf8_and_iso8859 \ + iso8859_6_to_utf8 ISO-8859-6 UNICODE iso8859_to_utf8 utf8_and_iso8859 \ + utf8_to_iso8859_7 UNICODE ISO-8859-7 utf8_to_iso8859 utf8_and_iso8859 \ + iso8859_7_to_utf8 ISO-8859-7 UNICODE iso8859_to_utf8 utf8_and_iso8859 \ + utf8_to_iso8859_8 UNICODE ISO-8859-8 utf8_to_iso8859 utf8_and_iso8859 \ + iso8859_8_to_utf8 ISO-8859-8 UNICODE iso8859_to_utf8 utf8_and_iso8859 \ + euc_jp_to_sjis EUC_JP SJIS euc_jp_to_sjis 
euc_jp_and_sjis \ + sjis_to_euc_jp SJIS EUC_JP sjis_to_euc_jp euc_jp_and_sjis \ + euc_jp_to_mic EUC_JP MULE_INTERNAL euc_jp_to_mic euc_jp_and_sjis \ + sjis_to_mic SJIS MULE_INTERNAL sjis_to_mic euc_jp_and_sjis \ + mic_to_euc_jp MULE_INTERNAL EUC_JP mic_to_euc_jp euc_jp_and_sjis \ + mic_to_sjis MULE_INTERNAL SJIS mic_to_sjis euc_jp_and_sjis \ + euc_tw_to_big5 EUC_TW BIG5 euc_tw_to_big5 euc_tw_and_big5 \ + big5_to_euc_tw BIG5 EUC_TW big5_to_euc_tw euc_tw_and_big5 \ + euc_tw_to_mic EUC_TW MULE_INTERNAL euc_tw_to_mic euc_tw_and_big5 \ + big5_to_mic BIG5 MULE_INTERNAL big5_to_mic euc_tw_and_big5 \ + mic_to_euc_tw MULE_INTERNAL EUC_TW mic_to_euc_tw euc_tw_and_big5 \ + mic_to_big5 MULE_INTERNAL BIG5 mic_to_big5 euc_tw_and_big5 \ + ; \ + while [ "$$#" -gt 0 ] ; \ + do \ + name=$$1;shift; \ + se=$$1;shift; \ + de=$$1; shift; \ + func=$$1; shift; \ + obj=$$1; shift; \ + echo "-- $$se --> $$de"; \ + echo "CREATE OR REPLACE FUNCTION $$func (INTEGER, INTEGER, OPAQUE, OPAQUE, INTEGER) RETURNS INTEGER AS '$$"libdir"/$$obj', '$$func' LANGUAGE 'c';"; \ + echo "DROP CONVERSION pg_catalog.$$name;"; \ + echo "CREATE DEFAULT CONVERSION pg_catalog.$$name FOR '$$se' TO '$$de' FROM $$func;"; \ + done > $@ + +install: + $(INSTALL_DATA) $(SQLSCRIPT) $(datadir) + @for dir in $(DIRS); do $(MAKE) -C $$dir $@ || exit; done + +all: $(SQLSCRIPT) + @for dir in $(DIRS); do $(MAKE) -C $$dir $@ || exit; done + +clean: + $(RM) $(SQLSCRIPT) + @for dir in $(DIRS); do $(MAKE) -C $$dir $@; done + +distclean maintainer-clean: + $(RM) $(SQLSCRIPT) + @for dir in $(DIRS); do $(MAKE) -C $$dir $@; done diff --git a/src/backend/utils/mb/conversion_procs/euc_jp_and_sjis/Makefile b/src/backend/utils/mb/conversion_procs/euc_jp_and_sjis/Makefile new file mode 100644 index 0000000000..6082880e8b --- /dev/null +++ b/src/backend/utils/mb/conversion_procs/euc_jp_and_sjis/Makefile @@ -0,0 +1,11 @@ +#------------------------------------------------------------------------- +# +# $Id: Makefile,v 1.1 2002/07/16 09:25:05 
ishii Exp $ +# +#------------------------------------------------------------------------- +top_builddir = ../../../../../.. +include $(top_builddir)/src/Makefile.global + +NAME := euc_jp_and_sjis + +include ../proc.mk diff --git a/src/backend/utils/mb/conversion_procs/euc_jp_and_sjis/euc_jp_and_sjis.c b/src/backend/utils/mb/conversion_procs/euc_jp_and_sjis/euc_jp_and_sjis.c new file mode 100644 index 0000000000..a5ae4c6ac6 --- /dev/null +++ b/src/backend/utils/mb/conversion_procs/euc_jp_and_sjis/euc_jp_and_sjis.c @@ -0,0 +1,456 @@ +/*------------------------------------------------------------------------- + * + * EUC_JP, SJIS and MULE_INTERNAL + * + * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * $Header: /cvsroot/pgsql/src/backend/utils/mb/conversion_procs/euc_jp_and_sjis/euc_jp_and_sjis.c,v 1.1 2002/07/16 09:25:05 ishii Exp $ + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" +#include "fmgr.h" +#include "mb/pg_wchar.h" + +/* + * SJIS alternative code. + * this code is used if a mapping EUC -> SJIS is not defined. 
+ */ +#define PGSJISALTCODE 0x81ac +#define PGEUCALTCODE 0xa2ae + +/* + * conversion table between SJIS UDC (IBM kanji) and EUC_JP + */ +#include "sjis.map" + +#define ENCODING_GROWTH_RATE 4 + +PG_FUNCTION_INFO_V1(euc_jp_to_sjis) +PG_FUNCTION_INFO_V1(sjis_to_euc_jp) +PG_FUNCTION_INFO_V1(euc_jp_to_mic) +PG_FUNCTION_INFO_V1(mic_to_euc_jp) +PG_FUNCTION_INFO_V1(sjis_to_mic) +PG_FUNCTION_INFO_V1(mic_to_sjis) + +extern Datum euc_jp_to_sjis(PG_FUNCTION_ARGS); +extern Datum sjis_to_euc_jp(PG_FUNCTION_ARGS); +extern Datum euc_jp_to_mic(PG_FUNCTION_ARGS); +extern Datum mic_to_euc_jp(PG_FUNCTION_ARGS); +extern Datum sjis_to_mic(PG_FUNCTION_ARGS); +extern Datum mic_to_sjis(PG_FUNCTION_ARGS); + +/* ---------- + * conv_proc( + * INTEGER, -- source encoding id + * INTEGER, -- destination encoding id + * OPAQUE, -- source string (null terminated C string) + * OPAQUE, -- destination string (null terminated C string) + * INTEGER -- source string length + * ) returns INTEGER; -- dummy. returns nothing, actually. 
+ * ---------- + */ + +static void sjis2mic(unsigned char *sjis, unsigned char *p, int len); +static void mic2sjis(unsigned char *mic, unsigned char *p, int len); +static void euc_jp2mic(unsigned char *euc, unsigned char *p, int len); +static void mic2euc_jp(unsigned char *mic, unsigned char *p, int len); + +Datum +euc_jp_to_sjis(PG_FUNCTION_ARGS) +{ + unsigned char *src = PG_GETARG_CSTRING(2); + unsigned char *dest = PG_GETARG_CSTRING(3); + int len = PG_GETARG_INT32(4); + unsigned char *buf; + + Assert(PG_GETARG_INT32(0) == PG_EUC_JP); + Assert(PG_GETARG_INT32(1) == PG_SJIS); + Assert(len > 0); + + buf = palloc(len * ENCODING_GROWTH_RATE); /* scratch space for the intermediate MULE_INTERNAL string */ + euc_jp2mic(src, buf, len); + mic2sjis(buf, dest, strlen(buf)); /* euc_jp2mic NUL-terminates buf, so strlen yields the intermediate length */ + pfree(buf); + + PG_RETURN_INT32(0); +} + +Datum +sjis_to_euc_jp(PG_FUNCTION_ARGS) +{ + unsigned char *src = PG_GETARG_CSTRING(2); + unsigned char *dest = PG_GETARG_CSTRING(3); + int len = PG_GETARG_INT32(4); + unsigned char *buf; + + Assert(PG_GETARG_INT32(0) == PG_SJIS); + Assert(PG_GETARG_INT32(1) == PG_EUC_JP); + Assert(len > 0); + + buf = palloc(len * ENCODING_GROWTH_RATE); /* scratch space for the intermediate MULE_INTERNAL string */ + sjis2mic(src, buf, len); + mic2euc_jp(buf, dest, strlen(buf)); /* sjis2mic NUL-terminates buf, so strlen yields the intermediate length */ + pfree(buf); + + PG_RETURN_INT32(0); +} + +Datum +euc_jp_to_mic(PG_FUNCTION_ARGS) +{ + unsigned char *src = PG_GETARG_CSTRING(2); + unsigned char *dest = PG_GETARG_CSTRING(3); + int len = PG_GETARG_INT32(4); + + Assert(PG_GETARG_INT32(0) == PG_EUC_JP); + Assert(PG_GETARG_INT32(1) == PG_MULE_INTERNAL); + Assert(len > 0); + + euc_jp2mic(src, dest, len); + + PG_RETURN_INT32(0); +} + +Datum +mic_to_euc_jp(PG_FUNCTION_ARGS) +{ + unsigned char *src = PG_GETARG_CSTRING(2); + unsigned char *dest = PG_GETARG_CSTRING(3); + int len = PG_GETARG_INT32(4); + + Assert(PG_GETARG_INT32(0) == PG_MULE_INTERNAL); + Assert(PG_GETARG_INT32(1) == PG_EUC_JP); + Assert(len > 0); + + mic2euc_jp(src, dest, len); /* destination is EUC_JP (asserted above), so use the EUC_JP writer, not mic2sjis */ + + PG_RETURN_INT32(0); +} + +Datum +sjis_to_mic(PG_FUNCTION_ARGS) +{ + unsigned char *src = PG_GETARG_CSTRING(2); + unsigned char *dest = 
PG_GETARG_CSTRING(3); + int len = PG_GETARG_INT32(4); + + Assert(PG_GETARG_INT32(0) == PG_SJIS); + Assert(PG_GETARG_INT32(1) == PG_MULE_INTERNAL); + Assert(len > 0); + + sjis2mic(src, dest, len); + + PG_RETURN_INT32(0); +} + +Datum +mic_to_sjis(PG_FUNCTION_ARGS) +{ + unsigned char *src = PG_GETARG_CSTRING(2); + unsigned char *dest = PG_GETARG_CSTRING(3); + int len = PG_GETARG_INT32(4); + + Assert(PG_GETARG_INT32(0) == PG_MULE_INTERNAL); + Assert(PG_GETARG_INT32(1) == PG_SJIS); + Assert(len > 0); + + mic2sjis(src, dest, len); + + PG_RETURN_INT32(0); +} + +/* + * SJIS ---> MIC: reads sjis until len is used up or a NUL byte is seen, writes the MULE_INTERNAL form to p and NUL-terminates p. + */ +static void +sjis2mic(unsigned char *sjis, unsigned char *p, int len) +{ + int c1, + c2, +/* Eiji Tokuya patched begin */ + i, + k, + k2; + +/* Eiji Tokuya patched end */ + while (len > 0 && (c1 = *sjis++)) + { + if (c1 >= 0xa1 && c1 <= 0xdf) + { + /* JIS X0201 (1 byte kana) */ + len--; + *p++ = LC_JISX0201K; + *p++ = c1; + } + else if (c1 > 0x7f) + { + /* + * JIS X0208, X0212, user defined extended characters + */ + c2 = *sjis++; /* second byte of the two-byte SJIS code */ + k = (c1 << 8) + c2; +/* Eiji Tokuya patched begin */ + if (k >= 0xed40 && k < 0xf040) + { + /* NEC selection IBM kanji */ + for (i = 0;; i++) + { + k2 = ibmkanji[i].nec; + if (k2 == 0xffff) /* stop entry: end of ibmkanji[] */ + break; + if (k2 == k) + { + k = ibmkanji[i].sjis; + c1 = (k >> 8) & 0xff; + c2 = k & 0xff; + } + } + } + + if (k < 0xeb3f) +/* Eiji Tokuya patched end */ + { + /* JIS X0208 */ + len -= 2; + *p++ = LC_JISX0208; + *p++ = ((c1 & 0x3f) << 1) + 0x9f + (c2 > 0x9e); + *p++ = c2 + ((c2 > 0x9e) ? 
2 : 0x60) + (c2 < 0x80); + } +/* Eiji Tokuya patched begin */ + else if ((k >= 0xeb40 && k < 0xf040) || (k >= 0xfc4c && k <= 0xfcfc)) + { + len -= 2; /* two source bytes were consumed here too; NEC selection IBM kanji - Other undecided justice */ +/* Eiji Tokuya patched end */ + *p++ = LC_JISX0208; + *p++ = PGEUCALTCODE >> 8; + *p++ = PGEUCALTCODE & 0xff; + } + else if (k >= 0xf040 && k < 0xf540) + { + /* + * UDC1 mapping to X0208 85 ku - 94 ku JIS code 0x7521 - + * 0x7e7e EUC 0xf5a1 - 0xfefe + */ + len -= 2; + *p++ = LC_JISX0208; + c1 -= 0x6f; + *p++ = ((c1 & 0x3f) << 1) + 0xf3 + (c2 > 0x9e); + *p++ = c2 + ((c2 > 0x9e) ? 2 : 0x60) + (c2 < 0x80); + } + else if (k >= 0xf540 && k < 0xfa40) + { + /* + * UDC2 mapping to X0212 85 ku - 94 ku JIS code 0x7521 - + * 0x7e7e EUC 0x8ff5a1 - 0x8ffefe + */ + len -= 2; + *p++ = LC_JISX0212; + c1 -= 0x74; + *p++ = ((c1 & 0x3f) << 1) + 0xf3 + (c2 > 0x9e); + *p++ = c2 + ((c2 > 0x9e) ? 2 : 0x60) + (c2 < 0x80); + } + else if (k >= 0xfa40) + { + /* + * mapping IBM kanji to X0208 and X0212 + * + */ + len -= 2; + for (i = 0;; i++) + { + k2 = ibmkanji[i].sjis; + if (k2 == 0xffff) /* stop entry: no mapping found, nothing is emitted */ + break; + if (k2 == k) + { + k = ibmkanji[i].euc; + if (k >= 0x8f0000) + { + *p++ = LC_JISX0212; + *p++ = 0x80 | ((k & 0xff00) >> 8); + *p++ = 0x80 | (k & 0xff); + } + else + { + *p++ = LC_JISX0208; + *p++ = 0x80 | (k >> 8); + *p++ = 0x80 | (k & 0xff); + } + } + } + } + } + else + { /* should be ASCII */ + len--; + *p++ = c1; + } + } + *p = '\0'; +} + +/* + * MIC ---> SJIS + */ +static void +mic2sjis(unsigned char *mic, unsigned char *p, int len) +{ + int c1, + c2, + k; + + while (len > 0 && (c1 = *mic)) + { + len -= pg_mic_mblen(mic++); + + if (c1 == LC_JISX0201K) + *p++ = *mic++; + else if (c1 == LC_JISX0208) + { + c1 = *mic++; + c2 = *mic++; + k = (c1 << 8) | (c2 & 0xff); + if (k >= 0xf5a1) + { + /* UDC1 */ + c1 -= 0x54; + *p++ = ((c1 - 0xa1) >> 1) + ((c1 < 0xdf) ? 0x81 : 0xc1) + 0x6f; + } + else + *p++ = ((c1 - 0xa1) >> 1) + ((c1 < 0xdf) ? 0x81 : 0xc1); + *p++ = c2 - ((c1 & 1) ? ((c2 < 0xe0) ? 
0x61 : 0x60) : 2); + } + else if (c1 == LC_JISX0212) + { + int i, + k2; + + c1 = *mic++; + c2 = *mic++; + k = c1 << 8 | c2; /* pack the two code bytes into one 16-bit value */ + if (k >= 0xf5a1) + { + /* UDC2 */ + c1 -= 0x54; + *p++ = ((c1 - 0xa1) >> 1) + ((c1 < 0xdf) ? 0x81 : 0xc1) + 0x74; + *p++ = c2 - ((c1 & 1) ? ((c2 < 0xe0) ? 0x61 : 0x60) : 2); + } + else + { + /* IBM kanji */ + for (i = 0;; i++) + { + k2 = ibmkanji[i].euc & 0xffff; /* compare on the low 16 bits only */ + if (k2 == 0xffff) + { + *p++ = PGSJISALTCODE >> 8; /* reached the table's stop entry: no mapping, emit the substitute code */ + *p++ = PGSJISALTCODE & 0xff; + break; + } + if (k2 == k) + { + k = ibmkanji[i].sjis; + *p++ = k >> 8; + *p++ = k & 0xff; + break; + } + } + } + } + else if (c1 > 0x7f) + { + /* cannot convert to SJIS! */ + *p++ = PGSJISALTCODE >> 8; + *p++ = PGSJISALTCODE & 0xff; + } + else + { /* should be ASCII */ + *p++ = c1; + } + } + *p = '\0'; +} + +/* + * EUC_JP ---> MIC: reads euc until len is used up or a NUL byte is seen, writes the MULE_INTERNAL form to p and NUL-terminates p. + */ +static void +euc_jp2mic(unsigned char *euc, unsigned char *p, int len) +{ + int c1; + + while (len > 0 && (c1 = *euc++)) + { + if (c1 == SS2) + { /* 1 byte kana? */ + len -= 2; /* SS2 prefix plus one kana byte */ + *p++ = LC_JISX0201K; + *p++ = *euc++; + } + else if (c1 == SS3) + { /* JIS X0212 kanji? */ + len -= 3; /* SS3 prefix plus two code bytes */ + *p++ = LC_JISX0212; + *p++ = *euc++; + *p++ = *euc++; + } + else if (c1 & 0x80) + { /* kanji? */ + len -= 2; + *p++ = LC_JISX0208; + *p++ = c1; + *p++ = *euc++; + } + else + { /* should be ASCII */ + len--; + *p++ = c1; + } + } + *p = '\0'; +} + +/* + * MIC ---> EUC_JP: reads mic until len is used up or a NUL byte is seen, writes the EUC_JP form to p. + */ +static void +mic2euc_jp(unsigned char *mic, unsigned char *p, int len) +{ + int c1; + + while (len > 0 && (c1 = *mic)) + { + len -= pg_mic_mblen(mic++); /* presumably pg_mic_mblen returns the byte length of the character at mic -- TODO confirm; mic then points past the lead byte */ + + if (c1 == LC_JISX0201K) + { + *p++ = SS2; + *p++ = *mic++; + } + else if (c1 == LC_JISX0212) + { + *p++ = SS3; + *p++ = *mic++; + *p++ = *mic++; + } + else if (c1 == LC_JISX0208) + { + *p++ = *mic++; + *p++ = *mic++; + } + else if (c1 > 0x7f) + { /* cannot convert to EUC_JP! 
*/ + mic--; + pg_print_bogus_char(&mic, &p); + } + else + { /* should be ASCII */ + *p++ = c1; + } + } + *p = '\0'; +} diff --git a/src/backend/utils/mb/conversion_procs/euc_jp_and_sjis/sjis.map b/src/backend/utils/mb/conversion_procs/euc_jp_and_sjis/sjis.map new file mode 100644 index 0000000000..cfcfaefb06 --- /dev/null +++ b/src/backend/utils/mb/conversion_procs/euc_jp_and_sjis/sjis.map @@ -0,0 +1,396 @@ +static struct +{ + unsigned short int nec; /* SJIS UDC (NEC selection IBM kanji) */ + unsigned short int sjis; /* SJIS UDC (IBM kanji) */ + int euc; /* EUC_JP */ +} ibmkanji[] = { +{ 0xEEEF , 0xfa40 , 0x8ff3f3 }, +{ 0xEEF0 , 0xfa41 , 0x8ff3f4 }, +{ 0xEEF1 , 0xfa42 , 0x8ff3f5 }, +{ 0xEEF2 , 0xfa43 , 0x8ff3f6 }, +{ 0xEEF3 , 0xfa44 , 0x8ff3f7 }, +{ 0xEEF4 , 0xfa45 , 0x8ff3f8 }, +{ 0xEEF5 , 0xfa46 , 0x8ff3f9 }, +{ 0xEEF6 , 0xfa47 , 0x8ff3fa }, +{ 0xEEF7 , 0xfa48 , 0x8ff3fb }, +{ 0xEEF8 , 0xfa49 , 0x8ff3fc }, +{ 0x8754 , 0xfa4a , 0x8ff3fd }, +{ 0x8755 , 0xfa4b , 0x8ff3fe }, +{ 0x8756 , 0xfa4c , 0x8ff4a1 }, +{ 0x8757 , 0xfa4d , 0x8ff4a2 }, +{ 0x8758 , 0xfa4e , 0x8ff4a3 }, +{ 0x8759 , 0xfa4f , 0x8ff4a4 }, +{ 0x875A , 0xfa50 , 0x8ff4a5 }, +{ 0x875B , 0xfa51 , 0x8ff4a6 }, +{ 0x875C , 0xfa52 , 0x8ff4a7 }, +{ 0x875D , 0xfa53 , 0x8ff4a8 }, +{ 0xEEF9 , 0xfa54 , 0xa2cc }, +{ 0xEEFA , 0xfa55 , 0x8fa2c3 }, +{ 0xEEFB , 0xfa56 , 0x8ff4a9 }, +{ 0xEEFC , 0xfa57 , 0x8ff4aa }, +{ 0x878A , 0xfa58 , 0x8ff4ab }, +{ 0x8782 , 0xfa59 , 0x8ff4ac }, +{ 0x8784 , 0xfa5a , 0x8ff4ad }, +{ 0x879A , 0xfa5b , 0xa2e8 }, +{ 0xED40 , 0xfa5c , 0x8fd4e3 }, +{ 0xED41 , 0xfa5d , 0x8fdcdf }, +{ 0xED42 , 0xfa5e , 0x8fe4e9 }, +{ 0xED43 , 0xfa5f , 0x8fe3f8 }, +{ 0xED44 , 0xfa60 , 0x8fd9a1 }, +{ 0xED45 , 0xfa61 , 0x8fb1bb }, +{ 0xED46 , 0xfa62 , 0x8ff4ae }, +{ 0xED47 , 0xfa63 , 0x8fc2ad }, +{ 0xED48 , 0xfa64 , 0x8fc3fc }, +{ 0xED49 , 0xfa65 , 0x8fe4d0 }, +{ 0xED4A , 0xfa66 , 0x8fc2bf }, +{ 0xED4B , 0xfa67 , 0x8fbcf4 }, +{ 0xED4C , 0xfa68 , 0x8fb0a9 }, +{ 0xED4D , 0xfa69 , 0x8fb0c8 }, +{ 0xED4E , 0xfa6a , 
0x8ff4af }, +{ 0xED4F , 0xfa6b , 0x8fb0d2 }, +{ 0xED50 , 0xfa6c , 0x8fb0d4 }, +{ 0xED51 , 0xfa6d , 0x8fb0e3 }, +{ 0xED52 , 0xfa6e , 0x8fb0ee }, +{ 0xED53 , 0xfa6f , 0x8fb1a7 }, +{ 0xED54 , 0xfa70 , 0x8fb1a3 }, +{ 0xED55 , 0xfa71 , 0x8fb1ac }, +{ 0xED56 , 0xfa72 , 0x8fb1a9 }, +{ 0xED57 , 0xfa73 , 0x8fb1be }, +{ 0xED58 , 0xfa74 , 0x8fb1df }, +{ 0xED59 , 0xfa75 , 0x8fb1d8 }, +{ 0xED5A , 0xfa76 , 0x8fb1c8 }, +{ 0xED5B , 0xfa77 , 0x8fb1d7 }, +{ 0xED5C , 0xfa78 , 0x8fb1e3 }, +{ 0xED5D , 0xfa79 , 0x8fb1f4 }, +{ 0xED5E , 0xfa7a , 0x8fb1e1 }, +{ 0xED5F , 0xfa7b , 0x8fb2a3 }, +{ 0xED60 , 0xfa7c , 0x8ff4b0 }, +{ 0xED61 , 0xfa7d , 0x8fb2bb }, +{ 0xED62 , 0xfa7e , 0x8fb2e6 }, +{ 0xED63 , 0xfa80 , 0x8fb2ed }, +{ 0xED64 , 0xfa81 , 0x8fb2f5 }, +{ 0xED65 , 0xfa82 , 0x8fb2fc }, +{ 0xED66 , 0xfa83 , 0x8ff4b1 }, +{ 0xED67 , 0xfa84 , 0x8fb3b5 }, +{ 0xED68 , 0xfa85 , 0x8fb3d8 }, +{ 0xED69 , 0xfa86 , 0x8fb3db }, +{ 0xED6A , 0xfa87 , 0x8fb3e5 }, +{ 0xED6B , 0xfa88 , 0x8fb3ee }, +{ 0xED6C , 0xfa89 , 0x8fb3fb }, +{ 0xED6D , 0xfa8a , 0x8ff4b2 }, +{ 0xED6E , 0xfa8b , 0x8ff4b3 }, +{ 0xED6F , 0xfa8c , 0x8fb4c0 }, +{ 0xED70 , 0xfa8d , 0x8fb4c7 }, +{ 0xED71 , 0xfa8e , 0x8fb4d0 }, +{ 0xED72 , 0xfa8f , 0x8fb4de }, +{ 0xED73 , 0xfa90 , 0x8ff4b4 }, +{ 0xED74 , 0xfa91 , 0x8fb5aa }, +{ 0xED75 , 0xfa92 , 0x8ff4b5 }, +{ 0xED76 , 0xfa93 , 0x8fb5af }, +{ 0xED77 , 0xfa94 , 0x8fb5c4 }, +{ 0xED78 , 0xfa95 , 0x8fb5e8 }, +{ 0xED79 , 0xfa96 , 0x8ff4b6 }, +{ 0xED7A , 0xfa97 , 0x8fb7c2 }, +{ 0xED7B , 0xfa98 , 0x8fb7e4 }, +{ 0xED7C , 0xfa99 , 0x8fb7e8 }, +{ 0xED7D , 0xfa9a , 0x8fb7e7 }, +{ 0xED7E , 0xfa9b , 0x8ff4b7 }, +{ 0xED80 , 0xfa9c , 0x8ff4b8 }, +{ 0xED81 , 0xfa9d , 0x8ff4b9 }, +{ 0xED82 , 0xfa9e , 0x8fb8ce }, +{ 0xED83 , 0xfa9f , 0x8fb8e1 }, +{ 0xED84 , 0xfaa0 , 0x8fb8f5 }, +{ 0xED85 , 0xfaa1 , 0x8fb8f7 }, +{ 0xED86 , 0xfaa2 , 0x8fb8f8 }, +{ 0xED87 , 0xfaa3 , 0x8fb8fc }, +{ 0xED88 , 0xfaa4 , 0x8fb9af }, +{ 0xED89 , 0xfaa5 , 0x8fb9b7 }, +{ 0xED8A , 0xfaa6 , 0x8fbabe }, +{ 0xED8B , 0xfaa7 , 0x8fbadb }, +{ 
0xED8C , 0xfaa8 , 0x8fcdaa }, +{ 0xED8D , 0xfaa9 , 0x8fbae1 }, +{ 0xED8E , 0xfaaa , 0x8ff4ba }, +{ 0xED8F , 0xfaab , 0x8fbaeb }, +{ 0xED90 , 0xfaac , 0x8fbbb3 }, +{ 0xED91 , 0xfaad , 0x8fbbb8 }, +{ 0xED92 , 0xfaae , 0x8ff4bb }, +{ 0xED93 , 0xfaaf , 0x8fbbca }, +{ 0xED94 , 0xfab0 , 0x8ff4bc }, +{ 0xED95 , 0xfab1 , 0x8ff4bd }, +{ 0xED96 , 0xfab2 , 0x8fbbd0 }, +{ 0xED97 , 0xfab3 , 0x8fbbde }, +{ 0xED98 , 0xfab4 , 0x8fbbf4 }, +{ 0xED99 , 0xfab5 , 0x8fbbf5 }, +{ 0xED9A , 0xfab6 , 0x8fbbf9 }, +{ 0xED9B , 0xfab7 , 0x8fbce4 }, +{ 0xED9C , 0xfab8 , 0x8fbced }, +{ 0xED9D , 0xfab9 , 0x8fbcfe }, +{ 0xED9E , 0xfaba , 0x8ff4be }, +{ 0xED9F , 0xfabb , 0x8fbdc2 }, +{ 0xEDA0 , 0xfabc , 0x8fbde7 }, +{ 0xEDA1 , 0xfabd , 0x8ff4bf }, +{ 0xEDA2 , 0xfabe , 0x8fbdf0 }, +{ 0xEDA3 , 0xfabf , 0x8fbeb0 }, +{ 0xEDA4 , 0xfac0 , 0x8fbeac }, +{ 0xEDA5 , 0xfac1 , 0x8ff4c0 }, +{ 0xEDA6 , 0xfac2 , 0x8fbeb3 }, +{ 0xEDA7 , 0xfac3 , 0x8fbebd }, +{ 0xEDA8 , 0xfac4 , 0x8fbecd }, +{ 0xEDA9 , 0xfac5 , 0x8fbec9 }, +{ 0xEDAA , 0xfac6 , 0x8fbee4 }, +{ 0xEDAB , 0xfac7 , 0x8fbfa8 }, +{ 0xEDAC , 0xfac8 , 0x8fbfc9 }, +{ 0xEDAD , 0xfac9 , 0x8fc0c4 }, +{ 0xEDAE , 0xfaca , 0x8fc0e4 }, +{ 0xEDAF , 0xfacb , 0x8fc0f4 }, +{ 0xEDB0 , 0xfacc , 0x8fc1a6 }, +{ 0xEDB1 , 0xfacd , 0x8ff4c1 }, +{ 0xEDB2 , 0xface , 0x8fc1f5 }, +{ 0xEDB3 , 0xfacf , 0x8fc1fc }, +{ 0xEDB4 , 0xfad0 , 0x8ff4c2 }, +{ 0xEDB5 , 0xfad1 , 0x8fc1f8 }, +{ 0xEDB6 , 0xfad2 , 0x8fc2ab }, +{ 0xEDB7 , 0xfad3 , 0x8fc2a1 }, +{ 0xEDB8 , 0xfad4 , 0x8fc2a5 }, +{ 0xEDB9 , 0xfad5 , 0x8ff4c3 }, +{ 0xEDBA , 0xfad6 , 0x8fc2b8 }, +{ 0xEDBB , 0xfad7 , 0x8fc2ba }, +{ 0xEDBC , 0xfad8 , 0x8ff4c4 }, +{ 0xEDBD , 0xfad9 , 0x8fc2c4 }, +{ 0xEDBE , 0xfada , 0x8fc2d2 }, +{ 0xEDBF , 0xfadb , 0x8fc2d7 }, +{ 0xEDC0 , 0xfadc , 0x8fc2db }, +{ 0xEDC1 , 0xfadd , 0x8fc2de }, +{ 0xEDC2 , 0xfade , 0x8fc2ed }, +{ 0xEDC3 , 0xfadf , 0x8fc2f0 }, +{ 0xEDC4 , 0xfae0 , 0x8ff4c5 }, +{ 0xEDC5 , 0xfae1 , 0x8fc3a1 }, +{ 0xEDC6 , 0xfae2 , 0x8fc3b5 }, +{ 0xEDC7 , 0xfae3 , 0x8fc3c9 }, +{ 0xEDC8 , 0xfae4 , 
0x8fc3b9 }, +{ 0xEDC9 , 0xfae5 , 0x8ff4c6 }, +{ 0xEDCA , 0xfae6 , 0x8fc3d8 }, +{ 0xEDCB , 0xfae7 , 0x8fc3fe }, +{ 0xEDCC , 0xfae8 , 0x8ff4c7 }, +{ 0xEDCD , 0xfae9 , 0x8fc4cc }, +{ 0xEDCE , 0xfaea , 0x8ff4c8 }, +{ 0xEDCF , 0xfaeb , 0x8fc4d9 }, +{ 0xEDD0 , 0xfaec , 0x8fc4ea }, +{ 0xEDD1 , 0xfaed , 0x8fc4fd }, +{ 0xEDD2 , 0xfaee , 0x8ff4c9 }, +{ 0xEDD3 , 0xfaef , 0x8fc5a7 }, +{ 0xEDD4 , 0xfaf0 , 0x8fc5b5 }, +{ 0xEDD5 , 0xfaf1 , 0x8fc5b6 }, +{ 0xEDD6 , 0xfaf2 , 0x8ff4ca }, +{ 0xEDD7 , 0xfaf3 , 0x8fc5d5 }, +{ 0xEDD8 , 0xfaf4 , 0x8fc6b8 }, +{ 0xEDD9 , 0xfaf5 , 0x8fc6d7 }, +{ 0xEDDA , 0xfaf6 , 0x8fc6e0 }, +{ 0xEDDB , 0xfaf7 , 0x8fc6ea }, +{ 0xEDDC , 0xfaf8 , 0x8fc6e3 }, +{ 0xEDDD , 0xfaf9 , 0x8fc7a1 }, +{ 0xEDDE , 0xfafa , 0x8fc7ab }, +{ 0xEDDF , 0xfafb , 0x8fc7c7 }, +{ 0xEDE0 , 0xfafc , 0x8fc7c3 }, +{ 0xEDE1 , 0xfb40 , 0x8fc7cb }, +{ 0xEDE2 , 0xfb41 , 0x8fc7cf }, +{ 0xEDE3 , 0xfb42 , 0x8fc7d9 }, +{ 0xEDE4 , 0xfb43 , 0x8ff4cb }, +{ 0xEDE5 , 0xfb44 , 0x8ff4cc }, +{ 0xEDE6 , 0xfb45 , 0x8fc7e6 }, +{ 0xEDE7 , 0xfb46 , 0x8fc7ee }, +{ 0xEDE8 , 0xfb47 , 0x8fc7fc }, +{ 0xEDE9 , 0xfb48 , 0x8fc7eb }, +{ 0xEDEA , 0xfb49 , 0x8fc7f0 }, +{ 0xEDEB , 0xfb4a , 0x8fc8b1 }, +{ 0xEDEC , 0xfb4b , 0x8fc8e5 }, +{ 0xEDED , 0xfb4c , 0x8fc8f8 }, +{ 0xEDEE , 0xfb4d , 0x8fc9a6 }, +{ 0xEDEF , 0xfb4e , 0x8fc9ab }, +{ 0xEDF0 , 0xfb4f , 0x8fc9ad }, +{ 0xEDF1 , 0xfb50 , 0x8ff4cd }, +{ 0xEDF2 , 0xfb51 , 0x8fc9ca }, +{ 0xEDF3 , 0xfb52 , 0x8fc9d3 }, +{ 0xEDF4 , 0xfb53 , 0x8fc9e9 }, +{ 0xEDF5 , 0xfb54 , 0x8fc9e3 }, +{ 0xEDF6 , 0xfb55 , 0x8fc9fc }, +{ 0xEDF7 , 0xfb56 , 0x8fc9f4 }, +{ 0xEDF8 , 0xfb57 , 0x8fc9f5 }, +{ 0xEDF9 , 0xfb58 , 0x8ff4ce }, +{ 0xEDFA , 0xfb59 , 0x8fcab3 }, +{ 0xEDFB , 0xfb5a , 0x8fcabd }, +{ 0xEDFC , 0xfb5b , 0x8fcaef }, +{ 0xEE40 , 0xfb5c , 0x8fcaf1 }, +{ 0xEE41 , 0xfb5d , 0x8fcbae }, +{ 0xEE42 , 0xfb5e , 0x8ff4cf }, +{ 0xEE43 , 0xfb5f , 0x8fcbca }, +{ 0xEE44 , 0xfb60 , 0x8fcbe6 }, +{ 0xEE45 , 0xfb61 , 0x8fcbea }, +{ 0xEE46 , 0xfb62 , 0x8fcbf0 }, +{ 0xEE47 , 0xfb63 , 0x8fcbf4 }, +{ 
0xEE48 , 0xfb64 , 0x8fcbee }, +{ 0xEE49 , 0xfb65 , 0x8fcca5 }, +{ 0xEE4A , 0xfb66 , 0x8fcbf9 }, +{ 0xEE4B , 0xfb67 , 0x8fccab }, +{ 0xEE4C , 0xfb68 , 0x8fccae }, +{ 0xEE4D , 0xfb69 , 0x8fccad }, +{ 0xEE4E , 0xfb6a , 0x8fccb2 }, +{ 0xEE4F , 0xfb6b , 0x8fccc2 }, +{ 0xEE50 , 0xfb6c , 0x8fccd0 }, +{ 0xEE51 , 0xfb6d , 0x8fccd9 }, +{ 0xEE52 , 0xfb6e , 0x8ff4d0 }, +{ 0xEE53 , 0xfb6f , 0x8fcdbb }, +{ 0xEE54 , 0xfb70 , 0x8ff4d1 }, +{ 0xEE55 , 0xfb71 , 0x8fcebb }, +{ 0xEE56 , 0xfb72 , 0x8ff4d2 }, +{ 0xEE57 , 0xfb73 , 0x8fceba }, +{ 0xEE58 , 0xfb74 , 0x8fcec3 }, +{ 0xEE59 , 0xfb75 , 0x8ff4d3 }, +{ 0xEE5A , 0xfb76 , 0x8fcef2 }, +{ 0xEE5B , 0xfb77 , 0x8fb3dd }, +{ 0xEE5C , 0xfb78 , 0x8fcfd5 }, +{ 0xEE5D , 0xfb79 , 0x8fcfe2 }, +{ 0xEE5E , 0xfb7a , 0x8fcfe9 }, +{ 0xEE5F , 0xfb7b , 0x8fcfed }, +{ 0xEE60 , 0xfb7c , 0x8ff4d4 }, +{ 0xEE61 , 0xfb7d , 0x8ff4d5 }, +{ 0xEE62 , 0xfb7e , 0x8ff4d6 }, +{ 0xEE63 , 0xfb80 , 0x8ff4d7 }, +{ 0xEE64 , 0xfb81 , 0x8fd0e5 }, +{ 0xEE65 , 0xfb82 , 0x8ff4d8 }, +{ 0xEE66 , 0xfb83 , 0x8fd0e9 }, +{ 0xEE67 , 0xfb84 , 0x8fd1e8 }, +{ 0xEE68 , 0xfb85 , 0x8ff4d9 }, +{ 0xEE69 , 0xfb86 , 0x8ff4da }, +{ 0xEE6A , 0xfb87 , 0x8fd1ec }, +{ 0xEE6B , 0xfb88 , 0x8fd2bb }, +{ 0xEE6C , 0xfb89 , 0x8ff4db }, +{ 0xEE6D , 0xfb8a , 0x8fd3e1 }, +{ 0xEE6E , 0xfb8b , 0x8fd3e8 }, +{ 0xEE6F , 0xfb8c , 0x8fd4a7 }, +{ 0xEE70 , 0xfb8d , 0x8ff4dc }, +{ 0xEE71 , 0xfb8e , 0x8ff4dd }, +{ 0xEE72 , 0xfb8f , 0x8fd4d4 }, +{ 0xEE73 , 0xfb90 , 0x8fd4f2 }, +{ 0xEE74 , 0xfb91 , 0x8fd5ae }, +{ 0xEE75 , 0xfb92 , 0x8ff4de }, +{ 0xEE76 , 0xfb93 , 0x8fd7de }, +{ 0xEE77 , 0xfb94 , 0x8ff4df }, +{ 0xEE78 , 0xfb95 , 0x8fd8a2 }, +{ 0xEE79 , 0xfb96 , 0x8fd8b7 }, +{ 0xEE7A , 0xfb97 , 0x8fd8c1 }, +{ 0xEE7B , 0xfb98 , 0x8fd8d1 }, +{ 0xEE7C , 0xfb99 , 0x8fd8f4 }, +{ 0xEE7D , 0xfb9a , 0x8fd9c6 }, +{ 0xEE7E , 0xfb9b , 0x8fd9c8 }, +{ 0xEE80 , 0xfb9c , 0x8fd9d1 }, +{ 0xEE81 , 0xfb9d , 0x8ff4e0 }, +{ 0xEE82 , 0xfb9e , 0x8ff4e1 }, +{ 0xEE83 , 0xfb9f , 0x8ff4e2 }, +{ 0xEE84 , 0xfba0 , 0x8ff4e3 }, +{ 0xEE85 , 0xfba1 , 
0x8ff4e4 }, +{ 0xEE86 , 0xfba2 , 0x8fdcd3 }, +{ 0xEE87 , 0xfba3 , 0x8fddc8 }, +{ 0xEE88 , 0xfba4 , 0x8fddd4 }, +{ 0xEE89 , 0xfba5 , 0x8fddea }, +{ 0xEE8A , 0xfba6 , 0x8fddfa }, +{ 0xEE8B , 0xfba7 , 0x8fdea4 }, +{ 0xEE8C , 0xfba8 , 0x8fdeb0 }, +{ 0xEE8D , 0xfba9 , 0x8ff4e5 }, +{ 0xEE8E , 0xfbaa , 0x8fdeb5 }, +{ 0xEE8F , 0xfbab , 0x8fdecb }, +{ 0xEE90 , 0xfbac , 0x8ff4e6 }, +{ 0xEE91 , 0xfbad , 0x8fdfb9 }, +{ 0xEE92 , 0xfbae , 0x8ff4e7 }, +{ 0xEE93 , 0xfbaf , 0x8fdfc3 }, +{ 0xEE94 , 0xfbb0 , 0x8ff4e8 }, +{ 0xEE95 , 0xfbb1 , 0x8ff4e9 }, +{ 0xEE96 , 0xfbb2 , 0x8fe0d9 }, +{ 0xEE97 , 0xfbb3 , 0x8ff4ea }, +{ 0xEE98 , 0xfbb4 , 0x8ff4eb }, +{ 0xEE99 , 0xfbb5 , 0x8fe1e2 }, +{ 0xEE9A , 0xfbb6 , 0x8ff4ec }, +{ 0xEE9B , 0xfbb7 , 0x8ff4ed }, +{ 0xEE9C , 0xfbb8 , 0x8ff4ee }, +{ 0xEE9D , 0xfbb9 , 0x8fe2c7 }, +{ 0xEE9E , 0xfbba , 0x8fe3a8 }, +{ 0xEE9F , 0xfbbb , 0x8fe3a6 }, +{ 0xEEA0 , 0xfbbc , 0x8fe3a9 }, +{ 0xEEA1 , 0xfbbd , 0x8fe3af }, +{ 0xEEA2 , 0xfbbe , 0x8fe3b0 }, +{ 0xEEA3 , 0xfbbf , 0x8fe3aa }, +{ 0xEEA4 , 0xfbc0 , 0x8fe3ab }, +{ 0xEEA5 , 0xfbc1 , 0x8fe3bc }, +{ 0xEEA6 , 0xfbc2 , 0x8fe3c1 }, +{ 0xEEA7 , 0xfbc3 , 0x8fe3bf }, +{ 0xEEA8 , 0xfbc4 , 0x8fe3d5 }, +{ 0xEEA9 , 0xfbc5 , 0x8fe3d8 }, +{ 0xEEAA , 0xfbc6 , 0x8fe3d6 }, +{ 0xEEAB , 0xfbc7 , 0x8fe3df }, +{ 0xEEAC , 0xfbc8 , 0x8fe3e3 }, +{ 0xEEAD , 0xfbc9 , 0x8fe3e1 }, +{ 0xEEAE , 0xfbca , 0x8fe3d4 }, +{ 0xEEAF , 0xfbcb , 0x8fe3e9 }, +{ 0xEEB0 , 0xfbcc , 0x8fe4a6 }, +{ 0xEEB1 , 0xfbcd , 0x8fe3f1 }, +{ 0xEEB2 , 0xfbce , 0x8fe3f2 }, +{ 0xEEB3 , 0xfbcf , 0x8fe4cb }, +{ 0xEEB4 , 0xfbd0 , 0x8fe4c1 }, +{ 0xEEB5 , 0xfbd1 , 0x8fe4c3 }, +{ 0xEEB6 , 0xfbd2 , 0x8fe4be }, +{ 0xEEB7 , 0xfbd3 , 0x8ff4ef }, +{ 0xEEB8 , 0xfbd4 , 0x8fe4c0 }, +{ 0xEEB9 , 0xfbd5 , 0x8fe4c7 }, +{ 0xEEBA , 0xfbd6 , 0x8fe4bf }, +{ 0xEEBB , 0xfbd7 , 0x8fe4e0 }, +{ 0xEEBC , 0xfbd8 , 0x8fe4de }, +{ 0xEEBD , 0xfbd9 , 0x8fe4d1 }, +{ 0xEEBE , 0xfbda , 0x8ff4f0 }, +{ 0xEEBF , 0xfbdb , 0x8fe4dc }, +{ 0xEEC0 , 0xfbdc , 0x8fe4d2 }, +{ 0xEEC1 , 0xfbdd , 0x8fe4db }, +{ 
0xEEC2 , 0xfbde , 0x8fe4d4 }, +{ 0xEEC3 , 0xfbdf , 0x8fe4fa }, +{ 0xEEC4 , 0xfbe0 , 0x8fe4ef }, +{ 0xEEC5 , 0xfbe1 , 0x8fe5b3 }, +{ 0xEEC6 , 0xfbe2 , 0x8fe5bf }, +{ 0xEEC7 , 0xfbe3 , 0x8fe5c9 }, +{ 0xEEC8 , 0xfbe4 , 0x8fe5d0 }, +{ 0xEEC9 , 0xfbe5 , 0x8fe5e2 }, +{ 0xEECA , 0xfbe6 , 0x8fe5ea }, +{ 0xEECB , 0xfbe7 , 0x8fe5eb }, +{ 0xEECC , 0xfbe8 , 0x8ff4f1 }, +{ 0xEECD , 0xfbe9 , 0x8ff4f2 }, +{ 0xEECE , 0xfbea , 0x8ff4f3 }, +{ 0xEECF , 0xfbeb , 0x8fe6e8 }, +{ 0xEED0 , 0xfbec , 0x8fe6ef }, +{ 0xEED1 , 0xfbed , 0x8fe7ac }, +{ 0xEED2 , 0xfbee , 0x8ff4f4 }, +{ 0xEED3 , 0xfbef , 0x8fe7ae }, +{ 0xEED4 , 0xfbf0 , 0x8ff4f5 }, +{ 0xEED5 , 0xfbf1 , 0x8fe7b1 }, +{ 0xEED6 , 0xfbf2 , 0x8ff4f6 }, +{ 0xEED7 , 0xfbf3 , 0x8fe7b2 }, +{ 0xEED8 , 0xfbf4 , 0x8fe8b1 }, +{ 0xEED9 , 0xfbf5 , 0x8fe8b6 }, +{ 0xEEDA , 0xfbf6 , 0x8ff4f7 }, +{ 0xEEDB , 0xfbf7 , 0x8ff4f8 }, +{ 0xEEDC , 0xfbf8 , 0x8fe8dd }, +{ 0xEEDD , 0xfbf9 , 0x8ff4f9 }, +{ 0xEEDE , 0xfbfa , 0x8ff4fa }, +{ 0xEEDF , 0xfbfb , 0x8fe9d1 }, +{ 0xEEE0 , 0xfbfc , 0x8ff4fb }, +{ 0xEEE1 , 0xfc40 , 0x8fe9ed }, +{ 0xEEE2 , 0xfc41 , 0x8feacd }, +{ 0xEEE3 , 0xfc42 , 0x8ff4fc }, +{ 0xEEE4 , 0xfc43 , 0x8feadb }, +{ 0xEEE5 , 0xfc44 , 0x8feae6 }, +{ 0xEEE6 , 0xfc45 , 0x8feaea }, +{ 0xEEE7 , 0xfc46 , 0x8feba5 }, +{ 0xEEE8 , 0xfc47 , 0x8febfb }, +{ 0xEEE9 , 0xfc48 , 0x8febfa }, +{ 0xEEEA , 0xfc49 , 0x8ff4fd }, +{ 0xEEEB , 0xfc4a , 0x8fecd6 }, +{ 0xEEEC , 0xfc4b , 0x8ff4fe }, +{ 0xffff , 0xffff , 0xffff } /* Stop code */ +}; diff --git a/src/backend/utils/mb/conversion_procs/euc_tw_and_big5/Makefile b/src/backend/utils/mb/conversion_procs/euc_tw_and_big5/Makefile new file mode 100644 index 0000000000..2adb6e9da9 --- /dev/null +++ b/src/backend/utils/mb/conversion_procs/euc_tw_and_big5/Makefile @@ -0,0 +1,14 @@ +#------------------------------------------------------------------------- +# +# $Id: Makefile,v 1.1 2002/07/16 09:25:05 ishii Exp $ +# +#------------------------------------------------------------------------- +top_builddir = 
../../../../../.. +include $(top_builddir)/src/Makefile.global + +NAME := euc_tw_and_big5 + +SRCS += big5.c +OBJS += big5.o + +include ../proc.mk diff --git a/src/backend/utils/mb/conversion_procs/euc_tw_and_big5/big5.c b/src/backend/utils/mb/conversion_procs/euc_tw_and_big5/big5.c new file mode 100644 index 0000000000..4e1844c2d5 --- /dev/null +++ b/src/backend/utils/mb/conversion_procs/euc_tw_and_big5/big5.c @@ -0,0 +1,378 @@ +/* + * conversion between BIG5 and Mule Internal Code(CNS 116643-1992 + * plane 1 and plane 2). + * This program is partially copied from lv(Multilingual file viewer) + * and slightly modified. lv is written and copyrighted by NARITA Tomio + * (nrt@web.ad.jp). + * + * 1999/1/15 Tatsuo Ishii + * + * $Id: big5.c,v 1.1 2002/07/16 09:25:05 ishii Exp $ + */ + +/* can be used in either frontend or backend */ +#include "postgres_fe.h" + +#include "mb/pg_wchar.h" + +typedef struct +{ + unsigned short code, + peer; +} codes_t; + +/* map Big5 Level 1 to CNS 11643-1992 Plane 1 */ +static codes_t big5Level1ToCnsPlane1[25] = { /* range */ + {0xA140, 0x2121}, + {0xA1F6, 0x2258}, + {0xA1F7, 0x2257}, + {0xA1F8, 0x2259}, + {0xA2AF, 0x2421}, + {0xA3C0, 0x4221}, + {0xa3e1, 0x0000}, + {0xA440, 0x4421}, + {0xACFE, 0x5753}, + {0xacff, 0x0000}, + {0xAD40, 0x5323}, + {0xAFD0, 0x5754}, + {0xBBC8, 0x6B51}, + {0xBE52, 0x6B50}, + {0xBE53, 0x6F5C}, + {0xC1AB, 0x7536}, + {0xC2CB, 0x7535}, + {0xC2CC, 0x7737}, + {0xC361, 0x782E}, + {0xC3B9, 0x7865}, + {0xC3BA, 0x7864}, + {0xC3BB, 0x7866}, + {0xC456, 0x782D}, + {0xC457, 0x7962}, + {0xc67f, 0x0000} +}; + +/* map CNS 11643-1992 Plane 1 to Big5 Level 1 */ +static codes_t cnsPlane1ToBig5Level1[26] = { /* range */ + {0x2121, 0xA140}, + {0x2257, 0xA1F7}, + {0x2258, 0xA1F6}, + {0x2259, 0xA1F8}, + {0x234f, 0x0000}, + {0x2421, 0xA2AF}, + {0x2571, 0x0000}, + {0x4221, 0xA3C0}, + {0x4242, 0x0000}, + {0x4421, 0xA440}, + {0x5323, 0xAD40}, + {0x5753, 0xACFE}, + {0x5754, 0xAFD0}, + {0x6B50, 0xBE52}, + {0x6B51, 0xBBC8}, + {0x6F5C, 0xBE53}, 
+ {0x7535, 0xC2CB}, + {0x7536, 0xC1AB}, + {0x7737, 0xC2CC}, + {0x782D, 0xC456}, + {0x782E, 0xC361}, + {0x7864, 0xC3BA}, + {0x7865, 0xC3B9}, + {0x7866, 0xC3BB}, + {0x7962, 0xC457}, + {0x7d4c, 0x0000} +}; + +/* map Big5 Level 2 to CNS 11643-1992 Plane 2 */ +static codes_t big5Level2ToCnsPlane2[48] = { /* range */ + {0xC940, 0x2121}, + {0xc94a, 0x0000}, + {0xC94B, 0x212B}, + {0xC96C, 0x214D}, + {0xC9BE, 0x214C}, + {0xC9BF, 0x217D}, + {0xC9ED, 0x224E}, + {0xCAF7, 0x224D}, + {0xCAF8, 0x2439}, + {0xD77A, 0x3F6A}, + {0xD77B, 0x387E}, + {0xDBA7, 0x3F6B}, + {0xDDFC, 0x4176}, + {0xDDFD, 0x4424}, + {0xE8A3, 0x554C}, + {0xE976, 0x5723}, + {0xEB5B, 0x5A29}, + {0xEBF1, 0x554B}, + {0xEBF2, 0x5B3F}, + {0xECDE, 0x5722}, + {0xECDF, 0x5C6A}, + {0xEDAA, 0x5D75}, + {0xEEEB, 0x642F}, + {0xEEEC, 0x6039}, + {0xF056, 0x5D74}, + {0xF057, 0x6243}, + {0xF0CB, 0x5A28}, + {0xF0CC, 0x6337}, + {0xF163, 0x6430}, + {0xF16B, 0x6761}, + {0xF16C, 0x6438}, + {0xF268, 0x6934}, + {0xF269, 0x6573}, + {0xF2C3, 0x664E}, + {0xF375, 0x6762}, + {0xF466, 0x6935}, + {0xF4B5, 0x664D}, + {0xF4B6, 0x6962}, + {0xF4FD, 0x6A4C}, + {0xF663, 0x6A4B}, + {0xF664, 0x6C52}, + {0xF977, 0x7167}, + {0xF9C4, 0x7166}, + {0xF9C5, 0x7234}, + {0xF9C6, 0x7240}, + {0xF9C7, 0x7235}, + {0xF9D2, 0x7241}, + {0xf9d6, 0x0000} +}; + +/* map CNS 11643-1992 Plane 2 to Big5 Level 2 */ +static codes_t cnsPlane2ToBig5Level2[49] = { /* range */ + {0x2121, 0xC940}, + {0x212B, 0xC94B}, + {0x214C, 0xC9BE}, + {0x214D, 0xC96C}, + {0x217D, 0xC9BF}, + {0x224D, 0xCAF7}, + {0x224E, 0xC9ED}, + {0x2439, 0xCAF8}, + {0x387E, 0xD77B}, + {0x3F6A, 0xD77A}, + {0x3F6B, 0xDBA7}, + {0x4424, 0x0000}, + {0x4176, 0xDDFC}, + {0x4177, 0x0000}, + {0x4424, 0xDDFD}, + {0x554B, 0xEBF1}, + {0x554C, 0xE8A3}, + {0x5722, 0xECDE}, + {0x5723, 0xE976}, + {0x5A28, 0xF0CB}, + {0x5A29, 0xEB5B}, + {0x5B3F, 0xEBF2}, + {0x5C6A, 0xECDF}, + {0x5D74, 0xF056}, + {0x5D75, 0xEDAA}, + {0x6039, 0xEEEC}, + {0x6243, 0xF057}, + {0x6337, 0xF0CC}, + {0x642F, 0xEEEB}, + {0x6430, 0xF163}, + {0x6438, 
0xF16C},
+	{0x6573, 0xF269},
+	{0x664D, 0xF4B5},
+	{0x664E, 0xF2C3},
+	{0x6761, 0xF16B},
+	{0x6762, 0xF375},
+	{0x6934, 0xF268},
+	{0x6935, 0xF466},
+	{0x6962, 0xF4B6},
+	{0x6A4B, 0xF663},
+	{0x6A4C, 0xF4FD},
+	{0x6C52, 0xF664},
+	{0x7166, 0xF9C4},
+	{0x7167, 0xF977},
+	{0x7234, 0xF9C5},
+	{0x7235, 0xF9C7},
+	{0x7240, 0xF9C6},
+	{0x7241, 0xF9D2},
+	{0x7245, 0x0000}
+};
+
+/* Big Five Level 1 Correspondence to CNS 11643-1992 Plane 4 */
+static unsigned short b1c4[][2] = {
+	{0xC879, 0x2123},
+	{0xC87B, 0x2124},
+	{0xC87D, 0x212A},
+	{0xC8A2, 0x2152}
+};
+
+/* Big Five Level 2 Correspondence to CNS 11643-1992 Plane 3 */
+static unsigned short b2c3[][2] = {
+	{0xF9D6, 0x4337},
+	{0xF9D7, 0x4F50},
+	{0xF9D8, 0x444E},
+	{0xF9D9, 0x504A},
+	{0xF9DA, 0x2C5D},
+	{0xF9DB, 0x3D7E},
+	{0xF9DC, 0x4B5C}
+};
+
+/*
+ * Look up "code" in a range table and convert it to the peer character set.
+ *
+ * array: range table.  Each entry starts a run of consecutive codes that
+ *		  map onto consecutive peer codes; a peer of 0 marks a run that
+ *		  has no mapping.
+ * high:  highest index to probe.  array[mid + 1] is read as the upper
+ *		  bound of a run, so the entry at high + 1 must exist.
+ * code:  code to convert.  Values >= 0xa140 are treated as Big5 (and
+ *		  converted to CNS), smaller values as CNS (converted to Big5).
+ *
+ * Returns the converted code, or 0 if no run contains "code" or the run
+ * containing it has no mapping.
+ */
+static unsigned short BinarySearchRange
+			(codes_t *array, int high, unsigned short code)
+{
+	int			low,
+				mid,
+				distance,
+				tmp;
+
+	low = 0;
+	mid = high >> 1;
+
+	for (; low <= high; mid = (low + high) >> 1)
+	{
+		if ((array[mid].code <= code) && (array[mid + 1].code > code))
+		{
+			if (0 == array[mid].peer)
+				return 0;
+			if (code >= 0xa140U)
+			{
+				/*
+				 * big5 to cns.  "high" and "low" are reused below as
+				 * scratch for the low bytes; that is safe because this
+				 * branch returns before the loop looks at them again.
+				 */
+				tmp = ((code & 0xff00) - (array[mid].code & 0xff00)) >> 8;
+				high = code & 0x00ff;
+				low = array[mid].code & 0x00ff;
+
+				/*
+				 * NOTE: big5 high_byte: 0xa1-0xfe, low_byte: 0x40-0x7e,
+				 * 0xa1-0xfe (radicals: 0x00-0x3e, 0x3f-0x9c) big5 radix
+				 * is 0x9d. [region_low, region_high]
+				 * We should remember big5 has two different regions
+				 * (above). There is a bias for the distance between these
+				 * regions. 0xa1 - 0x7e + bias = 1 (Distance between 0xa1
+				 * and 0x7e is 1.) bias = - 0x22.
+				 */
+				distance = tmp * 0x9d + high - low +
+					(high >= 0xa1 ? (low >= 0xa1 ? 0 : -0x22)
+					 : (low >= 0xa1 ? +0x22 : 0));
+
+				/*
+				 * NOTE: we have to convert the distance into a code
+				 * point. The code point's low_byte is 0x21 plus mod_0x5e.
+				 * In the first, we extract the mod_0x5e of the starting
+				 * code point, subtracting 0x21, and add distance to it.
+				 * Then we calculate again mod_0x5e of them, and restore
+				 * the final codepoint, adding 0x21.
+				 */
+				tmp = (array[mid].peer & 0x00ff) + distance - 0x21;
+				tmp = (array[mid].peer & 0xff00) + ((tmp / 0x5e) << 8)
+					+ 0x21 + tmp % 0x5e;
+				return tmp;
+			}
+			else
+			{
+				/* cns to big5 */
+				tmp = ((code & 0xff00) - (array[mid].code & 0xff00)) >> 8;
+
+				/*
+				 * NOTE: ISO charsets ranges between 0x21-0xfe
+				 * (94charset). Its radix is 0x5e. But there is no
+				 * distance bias like big5.
+				 */
+				distance = tmp * 0x5e
+					+ ((int) (code & 0x00ff) - (int) (array[mid].code & 0x00ff));
+
+				/*
+				 * NOTE: Similar to big5 to cns conversion, we extract
+				 * mod_0x9d and restore mod_0x9d into a code point.
+				 */
+				low = array[mid].peer & 0x00ff;
+				tmp = low + distance - (low >= 0xa1 ? 0x62 : 0x40);
+				low = tmp % 0x9d;
+				tmp = (array[mid].peer & 0xff00) + ((tmp / 0x9d) << 8)
+					+ (low > 0x3e ? 0x62 : 0x40) + low;
+				return tmp;
+			}
+		}
+		else if (array[mid].code > code)
+			high = mid - 1;
+		else
+			low = mid + 1;
+	}
+
+	return 0;
+}
+
+
+/*
+ * Convert a Big5 code to CNS 11643-1992.
+ *
+ * big5: two-byte Big5 code.
+ * lc:	 output; receives the LC_CNS11643_n constant identifying the CNS
+ *		 plane of the result, or 0 if there is no mapping.
+ *
+ * Returns the CNS code with the high bit of both bytes set (| 0x8080),
+ * or '?' when the Big5 code has no CNS equivalent.
+ */
+unsigned short
+BIG5toCNS(unsigned short big5, unsigned char *lc)
+{
+	unsigned short cns = 0;
+	int			i;
+
+	if (big5 < 0xc940U)
+	{
+		/* level 1 */
+
+		/*
+		 * b1c4 is an array of two-element rows, so the row count is
+		 * sizeof(b1c4) / sizeof(b1c4[0]).  Dividing by
+		 * sizeof(unsigned short) would double the bound and read past
+		 * the end of the table.
+		 */
+		for (i = 0; i < (int) (sizeof(b1c4) / sizeof(b1c4[0])); i++)
+		{
+			if (b1c4[i][0] == big5)
+			{
+				*lc = LC_CNS11643_4;
+				return (b1c4[i][1] | 0x8080U);
+			}
+		}
+
+		if (0 < (cns = BinarySearchRange(big5Level1ToCnsPlane1, 23, big5)))
+			*lc = LC_CNS11643_1;
+	}
+	else if (big5 == 0xc94aU)
+	{
+		/* level 2 code that is special-cased onto plane 1 */
+		*lc = LC_CNS11643_1;
+		cns = 0x4442;
+	}
+	else
+	{
+		/* level 2 */
+		/* row count, not element count: see the b1c4 loop above */
+		for (i = 0; i < (int) (sizeof(b2c3) / sizeof(b2c3[0])); i++)
+		{
+			if (b2c3[i][0] == big5)
+			{
+				*lc = LC_CNS11643_3;
+				return (b2c3[i][1] | 0x8080U);
+			}
+		}
+
+		if (0 < (cns = BinarySearchRange(big5Level2ToCnsPlane2, 46, big5)))
+			*lc = LC_CNS11643_2;
+	}
+
+	if (0 == cns)
+	{	/* no mapping Big5 to CNS 11643-1992 */
+		*lc = 0;
+		return (unsigned short) '?';
+	}
+
+	return cns | 0x8080;
+}
+
+/*
+ * Convert a CNS 11643-1992 code to Big5.
+ *
+ * cns: two-byte CNS code; the high bit of each byte is masked off first.
+ * lc:	LC_CNS11643_n constant selecting the CNS plane of "cns".
+ *
+ * Returns the Big5 code, or 0 if the plane is not handled or the code has
+ * no Big5 equivalent.
+ */
+unsigned short
+CNStoBIG5(unsigned short cns, unsigned char lc)
+{
+	int			i;
+	unsigned int big5 = 0;
+
+	cns &= 0x7f7f;
+
+	switch (lc)
+	{
+		case LC_CNS11643_1:
+			big5 = BinarySearchRange(cnsPlane1ToBig5Level1, 24, cns);
+			break;
+		case LC_CNS11643_2:
+			big5 = BinarySearchRange(cnsPlane2ToBig5Level2, 47, cns);
+			break;
+		case LC_CNS11643_3:
+			/* iterate over rows of b2c3, not over individual shorts */
+			for (i = 0; i < (int) (sizeof(b2c3) / sizeof(b2c3[0])); i++)
+			{
+				if (b2c3[i][1] == cns)
+					return (b2c3[i][0]);
+			}
+			break;
+		case LC_CNS11643_4:
+			/* iterate over rows of b1c4, not over individual shorts */
+			for (i = 0; i < (int) (sizeof(b1c4) / sizeof(b1c4[0])); i++)
+			{
+				if (b1c4[i][1] == cns)
+					return (b1c4[i][0]);
+			}
+			break;
+		default:
+			break;
+	}
+	return big5;
+}
diff --git a/src/backend/utils/mb/conversion_procs/euc_tw_and_big5/euc_tw_and_big5.c b/src/backend/utils/mb/conversion_procs/euc_tw_and_big5/euc_tw_and_big5.c
new file mode 100644
index 0000000000..591f7f38b9
--- /dev/null
+++ b/src/backend/utils/mb/conversion_procs/euc_tw_and_big5/euc_tw_and_big5.c
@@ -0,0 +1,340 @@
+/*-------------------------------------------------------------------------
+ *
+ *	  EUC_TW, BIG5 and MULE_INTERNAL
+ *
+ * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ *	  $Header: /cvsroot/pgsql/src/backend/utils/mb/conversion_procs/euc_tw_and_big5/euc_tw_and_big5.c,v 1.1 2002/07/16 09:25:05 ishii Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+#include "fmgr.h"
+#include "mb/pg_wchar.h"
+
+#define ENCODING_GROWTH_RATE 4
+
+PG_FUNCTION_INFO_V1(euc_tw_to_big5)
+PG_FUNCTION_INFO_V1(big5_to_euc_tw)
+PG_FUNCTION_INFO_V1(euc_tw_to_mic)
+PG_FUNCTION_INFO_V1(mic_to_euc_tw)
+PG_FUNCTION_INFO_V1(big5_to_mic)
+PG_FUNCTION_INFO_V1(mic_to_big5)
+
+extern Datum euc_tw_to_big5(PG_FUNCTION_ARGS);
+extern Datum big5_to_euc_tw(PG_FUNCTION_ARGS);
+extern Datum
euc_tw_to_mic(PG_FUNCTION_ARGS);
+extern Datum mic_to_euc_tw(PG_FUNCTION_ARGS);
+extern Datum big5_to_mic(PG_FUNCTION_ARGS);
+extern Datum mic_to_big5(PG_FUNCTION_ARGS);
+
+/* ----------
+ * conv_proc(
+ *		INTEGER,	-- source encoding id
+ *		INTEGER,	-- destination encoding id
+ *		OPAQUE,		-- source string (null terminated C string)
+ *		OPAQUE,		-- destination string (null terminated C string)
+ *		INTEGER		-- source string length
+ * ) returns INTEGER;	-- dummy. returns nothing, actually.
+ * ----------
+ */
+
+static void big52mic(unsigned char *big5, unsigned char *p, int len);
+static void mic2big5(unsigned char *mic, unsigned char *p, int len);
+static void euc_tw2mic(unsigned char *euc, unsigned char *p, int len);
+static void mic2euc_tw(unsigned char *mic, unsigned char *p, int len);
+
+/*
+ * EUC_TW ---> BIG5, converted in two steps through a temporary MULE
+ * internal code string.
+ */
+Datum
+euc_tw_to_big5(PG_FUNCTION_ARGS)
+{
+	unsigned char *src = PG_GETARG_CSTRING(2);
+	unsigned char *dest = PG_GETARG_CSTRING(3);
+	int			len = PG_GETARG_INT32(4);
+	unsigned char *buf;
+
+	Assert(PG_GETARG_INT32(0) == PG_EUC_TW);
+	Assert(PG_GETARG_INT32(1) == PG_BIG5);
+	Assert(len > 0);
+
+	/* intermediate MIC string: ENCODING_GROWTH_RATE bytes per source byte */
+	buf = palloc(len * ENCODING_GROWTH_RATE);
+	euc_tw2mic(src, buf, len);
+	mic2big5(buf, dest, strlen(buf));
+	pfree(buf);
+
+	PG_RETURN_INT32(0);
+}
+
+/*
+ * BIG5 ---> EUC_TW, converted in two steps through a temporary MULE
+ * internal code string.
+ */
+Datum
+big5_to_euc_tw(PG_FUNCTION_ARGS)
+{
+	unsigned char *src = PG_GETARG_CSTRING(2);
+	unsigned char *dest = PG_GETARG_CSTRING(3);
+	int			len = PG_GETARG_INT32(4);
+	unsigned char *buf;
+
+	Assert(PG_GETARG_INT32(0) == PG_BIG5);
+	Assert(PG_GETARG_INT32(1) == PG_EUC_TW);
+	Assert(len > 0);
+
+	/* intermediate MIC string: ENCODING_GROWTH_RATE bytes per source byte */
+	buf = palloc(len * ENCODING_GROWTH_RATE);
+	big52mic(src, buf, len);
+	mic2euc_tw(buf, dest, strlen(buf));
+	pfree(buf);
+
+	PG_RETURN_INT32(0);
+}
+
+/* EUC_TW ---> MULE_INTERNAL */
+Datum
+euc_tw_to_mic(PG_FUNCTION_ARGS)
+{
+	unsigned char *src = PG_GETARG_CSTRING(2);
+	unsigned char *dest = PG_GETARG_CSTRING(3);
+	int			len = PG_GETARG_INT32(4);
+
+	Assert(PG_GETARG_INT32(0) == PG_EUC_TW);
+	Assert(PG_GETARG_INT32(1) == PG_MULE_INTERNAL);
+	Assert(len > 0);
+
+	euc_tw2mic(src, dest, len);
+
+	PG_RETURN_INT32(0);
+}
+
+/* MULE_INTERNAL ---> EUC_TW */
+Datum
+mic_to_euc_tw(PG_FUNCTION_ARGS)
+{
+	unsigned char *src = PG_GETARG_CSTRING(2);
+	unsigned char *dest = PG_GETARG_CSTRING(3);
+	int			len = PG_GETARG_INT32(4);
+
+	Assert(PG_GETARG_INT32(0) == PG_MULE_INTERNAL);
+	Assert(PG_GETARG_INT32(1) == PG_EUC_TW);
+	Assert(len > 0);
+
+	/* the destination is EUC_TW, so use the EUC_TW writer, not mic2big5 */
+	mic2euc_tw(src, dest, len);
+
+	PG_RETURN_INT32(0);
+}
+
+/* BIG5 ---> MULE_INTERNAL */
+Datum
+big5_to_mic(PG_FUNCTION_ARGS)
+{
+	unsigned char *src = PG_GETARG_CSTRING(2);
+	unsigned char *dest = PG_GETARG_CSTRING(3);
+	int			len = PG_GETARG_INT32(4);
+
+	Assert(PG_GETARG_INT32(0) == PG_BIG5);
+	Assert(PG_GETARG_INT32(1) == PG_MULE_INTERNAL);
+	Assert(len > 0);
+
+	big52mic(src, dest, len);
+
+	PG_RETURN_INT32(0);
+}
+
+/* MULE_INTERNAL ---> BIG5 */
+Datum
+mic_to_big5(PG_FUNCTION_ARGS)
+{
+	unsigned char *src = PG_GETARG_CSTRING(2);
+	unsigned char *dest = PG_GETARG_CSTRING(3);
+	int			len = PG_GETARG_INT32(4);
+
+	Assert(PG_GETARG_INT32(0) == PG_MULE_INTERNAL);
+	Assert(PG_GETARG_INT32(1) == PG_BIG5);
+	Assert(len > 0);
+
+	mic2big5(src, dest, len);
+
+	PG_RETURN_INT32(0);
+}
+
+/*
+ * EUC_TW ---> MIC
+ *
+ * Reads at most "len" bytes from "euc" (stopping early at a NUL byte) and
+ * writes a NUL-terminated MIC string to "p".
+ */
+static void
+euc_tw2mic(unsigned char *euc, unsigned char *p, int len)
+{
+	int			c1;
+
+	while (len > 0 && (c1 = *euc++))
+	{
+		if (c1 == SS2)
+		{
+			len -= 4;
+			c1 = *euc++;		/* plane No. */
+			if (c1 == 0xa1)
+				*p++ = LC_CNS11643_1;
+			else if (c1 == 0xa2)
+				*p++ = LC_CNS11643_2;
+			else
+			{
+				*p++ = 0x9d;	/* LCPRV2 */
+				/*
+				 * Plane byte 0xa3 corresponds to LC_CNS11643_3 and
+				 * higher planes follow upwards.  This must be the exact
+				 * inverse of the computation in mic2euc_tw().
+				 */
+				*p++ = c1 - 0xa3 + LC_CNS11643_3;
+			}
+			*p++ = *euc++;
+			*p++ = *euc++;
+		}
+		else if (c1 & 0x80)
+		{	/* CNS11643-1 */
+			len -= 2;
+			*p++ = LC_CNS11643_1;
+			*p++ = c1;
+			*p++ = *euc++;
+		}
+		else
+		{	/* should be ASCII */
+			len--;
+			*p++ = c1;
+		}
+	}
+	*p = '\0';
+}
+
+/*
+ * MIC ---> EUC_TW
+ *
+ * Reads at most "len" bytes from "mic" (stopping early at a NUL byte) and
+ * writes a NUL-terminated EUC_TW string to "p".  Characters that are not
+ * CNS 11643 or ASCII are handed to pg_print_bogus_char().
+ */
+static void
+mic2euc_tw(unsigned char *mic, unsigned char *p, int len)
+{
+	int			c1;
+
+	while (len > 0 && (c1 = *mic))
+	{
+		len -= pg_mic_mblen(mic++);
+
+		if (c1 == LC_CNS11643_1)
+		{
+			*p++ = *mic++;
+			*p++ = *mic++;
+		}
+		else if (c1 == LC_CNS11643_2)
+		{
+			*p++ = SS2;
+			*p++ = 0xa2;
+			*p++ = *mic++;
+			*p++ = *mic++;
+		}
+		else if (c1 == 0x9d)
+		{	/* LCPRV2? */
+			*p++ = SS2;
+			*p++ = *mic++ - LC_CNS11643_3 + 0xa3;
+			*p++ = *mic++;
+			*p++ = *mic++;
+		}
+		else if (c1 > 0x7f)
+		{	/* cannot convert to EUC_TW! */
+			mic--;
+			pg_print_bogus_char(&mic, &p);
+		}
+		else
+		{	/* should be ASCII */
+			*p++ = c1;
+		}
+	}
+	*p = '\0';
+}
+
+/*
+ * Big5 ---> MIC
+ *
+ * Reads at most "len" bytes from "big5" (stopping early at a NUL byte) and
+ * writes a NUL-terminated MIC string to "p".  A two-byte code with no CNS
+ * mapping is written as "(xxxx)", the hex dump of its two bytes.
+ */
+static void
+big52mic(unsigned char *big5, unsigned char *p, int len)
+{
+	unsigned short c1;
+	unsigned short big5buf,
+				cnsBuf;
+	unsigned char lc;
+	char		bogusBuf[3];
+	int			i;
+
+	while (len > 0 && (c1 = *big5++))
+	{
+		if (c1 <= 0x7fU)
+		{	/* ASCII */
+			len--;
+			*p++ = c1;
+		}
+		else
+		{
+			len -= 2;
+			big5buf = c1 << 8;
+			c1 = *big5++;
+			big5buf |= c1;
+			cnsBuf = BIG5toCNS(big5buf, &lc);
+			if (lc != 0)
+			{
+				if (lc == LC_CNS11643_3 || lc == LC_CNS11643_4)
+				{
+					*p++ = 0x9d;	/* LCPRV2 */
+				}
+				*p++ = lc;		/* Plane No. */
+				*p++ = (cnsBuf >> 8) & 0x00ff;
+				*p++ = cnsBuf & 0x00ff;
+			}
+			else
+			{	/* cannot convert */
+				big5 -= 2;
+				*p++ = '(';
+				for (i = 0; i < 2; i++)
+				{
+					sprintf(bogusBuf, "%02x", *big5++);
+					*p++ = bogusBuf[0];
+					*p++ = bogusBuf[1];
+				}
+				*p++ = ')';
+			}
+		}
+	}
+	*p = '\0';
+}
+
+/*
+ * MIC ---> Big5
+ *
+ * Reads at most "len" bytes from "mic" (stopping early at a NUL byte) and
+ * writes a NUL-terminated Big5 string to "p".  Characters that cannot be
+ * expressed in Big5 are handed to pg_print_bogus_char().
+ */
+static void
+mic2big5(unsigned char *mic, unsigned char *p, int len)
+{
+	int			l;
+	unsigned short c1;
+	unsigned short big5buf,
+				cnsBuf;
+
+	while (len > 0 && (c1 = *mic))
+	{
+		l = pg_mic_mblen(mic++);
+		len -= l;
+
+		/* 0x9d means LCPRV2 */
+		if (c1 == LC_CNS11643_1 || c1 == LC_CNS11643_2 || c1 == 0x9d)
+		{
+			if (c1 == 0x9d)
+			{
+				c1 = *mic++;	/* get plane no. */
+			}
+			cnsBuf = (*mic++) << 8;
+			cnsBuf |= (*mic++) & 0x00ff;
+			big5buf = CNStoBIG5(cnsBuf, c1);
+			if (big5buf == 0)
+			{	/* cannot convert to Big5! */
+				/* rewind to the first byte of this character */
+				mic -= l;
+				pg_print_bogus_char(&mic, &p);
+			}
+			else
+			{
+				*p++ = (big5buf >> 8) & 0x00ff;
+				*p++ = big5buf & 0x00ff;
+			}
+		}
+		else if (c1 <= 0x7f)	/* ASCII */
+			*p++ = c1;
+		else
+		{	/* cannot convert to Big5! */
+			mic--;
+			pg_print_bogus_char(&mic, &p);
+		}
+	}
+	*p = '\0';
+}
diff --git a/src/backend/utils/mb/conversion_procs/proc.mk b/src/backend/utils/mb/conversion_procs/proc.mk
new file mode 100644
index 0000000000..a6582d0612
--- /dev/null
+++ b/src/backend/utils/mb/conversion_procs/proc.mk
@@ -0,0 +1,21 @@
+SRCS		+= $(NAME).c
+OBJS		+= $(NAME).o
+
+PG_CPPFLAGS :=
+SHLIB_LINK :=
+
+SO_MAJOR_VERSION := 0
+SO_MINOR_VERSION := 0
+rpath =
+
+install: all
+	$(INSTALL_SHLIB) $(shlib) $(DESTDIR)$(pkglibdir)/$(NAME)$(DLSUFFIX)
+
+uninstall: uninstall-lib
+
+clean distclean maintainer-clean: clean-lib
+	$(RM) $(OBJS)
+
+include $(top_builddir)/src/Makefile.shlib
+
+all: $(shlib)
diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_ascii/Makefile b/src/backend/utils/mb/conversion_procs/utf8_and_ascii/Makefile
new file mode 100644
index 0000000000..6db75ac1a7
--- /dev/null
+++ b/src/backend/utils/mb/conversion_procs/utf8_and_ascii/Makefile
@@ -0,0 +1,11 @@
+#-------------------------------------------------------------------------
+#
+# $Id: Makefile,v 1.1 2002/07/16 09:25:05 ishii Exp $
+#
+#-------------------------------------------------------------------------
+top_builddir = ../../../../../..
+include $(top_builddir)/src/Makefile.global + +NAME := utf8_and_ascii + +include ../proc.mk diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_ascii/utf8_and_ascii.c b/src/backend/utils/mb/conversion_procs/utf8_and_ascii/utf8_and_ascii.c new file mode 100644 index 0000000000..db84f9f05d --- /dev/null +++ b/src/backend/utils/mb/conversion_procs/utf8_and_ascii/utf8_and_ascii.c @@ -0,0 +1,65 @@ +/*------------------------------------------------------------------------- + * + * ASCII <--> UTF-8 + * + * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * $Header: /cvsroot/pgsql/src/backend/utils/mb/conversion_procs/utf8_and_ascii/utf8_and_ascii.c,v 1.1 2002/07/16 09:25:05 ishii Exp $ + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" +#include "fmgr.h" +#include "mb/pg_wchar.h" + +PG_FUNCTION_INFO_V1(ascii_to_utf8) +PG_FUNCTION_INFO_V1(utf8_to_ascii) + +extern Datum ascii_to_utf8(PG_FUNCTION_ARGS); +extern Datum utf8_to_ascii(PG_FUNCTION_ARGS); + +/* ---------- + * conv_proc( + * INTEGER, -- source encoding id + * INTEGER, -- destination encoding id + * OPAQUE, -- source string (null terminated C string) + * OPAQUE, -- destination string (null terminated C string) + * INTEGER -- source string length + * ) returns INTEGER; -- dummy. returns nothing, actually. 
+ * ----------
+ */
+
+/*
+ * SQL_ASCII ---> UTF8.  Delegates to pg_ascii2mic() for the actual copy.
+ */
+Datum
+ascii_to_utf8(PG_FUNCTION_ARGS)
+{
+	unsigned char *in;
+	unsigned char *out;
+	int			nbytes;
+
+	/* the caller must have selected exactly this encoding pair */
+	Assert(PG_GETARG_INT32(0) == PG_SQL_ASCII);
+	Assert(PG_GETARG_INT32(1) == PG_UTF8);
+
+	in = PG_GETARG_CSTRING(2);
+	out = PG_GETARG_CSTRING(3);
+	nbytes = PG_GETARG_INT32(4);
+	Assert(nbytes > 0);
+
+	pg_ascii2mic(in, out, nbytes);
+
+	PG_RETURN_INT32(0);
+}
+
+/*
+ * UTF8 ---> SQL_ASCII.  Delegates to pg_mic2ascii() for the actual copy.
+ */
+Datum
+utf8_to_ascii(PG_FUNCTION_ARGS)
+{
+	unsigned char *in;
+	unsigned char *out;
+	int			nbytes;
+
+	/* the caller must have selected exactly this encoding pair */
+	Assert(PG_GETARG_INT32(0) == PG_UTF8);
+	Assert(PG_GETARG_INT32(1) == PG_SQL_ASCII);
+
+	in = PG_GETARG_CSTRING(2);
+	out = PG_GETARG_CSTRING(3);
+	nbytes = PG_GETARG_INT32(4);
+	Assert(nbytes > 0);
+
+	pg_mic2ascii(in, out, nbytes);
+
+	PG_RETURN_INT32(0);
+}
diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_big5/Makefile b/src/backend/utils/mb/conversion_procs/utf8_and_big5/Makefile
new file mode 100644
index 0000000000..0aac095f99
--- /dev/null
+++ b/src/backend/utils/mb/conversion_procs/utf8_and_big5/Makefile
@@ -0,0 +1,11 @@
+#-------------------------------------------------------------------------
+#
+# $Id: Makefile,v 1.1 2002/07/16 09:25:05 ishii Exp $
+#
+#-------------------------------------------------------------------------
+top_builddir = ../../../../../..
+include $(top_builddir)/src/Makefile.global + +NAME := utf8_and_big5 + +include ../proc.mk diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_big5/utf8_and_big5.c b/src/backend/utils/mb/conversion_procs/utf8_and_big5/utf8_and_big5.c new file mode 100644 index 0000000000..b40db46665 --- /dev/null +++ b/src/backend/utils/mb/conversion_procs/utf8_and_big5/utf8_and_big5.c @@ -0,0 +1,68 @@ +/*------------------------------------------------------------------------- + * + * BIG5 <--> UTF-8 + * + * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * $Header: /cvsroot/pgsql/src/backend/utils/mb/conversion_procs/utf8_and_big5/utf8_and_big5.c,v 1.1 2002/07/16 09:25:05 ishii Exp $ + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" +#include "fmgr.h" +#include "mb/pg_wchar.h" +#include "../../Unicode/big5_to_utf8.map" +#include "../../Unicode/utf8_to_big5.map" + +PG_FUNCTION_INFO_V1(big5_to_utf8) +PG_FUNCTION_INFO_V1(utf8_to_big5) + +extern Datum big5_to_utf8(PG_FUNCTION_ARGS); +extern Datum utf8_to_big5(PG_FUNCTION_ARGS); + +/* ---------- + * conv_proc( + * INTEGER, -- source encoding id + * INTEGER, -- destination encoding id + * OPAQUE, -- source string (null terminated C string) + * OPAQUE, -- destination string (null terminated C string) + * INTEGER -- source string length + * ) returns INTEGER; -- dummy. returns nothing, actually. 
+ * ----------
+ */
+/*
+ * BIG5 ---> UTF8, table-driven through LocalToUtf() and LUmapBIG5.
+ */
+Datum
+big5_to_utf8(PG_FUNCTION_ARGS)
+{
+	unsigned char *in;
+	unsigned char *out;
+	int			nbytes;
+	int			nentries = sizeof(LUmapBIG5) / sizeof(pg_local_to_utf);
+
+	/* the caller must have selected exactly this encoding pair */
+	Assert(PG_GETARG_INT32(0) == PG_BIG5);
+	Assert(PG_GETARG_INT32(1) == PG_UTF8);
+
+	in = PG_GETARG_CSTRING(2);
+	out = PG_GETARG_CSTRING(3);
+	nbytes = PG_GETARG_INT32(4);
+	Assert(nbytes > 0);
+
+	LocalToUtf(in, out, LUmapBIG5, nentries, PG_BIG5, nbytes);
+
+	PG_RETURN_INT32(0);
+}
+
+/*
+ * UTF8 ---> BIG5, table-driven through UtfToLocal() and ULmapBIG5.
+ */
+Datum
+utf8_to_big5(PG_FUNCTION_ARGS)
+{
+	unsigned char *in;
+	unsigned char *out;
+	int			nbytes;
+	int			nentries = sizeof(ULmapBIG5) / sizeof(pg_utf_to_local);
+
+	/* the caller must have selected exactly this encoding pair */
+	Assert(PG_GETARG_INT32(0) == PG_UTF8);
+	Assert(PG_GETARG_INT32(1) == PG_BIG5);
+
+	in = PG_GETARG_CSTRING(2);
+	out = PG_GETARG_CSTRING(3);
+	nbytes = PG_GETARG_INT32(4);
+	Assert(nbytes > 0);
+
+	UtfToLocal(in, out, ULmapBIG5, nentries, nbytes);
+
+	PG_RETURN_INT32(0);
+}
diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_euc_cn/Makefile b/src/backend/utils/mb/conversion_procs/utf8_and_euc_cn/Makefile
new file mode 100644
index 0000000000..aa82a66f5d
--- /dev/null
+++ b/src/backend/utils/mb/conversion_procs/utf8_and_euc_cn/Makefile
@@ -0,0 +1,11 @@
+#-------------------------------------------------------------------------
+#
+# $Id: Makefile,v 1.1 2002/07/16 09:25:05 ishii Exp $
+#
+#-------------------------------------------------------------------------
+top_builddir = ../../../../../..
+include $(top_builddir)/src/Makefile.global + +NAME := utf8_and_euc_cn + +include ../proc.mk diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_euc_cn/utf8_and_euc_cn.c b/src/backend/utils/mb/conversion_procs/utf8_and_euc_cn/utf8_and_euc_cn.c new file mode 100644 index 0000000000..e5edd24b7b --- /dev/null +++ b/src/backend/utils/mb/conversion_procs/utf8_and_euc_cn/utf8_and_euc_cn.c @@ -0,0 +1,68 @@ +/*------------------------------------------------------------------------- + * + * EUC_CN <--> UTF-8 + * + * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * $Header: /cvsroot/pgsql/src/backend/utils/mb/conversion_procs/utf8_and_euc_cn/utf8_and_euc_cn.c,v 1.1 2002/07/16 09:25:05 ishii Exp $ + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" +#include "fmgr.h" +#include "mb/pg_wchar.h" +#include "../../Unicode/euc_cn_to_utf8.map" +#include "../../Unicode/utf8_to_euc_cn.map" + +PG_FUNCTION_INFO_V1(euc_cn_to_utf8) +PG_FUNCTION_INFO_V1(utf8_to_euc_cn) + +extern Datum euc_cn_to_utf8(PG_FUNCTION_ARGS); +extern Datum utf8_to_euc_cn(PG_FUNCTION_ARGS); + +/* ---------- + * conv_proc( + * INTEGER, -- source encoding id + * INTEGER, -- destination encoding id + * OPAQUE, -- source string (null terminated C string) + * OPAQUE, -- destination string (null terminated C string) + * INTEGER -- source string length + * ) returns INTEGER; -- dummy. returns nothing, actually. 
+ * ----------
+ */
+/*
+ * EUC_CN ---> UTF8, table-driven through LocalToUtf() and LUmapEUC_CN.
+ */
+Datum
+euc_cn_to_utf8(PG_FUNCTION_ARGS)
+{
+	unsigned char *in;
+	unsigned char *out;
+	int			nbytes;
+	int			nentries = sizeof(LUmapEUC_CN) / sizeof(pg_local_to_utf);
+
+	/* the caller must have selected exactly this encoding pair */
+	Assert(PG_GETARG_INT32(0) == PG_EUC_CN);
+	Assert(PG_GETARG_INT32(1) == PG_UTF8);
+
+	in = PG_GETARG_CSTRING(2);
+	out = PG_GETARG_CSTRING(3);
+	nbytes = PG_GETARG_INT32(4);
+	Assert(nbytes > 0);
+
+	LocalToUtf(in, out, LUmapEUC_CN, nentries, PG_EUC_CN, nbytes);
+
+	PG_RETURN_INT32(0);
+}
+
+/*
+ * UTF8 ---> EUC_CN, table-driven through UtfToLocal() and ULmapEUC_CN.
+ */
+Datum
+utf8_to_euc_cn(PG_FUNCTION_ARGS)
+{
+	unsigned char *in;
+	unsigned char *out;
+	int			nbytes;
+	int			nentries = sizeof(ULmapEUC_CN) / sizeof(pg_utf_to_local);
+
+	/* the caller must have selected exactly this encoding pair */
+	Assert(PG_GETARG_INT32(0) == PG_UTF8);
+	Assert(PG_GETARG_INT32(1) == PG_EUC_CN);
+
+	in = PG_GETARG_CSTRING(2);
+	out = PG_GETARG_CSTRING(3);
+	nbytes = PG_GETARG_INT32(4);
+	Assert(nbytes > 0);
+
+	UtfToLocal(in, out, ULmapEUC_CN, nentries, nbytes);
+
+	PG_RETURN_INT32(0);
+}
diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_euc_jp/Makefile b/src/backend/utils/mb/conversion_procs/utf8_and_euc_jp/Makefile
new file mode 100644
index 0000000000..5ba7466c0e
--- /dev/null
+++ b/src/backend/utils/mb/conversion_procs/utf8_and_euc_jp/Makefile
@@ -0,0 +1,11 @@
+#-------------------------------------------------------------------------
+#
+# $Id: Makefile,v 1.1 2002/07/16 09:25:05 ishii Exp $
+#
+#-------------------------------------------------------------------------
+top_builddir = ../../../../../..
+include $(top_builddir)/src/Makefile.global + +NAME := utf8_and_euc_jp + +include ../proc.mk diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_euc_jp/utf8_and_euc_jp.c b/src/backend/utils/mb/conversion_procs/utf8_and_euc_jp/utf8_and_euc_jp.c new file mode 100644 index 0000000000..8d702f391c --- /dev/null +++ b/src/backend/utils/mb/conversion_procs/utf8_and_euc_jp/utf8_and_euc_jp.c @@ -0,0 +1,68 @@ +/*------------------------------------------------------------------------- + * + * EUC_JP <--> UTF-8 + * + * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * $Header: /cvsroot/pgsql/src/backend/utils/mb/conversion_procs/utf8_and_euc_jp/utf8_and_euc_jp.c,v 1.1 2002/07/16 09:25:05 ishii Exp $ + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" +#include "fmgr.h" +#include "mb/pg_wchar.h" +#include "../../Unicode/euc_jp_to_utf8.map" +#include "../../Unicode/utf8_to_euc_jp.map" + +PG_FUNCTION_INFO_V1(euc_jp_to_utf8) +PG_FUNCTION_INFO_V1(utf8_to_euc_jp) + +extern Datum euc_jp_to_utf8(PG_FUNCTION_ARGS); +extern Datum utf8_to_euc_jp(PG_FUNCTION_ARGS); + +/* ---------- + * conv_proc( + * INTEGER, -- source encoding id + * INTEGER, -- destination encoding id + * OPAQUE, -- source string (null terminated C string) + * OPAQUE, -- destination string (null terminated C string) + * INTEGER -- source string length + * ) returns INTEGER; -- dummy. returns nothing, actually. 
+ * ----------
+ */
+/*
+ * EUC_JP ---> UTF8, table-driven through LocalToUtf() and LUmapEUC_JP.
+ */
+Datum
+euc_jp_to_utf8(PG_FUNCTION_ARGS)
+{
+	unsigned char *in;
+	unsigned char *out;
+	int			nbytes;
+	int			nentries = sizeof(LUmapEUC_JP) / sizeof(pg_local_to_utf);
+
+	/* the caller must have selected exactly this encoding pair */
+	Assert(PG_GETARG_INT32(0) == PG_EUC_JP);
+	Assert(PG_GETARG_INT32(1) == PG_UTF8);
+
+	in = PG_GETARG_CSTRING(2);
+	out = PG_GETARG_CSTRING(3);
+	nbytes = PG_GETARG_INT32(4);
+	Assert(nbytes > 0);
+
+	LocalToUtf(in, out, LUmapEUC_JP, nentries, PG_EUC_JP, nbytes);
+
+	PG_RETURN_INT32(0);
+}
+
+/*
+ * UTF8 ---> EUC_JP, table-driven through UtfToLocal() and ULmapEUC_JP.
+ */
+Datum
+utf8_to_euc_jp(PG_FUNCTION_ARGS)
+{
+	unsigned char *in;
+	unsigned char *out;
+	int			nbytes;
+	int			nentries = sizeof(ULmapEUC_JP) / sizeof(pg_utf_to_local);
+
+	/* the caller must have selected exactly this encoding pair */
+	Assert(PG_GETARG_INT32(0) == PG_UTF8);
+	Assert(PG_GETARG_INT32(1) == PG_EUC_JP);
+
+	in = PG_GETARG_CSTRING(2);
+	out = PG_GETARG_CSTRING(3);
+	nbytes = PG_GETARG_INT32(4);
+	Assert(nbytes > 0);
+
+	UtfToLocal(in, out, ULmapEUC_JP, nentries, nbytes);
+
+	PG_RETURN_INT32(0);
+}
diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_euc_kr/Makefile b/src/backend/utils/mb/conversion_procs/utf8_and_euc_kr/Makefile
new file mode 100644
index 0000000000..bb9657f4f1
--- /dev/null
+++ b/src/backend/utils/mb/conversion_procs/utf8_and_euc_kr/Makefile
@@ -0,0 +1,11 @@
+#-------------------------------------------------------------------------
+#
+# $Id: Makefile,v 1.1 2002/07/16 09:25:05 ishii Exp $
+#
+#-------------------------------------------------------------------------
+top_builddir = ../../../../../..
+include $(top_builddir)/src/Makefile.global + +NAME := utf8_and_euc_kr + +include ../proc.mk diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_euc_kr/utf8_and_euc_kr.c b/src/backend/utils/mb/conversion_procs/utf8_and_euc_kr/utf8_and_euc_kr.c new file mode 100644 index 0000000000..9f2761134b --- /dev/null +++ b/src/backend/utils/mb/conversion_procs/utf8_and_euc_kr/utf8_and_euc_kr.c @@ -0,0 +1,68 @@ +/*------------------------------------------------------------------------- + * + * EUC_KR <--> UTF-8 + * + * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * $Header: /cvsroot/pgsql/src/backend/utils/mb/conversion_procs/utf8_and_euc_kr/utf8_and_euc_kr.c,v 1.1 2002/07/16 09:25:05 ishii Exp $ + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" +#include "fmgr.h" +#include "mb/pg_wchar.h" +#include "../../Unicode/euc_kr_to_utf8.map" +#include "../../Unicode/utf8_to_euc_kr.map" + +PG_FUNCTION_INFO_V1(euc_kr_to_utf8) +PG_FUNCTION_INFO_V1(utf8_to_euc_kr) + +extern Datum euc_kr_to_utf8(PG_FUNCTION_ARGS); +extern Datum utf8_to_euc_kr(PG_FUNCTION_ARGS); + +/* ---------- + * conv_proc( + * INTEGER, -- source encoding id + * INTEGER, -- destination encoding id + * OPAQUE, -- source string (null terminated C string) + * OPAQUE, -- destination string (null terminated C string) + * INTEGER -- source string length + * ) returns INTEGER; -- dummy. returns nothing, actually. 
+ * ----------
+ */
+/* EUC_KR -> UTF-8 via LocalToUtf() and LUmapEUC_KR; the result is a dummy 0. */
+Datum
+euc_kr_to_utf8(PG_FUNCTION_ARGS)
+{
+	unsigned char *in = PG_GETARG_CSTRING(2);	/* source string */
+	unsigned char *out = PG_GETARG_CSTRING(3);	/* destination buffer */
+	int			in_len = PG_GETARG_INT32(4);	/* source length */
+	int			map_len = sizeof(LUmapEUC_KR) / sizeof(pg_local_to_utf);
+
+	Assert(PG_GETARG_INT32(0) == PG_EUC_KR);	/* source encoding id */
+	Assert(PG_GETARG_INT32(1) == PG_UTF8);		/* destination encoding id */
+	Assert(in_len > 0);
+	LocalToUtf(in, out, LUmapEUC_KR, map_len, PG_EUC_KR, in_len);
+
+	PG_RETURN_INT32(0);
+}
+
+/* UTF-8 -> EUC_KR via UtfToLocal() and ULmapEUC_KR; the result is a dummy 0. */
+Datum
+utf8_to_euc_kr(PG_FUNCTION_ARGS)
+{
+	unsigned char *in = PG_GETARG_CSTRING(2);	/* source string */
+	unsigned char *out = PG_GETARG_CSTRING(3);	/* destination buffer */
+	int			in_len = PG_GETARG_INT32(4);	/* source length */
+	int			map_len = sizeof(ULmapEUC_KR) / sizeof(pg_utf_to_local);
+
+	Assert(PG_GETARG_INT32(0) == PG_UTF8);		/* source encoding id */
+	Assert(PG_GETARG_INT32(1) == PG_EUC_KR);	/* destination encoding id */
+	Assert(in_len > 0);
+	UtfToLocal(in, out, ULmapEUC_KR, map_len, in_len);
+
+	PG_RETURN_INT32(0);
+}
diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_euc_tw/Makefile b/src/backend/utils/mb/conversion_procs/utf8_and_euc_tw/Makefile
new file mode 100644
index 0000000000..86c3df883f
--- /dev/null
+++ b/src/backend/utils/mb/conversion_procs/utf8_and_euc_tw/Makefile
@@ -0,0 +1,11 @@
+#-------------------------------------------------------------------------
+#
+# $Id: Makefile,v 1.1 2002/07/16 09:25:05 ishii Exp $
+#
+#-------------------------------------------------------------------------
+top_builddir = ../../../../../..
+include $(top_builddir)/src/Makefile.global + +NAME := utf8_and_euc_tw + +include ../proc.mk diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_euc_tw/utf8_and_euc_tw.c b/src/backend/utils/mb/conversion_procs/utf8_and_euc_tw/utf8_and_euc_tw.c new file mode 100644 index 0000000000..bafa218167 --- /dev/null +++ b/src/backend/utils/mb/conversion_procs/utf8_and_euc_tw/utf8_and_euc_tw.c @@ -0,0 +1,68 @@ +/*------------------------------------------------------------------------- + * + * EUC_TW <--> UTF-8 + * + * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * $Header: /cvsroot/pgsql/src/backend/utils/mb/conversion_procs/utf8_and_euc_tw/utf8_and_euc_tw.c,v 1.1 2002/07/16 09:25:05 ishii Exp $ + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" +#include "fmgr.h" +#include "mb/pg_wchar.h" +#include "../../Unicode/euc_tw_to_utf8.map" +#include "../../Unicode/utf8_to_euc_tw.map" + +PG_FUNCTION_INFO_V1(euc_tw_to_utf8) +PG_FUNCTION_INFO_V1(utf8_to_euc_tw) + +extern Datum euc_tw_to_utf8(PG_FUNCTION_ARGS); +extern Datum utf8_to_euc_tw(PG_FUNCTION_ARGS); + +/* ---------- + * conv_proc( + * INTEGER, -- source encoding id + * INTEGER, -- destination encoding id + * OPAQUE, -- source string (null terminated C string) + * OPAQUE, -- destination string (null terminated C string) + * INTEGER -- source string length + * ) returns INTEGER; -- dummy. returns nothing, actually. 
+ * ----------
+ */
+/* EUC_TW -> UTF-8 via LocalToUtf() and LUmapEUC_TW; the result is a dummy 0. */
+Datum
+euc_tw_to_utf8(PG_FUNCTION_ARGS)
+{
+	unsigned char *in = PG_GETARG_CSTRING(2);	/* source string */
+	unsigned char *out = PG_GETARG_CSTRING(3);	/* destination buffer */
+	int			in_len = PG_GETARG_INT32(4);	/* source length */
+	int			map_len = sizeof(LUmapEUC_TW) / sizeof(pg_local_to_utf);
+
+	Assert(PG_GETARG_INT32(0) == PG_EUC_TW);	/* source encoding id */
+	Assert(PG_GETARG_INT32(1) == PG_UTF8);		/* destination encoding id */
+	Assert(in_len > 0);
+	LocalToUtf(in, out, LUmapEUC_TW, map_len, PG_EUC_TW, in_len);
+
+	PG_RETURN_INT32(0);
+}
+
+/* UTF-8 -> EUC_TW via UtfToLocal() and ULmapEUC_TW; the result is a dummy 0. */
+Datum
+utf8_to_euc_tw(PG_FUNCTION_ARGS)
+{
+	unsigned char *in = PG_GETARG_CSTRING(2);	/* source string */
+	unsigned char *out = PG_GETARG_CSTRING(3);	/* destination buffer */
+	int			in_len = PG_GETARG_INT32(4);	/* source length */
+	int			map_len = sizeof(ULmapEUC_TW) / sizeof(pg_utf_to_local);
+
+	Assert(PG_GETARG_INT32(0) == PG_UTF8);		/* source encoding id */
+	Assert(PG_GETARG_INT32(1) == PG_EUC_TW);	/* destination encoding id */
+	Assert(in_len > 0);
+	UtfToLocal(in, out, ULmapEUC_TW, map_len, in_len);
+
+	PG_RETURN_INT32(0);
+}
diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_gb18030/Makefile b/src/backend/utils/mb/conversion_procs/utf8_and_gb18030/Makefile
new file mode 100644
index 0000000000..a77b8b2d6c
--- /dev/null
+++ b/src/backend/utils/mb/conversion_procs/utf8_and_gb18030/Makefile
@@ -0,0 +1,11 @@
+#-------------------------------------------------------------------------
+#
+# $Id: Makefile,v 1.1 2002/07/16 09:25:05 ishii Exp $
+#
+#-------------------------------------------------------------------------
+top_builddir = ../../../../../..
+include $(top_builddir)/src/Makefile.global + +NAME := utf8_and_gb18030 + +include ../proc.mk diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_gb18030/utf8_and_gb18030.c b/src/backend/utils/mb/conversion_procs/utf8_and_gb18030/utf8_and_gb18030.c new file mode 100644 index 0000000000..4587448a90 --- /dev/null +++ b/src/backend/utils/mb/conversion_procs/utf8_and_gb18030/utf8_and_gb18030.c @@ -0,0 +1,68 @@ +/*------------------------------------------------------------------------- + * + * GB18030 <--> UTF-8 + * + * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * $Header: /cvsroot/pgsql/src/backend/utils/mb/conversion_procs/utf8_and_gb18030/utf8_and_gb18030.c,v 1.1 2002/07/16 09:25:05 ishii Exp $ + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" +#include "fmgr.h" +#include "mb/pg_wchar.h" +#include "../../Unicode/gb18030_to_utf8.map" +#include "../../Unicode/utf8_to_gb18030.map" + +PG_FUNCTION_INFO_V1(gb18030_to_utf8) +PG_FUNCTION_INFO_V1(utf8_to_gb18030) + +extern Datum gb18030_to_utf8(PG_FUNCTION_ARGS); +extern Datum utf8_to_gb18030(PG_FUNCTION_ARGS); + +/* ---------- + * conv_proc( + * INTEGER, -- source encoding id + * INTEGER, -- destination encoding id + * OPAQUE, -- source string (null terminated C string) + * OPAQUE, -- destination string (null terminated C string) + * INTEGER -- source string length + * ) returns INTEGER; -- dummy. returns nothing, actually. 
+ * ----------
+ */
+/* GB18030 -> UTF-8 via LocalToUtf() and LUmapGB18030; the result is a dummy 0. */
+Datum
+gb18030_to_utf8(PG_FUNCTION_ARGS)
+{
+	unsigned char *in = PG_GETARG_CSTRING(2);	/* source string */
+	unsigned char *out = PG_GETARG_CSTRING(3);	/* destination buffer */
+	int			in_len = PG_GETARG_INT32(4);	/* source length */
+	int			map_len = sizeof(LUmapGB18030) / sizeof(pg_local_to_utf);
+
+	Assert(PG_GETARG_INT32(0) == PG_GB18030);	/* source encoding id */
+	Assert(PG_GETARG_INT32(1) == PG_UTF8);		/* destination encoding id */
+	Assert(in_len > 0);
+	LocalToUtf(in, out, LUmapGB18030, map_len, PG_GB18030, in_len);
+
+	PG_RETURN_INT32(0);
+}
+
+/* UTF-8 -> GB18030 via UtfToLocal() and ULmapGB18030; the result is a dummy 0. */
+Datum
+utf8_to_gb18030(PG_FUNCTION_ARGS)
+{
+	unsigned char *in = PG_GETARG_CSTRING(2);	/* source string */
+	unsigned char *out = PG_GETARG_CSTRING(3);	/* destination buffer */
+	int			in_len = PG_GETARG_INT32(4);	/* source length */
+	int			map_len = sizeof(ULmapGB18030) / sizeof(pg_utf_to_local);
+
+	Assert(PG_GETARG_INT32(0) == PG_UTF8);		/* source encoding id */
+	Assert(PG_GETARG_INT32(1) == PG_GB18030);	/* destination encoding id */
+	Assert(in_len > 0);
+	UtfToLocal(in, out, ULmapGB18030, map_len, in_len);
+
+	PG_RETURN_INT32(0);
+}
diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_gbk/Makefile b/src/backend/utils/mb/conversion_procs/utf8_and_gbk/Makefile
new file mode 100644
index 0000000000..26a3773d0b
--- /dev/null
+++ b/src/backend/utils/mb/conversion_procs/utf8_and_gbk/Makefile
@@ -0,0 +1,11 @@
+#-------------------------------------------------------------------------
+#
+# $Id: Makefile,v 1.1 2002/07/16 09:25:05 ishii Exp $
+#
+#-------------------------------------------------------------------------
+top_builddir = ../../../../../..
+include $(top_builddir)/src/Makefile.global + +NAME := utf8_and_gbk + +include ../proc.mk diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_gbk/utf8_and_gbk.c b/src/backend/utils/mb/conversion_procs/utf8_and_gbk/utf8_and_gbk.c new file mode 100644 index 0000000000..d81c4d7a18 --- /dev/null +++ b/src/backend/utils/mb/conversion_procs/utf8_and_gbk/utf8_and_gbk.c @@ -0,0 +1,68 @@ +/*------------------------------------------------------------------------- + * + * GBK <--> UTF-8 + * + * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * $Header: /cvsroot/pgsql/src/backend/utils/mb/conversion_procs/utf8_and_gbk/utf8_and_gbk.c,v 1.1 2002/07/16 09:25:05 ishii Exp $ + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" +#include "fmgr.h" +#include "mb/pg_wchar.h" +#include "../../Unicode/gbk_to_utf8.map" +#include "../../Unicode/utf8_to_gbk.map" + +PG_FUNCTION_INFO_V1(gbk_to_utf8) +PG_FUNCTION_INFO_V1(utf8_to_gbk) + +extern Datum gbk_to_utf8(PG_FUNCTION_ARGS); +extern Datum utf8_to_gbk(PG_FUNCTION_ARGS); + +/* ---------- + * conv_proc( + * INTEGER, -- source encoding id + * INTEGER, -- destination encoding id + * OPAQUE, -- source string (null terminated C string) + * OPAQUE, -- destination string (null terminated C string) + * INTEGER -- source string length + * ) returns INTEGER; -- dummy. returns nothing, actually. 
+ * ----------
+ */
+/* GBK -> UTF-8 via LocalToUtf() and LUmapGBK; the result is a dummy 0. */
+Datum
+gbk_to_utf8(PG_FUNCTION_ARGS)
+{
+	unsigned char *in = PG_GETARG_CSTRING(2);	/* source string */
+	unsigned char *out = PG_GETARG_CSTRING(3);	/* destination buffer */
+	int			in_len = PG_GETARG_INT32(4);	/* source length */
+	int			map_len = sizeof(LUmapGBK) / sizeof(pg_local_to_utf);
+
+	Assert(PG_GETARG_INT32(0) == PG_GBK);		/* source encoding id */
+	Assert(PG_GETARG_INT32(1) == PG_UTF8);		/* destination encoding id */
+	Assert(in_len > 0);
+	LocalToUtf(in, out, LUmapGBK, map_len, PG_GBK, in_len);
+
+	PG_RETURN_INT32(0);
+}
+
+/* UTF-8 -> GBK via UtfToLocal() and ULmapGBK; the result is a dummy 0. */
+Datum
+utf8_to_gbk(PG_FUNCTION_ARGS)
+{
+	unsigned char *in = PG_GETARG_CSTRING(2);	/* source string */
+	unsigned char *out = PG_GETARG_CSTRING(3);	/* destination buffer */
+	int			in_len = PG_GETARG_INT32(4);	/* source length */
+	int			map_len = sizeof(ULmapGBK) / sizeof(pg_utf_to_local);
+
+	Assert(PG_GETARG_INT32(0) == PG_UTF8);		/* source encoding id */
+	Assert(PG_GETARG_INT32(1) == PG_GBK);		/* destination encoding id */
+	Assert(in_len > 0);
+	UtfToLocal(in, out, ULmapGBK, map_len, in_len);
+
+	PG_RETURN_INT32(0);
+}
diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_iso8859/Makefile b/src/backend/utils/mb/conversion_procs/utf8_and_iso8859/Makefile
new file mode 100644
index 0000000000..d7dd6c3309
--- /dev/null
+++ b/src/backend/utils/mb/conversion_procs/utf8_and_iso8859/Makefile
@@ -0,0 +1,11 @@
+#-------------------------------------------------------------------------
+#
+# $Id: Makefile,v 1.1 2002/07/16 09:25:05 ishii Exp $
+#
+#-------------------------------------------------------------------------
+top_builddir = ../../../../../..
+include $(top_builddir)/src/Makefile.global
+
+NAME := utf8_and_iso8859
+
+include ../proc.mk
diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_iso8859/utf8_and_iso8859.c b/src/backend/utils/mb/conversion_procs/utf8_and_iso8859/utf8_and_iso8859.c
new file mode 100644
index 0000000000..5f270aadb4
--- /dev/null
+++ b/src/backend/utils/mb/conversion_procs/utf8_and_iso8859/utf8_and_iso8859.c
@@ -0,0 +1,206 @@
+/*-------------------------------------------------------------------------
+ *
+ *	  ISO 8859 2-16 <--> UTF-8
+ *
+ * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ *	  $Header: /cvsroot/pgsql/src/backend/utils/mb/conversion_procs/utf8_and_iso8859/utf8_and_iso8859.c,v 1.1 2002/07/16 09:25:05 ishii Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+#include "fmgr.h"
+#include "mb/pg_wchar.h"
+#include "../../Unicode/utf8_to_iso8859_2.map"
+#include "../../Unicode/utf8_to_iso8859_3.map"
+#include "../../Unicode/utf8_to_iso8859_4.map"
+#include "../../Unicode/utf8_to_iso8859_5.map"
+#include "../../Unicode/utf8_to_iso8859_6.map"
+#include "../../Unicode/utf8_to_iso8859_7.map"
+#include "../../Unicode/utf8_to_iso8859_8.map"
+#include "../../Unicode/utf8_to_iso8859_9.map"
+#include "../../Unicode/utf8_to_iso8859_10.map"
+#include "../../Unicode/utf8_to_iso8859_13.map"
+#include "../../Unicode/utf8_to_iso8859_14.map"
+#include "../../Unicode/utf8_to_iso8859_15.map"
+#include "../../Unicode/utf8_to_iso8859_16.map"
+#include "../../Unicode/iso8859_2_to_utf8.map"
+#include "../../Unicode/iso8859_3_to_utf8.map"
+#include "../../Unicode/iso8859_4_to_utf8.map"
+#include "../../Unicode/iso8859_5_to_utf8.map"
+#include "../../Unicode/iso8859_6_to_utf8.map"
+#include "../../Unicode/iso8859_7_to_utf8.map"
+#include "../../Unicode/iso8859_8_to_utf8.map"
+#include "../../Unicode/iso8859_9_to_utf8.map"
+#include "../../Unicode/iso8859_10_to_utf8.map"
+#include "../../Unicode/iso8859_13_to_utf8.map"
+#include "../../Unicode/iso8859_14_to_utf8.map"
+#include "../../Unicode/iso8859_15_to_utf8.map"
+#include "../../Unicode/iso8859_16_to_utf8.map"
+
+PG_FUNCTION_INFO_V1(iso8859_to_utf8)
+PG_FUNCTION_INFO_V1(utf8_to_iso8859)
+
+extern Datum iso8859_to_utf8(PG_FUNCTION_ARGS);
+extern Datum utf8_to_iso8859(PG_FUNCTION_ARGS);
+
+/* ----------
+ * conv_proc(
+ *		INTEGER,	-- source encoding id
+ *		INTEGER,	-- destination encoding id
+ *		OPAQUE,		-- source string (null terminated C string)
+ *		OPAQUE,		-- destination string (null terminated C string)
+ *		INTEGER		-- source string length
+ * ) returns INTEGER;	-- dummy. returns nothing, actually.
+ * ----------
+ */
+
+/*
+ * Conversion tables for one encoding.  Entries that contain only the
+ * encoding id are placeholders for encodings this module does not convert.
+ */
+typedef struct {
+	pg_enc		encoding;
+	pg_local_to_utf *map1;		/* to UTF-8 map name */
+	pg_utf_to_local *map2;		/* from UTF-8 map name */
+	int			size1;			/* size of map1 */
+	int			size2;			/* size of map2 */
+} pg_conv_map;
+
+/*
+ * Lookups match on the "encoding" field (see find_conv_map), so the
+ * position of an entry within the table does not matter.
+ */
+static pg_conv_map maps[] = {
+	{PG_SQL_ASCII},				/* SQL/ASCII */
+	{PG_EUC_JP},				/* EUC for Japanese */
+	{PG_EUC_CN},				/* EUC for Chinese */
+	{PG_EUC_KR},				/* EUC for Korean */
+	{PG_EUC_TW},				/* EUC for Taiwan */
+	{PG_JOHAB},					/* EUC for Korean JOHAB */
+	{PG_UTF8},					/* Unicode UTF-8 */
+	{PG_MULE_INTERNAL},			/* Mule internal code */
+	{PG_LATIN1},				/* ISO-8859-1 Latin 1 */
+	{PG_LATIN2, LUmapISO8859_2, ULmapISO8859_2,
+	 sizeof(LUmapISO8859_2)/sizeof(pg_local_to_utf),
+	 sizeof(ULmapISO8859_2)/sizeof(pg_utf_to_local)},	/* ISO-8859-2 Latin 2 */
+	{PG_LATIN3, LUmapISO8859_3, ULmapISO8859_3,
+	 sizeof(LUmapISO8859_3)/sizeof(pg_local_to_utf),
+	 sizeof(ULmapISO8859_3)/sizeof(pg_utf_to_local)},	/* ISO-8859-3 Latin 3 */
+	{PG_LATIN4, LUmapISO8859_4, ULmapISO8859_4,
+	 sizeof(LUmapISO8859_4)/sizeof(pg_local_to_utf),
+	 sizeof(ULmapISO8859_4)/sizeof(pg_utf_to_local)},	/* ISO-8859-4 Latin 4 */
+	{PG_LATIN5, LUmapISO8859_9, ULmapISO8859_9,
+	 sizeof(LUmapISO8859_9)/sizeof(pg_local_to_utf),
+	 sizeof(ULmapISO8859_9)/sizeof(pg_utf_to_local)},	/* ISO-8859-9 Latin 5 */
+	{PG_LATIN6, LUmapISO8859_10, ULmapISO8859_10,
+	 sizeof(LUmapISO8859_10)/sizeof(pg_local_to_utf),
+	 sizeof(ULmapISO8859_10)/sizeof(pg_utf_to_local)},	/* ISO-8859-10 Latin 6 */
+	{PG_LATIN7, LUmapISO8859_13, ULmapISO8859_13,
+	 sizeof(LUmapISO8859_13)/sizeof(pg_local_to_utf),
+	 sizeof(ULmapISO8859_13)/sizeof(pg_utf_to_local)},	/* ISO-8859-13 Latin 7 */
+	{PG_LATIN8, LUmapISO8859_14, ULmapISO8859_14,
+	 sizeof(LUmapISO8859_14)/sizeof(pg_local_to_utf),
+	 sizeof(ULmapISO8859_14)/sizeof(pg_utf_to_local)},	/* ISO-8859-14 Latin 8 */
+	{PG_LATIN9, LUmapISO8859_15, ULmapISO8859_15,
+	 sizeof(LUmapISO8859_15)/sizeof(pg_local_to_utf),
+	 sizeof(ULmapISO8859_15)/sizeof(pg_utf_to_local)},	/* ISO-8859-15 Latin 9 */
+	{PG_LATIN10, LUmapISO8859_16, ULmapISO8859_16,
+	 sizeof(LUmapISO8859_16)/sizeof(pg_local_to_utf),
+	 sizeof(ULmapISO8859_16)/sizeof(pg_utf_to_local)},	/* ISO-8859-16 Latin 10 */
+	{PG_WIN1256},				/* windows-1256 */
+	{PG_TCVN},					/* TCVN (Windows-1258) */
+	{PG_WIN874},				/* windows-874 */
+	{PG_KOI8R},					/* KOI8-R */
+	{PG_WIN1251},				/* windows-1251 (was: WIN) */
+	{PG_ALT},					/* (MS-DOS CP866) */
+	{PG_ISO_8859_5, LUmapISO8859_5, ULmapISO8859_5,
+	 sizeof(LUmapISO8859_5)/sizeof(pg_local_to_utf),
+	 sizeof(ULmapISO8859_5)/sizeof(pg_utf_to_local)},	/* ISO-8859-5 */
+	{PG_ISO_8859_6, LUmapISO8859_6, ULmapISO8859_6,
+	 sizeof(LUmapISO8859_6)/sizeof(pg_local_to_utf),
+	 sizeof(ULmapISO8859_6)/sizeof(pg_utf_to_local)},	/* ISO-8859-6 */
+	{PG_ISO_8859_7, LUmapISO8859_7, ULmapISO8859_7,
+	 sizeof(LUmapISO8859_7)/sizeof(pg_local_to_utf),
+	 sizeof(ULmapISO8859_7)/sizeof(pg_utf_to_local)},	/* ISO-8859-7 */
+	{PG_ISO_8859_8, LUmapISO8859_8, ULmapISO8859_8,
+	 sizeof(LUmapISO8859_8)/sizeof(pg_local_to_utf),
+	 sizeof(ULmapISO8859_8)/sizeof(pg_utf_to_local)},	/* ISO-8859-8 */
+};
+
+/*
+ * Return the maps[] entry for the given encoding id, or NULL when the id is
+ * not listed or is listed only as a placeholder without tables.
+ */
+static pg_conv_map *
+find_conv_map(int encoding)
+{
+	int			i;
+
+	for (i = 0; i < (int) (sizeof(maps) / sizeof(maps[0])); i++)
+	{
+		if ((int) maps[i].encoding != encoding)
+			continue;
+		if (maps[i].map1 == NULL || maps[i].map2 == NULL)
+			return NULL;
+		return &maps[i];
+	}
+	return NULL;
+}
+
+/*
+ * ISO 8859-n -> UTF-8.  Arg 0 is the source encoding id and selects the
+ * table, arg 1 is asserted to be PG_UTF8, arg 2 is the source string,
+ * arg 3 the destination buffer, arg 4 the source length.  Raises an error
+ * for an encoding id without tables; otherwise returns a dummy 0.
+ */
+Datum
+iso8859_to_utf8(PG_FUNCTION_ARGS)
+{
+	int			encoding = PG_GETARG_INT32(0);
+	unsigned char *src = PG_GETARG_CSTRING(2);
+	unsigned char *dest = PG_GETARG_CSTRING(3);
+	int			len = PG_GETARG_INT32(4);
+	pg_conv_map *entry = find_conv_map(encoding);
+
+	Assert(PG_GETARG_INT32(1) == PG_UTF8);
+	Assert(len > 0);
+
+	if (entry == NULL)
+		elog(ERROR, "Unexpected encoding id %d for ISO-8859 character sets",
+			 encoding);
+
+	LocalToUtf(src, dest, entry->map1, entry->size1, encoding, len);
+
+	PG_RETURN_INT32(0);
+}
+
+/*
+ * UTF-8 -> ISO 8859-n.  Arg 0 is asserted to be PG_UTF8, arg 1 is the
+ * destination encoding id and selects the table, arg 2 is the source
+ * string, arg 3 the destination buffer, arg 4 the source length.  Raises
+ * an error for an encoding id without tables; otherwise returns a dummy 0.
+ */
+Datum
+utf8_to_iso8859(PG_FUNCTION_ARGS)
+{
+	int			encoding = PG_GETARG_INT32(1);
+	unsigned char *src = PG_GETARG_CSTRING(2);
+	unsigned char *dest = PG_GETARG_CSTRING(3);
+	int			len = PG_GETARG_INT32(4);
+	pg_conv_map *entry = find_conv_map(encoding);
+
+	Assert(PG_GETARG_INT32(0) == PG_UTF8);
+	Assert(len > 0);
+
+	if (entry == NULL)
+		elog(ERROR, "Unexpected encoding id %d for ISO-8859 character sets",
+			 encoding);
+
+	UtfToLocal(src, dest, entry->map2, entry->size2, len);
+
+	PG_RETURN_INT32(0);
+}
diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_iso8859_1/Makefile b/src/backend/utils/mb/conversion_procs/utf8_and_iso8859_1/Makefile
new file mode 100644
index 0000000000..783140dfd5
--- /dev/null
+++ b/src/backend/utils/mb/conversion_procs/utf8_and_iso8859_1/Makefile
@@ -0,0 +1,11 @@
+#-------------------------------------------------------------------------
+#
+# $Id: Makefile,v 1.1 2002/07/16 09:25:05 ishii Exp $
+#
+#-------------------------------------------------------------------------
+top_builddir = ../../../../../..
+include $(top_builddir)/src/Makefile.global
+
+NAME := utf8_and_iso8859_1
+
+include ../proc.mk
diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_iso8859_1/utf8_and_iso8859_1.c b/src/backend/utils/mb/conversion_procs/utf8_and_iso8859_1/utf8_and_iso8859_1.c
new file mode 100644
index 0000000000..80c91a067d
--- /dev/null
+++ b/src/backend/utils/mb/conversion_procs/utf8_and_iso8859_1/utf8_and_iso8859_1.c
@@ -0,0 +1,110 @@
+/*-------------------------------------------------------------------------
+ *
+ *	  ISO8859_1 <--> UTF-8
+ *
+ * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ *	  $Header: /cvsroot/pgsql/src/backend/utils/mb/conversion_procs/utf8_and_iso8859_1/utf8_and_iso8859_1.c,v 1.1 2002/07/16 09:25:05 ishii Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+#include "fmgr.h"
+#include "mb/pg_wchar.h"
+
+PG_FUNCTION_INFO_V1(iso8859_1_to_utf8)
+PG_FUNCTION_INFO_V1(utf8_to_iso8859_1)
+
+extern Datum iso8859_1_to_utf8(PG_FUNCTION_ARGS);
+extern Datum utf8_to_iso8859_1(PG_FUNCTION_ARGS);
+
+/* ----------
+ * conv_proc(
+ *		INTEGER,	-- source encoding id
+ *		INTEGER,	-- destination encoding id
+ *		OPAQUE,		-- source string (null terminated C string)
+ *		OPAQUE,		-- destination string (null terminated C string)
+ *		INTEGER		-- source string length
+ * ) returns INTEGER;	-- dummy. returns nothing, actually.
+ * ----------
+ */
+
+/*
+ * ISO8859-1 -> UTF-8.  Arg 2 is the source string, arg 3 the destination
+ * buffer, arg 4 the source length.  Bytes below 0x80 are copied unchanged,
+ * all others become a two-byte UTF-8 sequence.  Conversion stops at the
+ * first zero byte or after len bytes.  Returns a dummy 0.
+ */
+Datum
+iso8859_1_to_utf8(PG_FUNCTION_ARGS)
+{
+	unsigned char *src = PG_GETARG_CSTRING(2);
+	unsigned char *dest = PG_GETARG_CSTRING(3);
+	int			len = PG_GETARG_INT32(4);
+	unsigned short c;
+
+	Assert(PG_GETARG_INT32(0) == PG_LATIN1);
+	Assert(PG_GETARG_INT32(1) == PG_UTF8);
+	Assert(len > 0);
+
+	while (len-- > 0 && (c = *src++))
+	{
+		if (c < 0x80)
+			*dest++ = c;
+		else
+		{
+			*dest++ = (c >> 6) | 0xc0;
+			*dest++ = (c & 0x003f) | 0x80;
+		}
+	}
+	*dest = '\0';
+
+	PG_RETURN_INT32(0);
+}
+
+/*
+ * UTF-8 -> ISO8859-1.  Same argument layout as iso8859_1_to_utf8().
+ *
+ * ISO8859-1 only covers U+0000..U+00FF, so the only multibyte sequences
+ * accepted are two-byte ones led by 0xc2 or 0xc3 and followed by a
+ * continuation byte that lies within the source length.  Every other byte
+ * with the high bit set raises an error instead of being silently
+ * truncated to eight bits or read past the end of the input.
+ */
+Datum
+utf8_to_iso8859_1(PG_FUNCTION_ARGS)
+{
+	unsigned char *src = PG_GETARG_CSTRING(2);
+	unsigned char *dest = PG_GETARG_CSTRING(3);
+	int			len = PG_GETARG_INT32(4);
+	unsigned short c,
+				c2;
+
+	Assert(PG_GETARG_INT32(0) == PG_UTF8);
+	Assert(PG_GETARG_INT32(1) == PG_LATIN1);
+	Assert(len > 0);
+
+	while (len > 0 && (c = *src++))
+	{
+		if (c < 0x80)
+		{
+			*dest++ = c;
+			len--;
+		}
+		else if ((c == 0xc2 || c == 0xc3) && len >= 2 &&
+				 (*src & 0xc0) == 0x80)
+		{
+			c2 = *src++ & 0x3f;
+			*dest++ = ((c & 0x03) << 6) | c2;
+			len -= 2;
+		}
+		else
+			elog(ERROR, "Could not convert UTF-8 to ISO8859-1");
+	}
+	*dest = '\0';
+
+	PG_RETURN_INT32(0);
+}
diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_johab/Makefile b/src/backend/utils/mb/conversion_procs/utf8_and_johab/Makefile
new file mode 100644
index 0000000000..31c2188f48
--- /dev/null
+++ b/src/backend/utils/mb/conversion_procs/utf8_and_johab/Makefile
@@ -0,0 +1,11 @@
+#-------------------------------------------------------------------------
+#
+# $Id: Makefile,v 1.1 2002/07/16 09:25:05 ishii Exp $
+#
+#-------------------------------------------------------------------------
+top_builddir = ../../../../../..
+include $(top_builddir)/src/Makefile.global + +NAME := utf8_and_johab + +include ../proc.mk diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_johab/utf8_and_johab.c b/src/backend/utils/mb/conversion_procs/utf8_and_johab/utf8_and_johab.c new file mode 100644 index 0000000000..e05e30e858 --- /dev/null +++ b/src/backend/utils/mb/conversion_procs/utf8_and_johab/utf8_and_johab.c @@ -0,0 +1,68 @@ +/*------------------------------------------------------------------------- + * + * JOHAB <--> UTF-8 + * + * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * $Header: /cvsroot/pgsql/src/backend/utils/mb/conversion_procs/utf8_and_johab/utf8_and_johab.c,v 1.1 2002/07/16 09:25:06 ishii Exp $ + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" +#include "fmgr.h" +#include "mb/pg_wchar.h" +#include "../../Unicode/johab_to_utf8.map" +#include "../../Unicode/utf8_to_johab.map" + +PG_FUNCTION_INFO_V1(johab_to_utf8) +PG_FUNCTION_INFO_V1(utf8_to_johab) + +extern Datum johab_to_utf8(PG_FUNCTION_ARGS); +extern Datum utf8_to_johab(PG_FUNCTION_ARGS); + +/* ---------- + * conv_proc( + * INTEGER, -- source encoding id + * INTEGER, -- destination encoding id + * OPAQUE, -- source string (null terminated C string) + * OPAQUE, -- destination string (null terminated C string) + * INTEGER -- source string length + * ) returns INTEGER; -- dummy. returns nothing, actually. 
+ * ----------
+ */
+/* JOHAB -> UTF-8 via LocalToUtf() and LUmapJOHAB; the result is a dummy 0. */
+Datum
+johab_to_utf8(PG_FUNCTION_ARGS)
+{
+	unsigned char *in = PG_GETARG_CSTRING(2);	/* source string */
+	unsigned char *out = PG_GETARG_CSTRING(3);	/* destination buffer */
+	int			in_len = PG_GETARG_INT32(4);	/* source length */
+	int			map_len = sizeof(LUmapJOHAB) / sizeof(pg_local_to_utf);
+
+	Assert(PG_GETARG_INT32(0) == PG_JOHAB);		/* source encoding id */
+	Assert(PG_GETARG_INT32(1) == PG_UTF8);		/* destination encoding id */
+	Assert(in_len > 0);
+	LocalToUtf(in, out, LUmapJOHAB, map_len, PG_JOHAB, in_len);
+
+	PG_RETURN_INT32(0);
+}
+
+/* UTF-8 -> JOHAB via UtfToLocal() and ULmapJOHAB; the result is a dummy 0. */
+Datum
+utf8_to_johab(PG_FUNCTION_ARGS)
+{
+	unsigned char *in = PG_GETARG_CSTRING(2);	/* source string */
+	unsigned char *out = PG_GETARG_CSTRING(3);	/* destination buffer */
+	int			in_len = PG_GETARG_INT32(4);	/* source length */
+	int			map_len = sizeof(ULmapJOHAB) / sizeof(pg_utf_to_local);
+
+	Assert(PG_GETARG_INT32(0) == PG_UTF8);		/* source encoding id */
+	Assert(PG_GETARG_INT32(1) == PG_JOHAB);		/* destination encoding id */
+	Assert(in_len > 0);
+	UtfToLocal(in, out, ULmapJOHAB, map_len, in_len);
+
+	PG_RETURN_INT32(0);
+}
diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_sjis/Makefile b/src/backend/utils/mb/conversion_procs/utf8_and_sjis/Makefile
new file mode 100644
index 0000000000..5b991f99ae
--- /dev/null
+++ b/src/backend/utils/mb/conversion_procs/utf8_and_sjis/Makefile
@@ -0,0 +1,11 @@
+#-------------------------------------------------------------------------
+#
+# $Id: Makefile,v 1.1 2002/07/16 09:25:06 ishii Exp $
+#
+#-------------------------------------------------------------------------
+top_builddir = ../../../../../..
+include $(top_builddir)/src/Makefile.global + +NAME := utf8_and_sjis + +include ../proc.mk diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_sjis/utf8_and_sjis.c b/src/backend/utils/mb/conversion_procs/utf8_and_sjis/utf8_and_sjis.c new file mode 100644 index 0000000000..50739f9f09 --- /dev/null +++ b/src/backend/utils/mb/conversion_procs/utf8_and_sjis/utf8_and_sjis.c @@ -0,0 +1,68 @@ +/*------------------------------------------------------------------------- + * + * SJIS <--> UTF-8 + * + * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * $Header: /cvsroot/pgsql/src/backend/utils/mb/conversion_procs/utf8_and_sjis/utf8_and_sjis.c,v 1.1 2002/07/16 09:25:06 ishii Exp $ + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" +#include "fmgr.h" +#include "mb/pg_wchar.h" +#include "../../Unicode/sjis_to_utf8.map" +#include "../../Unicode/utf8_to_sjis.map" + +PG_FUNCTION_INFO_V1(sjis_to_utf8) +PG_FUNCTION_INFO_V1(utf8_to_sjis) + +extern Datum sjis_to_utf8(PG_FUNCTION_ARGS); +extern Datum utf8_to_sjis(PG_FUNCTION_ARGS); + +/* ---------- + * conv_proc( + * INTEGER, -- source encoding id + * INTEGER, -- destination encoding id + * OPAQUE, -- source string (null terminated C string) + * OPAQUE, -- destination string (null terminated C string) + * INTEGER -- source string length + * ) returns INTEGER; -- dummy. returns nothing, actually. 
+ * ----------
+ */
+/* SJIS -> UTF-8 via LocalToUtf() and LUmapSJIS; the result is a dummy 0. */
+Datum
+sjis_to_utf8(PG_FUNCTION_ARGS)
+{
+	unsigned char *in = PG_GETARG_CSTRING(2);	/* source string */
+	unsigned char *out = PG_GETARG_CSTRING(3);	/* destination buffer */
+	int			in_len = PG_GETARG_INT32(4);	/* source length */
+	int			map_len = sizeof(LUmapSJIS) / sizeof(pg_local_to_utf);
+
+	Assert(PG_GETARG_INT32(0) == PG_SJIS);		/* source encoding id */
+	Assert(PG_GETARG_INT32(1) == PG_UTF8);		/* destination encoding id */
+	Assert(in_len > 0);
+	LocalToUtf(in, out, LUmapSJIS, map_len, PG_SJIS, in_len);
+
+	PG_RETURN_INT32(0);
+}
+
+/* UTF-8 -> SJIS via UtfToLocal() and ULmapSJIS; the result is a dummy 0. */
+Datum
+utf8_to_sjis(PG_FUNCTION_ARGS)
+{
+	unsigned char *in = PG_GETARG_CSTRING(2);	/* source string */
+	unsigned char *out = PG_GETARG_CSTRING(3);	/* destination buffer */
+	int			in_len = PG_GETARG_INT32(4);	/* source length */
+	int			map_len = sizeof(ULmapSJIS) / sizeof(pg_utf_to_local);
+
+	Assert(PG_GETARG_INT32(0) == PG_UTF8);		/* source encoding id */
+	Assert(PG_GETARG_INT32(1) == PG_SJIS);		/* destination encoding id */
+	Assert(in_len > 0);
+	UtfToLocal(in, out, ULmapSJIS, map_len, in_len);
+
+	PG_RETURN_INT32(0);
+}
diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_tcvn/Makefile b/src/backend/utils/mb/conversion_procs/utf8_and_tcvn/Makefile
new file mode 100644
index 0000000000..2e9115e467
--- /dev/null
+++ b/src/backend/utils/mb/conversion_procs/utf8_and_tcvn/Makefile
@@ -0,0 +1,11 @@
+#-------------------------------------------------------------------------
+#
+# $Id: Makefile,v 1.1 2002/07/16 09:25:06 ishii Exp $
+#
+#-------------------------------------------------------------------------
+top_builddir = ../../../../../..
+include $(top_builddir)/src/Makefile.global + +NAME := utf8_and_tcvn + +include ../proc.mk diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_tcvn/utf8_and_tcvn.c b/src/backend/utils/mb/conversion_procs/utf8_and_tcvn/utf8_and_tcvn.c new file mode 100644 index 0000000000..7486bcad73 --- /dev/null +++ b/src/backend/utils/mb/conversion_procs/utf8_and_tcvn/utf8_and_tcvn.c @@ -0,0 +1,68 @@ +/*------------------------------------------------------------------------- + * + * TCVN <--> UTF-8 + * + * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * $Header: /cvsroot/pgsql/src/backend/utils/mb/conversion_procs/utf8_and_tcvn/Attic/utf8_and_tcvn.c,v 1.1 2002/07/16 09:25:06 ishii Exp $ + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" +#include "fmgr.h" +#include "mb/pg_wchar.h" +#include "../../Unicode/tcvn_to_utf8.map" +#include "../../Unicode/utf8_to_tcvn.map" + +PG_FUNCTION_INFO_V1(tcvn_to_utf8) +PG_FUNCTION_INFO_V1(utf8_to_tcvn) + +extern Datum tcvn_to_utf8(PG_FUNCTION_ARGS); +extern Datum utf8_to_tcvn(PG_FUNCTION_ARGS); + +/* ---------- + * conv_proc( + * INTEGER, -- source encoding id + * INTEGER, -- destination encoding id + * OPAQUE, -- source string (null terminated C string) + * OPAQUE, -- destination string (null terminated C string) + * INTEGER -- source string length + * ) returns INTEGER; -- dummy. returns nothing, actually. 
+ * ----------
+ */
+/* TCVN -> UTF-8 via LocalToUtf() and LUmapTCVN; the result is a dummy 0. */
+Datum
+tcvn_to_utf8(PG_FUNCTION_ARGS)
+{
+	unsigned char *in = PG_GETARG_CSTRING(2);	/* source string */
+	unsigned char *out = PG_GETARG_CSTRING(3);	/* destination buffer */
+	int			in_len = PG_GETARG_INT32(4);	/* source length */
+	int			map_len = sizeof(LUmapTCVN) / sizeof(pg_local_to_utf);
+
+	Assert(PG_GETARG_INT32(0) == PG_TCVN);		/* source encoding id */
+	Assert(PG_GETARG_INT32(1) == PG_UTF8);		/* destination encoding id */
+	Assert(in_len > 0);
+	LocalToUtf(in, out, LUmapTCVN, map_len, PG_TCVN, in_len);
+
+	PG_RETURN_INT32(0);
+}
+
+/* UTF-8 -> TCVN via UtfToLocal() and ULmapTCVN; the result is a dummy 0. */
+Datum
+utf8_to_tcvn(PG_FUNCTION_ARGS)
+{
+	unsigned char *in = PG_GETARG_CSTRING(2);	/* source string */
+	unsigned char *out = PG_GETARG_CSTRING(3);	/* destination buffer */
+	int			in_len = PG_GETARG_INT32(4);	/* source length */
+	int			map_len = sizeof(ULmapTCVN) / sizeof(pg_utf_to_local);
+
+	Assert(PG_GETARG_INT32(0) == PG_UTF8);		/* source encoding id */
+	Assert(PG_GETARG_INT32(1) == PG_TCVN);		/* destination encoding id */
+	Assert(in_len > 0);
+	UtfToLocal(in, out, ULmapTCVN, map_len, in_len);
+
+	PG_RETURN_INT32(0);
+}
diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_uhc/Makefile b/src/backend/utils/mb/conversion_procs/utf8_and_uhc/Makefile
new file mode 100644
index 0000000000..824624af52
--- /dev/null
+++ b/src/backend/utils/mb/conversion_procs/utf8_and_uhc/Makefile
@@ -0,0 +1,11 @@
+#-------------------------------------------------------------------------
+#
+# $Id: Makefile,v 1.1 2002/07/16 09:25:06 ishii Exp $
+#
+#-------------------------------------------------------------------------
+top_builddir = ../../../../../..
+include $(top_builddir)/src/Makefile.global + +NAME := utf8_and_uhc + +include ../proc.mk diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_uhc/utf8_and_uhc.c b/src/backend/utils/mb/conversion_procs/utf8_and_uhc/utf8_and_uhc.c new file mode 100644 index 0000000000..8a8f479c75 --- /dev/null +++ b/src/backend/utils/mb/conversion_procs/utf8_and_uhc/utf8_and_uhc.c @@ -0,0 +1,68 @@ +/*------------------------------------------------------------------------- + * + * UHC <--> UTF-8 + * + * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * $Header: /cvsroot/pgsql/src/backend/utils/mb/conversion_procs/utf8_and_uhc/utf8_and_uhc.c,v 1.1 2002/07/16 09:25:06 ishii Exp $ + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" +#include "fmgr.h" +#include "mb/pg_wchar.h" +#include "../../Unicode/uhc_to_utf8.map" +#include "../../Unicode/utf8_to_uhc.map" + +PG_FUNCTION_INFO_V1(uhc_to_utf8) +PG_FUNCTION_INFO_V1(utf8_to_uhc) + +extern Datum uhc_to_utf8(PG_FUNCTION_ARGS); +extern Datum utf8_to_uhc(PG_FUNCTION_ARGS); + +/* ---------- + * conv_proc( + * INTEGER, -- source encoding id + * INTEGER, -- destination encoding id + * OPAQUE, -- source string (null terminated C string) + * OPAQUE, -- destination string (null terminated C string) + * INTEGER -- source string length + * ) returns INTEGER; -- dummy. returns nothing, actually. 
+ * ----------
+ */
+/* UHC -> UTF-8 via LocalToUtf() and LUmapUHC; the result is a dummy 0. */
+Datum
+uhc_to_utf8(PG_FUNCTION_ARGS)
+{
+	unsigned char *in = PG_GETARG_CSTRING(2);	/* source string */
+	unsigned char *out = PG_GETARG_CSTRING(3);	/* destination buffer */
+	int			in_len = PG_GETARG_INT32(4);	/* source length */
+	int			map_len = sizeof(LUmapUHC) / sizeof(pg_local_to_utf);
+
+	Assert(PG_GETARG_INT32(0) == PG_UHC);		/* source encoding id */
+	Assert(PG_GETARG_INT32(1) == PG_UTF8);		/* destination encoding id */
+	Assert(in_len > 0);
+	LocalToUtf(in, out, LUmapUHC, map_len, PG_UHC, in_len);
+
+	PG_RETURN_INT32(0);
+}
+
+/* UTF-8 -> UHC via UtfToLocal() and ULmapUHC; the result is a dummy 0. */
+Datum
+utf8_to_uhc(PG_FUNCTION_ARGS)
+{
+	unsigned char *in = PG_GETARG_CSTRING(2);	/* source string */
+	unsigned char *out = PG_GETARG_CSTRING(3);	/* destination buffer */
+	int			in_len = PG_GETARG_INT32(4);	/* source length */
+	int			map_len = sizeof(ULmapUHC) / sizeof(pg_utf_to_local);
+
+	Assert(PG_GETARG_INT32(0) == PG_UTF8);		/* source encoding id */
+	Assert(PG_GETARG_INT32(1) == PG_UHC);		/* destination encoding id */
+	Assert(in_len > 0);
+	UtfToLocal(in, out, ULmapUHC, map_len, in_len);
+
+	PG_RETURN_INT32(0);
+}