diff --git a/src/backend/utils/mb/conv.c b/src/backend/utils/mb/conv.c index f957b6efd3..9757dbabec 100644 --- a/src/backend/utils/mb/conv.c +++ b/src/backend/utils/mb/conv.c @@ -14,6 +14,51 @@ #include "mb/pg_wchar.h" +/* + * local2local: a generic single byte charset encoding + * conversion between two ASCII-superset encodings. + * + * l points to the source string of length len + * p is the output area (must be large enough!) + * src_encoding is the PG identifier for the source encoding + * dest_encoding is the PG identifier for the target encoding + * tab holds conversion entries for the source charset + * starting from 128 (0x80). each entry in the table holds the corresponding + * code point for the target charset, or 0 if there is no equivalent code. + */ +void +local2local(const unsigned char *l, + unsigned char *p, + int len, + int src_encoding, + int dest_encoding, + const unsigned char *tab) +{ + unsigned char c1, + c2; + + while (len > 0) + { + c1 = *l; + if (c1 == 0) + report_invalid_encoding(src_encoding, (const char *) l, len); + if (!IS_HIGHBIT_SET(c1)) + *p++ = c1; + else + { + c2 = tab[c1 - HIGHBIT]; + if (c2) + *p++ = c2; + else + report_untranslatable_char(src_encoding, dest_encoding, + (const char *) l, len); + } + l++; + len--; + } + *p = '\0'; +} + /* * LATINn ---> MIC when the charset's local codes map directly to MIC * @@ -141,8 +186,8 @@ pg_mic2ascii(const unsigned char *mic, unsigned char *p, int len) * lc is the mule character set id for the local encoding * encoding is the PG identifier for the local encoding * tab holds conversion entries for the local charset - * starting from 128 (0x80). each entry in the table - * holds the corresponding code point for the mule internal code. + * starting from 128 (0x80). each entry in the table holds the corresponding + * code point for the mule encoding, or 0 if there is no equivalent code. */ void latin2mic_with_table(const unsigned char *l, @@ -188,9 +233,9 @@ latin2mic_with_table(const unsigned char *l, * p is the output area (must be large enough!) * lc is the mule character set id for the local encoding * encoding is the PG identifier for the local encoding - * tab holds conversion entries for the mule internal code's - * second byte, starting from 128 (0x80). each entry in the table - * holds the corresponding code point for the local charset. + * tab holds conversion entries for the mule internal code's second byte, + * starting from 128 (0x80). each entry in the table holds the corresponding + * code point for the local charset, or 0 if there is no equivalent code. */ void mic2latin_with_table(const unsigned char *mic, diff --git a/src/backend/utils/mb/conversion_procs/cyrillic_and_mic/cyrillic_and_mic.c b/src/backend/utils/mb/conversion_procs/cyrillic_and_mic/cyrillic_and_mic.c index 5d1c59b842..1847287b31 100644 --- a/src/backend/utils/mb/conversion_procs/cyrillic_and_mic/cyrillic_and_mic.c +++ b/src/backend/utils/mb/conversion_procs/cyrillic_and_mic/cyrillic_and_mic.c @@ -15,8 +15,6 @@ #include "fmgr.h" #include "mb/pg_wchar.h" -#define ENCODING_GROWTH_RATE 4 - PG_MODULE_MAGIC; PG_FUNCTION_INFO_V1(koi8r_to_mic); @@ -51,14 +49,256 @@ PG_FUNCTION_INFO_V1(win866_to_iso); * ---------- */ -static void koi8r2mic(const unsigned char *l, unsigned char *p, int len); -static void mic2koi8r(const unsigned char *mic, unsigned char *p, int len); -static void iso2mic(const unsigned char *l, unsigned char *p, int len); -static void mic2iso(const unsigned char *mic, unsigned char *p, int len); -static void win12512mic(const unsigned char *l, unsigned char *p, int len); -static void mic2win1251(const unsigned char *mic, unsigned char *p, int len); -static void win8662mic(const unsigned char *l, unsigned char *p, int len); -static void mic2win866(const unsigned char *mic, unsigned char *p, int len); +/* + * Cyrillic support + * currently supported Cyrillic encodings: + * + * KOI8-R (this is also the charset for the mule internal code for Cyrillic) + * ISO-8859-5 + * Microsoft's CP1251 (windows-1251) + * Alternativny Variant (MS-DOS CP866) + */ + +/* ISO-8859-5 to KOI8-R */ +static const unsigned char iso2koi[] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0xB3, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xE1, 0xE2, 0xF7, 0xE7, 0xE4, 0xE5, 0xF6, 0xFA, + 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 0xF0, + 0xF2, 0xF3, 0xF4, 0xF5, 0xE6, 0xE8, 0xE3, 0xFE, + 0xFB, 0xFD, 0xFF, 0xF9, 0xF8, 0xFC, 0xE0, 0xF1, + 0xC1, 0xC2, 0xD7, 0xC7, 0xC4, 0xC5, 0xD6, 0xDA, + 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, 0xD0, + 0xD2, 0xD3, 0xD4, 0xD5, 0xC6, 0xC8, 0xC3, 0xDE, + 0xDB, 0xDD, 0xDF, 0xD9, 0xD8, 0xDC, 0xC0, 0xD1, + 0x00, 0xA3, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 +}; + +/* KOI8-R to ISO-8859-5 */ +static const unsigned char koi2iso[] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0xF1, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0xA1, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xEE, 0xD0, 0xD1, 0xE6, 0xD4, 0xD5, 0xE4, 0xD3, + 0xE5, 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, + 0xDF, 0xEF, 0xE0, 0xE1, 0xE2, 0xE3, 0xD6, 0xD2, + 0xEC, 0xEB, 0xD7, 0xE8, 0xED, 0xE9, 0xE7, 0xEA, + 0xCE, 0xB0, 0xB1, 0xC6, 0xB4, 0xB5, 0xC4, 0xB3, + 0xC5, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, + 0xBF, 0xCF, 0xC0, 0xC1, 0xC2, 0xC3, 0xB6, 0xB2, + 0xCC, 0xCB, 0xB7, 0xC8, 0xCD, 0xC9, 0xC7, 0xCA +}; + +/* WIN1251 to KOI8-R */ +static const unsigned char win12512koi[] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0xBD, 0x00, 0x00, + 0xB3, 0x00, 0xB4, 0x00, 0x00, 0x00, 0x00, 0xB7, + 0x00, 0x00, 0xB6, 0xA6, 0xAD, 0x00, 0x00, 0x00, + 0xA3, 0x00, 0xA4, 0x00, 0x00, 0x00, 0x00, 0xA7, + 0xE1, 0xE2, 0xF7, 0xE7, 0xE4, 0xE5, 0xF6, 0xFA, + 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 0xF0, + 0xF2, 0xF3, 0xF4, 0xF5, 0xE6, 0xE8, 0xE3, 0xFE, + 0xFB, 0xFD, 0xFF, 0xF9, 0xF8, 0xFC, 0xE0, 0xF1, + 0xC1, 0xC2, 0xD7, 0xC7, 0xC4, 0xC5, 0xD6, 0xDA, + 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, 0xD0, + 0xD2, 0xD3, 0xD4, 0xD5, 0xC6, 0xC8, 0xC3, 0xDE, + 0xDB, 0xDD, 0xDF, 0xD9, 0xD8, 0xDC, 0xC0, 0xD1 +}; + +/* KOI8-R to WIN1251 */ +static const unsigned char koi2win1251[] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0xB8, 0xBA, 0x00, 0xB3, 0xBF, + 0x00, 0x00, 0x00, 0x00, 0x00, 0xB4, 0x00, 0x00, + 0x00, 0x00, 0x00, 0xA8, 0xAA, 0x00, 0xB2, 0xAF, + 0x00, 0x00, 0x00, 0x00, 0x00, 0xA5, 0x00, 0x00, + 0xFE, 0xE0, 0xE1, 0xF6, 0xE4, 0xE5, 0xF4, 0xE3, + 0xF5, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, + 0xEF, 0xFF, 0xF0, 0xF1, 0xF2, 0xF3, 0xE6, 0xE2, + 0xFC, 0xFB, 0xE7, 0xF8, 0xFD, 0xF9, 0xF7, 0xFA, + 0xDE, 0xC0, 0xC1, 0xD6, 0xC4, 0xC5, 0xD4, 0xC3, + 0xD5, 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, + 0xCF, 0xDF, 0xD0, 0xD1, 0xD2, 0xD3, 0xC6, 0xC2, + 0xDC, 0xDB, 0xC7, 0xD8, 0xDD, 0xD9, 0xD7, 0xDA +}; + +/* WIN866 to KOI8-R */ +static const unsigned char win8662koi[] = { + 0xE1, 0xE2, 0xF7, 0xE7, 0xE4, 0xE5, 0xF6, 0xFA, + 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 0xF0, + 0xF2, 0xF3, 0xF4, 0xF5, 0xE6, 0xE8, 0xE3, 0xFE, + 0xFB, 0xFD, 0xFF, 0xF9, 0xF8, 0xFC, 0xE0, 0xF1, + 0xC1, 0xC2, 0xD7, 0xC7, 0xC4, 0xC5, 0xD6, 0xDA, + 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, 0xD0, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0xBD, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xD2, 0xD3, 0xD4, 0xD5, 0xC6, 0xC8, 0xC3, 0xDE, + 0xDB, 0xDD, 0xDF, 0xD9, 0xD8, 0xDC, 0xC0, 0xD1, + 0xB3, 0xA3, 0xB4, 0xA4, 0xB7, 0xA7, 0x00, 0x00, + 0xB6, 0xA6, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 +}; + +/* KOI8-R to WIN866 */ +static const unsigned char koi2win866[] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0xF1, 0xF3, 0x00, 0xF9, 0xF5, + 0x00, 0x00, 0x00, 0x00, 0x00, 0xAD, 0x00, 0x00, + 0x00, 0x00, 0x00, 0xF0, 0xF2, 0x00, 0xF8, 0xF4, + 0x00, 0x00, 0x00, 0x00, 0x00, 0xBD, 0x00, 0x00, + 0xEE, 0xA0, 0xA1, 0xE6, 0xA4, 0xA5, 0xE4, 0xA3, + 0xE5, 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, + 0xAF, 0xEF, 0xE0, 0xE1, 0xE2, 0xE3, 0xA6, 0xA2, + 0xEC, 0xEB, 0xA7, 0xE8, 0xED, 0xE9, 0xE7, 0xEA, + 0x9E, 0x80, 0x81, 0x96, 0x84, 0x85, 0x94, 0x83, + 0x95, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, + 0x8F, 0x9F, 0x90, 0x91, 0x92, 0x93, 0x86, 0x82, + 0x9C, 0x9B, 0x87, 0x98, 0x9D, 0x99, 0x97, 0x9A +}; + +/* WIN866 to WIN1251 */ +static const unsigned char win8662win1251[] = { + 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, + 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, + 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, + 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, + 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, + 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0xA5, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, + 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF, + 0xA8, 0xB8, 0xAA, 0xBA, 0xAF, 0xBF, 0x00, 0x00, + 0xB2, 0xB3, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 +}; + +/* WIN1251 to WIN866 */ +static const unsigned char win12512win866[] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0xBD, 0x00, 0x00, + 0xF0, 0x00, 0xF2, 0x00, 0x00, 0x00, 0x00, 0xF4, + 0x00, 0x00, 0xF8, 0xF9, 0xAD, 0x00, 0x00, 0x00, + 0xF1, 0x00, 0xF3, 0x00, 0x00, 0x00, 0x00, 0xF5, + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F, + 0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, + 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF, + 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, + 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF +}; + +/* ISO-8859-5 to WIN1251 */ +static const unsigned char iso2win1251[] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0xA8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, + 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, + 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, + 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, + 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, + 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, + 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, + 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF, + 0x00, 0xB8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 +}; + +/* WIN1251 to ISO-8859-5 */ +static const unsigned char win12512iso[] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xA1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xF1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, + 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF, + 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, + 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, + 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, + 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, + 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, + 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF +}; + +/* ISO-8859-5 to WIN866 */ +static const unsigned char iso2win866[] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0xF0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F, + 0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, + 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF, + 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, + 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, + 0x00, 0xF1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 +}; + +/* WIN866 to ISO-8859-5 */ +static const unsigned char win8662iso[] = { + 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, + 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF, + 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, + 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, + 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, + 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, + 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, + 0xA1, 0xF1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 +}; + Datum koi8r_to_mic(PG_FUNCTION_ARGS) @@ -69,7 +309,7 @@ koi8r_to_mic(PG_FUNCTION_ARGS) CHECK_ENCODING_CONVERSION_ARGS(PG_KOI8R, PG_MULE_INTERNAL); - koi8r2mic(src, dest, len); + latin2mic(src, dest, len, LC_KOI8_R, PG_KOI8R); PG_RETURN_VOID(); } @@ -83,7 +323,7 @@ mic_to_koi8r(PG_FUNCTION_ARGS) CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_KOI8R); - mic2koi8r(src, dest, len); + mic2latin(src, dest, len, LC_KOI8_R, PG_KOI8R); PG_RETURN_VOID(); } @@ -97,7 +337,7 @@ iso_to_mic(PG_FUNCTION_ARGS) CHECK_ENCODING_CONVERSION_ARGS(PG_ISO_8859_5, PG_MULE_INTERNAL); - iso2mic(src, dest, len); + latin2mic_with_table(src, dest, len, LC_KOI8_R, PG_ISO_8859_5, iso2koi); PG_RETURN_VOID(); } @@ -111,7 +351,7 @@ mic_to_iso(PG_FUNCTION_ARGS) CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_ISO_8859_5); - mic2iso(src, dest, len); + mic2latin_with_table(src, dest, len, LC_KOI8_R, PG_ISO_8859_5, koi2iso); PG_RETURN_VOID(); } @@ -125,7 +365,7 @@ win1251_to_mic(PG_FUNCTION_ARGS) CHECK_ENCODING_CONVERSION_ARGS(PG_WIN1251, PG_MULE_INTERNAL); - win12512mic(src, dest, len); + latin2mic_with_table(src, dest, len, LC_KOI8_R, PG_WIN1251, win12512koi); PG_RETURN_VOID(); } @@ -139,7 +379,7 @@ mic_to_win1251(PG_FUNCTION_ARGS) CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_WIN1251); - mic2win1251(src, dest, len); + mic2latin_with_table(src, dest, len, LC_KOI8_R, PG_WIN1251, koi2win1251); PG_RETURN_VOID(); } @@ -153,7 +393,7 @@ win866_to_mic(PG_FUNCTION_ARGS) CHECK_ENCODING_CONVERSION_ARGS(PG_WIN866, PG_MULE_INTERNAL); - win8662mic(src, dest, len); + latin2mic_with_table(src, dest, len, LC_KOI8_R, PG_WIN866, win8662koi); PG_RETURN_VOID(); } @@ -167,7 +407,7 @@ mic_to_win866(PG_FUNCTION_ARGS) CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_WIN866); - mic2win866(src, dest, len); + mic2latin_with_table(src, dest, len, LC_KOI8_R, PG_WIN866, koi2win866); PG_RETURN_VOID(); } @@ -178,14 +418,10 @@ koi8r_to_win1251(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); - unsigned char *buf; CHECK_ENCODING_CONVERSION_ARGS(PG_KOI8R, PG_WIN1251); - buf = palloc(len * ENCODING_GROWTH_RATE + 1); - koi8r2mic(src, buf, len); - mic2win1251(buf, dest, strlen((char *) buf)); - pfree(buf); + local2local(src, dest, len, PG_KOI8R, PG_WIN1251, koi2win1251); PG_RETURN_VOID(); } @@ -196,14 +432,10 @@ win1251_to_koi8r(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); - unsigned char *buf; CHECK_ENCODING_CONVERSION_ARGS(PG_WIN1251, PG_KOI8R); - buf = palloc(len * ENCODING_GROWTH_RATE + 1); - win12512mic(src, buf, len); - mic2koi8r(buf, dest, strlen((char *) buf)); - pfree(buf); + local2local(src, dest, len, PG_WIN1251, PG_KOI8R, win12512koi); PG_RETURN_VOID(); } @@ -214,14 +446,10 @@ koi8r_to_win866(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); - unsigned char *buf; CHECK_ENCODING_CONVERSION_ARGS(PG_KOI8R, PG_WIN866); - buf = palloc(len * ENCODING_GROWTH_RATE + 1); - koi8r2mic(src, buf, len); - mic2win866(buf, dest, strlen((char *) buf)); - pfree(buf); + local2local(src, dest, len, PG_KOI8R, PG_WIN866, koi2win866); PG_RETURN_VOID(); } @@ -232,14 +460,10 @@ win866_to_koi8r(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); - unsigned char *buf; CHECK_ENCODING_CONVERSION_ARGS(PG_WIN866, PG_KOI8R); - buf = palloc(len * ENCODING_GROWTH_RATE + 1); - win8662mic(src, buf, len); - mic2koi8r(buf, dest, strlen((char *) buf)); - pfree(buf); + local2local(src, dest, len, PG_WIN866, PG_KOI8R, win8662koi); PG_RETURN_VOID(); } @@ -250,20 +474,10 @@ win866_to_win1251(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); - unsigned char *buf; CHECK_ENCODING_CONVERSION_ARGS(PG_WIN866, PG_WIN1251); - /* - * Note: There are a few characters like the "Numero" sign that exist in - * all the other cyrillic encodings (win1251, ISO_8859-5 and cp866), but - * not in KOI8R. As we use MULE_INTERNAL/KOI8R as an intermediary, we will - * fail to convert those characters. - */ - buf = palloc(len * ENCODING_GROWTH_RATE + 1); - win8662mic(src, buf, len); - mic2win1251(buf, dest, strlen((char *) buf)); - pfree(buf); + local2local(src, dest, len, PG_WIN866, PG_WIN1251, win8662win1251); PG_RETURN_VOID(); } @@ -274,15 +488,10 @@ win1251_to_win866(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); - unsigned char *buf; CHECK_ENCODING_CONVERSION_ARGS(PG_WIN1251, PG_WIN866); - /* Use mic/KOI8R as intermediary, see comment in win866_to_win1251() */ - buf = palloc(len * ENCODING_GROWTH_RATE + 1); - win12512mic(src, buf, len); - mic2win866(buf, dest, strlen((char *) buf)); - pfree(buf); + local2local(src, dest, len, PG_WIN1251, PG_WIN866, win12512win866); PG_RETURN_VOID(); } @@ -293,14 +502,10 @@ iso_to_koi8r(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); - unsigned char *buf; CHECK_ENCODING_CONVERSION_ARGS(PG_ISO_8859_5, PG_KOI8R); - buf = palloc(len * ENCODING_GROWTH_RATE + 1); - iso2mic(src, buf, len); - mic2koi8r(buf, dest, strlen((char *) buf)); - pfree(buf); + local2local(src, dest, len, PG_ISO_8859_5, PG_KOI8R, iso2koi); PG_RETURN_VOID(); } @@ -311,14 +516,10 @@ koi8r_to_iso(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); - unsigned char *buf; CHECK_ENCODING_CONVERSION_ARGS(PG_KOI8R, PG_ISO_8859_5); - buf = palloc(len * ENCODING_GROWTH_RATE + 1); - koi8r2mic(src, buf, len); - mic2iso(buf, dest, strlen((char *) buf)); - pfree(buf); + local2local(src, dest, len, PG_KOI8R, PG_ISO_8859_5, koi2iso); PG_RETURN_VOID(); } @@ -329,15 +530,10 @@ iso_to_win1251(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); - unsigned char *buf; CHECK_ENCODING_CONVERSION_ARGS(PG_ISO_8859_5, PG_WIN1251); - /* Use mic/KOI8R as intermediary, see comment in win866_to_win1251() */ - buf = palloc(len * ENCODING_GROWTH_RATE + 1); - iso2mic(src, buf, len); - mic2win1251(buf, dest, strlen((char *) buf)); - pfree(buf); + local2local(src, dest, len, PG_ISO_8859_5, PG_WIN1251, iso2win1251); PG_RETURN_VOID(); } @@ -348,15 +544,10 @@ win1251_to_iso(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); - unsigned char *buf; CHECK_ENCODING_CONVERSION_ARGS(PG_WIN1251, PG_ISO_8859_5); - /* Use mic/KOI8R as intermediary, see comment in win866_to_win1251() */ - buf = palloc(len * ENCODING_GROWTH_RATE + 1); - win12512mic(src, buf, len); - mic2iso(buf, dest, strlen((char *) buf)); - pfree(buf); + local2local(src, dest, len, PG_WIN1251, PG_ISO_8859_5, win12512iso); PG_RETURN_VOID(); } @@ -367,15 +558,10 @@ iso_to_win866(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); - unsigned char *buf; CHECK_ENCODING_CONVERSION_ARGS(PG_ISO_8859_5, PG_WIN866); - /* Use mic/KOI8R as intermediary, see comment in win866_to_win1251() */ - buf = palloc(len * ENCODING_GROWTH_RATE + 1); - iso2mic(src, buf, len); - mic2win866(buf, dest, strlen((char *) buf)); - pfree(buf); + local2local(src, dest, len, PG_ISO_8859_5, PG_WIN866, iso2win866); PG_RETURN_VOID(); } @@ -386,196 +572,10 @@ win866_to_iso(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); - unsigned char *buf; CHECK_ENCODING_CONVERSION_ARGS(PG_WIN866, PG_ISO_8859_5); - /* Use mic/KOI8R as intermediary, see comment in win866_to_win1251() */ - buf = palloc(len * ENCODING_GROWTH_RATE + 1); - win8662mic(src, buf, len); - mic2iso(buf, dest, strlen((char *) buf)); - pfree(buf); + local2local(src, dest, len, PG_WIN866, PG_ISO_8859_5, win8662iso); PG_RETURN_VOID(); } - -/* - * Cyrillic support - * currently supported Cyrillic encodings: - * - * KOI8-R (this is the charset for the mule internal code - * for Cyrillic) - * ISO-8859-5 - * Microsoft's CP1251(windows-1251) - * Alternativny Variant (MS-DOS CP866) - */ - -/* koi8r2mic: KOI8-R to Mule internal code */ -static void -koi8r2mic(const unsigned char *l, unsigned char *p, int len) -{ - latin2mic(l, p, len, LC_KOI8_R, PG_KOI8R); -} - -/* mic2koi8r: Mule internal code to KOI8-R */ -static void -mic2koi8r(const unsigned char *mic, unsigned char *p, int len) -{ - mic2latin(mic, p, len, LC_KOI8_R, PG_KOI8R); -} - -/* iso2mic: ISO-8859-5 to Mule internal code */ -static void -iso2mic(const unsigned char *l, unsigned char *p, int len) -{ - static const unsigned char iso2koi[] = { - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0xb3, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xe1, 0xe2, 0xf7, 0xe7, 0xe4, 0xe5, 0xf6, 0xfa, - 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, - 0xf2, 0xf3, 0xf4, 0xf5, 0xe6, 0xe8, 0xe3, 0xfe, - 0xfb, 0xfd, 0xff, 0xf9, 0xf8, 0xfc, 0xe0, 0xf1, - 0xc1, 0xc2, 0xd7, 0xc7, 0xc4, 0xc5, 0xd6, 0xda, - 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, - 0xd2, 0xd3, 0xd4, 0xd5, 0xc6, 0xc8, 0xc3, 0xde, - 0xdb, 0xdd, 0xdf, 0xd9, 0xd8, 0xdc, 0xc0, 0xd1, - 0x00, 0xa3, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 - }; - - latin2mic_with_table(l, p, len, LC_KOI8_R, PG_ISO_8859_5, iso2koi); -} - -/* mic2iso: Mule internal code to ISO8859-5 */ -static void -mic2iso(const unsigned char *mic, unsigned char *p, int len) -{ - static const unsigned char koi2iso[] = { - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0xf1, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0xa1, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xee, 0xd0, 0xd1, 0xe6, 0xd4, 0xd5, 0xe4, 0xd3, - 0xe5, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, - 0xdf, 0xef, 0xe0, 0xe1, 0xe2, 0xe3, 0xd6, 0xd2, - 0xec, 0xeb, 0xd7, 0xe8, 0xed, 0xe9, 0xe7, 0xea, - 0xce, 0xb0, 0xb1, 0xc6, 0xb4, 0xb5, 0xc4, 0xb3, - 0xc5, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, - 0xbf, 0xcf, 0xc0, 0xc1, 0xc2, 0xc3, 0xb6, 0xb2, - 0xcc, 0xcb, 0xb7, 0xc8, 0xcd, 0xc9, 0xc7, 0xca - }; - - mic2latin_with_table(mic, p, len, LC_KOI8_R, PG_ISO_8859_5, koi2iso); -} - -/* win2mic: CP1251 to Mule internal code */ -static void -win12512mic(const unsigned char *l, unsigned char *p, int len) -{ - static const unsigned char win2koi[] = { - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0xbd, 0x00, 0x00, - 0xb3, 0x00, 0xb4, 0x00, 0x00, 0x00, 0x00, 0xb7, - 0x00, 0x00, 0xb6, 0xa6, 0xad, 0x00, 0x00, 0x00, - 0xa3, 0x00, 0xa4, 0x00, 0x00, 0x00, 0x00, 0xa7, - 0xe1, 0xe2, 0xf7, 0xe7, 0xe4, 0xe5, 0xf6, 0xfa, - 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, - 0xf2, 0xf3, 0xf4, 0xf5, 0xe6, 0xe8, 0xe3, 0xfe, - 0xfb, 0xfd, 0xff, 0xf9, 0xf8, 0xfc, 0xe0, 0xf1, - 0xc1, 0xc2, 0xd7, 0xc7, 0xc4, 0xc5, 0xd6, 0xda, - 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, - 0xd2, 0xd3, 0xd4, 0xd5, 0xc6, 0xc8, 0xc3, 0xde, - 0xdb, 0xdd, 0xdf, 0xd9, 0xd8, 0xdc, 0xc0, 0xd1 - }; - - latin2mic_with_table(l, p, len, LC_KOI8_R, PG_WIN1251, win2koi); -} - -/* mic2win: Mule internal code to CP1251 */ -static void -mic2win1251(const unsigned char *mic, unsigned char *p, int len) -{ - static const unsigned char koi2win[] = { - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0xb8, 0xba, 0x00, 0xb3, 0xbf, - 0x00, 0x00, 0x00, 0x00, 0x00, 0xb4, 0x00, 0x00, - 0x00, 0x00, 0x00, 0xa8, 0xaa, 0x00, 0xb2, 0xaf, - 0x00, 0x00, 0x00, 0x00, 0x00, 0xa5, 0x00, 0x00, - 0xfe, 0xe0, 0xe1, 0xf6, 0xe4, 0xe5, 0xf4, 0xe3, - 0xf5, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, - 0xef, 0xff, 0xf0, 0xf1, 0xf2, 0xf3, 0xe6, 0xe2, - 0xfc, 0xfb, 0xe7, 0xf8, 0xfd, 0xf9, 0xf7, 0xfa, - 0xde, 0xc0, 0xc1, 0xd6, 0xc4, 0xc5, 0xd4, 0xc3, - 0xd5, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, - 0xcf, 0xdf, 0xd0, 0xd1, 0xd2, 0xd3, 0xc6, 0xc2, - 0xdc, 0xdb, 0xc7, 0xd8, 0xdd, 0xd9, 0xd7, 0xda - }; - - mic2latin_with_table(mic, p, len, LC_KOI8_R, PG_WIN1251, koi2win); -} - -/* win8662mic: CP866 to Mule internal code */ -static void -win8662mic(const unsigned char *l, unsigned char *p, int len) -{ - static const unsigned char win8662koi[] = { - 0xe1, 0xe2, 0xf7, 0xe7, 0xe4, 0xe5, 0xf6, 0xfa, - 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, - 0xf2, 0xf3, 0xf4, 0xf5, 0xe6, 0xe8, 0xe3, 0xfe, - 0xfb, 0xfd, 0xff, 0xf9, 0xf8, 0xfc, 0xe0, 0xf1, - 0xc1, 0xc2, 0xd7, 0xc7, 0xc4, 0xc5, 0xd6, 0xda, - 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0xbd, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xd2, 0xd3, 0xd4, 0xd5, 0xc6, 0xc8, 0xc3, 0xde, - 0xdb, 0xdd, 0xdf, 0xd9, 0xd8, 0xdc, 0xc0, 0xd1, - 0xb3, 0xa3, 0xb4, 0xa4, 0xb7, 0xa7, 0x00, 0x00, - 0xb6, 0xa6, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 - }; - - latin2mic_with_table(l, p, len, LC_KOI8_R, PG_WIN866, win8662koi); -} - -/* mic2win866: Mule internal code to CP866 */ -static void -mic2win866(const unsigned char *mic, unsigned char *p, int len) -{ - static const unsigned char koi2win866[] = { - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0xf1, 0xf3, 0x00, 0xf9, 0xf5, - 0x00, 0x00, 0x00, 0x00, 0x00, 0xad, 0x00, 0x00, - 0x00, 0x00, 0x00, 0xf0, 0xf2, 0x00, 0xf8, 0xf4, - 0x00, 0x00, 0x00, 0x00, 0x00, 0xbd, 0x00, 0x00, - 0xee, 0xa0, 0xa1, 0xe6, 0xa4, 0xa5, 0xe4, 0xa3, - 0xe5, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, - 0xaf, 0xef, 0xe0, 0xe1, 0xe2, 0xe3, 0xa6, 0xa2, - 0xec, 0xeb, 0xa7, 0xe8, 0xed, 0xe9, 0xe7, 0xea, - 0x9e, 0x80, 0x81, 0x96, 0x84, 0x85, 0x94, 0x83, - 0x95, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, - 0x8f, 0x9f, 0x90, 0x91, 0x92, 0x93, 0x86, 0x82, - 0x9c, 0x9b, 0x87, 0x98, 0x9d, 0x99, 0x97, 0x9a - }; - - mic2latin_with_table(mic, p, len, LC_KOI8_R, PG_WIN866, koi2win866); -} diff --git a/src/backend/utils/mb/conversion_procs/latin2_and_win1250/latin2_and_win1250.c b/src/backend/utils/mb/conversion_procs/latin2_and_win1250/latin2_and_win1250.c index 8f831ba1b3..1260b62cca 100644 --- a/src/backend/utils/mb/conversion_procs/latin2_and_win1250/latin2_and_win1250.c +++ b/src/backend/utils/mb/conversion_procs/latin2_and_win1250/latin2_and_win1250.c @@ -15,8 +15,6 @@ #include "fmgr.h" #include "mb/pg_wchar.h" -#define ENCODING_GROWTH_RATE 4 - PG_MODULE_MAGIC; PG_FUNCTION_INFO_V1(latin2_to_mic); @@ -37,10 +35,46 @@ PG_FUNCTION_INFO_V1(win1250_to_latin2); * ---------- */ -static void latin22mic(const unsigned char *l, unsigned char *p, int len); -static void mic2latin2(const unsigned char *mic, unsigned char *p, int len); -static void win12502mic(const unsigned char *l, unsigned char *p, int len); -static void mic2win1250(const unsigned char *mic, unsigned char *p, int len); +/* WIN1250 to ISO-8859-2 */ +static const unsigned char win1250_2_iso88592[] = { + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0xA9, 0x8B, 0xA6, 0xAB, 0xAE, 0xAC, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + 0x98, 0x99, 0xB9, 0x9B, 0xB6, 0xBB, 0xBE, 0xBC, + 0xA0, 0xB7, 0xA2, 0xA3, 0xA4, 0xA1, 0x00, 0xA7, + 0xA8, 0x00, 0xAA, 0x00, 0x00, 0xAD, 0x00, 0xAF, + 0xB0, 0x00, 0xB2, 0xB3, 0xB4, 0x00, 0x00, 0x00, + 0xB8, 0xB1, 0xBA, 0x00, 0xA5, 0xBD, 0xB5, 0xBF, + 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, + 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, + 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, + 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, + 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, + 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, + 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, + 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF +}; + +/* ISO-8859-2 to WIN1250 */ +static const unsigned char iso88592_2_win1250[] = { + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x00, 0x8B, 0x00, 0x00, 0x00, 0x00, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + 0x98, 0x99, 0x00, 0x9B, 0x00, 0x00, 0x00, 0x00, + 0xA0, 0xA5, 0xA2, 0xA3, 0xA4, 0xBC, 0x8C, 0xA7, + 0xA8, 0x8A, 0xAA, 0x8D, 0x8F, 0xAD, 0x8E, 0xAF, + 0xB0, 0xB9, 0xB2, 0xB3, 0xB4, 0xBE, 0x9C, 0xA1, + 0xB8, 0x9A, 0xBA, 0x9D, 0x9F, 0xBD, 0x9E, 0xBF, + 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, + 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, + 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, + 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, + 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, + 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, + 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, + 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF +}; + Datum latin2_to_mic(PG_FUNCTION_ARGS) @@ -51,7 +85,7 @@ latin2_to_mic(PG_FUNCTION_ARGS) CHECK_ENCODING_CONVERSION_ARGS(PG_LATIN2, PG_MULE_INTERNAL); - latin22mic(src, dest, len); + latin2mic(src, dest, len, LC_ISO8859_2, PG_LATIN2); PG_RETURN_VOID(); } @@ -65,7 +99,7 @@ mic_to_latin2(PG_FUNCTION_ARGS) CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_LATIN2); - mic2latin2(src, dest, len); + mic2latin(src, dest, len, LC_ISO8859_2, PG_LATIN2); PG_RETURN_VOID(); } @@ -79,7 +113,8 @@ win1250_to_mic(PG_FUNCTION_ARGS) CHECK_ENCODING_CONVERSION_ARGS(PG_WIN1250, PG_MULE_INTERNAL); - win12502mic(src, dest, len); + latin2mic_with_table(src, dest, len, LC_ISO8859_2, PG_WIN1250, + win1250_2_iso88592); PG_RETURN_VOID(); } @@ -93,7 +128,8 @@ mic_to_win1250(PG_FUNCTION_ARGS) CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_WIN1250); - mic2win1250(src, dest, len); + mic2latin_with_table(src, dest, len, LC_ISO8859_2, PG_WIN1250, + iso88592_2_win1250); PG_RETURN_VOID(); } @@ -104,14 +140,10 @@ latin2_to_win1250(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); - unsigned char *buf; CHECK_ENCODING_CONVERSION_ARGS(PG_LATIN2, PG_WIN1250); - buf = palloc(len * ENCODING_GROWTH_RATE + 1); - latin22mic(src, buf, len); - mic2win1250(buf, dest, strlen((char *) buf)); - pfree(buf); + local2local(src, dest, len, PG_LATIN2, PG_WIN1250, iso88592_2_win1250); PG_RETURN_VOID(); } @@ -122,82 +154,10 @@ win1250_to_latin2(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); - unsigned char *buf; CHECK_ENCODING_CONVERSION_ARGS(PG_WIN1250, PG_LATIN2); - buf = palloc(len * ENCODING_GROWTH_RATE + 1); - win12502mic(src, buf, len); - mic2latin2(buf, dest, strlen((char *) buf)); - pfree(buf); + local2local(src, dest, len, PG_WIN1250, PG_LATIN2, win1250_2_iso88592); PG_RETURN_VOID(); } - -static void -latin22mic(const unsigned char *l, unsigned char *p, int len) -{ - latin2mic(l, p, len, LC_ISO8859_2, PG_LATIN2); -} - -static void -mic2latin2(const unsigned char *mic, unsigned char *p, int len) -{ - mic2latin(mic, p, len, LC_ISO8859_2, PG_LATIN2); -} - -/*----------------------------------------------------------------- - * WIN1250 - * Microsoft's CP1250(windows-1250) - *-----------------------------------------------------------------*/ -static void -win12502mic(const unsigned char *l, unsigned char *p, int len) -{ - static const unsigned char win1250_2_iso88592[] = { - 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, - 0x88, 0x89, 0xA9, 0x8B, 0xA6, 0xAB, 0xAE, 0xAC, - 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, - 0x98, 0x99, 0xB9, 0x9B, 0xB6, 0xBB, 0xBE, 0xBC, - 0xA0, 0xB7, 0xA2, 0xA3, 0xA4, 0xA1, 0x00, 0xA7, - 0xA8, 0x00, 0xAA, 0x00, 0x00, 0xAD, 0x00, 0xAF, - 0xB0, 0x00, 0xB2, 0xB3, 0xB4, 0x00, 0x00, 0x00, - 0xB8, 0xB1, 0xBA, 0x00, 0xA5, 0xBD, 0xB5, 0xBF, - 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, - 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, - 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, - 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, - 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, - 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, - 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, - 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF - }; - - latin2mic_with_table(l, p, len, LC_ISO8859_2, PG_WIN1250, - win1250_2_iso88592); -} - -static void -mic2win1250(const unsigned char *mic, unsigned char *p, int len) -{ - static const unsigned char iso88592_2_win1250[] = { - 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, - 0x88, 0x89, 0x00, 0x8B, 0x00, 0x00, 0x00, 0x00, - 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, - 0x98, 0x99, 0x00, 0x9B, 0x00, 0x00, 0x00, 0x00, - 0xA0, 0xA5, 0xA2, 0xA3, 0xA4, 0xBC, 0x8C, 0xA7, - 0xA8, 0x8A, 0xAA, 0x8D, 0x8F, 0xAD, 0x8E, 0xAF, - 0xB0, 0xB9, 0xB2, 0xB3, 0xB4, 0xBE, 0x9C, 0xA1, - 0xB8, 0x9A, 0xBA, 0x9D, 0x9F, 0xBD, 0x9E, 0xBF, - 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, - 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, - 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, - 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, - 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, - 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, - 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, - 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF - }; - - mic2latin_with_table(mic, p, len, LC_ISO8859_2, PG_WIN1250, - iso88592_2_win1250); -} diff --git a/src/backend/utils/mb/conversion_procs/latin_and_mic/latin_and_mic.c b/src/backend/utils/mb/conversion_procs/latin_and_mic/latin_and_mic.c index b727691994..806605c048 100644 --- a/src/backend/utils/mb/conversion_procs/latin_and_mic/latin_and_mic.c +++ b/src/backend/utils/mb/conversion_procs/latin_and_mic/latin_and_mic.c @@ -35,12 +35,6 @@ PG_FUNCTION_INFO_V1(mic_to_latin4); * ---------- */ -static void latin12mic(const unsigned char *l, unsigned char *p, int len); -static void mic2latin1(const unsigned char *mic, unsigned char *p, int len); -static void latin32mic(const unsigned char *l, unsigned char *p, int len); -static void mic2latin3(const unsigned char *mic, unsigned char *p, int len); -static void latin42mic(const unsigned char *l, unsigned char *p, int len); -static void mic2latin4(const unsigned char *mic, unsigned char *p, int len); Datum latin1_to_mic(PG_FUNCTION_ARGS) @@ -51,7 +45,7 @@ latin1_to_mic(PG_FUNCTION_ARGS) CHECK_ENCODING_CONVERSION_ARGS(PG_LATIN1, PG_MULE_INTERNAL); - latin12mic(src, dest, len); + latin2mic(src, dest, len, LC_ISO8859_1, PG_LATIN1); PG_RETURN_VOID(); } @@ -65,7 +59,7 @@ mic_to_latin1(PG_FUNCTION_ARGS) CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_LATIN1); - mic2latin1(src, dest, len); + mic2latin(src, dest, len, LC_ISO8859_1, PG_LATIN1); PG_RETURN_VOID(); } @@ -79,7 +73,7 @@ latin3_to_mic(PG_FUNCTION_ARGS) CHECK_ENCODING_CONVERSION_ARGS(PG_LATIN3, PG_MULE_INTERNAL); - latin32mic(src, dest, len); + latin2mic(src, dest, len, LC_ISO8859_3, PG_LATIN3); PG_RETURN_VOID(); } @@ -93,7 +87,7 @@ mic_to_latin3(PG_FUNCTION_ARGS) CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_LATIN3); - mic2latin3(src, dest, len); + mic2latin(src, dest, len, LC_ISO8859_3, PG_LATIN3); PG_RETURN_VOID(); } @@ -107,7 +101,7 @@ latin4_to_mic(PG_FUNCTION_ARGS) CHECK_ENCODING_CONVERSION_ARGS(PG_LATIN4, PG_MULE_INTERNAL); - latin42mic(src, dest, len); + latin2mic(src, dest, len, LC_ISO8859_4, PG_LATIN4); PG_RETURN_VOID(); } @@ -121,43 +115,7 @@ mic_to_latin4(PG_FUNCTION_ARGS) CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_LATIN4); - mic2latin4(src, dest, len); + mic2latin(src, dest, len, LC_ISO8859_4, PG_LATIN4); PG_RETURN_VOID(); } - -static void -latin12mic(const unsigned char *l, unsigned char *p, int len) -{ - latin2mic(l, p, len, LC_ISO8859_1, PG_LATIN1); -} - -static void -mic2latin1(const unsigned char *mic, unsigned char *p, int len) -{ - mic2latin(mic, p, len, LC_ISO8859_1, PG_LATIN1); -} - -static void -latin32mic(const unsigned char *l, unsigned char *p, int len) -{ - latin2mic(l, p, len, LC_ISO8859_3, PG_LATIN3); -} - -static void -mic2latin3(const unsigned char *mic, unsigned char *p, int len) -{ - mic2latin(mic, p, len, LC_ISO8859_3, PG_LATIN3); -} - -static void -latin42mic(const unsigned char *l, unsigned char *p, int len) -{ - latin2mic(l, p, len, LC_ISO8859_4, PG_LATIN4); -} - -static void -mic2latin4(const unsigned char *mic, unsigned char *p, int len) -{ - mic2latin(mic, p, len, LC_ISO8859_4, PG_LATIN4); -} diff --git a/src/include/mb/pg_wchar.h b/src/include/mb/pg_wchar.h index f8b0edc678..87f9fbf884 100644 --- a/src/include/mb/pg_wchar.h +++ b/src/include/mb/pg_wchar.h @@ -537,6 +537,8 @@ extern void report_invalid_encoding(int encoding, const char *mbstr, int len) pg extern void report_untranslatable_char(int src_encoding, int dest_encoding, const char *mbstr, int len) pg_attribute_noreturn(); +extern void local2local(const unsigned char *l, unsigned char *p, int len, + int src_encoding, int dest_encoding, const unsigned char *tab); extern void pg_ascii2mic(const unsigned char *l, unsigned char *p, int len); extern void pg_mic2ascii(const unsigned char *mic, unsigned char *p, int len); extern void latin2mic(const unsigned char *l, unsigned char *p, int len,