Add new encodings EUC_JIS_2004 and SHIFT_JIS_2004,

along with new conversions among EUC_JIS_2004, SHIFT_JIS_2004 and UTF-8.
The catalog version has been bumped.
This commit is contained in:
Tatsuo Ishii 2007-03-25 11:56:04 +00:00
parent 7b4726e6c3
commit 75c6519ff6
41 changed files with 70345 additions and 119 deletions

View File

@ -1,4 +1,4 @@
<!-- $PostgreSQL: pgsql/doc/src/sgml/charset.sgml,v 2.81 2007/01/31 20:56:16 momjian Exp $ -->
<!-- $PostgreSQL: pgsql/doc/src/sgml/charset.sgml,v 2.82 2007/03/25 11:56:01 ishii Exp $ -->
<chapter id="charset">
<title>Localization</>
@ -364,6 +364,14 @@ initdb --locale=sv_SE
<entry>1-3</entry>
<entry></entry>
</row>
<row>
<entry><literal>EUC_JIS_2004</literal></entry>
<entry>Extended UNIX Code-JP, JIS X 0213</entry>
<entry>Japanese</entry>
<entry>Yes</entry>
<entry>1-3</entry>
<entry></entry>
</row>
<row>
<entry><literal>EUC_KR</literal></entry>
<entry>Extended UNIX Code-KR</entry>
@ -540,6 +548,14 @@ initdb --locale=sv_SE
<entry>1-2</entry>
<entry><literal>Mskanji</>, <literal>ShiftJIS</>, <literal>WIN932</>, <literal>Windows932</></entry>
</row>
<row>
<entry><literal>SHIFT_JIS_2004</literal></entry>
<entry>Shift JIS, JIS X 0213</entry>
<entry>Japanese</entry>
<entry>No</entry>
<entry>1-2</entry>
<entry></entry>
</row>
<row>
<entry><literal>SQL_ASCII</literal></entry>
<entry>unspecified (see text)</entry>

View File

@ -1,4 +1,4 @@
<!-- $PostgreSQL: pgsql/doc/src/sgml/func.sgml,v 1.370 2007/03/20 05:44:59 neilc Exp $ -->
<!-- $PostgreSQL: pgsql/doc/src/sgml/func.sgml,v 1.371 2007/03/25 11:56:01 ishii Exp $ -->
<chapter id="functions">
<title>Functions and Operators</title>
@ -2394,6 +2394,42 @@
<entry><literal>UTF8</literal></entry>
</row>
<row>
<entry><literal>euc_jis_2004_to_utf8</literal></entry>
<entry><literal>EUC_JIS_2004</literal></entry>
<entry><literal>UTF8</literal></entry>
</row>
<row>
<entry><literal>utf8_to_euc_jis_2004</literal></entry>
<entry><literal>UTF8</literal></entry>
<entry><literal>EUC_JIS_2004</literal></entry>
</row>
<row>
<entry><literal>shift_jis_2004_to_utf8</literal></entry>
<entry><literal>SHIFT_JIS_2004</literal></entry>
<entry><literal>UTF8</literal></entry>
</row>
<row>
<entry><literal>utf8_to_shift_jis_2004</literal></entry>
<entry><literal>UTF8</literal></entry>
<entry><literal>SHIFT_JIS_2004</literal></entry>
</row>
<row>
<entry><literal>euc_jis_2004_to_shift_jis_2004</literal></entry>
<entry><literal>EUC_JIS_2004</literal></entry>
<entry><literal>SHIFT_JIS_2004</literal></entry>
</row>
<row>
<entry><literal>shift_jis_2004_to_euc_jis_2004</literal></entry>
<entry><literal>SHIFT_JIS_2004</literal></entry>
<entry><literal>EUC_JIS_2004</literal></entry>
</row>
</tbody>
</tgroup>
</table>

View File

@ -0,0 +1,248 @@
#! /usr/bin/perl
#
# Copyright (c) 2007, PostgreSQL Global Development Group
#
# $PostgreSQL: pgsql/src/backend/utils/mb/Unicode/UCS_to_EUC_JIS_2004.pl,v 1.1 2007/03/25 11:56:02 ishii Exp $
#
# Generate UTF-8 <--> EUC_JIS_2004 code conversion tables from
# "euc-jis-2004-std.txt" (http://x0213.org)
require "ucs2utf.pl";
$TEST = 1;
# first generate UTF-8 --> EUC_JIS_2004 table
$in_file = "euc-jis-2004-std.txt";
open( FILE, $in_file ) || die( "cannot open $in_file" );

# Start with empty tables and zeroed counters.  They are cleared by name
# because reset('array') does not mean "reset %array": reset treats its
# argument as a list of single letters and wipes every package variable
# whose name begins with one of them.
%array = ();
%array1 = ();
%comment = ();
%comment1 = ();
$count = 0;
$count1 = 0;

while( $line = <FILE> ){
    if ($line =~ /^0x(.*)[ \t]*U\+(.*)\+(.*)[ \t]*#(.*)$/) {
        # Combined character: one EUC_JIS_2004 code corresponds to a
        # sequence of two UCS code points ("U+XXXX+YYYY").
        $c = $1;
        $u1 = $2;
        $u2 = $3;
        $rest = "U+" . $u1 . "+" . $u2 . $4;
        $code = hex($c);
        # ucs2utf comes from ucs2utf.pl (not shown here); presumably it
        # returns the UTF-8 encoding of a code point as an integer.
        $ucs = hex($u1);
        $utf1 = &ucs2utf($ucs);
        $ucs = hex($u2);
        $utf2 = &ucs2utf($ucs);
        # %array1 is keyed by both converted values, 8 hex digits each.
        $str = sprintf "%08x%08x", $utf1, $utf2;
        $array1{ $str } = $code;
        $comment1{ $str } = $rest;
        $count1++;
        next;
    } elsif ($line =~ /^0x(.*)[ \t]*U\+(.*)[ \t]*#(.*)$/) {
        # Ordinary character: one code, one UCS code point.
        $c = $1;
        $u = $2;
        $rest = "U+" . $u . $3;
    } else {
        # Not a mapping line.
        next;
    }
    $ucs = hex($u);
    $code = hex($c);
    $utf = &ucs2utf($ucs);
    # Keep only the first mapping seen for a given UTF-8 value.
    if( $array{ $utf } ne "" ){
        printf STDERR "Warning: duplicate UTF8: %04x\n",$ucs;
        next;
    }
    $count++;
    $array{ $utf } = $code;
    $comment{ $code } = $rest;
}
close( FILE );
# Write the UTF-8 --> EUC_JIS_2004 table as a C array initializer, sorted
# numerically by UTF-8 value.
$file = "utf8_to_euc_jis_2004.map";
open( FILE, "> $file" ) || die( "cannot open $file" );
print FILE "/*\n",
    " * This file was generated by UCS_to_EUC_JIS_2004.pl\n",
    " */\n",
    "static pg_utf_to_local ULmapEUC_JIS_2004[] = {\n";
foreach $index ( sort { $a <=> $b } keys %array ) {
    $code = $array{ $index };
    # $count is counted down once per entry; the entry that brings it to
    # zero is written without a trailing comma.
    $sep = ( --$count == 0 ) ? "" : ",";
    printf FILE " {0x%08x, 0x%06x}%s /* %s */\n",
        $index, $code, $sep, $comment{ $code };
}
print FILE "};\n";
close( FILE );
if ($TEST == 1) {
    # Dump the mapped characters as raw bytes, one character per line:
    # the UTF-8 form goes to utf8.data and the EUC_JIS_2004 form to
    # euc_jis_2004.data.
    $file1 = "utf8.data";
    $file2 = "euc_jis_2004.data";
    open( FILE1, "> $file1" ) || die( "cannot open $file1" );
    open( FILE2, "> $file2" ) || die( "cannot open $file2" );
    for $index ( sort {$a <=> $b} keys( %array ) ){
        $code = $array{ $index };
        # Skip NUL, tab, LF, CR and backslash, and accept only codes in one
        # of these ranges:
        #   below 0x80            single byte
        #   0x8ea1   .. 0x8efe    two bytes, lead byte 0x8e
        #   0x8fa1a1 .. 0x8ffefe  three bytes, lead byte 0x8f
        #   0xa1a1   .. 0xfefe    two bytes
        # (The last upper bound was previously mistyped as 0x8fefe.)
        if ($code > 0x00 && $code != 0x09 && $code != 0x0a && $code != 0x0d &&
            $code != 0x5c &&
            ($code < 0x80 ||
             ($code >= 0x8ea1 && $code <= 0x8efe) ||
             ($code >= 0x8fa1a1 && $code <= 0x8ffefe) ||
             ($code >= 0xa1a1 && $code <= 0xfefe))) {
            # Emit most significant byte first, skipping zero bytes.
            for ($i = 3; $i >= 0; $i--) {
                $s = $i * 8;
                $mask = 0xff << $s;
                print FILE1 pack("C", ($index & $mask) >> $s) if $index & $mask;
                print FILE2 pack("C", ($code & $mask) >> $s) if $code & $mask;
            }
            print FILE1 "\n";
            print FILE2 "\n";
        }
    }
    # FILE1 and FILE2 stay open on purpose: the combined-character test
    # data is appended to them further down, where they are closed.
}
# Write the combined-character UTF-8 --> EUC_JIS_2004 table.  Keys of
# %array1 are 16 hex digits (two 8-digit values) and are sorted as strings.
$file = "utf8_to_euc_jis_2004_combined.map";
open( FILE, "> $file" ) || die( "cannot open $file" );
print FILE "/*\n",
    " * This file was generated by UCS_to_EUC_JIS_2004.pl\n",
    " */\n",
    "static pg_utf_to_local_combined ULmapEUC_JIS_2004_combined[] = {\n";
foreach $index ( sort { $a cmp $b } keys %array1 ) {
    $code = $array1{ $index };
    $first = substr( $index, 0, 8 );
    $second = substr( $index, 8, 8 );
    # The entry that brings $count1 down to zero gets no trailing comma.
    $sep = ( --$count1 == 0 ) ? "" : ",";
    printf FILE " {0x%s, 0x%s, 0x%06x}%s /* %s */\n",
        $first, $second, $code, $sep, $comment1{ $index };
}
print FILE "};\n";
close( FILE );
if ($TEST == 1) {
    # Append the combined characters to the test data files.  FILE1 and
    # FILE2 are the handles opened by the earlier test-data section.
    for $index ( sort {$a cmp $b} keys( %array1 ) ){
        $code = $array1{ $index };
        # Same filter as for ordinary characters: skip NUL, tab, LF, CR and
        # backslash, and accept only codes below 0x80, 0x8ea1..0x8efe,
        # 0x8fa1a1..0x8ffefe or 0xa1a1..0xfefe.
        # (The last upper bound was previously mistyped as 0x8fefe.)
        if ($code > 0x00 && $code != 0x09 && $code != 0x0a && $code != 0x0d &&
            $code != 0x5c &&
            ($code < 0x80 ||
             ($code >= 0x8ea1 && $code <= 0x8efe) ||
             ($code >= 0x8fa1a1 && $code <= 0x8ffefe) ||
             ($code >= 0xa1a1 && $code <= 0xfefe))) {
            # The key holds two values as 8 hex digits each.
            $v1 = hex(substr($index, 0, 8));
            $v2 = hex(substr($index, 8, 8));
            # First value and the local code, most significant byte first,
            # skipping zero bytes.
            for ($i = 3; $i >= 0; $i--) {
                $s = $i * 8;
                $mask = 0xff << $s;
                print FILE1 pack("C", ($v1 & $mask) >> $s) if $v1 & $mask;
                print FILE2 pack("C", ($code & $mask) >> $s) if $code & $mask;
            }
            # Second value follows on the UTF-8 side only.
            for ($i = 3; $i >= 0; $i--) {
                $s = $i * 8;
                $mask = 0xff << $s;
                print FILE1 pack("C", ($v2 & $mask) >> $s) if $v2 & $mask;
            }
            print FILE1 "\n";
            print FILE2 "\n";
        }
    }
    close(FILE1);
    close(FILE2);
}
# then generate EUC_JIS_2004 --> UTF-8 table
$in_file = "euc-jis-2004-std.txt";
open( FILE, $in_file ) || die( "cannot open $in_file" );

# Discard the tables built for the opposite direction and zero the counters.
# They are cleared by name because reset('array') does not mean
# "reset %array": reset treats its argument as a list of single letters and
# wipes every package variable whose name begins with one of them.
%array = ();
%array1 = ();
%comment = ();
%comment1 = ();
$count = 0;
$count1 = 0;

while( $line = <FILE> ){
    if ($line =~ /^0x(.*)[ \t]*U\+(.*)\+(.*)[ \t]*#(.*)$/) {
        # Combined character: one EUC_JIS_2004 code corresponds to a
        # sequence of two UCS code points ("U+XXXX+YYYY").
        $c = $1;
        $u1 = $2;
        $u2 = $3;
        $rest = "U+" . $u1 . "+" . $u2 . $4;
        $code = hex($c);
        # ucs2utf comes from ucs2utf.pl (not shown here); presumably it
        # returns the UTF-8 encoding of a code point as an integer.
        $ucs = hex($u1);
        $utf1 = &ucs2utf($ucs);
        $ucs = hex($u2);
        $utf2 = &ucs2utf($ucs);
        # In this direction %array1 is keyed by the local code; the value
        # is both converted values as 8 hex digits each.
        $str = sprintf "%08x%08x", $utf1, $utf2;
        $array1{ $code } = $str;
        $comment1{ $code } = $rest;
        $count1++;
        next;
    } elsif ($line =~ /^0x(.*)[ \t]*U\+(.*)[ \t]*#(.*)$/) {
        # Ordinary character: one code, one UCS code point.
        $c = $1;
        $u = $2;
        $rest = "U+" . $u . $3;
    } else {
        # Not a mapping line.
        next;
    }
    $ucs = hex($u);
    $code = hex($c);
    $utf = &ucs2utf($ucs);
    # Keep only the first mapping seen for a given EUC_JIS_2004 code.  The
    # table is keyed by code here, so the code is what is duplicated.
    if( $array{ $code } ne "" ){
        printf STDERR "Warning: duplicate EUC_JIS_2004 code: %06x (UCS: %04x)\n", $code, $ucs;
        next;
    }
    $count++;
    $array{ $code } = $utf;
    $comment{ $utf } = $rest;
}
close( FILE );
# Write the EUC_JIS_2004 --> UTF-8 table as a C array initializer, sorted
# numerically by local code.  Here $index is the local code and $code the
# value stored for it in %array.
$file = "euc_jis_2004_to_utf8.map";
open( FILE, "> $file" ) || die( "cannot open $file" );
print FILE "/*\n",
    " * This file was generated by UCS_to_EUC_JIS_2004.pl\n",
    " */\n",
    "static pg_local_to_utf LUmapEUC_JIS_2004[] = {\n";
foreach $index ( sort { $a <=> $b } keys %array ) {
    $code = $array{ $index };
    # The entry that brings $count down to zero gets no trailing comma.
    $sep = ( --$count == 0 ) ? "" : ",";
    printf FILE " {0x%06x, 0x%08x}%s /* %s */\n",
        $index, $code, $sep, $comment{ $code };
}
print FILE "};\n";
close( FILE );
# Write the combined-character EUC_JIS_2004 --> UTF-8 table, sorted
# numerically by local code.  Each value in %array1 is a 16-hex-digit
# string holding two 8-digit values.
$file = "euc_jis_2004_to_utf8_combined.map";
open( FILE, "> $file" ) || die( "cannot open $file" );
print FILE "/*\n",
    " * This file was generated by UCS_to_EUC_JIS_2004.pl\n",
    " */\n",
    "static pg_local_to_utf_combined LUmapEUC_JIS_2004_combined[] = {\n";
foreach $index ( sort { $a <=> $b } keys %array1 ) {
    $code = $array1{ $index };
    $first = substr( $code, 0, 8 );
    $second = substr( $code, 8, 8 );
    # The entry that brings $count1 down to zero gets no trailing comma.
    $sep = ( --$count1 == 0 ) ? "" : ",";
    printf FILE " {0x%06x, 0x%s, 0x%s}%s /* %s */\n",
        $index, $first, $second, $sep, $comment1{ $index };
}
print FILE "};\n";
close( FILE );

View File

@ -0,0 +1,189 @@
#! /usr/bin/perl
#
# Copyright (c) 2007, PostgreSQL Global Development Group
#
# $PostgreSQL: pgsql/src/backend/utils/mb/Unicode/UCS_to_SHIFT_JIS_2004.pl,v 1.1 2007/03/25 11:56:02 ishii Exp $
#
# Generate UTF-8 <--> SHIFT_JIS_2004 code conversion tables from
# "sjis-0213-2004-std.txt" (http://x0213.org)
require "ucs2utf.pl";
# first generate UTF-8 --> SHIFT_JIS_2004 table
$in_file = "sjis-0213-2004-std.txt";
open( FILE, $in_file ) || die( "cannot open $in_file" );

# Start with empty tables and zeroed counters.  They are cleared by name
# because reset('array') does not mean "reset %array": reset treats its
# argument as a list of single letters and wipes every package variable
# whose name begins with one of them.
%array = ();
%array1 = ();
%comment = ();
%comment1 = ();
$count = 0;
$count1 = 0;

while( $line = <FILE> ){
    if ($line =~ /^0x(.*)[ \t]*U\+(.*)\+(.*)[ \t]*#(.*)$/) {
        # Combined character: one SHIFT_JIS_2004 code corresponds to a
        # sequence of two UCS code points ("U+XXXX+YYYY").
        $c = $1;
        $u1 = $2;
        $u2 = $3;
        $rest = "U+" . $u1 . "+" . $u2 . $4;
        $code = hex($c);
        # ucs2utf comes from ucs2utf.pl (not shown here); presumably it
        # returns the UTF-8 encoding of a code point as an integer.
        $ucs = hex($u1);
        $utf1 = &ucs2utf($ucs);
        $ucs = hex($u2);
        $utf2 = &ucs2utf($ucs);
        # %array1 is keyed by both converted values, 8 hex digits each.
        $str = sprintf "%08x%08x", $utf1, $utf2;
        $array1{ $str } = $code;
        $comment1{ $str } = $rest;
        $count1++;
        next;
    } elsif ($line =~ /^0x(.*)[ \t]*U\+(.*)[ \t]*#(.*)$/) {
        # Ordinary character: one code, one UCS code point.
        $c = $1;
        $u = $2;
        $rest = "U+" . $u . $3;
    } else {
        # Not a mapping line.
        next;
    }
    $ucs = hex($u);
    $code = hex($c);
    $utf = &ucs2utf($ucs);
    # Keep only the first mapping seen for a given UTF-8 value.
    if( $array{ $utf } ne "" ){
        printf STDERR "Warning: duplicate UTF8: %08x UCS: %04x Shift JIS: %04x\n",$utf, $ucs, $code;
        next;
    }
    $count++;
    $array{ $utf } = $code;
    $comment{ $code } = $rest;
}
close( FILE );
# Write the UTF-8 --> SHIFT_JIS_2004 table as a C array initializer, sorted
# numerically by UTF-8 value.
$file = "utf8_to_shift_jis_2004.map";
open( FILE, "> $file" ) || die( "cannot open $file" );
print FILE "/*\n",
    " * This file was generated by UCS_to_SHIFT_JIS_2004.pl\n",
    " */\n",
    "static pg_utf_to_local ULmapSHIFT_JIS_2004[] = {\n";
foreach $index ( sort { $a <=> $b } keys %array ) {
    $code = $array{ $index };
    # The entry that brings $count down to zero gets no trailing comma.
    $sep = ( --$count == 0 ) ? "" : ",";
    printf FILE " {0x%08x, 0x%06x}%s /* %s */\n",
        $index, $code, $sep, $comment{ $code };
}
print FILE "};\n";
close( FILE );
# Write the combined-character UTF-8 --> SHIFT_JIS_2004 table.  Keys of
# %array1 are 16 hex digits (two 8-digit values) and are sorted as strings.
$file = "utf8_to_shift_jis_2004_combined.map";
open( FILE, "> $file" ) || die( "cannot open $file" );
print FILE "/*\n",
    " * This file was generated by UCS_to_SHIFT_JIS_2004.pl\n",
    " */\n",
    "static pg_utf_to_local_combined ULmapSHIFT_JIS_2004_combined[] = {\n";
foreach $index ( sort { $a cmp $b } keys %array1 ) {
    $code = $array1{ $index };
    $first = substr( $index, 0, 8 );
    $second = substr( $index, 8, 8 );
    # The entry that brings $count1 down to zero gets no trailing comma.
    $sep = ( --$count1 == 0 ) ? "" : ",";
    printf FILE " {0x%s, 0x%s, 0x%04x}%s /* %s */\n",
        $first, $second, $code, $sep, $comment1{ $index };
}
print FILE "};\n";
close( FILE );
# then generate SHIFT_JIS_2004 --> UTF-8 table
$in_file = "sjis-0213-2004-std.txt";
open( FILE, $in_file ) || die( "cannot open $in_file" );

# Discard the tables built for the opposite direction and zero the counters.
# They are cleared by name because reset('array') does not mean
# "reset %array": reset treats its argument as a list of single letters and
# wipes every package variable whose name begins with one of them.
%array = ();
%array1 = ();
%comment = ();
%comment1 = ();
$count = 0;
$count1 = 0;

while( $line = <FILE> ){
    if ($line =~ /^0x(.*)[ \t]*U\+(.*)\+(.*)[ \t]*#(.*)$/) {
        # Combined character: one SHIFT_JIS_2004 code corresponds to a
        # sequence of two UCS code points ("U+XXXX+YYYY").
        $c = $1;
        $u1 = $2;
        $u2 = $3;
        $rest = "U+" . $u1 . "+" . $u2 . $4;
        $code = hex($c);
        # ucs2utf comes from ucs2utf.pl (not shown here); presumably it
        # returns the UTF-8 encoding of a code point as an integer.
        $ucs = hex($u1);
        $utf1 = &ucs2utf($ucs);
        $ucs = hex($u2);
        $utf2 = &ucs2utf($ucs);
        # In this direction %array1 is keyed by the local code; the value
        # is both converted values as 8 hex digits each.
        $str = sprintf "%08x%08x", $utf1, $utf2;
        $array1{ $code } = $str;
        $comment1{ $code } = $rest;
        $count1++;
        next;
    } elsif ($line =~ /^0x(.*)[ \t]*U\+(.*)[ \t]*#(.*)$/) {
        # Ordinary character: one code, one UCS code point.
        $c = $1;
        $u = $2;
        $rest = "U+" . $u . $3;
    } else {
        # Not a mapping line.
        next;
    }
    $ucs = hex($u);
    $code = hex($c);
    $utf = &ucs2utf($ucs);
    # Keep only the first mapping seen for a given SHIFT_JIS_2004 code.
    # The table is keyed by code here, so the previously stored value has
    # to be looked up with $code, not $utf.
    if( $array{ $code } ne "" ){
        printf STDERR "Warning: duplicate Shift JIS: %04x UCS: %04x UTF-8: %08x\n", $code, $ucs, $utf;
        printf STDERR "Previous value: UTF-8: %08x\n", $array{ $code };
        next;
    }
    $count++;
    $array{ $code } = $utf;
    $comment{ $utf } = $rest;
}
close( FILE );
# Write the SHIFT_JIS_2004 --> UTF-8 table as a C array initializer, sorted
# numerically by local code.  Here $index is the local code and $code the
# value stored for it in %array.
$file = "shift_jis_2004_to_utf8.map";
open( FILE, "> $file" ) || die( "cannot open $file" );
print FILE "/*\n";
# The header must name this script as it is actually called
# (UCS_to_SHIFT_JIS_2004.pl), matching the other generated files.
print FILE " * This file was generated by UCS_to_SHIFT_JIS_2004.pl\n";
print FILE " */\n";
print FILE "static pg_local_to_utf LUmapSHIFT_JIS_2004[] = {\n";
for $index ( sort {$a <=> $b} keys( %array ) ){
    $code = $array{ $index };
    $count--;
    # The entry that brings $count down to zero gets no trailing comma.
    if( $count == 0 ){
        printf FILE " {0x%04x, 0x%08x} /* %s */\n", $index, $code, $comment{ $code };
    } else {
        printf FILE " {0x%04x, 0x%08x}, /* %s */\n", $index, $code, $comment{ $code };
    }
}
print FILE "};\n";
close(FILE);
# Write the combined-character SHIFT_JIS_2004 --> UTF-8 table, sorted
# numerically by local code.  Each value in %array1 is a 16-hex-digit
# string holding two 8-digit values.
$file = "shift_jis_2004_to_utf8_combined.map";
open( FILE, "> $file" ) || die( "cannot open $file" );
print FILE "/*\n",
    " * This file was generated by UCS_to_SHIFT_JIS_2004.pl\n",
    " */\n",
    "static pg_local_to_utf_combined LUmapSHIFT_JIS_2004_combined[] = {\n";
foreach $index ( sort { $a <=> $b } keys %array1 ) {
    $code = $array1{ $index };
    $first = substr( $code, 0, 8 );
    $second = substr( $code, 8, 8 );
    # The entry that brings $count1 down to zero gets no trailing comma.
    $sep = ( --$count1 == 0 ) ? "" : ",";
    printf FILE " {0x%04x, 0x%s, 0x%s}%s /* %s */\n",
        $index, $first, $second, $sep, $comment1{ $index };
}
print FILE "};\n";
close( FILE );

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,30 @@
/*
* This file was generated by UCS_to_EUC_JIS_2004.pl
*/
static pg_local_to_utf_combined LUmapEUC_JIS_2004_combined[] = {
{0x00a4f7, 0x00e3818b, 0x00e3829a}, /* U+304B+309A [2000] */
{0x00a4f8, 0x00e3818d, 0x00e3829a}, /* U+304D+309A [2000] */
{0x00a4f9, 0x00e3818f, 0x00e3829a}, /* U+304F+309A [2000] */
{0x00a4fa, 0x00e38191, 0x00e3829a}, /* U+3051+309A [2000] */
{0x00a4fb, 0x00e38193, 0x00e3829a}, /* U+3053+309A [2000] */
{0x00a5f7, 0x00e382ab, 0x00e3829a}, /* U+30AB+309A [2000] */
{0x00a5f8, 0x00e382ad, 0x00e3829a}, /* U+30AD+309A [2000] */
{0x00a5f9, 0x00e382af, 0x00e3829a}, /* U+30AF+309A [2000] */
{0x00a5fa, 0x00e382b1, 0x00e3829a}, /* U+30B1+309A [2000] */
{0x00a5fb, 0x00e382b3, 0x00e3829a}, /* U+30B3+309A [2000] */
{0x00a5fc, 0x00e382bb, 0x00e3829a}, /* U+30BB+309A [2000] */
{0x00a5fd, 0x00e38384, 0x00e3829a}, /* U+30C4+309A [2000] */
{0x00a5fe, 0x00e38388, 0x00e3829a}, /* U+30C8+309A [2000] */
{0x00a6f8, 0x00e387b7, 0x00e3829a}, /* U+31F7+309A [2000] */
{0x00abc4, 0x0000c3a6, 0x0000cc80}, /* U+00E6+0300 [2000] */
{0x00abc8, 0x0000c994, 0x0000cc80}, /* U+0254+0300 [2000] */
{0x00abc9, 0x0000c994, 0x0000cc81}, /* U+0254+0301 [2000] */
{0x00abca, 0x0000ca8c, 0x0000cc80}, /* U+028C+0300 [2000] */
{0x00abcb, 0x0000ca8c, 0x0000cc81}, /* U+028C+0301 [2000] */
{0x00abcc, 0x0000c999, 0x0000cc80}, /* U+0259+0300 [2000] */
{0x00abcd, 0x0000c999, 0x0000cc81}, /* U+0259+0301 [2000] */
{0x00abce, 0x0000c99a, 0x0000cc80}, /* U+025A+0300 [2000] */
{0x00abcf, 0x0000c99a, 0x0000cc81}, /* U+025A+0301 [2000] */
{0x00abe5, 0x0000cba9, 0x0000cba5}, /* U+02E9+02E5 [2000] */
{0x00abe6, 0x0000cba5, 0x0000cba9} /* U+02E5+02E9 [2000] */
};

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,30 @@
/*
* This file was generated by UCS_to_SHIFT_JIS_2004.pl
*/
static pg_local_to_utf_combined LUmapSHIFT_JIS_2004_combined[] = {
{0x82f5, 0x00e3818b, 0x00e3829a}, /* U+304B+309A [2000] */
{0x82f6, 0x00e3818d, 0x00e3829a}, /* U+304D+309A [2000] */
{0x82f7, 0x00e3818f, 0x00e3829a}, /* U+304F+309A [2000] */
{0x82f8, 0x00e38191, 0x00e3829a}, /* U+3051+309A [2000] */
{0x82f9, 0x00e38193, 0x00e3829a}, /* U+3053+309A [2000] */
{0x8397, 0x00e382ab, 0x00e3829a}, /* U+30AB+309A [2000] */
{0x8398, 0x00e382ad, 0x00e3829a}, /* U+30AD+309A [2000] */
{0x8399, 0x00e382af, 0x00e3829a}, /* U+30AF+309A [2000] */
{0x839a, 0x00e382b1, 0x00e3829a}, /* U+30B1+309A [2000] */
{0x839b, 0x00e382b3, 0x00e3829a}, /* U+30B3+309A [2000] */
{0x839c, 0x00e382bb, 0x00e3829a}, /* U+30BB+309A [2000] */
{0x839d, 0x00e38384, 0x00e3829a}, /* U+30C4+309A [2000] */
{0x839e, 0x00e38388, 0x00e3829a}, /* U+30C8+309A [2000] */
{0x83f6, 0x00e387b7, 0x00e3829a}, /* U+31F7+309A [2000] */
{0x8663, 0x0000c3a6, 0x0000cc80}, /* U+00E6+0300 [2000] */
{0x8667, 0x0000c994, 0x0000cc80}, /* U+0254+0300 [2000] */
{0x8668, 0x0000c994, 0x0000cc81}, /* U+0254+0301 [2000] */
{0x8669, 0x0000ca8c, 0x0000cc80}, /* U+028C+0300 [2000] */
{0x866a, 0x0000ca8c, 0x0000cc81}, /* U+028C+0301 [2000] */
{0x866b, 0x0000c999, 0x0000cc80}, /* U+0259+0300 [2000] */
{0x866c, 0x0000c999, 0x0000cc81}, /* U+0259+0301 [2000] */
{0x866d, 0x0000c99a, 0x0000cc80}, /* U+025A+0300 [2000] */
{0x866e, 0x0000c99a, 0x0000cc81}, /* U+025A+0301 [2000] */
{0x8685, 0x0000cba9, 0x0000cba5}, /* U+02E9+02E5 [2000] */
{0x8686, 0x0000cba5, 0x0000cba9} /* U+02E5+02E9 [2000] */
};

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,30 @@
/*
* This file was generated by UCS_to_EUC_JIS_2004.pl
*/
static pg_utf_to_local_combined ULmapEUC_JIS_2004_combined[] = {
{0x0000c3a6, 0x0000cc80, 0x00abc4}, /* U+00E6+0300 [2000] */
{0x0000c994, 0x0000cc80, 0x00abc8}, /* U+0254+0300 [2000] */
{0x0000c994, 0x0000cc81, 0x00abc9}, /* U+0254+0301 [2000] */
{0x0000c999, 0x0000cc80, 0x00abcc}, /* U+0259+0300 [2000] */
{0x0000c999, 0x0000cc81, 0x00abcd}, /* U+0259+0301 [2000] */
{0x0000c99a, 0x0000cc80, 0x00abce}, /* U+025A+0300 [2000] */
{0x0000c99a, 0x0000cc81, 0x00abcf}, /* U+025A+0301 [2000] */
{0x0000ca8c, 0x0000cc80, 0x00abca}, /* U+028C+0300 [2000] */
{0x0000ca8c, 0x0000cc81, 0x00abcb}, /* U+028C+0301 [2000] */
{0x0000cba5, 0x0000cba9, 0x00abe6}, /* U+02E5+02E9 [2000] */
{0x0000cba9, 0x0000cba5, 0x00abe5}, /* U+02E9+02E5 [2000] */
{0x00e3818b, 0x00e3829a, 0x00a4f7}, /* U+304B+309A [2000] */
{0x00e3818d, 0x00e3829a, 0x00a4f8}, /* U+304D+309A [2000] */
{0x00e3818f, 0x00e3829a, 0x00a4f9}, /* U+304F+309A [2000] */
{0x00e38191, 0x00e3829a, 0x00a4fa}, /* U+3051+309A [2000] */
{0x00e38193, 0x00e3829a, 0x00a4fb}, /* U+3053+309A [2000] */
{0x00e382ab, 0x00e3829a, 0x00a5f7}, /* U+30AB+309A [2000] */
{0x00e382ad, 0x00e3829a, 0x00a5f8}, /* U+30AD+309A [2000] */
{0x00e382af, 0x00e3829a, 0x00a5f9}, /* U+30AF+309A [2000] */
{0x00e382b1, 0x00e3829a, 0x00a5fa}, /* U+30B1+309A [2000] */
{0x00e382b3, 0x00e3829a, 0x00a5fb}, /* U+30B3+309A [2000] */
{0x00e382bb, 0x00e3829a, 0x00a5fc}, /* U+30BB+309A [2000] */
{0x00e38384, 0x00e3829a, 0x00a5fd}, /* U+30C4+309A [2000] */
{0x00e38388, 0x00e3829a, 0x00a5fe}, /* U+30C8+309A [2000] */
{0x00e387b7, 0x00e3829a, 0x00a6f8} /* U+31F7+309A [2000] */
};

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,30 @@
/*
* This file was generated by UCS_to_SHIFT_JIS_2004.pl
*/
static pg_utf_to_local_combined ULmapSHIFT_JIS_2004_combined[] = {
{0x0000c3a6, 0x0000cc80, 0x8663}, /* U+00E6+0300 [2000] */
{0x0000c994, 0x0000cc80, 0x8667}, /* U+0254+0300 [2000] */
{0x0000c994, 0x0000cc81, 0x8668}, /* U+0254+0301 [2000] */
{0x0000c999, 0x0000cc80, 0x866b}, /* U+0259+0300 [2000] */
{0x0000c999, 0x0000cc81, 0x866c}, /* U+0259+0301 [2000] */
{0x0000c99a, 0x0000cc80, 0x866d}, /* U+025A+0300 [2000] */
{0x0000c99a, 0x0000cc81, 0x866e}, /* U+025A+0301 [2000] */
{0x0000ca8c, 0x0000cc80, 0x8669}, /* U+028C+0300 [2000] */
{0x0000ca8c, 0x0000cc81, 0x866a}, /* U+028C+0301 [2000] */
{0x0000cba5, 0x0000cba9, 0x8686}, /* U+02E5+02E9 [2000] */
{0x0000cba9, 0x0000cba5, 0x8685}, /* U+02E9+02E5 [2000] */
{0x00e3818b, 0x00e3829a, 0x82f5}, /* U+304B+309A [2000] */
{0x00e3818d, 0x00e3829a, 0x82f6}, /* U+304D+309A [2000] */
{0x00e3818f, 0x00e3829a, 0x82f7}, /* U+304F+309A [2000] */
{0x00e38191, 0x00e3829a, 0x82f8}, /* U+3051+309A [2000] */
{0x00e38193, 0x00e3829a, 0x82f9}, /* U+3053+309A [2000] */
{0x00e382ab, 0x00e3829a, 0x8397}, /* U+30AB+309A [2000] */
{0x00e382ad, 0x00e3829a, 0x8398}, /* U+30AD+309A [2000] */
{0x00e382af, 0x00e3829a, 0x8399}, /* U+30AF+309A [2000] */
{0x00e382b1, 0x00e3829a, 0x839a}, /* U+30B1+309A [2000] */
{0x00e382b3, 0x00e3829a, 0x839b}, /* U+30B3+309A [2000] */
{0x00e382bb, 0x00e3829a, 0x839c}, /* U+30BB+309A [2000] */
{0x00e38384, 0x00e3829a, 0x839d}, /* U+30C4+309A [2000] */
{0x00e38388, 0x00e3829a, 0x839e}, /* U+30C8+309A [2000] */
{0x00e387b7, 0x00e3829a, 0x83f6} /* U+31F7+309A [2000] */
};

View File

@ -6,7 +6,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/mb/conv.c,v 1.62 2007/01/05 22:19:44 momjian Exp $
* $PostgreSQL: pgsql/src/backend/utils/mb/conv.c,v 1.63 2007/03/25 11:56:02 ishii Exp $
*
*-------------------------------------------------------------------------
*/
@ -244,10 +244,10 @@ mic2latin_with_table(const unsigned char *mic,
static int
compare1(const void *p1, const void *p2)
{
unsigned int v1,
uint32 v1,
v2;
v1 = *(unsigned int *) p1;
v1 = *(uint32 *) p1;
v2 = ((pg_utf_to_local *) p2)->utf;
return (v1 > v2) ? 1 : ((v1 == v2) ? 0 : -1);
}
@ -259,31 +259,86 @@ compare1(const void *p1, const void *p2)
static int
compare2(const void *p1, const void *p2)
{
unsigned int v1,
uint32 v1,
v2;
v1 = *(unsigned int *) p1;
v1 = *(uint32 *) p1;
v2 = ((pg_local_to_utf *) p2)->code;
return (v1 > v2) ? 1 : ((v1 == v2) ? 0 : -1);
}
/*
* comparison routine for bsearch()
* this routine is intended for combined UTF8 -> local code
*/
static int
compare3(const void *p1, const void *p2)
{
uint32 s1, s2, d1, d2;
s1 = *(uint32 *)p1;
s2 = *((uint32 *)p1 + 1);
d1 = ((pg_utf_to_local_combined *) p2)->utf1;
d2 = ((pg_utf_to_local_combined *) p2)->utf2;
return (s1 > d1 || (s1 == d1 && s2 > d2)) ? 1 : ((s1 == d1 && s2 == d2) ? 0 : -1);
}
/*
* comparison routine for bsearch()
* this routine is intended for local code -> combined UTF8
*/
static int
compare4(const void *p1, const void *p2)
{
uint32 v1,
v2;
v1 = *(uint32 *) p1;
v2 = ((pg_local_to_utf_combined *) p2)->code;
return (v1 > v2) ? 1 : ((v1 == v2) ? 0 : -1);
}
/*
 * Store a 32-bit wide character code as a multibyte stream at iso.
 *
 * The four bytes of code are examined from most to least significant and
 * every non-zero byte is written; zero bytes are skipped.  Returns the
 * position just past the last byte written.
 */
static unsigned char *set_iso_code(unsigned char *iso, uint32 code)
{
	int			shift;

	for (shift = 24; shift >= 0; shift -= 8)
	{
		unsigned char b = (unsigned char) ((code >> shift) & 0xff);

		if (b != 0)
			*iso++ = b;
	}
	return iso;
}
/*
* UTF8 ---> local code
*
* utf: input UTF8 string (need not be null-terminated).
* iso: pointer to the output area (must be large enough!)
* map: the conversion map.
* size: the size of the conversion map.
* cmap: the conversion map for combined characters.
* (optional)
* size1: the size of the conversion map.
* size2: the size of the conversion map for combined characters
* (optional)
* encoding: the PG identifier for the local encoding.
* len: length of input string.
*/
void
UtfToLocal(const unsigned char *utf, unsigned char *iso,
const pg_utf_to_local *map, int size, int encoding, int len)
const pg_utf_to_local *map, const pg_utf_to_local_combined *cmap,
int size1, int size2, int encoding, int len)
{
unsigned int iutf;
int l;
uint32 iutf;
uint32 cutf[2];
uint32 code;
pg_utf_to_local *p;
pg_utf_to_local_combined *cp;
int l;
for (; len > 0; len -= l)
{
@ -324,21 +379,94 @@ UtfToLocal(const unsigned char *utf, unsigned char *iso,
iutf |= *utf++;
}
p = bsearch(&iutf, map, size,
sizeof(pg_utf_to_local), compare1);
/*
* first, try with combined map if possible
*/
if (cmap && len > l)
{
const unsigned char *utf_save = utf;
int len_save = len;
int l_save = l;
len -= l;
if (p == NULL)
report_untranslatable_char(PG_UTF8, encoding,
(const char *) (utf - l), len);
l = pg_utf_mblen(utf);
if (len < l)
break;
if (p->code & 0xff000000)
*iso++ = p->code >> 24;
if (p->code & 0x00ff0000)
*iso++ = (p->code & 0x00ff0000) >> 16;
if (p->code & 0x0000ff00)
*iso++ = (p->code & 0x0000ff00) >> 8;
if (p->code & 0x000000ff)
*iso++ = p->code & 0x000000ff;
if (!pg_utf8_islegal(utf, l))
break;
cutf[0] = iutf;
if (l == 1)
{
if (len_save > 1)
{
p = bsearch(&cutf[0], map, size1,
sizeof(pg_utf_to_local), compare1);
if (p == NULL)
report_untranslatable_char(PG_UTF8, encoding,
(const char *) (utf_save - l_save), len_save);
iso = set_iso_code(iso, p->code);
}
/* ASCII case is easy */
*iso++ = *utf++;
continue;
}
else if (l == 2)
{
iutf = *utf++ << 8;
iutf |= *utf++;
}
else if (l == 3)
{
iutf = *utf++ << 16;
iutf |= *utf++ << 8;
iutf |= *utf++;
}
else if (l == 4)
{
iutf = *utf++ << 24;
iutf |= *utf++ << 16;
iutf |= *utf++ << 8;
iutf |= *utf++;
}
cutf[1] = iutf;
cp = bsearch(cutf, cmap, size2,
sizeof(pg_utf_to_local_combined), compare3);
if (cp)
code = cp->code;
else
{
/* not found in combined map. try with ordinary map */
p = bsearch(&cutf[0], map, size1,
sizeof(pg_utf_to_local), compare1);
if (p == NULL)
report_untranslatable_char(PG_UTF8, encoding,
(const char *) (utf_save - l_save), len_save);
iso = set_iso_code(iso, p->code);
p = bsearch(&cutf[1], map, size1,
sizeof(pg_utf_to_local), compare1);
if (p == NULL)
report_untranslatable_char(PG_UTF8, encoding,
(const char *) (utf - l), len);
code = p->code;
}
}
else /* no cmap or no remaining data */
{
p = bsearch(&iutf, map, size1,
sizeof(pg_utf_to_local), compare1);
if (p == NULL)
report_untranslatable_char(PG_UTF8, encoding,
(const char *) (utf - l), len);
code = p->code;
}
iso = set_iso_code(iso, code);
}
if (len > 0)
@ -353,17 +481,23 @@ UtfToLocal(const unsigned char *utf, unsigned char *iso,
* iso: input local string (need not be null-terminated).
* utf: pointer to the output area (must be large enough!)
* map: the conversion map.
* size: the size of the conversion map.
* cmap: the conversion map for combined characters.
* (optional)
* size1: the size of the conversion map.
* size2: the size of the conversion map for combined characters
* (optional)
* encoding: the PG identifier for the local encoding.
* len: length of input string.
*/
void
LocalToUtf(const unsigned char *iso, unsigned char *utf,
const pg_local_to_utf *map, int size, int encoding, int len)
const pg_local_to_utf *map, const pg_local_to_utf_combined *cmap,
int size1, int size2, int encoding, int len)
{
unsigned int iiso;
int l;
pg_local_to_utf *p;
pg_local_to_utf_combined *cp;
if (!PG_VALID_ENCODING(encoding))
ereport(ERROR,
@ -409,20 +543,59 @@ LocalToUtf(const unsigned char *iso, unsigned char *utf,
iiso |= *iso++;
}
p = bsearch(&iiso, map, size,
p = bsearch(&iiso, map, size1,
sizeof(pg_local_to_utf), compare2);
if (p == NULL)
{
/*
* not found in the ordinary map. if there's a combined
* character map, try with it
*/
if (cmap)
{
cp = bsearch(&iiso, cmap, size2,
sizeof(pg_local_to_utf_combined), compare4);
if (cp)
{
if (cp->utf1 & 0xff000000)
*utf++ = cp->utf1 >> 24;
if (cp->utf1 & 0x00ff0000)
*utf++ = (cp->utf1 & 0x00ff0000) >> 16;
if (cp->utf1 & 0x0000ff00)
*utf++ = (cp->utf1 & 0x0000ff00) >> 8;
if (cp->utf1 & 0x000000ff)
*utf++ = cp->utf1 & 0x000000ff;
if (cp->utf2 & 0xff000000)
*utf++ = cp->utf2 >> 24;
if (cp->utf2 & 0x00ff0000)
*utf++ = (cp->utf2 & 0x00ff0000) >> 16;
if (cp->utf2 & 0x0000ff00)
*utf++ = (cp->utf2 & 0x0000ff00) >> 8;
if (cp->utf2 & 0x000000ff)
*utf++ = cp->utf2 & 0x000000ff;
continue;
}
}
report_untranslatable_char(encoding, PG_UTF8,
(const char *) (iso - l), len);
if (p->utf & 0xff000000)
*utf++ = p->utf >> 24;
if (p->utf & 0x00ff0000)
*utf++ = (p->utf & 0x00ff0000) >> 16;
if (p->utf & 0x0000ff00)
*utf++ = (p->utf & 0x0000ff00) >> 8;
if (p->utf & 0x000000ff)
*utf++ = p->utf & 0x000000ff;
}
else
{
if (p->utf & 0xff000000)
*utf++ = p->utf >> 24;
if (p->utf & 0x00ff0000)
*utf++ = (p->utf & 0x00ff0000) >> 16;
if (p->utf & 0x0000ff00)
*utf++ = (p->utf & 0x0000ff00) >> 8;
if (p->utf & 0x000000ff)
*utf++ = p->utf & 0x000000ff;
}
}
if (len > 0)

View File

@ -4,7 +4,7 @@
# Makefile for utils/mb/conversion_procs
#
# IDENTIFICATION
# $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/Makefile,v 1.17 2006/02/27 16:09:49 petere Exp $
# $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/Makefile,v 1.18 2007/03/25 11:56:02 ishii Exp $
#
#-------------------------------------------------------------------------
@ -23,7 +23,8 @@ DIRS = \
utf8_and_ascii utf8_and_big5 utf8_and_cyrillic utf8_and_euc_cn \
utf8_and_euc_jp utf8_and_euc_kr utf8_and_euc_tw utf8_and_gb18030 \
utf8_and_gbk utf8_and_iso8859 utf8_and_iso8859_1 utf8_and_johab \
utf8_and_sjis utf8_and_win utf8_and_uhc
utf8_and_sjis utf8_and_win utf8_and_uhc \
utf8_and_euc_jis_2004 utf8_and_shift_jis_2004 euc_jis_2004_and_shift_jis_2004
# conversion_name source_encoding destination_encoding function object
CONVERSIONS = \
@ -150,8 +151,13 @@ CONVERSIONS = \
sjis_to_utf8 SJIS UTF8 sjis_to_utf8 utf8_and_sjis \
utf8_to_sjis UTF8 SJIS utf8_to_sjis utf8_and_sjis \
uhc_to_utf8 UHC UTF8 uhc_to_utf8 utf8_and_uhc \
utf8_to_uhc UTF8 UHC utf8_to_uhc utf8_and_uhc
utf8_to_uhc UTF8 UHC utf8_to_uhc utf8_and_uhc \
euc_jis_2004_to_utf8 EUC_JIS_2004 UTF8 euc_jis_2004_to_utf8 utf8_and_euc_jis_2004 \
utf8_to_euc_jis_2004 UTF8 EUC_JIS_2004 utf8_to_euc_jis_2004 utf8_and_euc_jis_2004 \
shift_jis_2004_to_utf8 SHIFT_JIS_2004 UTF8 shift_jis_2004_to_utf8 utf8_and_shift_jis_2004 \
utf8_to_shift_jis_2004 UTF8 SHIFT_JIS_2004 utf8_to_shift_jis_2004 utf8_and_shift_jis_2004 \
euc_jis_2004_to_shift_jis_2004 EUC_JIS_2004 SHIFT_JIS_2004 euc_jis_2004_to_shift_jis_2004 euc_jis_2004_and_shift_jis_2004 \
shift_jis_2004_to_euc_jis_2004 SHIFT_JIS_2004 EUC_JIS_2004 shift_jis_2004_to_euc_jis_2004 euc_jis_2004_and_shift_jis_2004
all: $(SQLSCRIPT)
@for dir in $(DIRS); do $(MAKE) -C $$dir $@ || exit; done

View File

@ -0,0 +1,12 @@
#-------------------------------------------------------------------------
#
# $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/euc_jis_2004_and_shift_jis_2004/Makefile,v 1.1 2007/03/25 11:56:02 ishii Exp $
#
#-------------------------------------------------------------------------
# subdir must name this Makefile's own directory, the same name as NAME
# below (it was previously misspelled).
subdir = src/backend/utils/mb/conversion_procs/euc_jis_2004_and_shift_jis_2004
top_builddir = ../../../../../..
include $(top_builddir)/src/Makefile.global
NAME = euc_jis_2004_and_shift_jis_2004
include $(srcdir)/../proc.mk

View File

@ -0,0 +1,333 @@
/*-------------------------------------------------------------------------
*
* EUC_JIS_2004, SHIFT_JIS_2004
*
* Copyright (c) 2007, PostgreSQL Global Development Group
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/euc_jis_2004_and_shift_jis_2004/euc_jis_2004_and_shift_jis_2004.c,v 1.1 2007/03/25 11:56:02 ishii Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "fmgr.h"
#include "mb/pg_wchar.h"
PG_MODULE_MAGIC;
PG_FUNCTION_INFO_V1(euc_jis_2004_to_shift_jis_2004);
PG_FUNCTION_INFO_V1(shift_jis_2004_to_euc_jis_2004);
extern Datum euc_jis_2004_to_shift_jis_2004(PG_FUNCTION_ARGS);
extern Datum shift_jis_2004_to_euc_jis_2004(PG_FUNCTION_ARGS);
static void euc_jis_20042shift_jis_2004(const unsigned char *euc, unsigned char *p, int len);
static void shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len);
/* ----------
* conv_proc(
* INTEGER, -- source encoding id
* INTEGER, -- destination encoding id
* CSTRING, -- source string (null terminated C string)
* CSTRING, -- destination string (null terminated C string)
* INTEGER -- source string length
* ) returns VOID;
* ----------
*/
/*
 * Conversion entry point: EUC_JIS_2004 -> SHIFT_JIS_2004.
 *
 * Arguments 0 and 1 (source and destination encoding ids) are only checked
 * by assertions.  Argument 2 is the source string, argument 3 the
 * destination buffer and argument 4 the source string length.
 */
Datum
euc_jis_2004_to_shift_jis_2004(PG_FUNCTION_ARGS)
{
	unsigned char *in = (unsigned char *) PG_GETARG_CSTRING(2);
	unsigned char *out = (unsigned char *) PG_GETARG_CSTRING(3);
	int			nbytes = PG_GETARG_INT32(4);

	Assert(PG_GETARG_INT32(0) == PG_EUC_JIS_2004);
	Assert(PG_GETARG_INT32(1) == PG_SHIFT_JIS_2004);
	Assert(nbytes >= 0);

	/* the actual work is done by the static helper in this file */
	euc_jis_20042shift_jis_2004(in, out, nbytes);

	PG_RETURN_VOID();
}
/*
 * Conversion entry point: SHIFT_JIS_2004 -> EUC_JIS_2004.
 *
 * Arguments 0 and 1 (source and destination encoding ids) are only checked
 * by assertions.  Argument 2 is the source string, argument 3 the
 * destination buffer and argument 4 the source string length.
 */
Datum
shift_jis_2004_to_euc_jis_2004(PG_FUNCTION_ARGS)
{
	unsigned char *in = (unsigned char *) PG_GETARG_CSTRING(2);
	unsigned char *out = (unsigned char *) PG_GETARG_CSTRING(3);
	int			nbytes = PG_GETARG_INT32(4);

	Assert(PG_GETARG_INT32(0) == PG_SHIFT_JIS_2004);
	Assert(PG_GETARG_INT32(1) == PG_EUC_JIS_2004);
	Assert(nbytes >= 0);

	/* the actual work is done by the static helper in this file */
	shift_jis_20042euc_jis_2004(in, out, nbytes);

	PG_RETURN_VOID();
}
/*
 * EUC_JIS_2004 -> SHIFT_JIS_2004
 *
 * Walks the len bytes at euc one character at a time, appends the
 * SHIFT_JIS_2004 form of each character to p, and finally terminates p
 * with a NUL byte.
 *
 * Character classes handled:
 *	- ASCII bytes are copied unchanged (an embedded NUL is rejected);
 *	- SS2 + byte (JIS X 0201 kana) becomes the single byte after SS2;
 *	- SS3 + two bytes (JIS X 0213 plane 2) becomes a two-byte code;
 *	- any other two-byte character (JIS X 0213 plane 1) becomes a
 *	  two-byte code.
 *
 * Anything else is handed to report_invalid_encoding() together with the
 * not-yet-consumed part of the input.
 */
static void
euc_jis_20042shift_jis_2004(const unsigned char *euc, unsigned char *p, int len)
{
	while (len > 0)
	{
		int			lead = *euc;
		int			mblen;
		int			row;
		int			cell;

		if (!IS_HIGHBIT_SET(lead))
		{
			/* plain ASCII: copy through, but never accept a NUL byte */
			if (lead == 0)
				report_invalid_encoding(PG_EUC_JIS_2004,
										(const char *) euc, len);
			*p++ = lead;
			euc++;
			len--;
			continue;
		}

		mblen = pg_encoding_verifymb(PG_EUC_JIS_2004, (const char *) euc, len);
		if (mblen < 0)
			report_invalid_encoding(PG_EUC_JIS_2004,
									(const char *) euc, len);

		if (lead == SS2 && mblen == 2)
		{
			/* JIS X 0201 kana: drop the SS2 prefix */
			*p++ = euc[1];
		}
		else if (lead == SS3 && mblen == 3)
		{
			/* JIS X 0213 plane 2 */
			row = euc[1] - 0xa0;
			cell = euc[2] - 0xa0;

			/* first output byte: rows 1,3,4,5,8,12-15 or rows 78-94 only */
			if (row == 1 ||
				(row >= 3 && row <= 5) ||
				row == 8 ||
				(row >= 12 && row <= 15))
				*p++ = ((row + 0x1df) >> 1) - (row >> 3) * 3;
			else if (row >= 78 && row <= 94)
				*p++ = (row + 0x19b) >> 1;
			else
				report_invalid_encoding(PG_EUC_JIS_2004,
										(const char *) euc, len);

			/* second output byte depends on whether the row is odd or even */
			if (row % 2 == 0)
				*p++ = cell + 0x9e;
			else if (cell >= 1 && cell <= 63)
				*p++ = cell + 0x3f;
			else if (cell >= 64 && cell <= 94)
				*p++ = cell + 0x40;
			else
				report_invalid_encoding(PG_EUC_JIS_2004,
										(const char *) euc, len);
		}
		else if (mblen == 2)
		{
			/* JIS X 0213 plane 1 */
			row = lead - 0xa0;
			cell = euc[1] - 0xa0;

			/* first output byte: rows 1-62 and rows 63-94 use different bases */
			if (row >= 1 && row <= 62)
				*p++ = (row + 0x101) >> 1;
			else if (row >= 63 && row <= 94)
				*p++ = (row + 0x181) >> 1;
			else
				report_invalid_encoding(PG_EUC_JIS_2004,
										(const char *) euc, len);

			/* second output byte depends on whether the row is odd or even */
			if (row % 2 == 0)
				*p++ = cell + 0x9e;
			else if (cell >= 1 && cell <= 63)
				*p++ = cell + 0x3f;
			else if (cell >= 64 && cell <= 94)
				*p++ = cell + 0x40;
			else
				report_invalid_encoding(PG_EUC_JIS_2004,
										(const char *) euc, len);
		}
		else
			report_invalid_encoding(PG_EUC_JIS_2004,
									(const char *) euc, len);

		euc += mblen;
		len -= mblen;
	}
	*p = '\0';
}
/*
 * Decode the second byte of a two-byte SHIFT_JIS_2004 character.
 *
 * Returns the "ten" number (1..94) encoded by b, or -1 if b is not a
 * valid second byte (below 0x40, equal to 0x7f, or above 0xfc).
 *
 * On success *ku is set to the parity of the "ku" the character is in:
 *	*ku = 0: "ku" = even
 *	*ku = 1: "ku" = odd
 * On failure *ku is left untouched.
 */
static int
get_ten(int b, int *ku)
{
	/* reject everything outside 0x40-0x7e, 0x80-0xfc */
	if (b < 0x40 || b == 0x7f || b > 0xfc)
		return -1;

	/* 0x9f-0xfc: even ku */
	if (b >= 0x9f)
	{
		*ku = 0;
		return b - 0x9e;
	}

	/* 0x40-0x7e and 0x80-0x9e: odd ku; the two ranges skip over 0x7f */
	*ku = 1;
	return (b <= 0x7e) ? b - 0x3f : b - 0x40;
}
/*
 * SHIFT_JIS_2004 ---> EUC_JIS_2004
 *
 * Walks the len bytes at sjis one character at a time, appends the
 * EUC_JIS_2004 form of each character to p, and finally terminates p
 * with a NUL byte.
 *
 * Character classes handled:
 *	- ASCII bytes are copied unchanged (an embedded NUL is rejected);
 *	- one-byte codes 0xa1-0xdf (JIS X 0201 kana) become SS2 + byte;
 *	- two-byte codes become a JIS X 0213 plane 1 code (two bytes) or a
 *	  plane 2 code (SS3 + two bytes), depending on the first byte.
 *
 * Anything else is handed to report_invalid_encoding() together with the
 * not-yet-consumed part of the input.
 */
static void
shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len)
{
	int			c1;
	int			ku,
				ten,
				kubun;
	int			plane;
	int			l;

	while (len > 0)
	{
		c1 = *sjis;

		if (!IS_HIGHBIT_SET(c1))
		{
			/* ASCII */
			if (c1 == 0)
				report_invalid_encoding(PG_SHIFT_JIS_2004,
										(const char *) sjis, len);
			*p++ = c1;
			sjis++;
			len--;
			continue;
		}

		l = pg_encoding_verifymb(PG_SHIFT_JIS_2004, (const char *) sjis, len);

		if (l < 0)
			report_invalid_encoding(PG_SHIFT_JIS_2004,
									(const char *) sjis, len);

		if (c1 >= 0xa1 && c1 <= 0xdf && l == 1)
		{
			/* JIS X0201 (1 byte kana) */
			*p++ = SS2;
			*p++ = c1;
		}
		else if (l == 2)
		{
			/*
			 * JIS X 0213
			 *
			 * The second byte is only looked at here, after the verifier has
			 * reported a two-byte character; one-byte characters never touch
			 * sjis[1].
			 */
			int			c2 = sjis[1];

			/* defaults; ku is overwritten by every valid branch below */
			plane = 1;
			ku = 1;

			/* every two-byte form shares the same second-byte decoding */
			ten = get_ten(c2, &kubun);
			if (ten < 0)
				report_invalid_encoding(PG_SHIFT_JIS_2004,
										(const char *) sjis, len);

			if (c1 >= 0x81 && c1 <= 0x9f)	/* plane 1 1ku-62ku */
			{
				ku = (c1 << 1) - 0x100 - kubun;
			}
			else if (c1 >= 0xe0 && c1 <= 0xef)	/* plane 1 63ku-94ku */
			{
				ku = (c1 << 1) - 0x180 - kubun;
			}
			else if (c1 >= 0xf0 && c1 <= 0xf3)	/* plane 2
												 * 1,3,4,5,8,12,13,14 ku */
			{
				plane = 2;
				switch (c1)
				{
					case 0xf0:
						ku = kubun == 0 ? 8 : 1;
						break;
					case 0xf1:
						ku = kubun == 0 ? 4 : 3;
						break;
					case 0xf2:
						ku = kubun == 0 ? 12 : 5;
						break;
					default:	/* 0xf3 */
						ku = kubun == 0 ? 14 : 13;
						break;
				}
			}
			else if (c1 >= 0xf4 && c1 <= 0xfc)	/* plane 2 15ku, 78-94ku */
			{
				plane = 2;
				/* 0xf4 with an odd-ku second byte is the irregular 15ku */
				if (c1 == 0xf4 && kubun == 1)
					ku = 15;
				else
					ku = (c1 << 1) - 0x19a - kubun;
			}
			else
				report_invalid_encoding(PG_SHIFT_JIS_2004,
										(const char *) sjis, len);

			if (plane == 2)
				*p++ = SS3;

			*p++ = ku + 0xa0;
			*p++ = ten + 0xa0;
		}
		else
		{
			/*
			 * Neither one-byte kana nor a two-byte character: report it
			 * rather than silently dropping input, matching the behavior of
			 * euc_jis_20042shift_jis_2004().
			 */
			report_invalid_encoding(PG_SHIFT_JIS_2004,
									(const char *) sjis, len);
		}

		sjis += l;
		len -= l;
	}
	*p = '\0';
}

View File

@ -6,7 +6,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_big5/utf8_and_big5.c,v 1.15 2007/01/05 22:19:45 momjian Exp $
* $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_big5/utf8_and_big5.c,v 1.16 2007/03/25 11:56:02 ishii Exp $
*
*-------------------------------------------------------------------------
*/
@ -46,8 +46,8 @@ big5_to_utf8(PG_FUNCTION_ARGS)
Assert(PG_GETARG_INT32(1) == PG_UTF8);
Assert(len >= 0);
LocalToUtf(src, dest, LUmapBIG5,
sizeof(LUmapBIG5) / sizeof(pg_local_to_utf), PG_BIG5, len);
LocalToUtf(src, dest, LUmapBIG5, NULL,
sizeof(LUmapBIG5) / sizeof(pg_local_to_utf), 0, PG_BIG5, len);
PG_RETURN_VOID();
}
@ -63,8 +63,8 @@ utf8_to_big5(PG_FUNCTION_ARGS)
Assert(PG_GETARG_INT32(1) == PG_BIG5);
Assert(len >= 0);
UtfToLocal(src, dest, ULmapBIG5,
sizeof(ULmapBIG5) / sizeof(pg_utf_to_local), PG_BIG5, len);
UtfToLocal(src, dest, ULmapBIG5, NULL,
sizeof(ULmapBIG5) / sizeof(pg_utf_to_local), 0, PG_BIG5, len);
PG_RETURN_VOID();
}

View File

@ -6,7 +6,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_cyrillic/utf8_and_cyrillic.c,v 1.18 2007/01/05 22:19:45 momjian Exp $
* $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_cyrillic/utf8_and_cyrillic.c,v 1.19 2007/03/25 11:56:02 ishii Exp $
*
*-------------------------------------------------------------------------
*/
@ -47,8 +47,8 @@ utf8_to_koi8r(PG_FUNCTION_ARGS)
Assert(PG_GETARG_INT32(1) == PG_KOI8R);
Assert(len >= 0);
UtfToLocal(src, dest, ULmapKOI8R,
sizeof(ULmapKOI8R) / sizeof(pg_utf_to_local), PG_KOI8R, len);
UtfToLocal(src, dest, ULmapKOI8R, NULL,
sizeof(ULmapKOI8R) / sizeof(pg_utf_to_local), 0, PG_KOI8R, len);
PG_RETURN_VOID();
}
@ -64,8 +64,8 @@ koi8r_to_utf8(PG_FUNCTION_ARGS)
Assert(PG_GETARG_INT32(1) == PG_UTF8);
Assert(len >= 0);
LocalToUtf(src, dest, LUmapKOI8R,
sizeof(LUmapKOI8R) / sizeof(pg_local_to_utf), PG_KOI8R, len);
LocalToUtf(src, dest, LUmapKOI8R, NULL,
sizeof(LUmapKOI8R) / sizeof(pg_local_to_utf), 0, PG_KOI8R, len);
PG_RETURN_VOID();
}

View File

@ -6,7 +6,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_euc_cn/utf8_and_euc_cn.c,v 1.16 2007/01/05 22:19:45 momjian Exp $
* $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_euc_cn/utf8_and_euc_cn.c,v 1.17 2007/03/25 11:56:02 ishii Exp $
*
*-------------------------------------------------------------------------
*/
@ -46,8 +46,8 @@ euc_cn_to_utf8(PG_FUNCTION_ARGS)
Assert(PG_GETARG_INT32(1) == PG_UTF8);
Assert(len >= 0);
LocalToUtf(src, dest, LUmapEUC_CN,
sizeof(LUmapEUC_CN) / sizeof(pg_local_to_utf), PG_EUC_CN, len);
LocalToUtf(src, dest, LUmapEUC_CN, NULL,
sizeof(LUmapEUC_CN) / sizeof(pg_local_to_utf), 0, PG_EUC_CN, len);
PG_RETURN_VOID();
}
@ -63,8 +63,8 @@ utf8_to_euc_cn(PG_FUNCTION_ARGS)
Assert(PG_GETARG_INT32(1) == PG_EUC_CN);
Assert(len >= 0);
UtfToLocal(src, dest, ULmapEUC_CN,
sizeof(ULmapEUC_CN) / sizeof(pg_utf_to_local), PG_EUC_CN, len);
UtfToLocal(src, dest, ULmapEUC_CN, NULL,
sizeof(ULmapEUC_CN) / sizeof(pg_utf_to_local), 0, PG_EUC_CN, len);
PG_RETURN_VOID();
}

View File

@ -0,0 +1,12 @@
#-------------------------------------------------------------------------
#
# $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_euc_jis_2004/Makefile,v 1.1 2007/03/25 11:56:02 ishii Exp $
#
#-------------------------------------------------------------------------
# Directory of this Makefile relative to the top of the source tree.
subdir = src/backend/utils/mb/conversion_procs/utf8_and_euc_jis_2004
# Relative path from this directory up to the top of the build tree.
top_builddir = ../../../../../..
include $(top_builddir)/src/Makefile.global
# Name of this conversion module, set before including the shared proc.mk.
# NOTE(review): proc.mk is not visible here; presumably it derives the
# source and library names from NAME -- confirm.
NAME = utf8_and_euc_jis_2004
include $(srcdir)/../proc.mk

View File

@ -0,0 +1,76 @@
/*-------------------------------------------------------------------------
*
* EUC_JIS_2004 <--> UTF8
*
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_euc_jis_2004/utf8_and_euc_jis_2004.c,v 1.1 2007/03/25 11:56:03 ishii Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "fmgr.h"
#include "mb/pg_wchar.h"
#include "../../Unicode/euc_jis_2004_to_utf8.map"
#include "../../Unicode/utf8_to_euc_jis_2004.map"
#include "../../Unicode/euc_jis_2004_to_utf8_combined.map"
#include "../../Unicode/utf8_to_euc_jis_2004_combined.map"
PG_MODULE_MAGIC;
PG_FUNCTION_INFO_V1(euc_jis_2004_to_utf8);
PG_FUNCTION_INFO_V1(utf8_to_euc_jis_2004);
extern Datum euc_jis_2004_to_utf8(PG_FUNCTION_ARGS);
extern Datum utf8_to_euc_jis_2004(PG_FUNCTION_ARGS);
/* ----------
* conv_proc(
* INTEGER, -- source encoding id
* INTEGER, -- destination encoding id
* CSTRING, -- source string (null terminated C string)
* CSTRING, -- destination string (null terminated C string)
* INTEGER -- source string length
* ) returns VOID;
* ----------
*/
/*
 * euc_jis_2004_to_utf8
 *
 * Conversion procedure EUC_JIS_2004 --> UTF8.
 *
 * Function arguments:
 *	0: source encoding id (asserted to be PG_EUC_JIS_2004)
 *	1: destination encoding id (asserted to be PG_UTF8)
 *	2: source string
 *	3: destination string
 *	4: source string length (asserted to be non-negative)
 *
 * The work is delegated to LocalToUtf(), which is given both the ordinary
 * map and the "combined" map together with their sizes (array size divided
 * by element size).
 */
Datum
euc_jis_2004_to_utf8(PG_FUNCTION_ARGS)
{
	unsigned char *input = (unsigned char *) PG_GETARG_CSTRING(2);
	unsigned char *output = (unsigned char *) PG_GETARG_CSTRING(3);
	int			nbytes = PG_GETARG_INT32(4);
	int			map_size = sizeof(LUmapEUC_JIS_2004) / sizeof(pg_local_to_utf);
	int			cmap_size = sizeof(LUmapEUC_JIS_2004_combined) / sizeof(pg_local_to_utf_combined);

	Assert(PG_GETARG_INT32(0) == PG_EUC_JIS_2004);
	Assert(PG_GETARG_INT32(1) == PG_UTF8);
	Assert(nbytes >= 0);

	LocalToUtf(input, output,
			   LUmapEUC_JIS_2004, LUmapEUC_JIS_2004_combined,
			   map_size, cmap_size,
			   PG_EUC_JIS_2004, nbytes);

	PG_RETURN_VOID();
}
/*
 * utf8_to_euc_jis_2004
 *
 * Conversion procedure UTF8 --> EUC_JIS_2004.
 *
 * Function arguments:
 *	0: source encoding id (asserted to be PG_UTF8)
 *	1: destination encoding id (asserted to be PG_EUC_JIS_2004)
 *	2: source string
 *	3: destination string
 *	4: source string length (asserted to be non-negative)
 *
 * The work is delegated to UtfToLocal(), which is given both the ordinary
 * map and the "combined" map together with their sizes (array size divided
 * by element size).
 */
Datum
utf8_to_euc_jis_2004(PG_FUNCTION_ARGS)
{
	unsigned char *input = (unsigned char *) PG_GETARG_CSTRING(2);
	unsigned char *output = (unsigned char *) PG_GETARG_CSTRING(3);
	int			nbytes = PG_GETARG_INT32(4);
	int			map_size = sizeof(ULmapEUC_JIS_2004) / sizeof(pg_utf_to_local);
	int			cmap_size = sizeof(ULmapEUC_JIS_2004_combined) / sizeof(pg_utf_to_local_combined);

	Assert(PG_GETARG_INT32(0) == PG_UTF8);
	Assert(PG_GETARG_INT32(1) == PG_EUC_JIS_2004);
	Assert(nbytes >= 0);

	UtfToLocal(input, output,
			   ULmapEUC_JIS_2004, ULmapEUC_JIS_2004_combined,
			   map_size, cmap_size,
			   PG_EUC_JIS_2004, nbytes);

	PG_RETURN_VOID();
}

View File

@ -6,7 +6,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_euc_jp/utf8_and_euc_jp.c,v 1.16 2007/01/05 22:19:45 momjian Exp $
* $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_euc_jp/utf8_and_euc_jp.c,v 1.17 2007/03/25 11:56:03 ishii Exp $
*
*-------------------------------------------------------------------------
*/
@ -46,8 +46,8 @@ euc_jp_to_utf8(PG_FUNCTION_ARGS)
Assert(PG_GETARG_INT32(1) == PG_UTF8);
Assert(len >= 0);
LocalToUtf(src, dest, LUmapEUC_JP,
sizeof(LUmapEUC_JP) / sizeof(pg_local_to_utf), PG_EUC_JP, len);
LocalToUtf(src, dest, LUmapEUC_JP, NULL,
sizeof(LUmapEUC_JP) / sizeof(pg_local_to_utf), 0, PG_EUC_JP, len);
PG_RETURN_VOID();
}
@ -63,8 +63,8 @@ utf8_to_euc_jp(PG_FUNCTION_ARGS)
Assert(PG_GETARG_INT32(1) == PG_EUC_JP);
Assert(len >= 0);
UtfToLocal(src, dest, ULmapEUC_JP,
sizeof(ULmapEUC_JP) / sizeof(pg_utf_to_local), PG_EUC_JP, len);
UtfToLocal(src, dest, ULmapEUC_JP, NULL,
sizeof(ULmapEUC_JP) / sizeof(pg_utf_to_local), 0, PG_EUC_JP, len);
PG_RETURN_VOID();
}

View File

@ -6,7 +6,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_euc_kr/utf8_and_euc_kr.c,v 1.16 2007/01/05 22:19:45 momjian Exp $
* $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_euc_kr/utf8_and_euc_kr.c,v 1.17 2007/03/25 11:56:03 ishii Exp $
*
*-------------------------------------------------------------------------
*/
@ -46,8 +46,8 @@ euc_kr_to_utf8(PG_FUNCTION_ARGS)
Assert(PG_GETARG_INT32(1) == PG_UTF8);
Assert(len >= 0);
LocalToUtf(src, dest, LUmapEUC_KR,
sizeof(LUmapEUC_KR) / sizeof(pg_local_to_utf), PG_EUC_KR, len);
LocalToUtf(src, dest, LUmapEUC_KR, NULL,
sizeof(LUmapEUC_KR) / sizeof(pg_local_to_utf), 0, PG_EUC_KR, len);
PG_RETURN_VOID();
}
@ -63,8 +63,8 @@ utf8_to_euc_kr(PG_FUNCTION_ARGS)
Assert(PG_GETARG_INT32(1) == PG_EUC_KR);
Assert(len >= 0);
UtfToLocal(src, dest, ULmapEUC_KR,
sizeof(ULmapEUC_KR) / sizeof(pg_utf_to_local), PG_EUC_KR, len);
UtfToLocal(src, dest, ULmapEUC_KR, NULL,
sizeof(ULmapEUC_KR) / sizeof(pg_utf_to_local), 0, PG_EUC_KR, len);
PG_RETURN_VOID();
}

View File

@ -6,7 +6,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_euc_tw/utf8_and_euc_tw.c,v 1.16 2007/01/05 22:19:45 momjian Exp $
* $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_euc_tw/utf8_and_euc_tw.c,v 1.17 2007/03/25 11:56:03 ishii Exp $
*
*-------------------------------------------------------------------------
*/
@ -46,8 +46,8 @@ euc_tw_to_utf8(PG_FUNCTION_ARGS)
Assert(PG_GETARG_INT32(1) == PG_UTF8);
Assert(len >= 0);
LocalToUtf(src, dest, LUmapEUC_TW,
sizeof(LUmapEUC_TW) / sizeof(pg_local_to_utf), PG_EUC_TW, len);
LocalToUtf(src, dest, LUmapEUC_TW, NULL,
sizeof(LUmapEUC_TW) / sizeof(pg_local_to_utf), 0, PG_EUC_TW, len);
PG_RETURN_VOID();
}
@ -63,8 +63,8 @@ utf8_to_euc_tw(PG_FUNCTION_ARGS)
Assert(PG_GETARG_INT32(1) == PG_EUC_TW);
Assert(len >= 0);
UtfToLocal(src, dest, ULmapEUC_TW,
sizeof(ULmapEUC_TW) / sizeof(pg_utf_to_local), PG_EUC_TW, len);
UtfToLocal(src, dest, ULmapEUC_TW, NULL,
sizeof(ULmapEUC_TW) / sizeof(pg_utf_to_local), 0, PG_EUC_TW, len);
PG_RETURN_VOID();
}

View File

@ -6,7 +6,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_gb18030/utf8_and_gb18030.c,v 1.17 2007/01/05 22:19:46 momjian Exp $
* $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_gb18030/utf8_and_gb18030.c,v 1.18 2007/03/25 11:56:03 ishii Exp $
*
*-------------------------------------------------------------------------
*/
@ -46,8 +46,8 @@ gb18030_to_utf8(PG_FUNCTION_ARGS)
Assert(PG_GETARG_INT32(1) == PG_UTF8);
Assert(len >= 0);
LocalToUtf(src, dest, LUmapGB18030,
sizeof(LUmapGB18030) / sizeof(pg_local_to_utf), PG_GB18030, len);
LocalToUtf(src, dest, LUmapGB18030, NULL,
sizeof(LUmapGB18030) / sizeof(pg_local_to_utf), 0, PG_GB18030, len);
PG_RETURN_VOID();
}
@ -63,8 +63,8 @@ utf8_to_gb18030(PG_FUNCTION_ARGS)
Assert(PG_GETARG_INT32(1) == PG_GB18030);
Assert(len >= 0);
UtfToLocal(src, dest, ULmapGB18030,
sizeof(ULmapGB18030) / sizeof(pg_utf_to_local), PG_GB18030, len);
UtfToLocal(src, dest, ULmapGB18030, NULL,
sizeof(ULmapGB18030) / sizeof(pg_utf_to_local), 0, PG_GB18030, len);
PG_RETURN_VOID();
}

View File

@ -6,7 +6,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_gbk/utf8_and_gbk.c,v 1.15 2007/01/05 22:19:46 momjian Exp $
* $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_gbk/utf8_and_gbk.c,v 1.16 2007/03/25 11:56:03 ishii Exp $
*
*-------------------------------------------------------------------------
*/
@ -46,8 +46,8 @@ gbk_to_utf8(PG_FUNCTION_ARGS)
Assert(PG_GETARG_INT32(1) == PG_UTF8);
Assert(len >= 0);
LocalToUtf(src, dest, LUmapGBK,
sizeof(LUmapGBK) / sizeof(pg_local_to_utf), PG_GBK, len);
LocalToUtf(src, dest, LUmapGBK, NULL,
sizeof(LUmapGBK) / sizeof(pg_local_to_utf), 0, PG_GBK, len);
PG_RETURN_VOID();
}
@ -63,8 +63,8 @@ utf8_to_gbk(PG_FUNCTION_ARGS)
Assert(PG_GETARG_INT32(1) == PG_GBK);
Assert(len >= 0);
UtfToLocal(src, dest, ULmapGBK,
sizeof(ULmapGBK) / sizeof(pg_utf_to_local), PG_GBK, len);
UtfToLocal(src, dest, ULmapGBK, NULL,
sizeof(ULmapGBK) / sizeof(pg_utf_to_local), 0, PG_GBK, len);
PG_RETURN_VOID();
}

View File

@ -6,7 +6,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_iso8859/utf8_and_iso8859.c,v 1.25 2007/01/05 22:19:46 momjian Exp $
* $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_iso8859/utf8_and_iso8859.c,v 1.26 2007/03/25 11:56:03 ishii Exp $
*
*-------------------------------------------------------------------------
*/
@ -127,7 +127,7 @@ iso8859_to_utf8(PG_FUNCTION_ARGS)
{
if (encoding == maps[i].encoding)
{
LocalToUtf(src, dest, maps[i].map1, maps[i].size1, encoding, len);
LocalToUtf(src, dest, maps[i].map1, NULL, maps[i].size1, 0, encoding, len);
PG_RETURN_VOID();
}
}
@ -155,7 +155,7 @@ utf8_to_iso8859(PG_FUNCTION_ARGS)
{
if (encoding == maps[i].encoding)
{
UtfToLocal(src, dest, maps[i].map2, maps[i].size2, encoding, len);
UtfToLocal(src, dest, maps[i].map2, NULL, maps[i].size2, 0, encoding, len);
PG_RETURN_VOID();
}
}

View File

@ -6,7 +6,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_johab/utf8_and_johab.c,v 1.16 2007/01/05 22:19:46 momjian Exp $
* $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_johab/utf8_and_johab.c,v 1.17 2007/03/25 11:56:03 ishii Exp $
*
*-------------------------------------------------------------------------
*/
@ -46,8 +46,8 @@ johab_to_utf8(PG_FUNCTION_ARGS)
Assert(PG_GETARG_INT32(1) == PG_UTF8);
Assert(len >= 0);
LocalToUtf(src, dest, LUmapJOHAB,
sizeof(LUmapJOHAB) / sizeof(pg_local_to_utf), PG_JOHAB, len);
LocalToUtf(src, dest, LUmapJOHAB, NULL,
sizeof(LUmapJOHAB) / sizeof(pg_local_to_utf), 0, PG_JOHAB, len);
PG_RETURN_VOID();
}
@ -63,8 +63,8 @@ utf8_to_johab(PG_FUNCTION_ARGS)
Assert(PG_GETARG_INT32(1) == PG_JOHAB);
Assert(len >= 0);
UtfToLocal(src, dest, ULmapJOHAB,
sizeof(ULmapJOHAB) / sizeof(pg_utf_to_local), PG_JOHAB, len);
UtfToLocal(src, dest, ULmapJOHAB, NULL,
sizeof(ULmapJOHAB) / sizeof(pg_utf_to_local), 0, PG_JOHAB, len);
PG_RETURN_VOID();
}

View File

@ -0,0 +1,12 @@
#-------------------------------------------------------------------------
#
# $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_shift_jis_2004/Makefile,v 1.1 2007/03/25 11:56:03 ishii Exp $
#
#-------------------------------------------------------------------------
# Directory of this Makefile relative to the top of the source tree.
subdir = src/backend/utils/mb/conversion_procs/utf8_and_shift_jis_2004
# Relative path from this directory up to the top of the build tree.
top_builddir = ../../../../../..
include $(top_builddir)/src/Makefile.global
# Name of this conversion module, set before including the shared proc.mk.
# NOTE(review): proc.mk is not visible here; presumably it derives the
# source and library names from NAME -- confirm.
NAME = utf8_and_shift_jis_2004
include $(srcdir)/../proc.mk

View File

@ -0,0 +1,76 @@
/*-------------------------------------------------------------------------
*
* SHIFT_JIS_2004 <--> UTF8
*
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_shift_jis_2004/utf8_and_shift_jis_2004.c,v 1.1 2007/03/25 11:56:03 ishii Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "fmgr.h"
#include "mb/pg_wchar.h"
#include "../../Unicode/shift_jis_2004_to_utf8.map"
#include "../../Unicode/utf8_to_shift_jis_2004.map"
#include "../../Unicode/shift_jis_2004_to_utf8_combined.map"
#include "../../Unicode/utf8_to_shift_jis_2004_combined.map"
PG_MODULE_MAGIC;
PG_FUNCTION_INFO_V1(shift_jis_2004_to_utf8);
PG_FUNCTION_INFO_V1(utf8_to_shift_jis_2004);
extern Datum shift_jis_2004_to_utf8(PG_FUNCTION_ARGS);
extern Datum utf8_to_shift_jis_2004(PG_FUNCTION_ARGS);
/* ----------
* conv_proc(
* INTEGER, -- source encoding id
* INTEGER, -- destination encoding id
* CSTRING, -- source string (null terminated C string)
* CSTRING, -- destination string (null terminated C string)
* INTEGER -- source string length
* ) returns VOID;
* ----------
*/
/*
 * shift_jis_2004_to_utf8
 *
 * Conversion procedure SHIFT_JIS_2004 --> UTF8.
 *
 * Function arguments:
 *	0: source encoding id (asserted to be PG_SHIFT_JIS_2004)
 *	1: destination encoding id (asserted to be PG_UTF8)
 *	2: source string
 *	3: destination string
 *	4: source string length (asserted to be non-negative)
 *
 * The work is delegated to LocalToUtf(), which is given both the ordinary
 * map and the "combined" map together with their sizes (array size divided
 * by element size).
 */
Datum
shift_jis_2004_to_utf8(PG_FUNCTION_ARGS)
{
	unsigned char *input = (unsigned char *) PG_GETARG_CSTRING(2);
	unsigned char *output = (unsigned char *) PG_GETARG_CSTRING(3);
	int			nbytes = PG_GETARG_INT32(4);
	int			map_size = sizeof(LUmapSHIFT_JIS_2004) / sizeof(pg_local_to_utf);
	int			cmap_size = sizeof(LUmapSHIFT_JIS_2004_combined) / sizeof(pg_local_to_utf_combined);

	Assert(PG_GETARG_INT32(0) == PG_SHIFT_JIS_2004);
	Assert(PG_GETARG_INT32(1) == PG_UTF8);
	Assert(nbytes >= 0);

	LocalToUtf(input, output,
			   LUmapSHIFT_JIS_2004, LUmapSHIFT_JIS_2004_combined,
			   map_size, cmap_size,
			   PG_SHIFT_JIS_2004, nbytes);

	PG_RETURN_VOID();
}
/*
 * utf8_to_shift_jis_2004
 *
 * Conversion procedure UTF8 --> SHIFT_JIS_2004.
 *
 * Function arguments:
 *	0: source encoding id (asserted to be PG_UTF8)
 *	1: destination encoding id (asserted to be PG_SHIFT_JIS_2004)
 *	2: source string
 *	3: destination string
 *	4: source string length (asserted to be non-negative)
 *
 * The work is delegated to UtfToLocal(), which is given both the ordinary
 * map and the "combined" map together with their sizes (array size divided
 * by element size).
 */
Datum
utf8_to_shift_jis_2004(PG_FUNCTION_ARGS)
{
	unsigned char *input = (unsigned char *) PG_GETARG_CSTRING(2);
	unsigned char *output = (unsigned char *) PG_GETARG_CSTRING(3);
	int			nbytes = PG_GETARG_INT32(4);
	int			map_size = sizeof(ULmapSHIFT_JIS_2004) / sizeof(pg_utf_to_local);
	int			cmap_size = sizeof(ULmapSHIFT_JIS_2004_combined) / sizeof(pg_utf_to_local_combined);

	Assert(PG_GETARG_INT32(0) == PG_UTF8);
	Assert(PG_GETARG_INT32(1) == PG_SHIFT_JIS_2004);
	Assert(nbytes >= 0);

	UtfToLocal(input, output,
			   ULmapSHIFT_JIS_2004, ULmapSHIFT_JIS_2004_combined,
			   map_size, cmap_size,
			   PG_SHIFT_JIS_2004, nbytes);

	PG_RETURN_VOID();
}

View File

@ -6,7 +6,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_sjis/utf8_and_sjis.c,v 1.15 2007/01/05 22:19:46 momjian Exp $
* $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_sjis/utf8_and_sjis.c,v 1.16 2007/03/25 11:56:03 ishii Exp $
*
*-------------------------------------------------------------------------
*/
@ -46,8 +46,8 @@ sjis_to_utf8(PG_FUNCTION_ARGS)
Assert(PG_GETARG_INT32(1) == PG_UTF8);
Assert(len >= 0);
LocalToUtf(src, dest, LUmapSJIS,
sizeof(LUmapSJIS) / sizeof(pg_local_to_utf), PG_SJIS, len);
LocalToUtf(src, dest, LUmapSJIS, NULL,
sizeof(LUmapSJIS) / sizeof(pg_local_to_utf), 0, PG_SJIS, len);
PG_RETURN_VOID();
}
@ -63,8 +63,8 @@ utf8_to_sjis(PG_FUNCTION_ARGS)
Assert(PG_GETARG_INT32(1) == PG_SJIS);
Assert(len >= 0);
UtfToLocal(src, dest, ULmapSJIS,
sizeof(ULmapSJIS) / sizeof(pg_utf_to_local), PG_SJIS, len);
UtfToLocal(src, dest, ULmapSJIS, NULL,
sizeof(ULmapSJIS) / sizeof(pg_utf_to_local), 0, PG_SJIS, len);
PG_RETURN_VOID();
}

View File

@ -6,7 +6,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_uhc/utf8_and_uhc.c,v 1.15 2007/01/05 22:19:46 momjian Exp $
* $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_uhc/utf8_and_uhc.c,v 1.16 2007/03/25 11:56:03 ishii Exp $
*
*-------------------------------------------------------------------------
*/
@ -46,8 +46,8 @@ uhc_to_utf8(PG_FUNCTION_ARGS)
Assert(PG_GETARG_INT32(1) == PG_UTF8);
Assert(len >= 0);
LocalToUtf(src, dest, LUmapUHC,
sizeof(LUmapUHC) / sizeof(pg_local_to_utf), PG_UHC, len);
LocalToUtf(src, dest, LUmapUHC, NULL,
sizeof(LUmapUHC) / sizeof(pg_local_to_utf), 0, PG_UHC, len);
PG_RETURN_VOID();
}
@ -63,8 +63,8 @@ utf8_to_uhc(PG_FUNCTION_ARGS)
Assert(PG_GETARG_INT32(1) == PG_UHC);
Assert(len >= 0);
UtfToLocal(src, dest, ULmapUHC,
sizeof(ULmapUHC) / sizeof(pg_utf_to_local), PG_UHC, len);
UtfToLocal(src, dest, ULmapUHC, NULL,
sizeof(ULmapUHC) / sizeof(pg_utf_to_local), 0, PG_UHC, len);
PG_RETURN_VOID();
}

View File

@ -6,7 +6,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_win/utf8_and_win.c,v 1.9 2007/01/05 22:19:46 momjian Exp $
* $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_win/utf8_and_win.c,v 1.10 2007/03/25 11:56:04 ishii Exp $
*
*-------------------------------------------------------------------------
*/
@ -117,7 +117,7 @@ win_to_utf8(PG_FUNCTION_ARGS)
{
if (encoding == maps[i].encoding)
{
LocalToUtf(src, dest, maps[i].map1, maps[i].size1, encoding, len);
LocalToUtf(src, dest, maps[i].map1, NULL, maps[i].size1, 0, encoding, len);
PG_RETURN_VOID();
}
}
@ -145,7 +145,7 @@ utf8_to_win(PG_FUNCTION_ARGS)
{
if (encoding == maps[i].encoding)
{
UtfToLocal(src, dest, maps[i].map2, maps[i].size2, encoding, len);
UtfToLocal(src, dest, maps[i].map2, NULL, maps[i].size2, 0, encoding, len);
PG_RETURN_VOID();
}
}

View File

@ -2,7 +2,7 @@
* Encoding names and routines for working with them. Everything
* in this file is shared between FE and BE.
*
* $PostgreSQL: pgsql/src/backend/utils/mb/encnames.c,v 1.31 2006/07/14 14:52:25 momjian Exp $
* $PostgreSQL: pgsql/src/backend/utils/mb/encnames.c,v 1.32 2007/03/25 11:56:02 ishii Exp $
*/
#ifdef FRONTEND
#include "postgres_fe.h"
@ -44,6 +44,10 @@ pg_encname pg_encname_tbl[] =
"euccn", PG_EUC_CN
}, /* EUC-CN; Extended Unix Code for simplified
* Chinese */
{
"eucjis2004", PG_EUC_JIS_2004
}, /* EUC-JIS-2004; Extended UNIX Code fixed Width for
* Japanese, standard JIS X 0213 */
{
"eucjp", PG_EUC_JP
}, /* EUC-JP; Extended UNIX Code fixed Width for
@ -156,6 +160,11 @@ pg_encname pg_encname_tbl[] =
{
"shiftjis", PG_SJIS
}, /* Shift_JIS; JIS X 0202-1991 */
{
"shiftjis2004", PG_SHIFT_JIS_2004
}, /* SHIFT-JIS-2004; Shift JIS for
* Japanese, standard JIS X 0213 */
{
"sjis", PG_SJIS
}, /* alias for Shift_JIS */
@ -391,6 +400,9 @@ pg_enc2name pg_enc2name_tbl[] =
{
"WIN1257", PG_WIN1257
},
{
"EUC_JIS_2004", PG_EUC_JIS_2004
},
{
"SJIS", PG_SJIS
},
@ -405,6 +417,9 @@ pg_enc2name pg_enc2name_tbl[] =
},
{
"GB18030", PG_GB18030
},
{
"SHIFT_JIS_2004", PG_SHIFT_JIS_2004
}
};

View File

@ -1,7 +1,7 @@
/*
* conversion functions between pg_wchar and multibyte streams.
* Tatsuo Ishii
* $PostgreSQL: pgsql/src/backend/utils/mb/wchar.c,v 1.59 2007/01/24 17:12:17 tgl Exp $
* $PostgreSQL: pgsql/src/backend/utils/mb/wchar.c,v 1.60 2007/03/25 11:56:02 ishii Exp $
*
* WIN1250 client encoding updated by Pavel Behal
*
@ -1346,11 +1346,13 @@ pg_wchar_tbl pg_wchar_table[] = {
{pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* 30; PG_WIN1254 */
{pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* 31; PG_WIN1255 */
{pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* 32; PG_WIN1257 */
{0, pg_sjis_mblen, pg_sjis_dsplen, pg_sjis_verifier, 2}, /* 33; PG_SJIS */
{0, pg_big5_mblen, pg_big5_dsplen, pg_big5_verifier, 2}, /* 34; PG_BIG5 */
{0, pg_gbk_mblen, pg_gbk_dsplen, pg_gbk_verifier, 2}, /* 35; PG_GBK */
{0, pg_uhc_mblen, pg_uhc_dsplen, pg_uhc_verifier, 2}, /* 36; PG_UHC */
{0, pg_gb18030_mblen, pg_gb18030_dsplen, pg_gb18030_verifier, 2} /* 37; PG_GB18030 */
{pg_eucjp2wchar_with_len, pg_eucjp_mblen, pg_eucjp_dsplen, pg_eucjp_verifier, 3}, /* 33; PG_EUC_JIS_2004 */
{0, pg_sjis_mblen, pg_sjis_dsplen, pg_sjis_verifier, 2}, /* 34; PG_SJIS */
{0, pg_big5_mblen, pg_big5_dsplen, pg_big5_verifier, 2}, /* 35; PG_BIG5 */
{0, pg_gbk_mblen, pg_gbk_dsplen, pg_gbk_verifier, 2}, /* 36; PG_GBK */
{0, pg_uhc_mblen, pg_uhc_dsplen, pg_uhc_verifier, 2}, /* 37; PG_UHC */
{0, pg_gb18030_mblen, pg_gb18030_dsplen, pg_gb18030_verifier, 2}, /* 38; PG_GB18030 */
{0, pg_sjis_mblen, pg_sjis_dsplen, pg_sjis_verifier, 2} /* 39; PG_SHIFT_JIS_2004 */
};
/* returns the byte length of a word for mule internal code */

View File

@ -37,7 +37,7 @@
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.393 2007/03/20 05:45:00 neilc Exp $
* $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.394 2007/03/25 11:56:04 ishii Exp $
*
*-------------------------------------------------------------------------
*/
@ -53,6 +53,6 @@
*/
/* yyyymmddN */
#define CATALOG_VERSION_NO 200703201
#define CATALOG_VERSION_NO 200703251
#endif

View File

@ -1,4 +1,4 @@
/* $PostgreSQL: pgsql/src/include/mb/pg_wchar.h,v 1.70 2006/12/24 00:57:48 tgl Exp $ */
/* $PostgreSQL: pgsql/src/include/mb/pg_wchar.h,v 1.71 2007/03/25 11:56:04 ishii Exp $ */
#ifndef PG_WCHAR_H
#define PG_WCHAR_H
@ -187,6 +187,7 @@ typedef enum pg_enc
PG_WIN1254, /* windows-1254 */
PG_WIN1255, /* windows-1255 */
PG_WIN1257, /* windows-1257 */
PG_EUC_JIS_2004, /* EUC-JIS-2004 */
/* PG_ENCODING_BE_LAST points to the above entry */
/* the following are for client encoding only */
@ -195,11 +196,12 @@ typedef enum pg_enc
PG_GBK, /* GBK (Windows-936) */
PG_UHC, /* UHC (Windows-949) */
PG_GB18030, /* GB18030 */
PG_SHIFT_JIS_2004, /* Shift-JIS-2004 */
_PG_LAST_ENCODING_ /* mark only */
} pg_enc;
#define PG_ENCODING_BE_LAST PG_WIN1257
#define PG_ENCODING_BE_LAST PG_EUC_JIS_2004
/*
* Please use these tests before access to pg_encconv_tbl[]
@ -274,23 +276,45 @@ typedef struct
extern pg_wchar_tbl pg_wchar_table[];
/*
* UTF8 to local code conversion map
* UTF-8 to local code conversion map
* Note that we limit the max length of UTF-8 to 4 bytes,
* which is UCS-4 00010000-001FFFFF range.
*/
typedef struct
{
unsigned int utf; /* UTF8 */
unsigned int code; /* local code */
uint32 utf; /* UTF-8 */
uint32 code; /* local code */
} pg_utf_to_local;
/*
* local code to UTF8 conversion map
* local code to UTF-8 conversion map
*/
typedef struct
{
unsigned int code; /* local code */
unsigned int utf; /* UTF8 */
uint32 code; /* local code */
uint32 utf; /* UTF-8 */
} pg_local_to_utf;
/*
* UTF-8 to local code conversion map(combined characters)
*/
typedef struct
{
uint32 utf1; /* UTF-8 code 1 */
uint32 utf2; /* UTF-8 code 2 */
uint32 code; /* local code */
} pg_utf_to_local_combined;
/*
* local code to UTF-8 conversion map(combined characters)
*/
typedef struct
{
uint32 code; /* local code */
uint32 utf1; /* UTF-8 code 1 */
uint32 utf2; /* UTF-8 code 2 */
} pg_local_to_utf_combined;
extern int pg_mb2wchar(const char *from, pg_wchar *to);
extern int pg_mb2wchar_with_len(const char *from, pg_wchar *to, int len);
extern int pg_encoding_mb2wchar_with_len(int encoding,
@ -338,10 +362,12 @@ extern unsigned short BIG5toCNS(unsigned short big5, unsigned char *lc);
extern unsigned short CNStoBIG5(unsigned short cns, unsigned char lc);
extern void LocalToUtf(const unsigned char *iso, unsigned char *utf,
const pg_local_to_utf *map, int size, int encoding, int len);
const pg_local_to_utf *map, const pg_local_to_utf_combined *cmap,
int size1, int size2, int encoding, int len);
extern void UtfToLocal(const unsigned char *utf, unsigned char *iso,
const pg_utf_to_local *map, int size, int encoding, int len);
const pg_utf_to_local *map, const pg_utf_to_local_combined *cmap,
int size1, int size2, int encoding, int len);
extern bool pg_verifymbstr(const char *mbstr, int len, bool noError);
extern bool pg_verify_mbstr(int encoding, const char *mbstr, int len,

View File

@ -1642,6 +1642,84 @@ SELECT CONVERT('foo', 'WIN1257', 'UTF8');
foo
(1 row)
-- UTF8 --> EUC_JIS_2004
SELECT CONVERT('foo' USING utf8_to_euc_jis_2004);
convert_using
---------------
foo
(1 row)
SELECT CONVERT('foo', 'UTF8', 'EUC_JIS_2004');
convert
---------
foo
(1 row)
-- EUC_JIS_2004 --> UTF8
SELECT CONVERT('foo' USING euc_jis_2004_to_utf8);
convert_using
---------------
foo
(1 row)
SELECT CONVERT('foo', 'EUC_JIS_2004', 'UTF8');
convert
---------
foo
(1 row)
-- UTF8 --> SHIFT_JIS_2004
SELECT CONVERT('foo' USING utf8_to_shift_jis_2004);
convert_using
---------------
foo
(1 row)
SELECT CONVERT('foo', 'UTF8', 'SHIFT_JIS_2004');
convert
---------
foo
(1 row)
-- SHIFT_JIS_2004 --> UTF8
SELECT CONVERT('foo' USING shift_jis_2004_to_utf8);
convert_using
---------------
foo
(1 row)
SELECT CONVERT('foo', 'SHIFT_JIS_2004', 'UTF8');
convert
---------
foo
(1 row)
-- EUC_JIS_2004 --> SHIFT_JIS_2004
SELECT CONVERT('foo' USING euc_jis_2004_to_shift_jis_2004);
convert_using
---------------
foo
(1 row)
SELECT CONVERT('foo', 'EUC_JIS_2004', 'SHIFT_JIS_2004');
convert
---------
foo
(1 row)
-- SHIFT_JIS_2004 --> EUC_JIS_2004
SELECT CONVERT('foo' USING shift_jis_2004_to_euc_jis_2004);
convert_using
---------------
foo
(1 row)
SELECT CONVERT('foo', 'SHIFT_JIS_2004', 'EUC_JIS_2004');
convert
---------
foo
(1 row)
--
-- return to the super user
--

View File

@ -399,6 +399,24 @@ SELECT CONVERT('foo', 'UTF8', 'WIN1257');
-- WIN1257 --> UTF8
SELECT CONVERT('foo' USING windows_1257_to_utf8);
SELECT CONVERT('foo', 'WIN1257', 'UTF8');
-- UTF8 --> EUC_JIS_2004
SELECT CONVERT('foo' USING utf8_to_euc_jis_2004);
SELECT CONVERT('foo', 'UTF8', 'EUC_JIS_2004');
-- EUC_JIS_2004 --> UTF8
SELECT CONVERT('foo' USING euc_jis_2004_to_utf8);
SELECT CONVERT('foo', 'EUC_JIS_2004', 'UTF8');
-- UTF8 --> SHIFT_JIS_2004
SELECT CONVERT('foo' USING utf8_to_shift_jis_2004);
SELECT CONVERT('foo', 'UTF8', 'SHIFT_JIS_2004');
-- SHIFT_JIS_2004 --> UTF8
SELECT CONVERT('foo' USING shift_jis_2004_to_utf8);
SELECT CONVERT('foo', 'SHIFT_JIS_2004', 'UTF8');
-- EUC_JIS_2004 --> SHIFT_JIS_2004
SELECT CONVERT('foo' USING euc_jis_2004_to_shift_jis_2004);
SELECT CONVERT('foo', 'EUC_JIS_2004', 'SHIFT_JIS_2004');
-- SHIFT_JIS_2004 --> EUC_JIS_2004
SELECT CONVERT('foo' USING shift_jis_2004_to_euc_jis_2004);
SELECT CONVERT('foo', 'SHIFT_JIS_2004', 'EUC_JIS_2004');
--
-- return to the super user
--