Allow 4-byte UTF-8 sequences (UCS-4 range 00010000-001FFFFF).
This is necessary to support JIS X 0213 <--> UTF-8 conversion.
This commit is contained in:
parent
6b77e3a8cc
commit
4c35ec53a9
@ -1,6 +1,8 @@
|
||||
#
|
||||
# $PostgreSQL: pgsql/src/backend/utils/mb/Unicode/ucs2utf.pl,v 1.2 2003/11/29 22:40:01 pgsql Exp $
|
||||
# convert UCS-2 to UTF-8
|
||||
# Copyright (c) 2001-2007, PostgreSQL Global Development Group
|
||||
#
|
||||
# $PostgreSQL: pgsql/src/backend/utils/mb/Unicode/ucs2utf.pl,v 1.3 2007/03/23 13:51:30 ishii Exp $
|
||||
# convert UCS-4 to UTF-8
|
||||
#
|
||||
sub ucs2utf {
|
||||
local($ucs) = @_;
|
||||
@ -10,11 +12,16 @@ sub ucs2utf {
|
||||
$utf = $ucs;
|
||||
} elsif ($ucs > 0x007f && $ucs <= 0x07ff) {
|
||||
$utf = (($ucs & 0x003f) | 0x80) | ((($ucs >> 6) | 0xc0) << 8);
|
||||
} else {
|
||||
} elsif ($ucs > 0x07ff && $ucs <= 0xffff) {
|
||||
$utf = ((($ucs >> 12) | 0xe0) << 16) |
|
||||
(((($ucs & 0x0fc0) >> 6) | 0x80) << 8) |
|
||||
(($ucs & 0x003f) | 0x80);
|
||||
}
|
||||
} else {
|
||||
$utf = ((($ucs >> 18) | 0xf0) << 24) |
|
||||
(((($ucs & 0x3ffff) >> 12) | 0x80) << 16) |
|
||||
(((($ucs & 0x0fc0) >> 6) | 0x80) << 8) |
|
||||
(($ucs & 0x003f) | 0x80);
|
||||
}
|
||||
return($utf);
|
||||
}
|
||||
1;
|
||||
|
Loading…
Reference in New Issue
Block a user