diff --git a/src/kits/support/Jamfile b/src/kits/support/Jamfile index 04551b9dac..029eccb8fe 100644 --- a/src/kits/support/Jamfile +++ b/src/kits/support/Jamfile @@ -8,6 +8,7 @@ SharedLibrary textencoding : character_sets.cpp utf8_conversions.cpp ; +DEPENDS libtextencoding.so : libiconv.so ; LinkSharedOSLibs libtextencoding.so : be iconv diff --git a/src/kits/support/character_sets.cpp b/src/kits/support/character_sets.cpp index a8ef008425..34e3a194bc 100644 --- a/src/kits/support/character_sets.cpp +++ b/src/kits/support/character_sets.cpp @@ -104,6 +104,14 @@ const char * iso15aliases[] = { "ISO_8859-14","Latin-9",NULL }; const BCharacterSet iso15(24,111,"ISO 8859-15","ISO-8859-15","ISO-8859-15",iso15aliases); +// chinese character set testing + +const char * big5aliases[] = + { "csBig5",NULL }; +const BCharacterSet big5(25,2026,"Big5","Big5","Big5",big5aliases); + +const BCharacterSet gb18030(26,114,"GB18030","GB18030",NULL,NULL); + /** * The following initializes the global character set array. * It is organized by id for efficient retrieval using predefined constants in UTF8.h and Font.h. @@ -122,6 +130,7 @@ const BCharacterSet * character_sets_by_id[] = { &windows1252, &unicode2, &KOI8R, &windows1251, &IBM866, &IBM437, &eucKR, &iso13, &iso14, &iso15, // R5 convert_to/from_utf8 encodings end here + &big5,&gb18030, }; const uint32 character_sets_by_id_count = sizeof(character_sets_by_id)/sizeof(const BCharacterSet*); diff --git a/src/kits/support/utf8_conversions.cpp b/src/kits/support/utf8_conversions.cpp index de982994a5..e322828695 100644 --- a/src/kits/support/utf8_conversions.cpp +++ b/src/kits/support/utf8_conversions.cpp @@ -15,6 +15,10 @@ convert_encoding(const char * from, const char * to, char * dst, int32 * dstLen, int32 * state) { + if (*srcLen == 0) { + // nothing to do! + return B_OK; + } iconv_t conversion = iconv_open(to,from); if (conversion == (iconv_t)-1) { return B_ERROR; @@ -28,14 +32,30 @@ convert_encoding(const char * from, const char * to, input_buffer_t inputBuffer = const_cast(&src); size_t inputLeft = *srcLen; size_t outputLeft = *dstLen; - size_t bytesLeft = iconv(conversion,inputBuffer,&inputLeft,&dst,&outputLeft); + size_t nonReversibleConversions = iconv(conversion,inputBuffer,&inputLeft,&dst,&outputLeft); *srcLen -= inputLeft; *dstLen -= outputLeft; - if ((bytesLeft != 0) && (errno != E2BIG) && (errno != EINVAL)) { + iconv_close(conversion); + if (nonReversibleConversions == -1) { + switch (errno) { + case EILSEQ: // invalid multibyte sequence in the source + return B_ERROR; + case EINVAL: // incomplete multibyte sequence in the input + return B_OK; + case E2BIG: // not enough room in the output buffer for the next converted character + return B_OK; + default: + // unknown error + int err = errno; + } + } + if (*srcLen != 0) { + // able to convert at least one character + return B_OK; + } else { + // not able to convert at least one character return B_ERROR; } - iconv_close(conversion); - return B_OK; } status_t