From cf45491bc856144a539b63155a59e4969346825e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Axel=20D=C3=B6rfler?= Date: Fri, 10 Feb 2006 12:00:38 +0000 Subject: [PATCH] Unlike what shatty wrote in his commit messages, convert_from_utf8() does return B_OK here when called with an empty string. Since this is the natural thing to expect anyway, I changed our version to return a sane value here as well - if this causes an error within our StyledEdit, as shatty writes, we should just fix our StyledEdit. Cleanup to match our style guides a bit better, added license. git-svn-id: file:///srv/svn/repos/haiku/haiku/trunk@16328 a95241bf-73f2-0310-859d-f6bbb57e9c96 --- src/kits/textencoding/utf8_conversions.cpp | 174 +++++++++++---------- 1 file changed, 93 insertions(+), 81 deletions(-) diff --git a/src/kits/textencoding/utf8_conversions.cpp b/src/kits/textencoding/utf8_conversions.cpp index 08b24d774f..485ba9235d 100644 --- a/src/kits/textencoding/utf8_conversions.cpp +++ b/src/kits/textencoding/utf8_conversions.cpp @@ -1,122 +1,129 @@ -#include -#include +/* + * Copyright 2003-2006, Haiku, Inc. All Rights Reserved. + * Distributed under the terms of the MIT License. 
+ * + * Authors: + * Andrew Bachmann + */ + + #include #include -#include +#include + #include +#include #include -#include + //#define DEBUG_CONV 1 #ifdef DEBUG_CONV - #define DEBPRINT(ARGS) printf ARGS; +# define DEBPRINT(ARGS) printf ARGS; #else - #define DEBPRINT(ARGS) ; +# define DEBPRINT(ARGS) ; #endif using namespace BPrivate; -typedef char ** input_buffer_t; +int iconvctl(iconv_t icd, int request, void* argument); -int iconvctl (iconv_t icd, int request, void* argument); status_t -convert_encoding(const char * from, const char * to, - const char * src, int32 * srcLen, - char * dst, int32 * dstLen, - int32 * state, char substitute) +convert_encoding(const char* from, const char* to, const char* src, + int32* srcLen, char* dst, int32* dstLen, int32* state, + char substitute) { - status_t status; if (*srcLen == 0) { // nothing to do! - DEBPRINT(("nothing to do\n")); *dstLen = 0; - return B_ERROR; + return B_OK; } - iconv_t conversion = iconv_open(to,from); + + iconv_t conversion = iconv_open(to, from); if (conversion == (iconv_t)-1) { DEBPRINT(("iconv_open failed\n")); return B_ERROR; } - if ((state == NULL) || (*state == 0)) { - iconv(conversion,0,0,0,0); - } - input_buffer_t inputBuffer = const_cast<input_buffer_t>(&src); + if (state == NULL || *state == 0) + iconv(conversion, 0, 0, 0, 0); + + char** inputBuffer = const_cast<char**>(&src); size_t inputLeft = *srcLen; size_t outputLeft = *dstLen; do { - size_t nonReversibleConversions = iconv(conversion,inputBuffer,&inputLeft,&dst,&outputLeft); + size_t nonReversibleConversions = iconv(conversion, inputBuffer, + &inputLeft, &dst, &outputLeft); if (nonReversibleConversions == (size_t)-1) { if (errno == E2BIG) { // Not enough room in the output buffer for the next converted character // This is not a "real" error, we just quit out. 
break; } + switch (errno) { - case EILSEQ: // unable to generate a corresponding character + case EILSEQ: // unable to generate a corresponding character { - // discard the input character - const int one = 1, zero = 0; - iconvctl(conversion,ICONV_SET_DISCARD_ILSEQ,(void*)&one); - iconv(conversion,inputBuffer,&inputLeft,&dst,&outputLeft); - iconvctl(conversion,ICONV_SET_DISCARD_ILSEQ,(void*)&zero); - // prepare to convert the substitute character to target encoding - char * original = new char[1]; - original[0] = substitute; - size_t len = 1; - char * copy = original; - // Perform the conversion - // We ignore any errors during this as part of robustness/best-effort - // We use ISO-8859-1 as a source because it is a single byte encoding - // It also overlaps UTF-8 for the lower 128 characters. It is also - // likely to have a mapping to almost any target encoding. - iconv_t iso8859_1to = iconv_open(to,"ISO-8859-1"); - if (iso8859_1to != (iconv_t)-1) { - iconv(iso8859_1to,0,0,0,0); - iconv(iso8859_1to,const_cast<input_buffer_t>(&copy),&len,&dst,&outputLeft); - iconv_close(iso8859_1to); + // discard the input character + const int one = 1, zero = 0; + iconvctl(conversion, ICONV_SET_DISCARD_ILSEQ, (void*)&one); + iconv(conversion, inputBuffer, &inputLeft, &dst, &outputLeft); + iconvctl(conversion, ICONV_SET_DISCARD_ILSEQ, (void*)&zero); + + // prepare to convert the substitute character to target encoding + char* original = new char[1]; + original[0] = substitute; + size_t len = 1; + char* copy = original; + + // Perform the conversion + // We ignore any errors during this as part of robustness/best-effort + // We use ISO-8859-1 as a source because it is a single byte encoding + // It also overlaps UTF-8 for the lower 128 characters. It is also + // likely to have a mapping to almost any target encoding. 
+ iconv_t iso8859_1to = iconv_open(to,"ISO-8859-1"); + if (iso8859_1to != (iconv_t)-1) { + iconv(iso8859_1to, 0, 0, 0, 0); + iconv(iso8859_1to, const_cast<char**>(&copy), &len, &dst, + &outputLeft); + iconv_close(iso8859_1to); + } + delete original; + break; } - delete original; - } - break; - case EINVAL: // incomplete multibyte sequence in the input - // we just eat bad bytes, as part of robustness/best-effort - inputBuffer++; - inputLeft--; - break; - default: - // unknown error, completely bail - status = errno; - iconv_close(conversion); - return status; + + case EINVAL: // incomplete multibyte sequence in the input + // we just eat bad bytes, as part of robustness/best-effort + inputBuffer++; + inputLeft--; + break; + + default: + // unknown error, completely bail + status_t status = errno; + iconv_close(conversion); + return status; } } - } while ((inputLeft > 0) && (outputLeft > 0)); + } while (inputLeft > 0 && outputLeft > 0); + *srcLen -= inputLeft; *dstLen -= outputLeft; iconv_close(conversion); - if (*srcLen != 0) { - // able to convert at least one character - DEBPRINT(("able to convert at least one character\n")); - return B_OK; - } else { - // not able to convert at least one character - DEBPRINT(("not able to convert at least one character\n")); - return B_ERROR; - } + + return B_OK; } + status_t -convert_to_utf8(uint32 srcEncoding, - const char * src, int32 * srcLen, - char * dst, int32 * dstLen, - int32 * state, char substitute) +convert_to_utf8(uint32 srcEncoding, const char* src, int32* srcLen, + char* dst, int32* dstLen, int32* state, char substitute) { - const BCharacterSet * charset = BCharacterSetRoster::GetCharacterSetByConversionID(srcEncoding); - if (charset == 0) { + const BCharacterSet* charset = BCharacterSetRoster::GetCharacterSetByConversionID( + srcEncoding); + if (charset == NULL) return B_ERROR; - } + #if DEBUG_CONV fprintf(stderr, "convert_to_utf8(%s) : \"", charset->GetName()); for (int i = 0 ; i < *srcLen ; i++) { @@ -124,19 +131,21 @@ 
convert_to_utf8(uint32 srcEncoding, } fprintf(stderr, "\"\n"); #endif - return convert_encoding(charset->GetName(),"UTF-8",src,srcLen,dst,dstLen,state,substitute); + + return convert_encoding(charset->GetName(), "UTF-8", src, srcLen, + dst, dstLen, state, substitute); } + status_t -convert_from_utf8(uint32 dstEncoding, - const char * src, int32 * srcLen, - char * dst, int32 * dstLen, - int32 * state, char substitute) +convert_from_utf8(uint32 dstEncoding, const char* src, int32* srcLen, + char* dst, int32* dstLen, int32* state, char substitute) { - const BCharacterSet * charset = BCharacterSetRoster::GetCharacterSetByConversionID(dstEncoding); - if (charset == 0) { + const BCharacterSet* charset = BCharacterSetRoster::GetCharacterSetByConversionID( + dstEncoding); + if (charset == NULL) return B_ERROR; - } + #if DEBUG_CONV fprintf(stderr, "convert_from_utf8(%s) : \"", charset->GetName()); for (int i = 0 ; i < *srcLen ; i++) { @@ -144,5 +153,8 @@ convert_from_utf8(uint32 dstEncoding, } fprintf(stderr, "\"\n"); #endif - return convert_encoding("UTF-8",charset->GetName(),src,srcLen,dst,dstLen,state,substitute); + + return convert_encoding("UTF-8", charset->GetName(), src, srcLen, + dst, dstLen, state, substitute); } +