Unlike what shatty wrote in his commit messages, convert_from_utf8() does return
B_OK here when called with an empty string.
Since this is the natural thing to expect anyway, I changed our version to return
a sane value here as well - if this causes an error within our StyledEdit, as
shatty writes, we should just fix our StyledEdit.
Cleanup to match our style guides a bit better, added license.


git-svn-id: file:///srv/svn/repos/haiku/haiku/trunk@16328 a95241bf-73f2-0310-859d-f6bbb57e9c96
This commit is contained in:
Axel Dörfler 2006-02-10 12:00:38 +00:00
parent a544829365
commit cf45491bc8

View File

@ -1,122 +1,129 @@
#include <UTF8.h> /*
#include <iconv.h> * Copyright 2003-2006, Haiku, Inc. All Rights Reserved.
* Distributed under the terms of the MIT License.
*
* Authors:
* Andrew Bachmann
*/
#include <CharacterSet.h> #include <CharacterSet.h>
#include <CharacterSetRoster.h> #include <CharacterSetRoster.h>
#include <Errors.h> #include <UTF8.h>
#include <errno.h> #include <errno.h>
#include <iconv.h>
#include <stdio.h> #include <stdio.h>
#include <Debug.h>
//#define DEBUG_CONV 1 //#define DEBUG_CONV 1
#ifdef DEBUG_CONV #ifdef DEBUG_CONV
#define DEBPRINT(ARGS) printf ARGS; # define DEBPRINT(ARGS) printf ARGS;
#else #else
#define DEBPRINT(ARGS) ; # define DEBPRINT(ARGS) ;
#endif #endif
using namespace BPrivate; using namespace BPrivate;
typedef char ** input_buffer_t; int iconvctl(iconv_t icd, int request, void* argument);
int iconvctl (iconv_t icd, int request, void* argument);
/*!	Converts the bytes in \a src from the encoding named \a from to the
	encoding named \a to using iconv, writing the result to \a dst.

	\param from iconv name of the source encoding.
	\param to iconv name of the target encoding.
	\param src Input bytes.
	\param srcLen In: number of input bytes available. Out (on B_OK): number
		of input bytes that were consumed.
	\param dst Output buffer.
	\param dstLen In: capacity of \a dst in bytes. Out (on B_OK): number of
		bytes written to \a dst.
	\param state Only read, never written: if it is NULL or points to 0, the
		conversion descriptor is reset to its initial state first.
	\param substitute Character emitted (converted from ISO-8859-1 to the
		target encoding) in place of input that cannot be represented.
	\return B_OK for an empty input (with \c *dstLen set to 0) and after the
		conversion loop finished, B_ERROR if the conversion descriptor could
		not be opened, or the errno value of an unexpected iconv() failure.
		On the error returns \a srcLen and \a dstLen are left untouched.
*/
status_t
convert_encoding(const char* from, const char* to, const char* src,
	int32* srcLen, char* dst, int32* dstLen, int32* state,
	char substitute)
{
	if (*srcLen == 0) {
		// nothing to do!
		*dstLen = 0;
		return B_OK;
	}

	iconv_t conversion = iconv_open(to, from);
	if (conversion == (iconv_t)-1) {
		DEBPRINT(("iconv_open failed\n"));
		return B_ERROR;
	}

	if (state == NULL || *state == 0)
		iconv(conversion, NULL, NULL, NULL, NULL);

	// iconv() wants a non-const char**; it advances *inputBuffer (i.e. src)
	// and dst as it consumes input and produces output.
	char** inputBuffer = const_cast<char**>(&src);
	size_t inputLeft = *srcLen;
	size_t outputLeft = *dstLen;
	do {
		size_t nonReversibleConversions = iconv(conversion, inputBuffer,
			&inputLeft, &dst, &outputLeft);
		if (nonReversibleConversions == (size_t)-1) {
			if (errno == E2BIG) {
				// Not enough room in the output buffer for the next converted
				// character. This is not a "real" error, we just quit out.
				break;
			}

			switch (errno) {
				case EILSEQ: // unable to generate a corresponding character
				{
					// discard the input character
					const int one = 1, zero = 0;
					iconvctl(conversion, ICONV_SET_DISCARD_ILSEQ, (void*)&one);
					iconv(conversion, inputBuffer, &inputLeft, &dst,
						&outputLeft);
					iconvctl(conversion, ICONV_SET_DISCARD_ILSEQ, (void*)&zero);

					// Prepare to convert the substitute character to the
					// target encoding. A local copy is enough; no heap
					// allocation (and no mismatched delete) is needed.
					char substituteChar = substitute;
					char* copy = &substituteChar;
					size_t len = 1;

					// Perform the conversion.
					// We ignore any errors during this as part of
					// robustness/best-effort.
					// We use ISO-8859-1 as a source because it is a single
					// byte encoding. It also overlaps UTF-8 for the lower 128
					// characters. It is also likely to have a mapping to
					// almost any target encoding.
					iconv_t iso8859_1to = iconv_open(to, "ISO-8859-1");
					if (iso8859_1to != (iconv_t)-1) {
						iconv(iso8859_1to, NULL, NULL, NULL, NULL);
						iconv(iso8859_1to, &copy, &len, &dst, &outputLeft);
						iconv_close(iso8859_1to);
					}
					break;
				}

				case EINVAL: // incomplete multibyte sequence in the input
					// We just eat bad bytes, as part of robustness/best-effort.
					// Advance the input position by one byte; the pointer to
					// the position itself must keep pointing at src.
					++*inputBuffer;
					inputLeft--;
					break;

				default:
				{
					// unknown error, completely bail
					// (errno is saved before iconv_close() can change it)
					status_t status = errno;
					iconv_close(conversion);
					return status;
				}
			}
		}
	} while (inputLeft > 0 && outputLeft > 0);

	// report how much was consumed and produced
	*srcLen -= inputLeft;
	*dstLen -= outputLeft;
	iconv_close(conversion);

	return B_OK;
}
status_t status_t
convert_to_utf8(uint32 srcEncoding, convert_to_utf8(uint32 srcEncoding, const char* src, int32* srcLen,
const char * src, int32 * srcLen, char* dst, int32* dstLen, int32* state, char substitute)
char * dst, int32 * dstLen,
int32 * state, char substitute)
{ {
const BCharacterSet * charset = BCharacterSetRoster::GetCharacterSetByConversionID(srcEncoding); const BCharacterSet* charset = BCharacterSetRoster::GetCharacterSetByConversionID(
if (charset == 0) { srcEncoding);
if (charset == NULL)
return B_ERROR; return B_ERROR;
}
#if DEBUG_CONV #if DEBUG_CONV
fprintf(stderr, "convert_to_utf8(%s) : \"", charset->GetName()); fprintf(stderr, "convert_to_utf8(%s) : \"", charset->GetName());
for (int i = 0 ; i < *srcLen ; i++) { for (int i = 0 ; i < *srcLen ; i++) {
@ -124,19 +131,21 @@ convert_to_utf8(uint32 srcEncoding,
} }
fprintf(stderr, "\"\n"); fprintf(stderr, "\"\n");
#endif #endif
return convert_encoding(charset->GetName(),"UTF-8",src,srcLen,dst,dstLen,state,substitute);
return convert_encoding(charset->GetName(), "UTF-8", src, srcLen,
dst, dstLen, state, substitute);
} }
status_t status_t
convert_from_utf8(uint32 dstEncoding, convert_from_utf8(uint32 dstEncoding, const char* src, int32* srcLen,
const char * src, int32 * srcLen, char* dst, int32* dstLen, int32* state, char substitute)
char * dst, int32 * dstLen,
int32 * state, char substitute)
{ {
const BCharacterSet * charset = BCharacterSetRoster::GetCharacterSetByConversionID(dstEncoding); const BCharacterSet* charset = BCharacterSetRoster::GetCharacterSetByConversionID(
if (charset == 0) { dstEncoding);
if (charset == NULL)
return B_ERROR; return B_ERROR;
}
#if DEBUG_CONV #if DEBUG_CONV
fprintf(stderr, "convert_from_utf8(%s) : \"", charset->GetName()); fprintf(stderr, "convert_from_utf8(%s) : \"", charset->GetName());
for (int i = 0 ; i < *srcLen ; i++) { for (int i = 0 ; i < *srcLen ; i++) {
@ -144,5 +153,8 @@ convert_from_utf8(uint32 dstEncoding,
} }
fprintf(stderr, "\"\n"); fprintf(stderr, "\"\n");
#endif #endif
return convert_encoding("UTF-8",charset->GetName(),src,srcLen,dst,dstLen,state,substitute);
return convert_encoding("UTF-8", charset->GetName(), src, srcLen,
dst, dstLen, state, substitute);
} }