Contrary to what shatty wrote in his commit messages, convert_from_utf8() does return
B_OK here when called with an empty string. Since this is the natural thing to expect anyway, I changed our version to return a sane value in that case as well - if this causes an error within our StyledEdit, as shatty writes, we should just fix our StyledEdit. Also cleaned up the code to match our style guide a bit better and added the license header. git-svn-id: file:///srv/svn/repos/haiku/haiku/trunk@16328 a95241bf-73f2-0310-859d-f6bbb57e9c96
This commit is contained in:
parent
a544829365
commit
cf45491bc8
@ -1,122 +1,129 @@
|
|||||||
#include <UTF8.h>
|
/*
|
||||||
#include <iconv.h>
|
* Copyright 2003-2006, Haiku, Inc. All Rights Reserved.
|
||||||
|
* Distributed under the terms of the MIT License.
|
||||||
|
*
|
||||||
|
* Authors:
|
||||||
|
* Andrew Bachmann
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
#include <CharacterSet.h>
|
#include <CharacterSet.h>
|
||||||
#include <CharacterSetRoster.h>
|
#include <CharacterSetRoster.h>
|
||||||
#include <Errors.h>
|
#include <UTF8.h>
|
||||||
|
|
||||||
#include <errno.h>
|
#include <errno.h>
|
||||||
|
#include <iconv.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <Debug.h>
|
|
||||||
|
|
||||||
//#define DEBUG_CONV 1
|
//#define DEBUG_CONV 1
|
||||||
|
|
||||||
#ifdef DEBUG_CONV
|
#ifdef DEBUG_CONV
|
||||||
#define DEBPRINT(ARGS) printf ARGS;
|
# define DEBPRINT(ARGS) printf ARGS;
|
||||||
#else
|
#else
|
||||||
#define DEBPRINT(ARGS) ;
|
# define DEBPRINT(ARGS) ;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
using namespace BPrivate;
|
using namespace BPrivate;
|
||||||
|
|
||||||
typedef char ** input_buffer_t;
|
int iconvctl(iconv_t icd, int request, void* argument);
|
||||||
|
|
||||||
int iconvctl (iconv_t icd, int request, void* argument);
|
|
||||||
|
|
||||||
status_t
|
status_t
|
||||||
convert_encoding(const char * from, const char * to,
|
convert_encoding(const char* from, const char* to, const char* src,
|
||||||
const char * src, int32 * srcLen,
|
int32* srcLen, char* dst, int32* dstLen, int32* state,
|
||||||
char * dst, int32 * dstLen,
|
char substitute)
|
||||||
int32 * state, char substitute)
|
|
||||||
{
|
{
|
||||||
status_t status;
|
|
||||||
if (*srcLen == 0) {
|
if (*srcLen == 0) {
|
||||||
// nothing to do!
|
// nothing to do!
|
||||||
DEBPRINT(("nothing to do\n"));
|
|
||||||
*dstLen = 0;
|
*dstLen = 0;
|
||||||
return B_ERROR;
|
return B_OK;
|
||||||
}
|
}
|
||||||
iconv_t conversion = iconv_open(to,from);
|
|
||||||
|
iconv_t conversion = iconv_open(to, from);
|
||||||
if (conversion == (iconv_t)-1) {
|
if (conversion == (iconv_t)-1) {
|
||||||
DEBPRINT(("iconv_open failed\n"));
|
DEBPRINT(("iconv_open failed\n"));
|
||||||
return B_ERROR;
|
return B_ERROR;
|
||||||
}
|
}
|
||||||
if ((state == NULL) || (*state == 0)) {
|
if (state == NULL || *state == 0)
|
||||||
iconv(conversion,0,0,0,0);
|
iconv(conversion, 0, 0, 0, 0);
|
||||||
}
|
|
||||||
input_buffer_t inputBuffer = const_cast<input_buffer_t>(&src);
|
char** inputBuffer = const_cast<char**>(&src);
|
||||||
size_t inputLeft = *srcLen;
|
size_t inputLeft = *srcLen;
|
||||||
size_t outputLeft = *dstLen;
|
size_t outputLeft = *dstLen;
|
||||||
do {
|
do {
|
||||||
size_t nonReversibleConversions = iconv(conversion,inputBuffer,&inputLeft,&dst,&outputLeft);
|
size_t nonReversibleConversions = iconv(conversion, inputBuffer,
|
||||||
|
&inputLeft, &dst, &outputLeft);
|
||||||
if (nonReversibleConversions == (size_t)-1) {
|
if (nonReversibleConversions == (size_t)-1) {
|
||||||
if (errno == E2BIG) {
|
if (errno == E2BIG) {
|
||||||
// Not enough room in the output buffer for the next converted character
|
// Not enough room in the output buffer for the next converted character
|
||||||
// This is not a "real" error, we just quit out.
|
// This is not a "real" error, we just quit out.
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
switch (errno) {
|
switch (errno) {
|
||||||
case EILSEQ: // unable to generate a corresponding character
|
case EILSEQ: // unable to generate a corresponding character
|
||||||
{
|
{
|
||||||
// discard the input character
|
// discard the input character
|
||||||
const int one = 1, zero = 0;
|
const int one = 1, zero = 0;
|
||||||
iconvctl(conversion,ICONV_SET_DISCARD_ILSEQ,(void*)&one);
|
iconvctl(conversion, ICONV_SET_DISCARD_ILSEQ, (void*)&one);
|
||||||
iconv(conversion,inputBuffer,&inputLeft,&dst,&outputLeft);
|
iconv(conversion, inputBuffer, &inputLeft, &dst, &outputLeft);
|
||||||
iconvctl(conversion,ICONV_SET_DISCARD_ILSEQ,(void*)&zero);
|
iconvctl(conversion, ICONV_SET_DISCARD_ILSEQ, (void*)&zero);
|
||||||
// prepare to convert the substitute character to target encoding
|
|
||||||
char * original = new char[1];
|
// prepare to convert the substitute character to target encoding
|
||||||
original[0] = substitute;
|
char* original = new char[1];
|
||||||
size_t len = 1;
|
original[0] = substitute;
|
||||||
char * copy = original;
|
size_t len = 1;
|
||||||
// Perform the conversion
|
char* copy = original;
|
||||||
// We ignore any errors during this as part of robustness/best-effort
|
|
||||||
// We use ISO-8859-1 as a source because it is a single byte encoding
|
// Perform the conversion
|
||||||
// It also overlaps UTF-8 for the lower 128 characters. It is also
|
// We ignore any errors during this as part of robustness/best-effort
|
||||||
// likely to have a mapping to almost any target encoding.
|
// We use ISO-8859-1 as a source because it is a single byte encoding
|
||||||
iconv_t iso8859_1to = iconv_open(to,"ISO-8859-1");
|
// It also overlaps UTF-8 for the lower 128 characters. It is also
|
||||||
if (iso8859_1to != (iconv_t)-1) {
|
// likely to have a mapping to almost any target encoding.
|
||||||
iconv(iso8859_1to,0,0,0,0);
|
iconv_t iso8859_1to = iconv_open(to,"ISO-8859-1");
|
||||||
iconv(iso8859_1to,const_cast<input_buffer_t>(&copy),&len,&dst,&outputLeft);
|
if (iso8859_1to != (iconv_t)-1) {
|
||||||
iconv_close(iso8859_1to);
|
iconv(iso8859_1to, 0, 0, 0, 0);
|
||||||
|
iconv(iso8859_1to, const_cast<char**>(&copy), &len, &dst,
|
||||||
|
&outputLeft);
|
||||||
|
iconv_close(iso8859_1to);
|
||||||
|
}
|
||||||
|
delete original;
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
delete original;
|
|
||||||
}
|
case EINVAL: // incomplete multibyte sequence in the input
|
||||||
break;
|
// we just eat bad bytes, as part of robustness/best-effort
|
||||||
case EINVAL: // incomplete multibyte sequence in the input
|
inputBuffer++;
|
||||||
// we just eat bad bytes, as part of robustness/best-effort
|
inputLeft--;
|
||||||
inputBuffer++;
|
break;
|
||||||
inputLeft--;
|
|
||||||
break;
|
default:
|
||||||
default:
|
// unknown error, completely bail
|
||||||
// unknown error, completely bail
|
status_t status = errno;
|
||||||
status = errno;
|
iconv_close(conversion);
|
||||||
iconv_close(conversion);
|
return status;
|
||||||
return status;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} while ((inputLeft > 0) && (outputLeft > 0));
|
} while (inputLeft > 0 && outputLeft > 0);
|
||||||
|
|
||||||
*srcLen -= inputLeft;
|
*srcLen -= inputLeft;
|
||||||
*dstLen -= outputLeft;
|
*dstLen -= outputLeft;
|
||||||
iconv_close(conversion);
|
iconv_close(conversion);
|
||||||
if (*srcLen != 0) {
|
|
||||||
// able to convert at least one character
|
return B_OK;
|
||||||
DEBPRINT(("able to convert at least one character\n"));
|
|
||||||
return B_OK;
|
|
||||||
} else {
|
|
||||||
// not able to convert at least one character
|
|
||||||
DEBPRINT(("not able to convert at least one character\n"));
|
|
||||||
return B_ERROR;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
status_t
|
status_t
|
||||||
convert_to_utf8(uint32 srcEncoding,
|
convert_to_utf8(uint32 srcEncoding, const char* src, int32* srcLen,
|
||||||
const char * src, int32 * srcLen,
|
char* dst, int32* dstLen, int32* state, char substitute)
|
||||||
char * dst, int32 * dstLen,
|
|
||||||
int32 * state, char substitute)
|
|
||||||
{
|
{
|
||||||
const BCharacterSet * charset = BCharacterSetRoster::GetCharacterSetByConversionID(srcEncoding);
|
const BCharacterSet* charset = BCharacterSetRoster::GetCharacterSetByConversionID(
|
||||||
if (charset == 0) {
|
srcEncoding);
|
||||||
|
if (charset == NULL)
|
||||||
return B_ERROR;
|
return B_ERROR;
|
||||||
}
|
|
||||||
#if DEBUG_CONV
|
#if DEBUG_CONV
|
||||||
fprintf(stderr, "convert_to_utf8(%s) : \"", charset->GetName());
|
fprintf(stderr, "convert_to_utf8(%s) : \"", charset->GetName());
|
||||||
for (int i = 0 ; i < *srcLen ; i++) {
|
for (int i = 0 ; i < *srcLen ; i++) {
|
||||||
@ -124,19 +131,21 @@ convert_to_utf8(uint32 srcEncoding,
|
|||||||
}
|
}
|
||||||
fprintf(stderr, "\"\n");
|
fprintf(stderr, "\"\n");
|
||||||
#endif
|
#endif
|
||||||
return convert_encoding(charset->GetName(),"UTF-8",src,srcLen,dst,dstLen,state,substitute);
|
|
||||||
|
return convert_encoding(charset->GetName(), "UTF-8", src, srcLen,
|
||||||
|
dst, dstLen, state, substitute);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
status_t
|
status_t
|
||||||
convert_from_utf8(uint32 dstEncoding,
|
convert_from_utf8(uint32 dstEncoding, const char* src, int32* srcLen,
|
||||||
const char * src, int32 * srcLen,
|
char* dst, int32* dstLen, int32* state, char substitute)
|
||||||
char * dst, int32 * dstLen,
|
|
||||||
int32 * state, char substitute)
|
|
||||||
{
|
{
|
||||||
const BCharacterSet * charset = BCharacterSetRoster::GetCharacterSetByConversionID(dstEncoding);
|
const BCharacterSet* charset = BCharacterSetRoster::GetCharacterSetByConversionID(
|
||||||
if (charset == 0) {
|
dstEncoding);
|
||||||
|
if (charset == NULL)
|
||||||
return B_ERROR;
|
return B_ERROR;
|
||||||
}
|
|
||||||
#if DEBUG_CONV
|
#if DEBUG_CONV
|
||||||
fprintf(stderr, "convert_from_utf8(%s) : \"", charset->GetName());
|
fprintf(stderr, "convert_from_utf8(%s) : \"", charset->GetName());
|
||||||
for (int i = 0 ; i < *srcLen ; i++) {
|
for (int i = 0 ; i < *srcLen ; i++) {
|
||||||
@ -144,5 +153,8 @@ convert_from_utf8(uint32 dstEncoding,
|
|||||||
}
|
}
|
||||||
fprintf(stderr, "\"\n");
|
fprintf(stderr, "\"\n");
|
||||||
#endif
|
#endif
|
||||||
return convert_encoding("UTF-8",charset->GetName(),src,srcLen,dst,dstLen,state,substitute);
|
|
||||||
|
return convert_encoding("UTF-8", charset->GetName(), src, srcLen,
|
||||||
|
dst, dstLen, state, substitute);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user