Implement our own version of wcsrtombs().

* add WcharStringToMultibyte() to libroot's locale backend
* implement wcstombs(), wcsrtombs() and wcsnrtombs() on top of that
  new backend function
This commit is contained in:
Oliver Tappe 2011-12-12 12:18:16 +01:00
parent 5dd04ce5da
commit 995d6d827f
7 changed files with 292 additions and 16 deletions

View File

@ -39,6 +39,11 @@ public:
size_t& lengthOut); size_t& lengthOut);
status_t WcharToMultibyte(char* mbOut, wchar_t wc, status_t WcharToMultibyte(char* mbOut, wchar_t wc,
mbstate_t* mbState, size_t& lengthOut); mbstate_t* mbState, size_t& lengthOut);
// Converts up to wcSourceLength wide characters, read through *wcSource,
// into a multibyte string. mbDest may be NULL, in which case nothing is
// stored and only the required number of bytes is determined. The number
// of bytes produced (excluding a terminating NUL) is returned in lengthOut.
status_t WcharStringToMultibyte(char* mbDest,
size_t mbDestLength,
const wchar_t** wcSource,
size_t wcSourceLength, mbstate_t* mbState,
size_t& lengthOut);
const char* GetLanginfo(int index); const char* GetLanginfo(int index);

View File

@ -50,6 +50,11 @@ public:
size_t& lengthOut); size_t& lengthOut);
virtual status_t WcharToMultibyte(char* mbOut, wchar_t wc, virtual status_t WcharToMultibyte(char* mbOut, wchar_t wc,
mbstate_t* mbState, size_t& lengthOut); mbstate_t* mbState, size_t& lengthOut);
// String counterpart of WcharToMultibyte(): converts up to wcSourceLength
// wide characters, read through *wcSource, into the multibyte buffer
// mbDest (capacity mbDestLength bytes). The resulting byte count is
// returned in lengthOut.
virtual status_t WcharStringToMultibyte(char* mbDest,
size_t mbDestLength,
const wchar_t** wcSource,
size_t wcSourceLength, mbstate_t* mbState,
size_t& lengthOut);
virtual const char* GetLanginfo(int index); virtual const char* GetLanginfo(int index);

View File

@ -132,6 +132,11 @@ public:
size_t& lengthOut) = 0; size_t& lengthOut) = 0;
virtual status_t WcharToMultibyte(char* mbOut, wchar_t wc, virtual status_t WcharToMultibyte(char* mbOut, wchar_t wc,
mbstate_t* mbState, size_t& lengthOut) = 0; mbstate_t* mbState, size_t& lengthOut) = 0;
// Pure virtual hook for converting a wide character string into a
// multibyte string:
//   mbDest/mbDestLength     - output buffer and its capacity in bytes
//   wcSource/wcSourceLength - in/out source pointer and the maximum
//                             number of wide characters to consume
//   mbState                 - conversion state used for the call
//   lengthOut               - receives the number of bytes produced
virtual status_t WcharStringToMultibyte(char* mbDest,
size_t mbDestLength,
const wchar_t** wcSource,
size_t wcSourceLength, mbstate_t* mbState,
size_t& lengthOut) = 0;
virtual const char* GetLanginfo(int index) = 0; virtual const char* GetLanginfo(int index) = 0;

View File

@ -302,26 +302,22 @@ ICUCtypeData::MultibyteStringToWchar(wchar_t* wcDest, size_t wcDestLength,
if (sourceLengthUsed >= mbSourceLength) if (sourceLengthUsed >= mbSourceLength)
break; break;
UChar32 unicodeChar = ucnv_getNextUChar(converter, &source, UChar32 unicodeChar = ucnv_getNextUChar(converter, &source,
std::min(source + MB_LEN_MAX, sourceEnd), &icuStatus); std::min(source + MB_CUR_MAX, sourceEnd), &icuStatus);
sourceLengthUsed = source - *mbSource; sourceLengthUsed = source - *mbSource;
TRACE(("l:%lu wl:%lu s:%p se:%p sl:%lu slu:%lu uchar:%x st:%x\n", TRACE(("MultibyteStringToWchar() l:%lu wl:%lu s:%p se:%p sl:%lu slu:%lu"
lengthOut, wcDestLength, source, sourceEnd, mbSourceLength, " uchar:%x st:%x\n", lengthOut, wcDestLength, source, sourceEnd,
sourceLengthUsed, unicodeChar, icuStatus)); mbSourceLength, sourceLengthUsed, unicodeChar, icuStatus));
if (!U_SUCCESS(icuStatus)) if (!U_SUCCESS(icuStatus))
break; break;
if (wcDest != NULL) if (wcDest != NULL)
*wcDest++ = unicodeChar; *wcDest++ = unicodeChar;
if (unicodeChar == L'\0') { if (unicodeChar == L'\0') {
if (wcDest != NULL) wcsIsTerminated = true;
wcsIsTerminated = true;
break; break;
} }
icuStatus = U_ZERO_ERROR; icuStatus = U_ZERO_ERROR;
} }
if (wcDest != NULL)
*mbSource = source;
if (!U_SUCCESS(icuStatus)) { if (!U_SUCCESS(icuStatus)) {
// conversion failed because of illegal character sequence // conversion failed because of illegal character sequence
TRACE(("MultibyteStringToWchar(): illegal character sequence\n")); TRACE(("MultibyteStringToWchar(): illegal character sequence\n"));
@ -331,9 +327,13 @@ ICUCtypeData::MultibyteStringToWchar(wchar_t* wcDest, size_t wcDestLength,
// reset to initial state // reset to initial state
_DropConverterFromMbState(mbState); _DropConverterFromMbState(mbState);
memset(mbState, 0, sizeof(mbstate_t)); memset(mbState, 0, sizeof(mbstate_t));
*mbSource = NULL; if (wcDest != NULL)
} else *mbSource = NULL;
} else {
mbState->count = 0; mbState->count = 0;
if (wcDest != NULL)
*mbSource = source;
}
return result; return result;
} }
@ -353,20 +353,40 @@ ICUCtypeData::WcharToMultibyte(char* mbOut, wchar_t wc, mbstate_t* mbState,
UConverter* converter = converterRef->Converter(); UConverter* converter = converterRef->Converter();
// do the conversion // convert input from UTF-32 to UTF-16
UChar ucharBuffer[2];
size_t ucharLength;
if (U_IS_BMP(wc)) {
ucharBuffer[0] = wc;
ucharLength = 1;
} else {
ucharBuffer[0] = U16_LEAD(wc);
ucharBuffer[1] = U16_TRAIL(wc);
ucharLength = 2;
}
// do the actual conversion
UErrorCode icuStatus = U_ZERO_ERROR; UErrorCode icuStatus = U_ZERO_ERROR;
lengthOut = ucnv_fromUChars(converter, mbOut, MB_LEN_MAX, (UChar*)&wc, size_t mbLength = mbOut == NULL ? 0 : MB_CUR_MAX;
1, &icuStatus); lengthOut = ucnv_fromUChars(converter, mbOut, mbLength, ucharBuffer,
ucharLength, &icuStatus);
TRACE(("WcharToMultibyte() l:%lu mb:%p ml:%lu uchar:%x st:%x\n", lengthOut,
mbOut, mbLength, wc, icuStatus));
if (icuStatus == U_BUFFER_OVERFLOW_ERROR && mbOut == NULL) {
// we have no output buffer, so we ignore buffer overflows
icuStatus = U_ZERO_ERROR;
}
if (!U_SUCCESS(icuStatus)) { if (!U_SUCCESS(icuStatus)) {
if (icuStatus == U_ILLEGAL_ARGUMENT_ERROR) { if (icuStatus == U_ILLEGAL_ARGUMENT_ERROR) {
// bad converter (shouldn't really happen) // bad converter (shouldn't really happen)
TRACE(("MultibyteToWchar(): bad converter\n")); TRACE(("WcharToMultibyte(): bad converter\n"));
return B_BAD_VALUE; return B_BAD_VALUE;
} }
// conversion failed because of illegal/unmappable character // conversion failed because of illegal/unmappable character
TRACE(("MultibyteToWchar(): illegal character sequence\n")); TRACE(("WcharToMultibyte(): illegal character sequence\n"));
ucnv_resetFromUnicode(converter); ucnv_resetFromUnicode(converter);
return B_BAD_DATA; return B_BAD_DATA;
} }
@ -381,6 +401,95 @@ ICUCtypeData::WcharToMultibyte(char* mbOut, wchar_t wc, mbstate_t* mbState,
} }
/*!	Converts a wide character string into a multibyte string, one character
	at a time, using the ICU converter associated with \a mbState.

	\param mbDest Output buffer, or NULL to only determine the number of bytes
		the converted string would occupy.
	\param mbDestLength Capacity of \a mbDest in bytes (ignored if \a mbDest
		is NULL).
	\param wcSource In/out pointer to the source string. Only touched when
		\a mbDest is not NULL and no conversion error occurred: it is set to
		NULL if the terminating L'\0' was converted, otherwise to the first
		wide character that has not been converted.
	\param wcSourceLength Maximum number of wide characters to consume.
	\param mbState Conversion state; dropped and zeroed once the terminator
		has been converted.
	\param lengthOut Receives the number of bytes produced, not counting the
		terminating NUL byte.
	\return B_OK on success, B_BAD_DATA if a character could not be converted,
		or the error reported by _GetConverterForMbState().
*/
status_t
ICUCtypeData::WcharStringToMultibyte(char* mbDest, size_t mbDestLength,
	const wchar_t** wcSource, size_t wcSourceLength, mbstate_t* mbState,
	size_t& lengthOut)
{
	ICUConverterRef converterRef;
	status_t result = _GetConverterForMbState(mbState, converterRef);
	if (result != B_OK) {
		TRACE(("WcharStringToMultibyte(): couldn't get converter for ID %d "
			"- %lx\n", mbState->converterID, result));
		return result;
	}
	UConverter* converter = converterRef->Converter();

	bool mbsIsTerminated = false;
	const UChar32* source = (UChar32*)*wcSource;

	UErrorCode icuStatus = U_ZERO_ERROR;
	lengthOut = 0;
	for (size_t sourceLengthUsed = 0; sourceLengthUsed < wcSourceLength;
		++sourceLengthUsed, ++source) {
		if (mbDest != NULL && lengthOut >= mbDestLength)
			break;

		// convert input from UTF-32 to UTF-16
		UChar ucharBuffer[2];
		size_t ucharLength;
		if (U_IS_BMP(*source)) {
			ucharBuffer[0] = *source;
			ucharLength = 1;
		} else {
			ucharBuffer[0] = U16_LEAD(*source);
			ucharBuffer[1] = U16_TRAIL(*source);
			ucharLength = 2;
		}

		// Do the actual conversion into a scratch buffer, such that only
		// complete characters ever reach the caller's buffer.
		size_t destLength = mbDest == NULL ? 0 : mbDestLength - lengthOut;
		const size_t maxCharLength = MB_CUR_MAX;
		char buffer[maxCharLength];
		// ucnv_fromUChars() NUL-terminates its output whenever the result is
		// shorter than the capacity it is given. The capacity must therefore
		// never exceed the scratch buffer's size, or a character that
		// converts to exactly maxCharLength bytes would cause a write past
		// the end of the buffer.
		size_t capacity = destLength < maxCharLength
			? destLength : maxCharLength;
		size_t mbLength = ucnv_fromUChars(converter,
			mbDest == NULL ? NULL : buffer, capacity, ucharBuffer,
			ucharLength, &icuStatus);
		TRACE(("WcharStringToMultibyte() l:%lu mb:%p ml:%lu s:%p ul:%lu slu:%lu"
			" uchar:%x st:%x\n", mbLength, mbDest, destLength, source,
			ucharLength, sourceLengthUsed, *source, icuStatus));

		if (icuStatus == U_BUFFER_OVERFLOW_ERROR) {
			// ignore buffer overflows ...
			icuStatus = U_ZERO_ERROR;
			// ... but stop if the output buffer has been exceeded
			if (destLength > 0)
				break;
		} else if (mbDest != NULL)
			memcpy(mbDest, buffer, mbLength);

		if (!U_SUCCESS(icuStatus))
			break;
		if (mbDest != NULL)
			mbDest += mbLength;
		if (*source == L'\0') {
			// the terminator is stored, but not counted in lengthOut
			mbsIsTerminated = true;
			break;
		}
		lengthOut += mbLength;
		icuStatus = U_ZERO_ERROR;
	}

	if (!U_SUCCESS(icuStatus)) {
		// conversion failed because of illegal character sequence
		TRACE(("WcharStringToMultibyte(): illegal character sequence\n"));
		ucnv_resetFromUnicode(converter);
		result = B_BAD_DATA;
	} else if (mbsIsTerminated) {
		// reset to initial state
		_DropConverterFromMbState(mbState);
		memset(mbState, 0, sizeof(mbstate_t));
		if (mbDest != NULL)
			*wcSource = NULL;
	} else {
		mbState->count = 0;
		if (mbDest != NULL)
			*wcSource = (wchar_t*)source;
	}

	return result;
}
const char* const char*
ICUCtypeData::GetLanginfo(int index) ICUCtypeData::GetLanginfo(int index)
{ {

View File

@ -184,6 +184,18 @@ ICULocaleBackend::WcharToMultibyte(char* mbOut, wchar_t wc, mbstate_t* mbState,
} }
/*!	Forwards the wide character string to multibyte string conversion to
	the backend's ctype data object and passes its status back unchanged.
	All parameters are handed through as-is.
*/
status_t
ICULocaleBackend::WcharStringToMultibyte(char* mbDest, size_t mbDestLength,
	const wchar_t** wcSource, size_t wcSourceLength, mbstate_t* mbState,
	size_t& lengthOut)
{
	// NOTE(review): presumably keeps errno unaffected by the ICU calls made
	// below - confirm against ErrnoMaintainer's definition.
	ErrnoMaintainer errnoMaintainer;

	const status_t status = fCtypeData.WcharStringToMultibyte(mbDest,
		mbDestLength, wcSource, wcSourceLength, mbState, lengthOut);
	return status;
}
const char* const char*
ICULocaleBackend::GetLanginfo(int index) ICULocaleBackend::GetLanginfo(int index)
{ {

View File

@ -0,0 +1,122 @@
/*
** Copyright 2011, Oliver Tappe, zooey@hirschkaefer.de. All rights reserved.
** Distributed under the terms of the Haiku License.
*/
#include <errno.h>
#include <string.h>
#include <wchar.h>
#include <errno_private.h>
#include <wchar_private.h>
#include "LocaleBackend.h"
//#define TRACE_WCSRTOMBS
#ifdef TRACE_WCSRTOMBS
# include <OS.h>
# define TRACE(x) debug_printf x
#else
# define TRACE(x) ;
#endif
using BPrivate::Libroot::gLocaleBackend;
/*!	Converts at most \a nwc wide characters from the string \a *src into a
	multibyte string.

	\param dst Output buffer, or NULL to only compute the number of bytes the
		conversion would produce.
	\param src In/out pointer to the source string. Left untouched if \a dst
		is NULL. In the POSIX locale path it is otherwise set to NULL once the
		terminating L'\0' has been converted, or to the first wide character
		that has not been converted.
	\param nwc Maximum number of wide characters to read from \a *src.
	\param len Capacity of \a dst in bytes (ignored if \a dst is NULL).
	\param ps Conversion state; a function-internal state is used if NULL.
	\return The number of bytes produced, not counting the terminating NUL
		byte, or (size_t)-1 with errno set to EILSEQ (unconvertible character)
		or EINVAL (any other backend failure).
*/
extern "C" size_t
__wcsnrtombs(char* dst, const wchar_t** src, size_t nwc, size_t len,
	mbstate_t* ps)
{
	TRACE(("wcsnrtombs(%p, %p, %lu, %lu) - lb:%p\n", dst, *src, nwc, len,
		gLocaleBackend));

	if (ps == NULL) {
		static mbstate_t internalMbState;
		ps = &internalMbState;
	}

	if (gLocaleBackend == NULL) {
		/*
		 * The POSIX locale is active. Since the POSIX locale only contains
		 * chars 0-127 and those ASCII chars are compatible with the UTF32
		 * values used in wint_t, we can just copy the bytes.
		 */
		const wchar_t* const source = *src;
		bool terminated = false;
		size_t count = 0;
		// Never read more than nwc wide characters and, when converting,
		// never write more than len bytes.
		for (; count < nwc && (dst == NULL || count < len); ++count) {
			const wchar_t wc = source[count];
			// The cast makes negative values huge, so this single comparison
			// rejects everything outside of 0-127, whether wchar_t is signed
			// or not.
			if (static_cast<unsigned long>(wc) > 0x7f) {
				// char is non-ASCII
				if (dst != NULL)
					*src = source + count;
				__set_errno(EILSEQ);
				return (size_t)-1;
			}
			if (dst != NULL)
				*dst++ = static_cast<char>(wc);
			if (wc == 0) {
				// the terminator is stored, but not counted
				terminated = true;
				break;
			}
		}

		if (terminated)
			memset(ps, 0, sizeof(mbstate_t));
		// the source pointer is only updated when actually converting
		if (dst != NULL)
			*src = terminated ? NULL : source + count;

		TRACE(("wcsnrtombs returns %lx and src %p\n", count, *src));

		return count;
	}

	size_t result = 0;
	status_t status = gLocaleBackend->WcharStringToMultibyte(dst, len, src, nwc,
		ps, result);

	if (status == B_BAD_DATA) {
		TRACE(("wcsnrtomb(): setting errno to EILSEQ\n"));
		__set_errno(EILSEQ);
		result = (size_t)-1;
	} else if (status != B_OK) {
		TRACE(("wcsnrtomb(): setting errno to EINVAL (status: %lx)\n", status));
		__set_errno(EINVAL);
		result = (size_t)-1;
	}

	TRACE(("wcsnrtombs returns %lx and src %p\n", result, *src));

	return result;
}
// deactivated, as wcsnrtombs() isn't publicly available yet.
// NOTE(review): the alias definition below is not commented out, so it
// appears to be active - confirm whether it is meant to be disabled.
extern "C"
B_DEFINE_WEAK_ALIAS(__wcsnrtombs, wcsnrtombs);
/*!	Converts the NUL-terminated wide character string \a *src into a
	multibyte string by delegating to __wcsnrtombs(), passing the length of
	the source string including its terminator as the character limit.

	\param dst Output buffer (handed through to __wcsnrtombs()).
	\param src In/out pointer to the source string.
	\param len Capacity of \a dst in bytes.
	\param ps Conversion state; a state private to this function is used
		if NULL.
	\return Whatever __wcsnrtombs() returns.
*/
extern "C" size_t
__wcsrtombs(char* dst, const wchar_t** src, size_t len, mbstate_t* ps)
{
	// fallback state, distinct from the one __wcsnrtombs() keeps for itself
	static mbstate_t internalMbState;
	mbstate_t* const state = ps != NULL ? ps : &internalMbState;

	// include the terminating L'\0' in the number of characters to convert
	const size_t sourceLength = __wcslen(*src) + 1;

	return __wcsnrtombs(dst, src, sourceLength, len, state);
}

extern "C"
B_DEFINE_WEAK_ALIAS(__wcsrtombs, wcsrtombs);

View File

@ -0,0 +1,18 @@
/*
** Copyright 2011, Oliver Tappe, zooey@hirschkaefer.de. All rights reserved.
** Distributed under the terms of the Haiku License.
*/
#include <wchar_private.h>
/*!	Converts the wide character string \a pwcs into the multibyte buffer
	\a s of \a n bytes by delegating to __wcsrtombs(), using a conversion
	state that is private to this function.

	\return Whatever __wcsrtombs() returns.
*/
size_t
__wcstombs(char* s, const wchar_t* pwcs, size_t n)
{
	static mbstate_t internalMbState;

	// __wcsrtombs() updates the source pointer it is given; this local copy
	// absorbs that update.
	const wchar_t* source = pwcs;
	return __wcsrtombs(s, &source, n, &internalMbState);
}

B_DEFINE_WEAK_ALIAS(__wcstombs, wcstombs);