Implement our own version of wcsrtombs().
* add WcharStringToMultibyte() to libroot's locale backend
* implement wcstombs(), wcsrtombs() and wcsnrtombs() on top of that new backend function
This commit is contained in:
parent
5dd04ce5da
commit
995d6d827f
@ -39,6 +39,11 @@ public:
|
|||||||
size_t& lengthOut);
|
size_t& lengthOut);
|
||||||
status_t WcharToMultibyte(char* mbOut, wchar_t wc,
|
status_t WcharToMultibyte(char* mbOut, wchar_t wc,
|
||||||
mbstate_t* mbState, size_t& lengthOut);
|
mbstate_t* mbState, size_t& lengthOut);
|
||||||
|
status_t WcharStringToMultibyte(char* mbDest,
|
||||||
|
size_t mbDestLength,
|
||||||
|
const wchar_t** wcSource,
|
||||||
|
size_t wcSourceLength, mbstate_t* mbState,
|
||||||
|
size_t& lengthOut);
|
||||||
|
|
||||||
const char* GetLanginfo(int index);
|
const char* GetLanginfo(int index);
|
||||||
|
|
||||||
|
@ -50,6 +50,11 @@ public:
|
|||||||
size_t& lengthOut);
|
size_t& lengthOut);
|
||||||
virtual status_t WcharToMultibyte(char* mbOut, wchar_t wc,
|
virtual status_t WcharToMultibyte(char* mbOut, wchar_t wc,
|
||||||
mbstate_t* mbState, size_t& lengthOut);
|
mbstate_t* mbState, size_t& lengthOut);
|
||||||
|
virtual status_t WcharStringToMultibyte(char* mbDest,
|
||||||
|
size_t mbDestLength,
|
||||||
|
const wchar_t** wcSource,
|
||||||
|
size_t wcSourceLength, mbstate_t* mbState,
|
||||||
|
size_t& lengthOut);
|
||||||
|
|
||||||
virtual const char* GetLanginfo(int index);
|
virtual const char* GetLanginfo(int index);
|
||||||
|
|
||||||
|
@ -132,6 +132,11 @@ public:
|
|||||||
size_t& lengthOut) = 0;
|
size_t& lengthOut) = 0;
|
||||||
virtual status_t WcharToMultibyte(char* mbOut, wchar_t wc,
|
virtual status_t WcharToMultibyte(char* mbOut, wchar_t wc,
|
||||||
mbstate_t* mbState, size_t& lengthOut) = 0;
|
mbstate_t* mbState, size_t& lengthOut) = 0;
|
||||||
|
virtual status_t WcharStringToMultibyte(char* mbDest,
|
||||||
|
size_t mbDestLength,
|
||||||
|
const wchar_t** wcSource,
|
||||||
|
size_t wcSourceLength, mbstate_t* mbState,
|
||||||
|
size_t& lengthOut) = 0;
|
||||||
|
|
||||||
virtual const char* GetLanginfo(int index) = 0;
|
virtual const char* GetLanginfo(int index) = 0;
|
||||||
|
|
||||||
|
@ -302,26 +302,22 @@ ICUCtypeData::MultibyteStringToWchar(wchar_t* wcDest, size_t wcDestLength,
|
|||||||
if (sourceLengthUsed >= mbSourceLength)
|
if (sourceLengthUsed >= mbSourceLength)
|
||||||
break;
|
break;
|
||||||
UChar32 unicodeChar = ucnv_getNextUChar(converter, &source,
|
UChar32 unicodeChar = ucnv_getNextUChar(converter, &source,
|
||||||
std::min(source + MB_LEN_MAX, sourceEnd), &icuStatus);
|
std::min(source + MB_CUR_MAX, sourceEnd), &icuStatus);
|
||||||
sourceLengthUsed = source - *mbSource;
|
sourceLengthUsed = source - *mbSource;
|
||||||
TRACE(("l:%lu wl:%lu s:%p se:%p sl:%lu slu:%lu uchar:%x st:%x\n",
|
TRACE(("MultibyteStringToWchar() l:%lu wl:%lu s:%p se:%p sl:%lu slu:%lu"
|
||||||
lengthOut, wcDestLength, source, sourceEnd, mbSourceLength,
|
" uchar:%x st:%x\n", lengthOut, wcDestLength, source, sourceEnd,
|
||||||
sourceLengthUsed, unicodeChar, icuStatus));
|
mbSourceLength, sourceLengthUsed, unicodeChar, icuStatus));
|
||||||
if (!U_SUCCESS(icuStatus))
|
if (!U_SUCCESS(icuStatus))
|
||||||
break;
|
break;
|
||||||
if (wcDest != NULL)
|
if (wcDest != NULL)
|
||||||
*wcDest++ = unicodeChar;
|
*wcDest++ = unicodeChar;
|
||||||
if (unicodeChar == L'\0') {
|
if (unicodeChar == L'\0') {
|
||||||
if (wcDest != NULL)
|
wcsIsTerminated = true;
|
||||||
wcsIsTerminated = true;
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
icuStatus = U_ZERO_ERROR;
|
icuStatus = U_ZERO_ERROR;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (wcDest != NULL)
|
|
||||||
*mbSource = source;
|
|
||||||
|
|
||||||
if (!U_SUCCESS(icuStatus)) {
|
if (!U_SUCCESS(icuStatus)) {
|
||||||
// conversion failed because of illegal character sequence
|
// conversion failed because of illegal character sequence
|
||||||
TRACE(("MultibyteStringToWchar(): illegal character sequence\n"));
|
TRACE(("MultibyteStringToWchar(): illegal character sequence\n"));
|
||||||
@ -331,9 +327,13 @@ ICUCtypeData::MultibyteStringToWchar(wchar_t* wcDest, size_t wcDestLength,
|
|||||||
// reset to initial state
|
// reset to initial state
|
||||||
_DropConverterFromMbState(mbState);
|
_DropConverterFromMbState(mbState);
|
||||||
memset(mbState, 0, sizeof(mbstate_t));
|
memset(mbState, 0, sizeof(mbstate_t));
|
||||||
*mbSource = NULL;
|
if (wcDest != NULL)
|
||||||
} else
|
*mbSource = NULL;
|
||||||
|
} else {
|
||||||
mbState->count = 0;
|
mbState->count = 0;
|
||||||
|
if (wcDest != NULL)
|
||||||
|
*mbSource = source;
|
||||||
|
}
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
@ -353,20 +353,40 @@ ICUCtypeData::WcharToMultibyte(char* mbOut, wchar_t wc, mbstate_t* mbState,
|
|||||||
|
|
||||||
UConverter* converter = converterRef->Converter();
|
UConverter* converter = converterRef->Converter();
|
||||||
|
|
||||||
// do the conversion
|
// convert input from UTF-32 to UTF-16
|
||||||
|
UChar ucharBuffer[2];
|
||||||
|
size_t ucharLength;
|
||||||
|
if (U_IS_BMP(wc)) {
|
||||||
|
ucharBuffer[0] = wc;
|
||||||
|
ucharLength = 1;
|
||||||
|
} else {
|
||||||
|
ucharBuffer[0] = U16_LEAD(wc);
|
||||||
|
ucharBuffer[1] = U16_TRAIL(wc);
|
||||||
|
ucharLength = 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
// do the actual conversion
|
||||||
UErrorCode icuStatus = U_ZERO_ERROR;
|
UErrorCode icuStatus = U_ZERO_ERROR;
|
||||||
lengthOut = ucnv_fromUChars(converter, mbOut, MB_LEN_MAX, (UChar*)&wc,
|
size_t mbLength = mbOut == NULL ? 0 : MB_CUR_MAX;
|
||||||
1, &icuStatus);
|
lengthOut = ucnv_fromUChars(converter, mbOut, mbLength, ucharBuffer,
|
||||||
|
ucharLength, &icuStatus);
|
||||||
|
TRACE(("WcharToMultibyte() l:%lu mb:%p ml:%lu uchar:%x st:%x\n", lengthOut,
|
||||||
|
mbOut, mbLength, wc, icuStatus));
|
||||||
|
|
||||||
|
if (icuStatus == U_BUFFER_OVERFLOW_ERROR && mbOut == NULL) {
|
||||||
|
// we have no output buffer, so we ignore buffer overflows
|
||||||
|
icuStatus = U_ZERO_ERROR;
|
||||||
|
}
|
||||||
|
|
||||||
if (!U_SUCCESS(icuStatus)) {
|
if (!U_SUCCESS(icuStatus)) {
|
||||||
if (icuStatus == U_ILLEGAL_ARGUMENT_ERROR) {
|
if (icuStatus == U_ILLEGAL_ARGUMENT_ERROR) {
|
||||||
// bad converter (shouldn't really happen)
|
// bad converter (shouldn't really happen)
|
||||||
TRACE(("MultibyteToWchar(): bad converter\n"));
|
TRACE(("WcharToMultibyte(): bad converter\n"));
|
||||||
return B_BAD_VALUE;
|
return B_BAD_VALUE;
|
||||||
}
|
}
|
||||||
|
|
||||||
// conversion failed because of illegal/unmappable character
|
// conversion failed because of illegal/unmappable character
|
||||||
TRACE(("MultibyteToWchar(): illegal character sequence\n"));
|
TRACE(("WcharToMultibyte(): illegal character sequence\n"));
|
||||||
ucnv_resetFromUnicode(converter);
|
ucnv_resetFromUnicode(converter);
|
||||||
return B_BAD_DATA;
|
return B_BAD_DATA;
|
||||||
}
|
}
|
||||||
@ -381,6 +401,95 @@ ICUCtypeData::WcharToMultibyte(char* mbOut, wchar_t wc, mbstate_t* mbState,
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*!	Converts up to \a wcSourceLength wide characters, starting at
	\c *wcSource, into a multibyte string using the ICU converter that is
	associated with \a mbState.

	\param mbDest Destination buffer, or \c NULL if only the required length
		shall be computed (nothing is written and \c *wcSource is left
		untouched in that case).
	\param mbDestLength Size of \a mbDest in bytes (ignored if \a mbDest is
		\c NULL).
	\param wcSource In: start of the wide character string. Out (only if
		\a mbDest is not \c NULL): \c NULL if the terminating null character
		has been converted, otherwise the first character that has not been
		converted.
	\param wcSourceLength Maximum number of wide characters to convert.
	\param mbState Conversion state; reset to the initial state once the
		terminating null character has been converted.
	\param lengthOut Receives the number of bytes produced, not counting the
		terminating null byte.
	\return \c B_OK on success, \c B_BAD_DATA if ICU failed to convert a
		character, or the error returned by _GetConverterForMbState().
*/
status_t
ICUCtypeData::WcharStringToMultibyte(char* mbDest, size_t mbDestLength,
	const wchar_t** wcSource, size_t wcSourceLength, mbstate_t* mbState,
	size_t& lengthOut)
{
	ICUConverterRef converterRef;
	status_t result = _GetConverterForMbState(mbState, converterRef);
	if (result != B_OK) {
		TRACE(("WcharStringToMultibyte(): couldn't get converter for ID %d "
			"- %lx\n", mbState->converterID, result));
		return result;
	}

	UConverter* converter = converterRef->Converter();

	bool mbsIsTerminated = false;
	const UChar32* source = (UChar32*)*wcSource;

	UErrorCode icuStatus = U_ZERO_ERROR;
	lengthOut = 0;
	for (size_t sourceLengthUsed = 0; sourceLengthUsed < wcSourceLength;
			++sourceLengthUsed, ++source) {
		if (mbDest != NULL && lengthOut >= mbDestLength)
			break;

		// convert input from UTF-32 to UTF-16
		UChar ucharBuffer[2];
		size_t ucharLength;
		if (U_IS_BMP(*source)) {
			ucharBuffer[0] = *source;
			ucharLength = 1;
		} else {
			ucharBuffer[0] = U16_LEAD(*source);
			ucharBuffer[1] = U16_TRAIL(*source);
			ucharLength = 2;
		}

		// do the actual conversion
		size_t destLength = mbDest == NULL ? 0 : mbDestLength - lengthOut;
		const size_t bufferSize = MB_CUR_MAX;
		char buffer[bufferSize];
		// The conversion goes through the local scratch buffer, so ICU must
		// never be told that more room is available than that buffer holds:
		// ucnv_fromUChars() NUL-terminates its output whenever the capacity
		// allows, which would write past the end of the buffer for a
		// character that needs exactly MB_CUR_MAX bytes.
		size_t bufferCapacity
			= destLength < bufferSize ? destLength : bufferSize;
		size_t mbLength = ucnv_fromUChars(converter,
			mbDest == NULL ? NULL : buffer, bufferCapacity, ucharBuffer,
			ucharLength, &icuStatus);
		TRACE(("WcharStringToMultibyte() l:%lu mb:%p ml:%lu s:%p ul:%lu slu:%lu"
			" uchar:%x st:%x\n", mbLength, mbDest, destLength, source,
			ucharLength, sourceLengthUsed, *source, icuStatus));

		if (icuStatus == U_BUFFER_OVERFLOW_ERROR) {
			// ignore buffer overflows ...
			icuStatus = U_ZERO_ERROR;
			// ... but stop if the output buffer has been exceeded
			if (destLength > 0)
				break;
		} else if (mbDest != NULL)
			memcpy(mbDest, buffer, mbLength);

		if (!U_SUCCESS(icuStatus))
			break;
		if (mbDest != NULL)
			mbDest += mbLength;
		if (*source == L'\0') {
			// the terminating null byte is not added to lengthOut
			mbsIsTerminated = true;
			break;
		}
		lengthOut += mbLength;
		icuStatus = U_ZERO_ERROR;
	}

	if (!U_SUCCESS(icuStatus)) {
		// conversion failed because of illegal character sequence
		TRACE(("WcharStringToMultibyte(): illegal character sequence\n"));
		ucnv_resetFromUnicode(converter);
		result = B_BAD_DATA;
	} else if (mbsIsTerminated) {
		// reset to initial state
		_DropConverterFromMbState(mbState);
		memset(mbState, 0, sizeof(mbstate_t));
		if (mbDest != NULL)
			*wcSource = NULL;
	} else {
		mbState->count = 0;
		if (mbDest != NULL)
			*wcSource = (wchar_t*)source;
	}

	return result;
}
|
||||||
|
|
||||||
|
|
||||||
const char*
|
const char*
|
||||||
ICUCtypeData::GetLanginfo(int index)
|
ICUCtypeData::GetLanginfo(int index)
|
||||||
{
|
{
|
||||||
|
@ -184,6 +184,18 @@ ICULocaleBackend::WcharToMultibyte(char* mbOut, wchar_t wc, mbstate_t* mbState,
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*!	Backend entry point for wide character string to multibyte string
	conversion; forwards all arguments unchanged to the ctype data object.
*/
status_t
ICULocaleBackend::WcharStringToMultibyte(char* mbDest, size_t mbDestLength,
	const wchar_t** wcSource, size_t wcSourceLength, mbstate_t* mbState,
	size_t& lengthOut)
{
	// NOTE(review): presumably keeps errno unchanged across the call below --
	// confirm against ErrnoMaintainer.
	ErrnoMaintainer errnoMaintainer;

	const status_t status = fCtypeData.WcharStringToMultibyte(mbDest,
		mbDestLength, wcSource, wcSourceLength, mbState, lengthOut);

	return status;
}
|
||||||
|
|
||||||
|
|
||||||
const char*
|
const char*
|
||||||
ICULocaleBackend::GetLanginfo(int index)
|
ICULocaleBackend::GetLanginfo(int index)
|
||||||
{
|
{
|
||||||
|
122
src/system/libroot/posix/wchar/wcsrtombs.cpp
Normal file
122
src/system/libroot/posix/wchar/wcsrtombs.cpp
Normal file
@ -0,0 +1,122 @@
|
|||||||
|
/*
|
||||||
|
** Copyright 2011, Oliver Tappe, zooey@hirschkaefer.de. All rights reserved.
|
||||||
|
** Distributed under the terms of the Haiku License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <errno.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <wchar.h>
|
||||||
|
|
||||||
|
#include <errno_private.h>
|
||||||
|
#include <wchar_private.h>
|
||||||
|
|
||||||
|
#include "LocaleBackend.h"
|
||||||
|
|
||||||
|
|
||||||
|
//#define TRACE_WCSRTOMBS
|
||||||
|
#ifdef TRACE_WCSRTOMBS
|
||||||
|
# include <OS.h>
|
||||||
|
# define TRACE(x) debug_printf x
|
||||||
|
#else
|
||||||
|
# define TRACE(x) ;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
using BPrivate::Libroot::gLocaleBackend;
|
||||||
|
|
||||||
|
|
||||||
|
extern "C" size_t
|
||||||
|
__wcsnrtombs(char* dst, const wchar_t** src, size_t nwc, size_t len,
|
||||||
|
mbstate_t* ps)
|
||||||
|
{
|
||||||
|
TRACE(("wcsnrtombs(%p, %p, %lu, %lu) - lb:%p\n", dst, *src, nwc, len,
|
||||||
|
gLocaleBackend));
|
||||||
|
|
||||||
|
if (ps == NULL) {
|
||||||
|
static mbstate_t internalMbState;
|
||||||
|
ps = &internalMbState;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (gLocaleBackend == NULL) {
|
||||||
|
/*
|
||||||
|
* The POSIX locale is active. Since the POSIX locale only contains
|
||||||
|
* chars 0-127 and those ASCII chars are compatible with the UTF32
|
||||||
|
* values used in wint_t, we can just copy the bytes.
|
||||||
|
*/
|
||||||
|
size_t count = 0;
|
||||||
|
if (dst == NULL) {
|
||||||
|
// only count number of required wide characters
|
||||||
|
for (const wchar_t* srcEnd = *src + nwc; *src < srcEnd;
|
||||||
|
++*src, ++count) {
|
||||||
|
if (*src < 0) {
|
||||||
|
// char is non-ASCII
|
||||||
|
__set_errno(EILSEQ);
|
||||||
|
return (size_t)-1;
|
||||||
|
}
|
||||||
|
if (**src == 0) {
|
||||||
|
memset(ps, 0, sizeof(mbstate_t));
|
||||||
|
*src = NULL;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// "convert" the characters
|
||||||
|
for (; count < len; ++*src, ++count) {
|
||||||
|
if (*src < 0) {
|
||||||
|
// char is non-ASCII
|
||||||
|
__set_errno(EILSEQ);
|
||||||
|
return (size_t)-1;
|
||||||
|
}
|
||||||
|
*dst++ = (char)*src;
|
||||||
|
if (*src == 0) {
|
||||||
|
memset(ps, 0, sizeof(mbstate_t));
|
||||||
|
*src = NULL;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
TRACE(("wcsnrtombs returns %lx and src %p\n", count, *src));
|
||||||
|
|
||||||
|
return count;
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t result = 0;
|
||||||
|
status_t status = gLocaleBackend->WcharStringToMultibyte(dst, len, src, nwc,
|
||||||
|
ps, result);
|
||||||
|
|
||||||
|
if (status == B_BAD_DATA) {
|
||||||
|
TRACE(("wcsnrtomb(): setting errno to EILSEQ\n"));
|
||||||
|
__set_errno(EILSEQ);
|
||||||
|
result = (size_t)-1;
|
||||||
|
} else if (status != B_OK) {
|
||||||
|
TRACE(("wcsnrtomb(): setting errno to EINVAL (status: %lx)\n", status));
|
||||||
|
__set_errno(EINVAL);
|
||||||
|
result = (size_t)-1;
|
||||||
|
}
|
||||||
|
|
||||||
|
TRACE(("wcsnrtombs returns %lx and src %p\n", result, *src));
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// deactivated, as wcsnrtombs() isn't publically available yet.
|
||||||
|
extern "C"
|
||||||
|
B_DEFINE_WEAK_ALIAS(__wcsnrtombs, wcsnrtombs);
|
||||||
|
|
||||||
|
|
||||||
|
extern "C" size_t
|
||||||
|
__wcsrtombs(char* dst, const wchar_t** src, size_t len, mbstate_t* ps)
|
||||||
|
{
|
||||||
|
if (ps == NULL) {
|
||||||
|
static mbstate_t internalMbState;
|
||||||
|
ps = &internalMbState;
|
||||||
|
}
|
||||||
|
|
||||||
|
return __wcsnrtombs(dst, src, __wcslen(*src) + 1, len, ps);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
extern "C"
|
||||||
|
B_DEFINE_WEAK_ALIAS(__wcsrtombs, wcsrtombs);
|
18
src/system/libroot/posix/wchar/wcstombs.c
Normal file
18
src/system/libroot/posix/wchar/wcstombs.c
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
/*
|
||||||
|
** Copyright 2011, Oliver Tappe, zooey@hirschkaefer.de. All rights reserved.
|
||||||
|
** Distributed under the terms of the Haiku License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <wchar_private.h>
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Converts the null-terminated wide character string pwcs into a multibyte
 * string of at most n bytes stored at s, by delegating to __wcsrtombs() with
 * a conversion state that is private to this function.
 */
size_t
__wcstombs(char* s, const wchar_t* pwcs, size_t n)
{
	static mbstate_t internalMbState;

	/* __wcsrtombs() may update the source pointer, so hand it a local copy */
	const wchar_t* source = pwcs;

	return __wcsrtombs(s, &source, n, &internalMbState);
}


B_DEFINE_WEAK_ALIAS(__wcstombs, wcstombs);
|
Loading…
Reference in New Issue
Block a user