Implement our own version of wcsrtombs().
* add WcharStringToMultibyte() to libroot's locale backend
* implement wcstombs(), wcsrtombs() and wcsnrtombs() on top of that new backend function
This commit is contained in:
parent
5dd04ce5da
commit
995d6d827f
@ -39,6 +39,11 @@ public:
|
||||
size_t& lengthOut);
|
||||
status_t WcharToMultibyte(char* mbOut, wchar_t wc,
|
||||
mbstate_t* mbState, size_t& lengthOut);
|
||||
// Converts up to wcSourceLength wide characters starting at *wcSource into
// the multibyte encoding. If mbDest is non-NULL, at most mbDestLength bytes
// are stored there and *wcSource is updated (NULL once the terminating null
// has been converted, otherwise the next unconverted wide character).
// lengthOut receives the number of bytes produced, not counting the
// terminating null.
status_t WcharStringToMultibyte(char* mbDest,
|
||||
size_t mbDestLength,
|
||||
const wchar_t** wcSource,
|
||||
size_t wcSourceLength, mbstate_t* mbState,
|
||||
size_t& lengthOut);
|
||||
|
||||
const char* GetLanginfo(int index);
|
||||
|
||||
|
@ -50,6 +50,11 @@ public:
|
||||
size_t& lengthOut);
|
||||
virtual status_t WcharToMultibyte(char* mbOut, wchar_t wc,
|
||||
mbstate_t* mbState, size_t& lengthOut);
|
||||
// Wide character string -> multibyte string conversion.
// NOTE(review): presumably the declaration matching
// ICULocaleBackend::WcharStringToMultibyte(), which forwards all arguments
// to the ctype data object - confirm against the enclosing class.
virtual status_t WcharStringToMultibyte(char* mbDest,
|
||||
size_t mbDestLength,
|
||||
const wchar_t** wcSource,
|
||||
size_t wcSourceLength, mbstate_t* mbState,
|
||||
size_t& lengthOut);
|
||||
|
||||
virtual const char* GetLanginfo(int index);
|
||||
|
||||
|
@ -132,6 +132,11 @@ public:
|
||||
size_t& lengthOut) = 0;
|
||||
virtual status_t WcharToMultibyte(char* mbOut, wchar_t wc,
|
||||
mbstate_t* mbState, size_t& lengthOut) = 0;
|
||||
// Backend hook for converting a wide character string to a multibyte
// string; called by __wcsnrtombs() as
// WcharStringToMultibyte(dst, len, src, nwc, ps, result). That caller maps
// a B_BAD_DATA return to errno EILSEQ and any other non-B_OK return to
// EINVAL.
virtual status_t WcharStringToMultibyte(char* mbDest,
|
||||
size_t mbDestLength,
|
||||
const wchar_t** wcSource,
|
||||
size_t wcSourceLength, mbstate_t* mbState,
|
||||
size_t& lengthOut) = 0;
|
||||
|
||||
virtual const char* GetLanginfo(int index) = 0;
|
||||
|
||||
|
@ -302,26 +302,22 @@ ICUCtypeData::MultibyteStringToWchar(wchar_t* wcDest, size_t wcDestLength,
|
||||
if (sourceLengthUsed >= mbSourceLength)
|
||||
break;
|
||||
UChar32 unicodeChar = ucnv_getNextUChar(converter, &source,
|
||||
std::min(source + MB_LEN_MAX, sourceEnd), &icuStatus);
|
||||
std::min(source + MB_CUR_MAX, sourceEnd), &icuStatus);
|
||||
sourceLengthUsed = source - *mbSource;
|
||||
TRACE(("l:%lu wl:%lu s:%p se:%p sl:%lu slu:%lu uchar:%x st:%x\n",
|
||||
lengthOut, wcDestLength, source, sourceEnd, mbSourceLength,
|
||||
sourceLengthUsed, unicodeChar, icuStatus));
|
||||
TRACE(("MultibyteStringToWchar() l:%lu wl:%lu s:%p se:%p sl:%lu slu:%lu"
|
||||
" uchar:%x st:%x\n", lengthOut, wcDestLength, source, sourceEnd,
|
||||
mbSourceLength, sourceLengthUsed, unicodeChar, icuStatus));
|
||||
if (!U_SUCCESS(icuStatus))
|
||||
break;
|
||||
if (wcDest != NULL)
|
||||
*wcDest++ = unicodeChar;
|
||||
if (unicodeChar == L'\0') {
|
||||
if (wcDest != NULL)
|
||||
wcsIsTerminated = true;
|
||||
wcsIsTerminated = true;
|
||||
break;
|
||||
}
|
||||
icuStatus = U_ZERO_ERROR;
|
||||
}
|
||||
|
||||
if (wcDest != NULL)
|
||||
*mbSource = source;
|
||||
|
||||
if (!U_SUCCESS(icuStatus)) {
|
||||
// conversion failed because of illegal character sequence
|
||||
TRACE(("MultibyteStringToWchar(): illegal character sequence\n"));
|
||||
@ -331,9 +327,13 @@ ICUCtypeData::MultibyteStringToWchar(wchar_t* wcDest, size_t wcDestLength,
|
||||
// reset to initial state
|
||||
_DropConverterFromMbState(mbState);
|
||||
memset(mbState, 0, sizeof(mbstate_t));
|
||||
*mbSource = NULL;
|
||||
} else
|
||||
if (wcDest != NULL)
|
||||
*mbSource = NULL;
|
||||
} else {
|
||||
mbState->count = 0;
|
||||
if (wcDest != NULL)
|
||||
*mbSource = source;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
@ -353,20 +353,40 @@ ICUCtypeData::WcharToMultibyte(char* mbOut, wchar_t wc, mbstate_t* mbState,
|
||||
|
||||
UConverter* converter = converterRef->Converter();
|
||||
|
||||
// do the conversion
|
||||
// convert input from UTF-32 to UTF-16
|
||||
UChar ucharBuffer[2];
|
||||
size_t ucharLength;
|
||||
if (U_IS_BMP(wc)) {
|
||||
ucharBuffer[0] = wc;
|
||||
ucharLength = 1;
|
||||
} else {
|
||||
ucharBuffer[0] = U16_LEAD(wc);
|
||||
ucharBuffer[1] = U16_TRAIL(wc);
|
||||
ucharLength = 2;
|
||||
}
|
||||
|
||||
// do the actual conversion
|
||||
UErrorCode icuStatus = U_ZERO_ERROR;
|
||||
lengthOut = ucnv_fromUChars(converter, mbOut, MB_LEN_MAX, (UChar*)&wc,
|
||||
1, &icuStatus);
|
||||
size_t mbLength = mbOut == NULL ? 0 : MB_CUR_MAX;
|
||||
lengthOut = ucnv_fromUChars(converter, mbOut, mbLength, ucharBuffer,
|
||||
ucharLength, &icuStatus);
|
||||
TRACE(("WcharToMultibyte() l:%lu mb:%p ml:%lu uchar:%x st:%x\n", lengthOut,
|
||||
mbOut, mbLength, wc, icuStatus));
|
||||
|
||||
if (icuStatus == U_BUFFER_OVERFLOW_ERROR && mbOut == NULL) {
|
||||
// we have no output buffer, so we ignore buffer overflows
|
||||
icuStatus = U_ZERO_ERROR;
|
||||
}
|
||||
|
||||
if (!U_SUCCESS(icuStatus)) {
|
||||
if (icuStatus == U_ILLEGAL_ARGUMENT_ERROR) {
|
||||
// bad converter (shouldn't really happen)
|
||||
TRACE(("MultibyteToWchar(): bad converter\n"));
|
||||
TRACE(("WcharToMultibyte(): bad converter\n"));
|
||||
return B_BAD_VALUE;
|
||||
}
|
||||
|
||||
// conversion failed because of illegal/unmappable character
|
||||
TRACE(("MultibyteToWchar(): illegal character sequence\n"));
|
||||
TRACE(("WcharToMultibyte(): illegal character sequence\n"));
|
||||
ucnv_resetFromUnicode(converter);
|
||||
return B_BAD_DATA;
|
||||
}
|
||||
@ -381,6 +401,95 @@ ICUCtypeData::WcharToMultibyte(char* mbOut, wchar_t wc, mbstate_t* mbState,
|
||||
}
|
||||
|
||||
|
||||
/*!	Converts a wide character string into a multibyte string, using the ICU
	converter that belongs to \a mbState.

	\param mbDest Output buffer; if \c NULL, the required length is only
		counted and \a *wcSource is left untouched.
	\param mbDestLength Capacity of \a mbDest in bytes (ignored if \a mbDest
		is \c NULL).
	\param wcSource In: start of the wide character string. Out (only if
		\a mbDest is not \c NULL and no error occurred): \c NULL if the
		terminating null has been converted, otherwise the first wide
		character that has not been converted.
	\param wcSourceLength Maximum number of wide characters to convert.
	\param mbState Conversion state; reset to zero once the terminating null
		has been converted.
	\param lengthOut Receives the number of bytes produced, not counting the
		terminating null.
	\return \c B_OK on success, \c B_BAD_DATA if a character could not be
		converted, or the error reported while fetching the converter.
*/
status_t
ICUCtypeData::WcharStringToMultibyte(char* mbDest, size_t mbDestLength,
	const wchar_t** wcSource, size_t wcSourceLength, mbstate_t* mbState,
	size_t& lengthOut)
{
	ICUConverterRef converterRef;
	status_t result = _GetConverterForMbState(mbState, converterRef);
	if (result != B_OK) {
		TRACE(("WcharStringToMultibyte(): couldn't get converter for ID %d "
			"- %lx\n", mbState->converterID, result));
		return result;
	}

	UConverter* converter = converterRef->Converter();

	bool mbsIsTerminated = false;
	const UChar32* source = (UChar32*)*wcSource;

	UErrorCode icuStatus = U_ZERO_ERROR;
	lengthOut = 0;
	for (size_t sourceLengthUsed = 0; sourceLengthUsed < wcSourceLength;
		++sourceLengthUsed, ++source) {
		if (mbDest != NULL && lengthOut >= mbDestLength)
			break;

		// convert input from UTF-32 to UTF-16
		UChar ucharBuffer[2];
		size_t ucharLength;
		if (U_IS_BMP(*source)) {
			ucharBuffer[0] = *source;
			ucharLength = 1;
		} else {
			ucharBuffer[0] = U16_LEAD(*source);
			ucharBuffer[1] = U16_TRAIL(*source);
			ucharLength = 2;
		}

		// do the actual conversion
		size_t destLength = mbDest == NULL ? 0 : mbDestLength - lengthOut;

		// The character is converted into a scratch buffer first, so that
		// nothing is written to mbDest unless the whole character fits.
		// ucnv_fromUChars() appends a terminating NUL whenever the given
		// capacity allows it, so the scratch buffer has room for one extra
		// byte and the capacity passed on never exceeds the scratch buffer
		// (nor what is left in mbDest).
		const size_t bufferSize = MB_CUR_MAX + 1;
		char buffer[bufferSize];
		size_t capacity = destLength < bufferSize ? destLength : bufferSize;
		size_t mbLength = ucnv_fromUChars(converter,
			mbDest == NULL ? NULL : buffer, capacity, ucharBuffer,
			ucharLength, &icuStatus);
		TRACE(("WcharStringToMultibyte() l:%lu mb:%p ml:%lu s:%p ul:%lu slu:%lu"
			" uchar:%x st:%x\n", mbLength, mbDest, destLength, source,
			ucharLength, sourceLengthUsed, *source, icuStatus));

		if (icuStatus == U_BUFFER_OVERFLOW_ERROR) {
			// ignore buffer overflows ...
			icuStatus = U_ZERO_ERROR;
			// ... but stop if the output buffer has been exceeded
			if (destLength > 0)
				break;
		} else if (mbDest != NULL)
			memcpy(mbDest, buffer, mbLength);

		if (!U_SUCCESS(icuStatus))
			break;
		if (mbDest != NULL)
			mbDest += mbLength;
		if (*source == L'\0') {
			// the terminator is converted, but not counted in lengthOut
			mbsIsTerminated = true;
			break;
		}
		lengthOut += mbLength;
		icuStatus = U_ZERO_ERROR;
	}

	if (!U_SUCCESS(icuStatus)) {
		// conversion failed because of illegal character sequence
		TRACE(("WcharStringToMultibyte(): illegal character sequence\n"));
		ucnv_resetFromUnicode(converter);
		result = B_BAD_DATA;
	} else if (mbsIsTerminated) {
		// reset to initial state
		_DropConverterFromMbState(mbState);
		memset(mbState, 0, sizeof(mbstate_t));
		if (mbDest != NULL)
			*wcSource = NULL;
	} else {
		mbState->count = 0;
		if (mbDest != NULL)
			*wcSource = (wchar_t*)source;
	}

	return result;
}
|
||||
|
||||
|
||||
const char*
|
||||
ICUCtypeData::GetLanginfo(int index)
|
||||
{
|
||||
|
@ -184,6 +184,18 @@ ICULocaleBackend::WcharToMultibyte(char* mbOut, wchar_t wc, mbstate_t* mbState,
|
||||
}
|
||||
|
||||
|
||||
/*!	Converts the wide character string \a *wcSource into a multibyte string
	by handing all arguments on to the ctype data object; its status code is
	returned unchanged.
*/
status_t
ICULocaleBackend::WcharStringToMultibyte(char* mbDest, size_t mbDestLength,
	const wchar_t** wcSource, size_t wcSourceLength, mbstate_t* mbState,
	size_t& lengthOut)
{
	// NOTE(review): presumably guards errno across the conversion - confirm
	// against the definition of ErrnoMaintainer.
	ErrnoMaintainer errnoMaintainer;

	const status_t conversionStatus = fCtypeData.WcharStringToMultibyte(
		mbDest, mbDestLength, wcSource, wcSourceLength, mbState, lengthOut);

	return conversionStatus;
}
|
||||
|
||||
|
||||
const char*
|
||||
ICULocaleBackend::GetLanginfo(int index)
|
||||
{
|
||||
|
122
src/system/libroot/posix/wchar/wcsrtombs.cpp
Normal file
122
src/system/libroot/posix/wchar/wcsrtombs.cpp
Normal file
@ -0,0 +1,122 @@
|
||||
/*
|
||||
** Copyright 2011, Oliver Tappe, zooey@hirschkaefer.de. All rights reserved.
|
||||
** Distributed under the terms of the Haiku License.
|
||||
*/
|
||||
|
||||
#include <errno.h>
|
||||
#include <string.h>
|
||||
#include <wchar.h>
|
||||
|
||||
#include <errno_private.h>
|
||||
#include <wchar_private.h>
|
||||
|
||||
#include "LocaleBackend.h"
|
||||
|
||||
|
||||
//#define TRACE_WCSRTOMBS
|
||||
#ifdef TRACE_WCSRTOMBS
|
||||
# include <OS.h>
|
||||
# define TRACE(x) debug_printf x
|
||||
#else
|
||||
# define TRACE(x) ;
|
||||
#endif
|
||||
|
||||
|
||||
using BPrivate::Libroot::gLocaleBackend;
|
||||
|
||||
|
||||
extern "C" size_t
|
||||
__wcsnrtombs(char* dst, const wchar_t** src, size_t nwc, size_t len,
|
||||
mbstate_t* ps)
|
||||
{
|
||||
TRACE(("wcsnrtombs(%p, %p, %lu, %lu) - lb:%p\n", dst, *src, nwc, len,
|
||||
gLocaleBackend));
|
||||
|
||||
if (ps == NULL) {
|
||||
static mbstate_t internalMbState;
|
||||
ps = &internalMbState;
|
||||
}
|
||||
|
||||
if (gLocaleBackend == NULL) {
|
||||
/*
|
||||
* The POSIX locale is active. Since the POSIX locale only contains
|
||||
* chars 0-127 and those ASCII chars are compatible with the UTF32
|
||||
* values used in wint_t, we can just copy the bytes.
|
||||
*/
|
||||
size_t count = 0;
|
||||
if (dst == NULL) {
|
||||
// only count number of required wide characters
|
||||
for (const wchar_t* srcEnd = *src + nwc; *src < srcEnd;
|
||||
++*src, ++count) {
|
||||
if (*src < 0) {
|
||||
// char is non-ASCII
|
||||
__set_errno(EILSEQ);
|
||||
return (size_t)-1;
|
||||
}
|
||||
if (**src == 0) {
|
||||
memset(ps, 0, sizeof(mbstate_t));
|
||||
*src = NULL;
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// "convert" the characters
|
||||
for (; count < len; ++*src, ++count) {
|
||||
if (*src < 0) {
|
||||
// char is non-ASCII
|
||||
__set_errno(EILSEQ);
|
||||
return (size_t)-1;
|
||||
}
|
||||
*dst++ = (char)*src;
|
||||
if (*src == 0) {
|
||||
memset(ps, 0, sizeof(mbstate_t));
|
||||
*src = NULL;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TRACE(("wcsnrtombs returns %lx and src %p\n", count, *src));
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
size_t result = 0;
|
||||
status_t status = gLocaleBackend->WcharStringToMultibyte(dst, len, src, nwc,
|
||||
ps, result);
|
||||
|
||||
if (status == B_BAD_DATA) {
|
||||
TRACE(("wcsnrtomb(): setting errno to EILSEQ\n"));
|
||||
__set_errno(EILSEQ);
|
||||
result = (size_t)-1;
|
||||
} else if (status != B_OK) {
|
||||
TRACE(("wcsnrtomb(): setting errno to EINVAL (status: %lx)\n", status));
|
||||
__set_errno(EINVAL);
|
||||
result = (size_t)-1;
|
||||
}
|
||||
|
||||
TRACE(("wcsnrtombs returns %lx and src %p\n", result, *src));
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
// Meant to be deactivated, as wcsnrtombs() isn't publicly available yet.
// NOTE(review): the alias below is nevertheless active - confirm the intent.
|
||||
extern "C"
|
||||
B_DEFINE_WEAK_ALIAS(__wcsnrtombs, wcsnrtombs);
|
||||
|
||||
|
||||
extern "C" size_t
|
||||
__wcsrtombs(char* dst, const wchar_t** src, size_t len, mbstate_t* ps)
|
||||
{
|
||||
if (ps == NULL) {
|
||||
static mbstate_t internalMbState;
|
||||
ps = &internalMbState;
|
||||
}
|
||||
|
||||
return __wcsnrtombs(dst, src, __wcslen(*src) + 1, len, ps);
|
||||
}
|
||||
|
||||
|
||||
extern "C"
|
||||
B_DEFINE_WEAK_ALIAS(__wcsrtombs, wcsrtombs);
|
18
src/system/libroot/posix/wchar/wcstombs.c
Normal file
18
src/system/libroot/posix/wchar/wcstombs.c
Normal file
@ -0,0 +1,18 @@
|
||||
/*
|
||||
** Copyright 2011, Oliver Tappe, zooey@hirschkaefer.de. All rights reserved.
|
||||
** Distributed under the terms of the Haiku License.
|
||||
*/
|
||||
|
||||
#include <wchar_private.h>
|
||||
|
||||
|
||||
/*
 * Converts the wide character string pwcs into a multibyte string of at most
 * n bytes in s by delegating to __wcsrtombs() with a conversion state that is
 * private to this function. Returns whatever __wcsrtombs() returns.
 */
size_t
__wcstombs(char* s, const wchar_t* pwcs, size_t n)
{
	static mbstate_t internalMbState;

	/* __wcsrtombs() may update the source pointer; the caller never sees it */
	const wchar_t* source = pwcs;
	size_t converted = __wcsrtombs(s, &source, n, &internalMbState);

	return converted;
}
|
||||
|
||||
|
||||
B_DEFINE_WEAK_ALIAS(__wcstombs, wcstombs);
|
Loading…
Reference in New Issue
Block a user