Add implementation of mbsrtowcs() to our locale backend.

* add MultibyteStringToWchar() to ICU locale backend
* implement mbsrtowcs() and mbsnrtowcs() on top of 
  MultibyteStringToWchar()
* drop respective glibc files
This commit is contained in:
Oliver Tappe 2011-12-07 18:20:34 +01:00
parent d0e7bc307c
commit 73186b2fcd
11 changed files with 216 additions and 312 deletions

View File

@ -33,6 +33,10 @@ public:
status_t MultibyteToWchar(wchar_t* wcOut, const char* mb,
size_t mbLength, mbstate_t* mbState,
size_t& lengthOut);
// Converts the multibyte string at *mbSource into wide characters.
// wcDest/wcDestLength describe the destination buffer (wcDest may be
// NULL), mbSourceLength limits the number of source bytes read, mbState
// carries the conversion state and lengthOut receives the resulting
// character count.
status_t MultibyteStringToWchar(wchar_t* wcDest,
size_t wcDestLength, const char** mbSource,
size_t mbSourceLength, mbstate_t* mbState,
size_t& lengthOut);
status_t WcharToMultibyte(char* mbOut, wchar_t wc,
mbstate_t* mbState, size_t& lengthOut);

View File

@ -44,6 +44,10 @@ public:
virtual status_t MultibyteToWchar(wchar_t* wcOut, const char* mb,
size_t mbLength, mbstate_t* mbState,
size_t& lengthOut);
// Converts the multibyte string at *mbSource into wide characters.
// wcDest/wcDestLength describe the destination buffer (wcDest may be
// NULL), mbSourceLength limits the number of source bytes read, mbState
// carries the conversion state and lengthOut receives the resulting
// character count.
virtual status_t MultibyteStringToWchar(wchar_t* wcDest,
size_t wcDestLength, const char** mbSource,
size_t mbSourceLength, mbstate_t* mbState,
size_t& lengthOut);
virtual status_t WcharToMultibyte(char* mbOut, wchar_t wc,
mbstate_t* mbState, size_t& lengthOut);

View File

@ -126,6 +126,10 @@ public:
virtual status_t MultibyteToWchar(wchar_t* wcOut, const char* mb,
size_t mbLength, mbstate_t* mbState,
size_t& lengthOut) = 0;
// Backend hook used by mbsnrtowcs()/mbsrtowcs(): converts the multibyte
// string at *mbSource into wide characters. wcDest/wcDestLength describe
// the destination buffer (wcDest may be NULL), mbSourceLength limits the
// number of source bytes read, mbState carries the conversion state and
// lengthOut receives the resulting character count. The callers map a
// B_BAD_DATA result to EILSEQ and any other failure to EINVAL.
virtual status_t MultibyteStringToWchar(wchar_t* wcDest,
size_t wcDestLength, const char** mbSource,
size_t mbSourceLength, mbstate_t* mbState,
size_t& lengthOut) = 0;
virtual status_t WcharToMultibyte(char* mbOut, wchar_t wc,
mbstate_t* mbState, size_t& lengthOut) = 0;

View File

@ -268,6 +268,77 @@ ICUCtypeData::MultibyteToWchar(wchar_t* wcOut, const char* mb, size_t mbLen,
}
/*!	Converts the multibyte string at \c *mbSource into wide characters, using
	the ICU converter associated with \a mbState.

	\param wcDest Destination buffer; may be NULL, in which case the
		characters are only counted and \a wcDestLength is ignored.
	\param wcDestLength Capacity of \a wcDest in wide characters.
	\param mbSource In: start of the multibyte input. Out (only if \a wcDest
		is not NULL): NULL if the terminating NUL was converted, otherwise
		the address just past the last successfully converted character.
	\param mbSourceLength Maximum number of source bytes to consume.
	\param mbState Conversion state; reset to the initial state once the
		terminating NUL has been stored into \a wcDest.
	\param lengthOut Receives the number of wide characters converted, not
		counting a terminating NUL.
	\return \c B_OK on success, \c B_BAD_DATA if ICU reported a conversion
		failure, or the error of _GetConverterForMbState().
*/
status_t
ICUCtypeData::MultibyteStringToWchar(wchar_t* wcDest, size_t wcDestLength,
	const char** mbSource, size_t mbSourceLength, mbstate_t* mbState,
	size_t& lengthOut)
{
	ICUConverterRef converterRef;
	status_t result = _GetConverterForMbState(mbState, converterRef);
	if (result != B_OK) {
		TRACE(("MultibyteStringToWchar(): couldn't get converter for ID %d -"
			" %lx\n", mbState->converterID, result));
		return result;
	}
	UConverter* converter = converterRef->Converter();

	bool wcsIsTerminated = false;
	const char* source = *mbSource;
	const char* sourceEnd = source + mbSourceLength;
	if (sourceEnd < source) {
		// overflow, clamp to highest possible address
		sourceEnd = (const char*)-1;
	}

	if (wcDest == NULL) {
		// if there's no destination buffer, there's no length limit either
		wcDestLength = (size_t)-1;
	}

	UErrorCode icuStatus = U_ZERO_ERROR;
	size_t sourceLengthUsed = 0;

	// Position just past the last character that was converted successfully.
	// ucnv_getNextUChar() may advance 'source' even when it fails, so this
	// is tracked separately in order to report a precise stop position.
	const char* lastConverted = source;

	for (lengthOut = 0; lengthOut < wcDestLength; ++lengthOut) {
		if (sourceLengthUsed >= mbSourceLength)
			break;

		// never hand more than one character's worth of bytes to ICU
		UChar32 unicodeChar = ucnv_getNextUChar(converter, &source,
			std::min(source + MB_LEN_MAX, sourceEnd), &icuStatus);
		sourceLengthUsed = source - *mbSource;
		TRACE(("l:%lu wl:%lu s:%p se:%p sl:%lu slu:%lu uchar:%x st:%x\n",
			lengthOut, wcDestLength, source, sourceEnd, mbSourceLength,
			sourceLengthUsed, unicodeChar, icuStatus));
		if (!U_SUCCESS(icuStatus))
			break;

		lastConverted = source;
		if (wcDest != NULL)
			*wcDest++ = unicodeChar;
		if (unicodeChar == L'\0') {
			// the terminator is stored, but not counted in lengthOut
			if (wcDest != NULL)
				wcsIsTerminated = true;
			break;
		}
		icuStatus = U_ZERO_ERROR;
	}

	// the source pointer is only reported back if there is a destination
	if (wcDest != NULL)
		*mbSource = lastConverted;

	if (!U_SUCCESS(icuStatus)) {
		// conversion failed because of illegal character sequence
		TRACE(("MultibyteStringToWchar(): illegal character sequence\n"));
		ucnv_resetToUnicode(converter);
		result = B_BAD_DATA;
	} else if (wcsIsTerminated) {
		// reset to initial state
		_DropConverterFromMbState(mbState);
		memset(mbState, 0, sizeof(mbstate_t));
		*mbSource = NULL;
	} else
		mbState->count = 0;

	return result;
}
status_t
ICUCtypeData::WcharToMultibyte(char* mbOut, wchar_t wc, mbstate_t* mbState,
size_t& lengthOut)

View File

@ -162,6 +162,18 @@ ICULocaleBackend::MultibyteToWchar(wchar_t* wcOut, const char* mb,
}
/*!	Forwards the multibyte-string-to-wide-string conversion to fCtypeData.
	An ErrnoMaintainer instance lives for the duration of the call
	(NOTE(review): presumably to shield the caller's errno from the
	conversion internals - confirm against ErrnoMaintainer).
*/
status_t
ICULocaleBackend::MultibyteStringToWchar(wchar_t* wcDest, size_t wcDestLength,
	const char** mbSource, size_t mbSourceLength, mbstate_t* mbState,
	size_t& lengthOut)
{
	ErrnoMaintainer errnoMaintainer;

	const status_t conversionStatus = fCtypeData.MultibyteStringToWchar(
		wcDest, wcDestLength, mbSource, mbSourceLength, mbState, lengthOut);
	return conversionStatus;
}
status_t
ICULocaleBackend::WcharToMultibyte(char* mbOut, wchar_t wc, mbstate_t* mbState,
size_t& lengthOut)

View File

@ -18,9 +18,6 @@ UsePrivateHeaders libroot ;
SubDirCcFlags -D_GNU_SOURCE -DUSE_IN_LIBIO ;
MergeObject posix_gnu_wcsmbs.o :
mbsnrtowcs.c
mbsrtowcs.c
# mbsrtowcs_l.c
wcpcpy.c
wcpncpy.c
wcscasecmp.c

View File

@ -1,139 +0,0 @@
/* Copyright (C) 1996, 1997, 1998, 1999, 2000 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@gnu.org>, 1996.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, write to the Free
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA. */
#include <dlfcn.h>
#include <errno.h>
#include <gconv.h>
#include <string.h>
#include <wchar.h>
#include <wcsmbsload.h>
#include <assert.h>
#ifndef EILSEQ
# define EILSEQ EINVAL
#endif
/* This is the private state used if PS is NULL. */
static mbstate_t state;
/* This is a non-standard function but it is very useful in the
implementation of stdio because we have to deal with unterminated
buffers. At most NMC bytes will be converted. */
/* Convert the multibyte string *SRC to wide characters through the gconv
   step in __wcsmbs_gconv_fcts.towc, reading at most NMC source bytes.
   With DST != NULL at most LEN wide characters are stored there; with
   DST == NULL the output is only counted.  PS is the conversion state,
   the file-local `state' is used when it is NULL.
   Returns the number of wide characters produced, not counting a
   terminating NUL, or (size_t) -1 with errno set to EILSEQ when the
   conversion step reports anything but OK, full output, empty input or
   incomplete input.  */
size_t
__mbsnrtowcs (dst, src, nmc, len, ps)
wchar_t *dst;
const char **src;
size_t nmc;
size_t len;
mbstate_t *ps;
{
const unsigned char *srcend;
struct __gconv_step_data data;
size_t result;
int status;
struct __gconv_step *towc;
size_t dummy;
/* Tell where we want the result. */
data.__invocation_counter = 0;
data.__internal_use = 1;
data.__flags = __GCONV_IS_LAST;
/* Fall back to the private state when the caller passed no state. */
data.__statep = ps ?: &state;
data.__trans = NULL;
/* Nothing may be read from the source, so nothing is converted. */
if (nmc == 0)
return 0;
/* End of input: one byte past the first NUL found within the first
   NMC - 1 bytes, or *SRC + NMC when there is none.  */
srcend = *src + __strnlen (*src, nmc - 1) + 1;
/* Make sure we use the correct function. */
update_conversion_ptrs ();
/* Get the structure with the function pointers. */
towc = __wcsmbs_gconv_fcts.towc;
/* We have to handle DST == NULL special. */
if (dst == NULL)
{
wchar_t buf[64]; /* Just an arbitrary size. */
/* Work on a copy of the source pointer so *SRC stays untouched. */
const unsigned char *inbuf = *src;
result = 0;
data.__outbufend = (unsigned char *) buf + sizeof (buf);
/* Convert into the scratch buffer over and over, only summing up the
   number of wide characters each round produced.  */
do
{
data.__outbuf = (unsigned char *) buf;
status = DL_CALL_FCT (towc->__fct,
(towc, &data, &inbuf, srcend, NULL,
&dummy, 0, 1));
result += (wchar_t *) data.__outbuf - buf;
}
while (status == __GCONV_FULL_OUTPUT);
if ((status == __GCONV_OK || status == __GCONV_EMPTY_INPUT)
&& ((wchar_t *) data.__outbuf)[-1] == L'\0')
/* Don't count the NUL character in. */
--result;
}
else
{
/* This code is based on the safe assumption that all internal
multi-byte encodings use the NUL byte only to mark the end
of the string. */
data.__outbuf = (unsigned char *) dst;
data.__outbufend = data.__outbuf + len * sizeof (wchar_t);
/* Here the conversion step advances *SRC itself. */
status = DL_CALL_FCT (towc->__fct,
(towc, &data, (const unsigned char **) src, srcend,
NULL, &dummy, 0, 1));
result = (wchar_t *) data.__outbuf - dst;
/* We have to determine whether the last character converted
is the NUL character. */
if ((status == __GCONV_OK || status == __GCONV_EMPTY_INPUT)
&& (assert (result > 0),
((wchar_t *) dst)[result - 1] == L'\0'))
{
assert (__mbsinit (data.__statep));
/* The whole string was converted: report that through a NULL *SRC
   and leave the terminator out of the count.  */
*src = NULL;
--result;
}
}
/* There must not be any problems with the conversion but illegal input
characters. */
assert (status == __GCONV_OK || status == __GCONV_EMPTY_INPUT
|| status == __GCONV_ILLEGAL_INPUT
|| status == __GCONV_INCOMPLETE_INPUT
|| status == __GCONV_FULL_OUTPUT);
/* Every other status is reported to the caller as EILSEQ. */
if (status != __GCONV_OK && status != __GCONV_FULL_OUTPUT
&& status != __GCONV_EMPTY_INPUT && status != __GCONV_INCOMPLETE_INPUT)
{
result = (size_t) -1;
__set_errno (EILSEQ);
}
return result;
}
weak_alias (__mbsnrtowcs, mbsnrtowcs)

View File

@ -1,145 +0,0 @@
/* Copyright (C) 1996, 1997, 1998, 1999, 2000 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@gnu.org>, 1996.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, write to the Free
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA. */
#include <dlfcn.h>
#include <errno.h>
#include <gconv.h>
#include <stdlib.h>
#include <string.h>
#include <wchar.h>
#include <wcsmbsload.h>
#include <assert.h>
#ifndef EILSEQ
# define EILSEQ EINVAL
#endif
/* This is the private state used if PS is NULL. */
static mbstate_t state;
/* Convert the NUL-terminated multibyte string *SRC to wide characters
   through the gconv step in __wcsmbs_gconv_fcts.towc.  With DST != NULL
   at most LEN wide characters are stored there; with DST == NULL the
   output is only counted.  PS is the conversion state, the file-local
   `state' is used when it is NULL.
   Returns the number of wide characters produced, not counting the
   terminating NUL, or (size_t) -1 with errno set to EILSEQ when the
   conversion step reports anything but OK, full output, empty input or
   incomplete input.  */
size_t
__mbsrtowcs (dst, src, len, ps)
wchar_t *dst;
const char **src;
size_t len;
mbstate_t *ps;
{
struct __gconv_step_data data;
size_t result;
int status;
struct __gconv_step *towc;
size_t non_reversible;
/* Tell where we want the result. */
data.__invocation_counter = 0;
data.__internal_use = 1;
data.__flags = __GCONV_IS_LAST;
/* Fall back to the private state when the caller passed no state. */
data.__statep = ps ?: &state;
data.__trans = NULL;
/* Make sure we use the correct function. */
update_conversion_ptrs ();
/* Get the structure with the function pointers. */
towc = __wcsmbs_gconv_fcts.towc;
/* We have to handle DST == NULL special. */
if (dst == NULL)
{
mbstate_t temp_state;
wchar_t buf[64]; /* Just an arbitrary size. */
/* Work on a copy of the source pointer so *SRC stays untouched. */
const unsigned char *inbuf = (const unsigned char *) *src;
const unsigned char *srcend = inbuf + strlen (inbuf) + 1;
/* Counting must not change the caller's state, so the conversion runs
   on a temporary copy of it.  */
temp_state = *data.__statep;
data.__statep = &temp_state;
result = 0;
data.__outbufend = (char *) buf + sizeof (buf);
/* Convert into the scratch buffer over and over, only summing up the
   number of wide characters each round produced.  */
do
{
data.__outbuf = (char *) buf;
status = DL_CALL_FCT (towc->__fct,
(towc, &data, &inbuf, srcend, NULL,
&non_reversible, 0, 1));
result += (wchar_t *) data.__outbuf - buf;
}
while (status == __GCONV_FULL_OUTPUT);
if (status == __GCONV_OK || status == __GCONV_EMPTY_INPUT)
{
/* There better should be a NUL wide char at the end. */
assert (((wchar_t *) data.__outbuf)[-1] == L'\0');
/* Don't count the NUL character in. */
--result;
}
}
else
{
/* This code is based on the safe assumption that all internal
multi-byte encodings use the NUL byte only to mark the end
of the string. */
const unsigned char *srcend;
/* Look no further than LEN * MB_CUR_MAX bytes for the terminator,
   plus one byte so that the NUL itself is part of the input.  */
srcend = (const unsigned char *) (*src
+ __strnlen (*src, len * MB_CUR_MAX)
+ 1);
data.__outbuf = (unsigned char *) dst;
data.__outbufend = data.__outbuf + len * sizeof (wchar_t);
/* Here the conversion step advances *SRC itself. */
status = DL_CALL_FCT (towc->__fct,
(towc, &data, (const unsigned char **) src, srcend,
NULL, &non_reversible, 0, 1));
result = (wchar_t *) data.__outbuf - dst;
/* We have to determine whether the last character converted
is the NUL character. */
/* NOTE(review): dst[result - 1] is read before the assert below has
   checked result > 0; __mbsnrtowcs performs that check first.  */
if ((status == __GCONV_OK || status == __GCONV_EMPTY_INPUT)
&& ((wchar_t *) dst)[result - 1] == L'\0')
{
assert (result > 0);
assert (__mbsinit (data.__statep));
/* The whole string was converted: report that through a NULL *SRC
   and leave the terminator out of the count.  */
*src = NULL;
--result;
}
}
/* There must not be any problems with the conversion but illegal input
characters. */
assert (status == __GCONV_OK || status == __GCONV_EMPTY_INPUT
|| status == __GCONV_ILLEGAL_INPUT
|| status == __GCONV_INCOMPLETE_INPUT
|| status == __GCONV_FULL_OUTPUT);
/* Every other status is reported to the caller as EILSEQ. */
if (status != __GCONV_OK && status != __GCONV_FULL_OUTPUT
&& status != __GCONV_EMPTY_INPUT && status != __GCONV_INCOMPLETE_INPUT)
{
result = (size_t) -1;
__set_errno (EILSEQ);
}
return result;
}
weak_alias (__mbsrtowcs, mbsrtowcs)

View File

@ -1,25 +0,0 @@
/* Copyright (C) 2002 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@gnu.org>, 2002.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, write to the Free
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA. */
#include <ctype.h>
#include <string.h>
#include "wcsmbsload.h"
#define USE_IN_EXTENDED_LOCALE_MODEL 1
#include "mbsrtowcs.c"

View File

@ -11,6 +11,7 @@ MergeObject posix_wchar.o :
mbrlen.c
mbrtowc.cpp
mbsinit.c
mbsrtowcs.cpp
mbtowc.c
wcrtomb.cpp
wcswidth.c

View File

@ -0,0 +1,120 @@
/*
** Copyright 2011, Oliver Tappe, zooey@hirschkaefer.de. All rights reserved.
** Distributed under the terms of the Haiku License.
*/
#include <errno.h>
#include <string.h>
#include <wchar.h>
#include <errno_private.h>
#include "LocaleBackend.h"
//#define TRACE_MBSRTOWCS
#ifdef TRACE_MBSRTOWCS
# include <OS.h>
# define TRACE(x) debug_printf x
#else
# define TRACE(x) ;
#endif
using BPrivate::Libroot::gLocaleBackend;
/*!	Converts the multibyte string at \c *src into wide characters, reading at
	most \a nmc source bytes.

	\param dst Destination buffer; may be NULL, in which case the required
		number of wide characters is only counted, \a len is ignored and
		\c *src is left untouched.
	\param src In: start of the multibyte input. Out (only if \a dst is not
		NULL): NULL if the terminating NUL was converted, otherwise the
		address just past the last character converted.
	\param nmc Maximum number of source bytes to read.
	\param len Capacity of \a dst in wide characters.
	\param ps Conversion state; an internal state is used if NULL.
	\return The number of wide characters converted (not counting the
		terminating NUL), or (size_t)-1 with errno set to EILSEQ for invalid
		input, or to EINVAL for any other backend failure.
*/
extern "C" size_t
__mbsnrtowcs(wchar_t* dst, const char** src, size_t nmc, size_t len,
	mbstate_t* ps)
{
	TRACE(("mbsnrtowcs(%p, %p, %lu, %lu)\n", dst, *src, nmc, len));

	if (ps == NULL) {
		static mbstate_t internalMbState;
		ps = &internalMbState;
	}

	if (gLocaleBackend == NULL) {
		/*
		 * The POSIX locale is active. Since the POSIX locale only contains
		 * chars 0-127 and those ASCII chars are compatible with the UTF32
		 * values used in wint_t, we can just copy the bytes.
		 */
		const char* mbSrc = *src;

		// Every character is exactly one byte, so a single counter serves
		// both limits. Counting (instead of computing an end pointer) also
		// keeps nmc == (size_t)-1 from overflowing the address.
		// Without a destination buffer only the source limit applies.
		const size_t limit = (dst == NULL || nmc < len) ? nmc : len;

		size_t count = 0;
		bool terminated = false;
		for (; count < limit; ++count) {
			const unsigned char c = (unsigned char)mbSrc[count];
			if (c > 0x7f) {
				// char is non-ASCII
				if (dst != NULL)
					*src = mbSrc + count;
				__set_errno(EILSEQ);
				return (size_t)-1;
			}
			if (dst != NULL)
				dst[count] = (wchar_t)c;
			if (c == '\0') {
				// the terminator is stored, but not counted
				terminated = true;
				break;
			}
		}

		if (terminated)
			memset(ps, 0, sizeof(mbstate_t));
		if (dst != NULL)
			*src = terminated ? NULL : mbSrc + count;

		TRACE(("mbsnrtowcs returns %lx and src %p\n", count, *src));

		return count;
	}

	size_t result = 0;
	status_t status = gLocaleBackend->MultibyteStringToWchar(dst, len, src, nmc,
		ps, result);

	if (status == B_BAD_DATA) {
		TRACE(("mbsnrtowcs(): setting errno to EILSEQ\n"));
		__set_errno(EILSEQ);
		result = (size_t)-1;
	} else if (status != B_OK) {
		TRACE(("mbsnrtowcs(): setting errno to EINVAL (status: %lx)\n", status));
		__set_errno(EINVAL);
		result = (size_t)-1;
	}

	TRACE(("mbsnrtowcs returns %lx and src %p\n", result, *src));

	return result;
}


extern "C"
B_DEFINE_WEAK_ALIAS(__mbsnrtowcs, mbsnrtowcs);


/*!	Converts the NUL-terminated multibyte string at \c *src into at most
	\a len wide characters. Parameters and return value are those of
	__mbsnrtowcs(), to which the work is delegated without a source byte
	limit.
*/
extern "C" size_t
__mbsrtowcs(wchar_t* dst, const char** src, size_t len, mbstate_t* ps)
{
	if (ps == NULL) {
		// mbsrtowcs() keeps an internal state of its own
		static mbstate_t internalMbState;
		ps = &internalMbState;
	}

	// The input is NUL-terminated by contract, so the conversion stops at
	// the terminator by itself and no byte limit has to be computed.
	return __mbsnrtowcs(dst, src, (size_t)-1, len, ps);
}


extern "C"
B_DEFINE_WEAK_ALIAS(__mbsrtowcs, mbsrtowcs);