Add gnulib-test-mbsrtowcs.c in order to test mbsrtowcs()

* the test exposes problems in our current (glibc's) implementation
  of mbsrtowcs()
This commit is contained in:
Oliver Tappe 2011-12-01 18:15:13 +01:00
parent eb5e1c09e3
commit 0983c476d9
2 changed files with 310 additions and 0 deletions

View File

@ -39,6 +39,7 @@ SimpleTest xsi_sem_test1 : xsi_sem_test1.cpp ;
# wide character tests
SimpleTest gnulib-test-btowc : gnulib-test-btowc.c ;
SimpleTest gnulib-test-mbrtowc : gnulib-test-mbrtowc.c ;
SimpleTest gnulib-test-mbsrtowcs : gnulib-test-mbsrtowcs.c ;
SimpleTest gnulib-test-wcrtomb : gnulib-test-wcrtomb.c ;
SimpleTest mbtest : mbtest.c ;
SimpleTest testmb : testmb.c ;

View File

@ -0,0 +1,309 @@
/* Test of conversion of string to wide string.
Copyright (C) 2008-2011 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>. */
/* Written by Bruno Haible <bruno@clisp.org>, 2008. */
#undef NDEBUG
#include <assert.h>
#include <locale.h>
#include <stdio.h>
#include <string.h>
#include <wchar.h>
#define BUFSIZE 10
/* Marker stored in output slots before each call.  A slot that still holds
   this value afterwards was not written by the function under test. */
static const wchar_t kUntouched = (wchar_t) 0xBADFACE;


/* Fills all BUFSIZE slots of buf with the kUntouched marker. */
static void
poison_buffer(wchar_t *buf)
{
	size_t i;

	for (i = 0; i < BUFSIZE; i++)
		buf[i] = kUntouched;
}


/* Checks mbsrtowcs() on the empty string "" for the four combinations of
   NULL/non-NULL destination and a length limit of 0 or 1.  Every call must
   return 0 and leave the conversion state initial; the destination is
   only expected to change (to 0) when it is non-NULL and the limit is 1. */
static void
check_empty_input(void)
{
	mbstate_t state;
	const char *src;
	wchar_t wc;
	size_t ret;

	memset(&state, '\0', sizeof(mbstate_t));

	/* NULL destination, limit 0. */
	src = "";
	ret = mbsrtowcs(NULL, &src, 0, &state);
	assert(ret == 0);
	assert(mbsinit(&state));

	/* NULL destination, limit 1. */
	src = "";
	ret = mbsrtowcs(NULL, &src, 1, &state);
	assert(ret == 0);
	assert(mbsinit(&state));

	/* Real destination, limit 0: the destination must stay untouched. */
	wc = kUntouched;
	src = "";
	ret = mbsrtowcs(&wc, &src, 0, &state);
	assert(ret == 0);
	assert(wc == kUntouched);
	assert(mbsinit(&state));

	/* Real destination, limit 1: the terminator is stored. */
	wc = kUntouched;
	src = "";
	ret = mbsrtowcs(&wc, &src, 1, &state);
	assert(ret == 0);
	assert(wc == 0);
	assert(mbsinit(&state));
}


/* Scenario for a single-byte locale (main() selects en_US.ISO8859-1).
   unlimited != 0 passes BUFSIZE as the length limit, otherwise the limit
   is 1 wide character.  Failures are reported through assert(). */
static void
check_iso8859_1(int unlimited)
{
	char input[] = "B\374\337er"; /* "Büßer" */
	wchar_t buf[BUFSIZE];
	mbstate_t state;
	mbstate_t temp_state;
	const char *src;
	wchar_t wc;
	size_t ret;

	poison_buffer(buf);
	memset(&state, '\0', sizeof(mbstate_t));

	/* Convert the first two characters one by one with mbrtowc() and
	   overwrite each consumed byte with NUL. */
	wc = kUntouched;
	ret = mbrtowc(&wc, input, 1, &state);
	assert(ret == 1);
	assert(wc == 'B');
	assert(mbsinit(&state));
	input[0] = '\0';

	wc = kUntouched;
	ret = mbrtowc(&wc, input + 1, 1, &state);
	assert(ret == 1);
	assert(wctob(wc) == (unsigned char) '\374');
	assert(mbsinit(&state));
	input[1] = '\0';

	/* Length query with a NULL destination.  It works on a copy of the
	   state, so the last assertion verifies that state itself is still
	   initial; src must not be advanced either. */
	src = input + 2;
	temp_state = state;
	ret = mbsrtowcs(NULL, &src, unlimited ? BUFSIZE : 1, &temp_state);
	assert(ret == 3);
	assert(src == input + 2);
	assert(mbsinit(&state));

	/* Actual conversion of the remaining three characters. */
	src = input + 2;
	ret = mbsrtowcs(buf, &src, unlimited ? BUFSIZE : 1, &state);
	assert(ret == (unlimited ? 3 : 1));
	assert(src == (unlimited ? NULL : input + 3));
	assert(wctob(buf[0]) == (unsigned char) '\337');
	if (unlimited) {
		assert(buf[1] == 'e');
		assert(buf[2] == 'r');
		assert(buf[3] == 0);
		assert(buf[4] == kUntouched);
	} else {
		assert(buf[1] == kUntouched);
	}
	assert(mbsinit(&state));
}


/* Scenario for a UTF-8 locale (main() selects en_US.UTF-8).
   unlimited != 0 passes BUFSIZE as the length limit, otherwise the limit
   is 2 wide characters.  Failures are reported through assert(). */
static void
check_utf8(int unlimited)
{
	char input[] = "B\303\274\303\237er"; /* "Büßer" */
	wchar_t buf[BUFSIZE];
	mbstate_t state;
	mbstate_t temp_state;
	const char *src;
	wchar_t wc;
	size_t ret;

	poison_buffer(buf);
	memset(&state, '\0', sizeof(mbstate_t));

	wc = kUntouched;
	ret = mbrtowc(&wc, input, 1, &state);
	assert(ret == 1);
	assert(wc == 'B');
	assert(mbsinit(&state));
	input[0] = '\0';

	/* Feed only the first byte of the two-byte sequence: the call must
	   return (size_t)-2, store nothing and leave state non-initial, so
	   that mbsrtowcs() below has to resume in mid-character. */
	wc = kUntouched;
	ret = mbrtowc(&wc, input + 1, 1, &state);
	assert(ret == (size_t)(-2));
	assert(wc == kUntouched);
	assert(!mbsinit(&state));
	input[1] = '\0';

	/* Length query on a copy of the state; state itself must keep its
	   pending (non-initial) contents and src must not be advanced. */
	src = input + 2;
	temp_state = state;
	ret = mbsrtowcs(NULL, &src, unlimited ? BUFSIZE : 2, &temp_state);
	assert(ret == 4);
	assert(src == input + 2);
	assert(!mbsinit(&state));

	/* Actual conversion, resuming from the pending state. */
	src = input + 2;
	ret = mbsrtowcs(buf, &src, unlimited ? BUFSIZE : 2, &state);
	assert(ret == (unlimited ? 4 : 2));
	assert(src == (unlimited ? NULL : input + 5));
	assert(wctob(buf[0]) == EOF);
	assert(wctob(buf[1]) == EOF);
	if (unlimited) {
		assert(buf[2] == 'e');
		assert(buf[3] == 'r');
		assert(buf[4] == 0);
		assert(buf[5] == kUntouched);
	} else {
		assert(buf[2] == kUntouched);
	}
	assert(mbsinit(&state));
}


/* Scenario for an EUC-JP locale (main() selects en_US.EUC-JP).
   unlimited != 0 passes BUFSIZE as the length limit, otherwise the limit
   is 2 wide characters.  Failures are reported through assert(). */
static void
check_euc_jp(int unlimited)
{
	char input[] = "<\306\374\313\334\270\354>"; /* "<日本語>" */
	wchar_t buf[BUFSIZE];
	mbstate_t state;
	mbstate_t temp_state;
	const char *src;
	wchar_t wc;
	size_t ret;

	poison_buffer(buf);
	memset(&state, '\0', sizeof(mbstate_t));

	wc = kUntouched;
	ret = mbrtowc(&wc, input, 1, &state);
	assert(ret == 1);
	assert(wc == '<');
	assert(mbsinit(&state));
	input[0] = '\0';

	/* A complete two-byte character. */
	wc = kUntouched;
	ret = mbrtowc(&wc, input + 1, 2, &state);
	assert(ret == 2);
	assert(wctob(wc) == EOF);
	assert(mbsinit(&state));
	input[1] = '\0';
	input[2] = '\0';

	/* Only the first byte of the next two-byte character: the call must
	   return (size_t)-2, store nothing and leave state non-initial. */
	wc = kUntouched;
	ret = mbrtowc(&wc, input + 3, 1, &state);
	assert(ret == (size_t)(-2));
	assert(wc == kUntouched);
	assert(!mbsinit(&state));
	input[3] = '\0';

	/* Length query on a copy of the state; state itself must keep its
	   pending (non-initial) contents and src must not be advanced. */
	src = input + 4;
	temp_state = state;
	ret = mbsrtowcs(NULL, &src, unlimited ? BUFSIZE : 2, &temp_state);
	assert(ret == 3);
	assert(src == input + 4);
	assert(!mbsinit(&state));

	/* Actual conversion, resuming from the pending state. */
	src = input + 4;
	ret = mbsrtowcs(buf, &src, unlimited ? BUFSIZE : 2, &state);
	assert(ret == (unlimited ? 3 : 2));
	assert(src == (unlimited ? NULL : input + 7));
	assert(wctob(buf[0]) == EOF);
	assert(wctob(buf[1]) == EOF);
	if (unlimited) {
		assert(buf[2] == '>');
		assert(buf[3] == 0);
		assert(buf[4] == kUntouched);
	} else {
		assert(buf[2] == kUntouched);
	}
	assert(mbsinit(&state));
}


/* Scenario for a GB18030 locale (main() selects en_US.GB18030).
   unlimited != 0 passes BUFSIZE as the length limit, otherwise the limit
   is 2 wide characters.  Failures are reported through assert(). */
static void
check_gb18030(int unlimited)
{
	char input[] = "B\250\271\201\060\211\070er"; /* "Büßer" */
	wchar_t buf[BUFSIZE];
	mbstate_t state;
	mbstate_t temp_state;
	const char *src;
	wchar_t wc;
	size_t ret;

	poison_buffer(buf);
	memset(&state, '\0', sizeof(mbstate_t));

	wc = kUntouched;
	ret = mbrtowc(&wc, input, 1, &state);
	assert(ret == 1);
	assert(wc == 'B');
	assert(mbsinit(&state));
	input[0] = '\0';

	/* Only the first byte of the following multibyte character: the call
	   must return (size_t)-2, store nothing and leave state non-initial. */
	wc = kUntouched;
	ret = mbrtowc(&wc, input + 1, 1, &state);
	assert(ret == (size_t)(-2));
	assert(wc == kUntouched);
	assert(!mbsinit(&state));
	input[1] = '\0';

	/* Length query on a copy of the state; state itself must keep its
	   pending (non-initial) contents and src must not be advanced. */
	src = input + 2;
	temp_state = state;
	ret = mbsrtowcs(NULL, &src, unlimited ? BUFSIZE : 2, &temp_state);
	assert(ret == 4);
	assert(src == input + 2);
	assert(!mbsinit(&state));

	/* Actual conversion, resuming from the pending state. */
	src = input + 2;
	ret = mbsrtowcs(buf, &src, unlimited ? BUFSIZE : 2, &state);
	assert(ret == (unlimited ? 4 : 2));
	assert(src == (unlimited ? NULL : input + 7));
	assert(wctob(buf[0]) == EOF);
	assert(wctob(buf[1]) == EOF);
	if (unlimited) {
		assert(buf[2] == 'e');
		assert(buf[3] == 'r');
		assert(buf[4] == 0);
		assert(buf[5] == kUntouched);
	} else {
		assert(buf[2] == kUntouched);
	}
	assert(mbsinit(&state));
}


/* Test driver for mbsrtowcs().
   Selects the environment's default locale, runs the empty-input checks,
   then runs each encoding scenario twice (limited and unlimited output
   length).  A scenario whose locale cannot be selected is reported on
   stderr and skipped.
   Returns 0 when every executed check passed, 1 when the default locale
   cannot be set; a failed check aborts through assert().
   argc and argv are not used. */
int
main(int argc, char *argv[])
{
	static const struct {
		const char *encoding;	/* name used in the progress messages */
		const char *locale;		/* locale to select for the scenario */
		void (*check)(int unlimited);
	} kScenarios[] = {
		{ "ISO8859-1", "en_US.ISO8859-1", check_iso8859_1 },
		{ "UTF-8", "en_US.UTF-8", check_utf8 },
		{ "EUC-JP", "en_US.EUC-JP", check_euc_jp },
		{ "GB18030", "en_US.GB18030", check_gb18030 }
	};
	size_t index;
	int unlimited;

	(void) argc;
	(void) argv;

	/* configure should already have checked that the locale is supported. */
	if (setlocale(LC_ALL, "") == NULL) {
		fprintf(stderr, "unable to set standard locale\n");
		return 1;
	}

	check_empty_input();

	for (index = 0; index < sizeof(kScenarios) / sizeof(kScenarios[0]);
			index++) {
		for (unlimited = 0; unlimited < 2; unlimited++) {
			printf("%s ...\n", kScenarios[index].encoding);
			/* Flush now: a failing assert() aborts the process, and
			   buffered stdout data is not guaranteed to be written then.
			   This also keeps the banner ahead of the stderr notice. */
			fflush(stdout);

			if (setlocale(LC_ALL, kScenarios[index].locale) == NULL) {
				fprintf(stderr, "unable to set %s locale, skipping\n",
					kScenarios[index].encoding);
				continue;
			}

			kScenarios[index].check(unlimited);
		}
	}

	return 0;
}