#include <stdio.h>
#include <stdlib.h>
#include <wchar.h>

/*
 * t_mbstowcs - manual test for mbstowcs() / wcstombs().
 *
 * Converts argv[1] to a wide string, then prints every wide character as
 * "U+<hex> <character>" by converting each one back to a multibyte string.
 *
 * Returns 0 on success, 1 on a usage error, an invalid multibyte argument,
 * or an allocation failure.
 */
int main(int argc, char * argv[]) {
    if (argc < 2) {
        fprintf(stderr, "usage: %s STRING\n", argc > 0 ? argv[0] : "t_mbstowcs");
        return 1;
    }

    /* With a NULL destination mbstowcs() only counts the wide characters. */
    size_t req = mbstowcs(NULL, argv[1], 0);
    if (req == (size_t)-1) {
        fprintf(stderr, "%s: invalid multibyte sequence\n", argv[0]);
        return 1;
    }

    /* req excludes the terminator, and the call below writes req + 1 items. */
    wchar_t * dest = malloc(sizeof(*dest) * (req + 1));
    if (!dest) {
        fprintf(stderr, "%s: out of memory\n", argv[0]);
        return 1;
    }
    mbstowcs(dest, argv[1], req + 1);

    for (size_t i = 0; i < req; ++i) {
        char tmp[8];
        wchar_t in[] = {dest[i], L'\0'};
        wcstombs(tmp, in, sizeof(tmp));
        /* %x expects an unsigned int, so convert the wchar_t explicitly. */
        fprintf(stdout, "U+%4x %s\n", (unsigned int)dest[i], tmp);
    }

    free(dest);
    return 0;
}
#include <stddef.h>
#include <stdint.h>
#include <string.h>

/*
 * UTF-8 <-> wide-character string conversion: mbstowcs() and wcstombs().
 *
 * NOTE(review): the previous mbstowcs() delegated byte decoding to an external
 * decode()/UTF8_REJECT state machine whose header is not identifiable here.
 * The local decoder below replaces it so this code is self-contained; confirm
 * that the accepted range (1- to 6-byte forms, mirroring to_eight()) matches
 * what the project expects.
 */

/* Longest sequence to_eight() can emit, not counting the NUL terminator. */
enum { UTF8_SEQ_MAX = 6 };

/* Smallest value that legitimately needs a sequence of the given length. */
static const uint32_t utf8_min_value[UTF8_SEQ_MAX + 1] = {
    0, 0, 0x80, 0x800, 0x10000, 0x200000, 0x4000000
};

/* Marker bits of the lead byte for a sequence of the given length. */
static const unsigned char utf8_lead_mark[UTF8_SEQ_MAX + 1] = {
    0, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC
};

/*
 * Encode one value as a UTF-8 style byte sequence.
 *
 * codepoint: value to encode; anything below 0x80000000 is accepted.
 * out:       buffer of at least UTF8_SEQ_MAX + 1 bytes; receives the sequence
 *            followed by a NUL byte.
 *
 * Returns the number of bytes written before the NUL (1..6), or 0 when the
 * value does not fit in a 6-byte sequence.
 */
static int to_eight(uint32_t codepoint, char * out) {
    /* Work on unsigned bytes: plain char may be signed. */
    unsigned char * bytes = (unsigned char *)out;
    int len;

    if (codepoint < 0x80) {
        len = 1;
    } else if (codepoint < 0x800) {
        len = 2;
    } else if (codepoint < 0x10000) {
        len = 3;
    } else if (codepoint < 0x200000) {
        len = 4;
    } else if (codepoint < 0x4000000) {
        len = 5;
    } else if (codepoint < 0x80000000u) {
        len = 6;
    } else {
        bytes[0] = '\0';
        return 0;
    }

    /* Fill continuation bytes back to front, six payload bits each. */
    for (int i = len - 1; i > 0; --i) {
        bytes[i] = (unsigned char)(0x80 | (codepoint & 0x3F));
        codepoint >>= 6;
    }
    /* Whatever is left fits in the lead byte's payload bits. */
    bytes[0] = (unsigned char)(utf8_lead_mark[len] | codepoint);
    bytes[len] = '\0';

    return len;
}

/*
 * Classify a lead byte.
 *
 * Stores the payload bits carried by the lead byte in *payload and returns
 * the total sequence length (1..6), or 0 if the byte cannot start a sequence
 * (a continuation byte, 0xFE or 0xFF).
 */
static int utf8_lead(unsigned char lead, uint32_t * payload) {
    if (lead < 0x80)           { *payload = lead;        return 1; }
    if ((lead & 0xE0) == 0xC0) { *payload = lead & 0x1F; return 2; }
    if ((lead & 0xF0) == 0xE0) { *payload = lead & 0x0F; return 3; }
    if ((lead & 0xF8) == 0xF0) { *payload = lead & 0x07; return 4; }
    if ((lead & 0xFC) == 0xF8) { *payload = lead & 0x03; return 5; }
    if ((lead & 0xFE) == 0xFC) { *payload = lead & 0x01; return 6; }
    return 0;
}

/*
 * Convert a NUL-terminated UTF-8 string to wide characters.
 *
 * dest: output array, or NULL to only count the wide characters in src
 *       (n is ignored in that case).
 * src:  NUL-terminated multibyte string.
 * n:    maximum number of wide characters to store in dest.
 *
 * Returns the number of wide characters converted, not counting the
 * terminator, or (size_t)-1 if src contains an invalid, overlong or
 * truncated sequence. dest is terminated with L'\0' only when the whole
 * string was converted and fewer than n wide characters were stored.
 */
size_t mbstowcs(wchar_t *dest, const char *src, size_t n) {
    const unsigned char * s = (const unsigned char *)src;
    size_t count = 0;

    while (*s) {
        if (dest && count == n) {
            break;
        }

        uint32_t codepoint;
        int len = utf8_lead(*s, &codepoint);
        if (len == 0) {
            return (size_t)-1;
        }

        /*
         * The terminating NUL is not a continuation byte, so a sequence cut
         * short by the end of the string is rejected here before any read
         * past the terminator can happen.
         */
        for (int i = 1; i < len; ++i) {
            if ((s[i] & 0xC0) != 0x80) {
                return (size_t)-1;
            }
            codepoint = (codepoint << 6) | (s[i] & 0x3F);
        }

        /* Reject overlong forms: the value must need this many bytes. */
        if (codepoint < utf8_min_value[len]) {
            return (size_t)-1;
        }

        if (dest) {
            dest[count] = (wchar_t)codepoint;
        }
        count++;
        s += len;
    }

    /* count < n here implies the loop ended at the end of the string. */
    if (dest && count < n) {
        dest[count] = L'\0';
    }

    return count;
}

/*
 * Convert a wide-character string to UTF-8.
 *
 * dest: output buffer, or NULL to only compute the number of bytes needed
 *       (n is ignored in that case and nothing is written).
 * src:  wide string terminated by L'\0'.
 * n:    capacity of dest in bytes.
 *
 * Returns the number of bytes produced, not counting the terminator, or
 * (size_t)-1 if a wide character cannot be encoded. Only whole characters
 * are stored: conversion stops before the first character that does not fit
 * in the remaining space. dest is NUL-terminated only when the whole string
 * was converted and fewer than n bytes were written.
 */
size_t wcstombs(char * dest, const wchar_t *src, size_t n) {
    size_t count = 0;

    for (; *src; ++src) {
        char tmp[UTF8_SEQ_MAX + 1];
        int size = to_eight((uint32_t)*src, tmp);

        if (size == 0) {
            return (size_t)-1;
        }

        if (dest) {
            /* count <= n always holds here, so n - count cannot wrap. */
            if ((size_t)size > n - count) {
                return count;
            }
            memcpy(&dest[count], tmp, (size_t)size);
        }
        count += (size_t)size;
    }

    if (dest && count < n) {
        dest[count] = '\0';
    }

    return count;
}