From 5c91c30a18db93189eac750476785153216fc498 Mon Sep 17 00:00:00 2001 From: akallabeth Date: Tue, 15 Nov 2022 08:38:50 +0100 Subject: [PATCH] [winpr,crt] Added new unicode conversion functions * Added functions converting WCHAR to/from UTF-8 with given buffers and proper size_t arguments to have a centralized check for integer overflows on RDP deserialization * Added allocating functions converting WCHAR to/from UTF-8 as convenience --- winpr/include/winpr/string.h | 124 ++++ winpr/libwinpr/crt/string.c | 4 + .../libwinpr/crt/test/TestUnicodeConversion.c | 653 ++++++++++++++++++ winpr/libwinpr/crt/unicode.c | 199 ++++++ 4 files changed, 980 insertions(+) diff --git a/winpr/include/winpr/string.h b/winpr/include/winpr/string.h index 42ddca69f..090a1cce8 100644 --- a/winpr/include/winpr/string.h +++ b/winpr/include/winpr/string.h @@ -201,6 +201,130 @@ extern "C" #endif /* Extended API */ + /** \brief Converts from UTF-16 to UTF-8 + * + * The function does string conversions of any '\0' terminated input string + * + * Supplying len = 0 will return the required size of the buffer in characters. + * + * \warning Supplying a buffer length smaller than required will result in + * platform dependent (=undefined) behaviour! + * + * \param wstr A '\0' terminated WCHAR string, may be NULL + * \param str A pointer to the result string + * \param len The length in characters of the result buffer + * + * \return the size of the converted string in char (strlen), or -1 for failure + */ + WINPR_API SSIZE_T ConvertWCharToUtf8(const WCHAR* wstr, char* str, size_t len); + + /** \brief Converts from UTF-16 to UTF-8 + * + * The function does string conversions of any input string of wlen (or less) + * characters until it reaches the first '\0'. + * + * Supplying len = 0 will return the required size of the buffer in characters. + * + * \warning Supplying a buffer length smaller than required will result in + * platform dependent (=undefined) behaviour! 
+ * + * \param wstr A WCHAR string of \b wlen length + * \param wlen The (buffer) length in characters of \b wstr + * \param str A pointer to the result string + * \param len The length in characters of the result buffer + * + * \return the size of the converted string in char (strlen), or -1 for failure + */ + WINPR_API SSIZE_T ConvertWCharNToUtf8(const WCHAR* wstr, size_t wlen, char* str, size_t len); + + /** \brief Converts from UTF-8 to UTF-16 + * + * The function does string conversions of any '\0' terminated input string + * + * Supplying wlen = 0 will return the required size of the buffer in characters. + * + * \warning Supplying a buffer length smaller than required will result in + * platform dependent (=undefined) behaviour! + * + * \param str A '\0' terminated CHAR string, may be NULL + * \param wstr A pointer to the result WCHAR string + * \param wlen The length in WCHAR characters of the result buffer + * + * \return the size of the converted string in WCHAR characters (wcslen), or -1 for failure + */ + WINPR_API SSIZE_T ConvertUtf8ToWChar(const char* str, WCHAR* wstr, size_t wlen); + + /** \brief Converts from UTF-8 to UTF-16 + * + * The function does string conversions of any input string of len (or less) + * characters until it reaches the first '\0'. + * + * Supplying wlen = 0 will return the required size of the buffer in characters. + * + * \warning Supplying a buffer length smaller than required will result in + * platform dependent (=undefined) behaviour! 
+ * + * \param str A CHAR string of \b len length + * \param len The (buffer) length in characters of \b str + * \param wstr A pointer to the result WCHAR string + * \param wlen The length in WCHAR characters of the result buffer + * + * \return the size of the converted string in WCHAR characters (wcslen), or -1 for failure + */ + WINPR_API SSIZE_T ConvertUtf8NToWChar(const char* str, size_t len, WCHAR* wstr, size_t wlen); + + /** \brief Converts from UTF-16 to UTF-8, returns an allocated string + * + * The function does string conversions of any '\0' terminated input string + * + * \param wstr A '\0' terminated WCHAR string, may be NULL + * \param pSize Ignored if NULL, otherwise receives the length of the result string in + * characters (strlen) + * + * \return An allocated zero terminated UTF-8 string or NULL in case of failure. + */ + WINPR_API char* ConvertWCharToUtf8Alloc(const WCHAR* wstr, size_t* pSize); + + /** \brief Converts from UTF-16 to UTF-8, returns an allocated string + * + * The function does string conversions of any input string of wlen (or less) + * characters until it reaches the first '\0'. + * + * \param wstr A WCHAR string of \b wlen length + * \param wlen The (buffer) length in characters of \b wstr + * \param pSize Ignored if NULL, otherwise receives the length of the result string in + * characters (strlen) + * + * \return An allocated zero terminated UTF-8 string or NULL in case of failure. 
+ */ + WINPR_API char* ConvertWCharNToUtf8Alloc(const WCHAR* wstr, size_t wlen, size_t* pSize); + + /** \brief Converts from UTF-8 to UTF-16, returns an allocated string + * + * The function does string conversions of any '\0' terminated input string + * + * \param str A '\0' terminated CHAR string, may be NULL + * (the length is taken from the terminating '\0') + * \param pSize Ignored if NULL, otherwise receives the length of the result string in + * characters (wcslen) + * + * \return An allocated zero terminated UTF-16 string or NULL in case of failure. + */ + WINPR_API WCHAR* ConvertUtf8ToWCharAlloc(const char* str, size_t* pSize); + + /** \brief Converts from UTF-8 to UTF-16, returns an allocated string + * + * The function does string conversions of any input string of len (or less) + * characters until it reaches the first '\0'. + * + * \param str A CHAR string of \b len length + * \param len The (buffer) length in characters of \b str + * \param pSize Ignored if NULL, otherwise receives the length of the result string in + * characters (wcslen) + * + * \return An allocated zero terminated UTF-16 string or NULL in case of failure. + */ + WINPR_API WCHAR* ConvertUtf8NToWCharAlloc(const char* str, size_t len, size_t* pSize); WINPR_API int ConvertToUnicode(UINT CodePage, DWORD dwFlags, LPCSTR lpMultiByteStr, int cbMultiByte, LPWSTR* lpWideCharStr, int cchWideChar); diff --git a/winpr/libwinpr/crt/string.c b/winpr/libwinpr/crt/string.c index 9dc6c77a6..e868af4e5 100644 --- a/winpr/libwinpr/crt/string.c +++ b/winpr/libwinpr/crt/string.c @@ -35,6 +35,10 @@ #include "../log.h" #define TAG WINPR_TAG("crt") +#ifndef MIN +#define MIN(x, y) (((x) < (y)) ? 
(x) : (y)) +#endif + BOOL winpr_str_append(const char* what, char* buffer, size_t size, const char* separator) { const size_t used = strnlen(buffer, size); diff --git a/winpr/libwinpr/crt/test/TestUnicodeConversion.c b/winpr/libwinpr/crt/test/TestUnicodeConversion.c index a0c27c1b5..438ec5fe4 100644 --- a/winpr/libwinpr/crt/test/TestUnicodeConversion.c +++ b/winpr/libwinpr/crt/test/TestUnicodeConversion.c @@ -6,6 +6,652 @@ #include #include +#define TESTCASE_BUFFER_SIZE 8192 + +typedef struct +{ + char* utf8; + size_t utf8len; + WCHAR* utf16; + size_t utf16len; +} testcase_t; + +// TODO: The unit tests do not check for valid code points, so always end the test +// strings with a simple ASCII symbol for now. +static const testcase_t unit_testcases[] = { + { "foo", 3, "f\x00o\x00o\x00\x00\x00", 3 }, + { "foo", 4, "f\x00o\x00o\x00\x00\x00", 4 }, + { "✊🎅ęʥ꣸𑗊a", 19, + "\x0a\x27\x3c\xd8\x85\xdf\x19\x01\xa5\x02\xf8\xa8\x05\xd8\xca\xdd\x61\x00\x00\x00", 9 } +}; + +static void create_prefix(char* prefix, size_t prefixlen, size_t buffersize, SSIZE_T rc, + SSIZE_T inputlen, const testcase_t* test, const char* fkt, size_t line) +{ + _snprintf(prefix, prefixlen, + "[%s:%" PRIuz "] '%s' [utf8: %" PRIuz ", utf16: %" PRIuz "] buffersize: %" PRIuz + ", rc: %" PRIdz ", inputlen: %" PRIdz ":: ", + fkt, line, test->utf8, test->utf8len, test->utf16len, buffersize, rc, inputlen); +} + +#define compare_utf16(what, buffersize, rc, inputlen, test) \ + compare_utf16_int((what), (buffersize), (rc), (inputlen), (test), __FUNCTION__, __LINE__) +static BOOL compare_utf16_int(const WCHAR* what, size_t buffersize, SSIZE_T rc, SSIZE_T inputlen, + const testcase_t* test, const char* fkt, size_t line) +{ + char prefix[8192] = { 0 }; + create_prefix(prefix, ARRAYSIZE(prefix), buffersize, rc, inputlen, test, fkt, line); + + WINPR_ASSERT(what || (buffersize == 0)); + WINPR_ASSERT(test); + + const size_t welen = _wcsnlen(test->utf16, test->utf16len); + if (buffersize > welen) + { + if (rc != 
welen) + { + fprintf(stderr, "%s length does not match expectation: %" PRIdz " != %" PRIuz "\n", + prefix, rc, welen); + return FALSE; + } + } + else + { + if (rc != buffersize) + { + if (rc != 0) + { + fprintf(stderr, "%s length does not match buffersize: %" PRIdz " != %" PRIuz "\n", + prefix, rc, buffersize); + return FALSE; + } + else + { + const DWORD err = GetLastError(); + if (err != ERROR_INSUFFICIENT_BUFFER) + { + + fprintf(stderr, + "%s length does not match buffersize: %" PRIdz " != %" PRIuz + ", unexpected GetLastError() 0x08%" PRIx32 "\n", + prefix, rc, buffersize, err); + return FALSE; + } + else + return TRUE; + } + } + } + + if (buffersize > rc) + { + const size_t wlen = _wcsnlen(what, buffersize); + if (wlen != rc) + { + fprintf(stderr, "%s length does not match wcslen: %" PRIdz " != %" PRIuz "\n", prefix, + rc, wlen); + return FALSE; + } + } + + if (memcmp(test->utf16, what, rc * sizeof(WCHAR)) != 0) + { + fprintf(stderr, "%s contents does not match expectations: TODO '%s' != '%s'\n", prefix, + test->utf8, test->utf8); + return FALSE; + } + + printf("%s success\n", prefix); + + return TRUE; +} + +#define compare_utf8(what, buffersize, rc, inputlen, test) \ + compare_utf8_int((what), (buffersize), (rc), (inputlen), (test), __FUNCTION__, __LINE__) +static BOOL compare_utf8_int(const char* what, size_t buffersize, SSIZE_T rc, SSIZE_T inputlen, + const testcase_t* test, const char* fkt, size_t line) +{ + char prefix[8192] = { 0 }; + create_prefix(prefix, ARRAYSIZE(prefix), buffersize, rc, inputlen, test, fkt, line); + + WINPR_ASSERT(what || (buffersize == 0)); + WINPR_ASSERT(test); + + const size_t slen = strnlen(test->utf8, test->utf8len); + if (buffersize > slen) + { + if (rc != slen) + { + fprintf(stderr, "%s length does not match expectation: %" PRIdz " != %" PRIuz "\n", + prefix, rc, slen); + return FALSE; + } + } + else + { + if (rc != buffersize) + { + if (rc != 0) + { + fprintf(stderr, "%s length does not match buffersize: %" PRIdz " != %" 
PRIuz "\n", + prefix, rc, buffersize); + return FALSE; + } + else + { + const DWORD err = GetLastError(); + if (err != ERROR_INSUFFICIENT_BUFFER) + { + + fprintf(stderr, + "%s length does not match buffersize: %" PRIdz " != %" PRIuz + ", unexpected GetLastError() 0x08%" PRIx32 "\n", + prefix, rc, buffersize, err); + return FALSE; + } + else + return TRUE; + } + } + } + + if (buffersize > rc) + { + const size_t wlen = strnlen(what, buffersize); + if (wlen != rc) + { + fprintf(stderr, "%s length does not match strnlen: %" PRIdz " != %" PRIuz "\n", prefix, + rc, wlen); + return FALSE; + } + } + + if (memcmp(test->utf8, what, rc) != 0) + { + fprintf(stderr, "%s contents does not match expectations: '%s' != '%s'\n", prefix, what, + test->utf8); + return FALSE; + } + printf("%s success\n", prefix); + + return TRUE; +} + +static BOOL test_convert_to_utf16(const testcase_t* test) +{ + const size_t len[] = { TESTCASE_BUFFER_SIZE, test->utf16len, test->utf16len + 1, + test->utf16len - 1 }; + const size_t max = test->utf16len > 0 ? ARRAYSIZE(len) : ARRAYSIZE(len) - 1; + + const SSIZE_T rc2 = ConvertUtf8ToWChar(test->utf8, NULL, 0); + const size_t wlen = _wcsnlen(test->utf16, test->utf16len); + if (rc2 != wlen) + { + char prefix[8192] = { 0 }; + create_prefix(prefix, ARRAYSIZE(prefix), 0, rc2, -1, test, __FUNCTION__, __LINE__); + fprintf(stderr, "%s ConvertUtf8ToWChar(%s, NULL, 0) expected %" PRIuz ", got %" PRIdz "\n", + prefix, test->utf8, wlen, rc2); + return FALSE; + } + for (size_t x = 0; x < max; x++) + { + WCHAR buffer[TESTCASE_BUFFER_SIZE] = { 0 }; + const SSIZE_T rc = ConvertUtf8ToWChar(test->utf8, buffer, len[x]); + if (!compare_utf16(buffer, len[x], rc, -1, test)) + return FALSE; + } + + return TRUE; +} + +static BOOL test_convert_to_utf16_n(const testcase_t* test) +{ + const size_t len[] = { TESTCASE_BUFFER_SIZE, test->utf16len, test->utf16len + 1, + test->utf16len - 1 }; + const size_t max = test->utf16len > 0 ? 
ARRAYSIZE(len) : ARRAYSIZE(len) - 1; + + const SSIZE_T rc2 = ConvertUtf8NToWChar(test->utf8, test->utf8len, NULL, 0); + const size_t wlen = _wcsnlen(test->utf16, test->utf16len); + if (rc2 != wlen) + { + char prefix[8192] = { 0 }; + create_prefix(prefix, ARRAYSIZE(prefix), 0, rc2, test->utf8len, test, __FUNCTION__, + __LINE__); + fprintf(stderr, + "%s ConvertUtf8NToWChar(%s, %" PRIuz ", NULL, 0) expected %" PRIuz ", got %" PRIdz + "\n", + prefix, test->utf8, test->utf8len, wlen, rc2); + return FALSE; + } + + for (size_t x = 0; x < max; x++) + { + const size_t ilen[] = { TESTCASE_BUFFER_SIZE, test->utf8len, test->utf8len + 1, + test->utf8len - 1 }; + const size_t imax = test->utf8len > 0 ? ARRAYSIZE(ilen) : ARRAYSIZE(ilen) - 1; + + for (size_t y = 0; y < imax; y++) + { + WCHAR buffer[TESTCASE_BUFFER_SIZE] = { 0 }; + SSIZE_T rc = ConvertUtf8NToWChar(test->utf8, ilen[x], buffer, len[x]); + if (!compare_utf16(buffer, len[x], rc, ilen[x], test)) + return FALSE; + } + } + return TRUE; +} + +static BOOL test_convert_to_utf8(const testcase_t* test) +{ + const size_t len[] = { TESTCASE_BUFFER_SIZE, test->utf8len, test->utf8len + 1, + test->utf8len - 1 }; + const size_t max = test->utf8len > 0 ? 
ARRAYSIZE(len) : ARRAYSIZE(len) - 1; + + const SSIZE_T rc2 = ConvertWCharToUtf8(test->utf16, NULL, 0); + const size_t wlen = strnlen(test->utf8, test->utf8len); + if (rc2 != wlen) + { + char prefix[8192] = { 0 }; + create_prefix(prefix, ARRAYSIZE(prefix), 0, rc2, -1, test, __FUNCTION__, __LINE__); + fprintf(stderr, "%s ConvertWCharToUtf8(%s, NULL, 0) expected %" PRIuz ", got %" PRIdz "\n", + prefix, test->utf8, wlen, rc2); + return FALSE; + } + + for (size_t x = 0; x < max; x++) + { + char buffer[TESTCASE_BUFFER_SIZE] = { 0 }; + SSIZE_T rc = ConvertWCharToUtf8(test->utf16, buffer, len[x]); + if (!compare_utf8(buffer, len[x], rc, -1, test)) + return FALSE; + } + + return TRUE; +} + +static BOOL test_convert_to_utf8_n(const testcase_t* test) +{ + const size_t len[] = { TESTCASE_BUFFER_SIZE, test->utf8len, test->utf8len + 1, + test->utf8len - 1 }; + const size_t max = test->utf8len > 0 ? ARRAYSIZE(len) : ARRAYSIZE(len) - 1; + + const SSIZE_T rc2 = ConvertWCharNToUtf8(test->utf16, test->utf16len, NULL, 0); + const size_t wlen = strnlen(test->utf8, test->utf8len); + if (rc2 != wlen) + { + char prefix[8192] = { 0 }; + create_prefix(prefix, ARRAYSIZE(prefix), 0, rc2, test->utf16len, test, __FUNCTION__, + __LINE__); + fprintf(stderr, + "%s ConvertWCharNToUtf8(%s, %" PRIuz ", NULL, 0) expected %" PRIuz ", got %" PRIdz + "\n", + prefix, test->utf8, test->utf16len, wlen, rc2); + return FALSE; + } + + for (size_t x = 0; x < max; x++) + { + const size_t ilen[] = { TESTCASE_BUFFER_SIZE, test->utf16len, test->utf16len + 1, + test->utf16len - 1 }; + const size_t imax = test->utf16len > 0 ? 
ARRAYSIZE(ilen) : ARRAYSIZE(ilen) - 1; + + for (size_t y = 0; y < imax; y++) + { + char buffer[TESTCASE_BUFFER_SIZE] = { 0 }; + SSIZE_T rc = ConvertWCharNToUtf8(test->utf16, ilen[x], buffer, len[x]); + if (!compare_utf8(buffer, len[x], rc, ilen[x], test)) + return FALSE; + } + } + + return TRUE; +} + +static BOOL test_conversion(const testcase_t* testcases, size_t count) +{ + WINPR_ASSERT(testcases || (count == 0)); + for (size_t x = 0; x < count; x++) + { + const testcase_t* test = &testcases[x]; + + printf("Running test case %" PRIuz " [%s]\n", x, test->utf8); + if (!test_convert_to_utf16(test)) + return FALSE; + if (!test_convert_to_utf16_n(test)) + return FALSE; + if (!test_convert_to_utf8(test)) + return FALSE; + if (!test_convert_to_utf8_n(test)) + return FALSE; + } + return TRUE; +} + +#define compare_win_utf16(what, buffersize, rc, inputlen, test) \ + compare_win_utf16_int((what), (buffersize), (rc), (inputlen), (test), __FUNCTION__, __LINE__) +static BOOL compare_win_utf16_int(const WCHAR* what, size_t buffersize, int rc, int inputlen, + const testcase_t* test, const char* fkt, size_t line) +{ + char prefix[8192] = { 0 }; + create_prefix(prefix, ARRAYSIZE(prefix), buffersize, rc, inputlen, test, fkt, line); + + WINPR_ASSERT(what || (buffersize == 0)); + WINPR_ASSERT(test); + + BOOL isNullTerminated = TRUE; + if (inputlen > 0) + isNullTerminated = strnlen(test->utf8, inputlen) < inputlen; + size_t welen = _wcsnlen(test->utf16, buffersize); + if (isNullTerminated) + welen++; + + if (buffersize >= welen) + { + if (rc != welen) + { + fprintf(stderr, "%s length does not match expectation: %d != %" PRIuz "\n", prefix, rc, + welen); + return FALSE; + } + } + else + { + if (rc != buffersize) + { + if (rc != 0) + { + fprintf(stderr, "%s length does not match buffersize: %d != %" PRIuz "\n", prefix, + rc, buffersize); + return FALSE; + } + else + { + const DWORD err = GetLastError(); + if (err != ERROR_INSUFFICIENT_BUFFER) + { + + fprintf(stderr, + "%s length does 
not match buffersize: %d != %" PRIuz + ", unexpected GetLastError() 0x08%" PRIx32 "\n", + prefix, rc, buffersize, err); + return FALSE; + } + else + return TRUE; + } + } + } + + if (buffersize > rc) + { + size_t wlen = _wcsnlen(what, buffersize); + if (isNullTerminated) + wlen++; + if (wlen != rc) + { + fprintf(stderr, "%s length does not match wcslen: %d != %" PRIuz "\n", prefix, rc, + wlen); + return FALSE; + } + } + + if (memcmp(test->utf16, what, rc * sizeof(WCHAR)) != 0) + { + fprintf(stderr, "%s contents does not match expectations: TODO '%s' != '%s'\n", prefix, + test->utf8, test->utf8); + return FALSE; + } + + printf("%s success\n", prefix); + + return TRUE; +} + +#define compare_win_utf8(what, buffersize, rc, inputlen, test) \ + compare_win_utf8_int((what), (buffersize), (rc), (inputlen), (test), __FUNCTION__, __LINE__) +static BOOL compare_win_utf8_int(const char* what, size_t buffersize, SSIZE_T rc, SSIZE_T inputlen, + const testcase_t* test, const char* fkt, size_t line) +{ + char prefix[8192] = { 0 }; + create_prefix(prefix, ARRAYSIZE(prefix), buffersize, rc, inputlen, test, fkt, line); + + WINPR_ASSERT(what || (buffersize == 0)); + WINPR_ASSERT(test); + + BOOL isNullTerminated = TRUE; + if (inputlen > 0) + isNullTerminated = _wcsnlen(test->utf16, inputlen) < inputlen; + + size_t slen = strnlen(test->utf8, test->utf8len); + if (isNullTerminated) + slen++; + + if (buffersize > slen) + { + if (rc != slen) + { + fprintf(stderr, "%s length does not match expectation: %" PRIdz " != %" PRIuz "\n", + prefix, rc, slen); + return FALSE; + } + } + else + { + if (rc != buffersize) + { + if (rc != 0) + { + fprintf(stderr, "%s length does not match buffersize: %" PRIdz " != %" PRIuz "\n", + prefix, rc, buffersize); + return FALSE; + } + else + { + const DWORD err = GetLastError(); + if (err != ERROR_INSUFFICIENT_BUFFER) + { + + fprintf(stderr, + "%s length does not match buffersize: %" PRIdz " != %" PRIuz + ", unexpected GetLastError() 0x08%" PRIx32 "\n", + prefix, 
rc, buffersize, err); + return FALSE; + } + else + return TRUE; + } + } + } + + if (buffersize > rc) + { + size_t wlen = strnlen(what, buffersize); + if (isNullTerminated) + wlen++; + + if (wlen != rc) + { + fprintf(stderr, "%s length does not match wcslen: %" PRIdz " != %" PRIuz "\n", prefix, + rc, wlen); + return FALSE; + } + } + + if (memcmp(test->utf8, what, rc) != 0) + { + fprintf(stderr, "%s contents does not match expectations: '%s' != '%s'\n", prefix, what, + test->utf8); + return FALSE; + } + printf("%s success\n", prefix); + + return TRUE; +} + +static BOOL test_win_convert_to_utf16(const testcase_t* test) +{ + const size_t len[] = { TESTCASE_BUFFER_SIZE, test->utf16len, test->utf16len + 1, + test->utf16len - 1 }; + const size_t max = test->utf16len > 0 ? ARRAYSIZE(len) : ARRAYSIZE(len) - 1; + + const int rc2 = MultiByteToWideChar(CP_UTF8, 0, test->utf8, -1, NULL, 0); + const size_t wlen = _wcsnlen(test->utf16, test->utf16len); + if (rc2 != wlen + 1) + { + char prefix[8192] = { 0 }; + create_prefix(prefix, ARRAYSIZE(prefix), 0, rc2, -1, test, __FUNCTION__, __LINE__); + fprintf(stderr, + "%s MultiByteToWideChar(CP_UTF8, 0, %s, [-1], NULL, 0) expected %" PRIuz + ", got %d\n", + prefix, test->utf8, wlen + 1, rc2); + return FALSE; + } + for (size_t x = 0; x < max; x++) + { + WCHAR buffer[TESTCASE_BUFFER_SIZE] = { 0 }; + const int rc = MultiByteToWideChar(CP_UTF8, 0, test->utf8, -1, buffer, len[x]); + if (!compare_win_utf16(buffer, len[x], rc, -1, test)) + return FALSE; + } + + return TRUE; +} + +static BOOL test_win_convert_to_utf16_n(const testcase_t* test) +{ + const size_t len[] = { TESTCASE_BUFFER_SIZE, test->utf16len, test->utf16len + 1, + test->utf16len - 1 }; + const size_t max = test->utf16len > 0 ? 
ARRAYSIZE(len) : ARRAYSIZE(len) - 1; + + BOOL isNullTerminated = strnlen(test->utf8, test->utf8len) < test->utf8len; + const int rc2 = MultiByteToWideChar(CP_UTF8, 0, test->utf8, test->utf8len, NULL, 0); + size_t wlen = _wcsnlen(test->utf16, test->utf16len); + if (isNullTerminated) + wlen++; + + if (rc2 != wlen) + { + char prefix[8192] = { 0 }; + create_prefix(prefix, ARRAYSIZE(prefix), 0, rc2, test->utf8len, test, __FUNCTION__, + __LINE__); + fprintf(stderr, + "%s MultiByteToWideChar(CP_UTF8, 0, %s, %" PRIuz ", NULL, 0) expected %" PRIuz + ", got %d\n", + prefix, test->utf8, test->utf8len, wlen, rc2); + return FALSE; + } + + for (size_t x = 0; x < max; x++) + { + const size_t ilen[] = { TESTCASE_BUFFER_SIZE, test->utf8len, test->utf8len + 1, + test->utf8len - 1 }; + const size_t imax = test->utf8len > 0 ? ARRAYSIZE(ilen) : ARRAYSIZE(ilen) - 1; + + for (size_t y = 0; y < imax; y++) + { + WCHAR buffer[TESTCASE_BUFFER_SIZE] = { 0 }; + const int rc = MultiByteToWideChar(CP_UTF8, 0, test->utf8, ilen[x], buffer, len[x]); + if (!compare_win_utf16(buffer, len[x], rc, ilen[x], test)) + return FALSE; + } + } + return TRUE; +} + +static BOOL test_win_convert_to_utf8(const testcase_t* test) +{ + const size_t len[] = { TESTCASE_BUFFER_SIZE, test->utf8len, test->utf8len + 1, + test->utf8len - 1 }; + const size_t max = test->utf8len > 0 ? 
ARRAYSIZE(len) : ARRAYSIZE(len) - 1; + + const int rc2 = WideCharToMultiByte(CP_UTF8, 0, test->utf16, -1, NULL, 0, NULL, NULL); + const size_t wlen = strnlen(test->utf8, test->utf8len) + 1; + if (rc2 != wlen) + { + char prefix[8192] = { 0 }; + create_prefix(prefix, ARRAYSIZE(prefix), 0, rc2, -1, test, __FUNCTION__, __LINE__); + fprintf(stderr, + "%s WideCharToMultiByte(CP_UTF8, 0, %s, -1, NULL, 0, NULL, NULL) expected %" PRIuz + ", got %d\n", + prefix, test->utf8, wlen, rc2); + return FALSE; + } + + for (size_t x = 0; x < max; x++) + { + char buffer[TESTCASE_BUFFER_SIZE] = { 0 }; + int rc = WideCharToMultiByte(CP_UTF8, 0, test->utf16, -1, buffer, len[x], NULL, NULL); + if (!compare_win_utf8(buffer, len[x], rc, -1, test)) + return FALSE; + } + + return TRUE; +} + +static BOOL test_win_convert_to_utf8_n(const testcase_t* test) +{ + const size_t len[] = { TESTCASE_BUFFER_SIZE, test->utf8len, test->utf8len + 1, + test->utf8len - 1 }; + const size_t max = test->utf8len > 0 ? ARRAYSIZE(len) : ARRAYSIZE(len) - 1; + + const BOOL isNullTerminated = _wcsnlen(test->utf16, test->utf16len) < test->utf16len; + const int rc2 = + WideCharToMultiByte(CP_UTF8, 0, test->utf16, test->utf16len, NULL, 0, NULL, NULL); + size_t wlen = strnlen(test->utf8, test->utf8len); + if (isNullTerminated) + wlen++; + + if (rc2 != wlen) + { + char prefix[8192] = { 0 }; + create_prefix(prefix, ARRAYSIZE(prefix), 0, rc2, test->utf16len, test, __FUNCTION__, + __LINE__); + fprintf(stderr, + "%s WideCharToMultiByte(CP_UTF8, 0, %s, %" PRIuz + ", NULL, 0, NULL, NULL) expected %" PRIuz ", got %d\n", + prefix, test->utf8, test->utf16len, wlen, rc2); + return FALSE; + } + + for (size_t x = 0; x < max; x++) + { + const size_t ilen[] = { TESTCASE_BUFFER_SIZE, test->utf16len, test->utf16len + 1, + test->utf16len - 1 }; + const size_t imax = test->utf16len > 0 ? 
ARRAYSIZE(ilen) : ARRAYSIZE(ilen) - 1; + + for (size_t y = 0; y < imax; y++) + { + char buffer[TESTCASE_BUFFER_SIZE] = { 0 }; + const int rc = + WideCharToMultiByte(CP_UTF8, 0, test->utf16, ilen[x], buffer, len[x], NULL, NULL); + if (!compare_win_utf8(buffer, len[x], rc, ilen[x], test)) + return FALSE; + } + } + + return TRUE; +} + +static BOOL test_win_conversion(const testcase_t* testcases, size_t count) +{ + WINPR_ASSERT(testcases || (count == 0)); + for (size_t x = 0; x < count; x++) + { + const testcase_t* test = &testcases[x]; + + printf("Running test case %" PRIuz " [%s]\n", x, test->utf8); + if (!test_win_convert_to_utf16(test)) + return FALSE; + if (!test_win_convert_to_utf16_n(test)) + return FALSE; + if (!test_win_convert_to_utf8(test)) + return FALSE; + if (!test_win_convert_to_utf8_n(test)) + return FALSE; + } + return TRUE; +} + /* Letters */ static BYTE c_cedilla_UTF8[] = "\xC3\xA7\x00"; @@ -510,6 +1156,12 @@ int TestUnicodeConversion(int argc, char* argv[]) WINPR_UNUSED(argc); WINPR_UNUSED(argv); + if (!test_conversion(unit_testcases, ARRAYSIZE(unit_testcases))) + return -1; + + if (!test_win_conversion(unit_testcases, ARRAYSIZE(unit_testcases))) + return -1; + /* Letters */ printf("Letters\n"); @@ -637,5 +1289,6 @@ int TestUnicodeConversion(int argc, char* argv[]) } */ + return 0; } diff --git a/winpr/libwinpr/crt/unicode.c b/winpr/libwinpr/crt/unicode.c index c9ad8befd..acc7d0008 100644 --- a/winpr/libwinpr/crt/unicode.c +++ b/winpr/libwinpr/crt/unicode.c @@ -351,6 +351,205 @@ int ConvertFromUnicode(UINT CodePage, DWORD dwFlags, LPCWSTR lpWideCharStr, int void ByteSwapUnicode(WCHAR* wstr, size_t length) { + WINPR_ASSERT(wstr || (length == 0)); + for (size_t x = 0; x < length; x++) wstr[x] = _byteswap_ushort(wstr[x]); } + +SSIZE_T ConvertWCharToUtf8(const WCHAR* wstr, char* str, size_t len) +{ + if (!wstr) + return 0; + + const int rc = + WideCharToMultiByte(CP_UTF8, 0, wstr, -1, str, (int)MIN(INT32_MAX, len), NULL, NULL); + if (rc <= 0) + return 
rc; + else if (rc == len) + { + if (str && (str[rc - 1] != '\0')) + return rc; + } + return rc - 1; +} + +SSIZE_T ConvertWCharNToUtf8(const WCHAR* wstr, size_t wlen, char* str, size_t len) +{ + BOOL isNullTerminated = FALSE; + if (wlen == 0) + return 0; + + WINPR_ASSERT(wstr); + size_t iwlen = _wcsnlen(wstr, wlen); + + if (wlen > INT32_MAX) + return -1; + + if (iwlen < wlen) + { + isNullTerminated = TRUE; + iwlen++; + } + const int rc = WideCharToMultiByte(CP_UTF8, 0, wstr, (int)iwlen, str, (int)MIN(INT32_MAX, len), + NULL, NULL); + if ((rc <= 0) || ((len > 0) && (rc > len))) + return -1; + else if (!isNullTerminated) + { + if (str && (rc < len)) + str[rc] = '\0'; + return rc; + } + else if (rc == len) + { + if (str && (str[rc - 1] != '\0')) + return rc; + } + return rc - 1; +} + +SSIZE_T ConvertUtf8ToWChar(const char* str, WCHAR* wstr, size_t wlen) +{ + if (!str) + return 0; + + const int iwlen = MIN(INT32_MAX, wlen); + const int rc = MultiByteToWideChar(CP_UTF8, 0, str, -1, wstr, iwlen); + if (rc <= 0) + return rc; + else if (iwlen == rc) + { + if (wstr && (wstr[rc - 1] != '\0')) + return rc; + } + return rc - 1; +} + +SSIZE_T ConvertUtf8NToWChar(const char* str, size_t len, WCHAR* wstr, size_t wlen) +{ + size_t ilen = strnlen(str, len); + BOOL isNullTerminated = FALSE; + if (len == 0) + return 0; + + WINPR_ASSERT(str); + + if (len > INT32_MAX) + return -1; + if (ilen < len) + { + isNullTerminated = TRUE; + ilen++; + } + + const int iwlen = MIN(INT32_MAX, wlen); + const int rc = MultiByteToWideChar(CP_UTF8, 0, str, (int)ilen, wstr, (int)iwlen); + if ((rc <= 0) || ((wlen > 0) && (rc > iwlen))) + return -1; + if (!isNullTerminated) + { + if (wstr && (rc < iwlen)) + wstr[rc] = '\0'; + return rc; + } + else if (rc == iwlen) + { + if (wstr && (wstr[rc - 1] != '\0')) + return rc; + } + return rc - 1; +} + +char* ConvertWCharToUtf8Alloc(const WCHAR* wstr, size_t* pUtfCharLength) +{ + char* tmp = NULL; + const SSIZE_T rc = ConvertWCharToUtf8(wstr, NULL, 0); + if 
(pUtfCharLength) + *pUtfCharLength = 0; + if (rc <= 0) + return NULL; + tmp = calloc((size_t)rc + 3ull, sizeof(char)); + if (!tmp) + return NULL; + const SSIZE_T rc2 = ConvertWCharToUtf8(wstr, tmp, (size_t)rc + 2ull); + if (rc2 <= 0) + { + free(tmp); + return NULL; + } + WINPR_ASSERT(rc == rc2); + if (pUtfCharLength) + *pUtfCharLength = (size_t)rc2; + return tmp; +} + +char* ConvertWCharNToUtf8Alloc(const WCHAR* wstr, size_t wlen, size_t* pUtfCharLength) +{ + char* tmp = NULL; + const SSIZE_T rc = ConvertWCharNToUtf8(wstr, wlen, NULL, 0); + + if (pUtfCharLength) + *pUtfCharLength = 0; + if (rc <= 0) + return NULL; + tmp = calloc((size_t)rc + 3ull, sizeof(char)); + if (!tmp) + return NULL; + const SSIZE_T rc2 = ConvertWCharNToUtf8(wstr, wlen, tmp, (size_t)rc + 2ull); + if (rc2 <= 0) + { + free(tmp); + return NULL; + } + WINPR_ASSERT(rc == rc2); + if (pUtfCharLength) + *pUtfCharLength = (size_t)rc2; + return tmp; +} + +WCHAR* ConvertUtf8ToWCharAlloc(const char* str, size_t* pSize) +{ + WCHAR* tmp = NULL; + const SSIZE_T rc = ConvertUtf8ToWChar(str, NULL, 0); + if (pSize) + *pSize = 0; + if (rc <= 0) + return NULL; + tmp = calloc((size_t)rc + 3ull, sizeof(WCHAR)); + if (!tmp) + return NULL; + const SSIZE_T rc2 = ConvertUtf8ToWChar(str, tmp, (size_t)rc + 2ull); + if (rc2 <= 0) + { + free(tmp); + return NULL; + } + WINPR_ASSERT(rc == rc2); + if (pSize) + *pSize = (size_t)rc2; + return tmp; +} + +WCHAR* ConvertUtf8NToWCharAlloc(const char* str, size_t len, size_t* pSize) +{ + WCHAR* tmp = NULL; + const SSIZE_T rc = ConvertUtf8NToWChar(str, len, NULL, 0); + if (pSize) + *pSize = 0; + if (rc <= 0) + return NULL; + tmp = calloc((size_t)rc + 3ull, sizeof(WCHAR)); + if (!tmp) + return NULL; + const SSIZE_T rc2 = ConvertUtf8NToWChar(str, len, tmp, (size_t)rc + 2ull); + if (rc2 <= 0) + { + free(tmp); + return NULL; + } + WINPR_ASSERT(rc == rc2); + if (pSize) + *pSize = (size_t)rc2; + return tmp; +}