[winpr,crt] Added new unicode conversion functions

* Added functions converting WCHAR to/from UTF-8 with given buffers
  and proper size_t arguments to have a centralized check for
  integer overflows on RDP deserialization
* Added allocating functions converting WCHAR to/from UTF-8 as
  convenience
This commit is contained in:
akallabeth 2022-11-15 08:38:50 +01:00 committed by akallabeth
parent 5ae159303f
commit 5c91c30a18
4 changed files with 980 additions and 0 deletions

View File

@ -201,6 +201,130 @@ extern "C"
#endif
/* Extended API */
/** \brief Converts from UTF-16 to UTF-8
 *
 * The function does string conversions of any '\0' terminated input string
 *
 * Supplying len = 0 will return the length of the converted string in
 * characters, not counting the terminating '\0'. The result buffer must be at
 * least one character larger than that to hold the terminator.
 *
 * \warning Supplying a buffer length smaller than required will result in
 * platform dependent (=undefined) behaviour!
 *
 * \param wstr A '\0' terminated WCHAR string, may be NULL (the result is 0 then)
 * \param str A pointer to the result string
 * \param len The length in characters of the result buffer
 *
 * \return the size of the converted string in char (strlen), or a value <= 0 for
 * failure. NOTE(review): the implementation passes the result of the underlying
 * conversion call through unchanged on failure, so do not rely on exactly -1.
 */
WINPR_API SSIZE_T ConvertWCharToUtf8(const WCHAR* wstr, char* str, size_t len);
/** \brief Converts from UTF-16 to UTF-8
 *
 * The function does string conversions of any input string of wlen (or less)
 * characters until it reaches the first '\0'.
 *
 * Supplying len = 0 will return the length of the converted string in
 * characters, not counting a terminating '\0'.
 *
 * \warning Supplying a buffer length smaller than required will result in
 * platform dependent (=undefined) behaviour!
 *
 * \param wstr A WCHAR string of \b wlen length
 * \param wlen The (buffer) length in characters of \b wstr
 * \param str A pointer to the result string
 * \param len The length in characters of the result buffer
 *
 * \return the size of the converted string in char (strlen), 0 if \b wlen is 0,
 * or -1 for failure
 */
WINPR_API SSIZE_T ConvertWCharNToUtf8(const WCHAR* wstr, size_t wlen, char* str, size_t len);
/** \brief Converts from UTF-8 to UTF-16
 *
 * The function does string conversions of any '\0' terminated input string
 *
 * Supplying wlen = 0 will return the length of the converted string in WCHAR
 * characters, not counting the terminating '\0'. The result buffer must be at
 * least one WCHAR larger than that to hold the terminator.
 *
 * \warning Supplying a buffer length smaller than required will result in
 * platform dependent (=undefined) behaviour!
 *
 * \param str A '\0' terminated CHAR string, may be NULL (the result is 0 then)
 * \param wstr A pointer to the result WCHAR string
 * \param wlen The length in WCHAR characters of the result buffer
 *
 * \return the size of the converted string in WCHAR characters (wcslen), or a
 * value <= 0 for failure. NOTE(review): the implementation passes the result of
 * the underlying conversion call through unchanged on failure, so do not rely
 * on exactly -1.
 */
WINPR_API SSIZE_T ConvertUtf8ToWChar(const char* str, WCHAR* wstr, size_t wlen);
/** \brief Converts from UTF-8 to UTF-16
 *
 * The function does string conversions of any input string of len (or less)
 * characters until it reaches the first '\0'.
 *
 * Supplying wlen = 0 will return the length of the converted string in WCHAR
 * characters, not counting a terminating '\0'.
 *
 * \warning Supplying a buffer length smaller than required will result in
 * platform dependent (=undefined) behaviour!
 *
 * \param str A CHAR string of \b len length
 * \param len The (buffer) length in characters of \b str
 * \param wstr A pointer to the result WCHAR string
 * \param wlen The length in WCHAR characters of the result buffer
 *
 * \return the size of the converted string in WCHAR characters (wcslen), 0 if
 * \b len is 0, or -1 for failure
 */
WINPR_API SSIZE_T ConvertUtf8NToWChar(const char* str, size_t len, WCHAR* wstr, size_t wlen);
/** \brief Converts from UTF-16 to UTF-8, returns an allocated string
 *
 * The function does string conversions of any '\0' terminated input string
 *
 * \param wstr A '\0' terminated WCHAR string, may be NULL
 * \param pSize Ignored if NULL, otherwise receives the length of the result string in
 * characters (strlen). It is set to 0 whenever NULL is returned.
 *
 * \return An allocated zero terminated UTF-8 string (release it with free()) or
 * NULL in case of failure. An input that converts to an empty string (including
 * a NULL \b wstr) also yields NULL.
 */
WINPR_API char* ConvertWCharToUtf8Alloc(const WCHAR* wstr, size_t* pSize);
/** \brief Converts from UTF-16 to UTF-8, returns an allocated string
 *
 * The function does string conversions of any input string of wlen (or less)
 * characters until it reaches the first '\0'.
 *
 * \param wstr A WCHAR string of \b wlen length
 * \param wlen The (buffer) length in characters of \b wstr
 * \param pSize Ignored if NULL, otherwise receives the length of the result string in
 * characters (strlen). It is set to 0 whenever NULL is returned.
 *
 * \return An allocated zero terminated UTF-8 string (release it with free()) or
 * NULL in case of failure. An input that converts to an empty string also
 * yields NULL.
 */
WINPR_API char* ConvertWCharNToUtf8Alloc(const WCHAR* wstr, size_t wlen, size_t* pSize);
/** \brief Converts from UTF-8 to UTF-16, returns an allocated string
 *
 * The function does string conversions of any '\0' terminated input string
 *
 * \param str A '\0' terminated CHAR string, may be NULL
 * \param pSize Ignored if NULL, otherwise receives the length of the result string in
 * characters (wcslen). It is set to 0 whenever NULL is returned.
 *
 * \return An allocated zero terminated UTF-16 string (release it with free()) or
 * NULL in case of failure. An input that converts to an empty string (including
 * a NULL \b str) also yields NULL.
 */
WINPR_API WCHAR* ConvertUtf8ToWCharAlloc(const char* str, size_t* pSize);
/** \brief Converts from UTF-8 to UTF-16, returns an allocated string
 *
 * The function does string conversions of any input string of len (or less)
 * characters until it reaches the first '\0'.
 *
 * \param str A CHAR string of \b len length
 * \param len The (buffer) length in characters of \b str
 * \param pSize Ignored if NULL, otherwise receives the length of the result string in
 * characters (wcslen). It is set to 0 whenever NULL is returned.
 *
 * \return An allocated zero terminated UTF-16 string (release it with free()) or
 * NULL in case of failure. An input that converts to an empty string also
 * yields NULL.
 */
WINPR_API WCHAR* ConvertUtf8NToWCharAlloc(const char* str, size_t len, size_t* pSize);
/* Pre-existing conversion entry point using int lengths and a code page.
 * NOTE(review): its exact contract is defined outside this view; presumably the
 * size_t based Convert* functions above are meant for new code - confirm. */
WINPR_API int ConvertToUnicode(UINT CodePage, DWORD dwFlags, LPCSTR lpMultiByteStr,
int cbMultiByte, LPWSTR* lpWideCharStr, int cchWideChar);

View File

@ -35,6 +35,10 @@
#include "../log.h"
#define TAG WINPR_TAG("crt")
/* Fallback definition, used only when no header has provided MIN already.
 * Function-like macro: an argument may be evaluated more than once, so do not
 * pass expressions with side effects. */
#ifndef MIN
#define MIN(x, y) (((x) < (y)) ? (x) : (y))
#endif
BOOL winpr_str_append(const char* what, char* buffer, size_t size, const char* separator)
{
const size_t used = strnlen(buffer, size);

View File

@ -6,6 +6,652 @@
#include <winpr/print.h>
#include <winpr/windows.h>
/* Number of elements in the scratch buffers the conversion tests write into. */
#define TESTCASE_BUFFER_SIZE 8192

/* One conversion fixture: the same text in UTF-8 and in UTF-16, together with
 * the lengths (in char / WCHAR units) that are handed to the length limited
 * conversion functions. The strings are read-only, hence the const pointers. */
typedef struct
{
	const char* utf8;
	size_t utf8len;
	const WCHAR* utf16;
	size_t utf16len;
} testcase_t;

/* UTF-16 fixtures as properly typed WCHAR arrays. Spelling them as code units
 * (instead of byte string literals assigned to a WCHAR pointer) keeps them
 * correctly aligned and independent of the host byte order. */
static const WCHAR testcase_foo_utf16[] = { 'f', 'o', 'o', 0 };
static const WCHAR testcase_mixed_utf16[] = { 0x270a, 0xd83c, 0xdf85, 0x0119, 0x02a5,
	                                          0xa8f8, 0xd805, 0xddca, 0x0061, 0x0000 };

// TODO: The unit tests do not check for valid code points, so always end the test
// strings with a simple ASCII symbol for now.
static const testcase_t unit_testcases[] = {
	/* length without the terminator */
	{ "foo", 3, testcase_foo_utf16, 3 },
	/* length including the terminator */
	{ "foo", 4, testcase_foo_utf16, 4 },
	/* mix of 2, 3 and 4 byte UTF-8 sequences, including surrogate pairs in UTF-16 */
	{ "✊🎅ęʥ꣸𑗊a", 19, testcase_mixed_utf16, 9 }
};
/* Formats the common log prefix for a test message into prefix (at most
 * prefixlen bytes are passed to _snprintf): calling function and line, the
 * fixture's UTF-8 text and both fixture lengths, followed by the buffer size,
 * return code and input length of the conversion under test.
 * test is dereferenced unconditionally and must not be NULL. */
static void create_prefix(char* prefix, size_t prefixlen, size_t buffersize, SSIZE_T rc,
SSIZE_T inputlen, const testcase_t* test, const char* fkt, size_t line)
{
_snprintf(prefix, prefixlen,
"[%s:%" PRIuz "] '%s' [utf8: %" PRIuz ", utf16: %" PRIuz "] buffersize: %" PRIuz
", rc: %" PRIdz ", inputlen: %" PRIdz ":: ",
fkt, line, test->utf8, test->utf8len, test->utf16len, buffersize, rc, inputlen);
}
#define compare_utf16(what, buffersize, rc, inputlen, test) \
	compare_utf16_int((what), (buffersize), (rc), (inputlen), (test), __FUNCTION__, __LINE__)

/* Validates the outcome of a UTF-8 -> UTF-16 conversion against a fixture.
 *
 * what        the buffer the conversion wrote to (may be NULL if buffersize is 0)
 * buffersize  the output size (in WCHAR) passed to the conversion
 * rc          the value the conversion returned
 * inputlen    the input length passed to the conversion (only used for logging)
 * test        the fixture holding the expected result
 * fkt, line   caller location for log messages
 *
 * A buffer larger than the expected length must produce exactly that length.
 * Otherwise rc must equal buffersize, or be 0 with GetLastError() reporting
 * ERROR_INSUFFICIENT_BUFFER (accepted without further checks).
 * Returns TRUE if the result is acceptable, FALSE (after logging) otherwise.
 */
static BOOL compare_utf16_int(const WCHAR* what, size_t buffersize, SSIZE_T rc, SSIZE_T inputlen,
                              const testcase_t* test, const char* fkt, size_t line)
{
	char prefix[8192] = { 0 };

	/* Check the arguments before create_prefix() dereferences test. */
	WINPR_ASSERT(what || (buffersize == 0));
	WINPR_ASSERT(test);
	create_prefix(prefix, ARRAYSIZE(prefix), buffersize, rc, inputlen, test, fkt, line);

	const size_t welen = _wcsnlen(test->utf16, test->utf16len);
	if (buffersize > welen)
	{
		if (rc != welen)
		{
			fprintf(stderr, "%s length does not match expectation: %" PRIdz " != %" PRIuz "\n",
			        prefix, rc, welen);
			return FALSE;
		}
	}
	else if (rc != buffersize)
	{
		if (rc != 0)
		{
			fprintf(stderr, "%s length does not match buffersize: %" PRIdz " != %" PRIuz "\n",
			        prefix, rc, buffersize);
			return FALSE;
		}

		/* A too small buffer may legitimately be rejected by the conversion. */
		const DWORD err = GetLastError();
		if (err != ERROR_INSUFFICIENT_BUFFER)
		{
			fprintf(stderr,
			        "%s length does not match buffersize: %" PRIdz " != %" PRIuz
			        ", unexpected GetLastError() 0x%08" PRIx32 "\n",
			        prefix, rc, buffersize, err);
			return FALSE;
		}
		return TRUE;
	}

	/* With room to spare the output must be terminated right after rc units. */
	if (buffersize > rc)
	{
		const size_t wlen = _wcsnlen(what, buffersize);
		if (wlen != rc)
		{
			fprintf(stderr, "%s length does not match wcslen: %" PRIdz " != %" PRIuz "\n", prefix,
			        rc, wlen);
			return FALSE;
		}
	}

	if (memcmp(test->utf16, what, rc * sizeof(WCHAR)) != 0)
	{
		fprintf(stderr, "%s contents does not match expectations: TODO '%s' != '%s'\n", prefix,
		        test->utf8, test->utf8);
		return FALSE;
	}

	printf("%s success\n", prefix);
	return TRUE;
}
#define compare_utf8(what, buffersize, rc, inputlen, test) \
	compare_utf8_int((what), (buffersize), (rc), (inputlen), (test), __FUNCTION__, __LINE__)

/* Validates the outcome of a UTF-16 -> UTF-8 conversion against a fixture.
 *
 * what        the buffer the conversion wrote to (may be NULL if buffersize is 0)
 * buffersize  the output size (in char) passed to the conversion
 * rc          the value the conversion returned
 * inputlen    the input length passed to the conversion (only used for logging)
 * test        the fixture holding the expected result
 * fkt, line   caller location for log messages
 *
 * A buffer larger than the expected length must produce exactly that length.
 * Otherwise rc must equal buffersize, or be 0 with GetLastError() reporting
 * ERROR_INSUFFICIENT_BUFFER (accepted without further checks).
 * Returns TRUE if the result is acceptable, FALSE (after logging) otherwise.
 */
static BOOL compare_utf8_int(const char* what, size_t buffersize, SSIZE_T rc, SSIZE_T inputlen,
                             const testcase_t* test, const char* fkt, size_t line)
{
	char prefix[8192] = { 0 };

	/* Check the arguments before create_prefix() dereferences test. */
	WINPR_ASSERT(what || (buffersize == 0));
	WINPR_ASSERT(test);
	create_prefix(prefix, ARRAYSIZE(prefix), buffersize, rc, inputlen, test, fkt, line);

	const size_t slen = strnlen(test->utf8, test->utf8len);
	if (buffersize > slen)
	{
		if (rc != slen)
		{
			fprintf(stderr, "%s length does not match expectation: %" PRIdz " != %" PRIuz "\n",
			        prefix, rc, slen);
			return FALSE;
		}
	}
	else if (rc != buffersize)
	{
		if (rc != 0)
		{
			fprintf(stderr, "%s length does not match buffersize: %" PRIdz " != %" PRIuz "\n",
			        prefix, rc, buffersize);
			return FALSE;
		}

		/* A too small buffer may legitimately be rejected by the conversion. */
		const DWORD err = GetLastError();
		if (err != ERROR_INSUFFICIENT_BUFFER)
		{
			fprintf(stderr,
			        "%s length does not match buffersize: %" PRIdz " != %" PRIuz
			        ", unexpected GetLastError() 0x%08" PRIx32 "\n",
			        prefix, rc, buffersize, err);
			return FALSE;
		}
		return TRUE;
	}

	/* With room to spare the output must be terminated right after rc bytes. */
	if (buffersize > rc)
	{
		const size_t wlen = strnlen(what, buffersize);
		if (wlen != rc)
		{
			fprintf(stderr, "%s length does not match strnlen: %" PRIdz " != %" PRIuz "\n", prefix,
			        rc, wlen);
			return FALSE;
		}
	}

	if (memcmp(test->utf8, what, rc) != 0)
	{
		fprintf(stderr, "%s contents does not match expectations: '%s' != '%s'\n", prefix, what,
		        test->utf8);
		return FALSE;
	}

	printf("%s success\n", prefix);
	return TRUE;
}
/* Exercises ConvertUtf8ToWChar() for one fixture: first the length query
 * (NULL buffer, size 0), then conversions into differently sized buffers. */
static BOOL test_convert_to_utf16(const testcase_t* test)
{
	/* Output sizes to try: ample, exactly utf16len, one more, one less. */
	const size_t sizes[] = { TESTCASE_BUFFER_SIZE, test->utf16len, test->utf16len + 1,
		                     test->utf16len - 1 };
	/* The "one less" entry has wrapped around for utf16len == 0, leave it out then. */
	size_t count = ARRAYSIZE(sizes);
	if (test->utf16len == 0)
		count--;

	const SSIZE_T rc2 = ConvertUtf8ToWChar(test->utf8, NULL, 0);
	const size_t expected = _wcsnlen(test->utf16, test->utf16len);
	if (rc2 != expected)
	{
		char prefix[8192] = { 0 };
		create_prefix(prefix, ARRAYSIZE(prefix), 0, rc2, -1, test, __FUNCTION__, __LINE__);
		fprintf(stderr, "%s ConvertUtf8ToWChar(%s, NULL, 0) expected %" PRIuz ", got %" PRIdz "\n",
		        prefix, test->utf8, expected, rc2);
		return FALSE;
	}

	for (size_t idx = 0; idx < count; idx++)
	{
		const size_t outlen = sizes[idx];
		WCHAR buffer[TESTCASE_BUFFER_SIZE] = { 0 };

		const SSIZE_T rc = ConvertUtf8ToWChar(test->utf8, buffer, outlen);
		if (!compare_utf16(buffer, outlen, rc, -1, test))
			return FALSE;
	}
	return TRUE;
}
/* Exercises ConvertUtf8NToWChar() for one fixture: first the length query
 * (NULL buffer, size 0), then conversions with varying input and output sizes. */
static BOOL test_convert_to_utf16_n(const testcase_t* test)
{
	/* Output and input sizes to try: ample, exact, one more, one less. */
	const size_t outsizes[] = { TESTCASE_BUFFER_SIZE, test->utf16len, test->utf16len + 1,
		                        test->utf16len - 1 };
	const size_t insizes[] = { TESTCASE_BUFFER_SIZE, test->utf8len, test->utf8len + 1,
		                       test->utf8len - 1 };
	/* The "one less" entries have wrapped around for a zero length, leave them out then. */
	size_t outcount = ARRAYSIZE(outsizes);
	if (test->utf16len == 0)
		outcount--;
	size_t incount = ARRAYSIZE(insizes);
	if (test->utf8len == 0)
		incount--;

	const SSIZE_T rc2 = ConvertUtf8NToWChar(test->utf8, test->utf8len, NULL, 0);
	const size_t expected = _wcsnlen(test->utf16, test->utf16len);
	if (rc2 != expected)
	{
		char prefix[8192] = { 0 };
		create_prefix(prefix, ARRAYSIZE(prefix), 0, rc2, test->utf8len, test, __FUNCTION__,
		              __LINE__);
		fprintf(stderr,
		        "%s ConvertUtf8NToWChar(%s, %" PRIuz ", NULL, 0) expected %" PRIuz ", got %" PRIdz
		        "\n",
		        prefix, test->utf8, test->utf8len, expected, rc2);
		return FALSE;
	}

	for (size_t outer = 0; outer < outcount; outer++)
	{
		/* NOTE(review): both sizes are selected with the outer index; the inner
		 * loop only repeats the identical call. Presumably a cross product of
		 * input and output sizes was intended - confirm before changing, as
		 * compare_utf16() does not use inputlen for the expected length. */
		const size_t inlen = insizes[outer];
		const size_t outlen = outsizes[outer];

		for (size_t repeat = 0; repeat < incount; repeat++)
		{
			WCHAR buffer[TESTCASE_BUFFER_SIZE] = { 0 };

			const SSIZE_T rc = ConvertUtf8NToWChar(test->utf8, inlen, buffer, outlen);
			if (!compare_utf16(buffer, outlen, rc, inlen, test))
				return FALSE;
		}
	}
	return TRUE;
}
/* Exercises ConvertWCharToUtf8() for one fixture: first the length query
 * (NULL buffer, size 0), then conversions into differently sized buffers. */
static BOOL test_convert_to_utf8(const testcase_t* test)
{
	/* Output sizes to try: ample, exactly utf8len, one more, one less. */
	const size_t sizes[] = { TESTCASE_BUFFER_SIZE, test->utf8len, test->utf8len + 1,
		                     test->utf8len - 1 };
	/* The "one less" entry has wrapped around for utf8len == 0, leave it out then. */
	size_t count = ARRAYSIZE(sizes);
	if (test->utf8len == 0)
		count--;

	const SSIZE_T rc2 = ConvertWCharToUtf8(test->utf16, NULL, 0);
	const size_t expected = strnlen(test->utf8, test->utf8len);
	if (rc2 != expected)
	{
		char prefix[8192] = { 0 };
		create_prefix(prefix, ARRAYSIZE(prefix), 0, rc2, -1, test, __FUNCTION__, __LINE__);
		fprintf(stderr, "%s ConvertWCharToUtf8(%s, NULL, 0) expected %" PRIuz ", got %" PRIdz "\n",
		        prefix, test->utf8, expected, rc2);
		return FALSE;
	}

	for (size_t idx = 0; idx < count; idx++)
	{
		const size_t outlen = sizes[idx];
		char buffer[TESTCASE_BUFFER_SIZE] = { 0 };

		const SSIZE_T rc = ConvertWCharToUtf8(test->utf16, buffer, outlen);
		if (!compare_utf8(buffer, outlen, rc, -1, test))
			return FALSE;
	}
	return TRUE;
}
/* Exercises ConvertWCharNToUtf8() for one fixture: first the length query
 * (NULL buffer, size 0), then conversions with varying input and output sizes. */
static BOOL test_convert_to_utf8_n(const testcase_t* test)
{
	/* Output and input sizes to try: ample, exact, one more, one less. */
	const size_t outsizes[] = { TESTCASE_BUFFER_SIZE, test->utf8len, test->utf8len + 1,
		                        test->utf8len - 1 };
	const size_t insizes[] = { TESTCASE_BUFFER_SIZE, test->utf16len, test->utf16len + 1,
		                       test->utf16len - 1 };
	/* The "one less" entries have wrapped around for a zero length, leave them out then. */
	size_t outcount = ARRAYSIZE(outsizes);
	if (test->utf8len == 0)
		outcount--;
	size_t incount = ARRAYSIZE(insizes);
	if (test->utf16len == 0)
		incount--;

	const SSIZE_T rc2 = ConvertWCharNToUtf8(test->utf16, test->utf16len, NULL, 0);
	const size_t expected = strnlen(test->utf8, test->utf8len);
	if (rc2 != expected)
	{
		char prefix[8192] = { 0 };
		create_prefix(prefix, ARRAYSIZE(prefix), 0, rc2, test->utf16len, test, __FUNCTION__,
		              __LINE__);
		fprintf(stderr,
		        "%s ConvertWCharNToUtf8(%s, %" PRIuz ", NULL, 0) expected %" PRIuz ", got %" PRIdz
		        "\n",
		        prefix, test->utf8, test->utf16len, expected, rc2);
		return FALSE;
	}

	for (size_t outer = 0; outer < outcount; outer++)
	{
		/* NOTE(review): both sizes are selected with the outer index; the inner
		 * loop only repeats the identical call. Presumably a cross product of
		 * input and output sizes was intended - confirm before changing, as
		 * compare_utf8() does not use inputlen for the expected length. */
		const size_t inlen = insizes[outer];
		const size_t outlen = outsizes[outer];

		for (size_t repeat = 0; repeat < incount; repeat++)
		{
			char buffer[TESTCASE_BUFFER_SIZE] = { 0 };

			const SSIZE_T rc = ConvertWCharNToUtf8(test->utf16, inlen, buffer, outlen);
			if (!compare_utf8(buffer, outlen, rc, inlen, test))
				return FALSE;
		}
	}
	return TRUE;
}
/* Runs all four Convert* checks (to UTF-16, to UTF-16 with length, to UTF-8,
 * to UTF-8 with length) on every fixture, stopping at the first failure. */
static BOOL test_conversion(const testcase_t* testcases, size_t count)
{
	WINPR_ASSERT(testcases || (count == 0));

	for (size_t idx = 0; idx < count; idx++)
	{
		const testcase_t* cur = &testcases[idx];

		printf("Running test case %" PRIuz " [%s]\n", idx, cur->utf8);

		/* && short-circuits, so a failing check skips the remaining ones. */
		const BOOL passed = test_convert_to_utf16(cur) && test_convert_to_utf16_n(cur) &&
		                    test_convert_to_utf8(cur) && test_convert_to_utf8_n(cur);
		if (!passed)
			return FALSE;
	}
	return TRUE;
}
#define compare_win_utf16(what, buffersize, rc, inputlen, test) \
	compare_win_utf16_int((what), (buffersize), (rc), (inputlen), (test), __FUNCTION__, __LINE__)

/* Validates the outcome of a MultiByteToWideChar() call against a fixture.
 *
 * Unlike the Convert* helpers the expected length includes the terminator
 * whenever the input is treated as '\0' terminated: that is the case for
 * inputlen <= 0, or when the fixture's UTF-8 text ends within inputlen bytes.
 *
 * what        the buffer the conversion wrote to (may be NULL if buffersize is 0)
 * buffersize  the output size (in WCHAR) passed to the conversion
 * rc          the value the conversion returned
 * inputlen    the input length passed to the conversion
 * test        the fixture holding the expected result
 * fkt, line   caller location for log messages
 *
 * Returns TRUE if the result is acceptable, FALSE (after logging) otherwise.
 */
static BOOL compare_win_utf16_int(const WCHAR* what, size_t buffersize, int rc, int inputlen,
                                  const testcase_t* test, const char* fkt, size_t line)
{
	char prefix[8192] = { 0 };

	/* Check the arguments before create_prefix() dereferences test. */
	WINPR_ASSERT(what || (buffersize == 0));
	WINPR_ASSERT(test);
	create_prefix(prefix, ARRAYSIZE(prefix), buffersize, rc, inputlen, test, fkt, line);

	BOOL isNullTerminated = TRUE;
	if (inputlen > 0)
		isNullTerminated = strnlen(test->utf8, inputlen) < inputlen;

	size_t welen = _wcsnlen(test->utf16, buffersize);
	if (isNullTerminated)
		welen++;

	if (buffersize >= welen)
	{
		if (rc != welen)
		{
			fprintf(stderr, "%s length does not match expectation: %d != %" PRIuz "\n", prefix, rc,
			        welen);
			return FALSE;
		}
	}
	else if (rc != buffersize)
	{
		if (rc != 0)
		{
			fprintf(stderr, "%s length does not match buffersize: %d != %" PRIuz "\n", prefix,
			        rc, buffersize);
			return FALSE;
		}

		/* A too small buffer may legitimately be rejected by the conversion. */
		const DWORD err = GetLastError();
		if (err != ERROR_INSUFFICIENT_BUFFER)
		{
			fprintf(stderr,
			        "%s length does not match buffersize: %d != %" PRIuz
			        ", unexpected GetLastError() 0x%08" PRIx32 "\n",
			        prefix, rc, buffersize, err);
			return FALSE;
		}
		return TRUE;
	}

	/* With room to spare the measured output length must agree with rc. */
	if (buffersize > rc)
	{
		size_t wlen = _wcsnlen(what, buffersize);
		if (isNullTerminated)
			wlen++;
		if (wlen != rc)
		{
			fprintf(stderr, "%s length does not match wcslen: %d != %" PRIuz "\n", prefix, rc,
			        wlen);
			return FALSE;
		}
	}

	if (memcmp(test->utf16, what, rc * sizeof(WCHAR)) != 0)
	{
		fprintf(stderr, "%s contents does not match expectations: TODO '%s' != '%s'\n", prefix,
		        test->utf8, test->utf8);
		return FALSE;
	}

	printf("%s success\n", prefix);
	return TRUE;
}
#define compare_win_utf8(what, buffersize, rc, inputlen, test) \
	compare_win_utf8_int((what), (buffersize), (rc), (inputlen), (test), __FUNCTION__, __LINE__)

/* Validates the outcome of a WideCharToMultiByte() call against a fixture.
 *
 * Unlike the Convert* helpers the expected length includes the terminator
 * whenever the input is treated as '\0' terminated: that is the case for
 * inputlen <= 0, or when the fixture's UTF-16 text ends within inputlen units.
 *
 * what        the buffer the conversion wrote to (may be NULL if buffersize is 0)
 * buffersize  the output size (in char) passed to the conversion
 * rc          the value the conversion returned
 * inputlen    the input length passed to the conversion
 * test        the fixture holding the expected result
 * fkt, line   caller location for log messages
 *
 * Returns TRUE if the result is acceptable, FALSE (after logging) otherwise.
 */
static BOOL compare_win_utf8_int(const char* what, size_t buffersize, SSIZE_T rc, SSIZE_T inputlen,
                                 const testcase_t* test, const char* fkt, size_t line)
{
	char prefix[8192] = { 0 };

	/* Check the arguments before create_prefix() dereferences test. */
	WINPR_ASSERT(what || (buffersize == 0));
	WINPR_ASSERT(test);
	create_prefix(prefix, ARRAYSIZE(prefix), buffersize, rc, inputlen, test, fkt, line);

	BOOL isNullTerminated = TRUE;
	if (inputlen > 0)
		isNullTerminated = _wcsnlen(test->utf16, inputlen) < inputlen;

	size_t slen = strnlen(test->utf8, test->utf8len);
	if (isNullTerminated)
		slen++;

	if (buffersize > slen)
	{
		if (rc != slen)
		{
			fprintf(stderr, "%s length does not match expectation: %" PRIdz " != %" PRIuz "\n",
			        prefix, rc, slen);
			return FALSE;
		}
	}
	else if (rc != buffersize)
	{
		if (rc != 0)
		{
			fprintf(stderr, "%s length does not match buffersize: %" PRIdz " != %" PRIuz "\n",
			        prefix, rc, buffersize);
			return FALSE;
		}

		/* A too small buffer may legitimately be rejected by the conversion. */
		const DWORD err = GetLastError();
		if (err != ERROR_INSUFFICIENT_BUFFER)
		{
			fprintf(stderr,
			        "%s length does not match buffersize: %" PRIdz " != %" PRIuz
			        ", unexpected GetLastError() 0x%08" PRIx32 "\n",
			        prefix, rc, buffersize, err);
			return FALSE;
		}
		return TRUE;
	}

	/* With room to spare the measured output length must agree with rc. */
	if (buffersize > rc)
	{
		size_t wlen = strnlen(what, buffersize);
		if (isNullTerminated)
			wlen++;
		if (wlen != rc)
		{
			fprintf(stderr, "%s length does not match wcslen: %" PRIdz " != %" PRIuz "\n", prefix,
			        rc, wlen);
			return FALSE;
		}
	}

	if (memcmp(test->utf8, what, rc) != 0)
	{
		fprintf(stderr, "%s contents does not match expectations: '%s' != '%s'\n", prefix, what,
		        test->utf8);
		return FALSE;
	}

	printf("%s success\n", prefix);
	return TRUE;
}
/* Exercises MultiByteToWideChar() with a '\0' terminated input (length -1):
 * first the size query, which must include the terminator, then conversions
 * into differently sized buffers. */
static BOOL test_win_convert_to_utf16(const testcase_t* test)
{
	/* Output sizes to try: ample, exactly utf16len, one more, one less. */
	const size_t sizes[] = { TESTCASE_BUFFER_SIZE, test->utf16len, test->utf16len + 1,
		                     test->utf16len - 1 };
	/* The "one less" entry has wrapped around for utf16len == 0, leave it out then. */
	size_t count = ARRAYSIZE(sizes);
	if (test->utf16len == 0)
		count--;

	const int rc2 = MultiByteToWideChar(CP_UTF8, 0, test->utf8, -1, NULL, 0);
	const size_t wlen = _wcsnlen(test->utf16, test->utf16len);
	if (rc2 != wlen + 1)
	{
		char prefix[8192] = { 0 };
		create_prefix(prefix, ARRAYSIZE(prefix), 0, rc2, -1, test, __FUNCTION__, __LINE__);
		fprintf(stderr,
		        "%s MultiByteToWideChar(CP_UTF8, 0, %s, [-1], NULL, 0) expected %" PRIuz
		        ", got %d\n",
		        prefix, test->utf8, wlen + 1, rc2);
		return FALSE;
	}

	for (size_t idx = 0; idx < count; idx++)
	{
		const size_t outlen = sizes[idx];
		WCHAR buffer[TESTCASE_BUFFER_SIZE] = { 0 };

		const int rc = MultiByteToWideChar(CP_UTF8, 0, test->utf8, -1, buffer, outlen);
		if (!compare_win_utf16(buffer, outlen, rc, -1, test))
			return FALSE;
	}
	return TRUE;
}
/* Exercises MultiByteToWideChar() with an explicit input length: first the
 * size query (which counts the terminator only if it lies within utf8len),
 * then conversions with varying input and output sizes. */
static BOOL test_win_convert_to_utf16_n(const testcase_t* test)
{
	/* Output and input sizes to try: ample, exact, one more, one less. */
	const size_t outsizes[] = { TESTCASE_BUFFER_SIZE, test->utf16len, test->utf16len + 1,
		                        test->utf16len - 1 };
	const size_t insizes[] = { TESTCASE_BUFFER_SIZE, test->utf8len, test->utf8len + 1,
		                       test->utf8len - 1 };
	/* The "one less" entries have wrapped around for a zero length, leave them out then. */
	size_t outcount = ARRAYSIZE(outsizes);
	if (test->utf16len == 0)
		outcount--;
	size_t incount = ARRAYSIZE(insizes);
	if (test->utf8len == 0)
		incount--;

	const BOOL isNullTerminated = strnlen(test->utf8, test->utf8len) < test->utf8len;
	const int rc2 = MultiByteToWideChar(CP_UTF8, 0, test->utf8, test->utf8len, NULL, 0);
	size_t wlen = _wcsnlen(test->utf16, test->utf16len);
	if (isNullTerminated)
		wlen++;

	if (rc2 != wlen)
	{
		char prefix[8192] = { 0 };
		create_prefix(prefix, ARRAYSIZE(prefix), 0, rc2, test->utf8len, test, __FUNCTION__,
		              __LINE__);
		fprintf(stderr,
		        "%s MultiByteToWideChar(CP_UTF8, 0, %s, %" PRIuz ", NULL, 0) expected %" PRIuz
		        ", got %d\n",
		        prefix, test->utf8, test->utf8len, wlen, rc2);
		return FALSE;
	}

	for (size_t outer = 0; outer < outcount; outer++)
	{
		/* NOTE(review): both sizes are selected with the outer index; the inner
		 * loop only repeats the identical call. Presumably a cross product of
		 * input and output sizes was intended - confirm before changing. */
		const size_t inlen = insizes[outer];
		const size_t outlen = outsizes[outer];

		for (size_t repeat = 0; repeat < incount; repeat++)
		{
			WCHAR buffer[TESTCASE_BUFFER_SIZE] = { 0 };

			const int rc = MultiByteToWideChar(CP_UTF8, 0, test->utf8, inlen, buffer, outlen);
			if (!compare_win_utf16(buffer, outlen, rc, inlen, test))
				return FALSE;
		}
	}
	return TRUE;
}
/* Exercises WideCharToMultiByte() with a '\0' terminated input (length -1):
 * first the size query, which must include the terminator, then conversions
 * into differently sized buffers. */
static BOOL test_win_convert_to_utf8(const testcase_t* test)
{
	/* Output sizes to try: ample, exactly utf8len, one more, one less. */
	const size_t sizes[] = { TESTCASE_BUFFER_SIZE, test->utf8len, test->utf8len + 1,
		                     test->utf8len - 1 };
	/* The "one less" entry has wrapped around for utf8len == 0, leave it out then. */
	size_t count = ARRAYSIZE(sizes);
	if (test->utf8len == 0)
		count--;

	const int rc2 = WideCharToMultiByte(CP_UTF8, 0, test->utf16, -1, NULL, 0, NULL, NULL);
	const size_t wlen = strnlen(test->utf8, test->utf8len) + 1;
	if (rc2 != wlen)
	{
		char prefix[8192] = { 0 };
		create_prefix(prefix, ARRAYSIZE(prefix), 0, rc2, -1, test, __FUNCTION__, __LINE__);
		fprintf(stderr,
		        "%s WideCharToMultiByte(CP_UTF8, 0, %s, -1, NULL, 0, NULL, NULL) expected %" PRIuz
		        ", got %d\n",
		        prefix, test->utf8, wlen, rc2);
		return FALSE;
	}

	for (size_t idx = 0; idx < count; idx++)
	{
		const size_t outlen = sizes[idx];
		char buffer[TESTCASE_BUFFER_SIZE] = { 0 };

		const int rc = WideCharToMultiByte(CP_UTF8, 0, test->utf16, -1, buffer, outlen, NULL, NULL);
		if (!compare_win_utf8(buffer, outlen, rc, -1, test))
			return FALSE;
	}
	return TRUE;
}
/* Exercises WideCharToMultiByte() with an explicit input length: first the
 * size query (which counts the terminator only if it lies within utf16len),
 * then conversions with varying input and output sizes. */
static BOOL test_win_convert_to_utf8_n(const testcase_t* test)
{
	/* Output and input sizes to try: ample, exact, one more, one less. */
	const size_t outsizes[] = { TESTCASE_BUFFER_SIZE, test->utf8len, test->utf8len + 1,
		                        test->utf8len - 1 };
	const size_t insizes[] = { TESTCASE_BUFFER_SIZE, test->utf16len, test->utf16len + 1,
		                       test->utf16len - 1 };
	/* The "one less" entries have wrapped around for a zero length, leave them out then. */
	size_t outcount = ARRAYSIZE(outsizes);
	if (test->utf8len == 0)
		outcount--;
	size_t incount = ARRAYSIZE(insizes);
	if (test->utf16len == 0)
		incount--;

	const BOOL isNullTerminated = _wcsnlen(test->utf16, test->utf16len) < test->utf16len;
	const int rc2 =
	    WideCharToMultiByte(CP_UTF8, 0, test->utf16, test->utf16len, NULL, 0, NULL, NULL);
	size_t wlen = strnlen(test->utf8, test->utf8len);
	if (isNullTerminated)
		wlen++;

	if (rc2 != wlen)
	{
		char prefix[8192] = { 0 };
		create_prefix(prefix, ARRAYSIZE(prefix), 0, rc2, test->utf16len, test, __FUNCTION__,
		              __LINE__);
		fprintf(stderr,
		        "%s WideCharToMultiByte(CP_UTF8, 0, %s, %" PRIuz
		        ", NULL, 0, NULL, NULL) expected %" PRIuz ", got %d\n",
		        prefix, test->utf8, test->utf16len, wlen, rc2);
		return FALSE;
	}

	for (size_t outer = 0; outer < outcount; outer++)
	{
		/* NOTE(review): both sizes are selected with the outer index; the inner
		 * loop only repeats the identical call. Presumably a cross product of
		 * input and output sizes was intended - confirm before changing. */
		const size_t inlen = insizes[outer];
		const size_t outlen = outsizes[outer];

		for (size_t repeat = 0; repeat < incount; repeat++)
		{
			char buffer[TESTCASE_BUFFER_SIZE] = { 0 };

			const int rc =
			    WideCharToMultiByte(CP_UTF8, 0, test->utf16, inlen, buffer, outlen, NULL, NULL);
			if (!compare_win_utf8(buffer, outlen, rc, inlen, test))
				return FALSE;
		}
	}
	return TRUE;
}
/* Runs the four MultiByteToWideChar()/WideCharToMultiByte() checks on every
 * fixture, stopping at the first failure. */
static BOOL test_win_conversion(const testcase_t* testcases, size_t count)
{
	WINPR_ASSERT(testcases || (count == 0));

	for (size_t idx = 0; idx < count; idx++)
	{
		const testcase_t* cur = &testcases[idx];

		printf("Running test case %" PRIuz " [%s]\n", idx, cur->utf8);

		/* && short-circuits, so a failing check skips the remaining ones. */
		const BOOL passed = test_win_convert_to_utf16(cur) && test_win_convert_to_utf16_n(cur) &&
		                    test_win_convert_to_utf8(cur) && test_win_convert_to_utf8_n(cur);
		if (!passed)
			return FALSE;
	}
	return TRUE;
}
/* Letters */
/* UTF-8 encoding of U+00E7 (c with cedilla). The explicit \x00 plus the
 * literal's implicit terminator leave two trailing zero bytes in the array. */
static BYTE c_cedilla_UTF8[] = "\xC3\xA7\x00";
@ -510,6 +1156,12 @@ int TestUnicodeConversion(int argc, char* argv[])
WINPR_UNUSED(argc);
WINPR_UNUSED(argv);
if (!test_conversion(unit_testcases, ARRAYSIZE(unit_testcases)))
return -1;
if (!test_win_conversion(unit_testcases, ARRAYSIZE(unit_testcases)))
return -1;
/* Letters */
printf("Letters\n");
@ -637,5 +1289,6 @@ int TestUnicodeConversion(int argc, char* argv[])
}
*/
return 0;
}

View File

@ -351,6 +351,205 @@ int ConvertFromUnicode(UINT CodePage, DWORD dwFlags, LPCWSTR lpWideCharStr, int
/* Swaps the two bytes of each of the first length WCHAR units of wstr in
 * place. wstr may only be NULL if length is 0. */
void ByteSwapUnicode(WCHAR* wstr, size_t length)
{
	WINPR_ASSERT(wstr || (length == 0));

	WCHAR* cur = wstr;
	for (size_t remaining = length; remaining > 0; remaining--)
	{
		*cur = _byteswap_ushort(*cur);
		cur++;
	}
}
/* Converts the '\0' terminated UTF-16 string wstr to UTF-8.
 *
 * wstr  input string, NULL is treated like an empty string
 * str   result buffer, may be NULL together with len == 0 to query the length
 * len   size of str in char; values above INT32_MAX are clamped
 *
 * Returns the length of the converted string without its terminator. If the
 * underlying WideCharToMultiByte() call fails, its (<= 0) result is returned
 * unchanged.
 */
SSIZE_T ConvertWCharToUtf8(const WCHAR* wstr, char* str, size_t len)
{
	if (!wstr)
	{
		/* Leave a valid empty string behind so the result matches the length 0. */
		if (str && (len > 0))
			str[0] = '\0';
		return 0;
	}

	const int rc =
	    WideCharToMultiByte(CP_UTF8, 0, wstr, -1, str, (int)MIN(INT32_MAX, len), NULL, NULL);
	if (rc <= 0)
		return rc;

	/* rc counts every char written. Only if the buffer was filled completely
	 * and does not end with a terminator is rc itself the string length. */
	if (((size_t)rc == len) && str && (str[rc - 1] != '\0'))
		return rc;

	/* Otherwise the terminator is part of rc and must not be counted. */
	return rc - 1;
}
/* Converts at most wlen units of the UTF-16 string wstr to UTF-8, stopping at
 * the first '\0'.
 *
 * wstr  input string, must not be NULL unless wlen is 0
 * wlen  maximum number of WCHAR units to read from wstr
 * str   result buffer, may be NULL together with len == 0 to query the length
 * len   size of str in char; values above INT32_MAX are clamped
 *
 * Returns the length of the converted string without a terminator, 0 for
 * wlen == 0, or -1 on failure (including wlen > INT32_MAX). The result is
 * '\0' terminated whenever the buffer has room for the terminator.
 */
SSIZE_T ConvertWCharNToUtf8(const WCHAR* wstr, size_t wlen, char* str, size_t len)
{
	if (wlen == 0)
	{
		/* Same best-effort termination as for non-empty input below. */
		if (str && (len > 0))
			str[0] = '\0';
		return 0;
	}

	WINPR_ASSERT(wstr);

	/* Reject oversized input before scanning it. */
	if (wlen > INT32_MAX)
		return -1;

	/* If the string ends within wlen units, convert the terminator as well. */
	size_t iwlen = _wcsnlen(wstr, wlen);
	const BOOL isNullTerminated = (iwlen < wlen);
	if (isNullTerminated)
		iwlen++;

	const int rc = WideCharToMultiByte(CP_UTF8, 0, wstr, (int)iwlen, str, (int)MIN(INT32_MAX, len),
	                                   NULL, NULL);
	if ((rc <= 0) || ((len > 0) && ((size_t)rc > len)))
		return -1;

	if (!isNullTerminated)
	{
		/* No terminator was converted: add one if there is room for it. */
		if (str && ((size_t)rc < len))
			str[rc] = '\0';
		return rc;
	}

	/* Buffer filled completely without ending in a terminator: rc is the length. */
	if (((size_t)rc == len) && str && (str[rc - 1] != '\0'))
		return rc;

	/* Otherwise the converted terminator is part of rc and must not be counted. */
	return rc - 1;
}
/* Converts the '\0' terminated UTF-8 string str to UTF-16.
 *
 * str   input string, NULL is treated like an empty string
 * wstr  result buffer, may be NULL together with wlen == 0 to query the length
 * wlen  size of wstr in WCHAR; values above INT32_MAX are clamped
 *
 * Returns the length of the converted string without its terminator. If the
 * underlying MultiByteToWideChar() call fails, its (<= 0) result is returned
 * unchanged.
 */
SSIZE_T ConvertUtf8ToWChar(const char* str, WCHAR* wstr, size_t wlen)
{
	if (!str)
	{
		/* Leave a valid empty string behind so the result matches the length 0. */
		if (wstr && (wlen > 0))
			wstr[0] = '\0';
		return 0;
	}

	const int iwlen = (int)MIN(INT32_MAX, wlen);
	const int rc = MultiByteToWideChar(CP_UTF8, 0, str, -1, wstr, iwlen);
	if (rc <= 0)
		return rc;

	/* rc counts every WCHAR written. Only if the buffer was filled completely
	 * and does not end with a terminator is rc itself the string length. */
	if ((iwlen == rc) && wstr && (wstr[rc - 1] != '\0'))
		return rc;

	/* Otherwise the terminator is part of rc and must not be counted. */
	return rc - 1;
}
/* Converts at most len bytes of the UTF-8 string str to UTF-16, stopping at
 * the first '\0'.
 *
 * str   input string, must not be NULL unless len is 0
 * len   maximum number of bytes to read from str
 * wstr  result buffer, may be NULL together with wlen == 0 to query the length
 * wlen  size of wstr in WCHAR; values above INT32_MAX are clamped
 *
 * Returns the length of the converted string without a terminator, 0 for
 * len == 0, or -1 on failure (including len > INT32_MAX). The result is
 * '\0' terminated whenever the buffer has room for the terminator.
 */
SSIZE_T ConvertUtf8NToWChar(const char* str, size_t len, WCHAR* wstr, size_t wlen)
{
	if (len == 0)
	{
		/* Same best-effort termination as for non-empty input below. */
		if (wstr && (wlen > 0))
			wstr[0] = '\0';
		return 0;
	}

	WINPR_ASSERT(str);

	/* Reject oversized input before scanning it. */
	if (len > INT32_MAX)
		return -1;

	/* Only look at str now that it is known to be non-NULL and len is not 0.
	 * If the string ends within len bytes, convert the terminator as well. */
	size_t ilen = strnlen(str, len);
	const BOOL isNullTerminated = (ilen < len);
	if (isNullTerminated)
		ilen++;

	const int iwlen = (int)MIN(INT32_MAX, wlen);
	const int rc = MultiByteToWideChar(CP_UTF8, 0, str, (int)ilen, wstr, iwlen);
	if ((rc <= 0) || ((wlen > 0) && (rc > iwlen)))
		return -1;

	if (!isNullTerminated)
	{
		/* No terminator was converted: add one if there is room for it. */
		if (wstr && (rc < iwlen))
			wstr[rc] = '\0';
		return rc;
	}

	/* Buffer filled completely without ending in a terminator: rc is the length. */
	if ((rc == iwlen) && wstr && (wstr[rc - 1] != '\0'))
		return rc;

	/* Otherwise the converted terminator is part of rc and must not be counted. */
	return rc - 1;
}
/* Converts the '\0' terminated UTF-16 string wstr into a newly allocated UTF-8
 * string.
 *
 * pUtfCharLength, if not NULL, receives the length of the result without its
 * terminator; it is 0 whenever NULL is returned.
 *
 * Returns the calloc()ed string (to be released with free()), or NULL if the
 * conversion fails, the allocation fails, or the result would be empty.
 */
char* ConvertWCharToUtf8Alloc(const WCHAR* wstr, size_t* pUtfCharLength)
{
	/* Report length 0 until the conversion has fully succeeded. */
	if (pUtfCharLength)
		*pUtfCharLength = 0;

	/* First pass without a buffer only determines the required length. */
	const SSIZE_T rc = ConvertWCharToUtf8(wstr, NULL, 0);
	if (rc <= 0)
		return NULL;

	/* The allocation is one element larger than the size handed to the
	 * conversion, so the zeroed last element always stays untouched. */
	char* result = calloc((size_t)rc + 3ull, sizeof(char));
	if (!result)
		return NULL;

	const SSIZE_T rc2 = ConvertWCharToUtf8(wstr, result, (size_t)rc + 2ull);
	if (rc2 <= 0)
	{
		free(result);
		return NULL;
	}

	WINPR_ASSERT(rc == rc2);
	if (pUtfCharLength)
		*pUtfCharLength = (size_t)rc2;
	return result;
}
/* Converts at most wlen units of the UTF-16 string wstr into a newly allocated
 * UTF-8 string.
 *
 * pUtfCharLength, if not NULL, receives the length of the result without its
 * terminator; it is 0 whenever NULL is returned.
 *
 * Returns the calloc()ed string (to be released with free()), or NULL if the
 * conversion fails, the allocation fails, or the result would be empty.
 */
char* ConvertWCharNToUtf8Alloc(const WCHAR* wstr, size_t wlen, size_t* pUtfCharLength)
{
	/* Report length 0 until the conversion has fully succeeded. */
	if (pUtfCharLength)
		*pUtfCharLength = 0;

	/* First pass without a buffer only determines the required length. */
	const SSIZE_T rc = ConvertWCharNToUtf8(wstr, wlen, NULL, 0);
	if (rc <= 0)
		return NULL;

	/* The allocation is one element larger than the size handed to the
	 * conversion, so the zeroed last element always stays untouched. */
	char* result = calloc((size_t)rc + 3ull, sizeof(char));
	if (!result)
		return NULL;

	const SSIZE_T rc2 = ConvertWCharNToUtf8(wstr, wlen, result, (size_t)rc + 2ull);
	if (rc2 <= 0)
	{
		free(result);
		return NULL;
	}

	WINPR_ASSERT(rc == rc2);
	if (pUtfCharLength)
		*pUtfCharLength = (size_t)rc2;
	return result;
}
/* Converts the '\0' terminated UTF-8 string str into a newly allocated UTF-16
 * string.
 *
 * pSize, if not NULL, receives the length of the result in WCHAR without its
 * terminator; it is 0 whenever NULL is returned.
 *
 * Returns the calloc()ed string (to be released with free()), or NULL if the
 * conversion fails, the allocation fails, or the result would be empty.
 */
WCHAR* ConvertUtf8ToWCharAlloc(const char* str, size_t* pSize)
{
	/* Report length 0 until the conversion has fully succeeded. */
	if (pSize)
		*pSize = 0;

	/* First pass without a buffer only determines the required length. */
	const SSIZE_T rc = ConvertUtf8ToWChar(str, NULL, 0);
	if (rc <= 0)
		return NULL;

	/* The allocation is one element larger than the size handed to the
	 * conversion, so the zeroed last element always stays untouched. */
	WCHAR* result = calloc((size_t)rc + 3ull, sizeof(WCHAR));
	if (!result)
		return NULL;

	const SSIZE_T rc2 = ConvertUtf8ToWChar(str, result, (size_t)rc + 2ull);
	if (rc2 <= 0)
	{
		free(result);
		return NULL;
	}

	WINPR_ASSERT(rc == rc2);
	if (pSize)
		*pSize = (size_t)rc2;
	return result;
}
/* Converts at most len bytes of the UTF-8 string str into a newly allocated
 * UTF-16 string.
 *
 * pSize, if not NULL, receives the length of the result in WCHAR without its
 * terminator; it is 0 whenever NULL is returned.
 *
 * Returns the calloc()ed string (to be released with free()), or NULL if the
 * conversion fails, the allocation fails, or the result would be empty.
 */
WCHAR* ConvertUtf8NToWCharAlloc(const char* str, size_t len, size_t* pSize)
{
	/* Report length 0 until the conversion has fully succeeded. */
	if (pSize)
		*pSize = 0;

	/* First pass without a buffer only determines the required length. */
	const SSIZE_T rc = ConvertUtf8NToWChar(str, len, NULL, 0);
	if (rc <= 0)
		return NULL;

	/* The allocation is one element larger than the size handed to the
	 * conversion, so the zeroed last element always stays untouched. */
	WCHAR* result = calloc((size_t)rc + 3ull, sizeof(WCHAR));
	if (!result)
		return NULL;

	const SSIZE_T rc2 = ConvertUtf8NToWChar(str, len, result, (size_t)rc + 2ull);
	if (rc2 <= 0)
	{
		free(result);
		return NULL;
	}

	WINPR_ASSERT(rc == rc2);
	if (pSize)
		*pSize = (size_t)rc2;
	return result;
}