mirror of https://github.com/FreeRDP/FreeRDP
[winpr,crt] Added new unicode conversion functions
* Added functions converting WCHAR to/from UTF-8 with given buffers and proper size_t arguments to have a centralized check for integer overflows on RDP deserialization * Added allocating functions converting WCHAR to/from UTF-8 as convenience
This commit is contained in:
parent
5ae159303f
commit
5c91c30a18
|
@ -201,6 +201,130 @@ extern "C"
|
|||
#endif
|
||||
|
||||
/* Extended API */
|
||||
/** \brief Converts from UTF-16 to UTF-8
|
||||
*
|
||||
* The function does string conversions of any '\0' terminated input string
|
||||
*
|
||||
* Supplying len = 0 will return the required size of the buffer in characters.
|
||||
*
|
||||
* \warning Supplying a buffer length smaller than required will result in
|
||||
* platform dependent (=undefined) behaviour!
|
||||
*
|
||||
* \param wstr A '\0' terminated WCHAR string, may be NULL
|
||||
* \param str A pointer to the result string
|
||||
* \param len The length in characters of the result buffer
|
||||
*
|
||||
* \return the size of the converted string in char (strlen), or -1 for failure
|
||||
*/
|
||||
WINPR_API SSIZE_T ConvertWCharToUtf8(const WCHAR* wstr, char* str, size_t len);
|
||||
|
||||
/** \brief Converts from UTF-16 to UTF-8
|
||||
*
|
||||
* The function does string conversions of any input string of wlen (or less)
|
||||
* characters until it reaches the first '\0'.
|
||||
*
|
||||
* Supplying len = 0 will return the required size of the buffer in characters.
|
||||
*
|
||||
* \warning Supplying a buffer length smaller than required will result in
|
||||
* platform dependent (=undefined) behaviour!
|
||||
*
|
||||
* \param wstr A WCHAR string of \b wlen length
|
||||
* \param wlen The (buffer) length in characters of \b wstr
|
||||
* \param str A pointer to the result string
|
||||
* \param len The length in characters of the result buffer
|
||||
*
|
||||
* \return the size of the converted string in char (strlen), or -1 for failure
|
||||
*/
|
||||
WINPR_API SSIZE_T ConvertWCharNToUtf8(const WCHAR* wstr, size_t wlen, char* str, size_t len);
|
||||
|
||||
/** \brief Converts from UTF-8 to UTF-16
|
||||
*
|
||||
* The function does string conversions of any '\0' terminated input string
|
||||
*
|
||||
* Supplying wlen = 0 will return the required size of the buffer in WCHAR characters.
|
||||
*
|
||||
* \warning Supplying a buffer length smaller than required will result in
|
||||
* platform dependent (=undefined) behaviour!
|
||||
*
|
||||
* \param str A '\0' terminated CHAR string, may be NULL
|
||||
* \param wstr A pointer to the result WCHAR string
|
||||
* \param wlen The length in WCHAR characters of the result buffer
|
||||
*
|
||||
* \return the size of the converted string in WCHAR characters (wcslen), or -1 for failure
|
||||
*/
|
||||
WINPR_API SSIZE_T ConvertUtf8ToWChar(const char* str, WCHAR* wstr, size_t wlen);
|
||||
|
||||
/** \brief Converts from UTF-8 to UTF-16
|
||||
*
|
||||
* The function does string conversions of any input string of len (or less)
|
||||
* characters until it reaches the first '\0'.
|
||||
*
|
||||
* Supplying wlen = 0 will return the required size of the buffer in WCHAR characters.
|
||||
*
|
||||
* \warning Supplying a buffer length smaller than required will result in
|
||||
* platform dependent (=undefined) behaviour!
|
||||
*
|
||||
* \param str A CHAR string of \b len length
|
||||
* \param len The (buffer) length in characters of \b str
|
||||
* \param wstr A pointer to the result WCHAR string
|
||||
* \param wlen The length in WCHAR characters of the result buffer
|
||||
*
|
||||
* \return the size of the converted string in WCHAR characters (wcslen), or -1 for failure
|
||||
*/
|
||||
WINPR_API SSIZE_T ConvertUtf8NToWChar(const char* str, size_t len, WCHAR* wstr, size_t wlen);
|
||||
|
||||
/** \brief Converts from UTF-16 to UTF-8, returns an allocated string
|
||||
*
|
||||
* The function does string conversions of any '\0' terminated input string
|
||||
*
|
||||
* \param wstr A '\0' terminated WCHAR string, may be NULL
|
||||
* \param pSize Ignored if NULL, otherwise receives the length of the result string in
|
||||
* characters (strlen)
|
||||
*
|
||||
* \return An allocated zero terminated UTF-8 string or NULL in case of failure.
|
||||
*/
|
||||
WINPR_API char* ConvertWCharToUtf8Alloc(const WCHAR* wstr, size_t* pSize);
|
||||
|
||||
/** \brief Converts from UTF-16 to UTF-8, returns an allocated string
|
||||
*
|
||||
* The function does string conversions of any input string of wlen (or less)
|
||||
* characters until it reaches the first '\0'.
|
||||
*
|
||||
* \param wstr A WCHAR string of \b wlen length
|
||||
* \param wlen The (buffer) length in characters of \b wstr
|
||||
* \param pSize Ignored if NULL, otherwise receives the length of the result string in
|
||||
* characters (strlen)
|
||||
*
|
||||
* \return An allocated zero terminated UTF-8 string or NULL in case of failure.
|
||||
*/
|
||||
WINPR_API char* ConvertWCharNToUtf8Alloc(const WCHAR* wstr, size_t wlen, size_t* pSize);
|
||||
|
||||
/** \brief Converts from UTF-8 to UTF-16, returns an allocated string
|
||||
*
|
||||
* The function does string conversions of any '\0' terminated input string
|
||||
*
|
||||
* \param str A '\0' terminated CHAR string, may be NULL
|
||||
* \param pSize Ignored if NULL, otherwise receives the length of the result string in
|
||||
* characters (wcslen)
|
||||
*
|
||||
* \return An allocated zero terminated UTF-16 string or NULL in case of failure.
|
||||
*/
|
||||
WINPR_API WCHAR* ConvertUtf8ToWCharAlloc(const char* str, size_t* pSize);
|
||||
|
||||
/** \brief Converts from UTF-8 to UTF-16, returns an allocated string
|
||||
*
|
||||
* The function does string conversions of any input string of len (or less)
|
||||
* characters until it reaches the first '\0'.
|
||||
*
|
||||
* \param str A CHAR string of \b len length
|
||||
* \param len The (buffer) length in characters of \b str
|
||||
* \param pSize Ignored if NULL, otherwise receives the length of the result string in
|
||||
* characters (wcslen)
|
||||
*
|
||||
* \return An allocated zero terminated UTF-16 string or NULL in case of failure.
|
||||
*/
|
||||
WINPR_API WCHAR* ConvertUtf8NToWCharAlloc(const char* str, size_t len, size_t* pSize);
|
||||
|
||||
WINPR_API int ConvertToUnicode(UINT CodePage, DWORD dwFlags, LPCSTR lpMultiByteStr,
|
||||
int cbMultiByte, LPWSTR* lpWideCharStr, int cchWideChar);
|
||||
|
|
|
@ -35,6 +35,10 @@
|
|||
#include "../log.h"
|
||||
#define TAG WINPR_TAG("crt")
|
||||
|
||||
/* Fallback for platforms whose headers do not already provide MIN.
 * Each argument may be evaluated twice, so do not pass expressions with side effects. */
#if !defined(MIN)
#define MIN(x, y) (((x) < (y)) ? (x) : (y))
#endif
|
||||
|
||||
BOOL winpr_str_append(const char* what, char* buffer, size_t size, const char* separator)
|
||||
{
|
||||
const size_t used = strnlen(buffer, size);
|
||||
|
|
|
@ -6,6 +6,652 @@
|
|||
#include <winpr/print.h>
|
||||
#include <winpr/windows.h>
|
||||
|
||||
#define TESTCASE_BUFFER_SIZE 8192
|
||||
|
||||
typedef struct
|
||||
{
|
||||
char* utf8;
|
||||
size_t utf8len;
|
||||
WCHAR* utf16;
|
||||
size_t utf16len;
|
||||
} testcase_t;
|
||||
|
||||
// TODO: The unit tests do not check for valid code points, so always end the test
|
||||
// strings with a simple ASCII symbol for now.
|
||||
static const testcase_t unit_testcases[] = {
|
||||
{ "foo", 3, "f\x00o\x00o\x00\x00\x00", 3 },
|
||||
{ "foo", 4, "f\x00o\x00o\x00\x00\x00", 4 },
|
||||
{ "✊🎅ęʥ꣸𑗊a", 19,
|
||||
"\x0a\x27\x3c\xd8\x85\xdf\x19\x01\xa5\x02\xf8\xa8\x05\xd8\xca\xdd\x61\x00\x00\x00", 9 }
|
||||
};
|
||||
|
||||
/* Builds the common "[function:line] 'text' [...]" prefix that every diagnostic of this
 * test prints, so a failing check can be traced back to the test case and call site.
 *
 * prefix/prefixlen: destination buffer and its size
 * buffersize:       output buffer size that was offered to the conversion
 * rc:               value the conversion returned
 * inputlen:         input length that was passed to the conversion (-1 if none)
 * test:             test case being checked
 * fkt/line:         function name and line of the call site
 */
static void create_prefix(char* prefix, size_t prefixlen, size_t buffersize, SSIZE_T rc,
                          SSIZE_T inputlen, const testcase_t* test, const char* fkt, size_t line)
{
    const char* text = test->utf8;
    const size_t utf8len = test->utf8len;
    const size_t utf16len = test->utf16len;

    _snprintf(prefix, prefixlen,
              "[%s:%" PRIuz "] '%s' [utf8: %" PRIuz ", utf16: %" PRIuz "] buffersize: %" PRIuz
              ", rc: %" PRIdz ", inputlen: %" PRIdz ":: ",
              fkt, line, text, utf8len, utf16len, buffersize, rc, inputlen);
}
|
||||
|
||||
#define compare_utf16(what, buffersize, rc, inputlen, test) \
|
||||
compare_utf16_int((what), (buffersize), (rc), (inputlen), (test), __FUNCTION__, __LINE__)
|
||||
static BOOL compare_utf16_int(const WCHAR* what, size_t buffersize, SSIZE_T rc, SSIZE_T inputlen,
|
||||
const testcase_t* test, const char* fkt, size_t line)
|
||||
{
|
||||
char prefix[8192] = { 0 };
|
||||
create_prefix(prefix, ARRAYSIZE(prefix), buffersize, rc, inputlen, test, fkt, line);
|
||||
|
||||
WINPR_ASSERT(what || (buffersize == 0));
|
||||
WINPR_ASSERT(test);
|
||||
|
||||
const size_t welen = _wcsnlen(test->utf16, test->utf16len);
|
||||
if (buffersize > welen)
|
||||
{
|
||||
if (rc != welen)
|
||||
{
|
||||
fprintf(stderr, "%s length does not match expectation: %" PRIdz " != %" PRIuz "\n",
|
||||
prefix, rc, welen);
|
||||
return FALSE;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (rc != buffersize)
|
||||
{
|
||||
if (rc != 0)
|
||||
{
|
||||
fprintf(stderr, "%s length does not match buffersize: %" PRIdz " != %" PRIuz "\n",
|
||||
prefix, rc, buffersize);
|
||||
return FALSE;
|
||||
}
|
||||
else
|
||||
{
|
||||
const DWORD err = GetLastError();
|
||||
if (err != ERROR_INSUFFICIENT_BUFFER)
|
||||
{
|
||||
|
||||
fprintf(stderr,
|
||||
"%s length does not match buffersize: %" PRIdz " != %" PRIuz
|
||||
", unexpected GetLastError() 0x08%" PRIx32 "\n",
|
||||
prefix, rc, buffersize, err);
|
||||
return FALSE;
|
||||
}
|
||||
else
|
||||
return TRUE;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (buffersize > rc)
|
||||
{
|
||||
const size_t wlen = _wcsnlen(what, buffersize);
|
||||
if (wlen != rc)
|
||||
{
|
||||
fprintf(stderr, "%s length does not match wcslen: %" PRIdz " != %" PRIuz "\n", prefix,
|
||||
rc, wlen);
|
||||
return FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
if (memcmp(test->utf16, what, rc * sizeof(WCHAR)) != 0)
|
||||
{
|
||||
fprintf(stderr, "%s contents does not match expectations: TODO '%s' != '%s'\n", prefix,
|
||||
test->utf8, test->utf8);
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
printf("%s success\n", prefix);
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
#define compare_utf8(what, buffersize, rc, inputlen, test) \
|
||||
compare_utf8_int((what), (buffersize), (rc), (inputlen), (test), __FUNCTION__, __LINE__)
|
||||
static BOOL compare_utf8_int(const char* what, size_t buffersize, SSIZE_T rc, SSIZE_T inputlen,
|
||||
const testcase_t* test, const char* fkt, size_t line)
|
||||
{
|
||||
char prefix[8192] = { 0 };
|
||||
create_prefix(prefix, ARRAYSIZE(prefix), buffersize, rc, inputlen, test, fkt, line);
|
||||
|
||||
WINPR_ASSERT(what || (buffersize == 0));
|
||||
WINPR_ASSERT(test);
|
||||
|
||||
const size_t slen = strnlen(test->utf8, test->utf8len);
|
||||
if (buffersize > slen)
|
||||
{
|
||||
if (rc != slen)
|
||||
{
|
||||
fprintf(stderr, "%s length does not match expectation: %" PRIdz " != %" PRIuz "\n",
|
||||
prefix, rc, slen);
|
||||
return FALSE;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (rc != buffersize)
|
||||
{
|
||||
if (rc != 0)
|
||||
{
|
||||
fprintf(stderr, "%s length does not match buffersize: %" PRIdz " != %" PRIuz "\n",
|
||||
prefix, rc, buffersize);
|
||||
return FALSE;
|
||||
}
|
||||
else
|
||||
{
|
||||
const DWORD err = GetLastError();
|
||||
if (err != ERROR_INSUFFICIENT_BUFFER)
|
||||
{
|
||||
|
||||
fprintf(stderr,
|
||||
"%s length does not match buffersize: %" PRIdz " != %" PRIuz
|
||||
", unexpected GetLastError() 0x08%" PRIx32 "\n",
|
||||
prefix, rc, buffersize, err);
|
||||
return FALSE;
|
||||
}
|
||||
else
|
||||
return TRUE;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (buffersize > rc)
|
||||
{
|
||||
const size_t wlen = strnlen(what, buffersize);
|
||||
if (wlen != rc)
|
||||
{
|
||||
fprintf(stderr, "%s length does not match strnlen: %" PRIdz " != %" PRIuz "\n", prefix,
|
||||
rc, wlen);
|
||||
return FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
if (memcmp(test->utf8, what, rc) != 0)
|
||||
{
|
||||
fprintf(stderr, "%s contents does not match expectations: '%s' != '%s'\n", prefix, what,
|
||||
test->utf8);
|
||||
return FALSE;
|
||||
}
|
||||
printf("%s success\n", prefix);
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
static BOOL test_convert_to_utf16(const testcase_t* test)
|
||||
{
|
||||
const size_t len[] = { TESTCASE_BUFFER_SIZE, test->utf16len, test->utf16len + 1,
|
||||
test->utf16len - 1 };
|
||||
const size_t max = test->utf16len > 0 ? ARRAYSIZE(len) : ARRAYSIZE(len) - 1;
|
||||
|
||||
const SSIZE_T rc2 = ConvertUtf8ToWChar(test->utf8, NULL, 0);
|
||||
const size_t wlen = _wcsnlen(test->utf16, test->utf16len);
|
||||
if (rc2 != wlen)
|
||||
{
|
||||
char prefix[8192] = { 0 };
|
||||
create_prefix(prefix, ARRAYSIZE(prefix), 0, rc2, -1, test, __FUNCTION__, __LINE__);
|
||||
fprintf(stderr, "%s ConvertUtf8ToWChar(%s, NULL, 0) expected %" PRIuz ", got %" PRIdz "\n",
|
||||
prefix, test->utf8, wlen, rc2);
|
||||
return FALSE;
|
||||
}
|
||||
for (size_t x = 0; x < max; x++)
|
||||
{
|
||||
WCHAR buffer[TESTCASE_BUFFER_SIZE] = { 0 };
|
||||
const SSIZE_T rc = ConvertUtf8ToWChar(test->utf8, buffer, len[x]);
|
||||
if (!compare_utf16(buffer, len[x], rc, -1, test))
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
static BOOL test_convert_to_utf16_n(const testcase_t* test)
|
||||
{
|
||||
const size_t len[] = { TESTCASE_BUFFER_SIZE, test->utf16len, test->utf16len + 1,
|
||||
test->utf16len - 1 };
|
||||
const size_t max = test->utf16len > 0 ? ARRAYSIZE(len) : ARRAYSIZE(len) - 1;
|
||||
|
||||
const SSIZE_T rc2 = ConvertUtf8NToWChar(test->utf8, test->utf8len, NULL, 0);
|
||||
const size_t wlen = _wcsnlen(test->utf16, test->utf16len);
|
||||
if (rc2 != wlen)
|
||||
{
|
||||
char prefix[8192] = { 0 };
|
||||
create_prefix(prefix, ARRAYSIZE(prefix), 0, rc2, test->utf8len, test, __FUNCTION__,
|
||||
__LINE__);
|
||||
fprintf(stderr,
|
||||
"%s ConvertUtf8NToWChar(%s, %" PRIuz ", NULL, 0) expected %" PRIuz ", got %" PRIdz
|
||||
"\n",
|
||||
prefix, test->utf8, test->utf8len, wlen, rc2);
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
for (size_t x = 0; x < max; x++)
|
||||
{
|
||||
const size_t ilen[] = { TESTCASE_BUFFER_SIZE, test->utf8len, test->utf8len + 1,
|
||||
test->utf8len - 1 };
|
||||
const size_t imax = test->utf8len > 0 ? ARRAYSIZE(ilen) : ARRAYSIZE(ilen) - 1;
|
||||
|
||||
for (size_t y = 0; y < imax; y++)
|
||||
{
|
||||
WCHAR buffer[TESTCASE_BUFFER_SIZE] = { 0 };
|
||||
SSIZE_T rc = ConvertUtf8NToWChar(test->utf8, ilen[x], buffer, len[x]);
|
||||
if (!compare_utf16(buffer, len[x], rc, ilen[x], test))
|
||||
return FALSE;
|
||||
}
|
||||
}
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
static BOOL test_convert_to_utf8(const testcase_t* test)
|
||||
{
|
||||
const size_t len[] = { TESTCASE_BUFFER_SIZE, test->utf8len, test->utf8len + 1,
|
||||
test->utf8len - 1 };
|
||||
const size_t max = test->utf8len > 0 ? ARRAYSIZE(len) : ARRAYSIZE(len) - 1;
|
||||
|
||||
const SSIZE_T rc2 = ConvertWCharToUtf8(test->utf16, NULL, 0);
|
||||
const size_t wlen = strnlen(test->utf8, test->utf8len);
|
||||
if (rc2 != wlen)
|
||||
{
|
||||
char prefix[8192] = { 0 };
|
||||
create_prefix(prefix, ARRAYSIZE(prefix), 0, rc2, -1, test, __FUNCTION__, __LINE__);
|
||||
fprintf(stderr, "%s ConvertWCharToUtf8(%s, NULL, 0) expected %" PRIuz ", got %" PRIdz "\n",
|
||||
prefix, test->utf8, wlen, rc2);
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
for (size_t x = 0; x < max; x++)
|
||||
{
|
||||
char buffer[TESTCASE_BUFFER_SIZE] = { 0 };
|
||||
SSIZE_T rc = ConvertWCharToUtf8(test->utf16, buffer, len[x]);
|
||||
if (!compare_utf8(buffer, len[x], rc, -1, test))
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
static BOOL test_convert_to_utf8_n(const testcase_t* test)
|
||||
{
|
||||
const size_t len[] = { TESTCASE_BUFFER_SIZE, test->utf8len, test->utf8len + 1,
|
||||
test->utf8len - 1 };
|
||||
const size_t max = test->utf8len > 0 ? ARRAYSIZE(len) : ARRAYSIZE(len) - 1;
|
||||
|
||||
const SSIZE_T rc2 = ConvertWCharNToUtf8(test->utf16, test->utf16len, NULL, 0);
|
||||
const size_t wlen = strnlen(test->utf8, test->utf8len);
|
||||
if (rc2 != wlen)
|
||||
{
|
||||
char prefix[8192] = { 0 };
|
||||
create_prefix(prefix, ARRAYSIZE(prefix), 0, rc2, test->utf16len, test, __FUNCTION__,
|
||||
__LINE__);
|
||||
fprintf(stderr,
|
||||
"%s ConvertWCharNToUtf8(%s, %" PRIuz ", NULL, 0) expected %" PRIuz ", got %" PRIdz
|
||||
"\n",
|
||||
prefix, test->utf8, test->utf16len, wlen, rc2);
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
for (size_t x = 0; x < max; x++)
|
||||
{
|
||||
const size_t ilen[] = { TESTCASE_BUFFER_SIZE, test->utf16len, test->utf16len + 1,
|
||||
test->utf16len - 1 };
|
||||
const size_t imax = test->utf16len > 0 ? ARRAYSIZE(ilen) : ARRAYSIZE(ilen) - 1;
|
||||
|
||||
for (size_t y = 0; y < imax; y++)
|
||||
{
|
||||
char buffer[TESTCASE_BUFFER_SIZE] = { 0 };
|
||||
SSIZE_T rc = ConvertWCharNToUtf8(test->utf16, ilen[x], buffer, len[x]);
|
||||
if (!compare_utf8(buffer, len[x], rc, ilen[x], test))
|
||||
return FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
static BOOL test_conversion(const testcase_t* testcases, size_t count)
|
||||
{
|
||||
WINPR_ASSERT(testcases || (count == 0));
|
||||
for (size_t x = 0; x < count; x++)
|
||||
{
|
||||
const testcase_t* test = &testcases[x];
|
||||
|
||||
printf("Running test case %" PRIuz " [%s]\n", x, test->utf8);
|
||||
if (!test_convert_to_utf16(test))
|
||||
return FALSE;
|
||||
if (!test_convert_to_utf16_n(test))
|
||||
return FALSE;
|
||||
if (!test_convert_to_utf8(test))
|
||||
return FALSE;
|
||||
if (!test_convert_to_utf8_n(test))
|
||||
return FALSE;
|
||||
}
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
#define compare_win_utf16(what, buffersize, rc, inputlen, test) \
|
||||
compare_win_utf16_int((what), (buffersize), (rc), (inputlen), (test), __FUNCTION__, __LINE__)
|
||||
static BOOL compare_win_utf16_int(const WCHAR* what, size_t buffersize, int rc, int inputlen,
|
||||
const testcase_t* test, const char* fkt, size_t line)
|
||||
{
|
||||
char prefix[8192] = { 0 };
|
||||
create_prefix(prefix, ARRAYSIZE(prefix), buffersize, rc, inputlen, test, fkt, line);
|
||||
|
||||
WINPR_ASSERT(what || (buffersize == 0));
|
||||
WINPR_ASSERT(test);
|
||||
|
||||
BOOL isNullTerminated = TRUE;
|
||||
if (inputlen > 0)
|
||||
isNullTerminated = strnlen(test->utf8, inputlen) < inputlen;
|
||||
size_t welen = _wcsnlen(test->utf16, buffersize);
|
||||
if (isNullTerminated)
|
||||
welen++;
|
||||
|
||||
if (buffersize >= welen)
|
||||
{
|
||||
if (rc != welen)
|
||||
{
|
||||
fprintf(stderr, "%s length does not match expectation: %d != %" PRIuz "\n", prefix, rc,
|
||||
welen);
|
||||
return FALSE;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (rc != buffersize)
|
||||
{
|
||||
if (rc != 0)
|
||||
{
|
||||
fprintf(stderr, "%s length does not match buffersize: %d != %" PRIuz "\n", prefix,
|
||||
rc, buffersize);
|
||||
return FALSE;
|
||||
}
|
||||
else
|
||||
{
|
||||
const DWORD err = GetLastError();
|
||||
if (err != ERROR_INSUFFICIENT_BUFFER)
|
||||
{
|
||||
|
||||
fprintf(stderr,
|
||||
"%s length does not match buffersize: %d != %" PRIuz
|
||||
", unexpected GetLastError() 0x08%" PRIx32 "\n",
|
||||
prefix, rc, buffersize, err);
|
||||
return FALSE;
|
||||
}
|
||||
else
|
||||
return TRUE;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (buffersize > rc)
|
||||
{
|
||||
size_t wlen = _wcsnlen(what, buffersize);
|
||||
if (isNullTerminated)
|
||||
wlen++;
|
||||
if (wlen != rc)
|
||||
{
|
||||
fprintf(stderr, "%s length does not match wcslen: %d != %" PRIuz "\n", prefix, rc,
|
||||
wlen);
|
||||
return FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
if (memcmp(test->utf16, what, rc * sizeof(WCHAR)) != 0)
|
||||
{
|
||||
fprintf(stderr, "%s contents does not match expectations: TODO '%s' != '%s'\n", prefix,
|
||||
test->utf8, test->utf8);
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
printf("%s success\n", prefix);
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
#define compare_win_utf8(what, buffersize, rc, inputlen, test) \
|
||||
compare_win_utf8_int((what), (buffersize), (rc), (inputlen), (test), __FUNCTION__, __LINE__)
|
||||
static BOOL compare_win_utf8_int(const char* what, size_t buffersize, SSIZE_T rc, SSIZE_T inputlen,
|
||||
const testcase_t* test, const char* fkt, size_t line)
|
||||
{
|
||||
char prefix[8192] = { 0 };
|
||||
create_prefix(prefix, ARRAYSIZE(prefix), buffersize, rc, inputlen, test, fkt, line);
|
||||
|
||||
WINPR_ASSERT(what || (buffersize == 0));
|
||||
WINPR_ASSERT(test);
|
||||
|
||||
BOOL isNullTerminated = TRUE;
|
||||
if (inputlen > 0)
|
||||
isNullTerminated = _wcsnlen(test->utf16, inputlen) < inputlen;
|
||||
|
||||
size_t slen = strnlen(test->utf8, test->utf8len);
|
||||
if (isNullTerminated)
|
||||
slen++;
|
||||
|
||||
if (buffersize > slen)
|
||||
{
|
||||
if (rc != slen)
|
||||
{
|
||||
fprintf(stderr, "%s length does not match expectation: %" PRIdz " != %" PRIuz "\n",
|
||||
prefix, rc, slen);
|
||||
return FALSE;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (rc != buffersize)
|
||||
{
|
||||
if (rc != 0)
|
||||
{
|
||||
fprintf(stderr, "%s length does not match buffersize: %" PRIdz " != %" PRIuz "\n",
|
||||
prefix, rc, buffersize);
|
||||
return FALSE;
|
||||
}
|
||||
else
|
||||
{
|
||||
const DWORD err = GetLastError();
|
||||
if (err != ERROR_INSUFFICIENT_BUFFER)
|
||||
{
|
||||
|
||||
fprintf(stderr,
|
||||
"%s length does not match buffersize: %" PRIdz " != %" PRIuz
|
||||
", unexpected GetLastError() 0x08%" PRIx32 "\n",
|
||||
prefix, rc, buffersize, err);
|
||||
return FALSE;
|
||||
}
|
||||
else
|
||||
return TRUE;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (buffersize > rc)
|
||||
{
|
||||
size_t wlen = strnlen(what, buffersize);
|
||||
if (isNullTerminated)
|
||||
wlen++;
|
||||
|
||||
if (wlen != rc)
|
||||
{
|
||||
fprintf(stderr, "%s length does not match wcslen: %" PRIdz " != %" PRIuz "\n", prefix,
|
||||
rc, wlen);
|
||||
return FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
if (memcmp(test->utf8, what, rc) != 0)
|
||||
{
|
||||
fprintf(stderr, "%s contents does not match expectations: '%s' != '%s'\n", prefix, what,
|
||||
test->utf8);
|
||||
return FALSE;
|
||||
}
|
||||
printf("%s success\n", prefix);
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
static BOOL test_win_convert_to_utf16(const testcase_t* test)
|
||||
{
|
||||
const size_t len[] = { TESTCASE_BUFFER_SIZE, test->utf16len, test->utf16len + 1,
|
||||
test->utf16len - 1 };
|
||||
const size_t max = test->utf16len > 0 ? ARRAYSIZE(len) : ARRAYSIZE(len) - 1;
|
||||
|
||||
const int rc2 = MultiByteToWideChar(CP_UTF8, 0, test->utf8, -1, NULL, 0);
|
||||
const size_t wlen = _wcsnlen(test->utf16, test->utf16len);
|
||||
if (rc2 != wlen + 1)
|
||||
{
|
||||
char prefix[8192] = { 0 };
|
||||
create_prefix(prefix, ARRAYSIZE(prefix), 0, rc2, -1, test, __FUNCTION__, __LINE__);
|
||||
fprintf(stderr,
|
||||
"%s MultiByteToWideChar(CP_UTF8, 0, %s, [-1], NULL, 0) expected %" PRIuz
|
||||
", got %d\n",
|
||||
prefix, test->utf8, wlen + 1, rc2);
|
||||
return FALSE;
|
||||
}
|
||||
for (size_t x = 0; x < max; x++)
|
||||
{
|
||||
WCHAR buffer[TESTCASE_BUFFER_SIZE] = { 0 };
|
||||
const int rc = MultiByteToWideChar(CP_UTF8, 0, test->utf8, -1, buffer, len[x]);
|
||||
if (!compare_win_utf16(buffer, len[x], rc, -1, test))
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
static BOOL test_win_convert_to_utf16_n(const testcase_t* test)
|
||||
{
|
||||
const size_t len[] = { TESTCASE_BUFFER_SIZE, test->utf16len, test->utf16len + 1,
|
||||
test->utf16len - 1 };
|
||||
const size_t max = test->utf16len > 0 ? ARRAYSIZE(len) : ARRAYSIZE(len) - 1;
|
||||
|
||||
BOOL isNullTerminated = strnlen(test->utf8, test->utf8len) < test->utf8len;
|
||||
const int rc2 = MultiByteToWideChar(CP_UTF8, 0, test->utf8, test->utf8len, NULL, 0);
|
||||
size_t wlen = _wcsnlen(test->utf16, test->utf16len);
|
||||
if (isNullTerminated)
|
||||
wlen++;
|
||||
|
||||
if (rc2 != wlen)
|
||||
{
|
||||
char prefix[8192] = { 0 };
|
||||
create_prefix(prefix, ARRAYSIZE(prefix), 0, rc2, test->utf8len, test, __FUNCTION__,
|
||||
__LINE__);
|
||||
fprintf(stderr,
|
||||
"%s MultiByteToWideChar(CP_UTF8, 0, %s, %" PRIuz ", NULL, 0) expected %" PRIuz
|
||||
", got %d\n",
|
||||
prefix, test->utf8, test->utf8len, wlen, rc2);
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
for (size_t x = 0; x < max; x++)
|
||||
{
|
||||
const size_t ilen[] = { TESTCASE_BUFFER_SIZE, test->utf8len, test->utf8len + 1,
|
||||
test->utf8len - 1 };
|
||||
const size_t imax = test->utf8len > 0 ? ARRAYSIZE(ilen) : ARRAYSIZE(ilen) - 1;
|
||||
|
||||
for (size_t y = 0; y < imax; y++)
|
||||
{
|
||||
WCHAR buffer[TESTCASE_BUFFER_SIZE] = { 0 };
|
||||
const int rc = MultiByteToWideChar(CP_UTF8, 0, test->utf8, ilen[x], buffer, len[x]);
|
||||
if (!compare_win_utf16(buffer, len[x], rc, ilen[x], test))
|
||||
return FALSE;
|
||||
}
|
||||
}
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
static BOOL test_win_convert_to_utf8(const testcase_t* test)
|
||||
{
|
||||
const size_t len[] = { TESTCASE_BUFFER_SIZE, test->utf8len, test->utf8len + 1,
|
||||
test->utf8len - 1 };
|
||||
const size_t max = test->utf8len > 0 ? ARRAYSIZE(len) : ARRAYSIZE(len) - 1;
|
||||
|
||||
const int rc2 = WideCharToMultiByte(CP_UTF8, 0, test->utf16, -1, NULL, 0, NULL, NULL);
|
||||
const size_t wlen = strnlen(test->utf8, test->utf8len) + 1;
|
||||
if (rc2 != wlen)
|
||||
{
|
||||
char prefix[8192] = { 0 };
|
||||
create_prefix(prefix, ARRAYSIZE(prefix), 0, rc2, -1, test, __FUNCTION__, __LINE__);
|
||||
fprintf(stderr,
|
||||
"%s WideCharToMultiByte(CP_UTF8, 0, %s, -1, NULL, 0, NULL, NULL) expected %" PRIuz
|
||||
", got %d\n",
|
||||
prefix, test->utf8, wlen, rc2);
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
for (size_t x = 0; x < max; x++)
|
||||
{
|
||||
char buffer[TESTCASE_BUFFER_SIZE] = { 0 };
|
||||
int rc = WideCharToMultiByte(CP_UTF8, 0, test->utf16, -1, buffer, len[x], NULL, NULL);
|
||||
if (!compare_win_utf8(buffer, len[x], rc, -1, test))
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
static BOOL test_win_convert_to_utf8_n(const testcase_t* test)
|
||||
{
|
||||
const size_t len[] = { TESTCASE_BUFFER_SIZE, test->utf8len, test->utf8len + 1,
|
||||
test->utf8len - 1 };
|
||||
const size_t max = test->utf8len > 0 ? ARRAYSIZE(len) : ARRAYSIZE(len) - 1;
|
||||
|
||||
const BOOL isNullTerminated = _wcsnlen(test->utf16, test->utf16len) < test->utf16len;
|
||||
const int rc2 =
|
||||
WideCharToMultiByte(CP_UTF8, 0, test->utf16, test->utf16len, NULL, 0, NULL, NULL);
|
||||
size_t wlen = strnlen(test->utf8, test->utf8len);
|
||||
if (isNullTerminated)
|
||||
wlen++;
|
||||
|
||||
if (rc2 != wlen)
|
||||
{
|
||||
char prefix[8192] = { 0 };
|
||||
create_prefix(prefix, ARRAYSIZE(prefix), 0, rc2, test->utf16len, test, __FUNCTION__,
|
||||
__LINE__);
|
||||
fprintf(stderr,
|
||||
"%s WideCharToMultiByte(CP_UTF8, 0, %s, %" PRIuz
|
||||
", NULL, 0, NULL, NULL) expected %" PRIuz ", got %d\n",
|
||||
prefix, test->utf8, test->utf16len, wlen, rc2);
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
for (size_t x = 0; x < max; x++)
|
||||
{
|
||||
const size_t ilen[] = { TESTCASE_BUFFER_SIZE, test->utf16len, test->utf16len + 1,
|
||||
test->utf16len - 1 };
|
||||
const size_t imax = test->utf16len > 0 ? ARRAYSIZE(ilen) : ARRAYSIZE(ilen) - 1;
|
||||
|
||||
for (size_t y = 0; y < imax; y++)
|
||||
{
|
||||
char buffer[TESTCASE_BUFFER_SIZE] = { 0 };
|
||||
const int rc =
|
||||
WideCharToMultiByte(CP_UTF8, 0, test->utf16, ilen[x], buffer, len[x], NULL, NULL);
|
||||
if (!compare_win_utf8(buffer, len[x], rc, ilen[x], test))
|
||||
return FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
static BOOL test_win_conversion(const testcase_t* testcases, size_t count)
|
||||
{
|
||||
WINPR_ASSERT(testcases || (count == 0));
|
||||
for (size_t x = 0; x < count; x++)
|
||||
{
|
||||
const testcase_t* test = &testcases[x];
|
||||
|
||||
printf("Running test case %" PRIuz " [%s]\n", x, test->utf8);
|
||||
if (!test_win_convert_to_utf16(test))
|
||||
return FALSE;
|
||||
if (!test_win_convert_to_utf16_n(test))
|
||||
return FALSE;
|
||||
if (!test_win_convert_to_utf8(test))
|
||||
return FALSE;
|
||||
if (!test_win_convert_to_utf8_n(test))
|
||||
return FALSE;
|
||||
}
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
/* Letters */
|
||||
|
||||
static BYTE c_cedilla_UTF8[] = "\xC3\xA7\x00";
|
||||
|
@ -510,6 +1156,12 @@ int TestUnicodeConversion(int argc, char* argv[])
|
|||
WINPR_UNUSED(argc);
|
||||
WINPR_UNUSED(argv);
|
||||
|
||||
if (!test_conversion(unit_testcases, ARRAYSIZE(unit_testcases)))
|
||||
return -1;
|
||||
|
||||
if (!test_win_conversion(unit_testcases, ARRAYSIZE(unit_testcases)))
|
||||
return -1;
|
||||
|
||||
/* Letters */
|
||||
|
||||
printf("Letters\n");
|
||||
|
@ -637,5 +1289,6 @@ int TestUnicodeConversion(int argc, char* argv[])
|
|||
|
||||
}
|
||||
*/
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -351,6 +351,205 @@ int ConvertFromUnicode(UINT CodePage, DWORD dwFlags, LPCWSTR lpWideCharStr, int
|
|||
|
||||
/* Swaps the two bytes of each of the first `length` WCHAR elements of wstr in place.
 * wstr may only be NULL when length is 0. */
void ByteSwapUnicode(WCHAR* wstr, size_t length)
{
    WINPR_ASSERT(wstr || (length == 0));

    WCHAR* cur = wstr;
    for (size_t remaining = length; remaining > 0; remaining--)
    {
        *cur = _byteswap_ushort(*cur);
        cur++;
    }
}
|
||||
|
||||
SSIZE_T ConvertWCharToUtf8(const WCHAR* wstr, char* str, size_t len)
|
||||
{
|
||||
if (!wstr)
|
||||
return 0;
|
||||
|
||||
const int rc =
|
||||
WideCharToMultiByte(CP_UTF8, 0, wstr, -1, str, (int)MIN(INT32_MAX, len), NULL, NULL);
|
||||
if (rc <= 0)
|
||||
return rc;
|
||||
else if (rc == len)
|
||||
{
|
||||
if (str && (str[rc - 1] != '\0'))
|
||||
return rc;
|
||||
}
|
||||
return rc - 1;
|
||||
}
|
||||
|
||||
SSIZE_T ConvertWCharNToUtf8(const WCHAR* wstr, size_t wlen, char* str, size_t len)
|
||||
{
|
||||
BOOL isNullTerminated = FALSE;
|
||||
if (wlen == 0)
|
||||
return 0;
|
||||
|
||||
WINPR_ASSERT(wstr);
|
||||
size_t iwlen = _wcsnlen(wstr, wlen);
|
||||
|
||||
if (wlen > INT32_MAX)
|
||||
return -1;
|
||||
|
||||
if (iwlen < wlen)
|
||||
{
|
||||
isNullTerminated = TRUE;
|
||||
iwlen++;
|
||||
}
|
||||
const int rc = WideCharToMultiByte(CP_UTF8, 0, wstr, (int)iwlen, str, (int)MIN(INT32_MAX, len),
|
||||
NULL, NULL);
|
||||
if ((rc <= 0) || ((len > 0) && (rc > len)))
|
||||
return -1;
|
||||
else if (!isNullTerminated)
|
||||
{
|
||||
if (str && (rc < len))
|
||||
str[rc] = '\0';
|
||||
return rc;
|
||||
}
|
||||
else if (rc == len)
|
||||
{
|
||||
if (str && (str[rc - 1] != '\0'))
|
||||
return rc;
|
||||
}
|
||||
return rc - 1;
|
||||
}
|
||||
|
||||
/* Converts a '\0' terminated UTF-8 string to UTF-16.
 *
 * str:  '\0' terminated input, may be NULL (treated as an empty string)
 * wstr: output buffer, may be NULL together with wlen == 0 to query the length
 * wlen: size of wstr in WCHAR, values above INT32_MAX are clamped
 *
 * Returns the length of the converted string in WCHAR without the terminating '\0'.
 * If the output buffer was filled completely and does not end in '\0', the full
 * number of characters written is returned. When MultiByteToWideChar fails its
 * return value (<= 0) is passed through unchanged.
 */
SSIZE_T ConvertUtf8ToWChar(const char* str, WCHAR* wstr, size_t wlen)
{
    if (!str)
    {
        /* Report an empty result AND leave an empty string behind, so the caller does
         * not read stale buffer contents after a 0 return. */
        if (wstr && (wlen > 0))
            wstr[0] = '\0';
        return 0;
    }

    const int iwlen = (int)MIN(INT32_MAX, wlen);
    const int rc = MultiByteToWideChar(CP_UTF8, 0, str, -1, wstr, iwlen);
    if (rc <= 0)
        return rc;

    /* rc counts the terminator unless the buffer ended before it could be written. */
    if ((iwlen == rc) && wstr && (wstr[rc - 1] != '\0'))
        return rc;

    return rc - 1;
}
|
||||
|
||||
/* Converts at most len UTF-8 bytes (stopping at the first '\0') to UTF-16.
 *
 * str:  input of len bytes, must not be NULL unless len is 0
 * len:  length of str in bytes, 0 yields 0, values above INT32_MAX fail
 * wstr: output buffer, may be NULL together with wlen == 0 to query the length
 * wlen: size of wstr in WCHAR, values above INT32_MAX are clamped
 *
 * Returns the length of the converted string in WCHAR without a terminating '\0', or
 * -1 on failure. If the input had no terminator within len, one is appended to the
 * output when there is room for it.
 */
SSIZE_T ConvertUtf8NToWChar(const char* str, size_t len, WCHAR* wstr, size_t wlen)
{
    BOOL isNullTerminated = FALSE;
    if (len == 0)
        return 0;

    WINPR_ASSERT(str);

    if (len > INT32_MAX)
        return -1;

    /* Only inspect the input after it is known to be non-NULL and of sane length. */
    size_t ilen = strnlen(str, len);
    if (ilen < len)
    {
        /* A terminator inside the given range is converted along with the text. */
        isNullTerminated = TRUE;
        ilen++;
    }

    const int iwlen = (int)MIN(INT32_MAX, wlen);
    const int rc = MultiByteToWideChar(CP_UTF8, 0, str, (int)ilen, wstr, iwlen);
    if ((rc <= 0) || ((wlen > 0) && (rc > iwlen)))
        return -1;

    if (!isNullTerminated)
    {
        /* No terminator was converted: add one if the buffer has room for it. */
        if (wstr && (rc < iwlen))
            wstr[rc] = '\0';
        return rc;
    }

    /* rc counts the terminator unless the buffer ended before it could be written. */
    if ((rc == iwlen) && wstr && (wstr[rc - 1] != '\0'))
        return rc;

    return rc - 1;
}
|
||||
|
||||
char* ConvertWCharToUtf8Alloc(const WCHAR* wstr, size_t* pUtfCharLength)
|
||||
{
|
||||
char* tmp = NULL;
|
||||
const SSIZE_T rc = ConvertWCharToUtf8(wstr, NULL, 0);
|
||||
if (pUtfCharLength)
|
||||
*pUtfCharLength = 0;
|
||||
if (rc <= 0)
|
||||
return NULL;
|
||||
tmp = calloc((size_t)rc + 3ull, sizeof(char));
|
||||
if (!tmp)
|
||||
return NULL;
|
||||
const SSIZE_T rc2 = ConvertWCharToUtf8(wstr, tmp, (size_t)rc + 2ull);
|
||||
if (rc2 <= 0)
|
||||
{
|
||||
free(tmp);
|
||||
return NULL;
|
||||
}
|
||||
WINPR_ASSERT(rc == rc2);
|
||||
if (pUtfCharLength)
|
||||
*pUtfCharLength = (size_t)rc2;
|
||||
return tmp;
|
||||
}
|
||||
|
||||
char* ConvertWCharNToUtf8Alloc(const WCHAR* wstr, size_t wlen, size_t* pUtfCharLength)
|
||||
{
|
||||
char* tmp = NULL;
|
||||
const SSIZE_T rc = ConvertWCharNToUtf8(wstr, wlen, NULL, 0);
|
||||
|
||||
if (pUtfCharLength)
|
||||
*pUtfCharLength = 0;
|
||||
if (rc <= 0)
|
||||
return NULL;
|
||||
tmp = calloc((size_t)rc + 3ull, sizeof(char));
|
||||
if (!tmp)
|
||||
return NULL;
|
||||
const SSIZE_T rc2 = ConvertWCharNToUtf8(wstr, wlen, tmp, (size_t)rc + 2ull);
|
||||
if (rc2 <= 0)
|
||||
{
|
||||
free(tmp);
|
||||
return NULL;
|
||||
}
|
||||
WINPR_ASSERT(rc == rc2);
|
||||
if (pUtfCharLength)
|
||||
*pUtfCharLength = (size_t)rc2;
|
||||
return tmp;
|
||||
}
|
||||
|
||||
WCHAR* ConvertUtf8ToWCharAlloc(const char* str, size_t* pSize)
|
||||
{
|
||||
WCHAR* tmp = NULL;
|
||||
const SSIZE_T rc = ConvertUtf8ToWChar(str, NULL, 0);
|
||||
if (pSize)
|
||||
*pSize = 0;
|
||||
if (rc <= 0)
|
||||
return NULL;
|
||||
tmp = calloc((size_t)rc + 3ull, sizeof(WCHAR));
|
||||
if (!tmp)
|
||||
return NULL;
|
||||
const SSIZE_T rc2 = ConvertUtf8ToWChar(str, tmp, (size_t)rc + 2ull);
|
||||
if (rc2 <= 0)
|
||||
{
|
||||
free(tmp);
|
||||
return NULL;
|
||||
}
|
||||
WINPR_ASSERT(rc == rc2);
|
||||
if (pSize)
|
||||
*pSize = (size_t)rc2;
|
||||
return tmp;
|
||||
}
|
||||
|
||||
WCHAR* ConvertUtf8NToWCharAlloc(const char* str, size_t len, size_t* pSize)
|
||||
{
|
||||
WCHAR* tmp = NULL;
|
||||
const SSIZE_T rc = ConvertUtf8NToWChar(str, len, NULL, 0);
|
||||
if (pSize)
|
||||
*pSize = 0;
|
||||
if (rc <= 0)
|
||||
return NULL;
|
||||
tmp = calloc((size_t)rc + 3ull, sizeof(WCHAR));
|
||||
if (!tmp)
|
||||
return NULL;
|
||||
const SSIZE_T rc2 = ConvertUtf8NToWChar(str, len, tmp, (size_t)rc + 2ull);
|
||||
if (rc2 <= 0)
|
||||
{
|
||||
free(tmp);
|
||||
return NULL;
|
||||
}
|
||||
WINPR_ASSERT(rc == rc2);
|
||||
if (pSize)
|
||||
*pSize = (size_t)rc2;
|
||||
return tmp;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue