2012-12-17 01:21:48 +04:00
|
|
|
/**
|
|
|
|
* WinPR: Windows Portable Runtime
|
|
|
|
* Unicode Conversion (CRT)
|
|
|
|
*
|
|
|
|
* Copyright 2012 Marc-Andre Moreau <marcandre.moreau@gmail.com>
|
2022-11-15 10:27:31 +03:00
|
|
|
* Copyright 2022 Armin Novak <anovak@thincast.com>
|
|
|
|
* Copyright 2022 Thincast Technologies GmbH
|
2012-12-17 01:21:48 +04:00
|
|
|
*
|
|
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
* you may not use this file except in compliance with the License.
|
|
|
|
* You may obtain a copy of the License at
|
|
|
|
*
|
|
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
*
|
|
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
* See the License for the specific language governing permissions and
|
|
|
|
* limitations under the License.
|
|
|
|
*/
|
|
|
|
|
2022-02-16 12:08:00 +03:00
|
|
|
#include <winpr/config.h>
|
2022-11-15 10:27:31 +03:00
|
|
|
#include <winpr/assert.h>
|
2012-12-17 01:21:48 +04:00
|
|
|
|
|
|
|
#include <errno.h>
|
|
|
|
#include <wctype.h>
|
|
|
|
|
|
|
|
#include <winpr/crt.h>
|
2014-06-08 00:46:32 +04:00
|
|
|
#include <winpr/error.h>
|
2012-12-17 01:21:48 +04:00
|
|
|
#include <winpr/print.h>
|
|
|
|
|
2022-11-15 10:27:31 +03:00
|
|
|
#ifndef MIN
/*
 * Smaller of two values.
 * The whole expansion is parenthesized so that the macro can be combined with
 * casts and other operators, e.g. (int)MIN(a, b) or 2 * MIN(a, b).
 * Each argument may be evaluated twice; do not pass expressions with side effects.
 */
#define MIN(a, b) (((a) < (b)) ? (a) : (b))
#endif
|
|
|
|
|
2012-12-17 01:21:48 +04:00
|
|
|
#ifndef _WIN32
|
|
|
|
|
2022-11-15 10:27:31 +03:00
|
|
|
#include "unicode.h"
|
2017-07-26 13:58:34 +03:00
|
|
|
|
|
|
|
#include "../log.h"
|
|
|
|
#define TAG WINPR_TAG("unicode")
|
2012-12-17 01:21:48 +04:00
|
|
|
|
2012-12-17 05:44:40 +04:00
|
|
|
/**
|
|
|
|
* Notes on cross-platform Unicode portability:
|
|
|
|
*
|
|
|
|
* Unicode has many possible Unicode Transformation Format (UTF) encodings,
|
|
|
|
* where some of the most commonly used are UTF-8, UTF-16 and sometimes UTF-32.
|
|
|
|
*
|
|
|
|
* The number in the UTF encoding name (8, 16, 32) refers to the number of bits
|
|
|
|
* per code unit. A code unit is the minimal bit combination that can represent
|
|
|
|
* a unit of encoded text in the given encoding. For instance, UTF-8 encodes
|
|
|
|
* the English alphabet using 8 bits (or one byte) each, just like in ASCII.
|
|
|
|
*
|
|
|
|
* However, the total number of code points (values in the Unicode codespace)
|
|
|
|
* only fits completely within 32 bits. This means that for UTF-8 and UTF-16,
|
|
|
|
* more than one code unit may be required to fully encode a specific value.
|
|
|
|
* UTF-8 and UTF-16 are variable-width encodings, while UTF-32 is fixed-width.
|
|
|
|
*
|
|
|
|
* UTF-8 has the advantage of being backwards compatible with ASCII, and is
|
|
|
|
* one of the most commonly used Unicode encodings.
|
|
|
|
*
|
|
|
|
* UTF-16 is used everywhere in the Windows API. The strategy employed by
|
|
|
|
* Microsoft to provide backwards compatibility in their API was to create
|
|
|
|
* an ANSI and a Unicode version of the same function, ending with A (ANSI)
|
|
|
|
* and W (Wide character, or UTF-16 Unicode). In headers, the original
|
|
|
|
* function name is replaced by a macro that defines to either the ANSI
|
|
|
|
* or Unicode version based on the definition of the _UNICODE macro.
|
|
|
|
*
|
|
|
|
* UTF-32 has the advantage of being fixed width, but wastes a lot of space
|
|
|
|
* for English text (4x more than UTF-8, 2x more than UTF-16).
|
|
|
|
*
|
|
|
|
* In C, wide character strings are often defined with the wchar_t type.
|
|
|
|
* Many functions are provided to deal with those wide character strings,
|
|
|
|
* such as wcslen (strlen equivalent) or wprintf (printf equivalent).
|
|
|
|
*
|
|
|
|
* This may lead to some confusion, since many of these functions exist
|
|
|
|
* on both Windows and Linux, but they are *not* the same!
|
|
|
|
*
|
|
|
|
* This sample hello world is a good example:
|
|
|
|
*
|
|
|
|
* #include <wchar.h>
|
|
|
|
*
|
|
|
|
* wchar_t hello[] = L"Hello, World!\n";
|
|
|
|
*
|
|
|
|
* int main(int argc, char** argv)
|
|
|
|
* {
|
|
|
|
* wprintf(hello);
|
|
|
|
* wprintf(L"sizeof(wchar_t): %d\n", sizeof(wchar_t));
|
|
|
|
* return 0;
|
|
|
|
* }
|
|
|
|
*
|
|
|
|
* There is a reason why the sample prints the size of the wchar_t type:
|
|
|
|
* On Windows, wchar_t is two bytes (UTF-16), while on most other systems
|
|
|
|
* it is 4 bytes (UTF-32). This means that if you write code on Windows,
|
|
|
|
* use L"" to define a string which is meant to be UTF-16 and not UTF-32,
|
|
|
|
* you will have a little surprise when trying to port your code to Linux.
|
|
|
|
*
|
|
|
|
* Since the Windows API uses UTF-16, not UTF-32, WinPR defines the WCHAR
|
|
|
|
* type to always be 2-bytes long and uses it instead of wchar_t. Do not
|
|
|
|
* ever use wchar_t with WinPR unless you know what you are doing.
|
|
|
|
*
|
|
|
|
* As for L"", it is unfortunately unusable in a portable way, unless a
|
|
|
|
* special option is passed to GCC to define wchar_t as being two bytes.
|
|
|
|
* For string constants that must be UTF-16, it is a pain, but they can
|
|
|
|
* be defined in a portable way like this:
|
|
|
|
*
|
|
|
|
* WCHAR hello[] = { 'H','e','l','l','o','\0' };
|
|
|
|
*
|
|
|
|
* Such strings cannot be passed to native functions like wcslen(), which
|
|
|
|
* may expect a different wchar_t size. For this reason, WinPR provides
|
|
|
|
* _wcslen, which expects UTF-16 WCHAR strings on all platforms.
|
|
|
|
*
|
2012-12-17 01:21:48 +04:00
|
|
|
*/
|
|
|
|
|
|
|
|
/*
|
2012-12-17 05:44:40 +04:00
|
|
|
* Conversion to Unicode (UTF-16)
|
2012-12-17 01:21:48 +04:00
|
|
|
* MultiByteToWideChar: http://msdn.microsoft.com/en-us/library/windows/desktop/dd319072/
|
2012-12-17 05:44:40 +04:00
|
|
|
*
|
|
|
|
* cbMultiByte is an input size in bytes (BYTE)
|
|
|
|
* cchWideChar is an output size in wide characters (WCHAR)
|
|
|
|
*
|
|
|
|
* Null-terminated UTF-8 strings:
|
|
|
|
*
|
|
|
|
* cchWideChar *cannot* be assumed to be cbMultiByte since UTF-8 is variable-width!
|
|
|
|
*
|
|
|
|
* Instead, obtain the required cchWideChar output size like this:
|
|
|
|
* cchWideChar = MultiByteToWideChar(CP_UTF8, 0, (LPCSTR) lpMultiByteStr, -1, NULL, 0);
|
|
|
|
*
|
|
|
|
* A value of -1 for cbMultiByte indicates that the input string is null-terminated,
|
|
|
|
* and the null terminator *will* be processed. The size returned by MultiByteToWideChar
|
|
|
|
* will therefore include the null terminator. Equivalent behavior can be obtained by
|
|
|
|
* computing the length in bytes of the input buffer, including the null terminator:
|
|
|
|
*
|
|
|
|
* cbMultiByte = strlen((char*) lpMultiByteStr) + 1;
|
|
|
|
*
|
|
|
|
* An output buffer of the proper size can then be allocated:
|
|
|
|
*
|
|
|
|
* lpWideCharStr = (LPWSTR) malloc(cchWideChar * sizeof(WCHAR));
|
|
|
|
*
|
|
|
|
* Since cchWideChar is an output size in wide characters, the actual buffer size is:
|
|
|
|
* (cchWideChar * sizeof(WCHAR)) or (cchWideChar * 2)
|
|
|
|
*
|
|
|
|
* Finally, perform the conversion:
|
|
|
|
*
|
2019-11-06 17:24:51 +03:00
|
|
|
* cchWideChar = MultiByteToWideChar(CP_UTF8, 0, (LPCSTR) lpMultiByteStr, -1, lpWideCharStr,
|
|
|
|
* cchWideChar);
|
2012-12-17 05:44:40 +04:00
|
|
|
*
|
|
|
|
* The value returned by MultiByteToWideChar corresponds to the number of wide characters written
|
2019-11-06 17:24:51 +03:00
|
|
|
* to the output buffer, and should match the value obtained on the first call to
|
|
|
|
* MultiByteToWideChar.
|
2012-12-17 05:44:40 +04:00
|
|
|
*
|
2012-12-17 01:21:48 +04:00
|
|
|
*/
|
|
|
|
|
2019-11-06 17:24:51 +03:00
|
|
|
int MultiByteToWideChar(UINT CodePage, DWORD dwFlags, LPCSTR lpMultiByteStr, int cbMultiByte,
|
|
|
|
LPWSTR lpWideCharStr, int cchWideChar)
|
2012-12-17 01:21:48 +04:00
|
|
|
{
|
2022-11-15 10:27:31 +03:00
|
|
|
return int_MultiByteToWideChar(CodePage, dwFlags, lpMultiByteStr, cbMultiByte, lpWideCharStr,
|
|
|
|
cchWideChar);
|
2012-12-17 01:21:48 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2012-12-17 05:44:40 +04:00
|
|
|
* Conversion from Unicode (UTF-16)
|
2012-12-17 01:21:48 +04:00
|
|
|
* WideCharToMultiByte: http://msdn.microsoft.com/en-us/library/windows/desktop/dd374130/
|
2012-12-17 05:44:40 +04:00
|
|
|
*
|
|
|
|
* cchWideChar is an input size in wide characters (WCHAR)
|
|
|
|
* cbMultiByte is an output size in bytes (BYTE)
|
|
|
|
*
|
|
|
|
* Null-terminated UTF-16 strings:
|
|
|
|
*
|
|
|
|
* cbMultiByte *cannot* be assumed to be cchWideChar since UTF-8 is variable-width!
|
|
|
|
*
|
|
|
|
* Instead, obtain the required cbMultiByte output size like this:
|
|
|
|
* cbMultiByte = WideCharToMultiByte(CP_UTF8, 0, (LPCWSTR) lpWideCharStr, -1, NULL, 0, NULL, NULL);
|
|
|
|
*
|
|
|
|
* A value of -1 for cchWideChar indicates that the input string is null-terminated,
|
|
|
|
* and the null terminator *will* be processed. The size returned by WideCharToMultiByte
|
|
|
|
* will therefore include the null terminator. Equivalent behavior can be obtained by
|
|
|
|
* computing the length in wide characters of the input buffer, including the null terminator:
|
|
|
|
*
|
|
|
|
* cchWideChar = _wcslen((WCHAR*) lpWideCharStr) + 1;
|
|
|
|
*
|
|
|
|
* An output buffer of the proper size can then be allocated:
|
|
|
|
* lpMultiByteStr = (LPSTR) malloc(cbMultiByte);
|
|
|
|
*
|
|
|
|
* Since cbMultiByte is an output size in bytes, it is the same as the buffer size
|
|
|
|
*
|
|
|
|
* Finally, perform the conversion:
|
|
|
|
*
|
2019-11-06 17:24:51 +03:00
|
|
|
* cbMultiByte = WideCharToMultiByte(CP_UTF8, 0, (LPCWSTR) lpWideCharStr, -1, lpMultiByteStr,
|
|
|
|
* cbMultiByte, NULL, NULL);
|
2012-12-17 05:44:40 +04:00
|
|
|
*
|
|
|
|
* The value returned by WideCharToMultiByte corresponds to the number of bytes written
|
2019-11-06 17:24:51 +03:00
|
|
|
* to the output buffer, and should match the value obtained on the first call to
|
|
|
|
* WideCharToMultiByte.
|
2012-12-17 05:44:40 +04:00
|
|
|
*
|
2012-12-17 01:21:48 +04:00
|
|
|
*/
|
|
|
|
|
|
|
|
int WideCharToMultiByte(UINT CodePage, DWORD dwFlags, LPCWSTR lpWideCharStr, int cchWideChar,
|
2019-11-06 17:24:51 +03:00
|
|
|
LPSTR lpMultiByteStr, int cbMultiByte, LPCSTR lpDefaultChar,
|
|
|
|
LPBOOL lpUsedDefaultChar)
|
2012-12-17 01:21:48 +04:00
|
|
|
{
|
2022-11-15 10:27:31 +03:00
|
|
|
return int_WideCharToMultiByte(CodePage, dwFlags, lpWideCharStr, cchWideChar, lpMultiByteStr,
|
|
|
|
cbMultiByte, lpDefaultChar, lpUsedDefaultChar);
|
2012-12-17 01:21:48 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
#endif
|
|
|
|
|
2016-03-03 18:21:12 +03:00
|
|
|
/**
|
|
|
|
* ConvertToUnicode is a convenience wrapper for MultiByteToWideChar:
|
|
|
|
*
|
2017-03-21 12:31:21 +03:00
|
|
|
* If the lpWideCharStr parameter for the converted string points to NULL
|
2016-03-03 18:21:12 +03:00
|
|
|
* or if the cchWideChar parameter is set to 0 this function will automatically
|
|
|
|
* allocate the required memory which is guaranteed to be null-terminated
|
|
|
|
* after the conversion, even if the source c string isn't.
|
|
|
|
*
|
|
|
|
* If the cbMultiByte parameter is set to -1 the passed lpMultiByteStr must
|
|
|
|
* be null-terminated and the required length for the converted string will be
|
|
|
|
* calculated accordingly.
|
|
|
|
*/
|
|
|
|
|
2019-11-06 17:24:51 +03:00
|
|
|
int ConvertToUnicode(UINT CodePage, DWORD dwFlags, LPCSTR lpMultiByteStr, int cbMultiByte,
|
|
|
|
LPWSTR* lpWideCharStr, int cchWideChar)
|
2012-12-17 08:00:40 +04:00
|
|
|
{
|
|
|
|
int status;
|
|
|
|
BOOL allocate = FALSE;
|
|
|
|
|
|
|
|
if (!lpMultiByteStr)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
if (!lpWideCharStr)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
if (cbMultiByte == -1)
|
2019-02-21 19:28:49 +03:00
|
|
|
{
|
2019-10-29 12:18:09 +03:00
|
|
|
size_t len = strnlen(lpMultiByteStr, INT_MAX);
|
2019-02-21 19:28:49 +03:00
|
|
|
if (len >= INT_MAX)
|
|
|
|
return 0;
|
|
|
|
cbMultiByte = (int)(len + 1);
|
|
|
|
}
|
2012-12-17 08:00:40 +04:00
|
|
|
|
|
|
|
if (cchWideChar == 0)
|
|
|
|
{
|
|
|
|
cchWideChar = MultiByteToWideChar(CodePage, dwFlags, lpMultiByteStr, cbMultiByte, NULL, 0);
|
|
|
|
allocate = TRUE;
|
|
|
|
}
|
2020-05-07 16:38:35 +03:00
|
|
|
else if (!(*lpWideCharStr))
|
|
|
|
allocate = TRUE;
|
2012-12-17 08:00:40 +04:00
|
|
|
|
|
|
|
if (cchWideChar < 1)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
if (allocate)
|
2014-06-07 01:20:34 +04:00
|
|
|
{
|
2019-11-06 17:24:51 +03:00
|
|
|
*lpWideCharStr = (LPWSTR)calloc(cchWideChar + 1, sizeof(WCHAR));
|
2014-06-07 01:20:34 +04:00
|
|
|
|
|
|
|
if (!(*lpWideCharStr))
|
|
|
|
{
|
2019-11-06 17:24:51 +03:00
|
|
|
// SetLastError(ERROR_INSUFFICIENT_BUFFER);
|
2014-06-07 01:20:34 +04:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
}
|
2012-12-17 08:00:40 +04:00
|
|
|
|
2017-02-20 16:32:54 +03:00
|
|
|
status = MultiByteToWideChar(CodePage, dwFlags, lpMultiByteStr, cbMultiByte, *lpWideCharStr,
|
|
|
|
cchWideChar);
|
2012-12-17 08:00:40 +04:00
|
|
|
|
|
|
|
if (status != cchWideChar)
|
2016-03-03 18:21:12 +03:00
|
|
|
{
|
|
|
|
if (allocate)
|
|
|
|
{
|
|
|
|
free(*lpWideCharStr);
|
|
|
|
*lpWideCharStr = NULL;
|
2021-02-09 17:48:20 +03:00
|
|
|
status = 0;
|
2016-03-03 18:21:12 +03:00
|
|
|
}
|
|
|
|
}
|
2012-12-17 08:00:40 +04:00
|
|
|
|
|
|
|
return status;
|
|
|
|
}
|
|
|
|
|
2016-03-03 18:21:12 +03:00
|
|
|
/**
|
|
|
|
* ConvertFromUnicode is a convenience wrapper for WideCharToMultiByte:
|
|
|
|
*
|
|
|
|
* If the lpMultiByteStr parameter for the converted string points to NULL
|
|
|
|
* or if the cbMultiByte parameter is set to 0 this function will automatically
|
|
|
|
* allocate the required memory which is guaranteed to be null-terminated
|
|
|
|
* after the conversion, even if the source unicode string isn't.
|
|
|
|
*
|
|
|
|
* If the cchWideChar parameter is set to -1 the passed lpWideCharStr must
|
|
|
|
* be null-terminated and the required length for the converted string will be
|
|
|
|
* calculated accordingly.
|
|
|
|
*/
|
|
|
|
|
2012-12-17 08:00:40 +04:00
|
|
|
int ConvertFromUnicode(UINT CodePage, DWORD dwFlags, LPCWSTR lpWideCharStr, int cchWideChar,
|
2019-11-06 17:24:51 +03:00
|
|
|
LPSTR* lpMultiByteStr, int cbMultiByte, LPCSTR lpDefaultChar,
|
|
|
|
LPBOOL lpUsedDefaultChar)
|
2012-12-17 08:00:40 +04:00
|
|
|
{
|
|
|
|
int status;
|
|
|
|
BOOL allocate = FALSE;
|
|
|
|
|
|
|
|
if (!lpWideCharStr)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
if (!lpMultiByteStr)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
if (cchWideChar == -1)
|
2017-02-20 16:32:54 +03:00
|
|
|
cchWideChar = (int)(_wcslen(lpWideCharStr) + 1);
|
2012-12-17 08:00:40 +04:00
|
|
|
|
|
|
|
if (cbMultiByte == 0)
|
|
|
|
{
|
2019-11-06 17:24:51 +03:00
|
|
|
cbMultiByte =
|
|
|
|
WideCharToMultiByte(CodePage, dwFlags, lpWideCharStr, cchWideChar, NULL, 0, NULL, NULL);
|
2012-12-17 08:00:40 +04:00
|
|
|
allocate = TRUE;
|
|
|
|
}
|
2020-05-07 16:38:35 +03:00
|
|
|
else if (!(*lpMultiByteStr))
|
|
|
|
allocate = TRUE;
|
2012-12-17 08:00:40 +04:00
|
|
|
|
|
|
|
if (cbMultiByte < 1)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
if (allocate)
|
2012-12-19 18:03:31 +04:00
|
|
|
{
|
2019-11-06 17:24:51 +03:00
|
|
|
*lpMultiByteStr = (LPSTR)calloc(1, cbMultiByte + 1);
|
2014-06-07 01:20:34 +04:00
|
|
|
|
|
|
|
if (!(*lpMultiByteStr))
|
|
|
|
{
|
2019-11-06 17:24:51 +03:00
|
|
|
// SetLastError(ERROR_INSUFFICIENT_BUFFER);
|
2014-06-07 01:20:34 +04:00
|
|
|
return 0;
|
|
|
|
}
|
2012-12-19 18:03:31 +04:00
|
|
|
}
|
2012-12-17 08:00:40 +04:00
|
|
|
|
2019-11-06 17:24:51 +03:00
|
|
|
status = WideCharToMultiByte(CodePage, dwFlags, lpWideCharStr, cchWideChar, *lpMultiByteStr,
|
|
|
|
cbMultiByte, lpDefaultChar, lpUsedDefaultChar);
|
2012-12-17 08:00:40 +04:00
|
|
|
|
2014-06-11 00:38:16 +04:00
|
|
|
if ((status != cbMultiByte) && allocate)
|
|
|
|
{
|
2012-12-17 08:00:40 +04:00
|
|
|
status = 0;
|
2014-06-11 00:38:16 +04:00
|
|
|
}
|
2012-12-17 08:00:40 +04:00
|
|
|
|
2013-08-28 17:53:50 +04:00
|
|
|
if ((status <= 0) && allocate)
|
|
|
|
{
|
|
|
|
free(*lpMultiByteStr);
|
|
|
|
*lpMultiByteStr = NULL;
|
|
|
|
}
|
|
|
|
|
2012-12-17 08:00:40 +04:00
|
|
|
return status;
|
|
|
|
}
|
2014-10-17 02:07:44 +04:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Swap Unicode byte order (UTF16LE <-> UTF16BE)
|
|
|
|
*/
|
|
|
|
|
2022-10-28 11:51:54 +03:00
|
|
|
void ByteSwapUnicode(WCHAR* wstr, size_t length)
|
2014-10-17 02:07:44 +04:00
|
|
|
{
|
2022-11-15 10:38:50 +03:00
|
|
|
WINPR_ASSERT(wstr || (length == 0));
|
|
|
|
|
2022-10-28 11:51:54 +03:00
|
|
|
for (size_t x = 0; x < length; x++)
|
|
|
|
wstr[x] = _byteswap_ushort(wstr[x]);
|
2014-10-17 02:07:44 +04:00
|
|
|
}
|
2022-11-15 10:38:50 +03:00
|
|
|
|
|
|
|
SSIZE_T ConvertWCharToUtf8(const WCHAR* wstr, char* str, size_t len)
|
|
|
|
{
|
|
|
|
if (!wstr)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
const int rc =
|
|
|
|
WideCharToMultiByte(CP_UTF8, 0, wstr, -1, str, (int)MIN(INT32_MAX, len), NULL, NULL);
|
|
|
|
if (rc <= 0)
|
|
|
|
return rc;
|
|
|
|
else if (rc == len)
|
|
|
|
{
|
|
|
|
if (str && (str[rc - 1] != '\0'))
|
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
return rc - 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
SSIZE_T ConvertWCharNToUtf8(const WCHAR* wstr, size_t wlen, char* str, size_t len)
|
|
|
|
{
|
|
|
|
BOOL isNullTerminated = FALSE;
|
|
|
|
if (wlen == 0)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
WINPR_ASSERT(wstr);
|
|
|
|
size_t iwlen = _wcsnlen(wstr, wlen);
|
|
|
|
|
|
|
|
if (wlen > INT32_MAX)
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
if (iwlen < wlen)
|
|
|
|
{
|
|
|
|
isNullTerminated = TRUE;
|
|
|
|
iwlen++;
|
|
|
|
}
|
|
|
|
const int rc = WideCharToMultiByte(CP_UTF8, 0, wstr, (int)iwlen, str, (int)MIN(INT32_MAX, len),
|
|
|
|
NULL, NULL);
|
|
|
|
if ((rc <= 0) || ((len > 0) && (rc > len)))
|
|
|
|
return -1;
|
|
|
|
else if (!isNullTerminated)
|
|
|
|
{
|
|
|
|
if (str && (rc < len))
|
|
|
|
str[rc] = '\0';
|
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
else if (rc == len)
|
|
|
|
{
|
|
|
|
if (str && (str[rc - 1] != '\0'))
|
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
return rc - 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
 * Convert a null-terminated UTF-8 string to WCHAR via MultiByteToWideChar.
 *
 * str:  input string; NULL yields 0.
 * wstr: output buffer (may be NULL).
 * wlen: size of wstr in WCHARs, clamped to INT32_MAX for the conversion call.
 *
 * Returns the MultiByteToWideChar result unchanged if it is <= 0. Otherwise
 * returns the result minus one (the input terminator is part of the conversion),
 * unless the output was filled to exactly the clamped wlen and its last element
 * is not '\0', in which case the full count is returned.
 */
SSIZE_T ConvertUtf8ToWChar(const char* str, WCHAR* wstr, size_t wlen)
{
	if (str == NULL)
		return 0;

	const int capacity = (wlen > INT32_MAX) ? INT32_MAX : (int)wlen;
	const int produced = MultiByteToWideChar(CP_UTF8, 0, str, -1, wstr, capacity);

	if (produced <= 0)
		return produced;

	/* Buffer exactly full without a trailing terminator: nothing to subtract. */
	const BOOL filled = (capacity == produced);
	if (filled && wstr && (wstr[produced - 1] != '\0'))
		return produced;

	return produced - 1;
}
|
|
|
|
|
|
|
|
/**
 * Convert at most len bytes of the UTF-8 string str to WCHAR via MultiByteToWideChar.
 *
 * str:  input (asserted non-NULL unless len is 0).
 * len:  maximum number of bytes to read; 0 yields 0, more than INT32_MAX yields -1.
 * wstr: output buffer (may be NULL).
 * wlen: size of wstr in WCHARs, clamped to INT32_MAX for the conversion call.
 *
 * Returns -1 if the conversion result is <= 0, or exceeds a non-zero wlen.
 * If no terminator was found within len, a '\0' is appended when there is
 * room and the result is returned as is. Otherwise the terminator is excluded
 * from the returned count (same rule as ConvertUtf8ToWChar).
 */
SSIZE_T ConvertUtf8NToWChar(const char* str, size_t len, WCHAR* wstr, size_t wlen)
{
	BOOL isNullTerminated = FALSE;

	if (len == 0)
		return 0;

	WINPR_ASSERT(str);

	if (len > INT32_MAX)
		return -1;

	/* Only scan the input after it has been validated: str may be NULL when
	 * len is 0, and an oversized len is rejected before touching the buffer. */
	size_t ilen = strnlen(str, len);

	if (ilen < len)
	{
		/* A terminator inside the window is converted along with the text. */
		isNullTerminated = TRUE;
		ilen++;
	}

	const int iwlen = MIN(INT32_MAX, wlen);
	const int rc = MultiByteToWideChar(CP_UTF8, 0, str, (int)ilen, wstr, (int)iwlen);

	if ((rc <= 0) || ((wlen > 0) && (rc > iwlen)))
		return -1;

	if (!isNullTerminated)
	{
		/* Input was not terminated: terminate the output if space is left. */
		if (wstr && (rc < iwlen))
			wstr[rc] = '\0';
		return rc;
	}
	else if (rc == iwlen)
	{
		/* Buffer exactly full without a trailing terminator: nothing to subtract. */
		if (wstr && (wstr[rc - 1] != '\0'))
			return rc;
	}

	return rc - 1;
}
|
|
|
|
|
2022-11-22 16:09:52 +03:00
|
|
|
/**
 * Convert exactly len bytes of str to WCHAR via MultiByteToWideChar, without
 * searching for a terminator first (the whole range is passed to the conversion).
 *
 * str:  input (asserted non-NULL unless len is 0).
 * len:  number of bytes to convert; 0 yields 0, more than INT32_MAX yields -1.
 * wstr: output buffer (may be NULL).
 * wlen: size of wstr in WCHARs, clamped to INT32_MAX for the conversion call.
 *
 * Returns the MultiByteToWideChar result, or -1 if that result is <= 0 or
 * exceeds a non-zero wlen.
 */
SSIZE_T ConvertMszUtf8NToWChar(const char* str, size_t len, WCHAR* wstr, size_t wlen)
{
	if (len == 0)
		return 0;

	WINPR_ASSERT(str);

	if (len > INT32_MAX)
		return -1;

	const int capacity = (wlen > INT32_MAX) ? INT32_MAX : (int)wlen;
	const int produced = MultiByteToWideChar(CP_UTF8, 0, str, (int)len, wstr, capacity);

	if (produced <= 0)
		return -1;

	if ((wlen > 0) && (produced > capacity))
		return -1;

	return produced;
}
|
|
|
|
|
2022-11-15 10:38:50 +03:00
|
|
|
char* ConvertWCharToUtf8Alloc(const WCHAR* wstr, size_t* pUtfCharLength)
|
|
|
|
{
|
|
|
|
char* tmp = NULL;
|
|
|
|
const SSIZE_T rc = ConvertWCharToUtf8(wstr, NULL, 0);
|
|
|
|
if (pUtfCharLength)
|
|
|
|
*pUtfCharLength = 0;
|
|
|
|
if (rc <= 0)
|
|
|
|
return NULL;
|
|
|
|
tmp = calloc((size_t)rc + 3ull, sizeof(char));
|
|
|
|
if (!tmp)
|
|
|
|
return NULL;
|
|
|
|
const SSIZE_T rc2 = ConvertWCharToUtf8(wstr, tmp, (size_t)rc + 2ull);
|
|
|
|
if (rc2 <= 0)
|
|
|
|
{
|
|
|
|
free(tmp);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
WINPR_ASSERT(rc == rc2);
|
|
|
|
if (pUtfCharLength)
|
|
|
|
*pUtfCharLength = (size_t)rc2;
|
|
|
|
return tmp;
|
|
|
|
}
|
|
|
|
|
|
|
|
char* ConvertWCharNToUtf8Alloc(const WCHAR* wstr, size_t wlen, size_t* pUtfCharLength)
|
|
|
|
{
|
|
|
|
char* tmp = NULL;
|
|
|
|
const SSIZE_T rc = ConvertWCharNToUtf8(wstr, wlen, NULL, 0);
|
|
|
|
|
|
|
|
if (pUtfCharLength)
|
|
|
|
*pUtfCharLength = 0;
|
|
|
|
if (rc <= 0)
|
|
|
|
return NULL;
|
|
|
|
tmp = calloc((size_t)rc + 3ull, sizeof(char));
|
|
|
|
if (!tmp)
|
|
|
|
return NULL;
|
|
|
|
const SSIZE_T rc2 = ConvertWCharNToUtf8(wstr, wlen, tmp, (size_t)rc + 2ull);
|
|
|
|
if (rc2 <= 0)
|
|
|
|
{
|
|
|
|
free(tmp);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
WINPR_ASSERT(rc == rc2);
|
|
|
|
if (pUtfCharLength)
|
|
|
|
*pUtfCharLength = (size_t)rc2;
|
|
|
|
return tmp;
|
|
|
|
}
|
|
|
|
|
|
|
|
WCHAR* ConvertUtf8ToWCharAlloc(const char* str, size_t* pSize)
|
|
|
|
{
|
|
|
|
WCHAR* tmp = NULL;
|
|
|
|
const SSIZE_T rc = ConvertUtf8ToWChar(str, NULL, 0);
|
|
|
|
if (pSize)
|
|
|
|
*pSize = 0;
|
|
|
|
if (rc <= 0)
|
|
|
|
return NULL;
|
|
|
|
tmp = calloc((size_t)rc + 3ull, sizeof(WCHAR));
|
|
|
|
if (!tmp)
|
|
|
|
return NULL;
|
|
|
|
const SSIZE_T rc2 = ConvertUtf8ToWChar(str, tmp, (size_t)rc + 2ull);
|
|
|
|
if (rc2 <= 0)
|
|
|
|
{
|
|
|
|
free(tmp);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
WINPR_ASSERT(rc == rc2);
|
|
|
|
if (pSize)
|
|
|
|
*pSize = (size_t)rc2;
|
|
|
|
return tmp;
|
|
|
|
}
|
|
|
|
|
|
|
|
WCHAR* ConvertUtf8NToWCharAlloc(const char* str, size_t len, size_t* pSize)
|
|
|
|
{
|
|
|
|
WCHAR* tmp = NULL;
|
|
|
|
const SSIZE_T rc = ConvertUtf8NToWChar(str, len, NULL, 0);
|
|
|
|
if (pSize)
|
|
|
|
*pSize = 0;
|
|
|
|
if (rc <= 0)
|
|
|
|
return NULL;
|
|
|
|
tmp = calloc((size_t)rc + 3ull, sizeof(WCHAR));
|
|
|
|
if (!tmp)
|
|
|
|
return NULL;
|
|
|
|
const SSIZE_T rc2 = ConvertUtf8NToWChar(str, len, tmp, (size_t)rc + 2ull);
|
|
|
|
if (rc2 <= 0)
|
|
|
|
{
|
|
|
|
free(tmp);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
WINPR_ASSERT(rc == rc2);
|
|
|
|
if (pSize)
|
|
|
|
*pSize = (size_t)rc2;
|
|
|
|
return tmp;
|
|
|
|
}
|
2022-11-22 16:09:52 +03:00
|
|
|
|
|
|
|
WCHAR* ConvertMszUtf8NToWCharAlloc(const char* str, size_t len, size_t* pSize)
|
|
|
|
{
|
|
|
|
WCHAR* tmp = NULL;
|
|
|
|
const SSIZE_T rc = ConvertMszUtf8NToWChar(str, len, NULL, 0);
|
|
|
|
if (pSize)
|
|
|
|
*pSize = 0;
|
|
|
|
if (rc <= 0)
|
|
|
|
return NULL;
|
|
|
|
tmp = calloc((size_t)rc + 3ull, sizeof(WCHAR));
|
|
|
|
if (!tmp)
|
|
|
|
return NULL;
|
|
|
|
const SSIZE_T rc2 = ConvertMszUtf8NToWChar(str, len, tmp, (size_t)rc + 2ull);
|
|
|
|
if (rc2 <= 0)
|
|
|
|
{
|
|
|
|
free(tmp);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
WINPR_ASSERT(rc == rc2);
|
|
|
|
if (pSize)
|
|
|
|
*pSize = (size_t)rc2;
|
|
|
|
return tmp;
|
|
|
|
}
|