From 36a1683a5db1f4d5efaf584bba0321635a17c11c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marc-Andr=C3=A9=20Moreau?= <marcandre.moreau@gmail.com>
Date: Sun, 16 Dec 2012 16:21:48 -0500
Subject: [PATCH] libwinpr-crt: initial WideCharToMultiByte and
 MultiByteToWideChar replacement implementation

---
 libfreerdp/utils/unicode.c                    | 237 ++++++-
 winpr/libwinpr/crt/CMakeLists.txt             |   5 +-
 winpr/libwinpr/crt/string.c                   | 228 -------
 .../libwinpr/crt/test/TestUnicodeConversion.c |  16 +-
 winpr/libwinpr/crt/unicode.c                  | 140 ++++
 winpr/libwinpr/crt/utf.c                      | 623 ++++++++++++++++++
 winpr/libwinpr/crt/utf.h                      | 152 +++++
 7 files changed, 1163 insertions(+), 238 deletions(-)
 create mode 100644 winpr/libwinpr/crt/unicode.c
 create mode 100644 winpr/libwinpr/crt/utf.c
 create mode 100644 winpr/libwinpr/crt/utf.h

diff --git a/libfreerdp/utils/unicode.c b/libfreerdp/utils/unicode.c
index fc582e20a..78ddf15b2 100644
--- a/libfreerdp/utils/unicode.c
+++ b/libfreerdp/utils/unicode.c
@@ -30,6 +30,237 @@
 
 #include <winpr/crt.h>
 
+/**
+ * This is a temporary copy of the old buggy implementations of
+ * MultiByteToWideChar and WideCharToMultiByte
+ */
+
+#if 1
+#define _MultiByteToWideChar	old_MultiByteToWideChar
+#define _WideCharToMultiByte	old_WideCharToMultiByte
+#else
+#define _MultiByteToWideChar	MultiByteToWideChar
+#define _WideCharToMultiByte	WideCharToMultiByte
+#endif
+
+int old_MultiByteToWideChar(UINT CodePage, DWORD dwFlags, LPCSTR lpMultiByteStr,
+		int cbMultiByte, LPWSTR lpWideCharStr, int cchWideChar)
+{
+	size_t ibl; /* input bytes left to consume */
+	size_t obl; /* output bytes left in the assumed budget */
+	char* pin;
+	char* pout;
+	char* pout0; /* start of the output, kept to compute the final count */
+
+	if (lpMultiByteStr == NULL)
+		return 0;
+
+	if (cbMultiByte < 0)
+		cbMultiByte = strlen(lpMultiByteStr) + 1; /* null-terminated input: the terminator is counted */
+
+	ibl = cbMultiByte;
+	obl = 2 * ibl; /* budget comes from the input length; cchWideChar is never used as a bound */
+
+	if (cchWideChar < 1)
+		return (obl / 2); /* size query: answers with the input byte count */
+
+	pin = (char*) lpMultiByteStr;
+	pout0 = (char*) lpWideCharStr;
+	pout = pout0;
+
+#ifdef HAVE_ICONV
+	{
+		iconv_t* out_iconv_h;
+
+		out_iconv_h = iconv_open(WINDOWS_CODEPAGE, DEFAULT_CODEPAGE);
+
+		if (errno == EINVAL) /* NOTE(review): inspects errno instead of iconv_open's return value */
+		{
+			printf("Error opening iconv converter to %s from %s\n", WINDOWS_CODEPAGE, DEFAULT_CODEPAGE);
+			return 0;
+		}
+
+		if (iconv(out_iconv_h, (ICONV_CONST char **) &pin, &ibl, &pout, &obl) == (size_t) - 1)
+		{
+			printf("MultiByteToWideChar: iconv() error\n");
+			return NULL; /* NOTE(review): NULL from an int function; iconv_close below is skipped on this path */
+		}
+
+		iconv_close(out_iconv_h);
+	}
+#else
+	while ((ibl > 0) && (obl > 0))
+	{
+		unsigned int wc;
+
+		wc = (unsigned int) (unsigned char) (*pin++); /* lead byte of the next sequence */
+		ibl--;
+		/* NOTE(review): the continuation bytes below are read without first checking ibl */
+		if (wc >= 0xF0)
+		{
+			wc = (wc - 0xF0) << 18;
+			wc += ((unsigned int) (unsigned char) (*pin++) - 0x80) << 12;
+			wc += ((unsigned int) (unsigned char) (*pin++) - 0x80) << 6;
+			wc += ((unsigned int) (unsigned char) (*pin++) - 0x80);
+			ibl -= 3;
+		}
+		else if (wc >= 0xE0)
+		{
+			wc = (wc - 0xE0) << 12;
+			wc += ((unsigned int) (unsigned char) (*pin++) - 0x80) << 6;
+			wc += ((unsigned int) (unsigned char) (*pin++) - 0x80);
+			ibl -= 2;
+		}
+		else if (wc >= 0xC0)
+		{
+			wc = (wc - 0xC0) << 6;
+			wc += ((unsigned int) (unsigned char) (*pin++) - 0x80);
+			ibl -= 1;
+		}
+		/* Store the value as 16-bit units, low byte first */
+		if (wc <= 0xFFFF)
+		{
+			*pout++ = (char) (wc & 0xFF);
+			*pout++ = (char) (wc >> 8);
+			obl -= 2;
+		}
+		else
+		{
+			wc -= 0x10000;
+			*pout++ = (char) ((wc >> 10) & 0xFF);
+			*pout++ = (char) ((wc >> 18) + 0xD8);
+			*pout++ = (char) (wc & 0xFF);
+			*pout++ = (char) (((wc >> 8) & 0x03) + 0xDC);
+			obl -= 4; /* NOTE(review): obl is not checked for four free bytes before this branch */
+		}
+	}
+#endif
+
+	if (ibl > 0)
+	{
+		printf("MultiByteToWideChar: string not fully converted - %d chars left\n", (int) ibl);
+		return 0;
+	}
+
+	return (pout - pout0) / 2; /* number of 16-bit units written */
+}
+
+/*
+ * Conversion *from* Unicode
+ * WideCharToMultiByte: http://msdn.microsoft.com/en-us/library/windows/desktop/dd374130/
+ */
+
+int old_WideCharToMultiByte(UINT CodePage, DWORD dwFlags, LPCWSTR lpWideCharStr, int cchWideChar,
+		LPSTR lpMultiByteStr, int cbMultiByte, LPCSTR lpDefaultChar, LPBOOL lpUsedDefaultChar)
+{
+	char* pout;
+	char* conv_pout; /* write cursor into the output buffer */
+	size_t conv_in_len; /* input bytes left to consume */
+	size_t conv_out_len; /* output bytes left in the assumed budget */
+	unsigned char* conv_pin;
+
+	/*
+	 * if cbMultiByte is set to 0, the function returns the required buffer size
+	 * for lpMultiByteStr and makes no use of the output parameter itself.
+	 */
+
+	if (cbMultiByte == 0)
+		return lstrlenW(lpWideCharStr); /* NOTE(review): presumably a UTF-16 unit count, not a UTF-8 byte count - confirm */
+
+	/* If cchWideChar is set to 0, the function fails */
+
+	if (cchWideChar == 0)
+		return 0;
+
+	/* cchWideChar is set to -1 if the string is null-terminated */
+
+	if (cchWideChar == -1)
+		cchWideChar = lstrlenW(lpWideCharStr); /* no +1 here: the terminator is not part of the input */
+
+	conv_pin = (unsigned char*) lpWideCharStr;
+	conv_in_len = cchWideChar * 2;
+	pout = lpMultiByteStr;
+	conv_pout = pout;
+	conv_out_len = cchWideChar * 2; /* budget comes from the input length; cbMultiByte is never used as a bound */
+
+#ifdef HAVE_ICONV
+	{
+		iconv_t* in_iconv_h;
+
+		in_iconv_h = iconv_open(DEFAULT_CODEPAGE, WINDOWS_CODEPAGE);
+
+		if (errno == EINVAL) /* NOTE(review): inspects errno instead of iconv_open's return value */
+		{
+			printf("Error opening iconv converter to %s from %s\n", DEFAULT_CODEPAGE, WINDOWS_CODEPAGE);
+			return 0;
+		}
+
+		if (iconv(in_iconv_h, (ICONV_CONST char **) &conv_pin, &conv_in_len, &conv_pout, &conv_out_len) == (size_t) - 1)
+		{
+			printf("WideCharToMultiByte: iconv failure\n");
+			return 0; /* NOTE(review): iconv_close below is skipped on this path */
+		}
+
+		iconv_close(in_iconv_h);
+	}
+#else
+	while (conv_in_len >= 2)
+	{
+		unsigned int wc;
+
+		wc = (unsigned int) (unsigned char) (*conv_pin++); /* low byte of the 16-bit unit */
+		wc += ((unsigned int) (unsigned char) (*conv_pin++)) << 8;
+		conv_in_len -= 2;
+		/* NOTE(review): the range test below also accepts 0xDC00-0xDFFF as the start of a pair */
+		if (wc >= 0xD800 && wc <= 0xDFFF && conv_in_len >= 2)
+		{
+			/* Code points U+10000 to U+10FFFF using surrogate pair */
+			wc = ((wc - 0xD800) << 10) + 0x10000;
+			wc += (unsigned int) (unsigned char) (*conv_pin++);
+			wc += ((unsigned int) (unsigned char) (*conv_pin++) - 0xDC) << 8;
+			conv_in_len -= 2;
+		}
+		/* NOTE(review): conv_out_len is decremented below without being checked first */
+		if (wc <= 0x7F)
+		{
+			*conv_pout++ = (char) wc;
+			conv_out_len--;
+		}
+		else if (wc <= 0x07FF)
+		{
+			*conv_pout++ = (char) (0xC0 + (wc >> 6));
+			*conv_pout++ = (char) (0x80 + (wc & 0x3F));
+			conv_out_len -= 2;
+		}
+		else if (wc <= 0xFFFF)
+		{
+			*conv_pout++ = (char) (0xE0 + (wc >> 12));
+			*conv_pout++ = (char) (0x80 + ((wc >> 6) & 0x3F));
+			*conv_pout++ = (char) (0x80 + (wc & 0x3F));
+			conv_out_len -= 3;
+		}
+		else
+		{
+			*conv_pout++ = (char) (0xF0 + (wc >> 18));
+			*conv_pout++ = (char) (0x80 + ((wc >> 12) & 0x3F));
+			*conv_pout++ = (char) (0x80 + ((wc >> 6) & 0x3F));
+			*conv_pout++ = (char) (0x80 + (wc & 0x3F));
+			conv_out_len -= 4;
+		}
+	}
+#endif
+
+	if (conv_in_len > 0)
+	{
+		printf("WideCharToMultiByte: conversion failure - %d chars left\n", (int) conv_in_len);
+		return 0;
+	}
+
+	*conv_pout = 0; /* terminator is stored right after the converted bytes */
+
+	return conv_out_len; /* NOTE(review): this is what remains of the budget, not the number of bytes written */
+}
+
 int freerdp_AsciiToUnicodeAlloc(const CHAR* str, WCHAR** wstr, int length)
 {
 	if (!str)
@@ -41,10 +272,10 @@ int freerdp_AsciiToUnicodeAlloc(const CHAR* str, WCHAR** wstr, int length)
 	if (length < 1)
 		length = strlen(str);
 
-	length = MultiByteToWideChar(CP_UTF8, 0, str, length, NULL, 0);
+	length = _MultiByteToWideChar(CP_UTF8, 0, str, length, NULL, 0);
 	*wstr = (WCHAR*) malloc((length + 1) * sizeof(WCHAR));
 
-	MultiByteToWideChar(CP_UTF8, 0, str, length, (LPWSTR) (*wstr), length * sizeof(WCHAR));
+	_MultiByteToWideChar(CP_UTF8, 0, str, length, (LPWSTR) (*wstr), length * sizeof(WCHAR));
 	(*wstr)[length] = 0;
 
 	return length;
@@ -55,7 +286,7 @@ int freerdp_UnicodeToAsciiAlloc(const WCHAR* wstr, CHAR** str, int length)
 	*str = malloc((length * 2) + 1);
 	memset(*str, 0, (length * 2) + 1);
 
-	WideCharToMultiByte(CP_UTF8, 0, wstr, length, *str, length, NULL, NULL);
+	_WideCharToMultiByte(CP_UTF8, 0, wstr, length, *str, length, NULL, NULL);
 	(*str)[length] = 0;
 
 	return length;
diff --git a/winpr/libwinpr/crt/CMakeLists.txt b/winpr/libwinpr/crt/CMakeLists.txt
index d3cd831eb..5d91a5062 100644
--- a/winpr/libwinpr/crt/CMakeLists.txt
+++ b/winpr/libwinpr/crt/CMakeLists.txt
@@ -23,7 +23,10 @@ set(${MODULE_PREFIX}_SRCS
 	conversion.c
 	buffer.c
 	memory.c
-	string.c)
+	string.c
+	unicode.c
+	utf.c
+	utf.h)
 
 if(MSVC AND (NOT MONOLITHIC_BUILD))
 	set(${MODULE_PREFIX}_SRCS ${${MODULE_PREFIX}_SRCS} module.def)
diff --git a/winpr/libwinpr/crt/string.c b/winpr/libwinpr/crt/string.c
index 1527e9609..4015108f5 100644
--- a/winpr/libwinpr/crt/string.c
+++ b/winpr/libwinpr/crt/string.c
@@ -372,234 +372,6 @@ BOOL IsCharLowerW(WCHAR ch)
 	return 0;
 }
 
-/*
- * Advanced String Techniques in C++ - Part I: Unicode
- * http://www.flipcode.com/archives/Advanced_String_Techniques_in_C-Part_I_Unicode.shtml
- */
-
-/*
- * Conversion *to* Unicode
- * MultiByteToWideChar: http://msdn.microsoft.com/en-us/library/windows/desktop/dd319072/
- */
-
-int MultiByteToWideChar(UINT CodePage, DWORD dwFlags, LPCSTR lpMultiByteStr,
-		int cbMultiByte, LPWSTR lpWideCharStr, int cchWideChar)
-{
-	size_t ibl;
-	size_t obl;
-	char* pin;
-	char* pout;
-	char* pout0;
-
-	if (lpMultiByteStr == NULL)
-		return 0;
-
-	if (cbMultiByte < 0)
-		cbMultiByte = strlen(lpMultiByteStr) + 1;
-
-	ibl = cbMultiByte;
-	obl = 2 * ibl;
-
-	if (cchWideChar < 1)
-		return (obl / 2);
-
-	pin = (char*) lpMultiByteStr;
-	pout0 = (char*) lpWideCharStr;
-	pout = pout0;
-
-#ifdef HAVE_ICONV
-	{
-		iconv_t* out_iconv_h;
-
-		out_iconv_h = iconv_open(WINDOWS_CODEPAGE, DEFAULT_CODEPAGE);
-
-		if (errno == EINVAL)
-		{
-			printf("Error opening iconv converter to %s from %s\n", WINDOWS_CODEPAGE, DEFAULT_CODEPAGE);
-			return 0;
-		}
-
-		if (iconv(out_iconv_h, (ICONV_CONST char **) &pin, &ibl, &pout, &obl) == (size_t) - 1)
-		{
-			printf("MultiByteToWideChar: iconv() error\n");
-			return NULL;
-		}
-
-		iconv_close(out_iconv_h);
-	}
-#else
-	while ((ibl > 0) && (obl > 0))
-	{
-		unsigned int wc;
-
-		wc = (unsigned int) (unsigned char) (*pin++);
-		ibl--;
-
-		if (wc >= 0xF0)
-		{
-			wc = (wc - 0xF0) << 18;
-			wc += ((unsigned int) (unsigned char) (*pin++) - 0x80) << 12;
-			wc += ((unsigned int) (unsigned char) (*pin++) - 0x80) << 6;
-			wc += ((unsigned int) (unsigned char) (*pin++) - 0x80);
-			ibl -= 3;
-		}
-		else if (wc >= 0xE0)
-		{
-			wc = (wc - 0xE0) << 12;
-			wc += ((unsigned int) (unsigned char) (*pin++) - 0x80) << 6;
-			wc += ((unsigned int) (unsigned char) (*pin++) - 0x80);
-			ibl -= 2;
-		}
-		else if (wc >= 0xC0)
-		{
-			wc = (wc - 0xC0) << 6;
-			wc += ((unsigned int) (unsigned char) (*pin++) - 0x80);
-			ibl -= 1;
-		}
-
-		if (wc <= 0xFFFF)
-		{
-			*pout++ = (char) (wc & 0xFF);
-			*pout++ = (char) (wc >> 8);
-			obl -= 2;
-		}
-		else
-		{
-			wc -= 0x10000;
-			*pout++ = (char) ((wc >> 10) & 0xFF);
-			*pout++ = (char) ((wc >> 18) + 0xD8);
-			*pout++ = (char) (wc & 0xFF);
-			*pout++ = (char) (((wc >> 8) & 0x03) + 0xDC);
-			obl -= 4;
-		}
-	}
-#endif
-
-	if (ibl > 0)
-	{
-		printf("MultiByteToWideChar: string not fully converted - %d chars left\n", (int) ibl);
-		return 0;
-	}
-
-	return (pout - pout0) / 2;
-}
-
-/*
- * Conversion *from* Unicode
- * WideCharToMultiByte: http://msdn.microsoft.com/en-us/library/windows/desktop/dd374130/
- */
-
-int WideCharToMultiByte(UINT CodePage, DWORD dwFlags, LPCWSTR lpWideCharStr, int cchWideChar,
-		LPSTR lpMultiByteStr, int cbMultiByte, LPCSTR lpDefaultChar, LPBOOL lpUsedDefaultChar)
-{
-	char* pout;
-	char* conv_pout;
-	size_t conv_in_len;
-	size_t conv_out_len;
-	unsigned char* conv_pin;
-
-	/*
-	 * if cbMultiByte is set to 0, the function returns the required buffer size
-	 * for lpMultiByteStr and makes no use of the output parameter itself.
-	 */
-
-	if (cbMultiByte == 0)
-		return lstrlenW(lpWideCharStr);
-
-	/* If cchWideChar is set to 0, the function fails */
-
-	if (cchWideChar == 0)
-		return 0;
-
-	/* cchWideChar is set to -1 if the string is null-terminated */
-
-	if (cchWideChar == -1)
-		cchWideChar = lstrlenW(lpWideCharStr);
-
-	conv_pin = (unsigned char*) lpWideCharStr;
-	conv_in_len = cchWideChar * 2;
-	pout = lpMultiByteStr;
-	conv_pout = pout;
-	conv_out_len = cchWideChar * 2;
-
-#ifdef HAVE_ICONV
-	{
-		iconv_t* in_iconv_h;
-
-		in_iconv_h = iconv_open(DEFAULT_CODEPAGE, WINDOWS_CODEPAGE);
-
-		if (errno == EINVAL)
-		{
-			printf("Error opening iconv converter to %s from %s\n", DEFAULT_CODEPAGE, WINDOWS_CODEPAGE);
-			return 0;
-		}
-
-		if (iconv(in_iconv_h, (ICONV_CONST char **) &conv_pin, &conv_in_len, &conv_pout, &conv_out_len) == (size_t) - 1)
-		{
-			printf("WideCharToMultiByte: iconv failure\n");
-			return 0;
-		}
-
-		iconv_close(in_iconv_h);
-	}
-#else
-	while (conv_in_len >= 2)
-	{
-		unsigned int wc;
-
-		wc = (unsigned int) (unsigned char) (*conv_pin++);
-		wc += ((unsigned int) (unsigned char) (*conv_pin++)) << 8;
-		conv_in_len -= 2;
-
-		if (wc >= 0xD800 && wc <= 0xDFFF && conv_in_len >= 2)
-		{
-			/* Code points U+10000 to U+10FFFF using surrogate pair */
-			wc = ((wc - 0xD800) << 10) + 0x10000;
-			wc += (unsigned int) (unsigned char) (*conv_pin++);
-			wc += ((unsigned int) (unsigned char) (*conv_pin++) - 0xDC) << 8;
-			conv_in_len -= 2;
-		}
-
-		if (wc <= 0x7F)
-		{
-			*conv_pout++ = (char) wc;
-			conv_out_len--;
-		}
-		else if (wc <= 0x07FF)
-		{
-			*conv_pout++ = (char) (0xC0 + (wc >> 6));
-			*conv_pout++ = (char) (0x80 + (wc & 0x3F));
-			conv_out_len -= 2;
-		}
-		else if (wc <= 0xFFFF)
-		{
-			*conv_pout++ = (char) (0xE0 + (wc >> 12));
-			*conv_pout++ = (char) (0x80 + ((wc >> 6) & 0x3F));
-			*conv_pout++ = (char) (0x80 + (wc & 0x3F));
-			conv_out_len -= 3;
-		}
-		else
-		{
-			*conv_pout++ = (char) (0xF0 + (wc >> 18));
-			*conv_pout++ = (char) (0x80 + ((wc >> 12) & 0x3F));
-			*conv_pout++ = (char) (0x80 + ((wc >> 6) & 0x3F));
-			*conv_pout++ = (char) (0x80 + (wc & 0x3F));
-			conv_out_len -= 4;
-		}
-	}
-#endif
-
-	if (conv_in_len > 0)
-	{
-		printf("WideCharToMultiByte: conversion failure - %d chars left\n", (int) conv_in_len);
-		return 0;
-	}
-
-	*conv_pout = 0;
-
-	return conv_out_len;
-}
-
 int lstrlenA(LPCSTR lpString)
 {
 	return strlen(lpString);
diff --git a/winpr/libwinpr/crt/test/TestUnicodeConversion.c b/winpr/libwinpr/crt/test/TestUnicodeConversion.c
index 24b89e6fe..878ff94b3 100644
--- a/winpr/libwinpr/crt/test/TestUnicodeConversion.c
+++ b/winpr/libwinpr/crt/test/TestUnicodeConversion.c
@@ -50,6 +50,8 @@ static BYTE ru_HowAreYou_UTF16[] = "\x1A\x04\x30\x04\x3A\x04\x20\x00\x34\x04\x35
 static int ru_HowAreYou_cchWideChar = 10;
 static int ru_HowAreYou_cbMultiByte = 17;
 
+#if 0
+
 /* Arabic */
 
 static BYTE ar_Hello_UTF8[] = "\xD8\xA7\xD9\x84\xD8\xB3\xD9\x84\xD8\xA7\xD9\x85\x20\xD8\xB9\xD9"
@@ -66,15 +68,17 @@ static BYTE ar_HowAreYou_UTF16[] = "\x43\x06\x4A\x06\x41\x06\x20\x00\x2D\x06\x27
 static int ar_HowAreYou_cchWideChar = 10;
 static int ar_HowAreYou_cbMultiByte = 18;
 
+#endif
+
 /* Chinese */
 
-static BYTE ch_Hello_UTF8[] = "\xE4\xBD\xA0\xE5\xA5\xBD";
-static BYTE ch_Hello_UTF16[] = "\x60\x4F\x7D\x59";
+static BYTE ch_Hello_UTF8[] = "\xE4\xBD\xA0\xE5\xA5\xBD\x00";
+static BYTE ch_Hello_UTF16[] = "\x60\x4F\x7D\x59\x00\x00";
 static int ch_Hello_cchWideChar = 3;
 static int ch_Hello_cbMultiByte = 7;
 
-static BYTE ch_HowAreYou_UTF8[] = "\xE4\xBD\xA0\xE5\xA5\xBD\xE5\x90\x97";
-static BYTE ch_HowAreYou_UTF16[] = "\x60\x4F\x7D\x59\x17\x54";
+static BYTE ch_HowAreYou_UTF8[] = "\xE4\xBD\xA0\xE5\xA5\xBD\xE5\x90\x97\x00";
+static BYTE ch_HowAreYou_UTF16[] = "\x60\x4F\x7D\x59\x17\x54\x00\x00";
 static int ch_HowAreYou_cchWideChar = 4;
 static int ch_HowAreYou_cbMultiByte = 10;
 
@@ -115,8 +119,6 @@ int convert_utf8_to_utf16(BYTE* lpMultiByteStr, BYTE* expected_lpWideCharStr, in
 	int cchWideChar;
 	LPWSTR lpWideCharStr;
 
-	return 1;
-
 	cbMultiByte = strlen((char*) lpMultiByteStr);
 	cchWideChar = MultiByteToWideChar(CP_UTF8, 0, (LPCSTR) lpMultiByteStr, -1, NULL, 0);
 
@@ -288,6 +290,7 @@ int TestUnicodeConversion(int argc, char* argv[])
 	if (convert_utf16_to_utf8(ru_HowAreYou_UTF16, ru_HowAreYou_UTF8, ru_HowAreYou_cbMultiByte) < 1)
 		return -1;
 
+#if 0
 	/* Arabic */
 
 	printf("Arabic\n");
@@ -301,6 +304,7 @@ int TestUnicodeConversion(int argc, char* argv[])
 		return -1;
 	if (convert_utf16_to_utf8(ar_HowAreYou_UTF16, ar_HowAreYou_UTF8, ar_HowAreYou_cbMultiByte) < 1)
 		return -1;
+#endif
 
 	/* Chinese */
 
diff --git a/winpr/libwinpr/crt/unicode.c b/winpr/libwinpr/crt/unicode.c
new file mode 100644
index 000000000..921116a64
--- /dev/null
+++ b/winpr/libwinpr/crt/unicode.c
@@ -0,0 +1,140 @@
+/**
+ * WinPR: Windows Portable Runtime
+ * Unicode Conversion (CRT)
+ *
+ * Copyright 2012 Marc-Andre Moreau <marcandre.moreau@gmail.com>
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <errno.h>
+#include <wctype.h>
+
+#include <winpr/crt.h>
+#include <winpr/print.h>
+
+#ifndef _WIN32
+
+#include "utf.h"
+
+/*
+ * Advanced String Techniques in C++ - Part I: Unicode
+ * http://www.flipcode.com/archives/Advanced_String_Techniques_in_C-Part_I_Unicode.shtml
+ */
+
+/*
+ * Conversion *to* Unicode: UTF-8 to UTF-16 (CodePage and dwFlags are not consulted)
+ * MultiByteToWideChar: http://msdn.microsoft.com/en-us/library/windows/desktop/dd319072/
+ * Returns the number of WCHARs produced, or 0 on failure or when the buffer is too small.
+ */
+
+int MultiByteToWideChar(UINT CodePage, DWORD dwFlags, LPCSTR lpMultiByteStr,
+		int cbMultiByte, LPWSTR lpWideCharStr, int cchWideChar)
+{
+	int length;
+	const BYTE* sourceStart;
+	LPWSTR targetStart;
+	LPWSTR scratch = NULL;
+	ConversionResult result;
+
+	/* A missing or empty source string is a failure */
+	if (!lpMultiByteStr || (cbMultiByte == 0))
+		return 0;
+
+	/* A negative cbMultiByte means null-terminated; the terminator is converted too */
+	if (cbMultiByte < 0)
+		cbMultiByte = strlen((char*) lpMultiByteStr) + 1;
+
+	/*
+	 * Size query (no output buffer or cchWideChar < 1): convert into a scratch buffer
+	 * owned by this call. UTF-8 never yields more UTF-16 units than it has bytes.
+	 */
+	if (!lpWideCharStr || (cchWideChar < 1))
+	{
+		cchWideChar = cbMultiByte;
+		lpWideCharStr = scratch = (LPWSTR) malloc(cchWideChar * sizeof(WCHAR));
+		if (!scratch)
+			return 0;
+	}
+
+	sourceStart = (const BYTE*) lpMultiByteStr;
+	targetStart = lpWideCharStr;
+
+	/* The target end now reflects the real capacity rather than a guess from cbMultiByte */
+	result = ConvertUTF8toUTF16(&sourceStart, &sourceStart[cbMultiByte],
+			&targetStart, &targetStart[cchWideChar], strictConversion);
+	length = (result == targetExhausted) ? 0 : (int) (targetStart - lpWideCharStr);
+
+	/* Keep the historical extra terminator, but only when it fits */
+	if (length < cchWideChar)
+		lpWideCharStr[length] = 0;
+	free(scratch);
+	return length;
+}
+
+/*
+ * Conversion *from* Unicode: UTF-16 to UTF-8 (CodePage, dwFlags and the default-character arguments are not consulted)
+ * WideCharToMultiByte: http://msdn.microsoft.com/en-us/library/windows/desktop/dd374130/
+ * Returns the number of bytes produced, or 0 on failure or when the buffer is too small.
+ */
+
+int WideCharToMultiByte(UINT CodePage, DWORD dwFlags, LPCWSTR lpWideCharStr, int cchWideChar,
+		LPSTR lpMultiByteStr, int cbMultiByte, LPCSTR lpDefaultChar, LPBOOL lpUsedDefaultChar)
+{
+	int length;
+	const WCHAR* sourceStart;
+	BYTE* targetStart;
+	LPSTR scratch = NULL;
+	ConversionResult result;
+
+	/* A missing or empty source string is a failure */
+	if (!lpWideCharStr || (cchWideChar == 0))
+		return 0;
+
+	/* A negative cchWideChar means null-terminated; the terminator is converted too */
+	if (cchWideChar < 0)
+		cchWideChar = _wcslen(lpWideCharStr) + 1;
+
+	/*
+	 * Size query (no output buffer or cbMultiByte < 1): convert into a scratch buffer
+	 * owned by this call. Four bytes per UTF-16 unit is more than any unit can need.
+	 */
+	if (!lpMultiByteStr || (cbMultiByte < 1))
+	{
+		cbMultiByte = (cchWideChar + 1) * 4;
+		lpMultiByteStr = scratch = (LPSTR) malloc(cbMultiByte);
+		if (!scratch)
+			return 0;
+	}
+
+	sourceStart = (const WCHAR*) lpWideCharStr;
+	targetStart = (BYTE*) lpMultiByteStr;
+
+	/* ConvertUTF16toUTF8 stops at the target end, which is now the real capacity */
+	result = ConvertUTF16toUTF8(&sourceStart, &sourceStart[cchWideChar],
+			&targetStart, &targetStart[cbMultiByte], strictConversion);
+	length = (result == targetExhausted) ? 0 : (int) (targetStart - (BYTE*) lpMultiByteStr);
+
+	/* Keep the historical extra terminator, but only when it fits */
+	if (length < cbMultiByte)
+		lpMultiByteStr[length] = '\0';
+	free(scratch);
+	return length;
+}
+
+#endif
+
diff --git a/winpr/libwinpr/crt/utf.c b/winpr/libwinpr/crt/utf.c
new file mode 100644
index 000000000..14c6bb0d4
--- /dev/null
+++ b/winpr/libwinpr/crt/utf.c
@@ -0,0 +1,623 @@
+/*
+ * Copyright 2001-2004 Unicode, Inc.
+ * 
+ * Disclaimer
+ * 
+ * This source code is provided as is by Unicode, Inc. No claims are
+ * made as to fitness for any particular purpose. No warranties of any
+ * kind are expressed or implied. The recipient agrees to determine
+ * applicability of information provided. If this file has been
+ * purchased on magnetic or optical media from Unicode, Inc., the
+ * sole remedy for any claim will be exchange of defective media
+ * within 90 days of receipt.
+ * 
+ * Limitations on Rights to Redistribute This Code
+ * 
+ * Unicode, Inc. hereby grants the right to freely use the information
+ * supplied in this file in the creation of products supporting the
+ * Unicode Standard, and to make copies of this file in any form
+ * for internal or external distribution as long as this notice
+ * remains attached.
+ */
+
+/* ---------------------------------------------------------------------
+
+    Conversions between UTF32, UTF-16, and UTF-8. Source code file.
+    Author: Mark E. Davis, 1994.
+    Rev History: Rick McGowan, fixes & updates May 2001.
+    Sept 2001: fixed const & error conditions per
+    mods suggested by S. Parent & A. Lillich.
+    June 2002: Tim Dodd added detection and handling of incomplete
+    source sequences, enhanced error detection, added casts
+    to eliminate compiler warnings.
+    July 2003: slight mods to back out aggressive FFFE detection.
+    Jan 2004: updated switches in from-UTF8 conversions.
+    Oct 2004: updated to use UNI_MAX_LEGAL_UTF32 in UTF-32 conversions.
+
+    See the header file "utf.h" for complete documentation.
+
+------------------------------------------------------------------------ */
+
+#include "utf.h"
+
+static const int halfShift  = 10; /* used for shifting by 10 bits */
+
+static const DWORD halfBase = 0x0010000UL;
+static const DWORD halfMask = 0x3FFUL;
+
+#define UNI_SUR_HIGH_START  (DWORD)0xD800
+#define UNI_SUR_HIGH_END    (DWORD)0xDBFF
+#define UNI_SUR_LOW_START   (DWORD)0xDC00
+#define UNI_SUR_LOW_END     (DWORD)0xDFFF
+
+/* --------------------------------------------------------------------- */
+
+ConversionResult ConvertUTF32toUTF16 (
+	const DWORD** sourceStart, const DWORD* sourceEnd,
+	WCHAR** targetStart, WCHAR* targetEnd, ConversionFlags flags) {
+	/* Walks the UTF-32 input and emits UTF-16; both cursors are stored back on exit. */
+	ConversionResult status = conversionOK;
+	const DWORD* in = *sourceStart;
+	WCHAR* out = *targetStart;
+	while (in < sourceEnd) {
+		DWORD cp;
+		if (out >= targetEnd) {
+			status = targetExhausted;
+			break;
+		}
+		cp = *in++;
+		if (cp <= UNI_MAX_BMP) {
+			/* Fits in a single unit, unless it falls in the surrogate range */
+			if (cp < UNI_SUR_HIGH_START || cp > UNI_SUR_LOW_END) {
+				*out++ = (WCHAR)cp;
+			} else if (flags != strictConversion) {
+				*out++ = UNI_REPLACEMENT_CHAR;
+			} else {
+				--in; /* leave the cursor on the offending value */
+				status = sourceIllegal;
+				break;
+			}
+		} else if (cp > UNI_MAX_LEGAL_UTF32) {
+			if (flags == strictConversion)
+				status = sourceIllegal; /* recorded, but the loop keeps going */
+			else
+				*out++ = UNI_REPLACEMENT_CHAR;
+		} else {
+			/* Needs a surrogate pair, i.e. room for two units */
+			if (out + 1 >= targetEnd) {
+				--in; /* this code point was not consumed */
+				status = targetExhausted;
+				break;
+			}
+			cp -= halfBase;
+			*out++ = (WCHAR)((cp >> halfShift) + UNI_SUR_HIGH_START);
+			*out++ = (WCHAR)((cp & halfMask) + UNI_SUR_LOW_START);
+		}
+	}
+	*sourceStart = in;
+	*targetStart = out;
+	return status;
+}
+
+/* --------------------------------------------------------------------- */
+
+ConversionResult ConvertUTF16toUTF32 (
+	const WCHAR** sourceStart, const WCHAR* sourceEnd,
+	DWORD** targetStart, DWORD* targetEnd, ConversionFlags flags) {
+	/* Decodes UTF-16 units into UTF-32 code points; both cursors are stored back on exit. */
+	ConversionResult result = conversionOK;
+	const WCHAR* source = *sourceStart;
+	DWORD* target = *targetStart;
+	DWORD ch = 0, ch2 = 0; /* zeroed so the CVTUTF_DEBUG trace never prints indeterminate values */
+	while (source < sourceEnd) {
+		const WCHAR* oldSource = source; /* restart point if the target turns out to be full */
+		ch = *source++;
+		if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) {
+			/* High surrogate: its partner must be the next unit in the buffer */
+			if (source >= sourceEnd) {
+				--source; /* return to the high surrogate */
+				result = sourceExhausted;
+				break;
+			}
+			ch2 = *source;
+			if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) {
+				/* Combine the pair into one supplementary code point */
+				ch = ((ch - UNI_SUR_HIGH_START) << halfShift)
+					+ (ch2 - UNI_SUR_LOW_START) + halfBase;
+				++source;
+			} else if (flags == strictConversion) {
+				--source; /* unpaired high surrogate: point back at it */
+				result = sourceIllegal;
+				break;
+			}
+		} else if (flags == strictConversion) {
+			/* A low surrogate with no preceding high surrogate is illegal */
+			if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) {
+				--source; /* return to the illegal value itself */
+				result = sourceIllegal;
+				break;
+			}
+		}
+		if (target >= targetEnd) {
+			source = oldSource; /* nothing from this iteration was stored */
+			result = targetExhausted;
+			break;
+		}
+		*target++ = ch;
+	}
+	*sourceStart = source;
+	*targetStart = target;
+#ifdef CVTUTF_DEBUG
+	if (result == sourceIllegal) {
+		fprintf(stderr, "ConvertUTF16toUTF32 illegal seq 0x%04x,%04x\n", ch, ch2);
+		fflush(stderr);
+	}
+#endif
+	return result;
+}
+
+/* --------------------------------------------------------------------- */
+
+/*
+ * Index into the table below with the first byte of a UTF-8 sequence to
+ * get the number of trailing bytes that are supposed to follow it.
+ * Note that *legal* UTF-8 values can't have 4 or 5 trailing bytes. The table is
+ * left as-is for anyone who may want to do such conversion, which was
+ * allowed in earlier algorithms.
+ */
+static const char trailingBytesForUTF8[256] = {
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
+};
+
+/*
+ * Magic values subtracted from a buffer value during UTF8 conversion.
+ * This table contains as many values as there might be trailing bytes
+ * in a UTF-8 sequence.
+ */
+static const DWORD offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL, 
+             0x03C82080UL, 0xFA082080UL, 0x82082080UL };
+
+/*
+ * Once the bits are split out into bytes of UTF-8, this is a mask OR-ed
+ * into the first byte, depending on how many bytes follow.  There are
+ * as many entries in this table as there are UTF-8 sequence types.
+ * (I.e., one byte sequence, two byte... etc.). Remember that sequences
+ * for *legal* UTF-8 will be 4 or fewer bytes total.
+ */
+static const BYTE firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
+
+/* --------------------------------------------------------------------- */
+
+/* The interface converts a whole buffer to avoid function-call overhead.
+ * Constants have been gathered. Loops & conditionals have been removed as
+ * much as possible for efficiency, in favor of drop-through switches.
+ * (See "Note A" at the bottom of the file for equivalent code.)
+ * If your compiler supports it, the "isLegalUTF8" call can be turned
+ * into an inline function.
+ */
+
+/* --------------------------------------------------------------------- */
+
+ConversionResult ConvertUTF16toUTF8(
+	const WCHAR** sourceStart, const WCHAR* sourceEnd,
+	BYTE** targetStart, BYTE* targetEnd, ConversionFlags flags)
+{
+	ConversionResult result = conversionOK;
+	const WCHAR* source = *sourceStart;
+	BYTE* target = *targetStart;
+	/* Each loop pass turns one UTF-16 character (a unit or a surrogate pair) into UTF-8 bytes */
+	while (source < sourceEnd)
+	{
+		DWORD ch;
+		unsigned short bytesToWrite = 0;
+		const DWORD byteMask = 0xBF; /* applied after byteMark: clears bit 6 of a trailing byte */
+		const DWORD byteMark = 0x80; /* sets bit 7, so trailing bytes come out as 10xxxxxx */
+		const WCHAR* oldSource = source; /* In case we have to back up because of target overflow. */
+
+		ch = *source++;
+
+		/* If we have a surrogate pair, convert to UTF32 first. */
+		if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END)
+		{
+			/* If the 16 bits following the high surrogate are in the source buffer... */
+
+			if (source < sourceEnd)
+			{
+				DWORD ch2 = *source;
+				/* If it's a low surrogate, convert to UTF32. */
+				if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END)
+				{
+					ch = ((ch - UNI_SUR_HIGH_START) << halfShift)
+							+ (ch2 - UNI_SUR_LOW_START) + halfBase;
+					++source;
+				}
+				else if (flags == strictConversion)
+				{
+					/* it's an unpaired high surrogate */
+					--source; /* return to the illegal value itself */
+					result = sourceIllegal;
+					break;
+				}
+			}
+			else
+			{
+				/* We don't have the 16 bits following the high surrogate. */
+				--source; /* return to the high surrogate */
+				result = sourceExhausted;
+				break;
+			}
+		}
+		else if (flags == strictConversion)
+		{
+			/* A low surrogate that does not follow a high surrogate is illegal in strict mode */
+			if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END)
+			{
+				--source; /* return to the illegal value itself */
+				result = sourceIllegal;
+				break;
+			}
+		}
+
+		/* Figure out how many bytes the result will require */
+		if (ch < (DWORD) 0x80)
+		{
+			bytesToWrite = 1;
+		}
+		else if (ch < (DWORD) 0x800)
+		{
+			bytesToWrite = 2;
+		}
+		else if (ch < (DWORD) 0x10000)
+		{
+			bytesToWrite = 3;
+		}
+		else if (ch < (DWORD) 0x110000)
+		{
+			bytesToWrite = 4;
+		}
+		else
+		{
+			bytesToWrite = 3; /* out-of-range value: the replacement character is written as three bytes */
+			ch = UNI_REPLACEMENT_CHAR;
+		}
+		/* Advance first; the switch below fills the bytes in from last to first */
+		target += bytesToWrite;
+
+		if (target > targetEnd)
+		{
+			source = oldSource; /* Back up source pointer! */
+			target -= bytesToWrite;
+			result = targetExhausted;
+			break;
+		}
+
+		switch (bytesToWrite)
+		{
+			/* note: everything falls through. */
+			case 4: *--target = (BYTE)((ch | byteMark) & byteMask); ch >>= 6;
+			case 3: *--target = (BYTE)((ch | byteMark) & byteMask); ch >>= 6;
+			case 2: *--target = (BYTE)((ch | byteMark) & byteMask); ch >>= 6;
+			case 1: *--target =  (BYTE)(ch | firstByteMark[bytesToWrite]);
+		}
+		/* The switch left target on the first byte; step past everything just written */
+		target += bytesToWrite;
+	}
+
+	*sourceStart = source; /* report how far the input was consumed */
+	*targetStart = target; /* report how far the output was filled */
+
+	return result;
+}
+
+/* --------------------------------------------------------------------- */
+
+/*
+ * Utility routine to tell whether a sequence of bytes is legal UTF-8.
+ * This must be called with the length pre-determined by the first byte.
+ * If not calling this from ConvertUTF8to*, then the length can be set by:
+ *  length = trailingBytesForUTF8[*source]+1;
+ * and the sequence is illegal right away if there aren't that many bytes
+ * available.
+ * If presented with a length > 4, this returns FALSE.  The Unicode
+ * definition of UTF-8 goes up to 4-byte sequences.
+ */
+
+static BOOL isLegalUTF8(const BYTE *source, int length)
+{
+	BYTE a;
+	const BYTE *srcptr = source + length; /* one past the last byte; the checks walk backwards */
+
+	switch (length)
+	{
+		default:
+			return FALSE; /* lengths other than 1..4 are rejected outright */
+
+		/* Everything else falls through when "TRUE"... */
+		case 4: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return FALSE;
+		case 3: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return FALSE;
+		case 2: if ((a = (*--srcptr)) > 0xBF) return FALSE;
+			/* a now holds the second byte; its lower bound depends on the lead byte */
+			switch (*source)
+			{
+				/* no fall-through in this inner switch */
+				case 0xE0: if (a < 0xA0) return FALSE; break;
+				case 0xED: if (a > 0x9F) return FALSE; break;
+				case 0xF0: if (a < 0x90) return FALSE; break;
+				case 0xF4: if (a > 0x8F) return FALSE; break;
+				default:   if (a < 0x80) return FALSE;
+			}
+			/* falls through to the lead-byte check */
+		case 1: if (*source >= 0x80 && *source < 0xC2) return FALSE;
+	}
+	/* Lead bytes above 0xF4 are rejected for every length */
+	if (*source > 0xF4)
+		return FALSE;
+
+	return TRUE;
+}
+
+/* --------------------------------------------------------------------- */
+
+/*
+ * Exported check: TRUE when [source, sourceEnd) begins with one complete,
+ * legal UTF-8 sequence. An empty range is rejected without reading *source.
+ */
+BOOL isLegalUTF8Sequence(const BYTE *source, const BYTE *sourceEnd)
+{
+	int length;
+
+	if (source >= sourceEnd)
+		return FALSE;
+	length = trailingBytesForUTF8[*source] + 1;
+	return (length <= sourceEnd - source) ? isLegalUTF8(source, length) : FALSE;
+}
+
+/* --------------------------------------------------------------------- */
+
+/*
+ * Convert UTF-8 in [*sourceStart, sourceEnd) to UTF-16 in
+ * [*targetStart, targetEnd). On return *sourceStart and *targetStart point
+ * just past the last sequence that was converted and written.
+ * Returns conversionOK, sourceExhausted (truncated sequence at the end),
+ * targetExhausted (no room for the next character) or sourceIllegal
+ * (malformed sequence; *sourceStart is left pointing at its first byte).
+ */
+ConversionResult ConvertUTF8toUTF16(
+	const BYTE** sourceStart, const BYTE* sourceEnd,
+	WCHAR** targetStart, WCHAR* targetEnd, ConversionFlags flags)
+{
+	ConversionResult result = conversionOK;
+	const BYTE* source = *sourceStart;
+	WCHAR* target = *targetStart;
+
+	while (source < sourceEnd)
+	{
+		DWORD ch = 0;
+		unsigned short extraBytesToRead = trailingBytesForUTF8[*source];
+
+		/* Compare byte counts; source + n must not be formed past sourceEnd */
+		if (extraBytesToRead >= sourceEnd - source)
+		{
+			result = sourceExhausted;
+			break;
+		}
+
+		/* Do this check whether lenient or strict */
+		if (!isLegalUTF8(source, extraBytesToRead + 1))
+		{
+			result = sourceIllegal;
+			break;
+		}
+
+		/* The cases all fall through. See "Note A" below. */
+		switch (extraBytesToRead)
+		{
+			case 5: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */
+			case 4: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */
+			case 3: ch += *source++; ch <<= 6;
+			case 2: ch += *source++; ch <<= 6;
+			case 1: ch += *source++; ch <<= 6;
+			case 0: ch += *source++;
+		}
+
+		ch -= offsetsFromUTF8[extraBytesToRead];
+
+		if (target >= targetEnd)
+		{
+			source -= (extraBytesToRead + 1); /* Back up source pointer! */
+			result = targetExhausted;
+			break;
+		}
+
+		if (ch <= UNI_MAX_BMP)
+		{
+			/* Target is a character <= 0xFFFF; surrogate values are not legal here */
+			if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END)
+			{
+				if (flags == strictConversion)
+				{
+					source -= (extraBytesToRead + 1); /* return to the illegal value itself */
+					result = sourceIllegal;
+					break;
+				}
+
+				*target++ = UNI_REPLACEMENT_CHAR;
+			}
+			else
+			{
+				*target++ = (WCHAR) ch; /* normal case */
+			}
+		}
+		else if (ch > UNI_MAX_UTF16)
+		{
+			if (flags == strictConversion)
+			{
+				result = sourceIllegal;
+				source -= (extraBytesToRead + 1); /* return to the start */
+				break; /* Bail out; shouldn't continue */
+			}
+
+			*target++ = UNI_REPLACEMENT_CHAR;
+		}
+		else
+		{
+			/* 0x10000 - 0x10FFFF: written as a surrogate pair, so two units are needed */
+			if (target + 1 >= targetEnd)
+			{
+				source -= (extraBytesToRead + 1); /* Back up source pointer! */
+				result = targetExhausted;
+				break;
+			}
+
+			ch -= halfBase;
+			*target++ = (WCHAR)((ch >> halfShift) + UNI_SUR_HIGH_START);
+			*target++ = (WCHAR)((ch & halfMask) + UNI_SUR_LOW_START);
+		}
+	}
+
+	*sourceStart = source;
+	*targetStart = target;
+
+	return result;
+}
+
+/* --------------------------------------------------------------------- */
+
+/*
+ * Convert UTF-32 in [*sourceStart, sourceEnd) to UTF-8 in [*targetStart,
+ * targetEnd) and advance both start pointers past what was converted.
+ * Returns conversionOK, targetExhausted or sourceIllegal.
+ */
+ConversionResult ConvertUTF32toUTF8(
+	const DWORD** sourceStart, const DWORD* sourceEnd,
+	BYTE** targetStart, BYTE* targetEnd, ConversionFlags flags) {
+	ConversionResult result = conversionOK;
+	const DWORD* source = *sourceStart;
+	BYTE* target = *targetStart;
+	while (source < sourceEnd) {
+		unsigned short bytesToWrite = 0;
+		const DWORD byteMask = 0xBF;
+		const DWORD byteMark = 0x80;
+		DWORD ch = *source++;
+		/* UTF-16 surrogate values are illegal in UTF-32 */
+		if (flags == strictConversion && ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
+			--source; /* return to the illegal value itself */
+			result = sourceIllegal;
+			break;
+		}
+		/* Bytes needed; values beyond Plane 17 are replaced and flagged, then we go on */
+		if (ch < (DWORD)0x80) {                  bytesToWrite = 1;
+		} else if (ch < (DWORD)0x800) {          bytesToWrite = 2;
+		} else if (ch < (DWORD)0x10000) {        bytesToWrite = 3;
+		} else if (ch <= UNI_MAX_LEGAL_UTF32) {  bytesToWrite = 4;
+		} else {                                 bytesToWrite = 3;
+			ch = UNI_REPLACEMENT_CHAR;
+			result = sourceIllegal;
+		}
+		/* Check the room left by count; target + n must not be formed past targetEnd */
+		if (bytesToWrite > targetEnd - target) {
+			--source; /* Back up source pointer! */
+			result = targetExhausted;
+			break;
+		}
+		target += bytesToWrite; /* bytes are stored back to front below */
+		switch (bytesToWrite) { /* note: everything falls through. */
+			case 4: *--target = (BYTE)((ch | byteMark) & byteMask); ch >>= 6;
+			case 3: *--target = (BYTE)((ch | byteMark) & byteMask); ch >>= 6;
+			case 2: *--target = (BYTE)((ch | byteMark) & byteMask); ch >>= 6;
+			case 1: *--target = (BYTE) (ch | firstByteMark[bytesToWrite]);
+		}
+		target += bytesToWrite;
+	}
+	*sourceStart = source;
+	*targetStart = target;
+	return result;
+}
+
+/* --------------------------------------------------------------------- */
+
+/*
+ * Convert UTF-8 in [*sourceStart, sourceEnd) to UTF-32 in [*targetStart,
+ * targetEnd) and advance both start pointers past what was converted.
+ * Returns conversionOK, sourceExhausted, targetExhausted or sourceIllegal.
+ */
+ConversionResult ConvertUTF8toUTF32(
+	const BYTE** sourceStart, const BYTE* sourceEnd,
+	DWORD** targetStart, DWORD* targetEnd, ConversionFlags flags) {
+	ConversionResult result = conversionOK;
+	const BYTE* source = *sourceStart;
+	DWORD* target = *targetStart;
+	while (source < sourceEnd) {
+		DWORD ch = 0;
+		unsigned short extraBytesToRead = trailingBytesForUTF8[*source];
+		/* Compare byte counts; source + n must not be formed past sourceEnd */
+		if (extraBytesToRead >= sourceEnd - source) {
+			result = sourceExhausted;
+			break;
+		}
+		/* Do this check whether lenient or strict */
+		if (!isLegalUTF8(source, extraBytesToRead + 1)) {
+			result = sourceIllegal;
+			break;
+		}
+		switch (extraBytesToRead) { /* the cases all fall through, see "Note A" below */
+			case 5: ch += *source++; ch <<= 6;
+			case 4: ch += *source++; ch <<= 6;
+			case 3: ch += *source++; ch <<= 6;
+			case 2: ch += *source++; ch <<= 6;
+			case 1: ch += *source++; ch <<= 6;
+			case 0: ch += *source++;
+		}
+		ch -= offsetsFromUTF8[extraBytesToRead];
+		if (target >= targetEnd) {
+			source -= (extraBytesToRead + 1); /* Back up the source pointer! */
+			result = targetExhausted;
+			break;
+		}
+		if (ch <= UNI_MAX_LEGAL_UTF32) {
+			/* surrogate values, and anything over Plane 17 (> 0x10FFFF), are illegal in UTF-32 */
+			if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
+				if (flags == strictConversion) {
+					source -= (extraBytesToRead + 1); /* return to the illegal value itself */
+					result = sourceIllegal;
+					break;
+				}
+				*target++ = UNI_REPLACEMENT_CHAR;
+			} else {
+				*target++ = ch;
+			}
+		} else { /* i.e., ch > UNI_MAX_LEGAL_UTF32 */
+			result = sourceIllegal;
+			*target++ = UNI_REPLACEMENT_CHAR;
+		}
+	}
+	*sourceStart = source;
+	*targetStart = target;
+	return result;
+}
+
+/* ---------------------------------------------------------------------
+
+    Note A.
+    The fall-through switches in UTF-8 reading code save a
+    temp variable, some decrements & conditionals.  The switches
+    are equivalent to the following loop:
+    {
+        int tmpBytesToRead = extraBytesToRead+1;
+        do {
+            ch += *source++;
+            --tmpBytesToRead;
+            if (tmpBytesToRead) ch <<= 6;
+        } while (tmpBytesToRead > 0);
+    }
+    In UTF-8 writing code, the switches on "bytesToWrite" are
+    similarly unrolled loops.
+
+   --------------------------------------------------------------------- */
diff --git a/winpr/libwinpr/crt/utf.h b/winpr/libwinpr/crt/utf.h
new file mode 100644
index 000000000..ffe161196
--- /dev/null
+++ b/winpr/libwinpr/crt/utf.h
@@ -0,0 +1,152 @@
+/*
+ * Copyright 2001-2004 Unicode, Inc.
+ * 
+ * Disclaimer
+ * 
+ * This source code is provided as is by Unicode, Inc. No claims are
+ * made as to fitness for any particular purpose. No warranties of any
+ * kind are expressed or implied. The recipient agrees to determine
+ * applicability of information provided. If this file has been
+ * purchased on magnetic or optical media from Unicode, Inc., the
+ * sole remedy for any claim will be exchange of defective media
+ * within 90 days of receipt.
+ * 
+ * Limitations on Rights to Redistribute This Code
+ * 
+ * Unicode, Inc. hereby grants the right to freely use the information
+ * supplied in this file in the creation of products supporting the
+ * Unicode Standard, and to make copies of this file in any form
+ * for internal or external distribution as long as this notice
+ * remains attached.
+ */
+
+/* ---------------------------------------------------------------------
+
+    Conversions between UTF32, UTF-16, and UTF-8.  Header file.
+
+    Several functions are included here, forming a complete set of
+    conversions between the three formats.  UTF-7 is not included
+    here, but is handled in a separate source file.
+
+    Each of these routines takes pointers to input buffers and output
+    buffers.  The input buffers are const.
+
+    Each routine converts the text between *sourceStart and sourceEnd,
+    putting the result into the buffer between *targetStart and
+    targetEnd. Note: the end pointers are *after* the last item: e.g. 
+    *(sourceEnd - 1) is the last item.
+
+    The return result indicates whether the conversion was successful,
+    and if not, whether the problem was in the source or target buffers.
+    (Only the first encountered problem is indicated.)
+
+    After the conversion, *sourceStart and *targetStart are both
+    updated to point to the end of last text successfully converted in
+    the respective buffers.
+
+    Input parameters:
+    sourceStart - pointer to a pointer to the source buffer.
+        The contents of this are modified on return so that
+        it points at the next thing to be converted.
+    targetStart - similarly, pointer to pointer to the target buffer.
+    sourceEnd, targetEnd - respectively pointers to the ends of the
+        two buffers, for overflow checking only.
+
+    These conversion functions take a ConversionFlags argument. When this
+    flag is set to strict, both irregular sequences and isolated surrogates
+    will cause an error.  When the flag is set to lenient, both irregular
+    sequences and isolated surrogates are converted.
+
+    Whether the flag is strict or lenient, all illegal sequences will cause
+    an error return. This includes sequences such as: <F4 90 80 80>, <C0 80>,
+    or <A0> in UTF-8, and values above 0x10FFFF in UTF-32. Conformant code
+    must check for illegal sequences.
+
+    When the flag is set to lenient, characters over 0x10FFFF are converted
+    to the replacement character; otherwise (when the flag is set to strict)
+    they constitute an error.
+
+    Output parameters:
+    The value "sourceIllegal" is returned from some routines if the input
+    sequence is malformed.  When "sourceIllegal" is returned, the source
+    value will point to the illegal value that caused the problem. E.g.,
+    in UTF-8 when a sequence is malformed, it points to the start of the
+    malformed sequence.  
+
+    Author: Mark E. Davis, 1994.
+    Rev History: Rick McGowan, fixes & updates May 2001.
+         Fixes & updates, Sept 2001.
+
+------------------------------------------------------------------------ */
+
+#ifndef FREERDP_UNICODE_CONVERT_UTF_H
+#define FREERDP_UNICODE_CONVERT_UTF_H
+
+#include <winpr/wtypes.h>
+
+/*
+ * Character Types:
+ *
+ * UTF8:	BYTE		8 bits
+ * UTF16:	WCHAR		16 bits
+ * UTF32:	DWORD		32 bits
+ */
+
+/* Some fundamental constants (parenthesized so they expand safely inside any expression) */
+#define UNI_REPLACEMENT_CHAR	((DWORD)0x0000FFFD)	/* stored in place of unconvertible input */
+#define UNI_MAX_BMP		((DWORD)0x0000FFFF)	/* largest value that fits one UTF-16 unit */
+#define UNI_MAX_UTF16		((DWORD)0x0010FFFF)	/* largest value encodable as UTF-16 */
+#define UNI_MAX_UTF32		((DWORD)0x7FFFFFFF)
+#define UNI_MAX_LEGAL_UTF32	((DWORD)0x0010FFFF)	/* values above this are illegal */
+
+typedef enum
+{
+	conversionOK = 0,	/* conversion successful */
+	sourceExhausted = 1,	/* partial character in source, but hit end */
+	targetExhausted = 2,	/* insufficient room in target for conversion */
+	sourceIllegal = 3	/* source sequence is illegal/malformed */
+} ConversionResult;
+
+typedef enum
+{
+	strictConversion = 0,	/* surrogate code points stop the conversion with sourceIllegal */
+	lenientConversion = 1	/* surrogate code points are written as UNI_REPLACEMENT_CHAR */
+} ConversionFlags;
+
+/* Give the declarations below C linkage when included from C++; no effect in C */
+#ifdef __cplusplus
+extern "C" {
+#endif
+/* Converters: each reads [*sourceStart, sourceEnd) and writes [*targetStart, targetEnd). */
+ConversionResult ConvertUTF8toUTF16(
+	const BYTE** sourceStart, const BYTE* sourceEnd,
+	WCHAR** targetStart, WCHAR* targetEnd, ConversionFlags flags);
+
+ConversionResult ConvertUTF16toUTF8(
+	const WCHAR** sourceStart, const WCHAR* sourceEnd,
+	BYTE** targetStart, BYTE* targetEnd, ConversionFlags flags);
+
+ConversionResult ConvertUTF8toUTF32(
+	const BYTE** sourceStart, const BYTE* sourceEnd,
+	DWORD** targetStart, DWORD* targetEnd, ConversionFlags flags);
+
+ConversionResult ConvertUTF32toUTF8(
+	const DWORD** sourceStart, const DWORD* sourceEnd,
+	BYTE** targetStart, BYTE* targetEnd, ConversionFlags flags);
+
+ConversionResult ConvertUTF16toUTF32(
+	const WCHAR** sourceStart, const WCHAR* sourceEnd,
+	DWORD** targetStart, DWORD* targetEnd, ConversionFlags flags);
+
+ConversionResult ConvertUTF32toUTF16(
+	const DWORD** sourceStart, const DWORD* sourceEnd,
+	WCHAR** targetStart, WCHAR* targetEnd, ConversionFlags flags);
+/* Tells whether the UTF-8 sequence starting at source and ending before sourceEnd is legal. */
+BOOL isLegalUTF8Sequence(const BYTE *source, const BYTE *sourceEnd);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* FREERDP_UNICODE_CONVERT_UTF_H */
+