winpr/crt: Fix endianness in unicode conversions

Unicode conversions currently do not work on big-endian machines. The strings are stored as little endian. Use the conversion macros from endian.h to load and store the data properly. Let's always treat wide char strings as little endian. It seems that the Windows API also always expects data to be little endian, so it makes sense to require wide char strings to be little endian as well. The patch fixes only the transformations between UTF8 and UTF16, which are used by freerdp. UTF32 transformations are not used by freerdp. https://github.com/FreeRDP/FreeRDP/issues/2520
2016-04-13 16:26:42 +02:00 · 2016-04-13 16:26:42 +02:00 · f722dc5c28
parent e326e43e66
commit f722dc5c28
1 changed files with 26 additions and 12 deletions
--- a/winpr/libwinpr/crt/utf.c
+++ b/winpr/libwinpr/crt/utf.c
@ -39,6 +39,7 @@
 ------------------------------------------------------------------------ */

 #include "utf.h"
+#include <winpr/endian.h>

 static const int halfShift  = 10; /* used for shifting by 10 bits */

@ -275,7 +276,8 @@ ConversionResult ConvertUTF16toUTF8(
 		const DWORD byteMask = 0xBF;
 		const DWORD byteMark = 0x80;
 		const WCHAR* oldSource = source; /* In case we have to back up because of target overflow. */
-		ch = *source++;
+		Data_Read_UINT16 (source, ch);
+		source++;

 		/* If we have a surrogate pair, convert to UTF32 first. */
 		if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END)
@ -283,7 +285,8 @@ ConversionResult ConvertUTF16toUTF8(
 			/* If the 16 bits following the high surrogate are in the source buffer... */
 			if (source < sourceEnd)
 			{
-				DWORD ch2 = *source;
+				DWORD ch2;
+				Data_Read_UINT16 (source, ch2);

 				/* If it's a low surrogate, convert to UTF32. */
 				if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END)
@ -574,16 +577,20 @@ ConversionResult ConvertUTF8toUTF16(
 				}
 				else
 				{
-					if (!computeLength)
-						*target++ = UNI_REPLACEMENT_CHAR;
+					if (!computeLength) {
+						Data_Write_UINT16(target, UNI_REPLACEMENT_CHAR);
+						target++;
+					}
 					else
 						target++;
 				}
 			}
 			else
 			{
-				if (!computeLength)
-					*target++ = (WCHAR) ch; /* normal case */
+				if (!computeLength) {
+					Data_Write_UINT16(target, ch); /* normal case */
+					target++;
+				}
 				else
 					target++;
 			}
@ -598,8 +605,10 @@ ConversionResult ConvertUTF8toUTF16(
 			}
 			else
 			{
-				if (!computeLength)
-					*target++ = UNI_REPLACEMENT_CHAR;
+				if (!computeLength) {
+					Data_Write_UINT16(target, UNI_REPLACEMENT_CHAR);
+					target++;
+				}
 				else
 					target++;
 			}
@ -616,10 +625,15 @@ ConversionResult ConvertUTF8toUTF16(

 			ch -= halfBase;

-			if (!computeLength)
-			{
-				*target++ = (WCHAR)((ch >> halfShift) + UNI_SUR_HIGH_START);
-				*target++ = (WCHAR)((ch & halfMask) + UNI_SUR_LOW_START);
+			if (!computeLength) {
+				WCHAR wchar;
+
+				wchar = (ch >> halfShift) + UNI_SUR_HIGH_START;
+				Data_Write_UINT16(target, wchar);
+				target++;
+				wchar = (ch & halfMask) + UNI_SUR_LOW_START;
+				Data_Write_UINT16(target, wchar);
+				target++;
 			}
 			else
 			{