Use ICU instead of custom Unicode conversion.

If the WITH_ICU CMake option is set, the Unicode conversion routines use ICU instead of the custom conversion code.
2017-07-26 12:58:34 +02:00 · 2017-07-26 12:58:34 +02:00 · 1868acb0ac
commit 1868acb0ac
parent f8c9f43843
4 changed files with 111 additions and 9 deletions
--- a/cmake/ConfigOptions.cmake
+++ b/cmake/ConfigOptions.cmake
@ -130,6 +130,7 @@ option(WITH_DEBUG_RINGBUFFER "Enable Ringbuffer debug messages" ${DEFAULT_DEBUG_

 option(WITH_DEBUG_SYMBOLS "Pack debug symbols to installer" OFF)
 option(WITH_CCACHE "Use ccache support if available" ON)
+option(WITH_ICU "Use ICU for unicode conversion" OFF)

 if(ANDROID)
 include(ConfigOptionsAndroid)
--- a/config.h.in
+++ b/config.h.in
@ -33,6 +33,7 @@
 #cmakedefine WITH_NATIVE_SSPI
 #cmakedefine WITH_JPEG
 #cmakedefine WITH_WIN8
+#cmakedefine WITH_ICU
 #cmakedefine WITH_RDPSND_DSOUND
 #cmakedefine WITH_EVENTFD_READ_WRITE
 #cmakedefine HAVE_MATH_C99_LONG_DOUBLE
--- a/winpr/libwinpr/crt/CMakeLists.txt
+++ b/winpr/libwinpr/crt/CMakeLists.txt
@ -15,15 +15,26 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-winpr_module_add(
-	alignment.c
+set (CRT_FILES alignment.c
 	conversion.c
 	buffer.c
 	memory.c
-	string.c
 	unicode.c
-	utf.c
-	utf.h)
+	string.c)
+
+if (NOT WITH_ICU)
+	set (CRT_FILES ${CRT_FILES}
+		utf.c
+		utf.h)
+endif(NOT WITH_ICU)
+
+if (WITH_ICU)
+	find_package(ICU REQUIRED i18n uc io)
+	include_directories(${ICU_INCLUDE_DIRS})
+	winpr_library_add(${ICU_LIBRARIES})
+endif (WITH_ICU)
+
+winpr_module_add(${CRT_FILES})

 if(BUILD_TESTING)
 	add_subdirectory(test)
--- a/winpr/libwinpr/crt/unicode.c
+++ b/winpr/libwinpr/crt/unicode.c
@ -30,7 +30,15 @@

 #ifndef _WIN32

+#if defined(WITH_ICU)
+#include <unicode/ucnv.h>
+#include <unicode/ustring.h>
+#else
 #include "utf.h"
+#endif
+
+#include "../log.h"
+#define TAG WINPR_TAG("unicode")

 /**
 * Notes on cross-platform Unicode portability:
@ -144,14 +152,16 @@
 int MultiByteToWideChar(UINT CodePage, DWORD dwFlags, LPCSTR lpMultiByteStr,
                        int cbMultiByte, LPWSTR lpWideCharStr, int cchWideChar)
 {
-	int length;
 	LPWSTR targetStart;
+#if !defined(WITH_ICU)
 	const BYTE* sourceStart;
+	int length;
 	ConversionResult result;
+#endif

 	/* If cbMultiByte is 0, the function fails */

-	if (cbMultiByte == 0)
+	if ((cbMultiByte == 0) || (cbMultiByte < -1))
 		return 0;

 	/* If cbMultiByte is -1, the string is null-terminated */
@ -163,11 +173,47 @@ int MultiByteToWideChar(UINT CodePage, DWORD dwFlags, LPCSTR lpMultiByteStr,
 	 * if cchWideChar is 0, the function returns the required buffer size
 	 * in characters for lpWideCharStr and makes no use of the output parameter itself.
 	 */
+#if defined(WITH_ICU)
+	{
+		UErrorCode error;
+		int32_t targetLength;
+		int32_t targetCapacity;

+		switch (CodePage)
+		{
+		case CP_UTF8:
+			break;
+		default:
+			WLog_ERR(TAG, "Unsupported encoding %u", CodePage);
+			return 0;
+		}
+
+		if (cbMultiByte > UINT32_MAX)
+			return 0;
+
+		targetStart = lpWideCharStr;
+		targetCapacity = cchWideChar;
+
+		error = U_ZERO_ERROR;
+		if (cchWideChar == 0)
+		{
+			u_strFromUTF8(NULL, 0, &targetLength,
+					lpMultiByteStr, cbMultiByte, &error);
+			cchWideChar = targetLength;
+		}
+		else
+		{
+			u_strFromUTF8(targetStart, targetCapacity, &targetLength,
+					lpMultiByteStr, cbMultiByte, &error);
+			cchWideChar = U_SUCCESS(error) ? targetLength : 0;
+		}
+	}
+#else
 	if (cchWideChar == 0)
 	{
 		sourceStart = (const BYTE*) lpMultiByteStr;
 		targetStart = (WCHAR*) NULL;
+
 		result = ConvertUTF8toUTF16(&sourceStart, &sourceStart[cbMultiByte],
 		                            &targetStart, NULL, strictConversion);
 		length = targetStart - ((WCHAR*) NULL);
@ -176,12 +222,14 @@ int MultiByteToWideChar(UINT CodePage, DWORD dwFlags, LPCSTR lpMultiByteStr,
 	{
 		sourceStart = (const BYTE*) lpMultiByteStr;
 		targetStart = lpWideCharStr;
+
 		result = ConvertUTF8toUTF16(&sourceStart, &sourceStart[cbMultiByte],
 		                            &targetStart, &targetStart[cchWideChar], strictConversion);
 		length = targetStart - ((WCHAR*) lpWideCharStr);
 	}

 	cchWideChar = (result == conversionOK) ? length : 0;
+#endif
 	return cchWideChar;
 }

@ -223,14 +271,16 @@ int MultiByteToWideChar(UINT CodePage, DWORD dwFlags, LPCSTR lpMultiByteStr,
 int WideCharToMultiByte(UINT CodePage, DWORD dwFlags, LPCWSTR lpWideCharStr, int cchWideChar,
                        LPSTR lpMultiByteStr, int cbMultiByte, LPCSTR lpDefaultChar, LPBOOL lpUsedDefaultChar)
 {
-	int length;
 	BYTE* targetStart;
+#if !defined(WITH_ICU)
+	int length;
 	const WCHAR* sourceStart;
 	ConversionResult result;
+#endif

 	/* If cchWideChar is 0, the function fails */

-	if (cchWideChar == 0)
+	if ((cchWideChar == 0) || (cchWideChar < -1))
 		return 0;

 	/* If cchWideChar is -1, the string is null-terminated */
@ -242,25 +292,64 @@ int WideCharToMultiByte(UINT CodePage, DWORD dwFlags, LPCWSTR lpWideCharStr, int
 	 * if cbMultiByte is 0, the function returns the required buffer size
 	 * in bytes for lpMultiByteStr and makes no use of the output parameter itself.
 	 */
+#if defined(WITH_ICU)
+	{
+		UErrorCode error;
+		int32_t targetLength;
+		int32_t targetCapacity;

+		switch(CodePage)
+		{
+		case CP_UTF8:
+			break;
+		default:
+			WLog_ERR(TAG, "Unsupported encoding %u", CodePage);
+			return 0;
+		}
+
+		if (cchWideChar > UINT32_MAX)
+			return 0;
+
+		targetStart = lpMultiByteStr;
+		targetCapacity = cbMultiByte;
+
+		error = U_ZERO_ERROR;
+		if (cbMultiByte == 0)
+		{
+			u_strToUTF8(NULL, 0, &targetLength,
+					lpWideCharStr, cchWideChar, &error);
+			cbMultiByte = targetLength;
+		}
+		else
+		{
+			u_strToUTF8(targetStart, targetCapacity, &targetLength,
+					lpWideCharStr, cchWideChar, &error);
+			cbMultiByte = U_SUCCESS(error) ? targetLength : 0;
+		}
+	}
+#else
 	if (cbMultiByte == 0)
 	{
 		sourceStart = (WCHAR*) lpWideCharStr;
 		targetStart = (BYTE*) NULL;
+
 		result = ConvertUTF16toUTF8(&sourceStart, &sourceStart[cchWideChar],
 		                            &targetStart, NULL, strictConversion);
+
 		length = targetStart - ((BYTE*) NULL);
 	}
 	else
 	{
 		sourceStart = (WCHAR*) lpWideCharStr;
 		targetStart = (BYTE*) lpMultiByteStr;
+
 		result = ConvertUTF16toUTF8(&sourceStart, &sourceStart[cchWideChar],
 		                            &targetStart, &targetStart[cbMultiByte], strictConversion);
 		length = targetStart - ((BYTE*) lpMultiByteStr);
 	}

 	cbMultiByte = (result == conversionOK) ? length : 0;
+#endif
 	return cbMultiByte;
 }