libwinpr-crt: improve UTF conversion efficiency
This commit is contained in:
parent
36a1683a5d
commit
d7bae41092
@ -59,19 +59,6 @@ int MultiByteToWideChar(UINT CodePage, DWORD dwFlags, LPCSTR lpMultiByteStr,
|
||||
if (cbMultiByte == -1)
|
||||
cbMultiByte = strlen((char*) lpMultiByteStr) + 1;
|
||||
|
||||
if (!lpWideCharStr)
|
||||
lpWideCharStr = (LPWSTR) malloc((cbMultiByte + 1) * sizeof(WCHAR) * 4);
|
||||
|
||||
sourceStart = (const BYTE*) lpMultiByteStr;
|
||||
targetStart = lpWideCharStr;
|
||||
|
||||
result = ConvertUTF8toUTF16(&sourceStart, &sourceStart[cbMultiByte],
|
||||
&targetStart, &targetStart[((cbMultiByte + 1) * 4) / sizeof(WCHAR)], strictConversion);
|
||||
length = targetStart - ((WCHAR*) lpWideCharStr);
|
||||
lpWideCharStr[length] = '\0';
|
||||
|
||||
cchWideChar = length;
|
||||
|
||||
/*
|
||||
* if cchWideChar is 0, the function returns the required buffer size
|
||||
* in characters for lpWideCharStr and makes no use of the output parameter itself.
|
||||
@ -79,8 +66,26 @@ int MultiByteToWideChar(UINT CodePage, DWORD dwFlags, LPCSTR lpMultiByteStr,
|
||||
|
||||
if (cchWideChar == 0)
|
||||
{
|
||||
free(lpWideCharStr);
|
||||
return cchWideChar;
|
||||
sourceStart = (const BYTE*) lpMultiByteStr;
|
||||
targetStart = (WCHAR*) NULL;
|
||||
|
||||
result = ConvertUTF8toUTF16(&sourceStart, &sourceStart[cbMultiByte],
|
||||
&targetStart, NULL, strictConversion);
|
||||
|
||||
length = targetStart - ((WCHAR*) NULL);
|
||||
cchWideChar = length;
|
||||
}
|
||||
else
|
||||
{
|
||||
sourceStart = (const BYTE*) lpMultiByteStr;
|
||||
targetStart = lpWideCharStr;
|
||||
|
||||
result = ConvertUTF8toUTF16(&sourceStart, &sourceStart[cbMultiByte],
|
||||
&targetStart, &targetStart[cchWideChar], strictConversion);
|
||||
|
||||
length = targetStart - ((WCHAR*) lpWideCharStr);
|
||||
lpWideCharStr[length] = '\0';
|
||||
cchWideChar = length;
|
||||
}
|
||||
|
||||
return cchWideChar;
|
||||
@ -109,19 +114,6 @@ int WideCharToMultiByte(UINT CodePage, DWORD dwFlags, LPCWSTR lpWideCharStr, int
|
||||
if (cchWideChar == -1)
|
||||
cchWideChar = _wcslen(lpWideCharStr) + 1;
|
||||
|
||||
if (!lpMultiByteStr)
|
||||
lpMultiByteStr = (LPSTR) malloc((cchWideChar + 1) * 4);
|
||||
|
||||
sourceStart = (WCHAR*) lpWideCharStr;
|
||||
targetStart = (BYTE*) lpMultiByteStr;
|
||||
|
||||
result = ConvertUTF16toUTF8(&sourceStart, &sourceStart[cchWideChar],
|
||||
&targetStart, &targetStart[(cchWideChar + 1) * 4], strictConversion);
|
||||
length = targetStart - ((BYTE*) lpMultiByteStr);
|
||||
lpMultiByteStr[length] = '\0';
|
||||
|
||||
cbMultiByte = length;
|
||||
|
||||
/*
|
||||
* if cbMultiByte is 0, the function returns the required buffer size
|
||||
* in bytes for lpMultiByteStr and makes no use of the output parameter itself.
|
||||
@ -129,8 +121,26 @@ int WideCharToMultiByte(UINT CodePage, DWORD dwFlags, LPCWSTR lpWideCharStr, int
|
||||
|
||||
if (cbMultiByte == 0)
|
||||
{
|
||||
free(lpMultiByteStr);
|
||||
return cbMultiByte;
|
||||
sourceStart = (WCHAR*) lpWideCharStr;
|
||||
targetStart = (BYTE*) NULL;
|
||||
|
||||
result = ConvertUTF16toUTF8(&sourceStart, &sourceStart[cchWideChar],
|
||||
&targetStart, NULL, strictConversion);
|
||||
|
||||
length = targetStart - ((BYTE*) NULL);
|
||||
cbMultiByte = length;
|
||||
}
|
||||
else
|
||||
{
|
||||
sourceStart = (WCHAR*) lpWideCharStr;
|
||||
targetStart = (BYTE*) lpMultiByteStr;
|
||||
|
||||
result = ConvertUTF16toUTF8(&sourceStart, &sourceStart[cchWideChar],
|
||||
&targetStart, &targetStart[cbMultiByte], strictConversion);
|
||||
|
||||
length = targetStart - ((BYTE*) lpMultiByteStr);
|
||||
lpMultiByteStr[length] = '\0';
|
||||
cbMultiByte = length;
|
||||
}
|
||||
|
||||
return cbMultiByte;
|
||||
|
@ -209,9 +209,16 @@ ConversionResult ConvertUTF16toUTF8(
|
||||
const WCHAR** sourceStart, const WCHAR* sourceEnd,
|
||||
BYTE** targetStart, BYTE* targetEnd, ConversionFlags flags)
|
||||
{
|
||||
ConversionResult result = conversionOK;
|
||||
const WCHAR* source = *sourceStart;
|
||||
BYTE* target = *targetStart;
|
||||
BYTE* target;
|
||||
const WCHAR* source;
|
||||
BOOL computeLength;
|
||||
ConversionResult result;
|
||||
|
||||
computeLength = (!targetEnd) ? TRUE : FALSE;
|
||||
|
||||
source = *sourceStart;
|
||||
target = *targetStart;
|
||||
result = conversionOK;
|
||||
|
||||
while (source < sourceEnd)
|
||||
{
|
||||
@ -290,7 +297,7 @@ ConversionResult ConvertUTF16toUTF8(
|
||||
|
||||
target += bytesToWrite;
|
||||
|
||||
if (target > targetEnd)
|
||||
if ((target > targetEnd) && (!computeLength))
|
||||
{
|
||||
source = oldSource; /* Back up source pointer! */
|
||||
target -= bytesToWrite;
|
||||
@ -298,13 +305,46 @@ ConversionResult ConvertUTF16toUTF8(
|
||||
break;
|
||||
}
|
||||
|
||||
switch (bytesToWrite)
|
||||
if (!computeLength)
|
||||
{
|
||||
/* note: everything falls through. */
|
||||
case 4: *--target = (BYTE)((ch | byteMark) & byteMask); ch >>= 6;
|
||||
case 3: *--target = (BYTE)((ch | byteMark) & byteMask); ch >>= 6;
|
||||
case 2: *--target = (BYTE)((ch | byteMark) & byteMask); ch >>= 6;
|
||||
case 1: *--target = (BYTE)(ch | firstByteMark[bytesToWrite]);
|
||||
switch (bytesToWrite)
|
||||
{
|
||||
/* note: everything falls through. */
|
||||
|
||||
case 4:
|
||||
*--target = (BYTE)((ch | byteMark) & byteMask);
|
||||
ch >>= 6;
|
||||
case 3:
|
||||
*--target = (BYTE)((ch | byteMark) & byteMask);
|
||||
ch >>= 6;
|
||||
case 2:
|
||||
*--target = (BYTE)((ch | byteMark) & byteMask);
|
||||
ch >>= 6;
|
||||
case 1:
|
||||
*--target = (BYTE)(ch | firstByteMark[bytesToWrite]);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
switch (bytesToWrite)
|
||||
{
|
||||
/* note: everything falls through. */
|
||||
|
||||
case 4:
|
||||
--target;
|
||||
ch >>= 6;
|
||||
|
||||
case 3:
|
||||
--target;
|
||||
ch >>= 6;
|
||||
|
||||
case 2:
|
||||
--target;
|
||||
ch >>= 6;
|
||||
|
||||
case 1:
|
||||
--target;
|
||||
}
|
||||
}
|
||||
|
||||
target += bytesToWrite;
|
||||
@ -385,16 +425,23 @@ ConversionResult ConvertUTF8toUTF16(
|
||||
const BYTE** sourceStart, const BYTE* sourceEnd,
|
||||
WCHAR** targetStart, WCHAR* targetEnd, ConversionFlags flags)
|
||||
{
|
||||
ConversionResult result = conversionOK;
|
||||
const BYTE* source = *sourceStart;
|
||||
WCHAR* target = *targetStart;
|
||||
WCHAR* target;
|
||||
const BYTE* source;
|
||||
BOOL computeLength;
|
||||
ConversionResult result;
|
||||
|
||||
computeLength = (!targetEnd) ? TRUE : FALSE;
|
||||
|
||||
result = conversionOK;
|
||||
source = *sourceStart;
|
||||
target = *targetStart;
|
||||
|
||||
while (source < sourceEnd)
|
||||
{
|
||||
DWORD ch = 0;
|
||||
unsigned short extraBytesToRead = trailingBytesForUTF8[*source];
|
||||
|
||||
if (source + extraBytesToRead >= sourceEnd)
|
||||
if ((source + extraBytesToRead) >= sourceEnd)
|
||||
{
|
||||
result = sourceExhausted;
|
||||
break;
|
||||
@ -422,7 +469,7 @@ ConversionResult ConvertUTF8toUTF16(
|
||||
|
||||
ch -= offsetsFromUTF8[extraBytesToRead];
|
||||
|
||||
if (target >= targetEnd)
|
||||
if ((target >= targetEnd) && (!computeLength))
|
||||
{
|
||||
source -= (extraBytesToRead + 1); /* Back up source pointer! */
|
||||
result = targetExhausted;
|
||||
@ -444,12 +491,18 @@ ConversionResult ConvertUTF8toUTF16(
|
||||
}
|
||||
else
|
||||
{
|
||||
*target++ = UNI_REPLACEMENT_CHAR;
|
||||
if (!computeLength)
|
||||
*target++ = UNI_REPLACEMENT_CHAR;
|
||||
else
|
||||
target++;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
*target++ = (WCHAR) ch; /* normal case */
|
||||
if (!computeLength)
|
||||
*target++ = (WCHAR) ch; /* normal case */
|
||||
else
|
||||
target++;
|
||||
}
|
||||
}
|
||||
else if (ch > UNI_MAX_UTF16)
|
||||
@ -462,23 +515,35 @@ ConversionResult ConvertUTF8toUTF16(
|
||||
}
|
||||
else
|
||||
{
|
||||
*target++ = UNI_REPLACEMENT_CHAR;
|
||||
if (!computeLength)
|
||||
*target++ = UNI_REPLACEMENT_CHAR;
|
||||
else
|
||||
target++;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/* target is a character in range 0xFFFF - 0x10FFFF. */
|
||||
|
||||
if (target + 1 >= targetEnd)
|
||||
if ((target + 1 >= targetEnd) && (!computeLength))
|
||||
{
|
||||
source -= (extraBytesToRead+1); /* Back up source pointer! */
|
||||
source -= (extraBytesToRead + 1); /* Back up source pointer! */
|
||||
result = targetExhausted;
|
||||
break;
|
||||
}
|
||||
|
||||
ch -= halfBase;
|
||||
*target++ = (WCHAR)((ch >> halfShift) + UNI_SUR_HIGH_START);
|
||||
*target++ = (WCHAR)((ch & halfMask) + UNI_SUR_LOW_START);
|
||||
|
||||
if (!computeLength)
|
||||
{
|
||||
*target++ = (WCHAR)((ch >> halfShift) + UNI_SUR_HIGH_START);
|
||||
*target++ = (WCHAR)((ch & halfMask) + UNI_SUR_LOW_START);
|
||||
}
|
||||
else
|
||||
{
|
||||
target++;
|
||||
target++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user