utf8 behaviour fixes
This commit is contained in:
parent
57ad88fd82
commit
7540384db1
@ -257,16 +257,24 @@ static const BYTE firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC
|
|||||||
/* --------------------------------------------------------------------- */
|
/* --------------------------------------------------------------------- */
|
||||||
|
|
||||||
ConversionResult ConvertUTF16toUTF8(const WCHAR** sourceStart, const WCHAR* sourceEnd,
|
ConversionResult ConvertUTF16toUTF8(const WCHAR** sourceStart, const WCHAR* sourceEnd,
|
||||||
BYTE** targetStart, BYTE* targetEnd, ConversionFlags flags)
|
BYTE** targetStart, BYTE* te, ConversionFlags flags)
|
||||||
{
|
{
|
||||||
BYTE* target;
|
size_t pos = 0;
|
||||||
|
size_t end = 0;
|
||||||
const WCHAR* source;
|
const WCHAR* source;
|
||||||
BOOL computeLength;
|
const BOOL computeLength = (!te) ? TRUE : FALSE;
|
||||||
ConversionResult result;
|
ConversionResult result = conversionOK;
|
||||||
computeLength = (!targetEnd) ? TRUE : FALSE;
|
|
||||||
|
if (targetStart && te)
|
||||||
|
{
|
||||||
|
const size_t s = (size_t)*targetStart;
|
||||||
|
const size_t e = (size_t)te;
|
||||||
|
if (s > e)
|
||||||
|
return sourceIllegal;
|
||||||
|
end = e - s;
|
||||||
|
}
|
||||||
|
|
||||||
source = *sourceStart;
|
source = *sourceStart;
|
||||||
target = *targetStart;
|
|
||||||
result = conversionOK;
|
|
||||||
|
|
||||||
while (source < sourceEnd)
|
while (source < sourceEnd)
|
||||||
{
|
{
|
||||||
@ -345,12 +353,12 @@ ConversionResult ConvertUTF16toUTF8(const WCHAR** sourceStart, const WCHAR* sour
|
|||||||
ch = UNI_REPLACEMENT_CHAR;
|
ch = UNI_REPLACEMENT_CHAR;
|
||||||
}
|
}
|
||||||
|
|
||||||
target += bytesToWrite;
|
pos += bytesToWrite;
|
||||||
|
|
||||||
if ((target > targetEnd) && (!computeLength))
|
if ((pos > end) && (!computeLength))
|
||||||
{
|
{
|
||||||
source = oldSource; /* Back up source pointer! */
|
source = oldSource; /* Back up source pointer! */
|
||||||
target -= bytesToWrite;
|
pos -= bytesToWrite;
|
||||||
result = targetExhausted;
|
result = targetExhausted;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -361,19 +369,19 @@ ConversionResult ConvertUTF16toUTF8(const WCHAR** sourceStart, const WCHAR* sour
|
|||||||
{
|
{
|
||||||
/* note: everything falls through. */
|
/* note: everything falls through. */
|
||||||
case 4:
|
case 4:
|
||||||
*--target = (BYTE)((ch | byteMark) & byteMask);
|
(*targetStart)[--pos] = (BYTE)((ch | byteMark) & byteMask);
|
||||||
ch >>= 6;
|
ch >>= 6;
|
||||||
|
|
||||||
case 3:
|
case 3:
|
||||||
*--target = (BYTE)((ch | byteMark) & byteMask);
|
(*targetStart)[--pos] = (BYTE)((ch | byteMark) & byteMask);
|
||||||
ch >>= 6;
|
ch >>= 6;
|
||||||
|
|
||||||
case 2:
|
case 2:
|
||||||
*--target = (BYTE)((ch | byteMark) & byteMask);
|
(*targetStart)[--pos] = (BYTE)((ch | byteMark) & byteMask);
|
||||||
ch >>= 6;
|
ch >>= 6;
|
||||||
|
|
||||||
case 1:
|
case 1:
|
||||||
*--target = (BYTE)(ch | firstByteMark[bytesToWrite]);
|
(*targetStart)[--pos] = (BYTE)(ch | firstByteMark[bytesToWrite]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
@ -382,24 +390,27 @@ ConversionResult ConvertUTF16toUTF8(const WCHAR** sourceStart, const WCHAR* sour
|
|||||||
{
|
{
|
||||||
/* note: everything falls through. */
|
/* note: everything falls through. */
|
||||||
case 4:
|
case 4:
|
||||||
--target;
|
--pos;
|
||||||
|
|
||||||
case 3:
|
case 3:
|
||||||
--target;
|
--pos;
|
||||||
|
|
||||||
case 2:
|
case 2:
|
||||||
--target;
|
--pos;
|
||||||
|
|
||||||
case 1:
|
case 1:
|
||||||
--target;
|
--pos;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
target += bytesToWrite;
|
pos += bytesToWrite;
|
||||||
}
|
}
|
||||||
|
|
||||||
*sourceStart = source;
|
*sourceStart = source;
|
||||||
*targetStart = target;
|
if (targetStart && *targetStart)
|
||||||
|
*targetStart = &(*targetStart)[pos];
|
||||||
|
else if (targetStart)
|
||||||
|
*targetStart = (BYTE*)pos;
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -503,14 +514,24 @@ BOOL isLegalUTF8Sequence(const BYTE* source, const BYTE* sourceEnd)
|
|||||||
ConversionResult ConvertUTF8toUTF16(const BYTE** sourceStart, const BYTE* sourceEnd,
|
ConversionResult ConvertUTF8toUTF16(const BYTE** sourceStart, const BYTE* sourceEnd,
|
||||||
WCHAR** targetStart, WCHAR* targetEnd, ConversionFlags flags)
|
WCHAR** targetStart, WCHAR* targetEnd, ConversionFlags flags)
|
||||||
{
|
{
|
||||||
WCHAR* target;
|
size_t target = 0;
|
||||||
|
size_t end = 0;
|
||||||
const BYTE* source;
|
const BYTE* source;
|
||||||
BOOL computeLength;
|
BOOL computeLength;
|
||||||
ConversionResult result;
|
ConversionResult result;
|
||||||
computeLength = (!targetEnd) ? TRUE : FALSE;
|
computeLength = (!targetEnd) ? TRUE : FALSE;
|
||||||
result = conversionOK;
|
result = conversionOK;
|
||||||
source = *sourceStart;
|
source = *sourceStart;
|
||||||
target = *targetStart;
|
|
||||||
|
if (targetStart && targetEnd)
|
||||||
|
{
|
||||||
|
const size_t s = (size_t)*targetStart;
|
||||||
|
const size_t e = (size_t)targetEnd;
|
||||||
|
if (s > e)
|
||||||
|
return sourceIllegal;
|
||||||
|
|
||||||
|
end = ((size_t)(targetEnd)) - ((size_t)(*targetStart));
|
||||||
|
}
|
||||||
|
|
||||||
while (source < sourceEnd)
|
while (source < sourceEnd)
|
||||||
{
|
{
|
||||||
@ -561,7 +582,7 @@ ConversionResult ConvertUTF8toUTF16(const BYTE** sourceStart, const BYTE* source
|
|||||||
|
|
||||||
ch -= offsetsFromUTF8[extraBytesToRead];
|
ch -= offsetsFromUTF8[extraBytesToRead];
|
||||||
|
|
||||||
if ((target >= targetEnd) && (!computeLength))
|
if ((target >= end) && (!computeLength))
|
||||||
{
|
{
|
||||||
source -= (extraBytesToRead + 1); /* Back up source pointer! */
|
source -= (extraBytesToRead + 1); /* Back up source pointer! */
|
||||||
result = targetExhausted;
|
result = targetExhausted;
|
||||||
@ -583,22 +604,14 @@ ConversionResult ConvertUTF8toUTF16(const BYTE** sourceStart, const BYTE* source
|
|||||||
else
|
else
|
||||||
{
|
{
|
||||||
if (!computeLength)
|
if (!computeLength)
|
||||||
{
|
Data_Write_UINT16(&(*targetStart)[target], UNI_REPLACEMENT_CHAR);
|
||||||
Data_Write_UINT16(target, UNI_REPLACEMENT_CHAR);
|
|
||||||
target++;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
target++;
|
target++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if (!computeLength)
|
if (!computeLength)
|
||||||
{
|
Data_Write_UINT16(&(*targetStart)[target], ch); /* normal case */
|
||||||
Data_Write_UINT16(target, ch); /* normal case */
|
|
||||||
target++;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
target++;
|
target++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -613,18 +626,14 @@ ConversionResult ConvertUTF8toUTF16(const BYTE** sourceStart, const BYTE* source
|
|||||||
else
|
else
|
||||||
{
|
{
|
||||||
if (!computeLength)
|
if (!computeLength)
|
||||||
{
|
Data_Write_UINT16(&(*targetStart)[target], UNI_REPLACEMENT_CHAR);
|
||||||
Data_Write_UINT16(target, UNI_REPLACEMENT_CHAR);
|
|
||||||
target++;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
target++;
|
target++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
/* target is a character in range 0xFFFF - 0x10FFFF. */
|
/* target is a character in range 0xFFFF - 0x10FFFF. */
|
||||||
if ((target + 1 >= targetEnd) && (!computeLength))
|
if ((target + 1 >= end) && (!computeLength))
|
||||||
{
|
{
|
||||||
source -= (extraBytesToRead + 1); /* Back up source pointer! */
|
source -= (extraBytesToRead + 1); /* Back up source pointer! */
|
||||||
result = targetExhausted;
|
result = targetExhausted;
|
||||||
@ -637,11 +646,9 @@ ConversionResult ConvertUTF8toUTF16(const BYTE** sourceStart, const BYTE* source
|
|||||||
{
|
{
|
||||||
WCHAR wchar;
|
WCHAR wchar;
|
||||||
wchar = (ch >> halfShift) + UNI_SUR_HIGH_START;
|
wchar = (ch >> halfShift) + UNI_SUR_HIGH_START;
|
||||||
Data_Write_UINT16(target, wchar);
|
Data_Write_UINT16(&(*targetStart)[target++], wchar);
|
||||||
target++;
|
|
||||||
wchar = (ch & halfMask) + UNI_SUR_LOW_START;
|
wchar = (ch & halfMask) + UNI_SUR_LOW_START;
|
||||||
Data_Write_UINT16(target, wchar);
|
Data_Write_UINT16(&(*targetStart)[target++], wchar);
|
||||||
target++;
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -652,7 +659,10 @@ ConversionResult ConvertUTF8toUTF16(const BYTE** sourceStart, const BYTE* source
|
|||||||
}
|
}
|
||||||
|
|
||||||
*sourceStart = source;
|
*sourceStart = source;
|
||||||
*targetStart = target;
|
if (targetStart && (*targetStart))
|
||||||
|
*targetStart = &(*targetStart)[target];
|
||||||
|
else if (targetStart)
|
||||||
|
*targetStart = (WCHAR*)(target * sizeof(WCHAR));
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user