From 32624578b6b41c49b7e702065d9a7f62234c8cb0 Mon Sep 17 00:00:00 2001 From: Murai Takashi Date: Sun, 19 Jun 2016 19:53:13 +0900 Subject: [PATCH] utility.cpp: Add encoding / decoding UTF-16LE. --- .../partitioning_systems/gpt/utility.cpp | 45 ++++++++++++++++--- 1 file changed, 39 insertions(+), 6 deletions(-) diff --git a/src/add-ons/kernel/partitioning_systems/gpt/utility.cpp b/src/add-ons/kernel/partitioning_systems/gpt/utility.cpp index 4564377d8c..eb9dcfce68 100644 --- a/src/add-ons/kernel/partitioning_systems/gpt/utility.cpp +++ b/src/add-ons/kernel/partitioning_systems/gpt/utility.cpp @@ -38,10 +38,32 @@ void to_utf8(const uint16* from, size_t maxFromLength, char* to, size_t toSize) { for (uint32 i = 0; i < maxFromLength; i++) { - uint16 c = B_LENDIAN_TO_HOST_INT16(from[i]); - if (!c) + // Decoding UTF-16LE + uint32 c = 0; + uint16 w1 = B_LENDIAN_TO_HOST_INT16(from[i]); + if (!w1) break; + bool valid = false; + if (w1 < 0xD800 || w1 > 0xDFFF) { + c = w1; + valid = true; + } + + if (!valid && (w1 >= 0xD800 && w1 <= 0xDBFF)) { + if (i + 1 < maxFromLength) { + uint16 w2 = B_LENDIAN_TO_HOST_INT16(from[i + 1]); + if (w2 >= 0xDC00 && w2 <= 0xDFFF) { + c = ((w1 & 0x3FF) << 10) | (w2 & 0x3FF); + c += 0x10000; + ++i; + valid = true; + } + } + } + + if (!valid) break; + if (c < 0x80) put_utf8_byte(to, toSize, c); else if (c < 0x800) { @@ -70,10 +92,21 @@ to_ucs2(const char* from, size_t fromLength, uint16* to, size_t maxToLength) { size_t index = 0; while (from[0] != '\0' && index < maxToLength) { - // TODO: handle characters that are not representable in UCS-2 better - uint32 code = UTF8ToCharCode(&from); - if (code < 0x10000) - to[index++] = code; + uint32 c = UTF8ToCharCode(&from); + + // Encoding UTF-16LE + if (c > 0x10FFFF) break; // invalid + if (c < 0x10000) { + to[index++] = B_HOST_TO_LENDIAN_INT16(c); + } else { + if (index + 1 >= maxToLength) break; + uint32 c2 = c - 0x10000; + uint16 w1 = 0xD800, w2 = 0xDC00; + w1 = w1 + ((c2 >> 10) & 0x3FF); + w2 = w2 + (c2 & 0x3FF); + to[index++] = B_HOST_TO_LENDIAN_INT16(w1); + to[index++] = B_HOST_TO_LENDIAN_INT16(w2); + } } if (index < maxToLength)