From 466c605a478319d81316df25d2a956c9701116ce Mon Sep 17 00:00:00 2001 From: Tyler Dauwalder Date: Fri, 4 Jul 2003 06:13:10 +0000 Subject: [PATCH] Static Unicode string class. git-svn-id: file:///srv/svn/repos/haiku/trunk/current@3825 a95241bf-73f2-0310-859d-f6bbb57e9c96 --- .../kernel/file_systems/udf/CS0String.cpp | 49 +++++++ .../kernel/file_systems/udf/CS0String.h | 134 ++++++++++++++++++ 2 files changed, 183 insertions(+) create mode 100644 src/add-ons/kernel/file_systems/udf/CS0String.cpp create mode 100644 src/add-ons/kernel/file_systems/udf/CS0String.h diff --git a/src/add-ons/kernel/file_systems/udf/CS0String.cpp b/src/add-ons/kernel/file_systems/udf/CS0String.cpp new file mode 100644 index 0000000000..2cdadd76ad --- /dev/null +++ b/src/add-ons/kernel/file_systems/udf/CS0String.cpp @@ -0,0 +1,49 @@ +#include "CS0String.h" + +/*! \brief Converts the given unicode character to utf8. + + Writes the encoding of \a c at \c *out (1 byte below 0x80, 2 bytes below 0x800, 3 bytes below 0x10000, 4 bytes up to 0x10ffff) and then stores the advanced write position back into \c *out. Values above 0x10ffff write nothing. No terminator is written and no bounds are checked here. + + \param c Value to encode. + \param out Address of the current write pointer; updated on return. +*/ +void +Udf::unicode_to_utf8(uint32 c, char **out) +{ + char *s = *out; + + if (c < 0x80) + *(s++) = c; + else if (c < 0x800) { + *(s++) = 0xc0 | (c>>6); + *(s++) = 0x80 | (c & 0x3f); + } else if (c < 0x10000) { + *(s++) = 0xe0 | (c>>12); + *(s++) = 0x80 | ((c>>6) & 0x3f); + *(s++) = 0x80 | (c & 0x3f); + } else if (c <= 0x10ffff) { + *(s++) = 0xf0 | (c>>18); + *(s++) = 0x80 | ((c>>12) & 0x3f); + *(s++) = 0x80 | ((c>>6) & 0x3f); + *(s++) = 0x80 | (c & 0x3f); + } + *out = s; +} + +using namespace Udf; +/*! \brief Creates an empty object; fUtf8String starts out as NULL. +*/ +CS0String::CS0String() + : fUtf8String(NULL) +{ +} +/*! \brief Releases the UTF-8 buffer by calling _Clear(). +*/ +CS0String::~CS0String() +{ + DEBUG_INIT(CF_HELPER | CF_HIGH_VOLUME, "CS0String"); + + _Clear(); +} +/*! \brief Deletes the array held in fUtf8String and resets the pointer to NULL. +*/ +void +CS0String::_Clear() +{ + DEBUG_INIT(CF_HELPER | CF_HIGH_VOLUME, "CS0String"); + + delete [] fUtf8String; + fUtf8String = NULL; +} diff --git a/src/add-ons/kernel/file_systems/udf/CS0String.h b/src/add-ons/kernel/file_systems/udf/CS0String.h new file mode 100644 index 0000000000..a34f567281 --- /dev/null +++ b/src/add-ons/kernel/file_systems/udf/CS0String.h @@ -0,0 +1,134 @@ 
+//---------------------------------------------------------------------- +// This software is part of the OpenBeOS distribution and is covered +// by the OpenBeOS license. +// +// Copyright (c) 2003 Tyler Dauwalder, tyler@dauwalder.net +//--------------------------------------------------------------------- + +#ifndef _UDF_CS0_STRING_H +#define _UDF_CS0_STRING_H + +#include /* NOTE(review): the header name that followed this #include, the template parameter lists and the array / reinterpret_cast type arguments below are missing from this copy (angle-bracketed text appears to have been stripped) - restore them from the original commit before building. */ + +#include "cpp.h" + +#include "Array.h" +#include "UdfDebug.h" + +//#include "SupportDefs.h" + +namespace Udf { + +/*! \brief String class that takes as input CS0 unicode strings, + which it converts to UTF8 upon construction. + + For CS0 info, see: ECMA-167 1/7.2.2 (not very helpful), UDF-2.01 2.1.1 + + String() returns the converted buffer held in fUtf8String; the default constructor sets that pointer to NULL. + + NOTE(review): the class holds fUtf8String as a raw pointer that _Clear() deletes, but no copy constructor or copy assignment operator is declared here - confirm instances are never copied. +*/ +class CS0String { +public: + CS0String(); + template + CS0String(array &cs0); + ~CS0String(); + + template + void SetTo(array &cs0); + + template + CS0String& operator=(array &cs0); + + const char* String() const { return fUtf8String; } + +private: + void _Clear(); + + char *fUtf8String; +}; + +void unicode_to_utf8(uint32 c, char **out); +/*! \brief Creates a string from \a cs0 by starting with a NULL buffer and calling SetTo(). +*/ +template +CS0String::CS0String(array &cs0) + : fUtf8String(NULL) +{ + DEBUG_INIT(CF_HELPER | CF_HIGH_VOLUME, "CS0String"); + + SetTo(cs0); +} +/*! \brief Discards any previous string and converts the CS0 string in \a cs0 to UTF-8. + + The compression ID in \c cs0.data[0] selects the input format: 8 and 254 are read as 1-byte characters, 16 and 255 as 2-byte big-endian characters. Conversion stops at the first zero character or after maxLength characters (derived from \c length), and the output is then zero-terminated. + + If the allocation yields NULL or the compression ID is none of these values, a message is PRINTed and no converted string is stored. + + \param cs0 Array whose \c data member holds the raw CS0 bytes. +*/ +template +void +CS0String::SetTo(array &cs0) +{ + DEBUG_INIT(CF_HELPER | CF_HIGH_VOLUME, "CS0String"); + + _Clear(); + + // The first byte of the CS0 string is the compression ID. 
+ // - 8: 1 byte characters + // - 16: 2 byte, big endian characters + // - 254: "CS0 expansion is empty and unique", 1 byte characters + // - 255: "CS0 expansion is empty and unique", 2 byte, big endian characters + PRINT(("compression ID == %d\n", cs0.data[0])); + switch (cs0.data[0]) { + case 8: + case 254: + { + uint8 *inputString = reinterpret_cast(&(cs0.data[1])); + int32 maxLength = length-1; // Max length of input string in uint8 characters + int32 allocationLength = maxLength*2+1; // Need at most 2 utf8 chars per uint8 char + fUtf8String = new char[allocationLength]; + if (fUtf8String) { + char *outputString = fUtf8String; + + /* Convert until a zero character is found or maxLength characters were read. */ for (int32 i = 0; i < maxLength && inputString[i]; i++) { + unicode_to_utf8(inputString[i], &outputString); + } + /* Terminate the UTF-8 output. */ outputString[0] = 0; + } else { + PRINT(("new fUtf8String[%ld] allocation failed\n", allocationLength)); + } + + break; + } + + case 16: + case 255: + { + /* NOTE(review): data[1] is presumably at an odd byte offset, so these 16-bit reads may be unaligned - confirm this is acceptable on all targets. */ uint16 *inputString = reinterpret_cast(&(cs0.data[1])); + int32 maxLength = (length-1) / 2; // Max length of input string in uint16 characters + int32 allocationLength = maxLength*3+1; // Need at most 3 utf8 chars per uint16 char + fUtf8String = new char[allocationLength]; + if (fUtf8String) { + char *outputString = fUtf8String; + + /* Convert until a zero character is found or maxLength characters were read; each unit is byte-swapped from big endian first. */ for (int32 i = 0; i < maxLength && inputString[i]; i++) { + unicode_to_utf8(B_BENDIAN_TO_HOST_INT16(inputString[i]), &outputString); + } + /* Terminate the UTF-8 output. */ outputString[0] = 0; + } else { + PRINT(("new fUtf8String[%ld] allocation failed\n", allocationLength)); + } + + break; + } + + default: + /* Unknown compression ID: nothing is allocated. */ PRINT(("invalid compression id\n")); + } +} +/*! \brief Replaces the current string by calling SetTo() with \a cs0; returns \c *this. +*/ +template +CS0String& +CS0String::operator=(array &cs0) +{ + SetTo(cs0); + return *this; +} + + +}; // namespace UDF + + + +#endif // _UDF_CS0_STRING_H