Adjust mbstate_t to embed the state of the ICU converter.

* make room in mbstate_t to hold an ICU converter's state
  (in fact, the whole converter object)
* adjust libroot's locale add-on to clone converters into a given
  mbstate_t directly
* adjust ICUThreadLocalStorageValue to contain the converter pointer
  instead of a converter-ID (if the converter is related to an
  mbstate_t, it points into the mbstate_t).
* adjust users of converters to directly use converter pointers
  instead of ICUConverterRef
* drop now unused ICUConverterManager and ICUConverterRef
* update gcc4 optional package

This brings our multibyte implementation into a fully working state:
both non-ASCII and non-8-bit characters can now be handled normally
in the Terminal, i.e. this finally fixes #6276.

N.B.: Since the size of mbstate_t has changed, everything (including
the compiler!) needs to be rebuilt.
This commit is contained in:
Oliver Tappe 2011-12-15 13:10:26 +01:00
parent 8726c67f5e
commit ec99f3b2a6
11 changed files with 141 additions and 223 deletions

View File

@ -536,8 +536,8 @@ if [ IsOptionalHaikuImagePackageAdded DevelopmentBase ]
if $(HAIKU_GCC_VERSION[1]) = 4 || $(isHybridBuild) {
InstallOptionalHaikuImagePackage
gcc-4.5.3-x86-gcc4-2011-11-22.zip
: $(baseURL)/gcc-4.5.3-x86-gcc4-2011-11-22.zip ;
gcc-4.5.3-x86-gcc4-2011-12-14.zip
: $(baseURL)/gcc-4.5.3-x86-gcc4-2011-12-14.zip ;
}
if $(HAIKU_GCC_VERSION[1]) = 4 {
@ -549,7 +549,7 @@ if [ IsOptionalHaikuImagePackageAdded DevelopmentBase ]
local libs = libstdc++.so libsupc++.so ;
for lib in $(libs) {
AddSymlinkToHaikuHybridImage
develop abi x86 gcc4 tools gcc-4.5.3-haiku-111122 lib
develop abi x86 gcc4 tools gcc-4.5.3-haiku-111214 lib
: /system/lib $(lib) : : true ;
}
}

View File

@ -27,8 +27,10 @@ typedef __WINT_TYPE__ wint_t;
typedef int wctype_t;
typedef struct {
void* converter;
char charset[64];
unsigned int count;
unsigned int converterID;
char data[1024 + 8]; // 1024 bytes for data, 8 for alignment space
} mbstate_t;

View File

@ -8,12 +8,11 @@
#include <pthread.h>
#include <unicode/locid.h>
#include <unicode/ucnv.h>
#include <unicode/unistr.h>
#include <SupportDefs.h>
#include "ICUConverterManager.h"
#include "ICUThreadLocalStorageValue.h"
namespace BPrivate {
@ -38,7 +37,7 @@ protected:
char* destination, int destinationSize,
const char* defaultValue = "");
status_t _GetConverter(ICUConverterRef& converterRefOut);
status_t _GetConverter(UConverter*& converterOut);
static const uint16 skMaxPosixLocaleNameLen = 128;
static const size_t skLCBufSize = 16;

View File

@ -1,106 +0,0 @@
/*
* Copyright 2011, Oliver Tappe, zooey@hirschkaefer.de.
* Distributed under the terms of the MIT License.
*/
#ifndef _ICU_CONVERTER_MANAGER_H
#define _ICU_CONVERTER_MANAGER_H
#include <pthread.h>
#include <map>
#include <unicode/ucnv.h>
#include <SupportDefs.h>
#include <locks.h>
#include <Referenceable.h>
#include <util/DoublyLinkedList.h>
//#include <util/OpenHashTable.h>
#include "ICUThreadLocalStorageValue.h"
namespace BPrivate {
namespace Libroot {
// Record bundling an ICU converter pointer with a charset name and an
// ICUConverterID. It derives from BReferenceable so that it can be held
// through BReference (see the ICUConverterRef typedef below).
// NOTE(review): whether the destructor closes fConverter cannot be told from
// this header - check the implementation file.
class ICUConverterInfo : public BReferenceable {
public:
ICUConverterInfo(UConverter* converter,
const char* charset, ICUConverterID id);
virtual ~ICUConverterInfo();
// trivial accessors returning the stored members
UConverter* Converter() const
{ return fConverter; }
const char* Charset() const
{ return fCharset; }
ICUConverterID ID() const
{ return fID; }
private:
UConverter* fConverter;
// buffer sized with ICU's UCNV_MAX_CONVERTER_NAME_LENGTH
char fCharset[UCNV_MAX_CONVERTER_NAME_LENGTH];
ICUConverterID fID;
};
typedef BReference<ICUConverterInfo> ICUConverterRef;
// Manages converters that are addressed by ICUConverterID. Callers obtain
// the manager through the static Instance() and then create, look up or drop
// converters by ID.
// NOTE(review): only the declaration is visible here; the notes on the
// members below are assumptions derived from their names and types and
// should be confirmed against the implementation file.
class ICUConverterManager {
public:
ICUConverterManager();
~ICUConverterManager();
// Creates a converter for the given charset; the reference and the ID of
// the new converter are passed back through the two out-parameters.
status_t CreateConverter(const char* charset,
ICUConverterRef& converterRefOut,
ICUConverterID& idOut);
// Looks up the converter registered under the given ID.
status_t GetConverter(ICUConverterID id,
ICUConverterRef& converterRefOut);
// Drops the converter registered under the given ID.
status_t DropConverter(ICUConverterID id);
static ICUConverterManager* Instance();
private:
static void _CreateInstance();
static ICUConverterManager* sInstance;
// presumably the upper bound for simultaneously registered converters -
// TODO confirm where and how it is enforced
static const size_t skMaxConvertersPerProcess = 1024;
private:
// ICUConverterInfo that can additionally be linked into a
// DoublyLinkedList (used for ConverterList below)
class LinkedConverterInfo
:
public ICUConverterInfo,
public DoublyLinkedListLinkImpl<LinkedConverterInfo>
{
public:
LinkedConverterInfo(UConverter* converter, const char* charset,
ICUConverterID id)
:
ICUConverterInfo(converter, charset, id)
{
}
};
typedef std::map<ICUConverterID, LinkedConverterInfo*> ConverterMap;
typedef DoublyLinkedList<LinkedConverterInfo> ConverterList;
private:
// lookup of converter infos by ID
ConverterMap fConverterMap;
// presumably kept in least-recently-used order - verify in the
// implementation
ConverterList fLRUConverters;
// presumably guards the members above - locking code is not visible here
mutex fMutex;
ICUConverterID fNextConverterID;
};
} // namespace Libroot
} // namespace BPrivate
#endif // _ICU_CONVERTER_MANAGER_H

View File

@ -49,7 +49,7 @@ public:
private:
status_t _GetConverterForMbState(mbstate_t* mbState,
ICUConverterRef& converterRefOut);
UConverter*& converterOut);
status_t _DropConverterFromMbState(mbstate_t* mbState);

View File

@ -8,6 +8,8 @@
#include <pthread.h>
#include <unicode/ucnv.h>
#include <SupportDefs.h>
@ -15,11 +17,9 @@ namespace BPrivate {
namespace Libroot {
typedef unsigned int ICUConverterID;
struct ICUThreadLocalStorageValue {
ICUConverterID converterID;
UConverter* converter;
char charset[64];
ICUThreadLocalStorageValue();
~ICUThreadLocalStorageValue();

View File

@ -29,44 +29,6 @@ ICUCategoryData::~ICUCategoryData()
}
// Passes back (via converterRefOut) a reference to the converter that belongs
// to the calling thread and matches fGivenCharset.
// The converter ID is kept in the thread-local storage value fetched with
// fThreadLocalStorageKey. If the converter registered under that ID has a
// different charset, it is dropped and a new one is created.
// Returns B_OK on success, otherwise the error reported by
// ICUThreadLocalStorageValue::GetInstanceForKey() or by
// ICUConverterManager::CreateConverter().
status_t
ICUCategoryData::_GetConverter(ICUConverterRef& converterRefOut)
{
// we use different converter-IDs per thread in order to avoid converters
// being used by more than one thread
ICUThreadLocalStorageValue* tlsValue = NULL;
status_t result = ICUThreadLocalStorageValue::GetInstanceForKey(
fThreadLocalStorageKey, tlsValue);
if (result != B_OK)
return result;
ICUConverterRef converterRef;
result = ICUConverterManager::Instance()->GetConverter(
tlsValue->converterID, converterRef);
// a failed lookup is not an error here, it just means that a converter
// has to be created further below
if (result == B_OK) {
if (strcmp(converterRef->Charset(), fGivenCharset) == 0) {
converterRefOut = converterRef;
return B_OK;
}
// charset no longer matches the converter, we need to dump it and
// create a new one
ICUConverterManager::Instance()->DropConverter(tlsValue->converterID);
tlsValue->converterID = 0;
}
// create a new converter for the current charset
// (the ID of the new converter is stored directly in the TLS value)
result = ICUConverterManager::Instance()->CreateConverter(fGivenCharset,
converterRef, tlsValue->converterID);
if (result != B_OK)
return result;
converterRefOut = converterRef;
return B_OK;
}
status_t
ICUCategoryData::SetTo(const Locale& locale, const char* posixLocaleName)
{
@ -115,14 +77,14 @@ ICUCategoryData::_ConvertUnicodeStringToLocaleconvEntry(
const UnicodeString& string, char* destination, int destinationSize,
const char* defaultValue)
{
ICUConverterRef converterRef;
status_t result = _GetConverter(converterRef);
UConverter* converter;
status_t result = _GetConverter(converter);
if (result != B_OK)
return result;
UErrorCode icuStatus = U_ZERO_ERROR;
ucnv_fromUChars(converterRef->Converter(), destination, destinationSize,
string.getBuffer(), string.length(), &icuStatus);
ucnv_fromUChars(converter, destination, destinationSize, string.getBuffer(),
string.length(), &icuStatus);
if (!U_SUCCESS(icuStatus)) {
switch (icuStatus) {
case U_BUFFER_OVERFLOW_ERROR:
@ -144,11 +106,63 @@ ICUCategoryData::_ConvertUnicodeStringToLocaleconvEntry(
}
// Passes back (via converterOut) the calling thread's converter for
// fGivenCharset.
// The converter is cached in the thread-local storage value fetched with
// fThreadLocalStorageKey and is reused for as long as the cached charset
// name equals fGivenCharset. Otherwise the cached converter is closed and a
// new one is opened, set up to stop on any conversion error (in both
// directions) and cached in its place.
// Returns:
//	B_OK				converterOut has been set
//	B_NAME_NOT_FOUND	ucnv_open() did not deliver a converter for
//						fGivenCharset
//	B_ERROR				setting one of the stop-callbacks failed
//	other				the error reported by
//						ICUThreadLocalStorageValue::GetInstanceForKey()
// converterOut is left untouched whenever an error is returned.
status_t
ICUCategoryData::_GetConverter(UConverter*& converterOut)
{
	// we use different converters per thread to avoid concurrent accesses
	ICUThreadLocalStorageValue* tlsValue = NULL;
	status_t result = ICUThreadLocalStorageValue::GetInstanceForKey(
		fThreadLocalStorageKey, tlsValue);
	if (result != B_OK)
		return result;

	if (tlsValue->converter != NULL) {
		if (strcmp(tlsValue->charset, fGivenCharset) == 0) {
			converterOut = tlsValue->converter;
			return B_OK;
		}

		// charset no longer matches the converter, we need to dump it and
		// create a new one
		ucnv_close(tlsValue->converter);

		// Forget the closed converter right away: every error return below
		// would otherwise leave a dangling pointer in the TLS value, which a
		// later call would either hand out again (if the charset matches the
		// stale name) or close a second time.
		tlsValue->converter = NULL;
		tlsValue->charset[0] = '\0';
	}

	// create a new converter for the current charset
	UErrorCode icuStatus = U_ZERO_ERROR;
	UConverter* icuConverter = ucnv_open(fGivenCharset, &icuStatus);
	if (icuConverter == NULL)
		return B_NAME_NOT_FOUND;

	// setup the new converter to stop upon any errors
	icuStatus = U_ZERO_ERROR;
	ucnv_setToUCallBack(icuConverter, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL,
		&icuStatus);
	if (!U_SUCCESS(icuStatus)) {
		ucnv_close(icuConverter);
		return B_ERROR;
	}
	icuStatus = U_ZERO_ERROR;
	ucnv_setFromUCallBack(icuConverter, UCNV_FROM_U_CALLBACK_STOP, NULL, NULL,
		NULL, &icuStatus);
	if (!U_SUCCESS(icuStatus)) {
		ucnv_close(icuConverter);
		return B_ERROR;
	}

	// only a fully configured converter gets cached
	tlsValue->converter = icuConverter;
	strlcpy(tlsValue->charset, fGivenCharset, sizeof(tlsValue->charset));

	converterOut = icuConverter;

	return B_OK;
}
status_t
ICUCategoryData::_SetupConverter()
{
ICUConverterRef converterRef;
return _GetConverter(converterRef);
UConverter* converter;
return _GetConverter(converter);
}

View File

@ -10,8 +10,6 @@
#include <unicode/unistr.h>
#include "ICUConverterManager.h"
namespace BPrivate {
namespace Libroot {
@ -153,14 +151,13 @@ ICUCollateData::_ToUnicodeString(const char* in, UnicodeString& out)
if (inLen == 0)
return B_OK;
ICUConverterRef converterRef;
status_t result = _GetConverter(converterRef);
UConverter* converter;
status_t result = _GetConverter(converter);
if (result != B_OK)
return result;
UErrorCode icuStatus = U_ZERO_ERROR;
int32_t outLen = ucnv_toUChars(converterRef->Converter(), NULL, 0, in,
inLen, &icuStatus);
int32_t outLen = ucnv_toUChars(converter, NULL, 0, in, inLen, &icuStatus);
if (icuStatus != U_BUFFER_OVERFLOW_ERROR)
return B_BAD_VALUE;
if (outLen < 0)
@ -170,8 +167,8 @@ ICUCollateData::_ToUnicodeString(const char* in, UnicodeString& out)
UChar* outBuf = out.getBuffer(outLen + 1);
icuStatus = U_ZERO_ERROR;
outLen = ucnv_toUChars(converterRef->Converter(), outBuf, outLen + 1, in,
inLen, &icuStatus);
outLen
= ucnv_toUChars(converter, outBuf, outLen + 1, in, inLen, &icuStatus);
if (!U_SUCCESS(icuStatus)) {
out.releaseBuffer(0);
return B_BAD_VALUE;

View File

@ -10,10 +10,15 @@
#include <stdlib.h>
#include <string.h>
#include <algorithm>
#include <unicode/uchar.h>
#include <Debug.h>
//#define TRACE_CTYPE
#undef TRACE
#ifdef TRACE_CTYPE
# include <OS.h>
# define TRACE(x) debug_printf x
@ -58,12 +63,11 @@ ICUCtypeData::SetTo(const Locale& locale, const char* posixLocaleName)
UErrorCode icuStatus = U_ZERO_ERROR;
ICUConverterRef converterRef;
result = _GetConverter(converterRef);
UConverter* converter;
result = _GetConverter(converter);
if (result != B_OK)
return result;
UConverter* converter = converterRef->Converter();
ucnv_reset(converter);
fDataBridge->setMbCurMax(ucnv_getMaxCharSize(converter));
@ -207,16 +211,14 @@ status_t
ICUCtypeData::MultibyteToWchar(wchar_t* wcOut, const char* mb, size_t mbLen,
mbstate_t* mbState, size_t& lengthOut)
{
ICUConverterRef converterRef;
status_t result = _GetConverterForMbState(mbState, converterRef);
UConverter* converter = NULL;
status_t result = _GetConverterForMbState(mbState, converter);
if (result != B_OK) {
TRACE(("MultibyteToWchar(): couldn't get converter for ID %d - %lx\n",
mbState->converterID, result));
TRACE(("MultibyteToWchar(): couldn't get converter for mbstate %p - "
"%lx\n", mbState, result));
return result;
}
UConverter* converter = converterRef->Converter();
// do the conversion
UErrorCode icuStatus = U_ZERO_ERROR;
@ -233,8 +235,6 @@ ICUCtypeData::MultibyteToWchar(wchar_t* wcOut, const char* mb, size_t mbLen,
icuStatus = U_ZERO_ERROR;
}
UChar32 unicodeChar = 0xBADBEEF;
if (!U_SUCCESS(icuStatus)) {
// conversion failed because of illegal character sequence
TRACE(("MultibyteToWchar(): illegal character sequence\n"));
@ -247,6 +247,7 @@ ICUCtypeData::MultibyteToWchar(wchar_t* wcOut, const char* mb, size_t mbLen,
mbState->count = sourceLengthUsed;
result = B_BAD_INDEX;
} else {
UChar32 unicodeChar = 0xBADBEEF;
U16_GET(targetBuffer, 0, 0, 2, unicodeChar);
if (unicodeChar == 0) {
@ -274,16 +275,14 @@ ICUCtypeData::MultibyteStringToWchar(wchar_t* wcDest, size_t wcDestLength,
const char** mbSource, size_t mbSourceLength, mbstate_t* mbState,
size_t& lengthOut)
{
ICUConverterRef converterRef;
status_t result = _GetConverterForMbState(mbState, converterRef);
UConverter* converter = NULL;
status_t result = _GetConverterForMbState(mbState, converter);
if (result != B_OK) {
TRACE(("MultibyteStringToWchar(): couldn't get converter for ID %d -"
" %lx\n", mbState->converterID, result));
TRACE(("MultibyteStringToWchar(): couldn't get converter for mbstate %p"
" - %lx\n", mbState, result));
return result;
}
UConverter* converter = converterRef->Converter();
bool wcsIsTerminated = false;
const char* source = *mbSource;
const char* sourceEnd = source + mbSourceLength;
@ -346,16 +345,14 @@ status_t
ICUCtypeData::WcharToMultibyte(char* mbOut, wchar_t wc, mbstate_t* mbState,
size_t& lengthOut)
{
ICUConverterRef converterRef;
status_t result = _GetConverterForMbState(mbState, converterRef);
UConverter* converter = NULL;
status_t result = _GetConverterForMbState(mbState, converter);
if (result != B_OK) {
TRACE(("WcharToMultibyte(): couldn't get converter for ID %d - %lx\n",
mbState->converterID, result));
TRACE(("WcharToMultibyte(): couldn't get converter for mbstate %p - "
"%lx\n", mbState, result));
return result;
}
UConverter* converter = converterRef->Converter();
// convert input from UTF-32 to UTF-16
UChar ucharBuffer[2];
size_t ucharLength;
@ -409,16 +406,14 @@ ICUCtypeData::WcharStringToMultibyte(char* mbDest, size_t mbDestLength,
const wchar_t** wcSource, size_t wcSourceLength, mbstate_t* mbState,
size_t& lengthOut)
{
ICUConverterRef converterRef;
status_t result = _GetConverterForMbState(mbState, converterRef);
UConverter* converter = NULL;
status_t result = _GetConverterForMbState(mbState, converter);
if (result != B_OK) {
TRACE(("WcharStringToMultibyte(): couldn't get converter for ID %d "
"- %lx\n", mbState->converterID, result));
TRACE(("WcharStringToMultibyte(): couldn't get converter for mbstate %p"
" - %lx\n", mbState, result));
return result;
}
UConverter* converter = converterRef->Converter();
bool mbsIsTerminated = false;
const UChar32* source = (UChar32*)*wcSource;
@ -509,29 +504,45 @@ ICUCtypeData::GetLanginfo(int index)
status_t
ICUCtypeData::_GetConverterForMbState(mbstate_t* mbState,
ICUConverterRef& converterRefOut)
UConverter*& converterOut)
{
ICUConverterRef converterRef;
status_t result = ICUConverterManager::Instance()->GetConverter(
mbState->converterID, converterRef);
if (result == B_OK) {
if (strcmp(converterRef->Charset(), fGivenCharset) == 0) {
converterRefOut = converterRef;
return B_OK;
}
// charset no longer matches the converter, we need to dump it and
// create a new one
_DropConverterFromMbState(mbState);
if (strcmp(mbState->charset, fGivenCharset) == 0
&& (char*)mbState->converter >= mbState->data
&& (char*)mbState->converter < mbState->data + 8) {
// charset matches and converter actually lives in *this* mbState,
// so we can use it (if the converter points to the outside, it means
// that the mbstate_t has been copied)
converterOut = (UConverter*)mbState->converter;
return B_OK;
}
// create a new converter for the current charset
result = ICUConverterManager::Instance()->CreateConverter(fGivenCharset,
converterRef, mbState->converterID);
// charset no longer matches the converter, we need to dump it and
// create a new one
_DropConverterFromMbState(mbState);
// create a new converter for the current charset ...
UConverter* icuConverter;
status_t result = _GetConverter(icuConverter);
if (result != B_OK)
return result;
converterRefOut = converterRef;
// ... and clone it into the mbstate
UErrorCode icuStatus = U_ZERO_ERROR;
int32_t bufferSize = sizeof(mbState->data);
UConverter* clone
= ucnv_safeClone(icuConverter, mbState->data, &bufferSize, &icuStatus);
if (clone == NULL || !U_SUCCESS(icuStatus))
return B_ERROR;
if ((char*)clone < mbState->data || (char*)clone >= mbState->data + 8) {
// buffer is too small (shouldn't happen according to ICU docs)
return B_NO_MEMORY;
}
strlcpy(mbState->charset, fGivenCharset, sizeof(mbState->charset));
mbState->converter = clone;
converterOut = clone;
return B_OK;
}
@ -540,8 +551,9 @@ ICUCtypeData::_GetConverterForMbState(mbstate_t* mbState,
status_t
ICUCtypeData::_DropConverterFromMbState(mbstate_t* mbState)
{
ICUConverterManager::Instance()->DropConverter(mbState->converterID);
mbState->converterID = 0;
if (mbState->converter != NULL)
ucnv_close((UConverter*)mbState->converter);
memset(mbState, 0, sizeof(mbstate_t));
return B_OK;
}

View File

@ -8,7 +8,7 @@
#include <new>
#include "ICUConverterManager.h"
#include <unicode/ucnv.h>
namespace BPrivate {
@ -16,15 +16,16 @@ namespace Libroot {
ICUThreadLocalStorageValue::ICUThreadLocalStorageValue()
: converterID(0)
: converter(NULL)
{
charset[0] = '\0';
}
ICUThreadLocalStorageValue::~ICUThreadLocalStorageValue()
{
if (converterID != 0)
ICUConverterManager::Instance()->DropConverter(converterID);
if (converter != NULL)
ucnv_close(converter);
}

View File

@ -11,7 +11,6 @@ UsePrivateHeaders
local sources =
ICUCategoryData.cpp
ICUCollateData.cpp
ICUConverterManager.cpp
ICUCtypeData.cpp
ICULocaleBackend.cpp
ICULocaleconvData.cpp