/*
******************************************************************************
* Copyright (C) 1996-2009, International Business Machines *
* Corporation and others. All Rights Reserved. *
******************************************************************************
*/
/**
* \file
* \brief C++ API: Collation data used to compute minLengthInChars.
* \internal
*/
#ifndef COLL_DATA_H
#define COLL_DATA_H
#include "unicode/utypes.h"
#if !UCONFIG_NO_COLLATION
#include "unicode/uobject.h"
#include "unicode/ucol.h"
U_NAMESPACE_BEGIN
/**
* The size of the internal buffer for the Collator's short description string.
*
* Used in this header as the length of the CollData::keyBuffer array.
*/
#define KEY_BUFFER_SIZE 64
/**
* The size of the internal CE buffer in a CEList object.
*
* Used in this header as the length of the CEList::ceBuffer array.
*/
#define CELIST_BUFFER_SIZE 4
/**
* Define this to enable the CEList objects to collect statistics.
*
* When defined, CEList gains the static members _active and _histogram.
* It is left commented out, so those members are not declared by default.
*/
//#define INSTRUMENT_CELIST
/**
* The size of the initial list in a StringList object.
*
* Not referenced elsewhere in this header.
*/
#define STRING_LIST_BUFFER_SIZE 16
/**
* Define this to enable the StringList objects to collect statistics.
*
* When defined, StringList gains the static members _lists, _strings and
* _histogram. It is left commented out, so those members are not declared
* by default.
*/
//#define INSTRUMENT_STRING_LIST
/**
* This object holds a list of CEs generated from a particular
* UnicodeString.
*
* @internal ICU 4.0.1 technology preview
*/
class U_I18N_API CEList : public UObject
{
public:
/**
* Construct a CEList object.
*
* @param coll - the Collator used to collect the CEs.
* @param string - the string for which to collect the CEs.
* @param status - will be set if any errors occur.
*
* Note: if on return, status is set to an error code,
* the only safe thing to do with this object is to call
* the destructor.
*
* @internal ICU 4.0.1 technology preview
*/
CEList(UCollator *coll, const UnicodeString &string, UErrorCode &status);
/**
* The destructor.
*/
~CEList();
/**
* Return the number of CEs in the list.
*
* @return the number of CEs in the list.
*
* @internal ICU 4.0.1 technology preview
*/
int32_t size() const;
/**
* Get a particular CE from the list.
*
* @param index - the index of the CE to return
*
* @return the CE, or 0 if index is out of range
*
* @internal ICU 4.0.1 technology preview
*/
uint32_t get(int32_t index) const;
/**
* Check if the CEs in another CEList match the
* suffix of this list starting at a given offset.
*
* @param offset - the offset of the suffix
* @param other - the other CEList
*
* @return TRUE if the CEs match, FALSE otherwise.
*
* @internal ICU 4.0.1 technology preview
*/
UBool matchesAt(int32_t offset, const CEList *other) const;
/**
* The index operator.
*
* NOTE(review): this operator is declared const but returns a non-const
* reference, so a CE can be modified through a const CEList. Unlike
* get(), no out-of-range behavior is documented here - confirm against
* the implementation before indexing with an unchecked value.
*
* @param index - the index
*
* @return a reference to the given CE in the list
*
* @internal ICU 4.0.1 technology preview
*/
uint32_t &operator[](int32_t index) const;
/**
* UObject glue...
*/
virtual UClassID getDynamicClassID() const;
/**
* UObject glue...
*/
static UClassID getStaticClassID();
private:
/*
* Private helper that takes a single CE and an error code.
* Presumably appends the CE to the list - confirm in the implementation.
*/
void add(uint32_t ce, UErrorCode &status);
/* Internal CE buffer holding CELIST_BUFFER_SIZE entries. */
uint32_t ceBuffer[CELIST_BUFFER_SIZE];
/*
* Pointer to CE storage. Presumably points at ceBuffer or at separately
* allocated storage - TODO confirm in the implementation.
*/
uint32_t *ces;
/* Presumably the current capacity of the storage behind ces - confirm. */
int32_t listMax;
/* Presumably the number of CEs currently stored - confirm. */
int32_t listSize;
#ifdef INSTRUMENT_CELIST
/* Statistics members, declared only when INSTRUMENT_CELIST is defined. */
static int32_t _active;
static int32_t _histogram[10];
#endif
};
/**
* StringList
*
* This object holds a list of UnicodeString objects.
*
* @internal ICU 4.0.1 technology preview
*/
class U_I18N_API StringList : public UObject
{
public:
/**
* Construct an empty StringList.
*
* @param status - will be set if any errors occur.
*
* Note: if on return, status is set to an error code,
* the only safe thing to do with this object is to call
* the destructor.
*
* @internal ICU 4.0.1 technology preview
*/
StringList(UErrorCode &status);
/**
* The destructor.
*
* @internal ICU 4.0.1 technology preview
*/
~StringList();
/**
* Add a string to the list.
*
* @param string - the string to add
* @param status - will be set if any errors occur.
*
* @internal ICU 4.0.1 technology preview
*/
void add(const UnicodeString *string, UErrorCode &status);
/**
* Add an array of Unicode code points to the list.
*
* NOTE(review): the array element type is UChar, so "code points" here
* presumably means UTF-16 code units and count is a number of UChars -
* confirm against the implementation.
*
* @param chars - the address of the array of code points
* @param count - the number of code points in the array
* @param status - will be set if any errors occur.
*
* @internal ICU 4.0.1 technology preview
*/
void add(const UChar *chars, int32_t count, UErrorCode &status);
/**
* Get a particular string from the list.
*
* @param index - the index of the string
*
* @return a pointer to the UnicodeString, or NULL
* if index is out of bounds.
*
* @internal ICU 4.0.1 technology preview
*/
const UnicodeString *get(int32_t index) const;
/**
* Get the number of strings in the list.
*
* @return the number of strings in the list.
*
* @internal ICU 4.0.1 technology preview
*/
int32_t size() const;
/**
* the UObject glue...
*/
virtual UClassID getDynamicClassID() const;
/**
* the UObject glue...
*/
static UClassID getStaticClassID();
private:
/*
* Pointer to the stored UnicodeString objects. Presumably an array owned
* by this list - TODO confirm in the implementation.
*/
UnicodeString *strings;
/* Presumably the current capacity of the strings array - confirm. */
int32_t listMax;
/* Presumably the number of strings currently stored - confirm. */
int32_t listSize;
#ifdef INSTRUMENT_STRING_LIST
/* Statistics members, declared only when INSTRUMENT_STRING_LIST is defined. */
static int32_t _lists;
static int32_t _strings;
static int32_t _histogram[101];
#endif
};
/*
* Forward references to internal classes.
*
* These types are only used through pointers in the CollData declaration
* below (charsToCEList, ceToCharsStartingWith and collDataCache), so
* incomplete types are sufficient in this header.
*/
class StringToCEsMap;
class CEToStringsMap;
class CollDataCache;
/**
* CollData
*
* This class holds the Collator-specific data needed to
* compute the length of the shortest string that can
* generate a particular list of CEs.
*
* CollData objects are quite expensive to compute. Because
* of this, they are cached. When you call CollData::open it
* returns a reference counted cached object. When you call CollData::close
* the reference count on the object is decremented but the object is not deleted.
*
* If you do not need to reuse any unreferenced objects in the cache, you can call
* CollData::flushCollDataCache. If you no longer need any CollData
* objects, you can call CollData::freeCollDataCache.
*
* @internal ICU 4.0.1 technology preview
*/
class U_I18N_API CollData : public UObject
{
public:
/**
* Construct a CollData object.
*
* @param collator - the collator
* @param status - will be set if any errors occur.
*
* @return the CollData object. You must call
* close when you are done using the object.
*
* Note: if on return, status is set to an error code,
* the only safe thing to do with this object is to call
* CollData::close.
*
* @internal ICU 4.0.1 technology preview
*/
static CollData *open(UCollator *collator, UErrorCode &status);
/**
* Release a CollData object.
*
* @param collData - the object
*
* @internal ICU 4.0.1 technology preview
*/
static void close(CollData *collData);
/**
* Get the UCollator object used to create this object.
* The object returned may not be the exact object that was used to
* create this object, but it will have the same behavior.
*
* @return the UCollator object.
*/
UCollator *getCollator() const;
/**
* Get a list of all the strings which generate a list
* of CEs starting with a given CE.
*
* NOTE(review): ce is declared as int32_t here, while CEList stores and
* returns CEs as uint32_t - confirm how callers are expected to convert.
*
* @param ce - the CE
*
* @return a StringList object containing all
* the strings, or NULL if there are
* no such strings.
*
* @internal ICU 4.0.1 technology preview.
*/
const StringList *getStringList(int32_t ce) const;
/**
* Get a list of the CEs generated by a particular string.
*
* @param string - the string
*
* @return a CEList object containing the CEs. You
* must call freeCEList when you are finished
* using the CEList.
*
* @internal ICU 4.0.1 technology preview.
*/
const CEList *getCEList(const UnicodeString *string) const;
/**
* Release a CEList returned by getCEList.
*
* Note that, unlike getCEList, this method is not declared const.
*
* @param list - the CEList to free.
*
* @internal ICU 4.0.1 technology preview
*/
void freeCEList(const CEList *list);
/**
* Return the length of the shortest string that will generate
* the given list of CEs.
*
* @param ces - the CEs
* @param offset - the offset of the first CE in the list to use.
*
* @return the length of the shortest string.
*
* @internal ICU 4.0.1 technology preview
*/
int32_t minLengthInChars(const CEList *ces, int32_t offset) const;
/**
* Return the length of the shortest string that will generate
* the given list of CEs.
*
* Note: the algorithm used to do this computation is recursive. To
* limit the amount of recursion, a "history" list is used to record
* the best answer starting at a particular offset in the list of CEs.
* If the same offset is visited again during the recursion, the answer
* in the history list is used.
*
* @param ces - the CEs
* @param offset - the offset of the first CE in the list to use.
* @param history - the history list. Must be at least as long as
* the number of CEs in the CEList.
*
* @return the length of the shortest string.
*
* @internal ICU 4.0.1 technology preview
*/
int32_t minLengthInChars(const CEList *ces, int32_t offset, int32_t *history) const;
/**
* UObject glue...
*/
virtual UClassID getDynamicClassID() const;
/**
* UObject glue...
*/
static UClassID getStaticClassID();
/**
* CollData objects are expensive to compute, and so
* may be cached. This routine will free the cached objects and delete
* the cache.
*
* WARNING: Don't call this until you have called close
* for each CollData object that you have used. Also,
* DO NOT call this if another thread may be calling flushCollDataCache
* at the same time.
*
* @internal 4.0.1 technology preview
*/
static void freeCollDataCache();
/**
* CollData objects are expensive to compute, and so
* may be cached. This routine will remove any unused CollData
* objects from the cache.
*
* @internal 4.0.1 technology preview
*/
static void flushCollDataCache();
private:
/*
* The cache classes are friends so that they can reach the private
* constructors and destructor declared below.
*/
friend class CollDataCache;
friend class CollDataCacheEntry;
/*
* Constructors and destructor are private: clients obtain instances
* through open() and release them through close().
*/
CollData(UCollator *collator, char *cacheKey, int32_t cachekeyLength, UErrorCode &status);
~CollData();
CollData();
/*
* Presumably writes a cache key for the collator into the caller-supplied
* buffer and returns a pointer to the key - TODO confirm in the
* implementation, including who owns the returned pointer.
*/
static char *getCollatorKey(UCollator *collator, char *buffer, int32_t bufferLength);
/* Accessor for the cache; presumably returns collDataCache - confirm. */
static CollDataCache *getCollDataCache();
/* The collator associated with this object (see getCollator()). */
UCollator *coll;
/* Presumably backs getCEList() (string to CE list lookup) - confirm. */
StringToCEsMap *charsToCEList;
/* Presumably backs getStringList() (CE to strings lookup) - confirm. */
CEToStringsMap *ceToCharsStartingWith;
/* Fixed-size buffer of KEY_BUFFER_SIZE chars for the collator key. */
char keyBuffer[KEY_BUFFER_SIZE];
/*
* Pointer to the key. Presumably points at keyBuffer or at separately
* allocated storage - TODO confirm in the implementation.
*/
char *key;
/* The cache instance, shared by all CollData objects (static member). */
static CollDataCache *collDataCache;
/*
* NOTE(review): presumably CE range limits for Han characters and limits
* for Jamo, used by minLengthInChars - confirm in the implementation.
*/
uint32_t minHan;
uint32_t maxHan;
uint32_t jamoLimits[4];
};
U_NAMESPACE_END
#endif // #if !UCONFIG_NO_COLLATION
#endif // #ifndef COLL_DATA_H