/* ****************************************************************************** * Copyright (C) 1996-2009, International Business Machines * * Corporation and others. All Rights Reserved. * ****************************************************************************** */ /** * \file * \brief C++ API: Collation data used to compute minLengthInChars. * \internal */ #ifndef COLL_DATA_H #define COLL_DATA_H #include "unicode/utypes.h" #if !UCONFIG_NO_COLLATION #include "unicode/uobject.h" #include "unicode/ucol.h" U_NAMESPACE_BEGIN /** * The size of the internal buffer for the Collator's short description string. */ #define KEY_BUFFER_SIZE 64 /** * The size of the internal CE buffer in a CEList object */ #define CELIST_BUFFER_SIZE 4 /** * Define this to enable the CEList objects to collect * statistics. */ //#define INSTRUMENT_CELIST /** * The size of the initial list in a StringList object. */ #define STRING_LIST_BUFFER_SIZE 16 /** * Define this to enable the StringList objects to * collect statistics. */ //#define INSTRUMENT_STRING_LIST /** * This object holds a list of CEs generated from a particular * UnicodeString * * @internal ICU 4.0.1 technology preview */ class U_I18N_API CEList : public UObject { public: /** * Construct a CEList object. * * @param coll - the Collator used to collect the CEs. * @param string - the string for which to collect the CEs. * @param status - will be set if any errors occur. * * Note: if on return, status is set to an error code, * the only safe thing to do with this object is to call * the destructor. * * @internal ICU 4.0.1 technology preview */ CEList(UCollator *coll, const UnicodeString &string, UErrorCode &status); /** * The destructor. */ ~CEList(); /** * Return the number of CEs in the list. * * @return the number of CEs in the list. * * @internal ICU 4.0.1 technology preview */ int32_t size() const; /** * Get a particular CE from the list. * * @param index - the index of the CE to return * * @return the CE, or 0 if index is out of range * * @internal ICU 4.0.1 technology preview */ uint32_t get(int32_t index) const; /** * Check if the CEs in another CEList match the * suffix of this list starting at a give offset. * * @param offset - the offset of the suffix * @param other - the other CEList * * @return TRUE if the CEs match, FALSE otherwise. * * @internal ICU 4.0.1 technology preview */ UBool matchesAt(int32_t offset, const CEList *other) const; /** * The index operator. * * @param index - the index * * @return a reference to the given CE in the list * * @internal ICU 4.0.1 technology preview */ uint32_t &operator[](int32_t index) const; /** * UObject glue... */ virtual UClassID getDynamicClassID() const; /** * UObject glue... */ static UClassID getStaticClassID(); private: void add(uint32_t ce, UErrorCode &status); uint32_t ceBuffer[CELIST_BUFFER_SIZE]; uint32_t *ces; int32_t listMax; int32_t listSize; #ifdef INSTRUMENT_CELIST static int32_t _active; static int32_t _histogram[10]; #endif }; /** * StringList * * This object holds a list of UnicodeString objects. * * @internal ICU 4.0.1 technology preview */ class U_I18N_API StringList : public UObject { public: /** * Construct an empty StringList * * @param status - will be set if any errors occur. * * Note: if on return, status is set to an error code, * the only safe thing to do with this object is to call * the destructor. * * @internal ICU 4.0.1 technology preview */ StringList(UErrorCode &status); /** * The destructor. * * @internal ICU 4.0.1 technology preview */ ~StringList(); /** * Add a string to the list. * * @param string - the string to add * @param status - will be set if any errors occur. * * @internal ICU 4.0.1 technology preview */ void add(const UnicodeString *string, UErrorCode &status); /** * Add an array of Unicode code points to the list. * * @param chars - the address of the array of code points * @param count - the number of code points in the array * @param status - will be set if any errors occur. * * @internal ICU 4.0.1 technology preview */ void add(const UChar *chars, int32_t count, UErrorCode &status); /** * Get a particular string from the list. * * @param index - the index of the string * * @return a pointer to the UnicodeString or NULL * if index is out of bounds. * * @internal ICU 4.0.1 technology preview */ const UnicodeString *get(int32_t index) const; /** * Get the number of stings in the list. * * @return the number of strings in the list. * * @internal ICU 4.0.1 technology preview */ int32_t size() const; /** * the UObject glue... */ virtual UClassID getDynamicClassID() const; /** * the UObject glue... */ static UClassID getStaticClassID(); private: UnicodeString *strings; int32_t listMax; int32_t listSize; #ifdef INSTRUMENT_STRING_LIST static int32_t _lists; static int32_t _strings; static int32_t _histogram[101]; #endif }; /* * Forward references to internal classes. */ class StringToCEsMap; class CEToStringsMap; class CollDataCache; /** * CollData * * This class holds the Collator-specific data needed to * compute the length of the shortest string that can * generate a partcular list of CEs. * * CollData objects are quite expensive to compute. Because * of this, they are cached. When you call CollData::open it * returns a reference counted cached object. When you call CollData::close * the reference count on the object is decremented but the object is not deleted. * * If you do not need to reuse any unreferenced objects in the cache, you can call * CollData::flushCollDataCache. If you no longer need any CollData * objects, you can call CollData::freeCollDataCache * * @internal ICU 4.0.1 technology preview */ class U_I18N_API CollData : public UObject { public: /** * Construct a CollData object. * * @param collator - the collator * @param status - will be set if any errors occur. * * @return the CollData object. You must call * close when you are done using the object. * * Note: if on return, status is set to an error code, * the only safe thing to do with this object is to call * CollData::close. * * @internal ICU 4.0.1 technology preview */ static CollData *open(UCollator *collator, UErrorCode &status); /** * Release a CollData object. * * @param collData - the object * * @internal ICU 4.0.1 technology preview */ static void close(CollData *collData); /** * Get the UCollator object used to create this object. * The object returned may not be the exact object that was used to * create this object, but it will have the same behavior. */ UCollator *getCollator() const; /** * Get a list of all the strings which generate a list * of CEs starting with a given CE. * * @param ce - the CE * * return a StringList object containing all * the stirngs, or NULL if there are * no such strings. * * @internal ICU 4.0.1 technology preview. */ const StringList *getStringList(int32_t ce) const; /** * Get a list of the CEs generated by a partcular stirng. * * @param string - the string * * @return a CEList object containt the CEs. You * must call freeCEList when you are finished * using the CEList/ * * @internal ICU 4.0.1 technology preview. */ const CEList *getCEList(const UnicodeString *string) const; /** * Release a CEList returned by getCEList. * * @param list - the CEList to free. * * @internal ICU 4.0.1 technology preview */ void freeCEList(const CEList *list); /** * Return the length of the shortest string that will generate * the given list of CEs. * * @param ces - the CEs * @param offset - the offset of the first CE in the list to use. * * @return the length of the shortest string. * * @internal ICU 4.0.1 technology preview */ int32_t minLengthInChars(const CEList *ces, int32_t offset) const; /** * Return the length of the shortest string that will generate * the given list of CEs. * * Note: the algorithm used to do this computation is recursive. To * limit the amount of recursion, a "history" list is used to record * the best answer starting at a particular offset in the list of CEs. * If the same offset is visited again during the recursion, the answer * in the history list is used. * * @param ces - the CEs * @param offset - the offset of the first CE in the list to use. * @param history - the history list. Must be at least as long as * the number of cEs in the CEList * * @return the length of the shortest string. * * @internal ICU 4.0.1 technology preview */ int32_t minLengthInChars(const CEList *ces, int32_t offset, int32_t *history) const; /** * UObject glue... */ virtual UClassID getDynamicClassID() const; /** * UObject glue... */ static UClassID getStaticClassID(); /** * CollData objects are expensive to compute, and so * may be cached. This routine will free the cached objects and delete * the cache. * * WARNING: Don't call this until you are have called close * for each CollData object that you have used. also, * DO NOT call this if another thread may be calling flushCollDataCache * at the same time. * * @internal 4.0.1 technology preview */ static void freeCollDataCache(); /** * CollData objects are expensive to compute, and so * may be cached. This routine will remove any unused CollData * objects from the cache. * * @internal 4.0.1 technology preview */ static void flushCollDataCache(); private: friend class CollDataCache; friend class CollDataCacheEntry; CollData(UCollator *collator, char *cacheKey, int32_t cachekeyLength, UErrorCode &status); ~CollData(); CollData(); static char *getCollatorKey(UCollator *collator, char *buffer, int32_t bufferLength); static CollDataCache *getCollDataCache(); UCollator *coll; StringToCEsMap *charsToCEList; CEToStringsMap *ceToCharsStartingWith; char keyBuffer[KEY_BUFFER_SIZE]; char *key; static CollDataCache *collDataCache; uint32_t minHan; uint32_t maxHan; uint32_t jamoLimits[4]; }; U_NAMESPACE_END #endif // #if !UCONFIG_NO_COLLATION #endif // #ifndef COLL_DATA_H