2011-08-10 01:46:13 +04:00
|
|
|
/*
|
2013-02-07 06:05:00 +04:00
|
|
|
* Copyright 2011 Haiku, Inc. All rights reserved.
|
2017-02-10 06:03:59 +03:00
|
|
|
* Distributed under the terms of the MIT License.
|
2011-08-10 01:46:13 +04:00
|
|
|
*
|
|
|
|
* Authors:
|
2013-02-07 06:05:00 +04:00
|
|
|
* Axel Dörfler, axeld@pinc-software.de
|
|
|
|
* John Scipione, jscipione@gmail.com
|
|
|
|
*
|
2011-08-10 01:46:13 +04:00
|
|
|
* Corresponds to:
|
2013-02-07 06:05:00 +04:00
|
|
|
* headers/os/locale/UnicodeChar.h rev 42274
|
|
|
|
* src/kits/locale/UnicodeChar.cpp rev 42274
|
2011-08-10 01:46:13 +04:00
|
|
|
*/
|
2010-08-11 16:33:47 +04:00
|
|
|
|
2013-02-07 06:05:00 +04:00
|
|
|
|
2011-08-10 01:46:13 +04:00
|
|
|
/*!
|
2013-02-07 06:05:00 +04:00
|
|
|
\file UnicodeChar.h
|
2011-08-10 01:46:13 +04:00
|
|
|
\ingroup locale
|
2013-02-07 06:05:00 +04:00
|
|
|
\ingroup libbe
|
|
|
|
\brief Provides the BUnicodeChar class.
|
|
|
|
*/
|
2010-08-11 16:33:47 +04:00
|
|
|
|
2013-02-07 06:05:00 +04:00
|
|
|
|
|
|
|
/*!
|
|
|
|
\class BUnicodeChar
|
|
|
|
\ingroup locale
|
|
|
|
\ingroup libbe
|
2011-08-10 01:46:13 +04:00
|
|
|
\brief Management of all information about characters.
|
2010-08-11 16:33:47 +04:00
|
|
|
|
2011-08-10 01:46:13 +04:00
|
|
|
This class provides a set of tools for managing the whole set of characters
|
|
|
|
defined by unicode. This includes information about special sets of
|
|
|
|
characters such as if the character is whitespace, or alphanumeric. It also
|
|
|
|
provides the uppercase equivalent of a character and determines whether a
|
|
|
|
character can be ornamented with accents.
|
2010-08-11 16:33:47 +04:00
|
|
|
|
2011-08-10 01:46:13 +04:00
|
|
|
This class consists entirely of static methods, so you do not have to
|
|
|
|
instantiate it. You can call one of the methods passing in the character
|
|
|
|
that you want to be examined.
|
2010-08-11 16:33:47 +04:00
|
|
|
|
2013-02-07 06:05:00 +04:00
|
|
|
Note that all the functions work with chars encoded in UTF-32. This is not the
|
2011-08-10 01:46:13 +04:00
|
|
|
most usual way to handle characters, but it is the fastest. To convert an
|
2013-02-07 06:05:00 +04:00
|
|
|
UTF-8 string to a UTF-32 character use the FromUTF8() method.
|
2014-06-12 00:48:17 +04:00
|
|
|
|
|
|
|
\since Haiku R1
|
2010-08-11 16:33:47 +04:00
|
|
|
*/
|
|
|
|
|
2013-02-07 06:05:00 +04:00
|
|
|
|
2010-08-11 16:33:47 +04:00
|
|
|
/*!
|
2011-08-10 01:46:13 +04:00
|
|
|
\fn static bool BUnicodeChar::IsAlpha(uint32 c)
|
|
|
|
\brief Determine if \a c is alphabetic.
|
2010-08-11 16:33:47 +04:00
|
|
|
|
2011-08-10 01:46:13 +04:00
|
|
|
\returns \c true if the specified unicode character is an
|
2014-06-12 00:48:17 +04:00
|
|
|
alphabetic character.
|
|
|
|
|
|
|
|
\since Haiku R1
|
2010-08-11 16:33:47 +04:00
|
|
|
*/
|
|
|
|
|
2013-02-07 06:05:00 +04:00
|
|
|
|
2010-08-11 16:33:47 +04:00
|
|
|
/*!
|
2011-08-10 01:46:13 +04:00
|
|
|
\fn static bool BUnicodeChar::IsAlNum(uint32 c)
|
|
|
|
\brief Determine if \a c is alphanumeric.
|
|
|
|
|
|
|
|
\returns \c true if the specified unicode character is an
|
2014-06-12 00:48:17 +04:00
|
|
|
alphabetic or numeric character.
|
|
|
|
|
|
|
|
\since Haiku R1
|
2010-08-11 16:33:47 +04:00
|
|
|
*/
|
|
|
|
|
2013-02-07 06:05:00 +04:00
|
|
|
|
2010-08-11 16:33:47 +04:00
|
|
|
/*!
|
2011-08-10 01:46:13 +04:00
|
|
|
\fn static bool BUnicodeChar::IsDigit(uint32 c)
|
|
|
|
\brief Determine if \a c is numeric.
|
|
|
|
|
|
|
|
\returns \c true if the specified unicode character is a
|
2014-06-12 00:48:17 +04:00
|
|
|
number character.
|
|
|
|
|
|
|
|
\since Haiku R1
|
2010-08-11 16:33:47 +04:00
|
|
|
*/
|
|
|
|
|
2013-02-07 06:05:00 +04:00
|
|
|
|
2010-08-11 16:33:47 +04:00
|
|
|
/*!
|
2011-08-10 01:46:13 +04:00
|
|
|
\fn static bool BUnicodeChar::IsHexDigit(uint32 c)
|
|
|
|
\brief Determine if \a c is a hexadecimal digit.
|
|
|
|
|
|
|
|
\returns \c true if the specified unicode character is a
|
2014-06-12 00:48:17 +04:00
|
|
|
hexadecimal number character.
|
|
|
|
|
|
|
|
\since Haiku R1
|
2010-08-11 16:33:47 +04:00
|
|
|
*/
|
|
|
|
|
2013-02-07 06:05:00 +04:00
|
|
|
|
2010-08-11 16:33:47 +04:00
|
|
|
/*!
|
2011-08-10 01:46:13 +04:00
|
|
|
\fn static bool BUnicodeChar::IsUpper(uint32 c)
|
|
|
|
\brief Determine if \a c is uppercase.
|
|
|
|
|
|
|
|
\returns \c true if the specified unicode character is an
|
2014-06-12 00:48:17 +04:00
|
|
|
uppercase character.
|
|
|
|
|
|
|
|
\since Haiku R1
|
2010-08-11 16:33:47 +04:00
|
|
|
*/
|
|
|
|
|
2013-02-07 06:05:00 +04:00
|
|
|
|
2010-08-11 16:33:47 +04:00
|
|
|
/*!
|
2011-08-10 01:46:13 +04:00
|
|
|
\fn static bool BUnicodeChar::IsLower(uint32 c)
|
|
|
|
\brief Determine if \a c is lowercase.
|
2010-08-11 16:33:47 +04:00
|
|
|
|
2011-08-10 01:46:13 +04:00
|
|
|
\returns \c true if the specified unicode character is a
|
2014-06-12 00:48:17 +04:00
|
|
|
lowercase character.
|
|
|
|
|
|
|
|
\since Haiku R1
|
2010-08-11 16:33:47 +04:00
|
|
|
*/
|
|
|
|
|
2013-02-07 06:05:00 +04:00
|
|
|
|
2010-08-11 16:33:47 +04:00
|
|
|
/*!
|
2011-08-10 01:46:13 +04:00
|
|
|
\fn static bool BUnicodeChar::IsSpace(uint32 c)
|
|
|
|
\brief Determine if \a c is a space.
|
2010-08-11 16:33:47 +04:00
|
|
|
|
2011-08-10 01:46:13 +04:00
|
|
|
Unlike IsWhitespace() this function will return \c true for non-breakable
|
|
|
|
spaces. This method is useful for determining if the character will render
|
|
|
|
as an empty space which can be stretched on-screen.
|
|
|
|
|
|
|
|
\returns \c true if the specified unicode character is some
|
2014-06-12 00:48:17 +04:00
|
|
|
kind of a space character.
|
2011-08-10 01:46:13 +04:00
|
|
|
|
|
|
|
\sa IsWhitespace()
|
2014-06-12 00:48:17 +04:00
|
|
|
|
|
|
|
\since Haiku R1
|
2010-08-11 16:33:47 +04:00
|
|
|
*/
|
|
|
|
|
2013-02-07 06:05:00 +04:00
|
|
|
|
2010-08-11 16:33:47 +04:00
|
|
|
/*!
|
2011-08-10 01:46:13 +04:00
|
|
|
\fn static bool BUnicodeChar::IsWhitespace(uint32 c)
|
|
|
|
\brief Determine if \a c is whitespace.
|
|
|
|
|
|
|
|
This method is essentially the same as IsSpace(), but excludes all
|
|
|
|
non-breakable spaces.
|
|
|
|
|
|
|
|
\returns \c true if the specified unicode character is a whitespace
|
2014-06-12 00:48:17 +04:00
|
|
|
character.
|
2010-08-11 16:33:47 +04:00
|
|
|
|
2011-08-10 01:46:13 +04:00
|
|
|
\sa IsSpace()
|
2014-06-12 00:48:17 +04:00
|
|
|
|
|
|
|
\since Haiku R1
|
2010-08-11 16:33:47 +04:00
|
|
|
*/
|
|
|
|
|
2013-02-07 06:05:00 +04:00
|
|
|
|
2010-08-11 16:33:47 +04:00
|
|
|
/*!
|
2011-08-10 01:46:13 +04:00
|
|
|
\fn static bool BUnicodeChar::IsControl(uint32 c)
|
|
|
|
\brief Determine if \a c is a control character.
|
|
|
|
|
|
|
|
Example control characters are the non-printable ASCII characters from
|
|
|
|
0x0 to 0x1F.
|
|
|
|
|
|
|
|
\returns \c true if the specified unicode character is a control
|
2014-06-12 00:48:17 +04:00
|
|
|
character.
|
2011-08-10 01:46:13 +04:00
|
|
|
|
|
|
|
\sa IsPrintable()
|
2014-06-12 00:48:17 +04:00
|
|
|
|
|
|
|
\since Haiku R1
|
2010-08-11 16:33:47 +04:00
|
|
|
*/
|
|
|
|
|
2013-02-07 06:05:00 +04:00
|
|
|
|
2010-08-11 16:33:47 +04:00
|
|
|
/*!
|
2011-08-10 01:46:13 +04:00
|
|
|
\fn static bool BUnicodeChar::IsPunctuation(uint32 c)
|
|
|
|
\brief Determine if \a c is a punctuation character.
|
|
|
|
|
|
|
|
\returns \c true if the specified unicode character is a
|
2014-06-12 00:48:17 +04:00
|
|
|
punctuation character.
|
|
|
|
|
|
|
|
\since Haiku R1
|
2010-08-11 16:33:47 +04:00
|
|
|
*/
|
|
|
|
|
2013-02-07 06:05:00 +04:00
|
|
|
|
2010-08-11 16:33:47 +04:00
|
|
|
/*!
|
2011-08-10 01:46:13 +04:00
|
|
|
\fn static bool BUnicodeChar::IsPrintable(uint32 c)
|
|
|
|
\brief Determine if \a c is printable.
|
|
|
|
|
|
|
|
Printable characters are not control characters.
|
2010-08-11 16:33:47 +04:00
|
|
|
|
2011-08-10 01:46:13 +04:00
|
|
|
\returns \c true if the specified unicode character is a printable
|
2014-06-12 00:48:17 +04:00
|
|
|
character.
|
2011-08-10 01:46:13 +04:00
|
|
|
|
|
|
|
\sa IsControl()
|
2014-06-12 00:48:17 +04:00
|
|
|
|
|
|
|
\since Haiku R1
|
2010-08-11 16:33:47 +04:00
|
|
|
*/
|
|
|
|
|
2013-02-07 06:05:00 +04:00
|
|
|
|
2010-08-11 16:33:47 +04:00
|
|
|
/*!
|
2011-08-10 01:46:13 +04:00
|
|
|
\fn static bool BUnicodeChar::IsTitle(uint32 c)
|
|
|
|
\brief Determine if \a c is title case.
|
|
|
|
|
|
|
|
Title case characters are a smaller version of normal uppercase letters.
|
2010-08-11 16:33:47 +04:00
|
|
|
|
2011-08-10 01:46:13 +04:00
|
|
|
\returns \c true if the specified unicode character is a title case
|
2014-06-12 00:48:17 +04:00
|
|
|
character.
|
|
|
|
|
|
|
|
\since Haiku R1
|
2010-08-11 16:33:47 +04:00
|
|
|
*/
|
|
|
|
|
2013-02-07 06:05:00 +04:00
|
|
|
|
2010-08-11 16:33:47 +04:00
|
|
|
/*!
|
2011-08-10 01:46:13 +04:00
|
|
|
\fn static bool BUnicodeChar::IsDefined(uint32 c)
|
|
|
|
\brief Determine if \a c is defined.
|
|
|
|
|
|
|
|
In unicode some codes are not valid or not attributed yet.
|
|
|
|
For these codes this method will return \c false.
|
|
|
|
|
|
|
|
\returns \c true if the specified unicode character is defined.
|
2014-06-12 00:48:17 +04:00
|
|
|
|
|
|
|
\since Haiku R1
|
2010-08-11 16:33:47 +04:00
|
|
|
*/
|
|
|
|
|
2013-02-07 06:05:00 +04:00
|
|
|
|
2010-08-11 16:33:47 +04:00
|
|
|
/*!
|
2011-08-10 01:46:13 +04:00
|
|
|
\fn static bool BUnicodeChar::IsBase(uint32 c)
|
|
|
|
\brief Determine if \a c can be used with a diacritic.
|
2010-08-11 16:33:47 +04:00
|
|
|
|
2011-08-10 01:46:13 +04:00
|
|
|
\note IsBase() does not determine if a unicode character is distinct.
|
|
|
|
|
|
|
|
\returns \c true if the specified unicode character is a base
|
2014-06-12 00:48:17 +04:00
|
|
|
form character that can be used with a diacritic.
|
|
|
|
|
|
|
|
\since Haiku R1
|
2010-08-11 16:33:47 +04:00
|
|
|
*/
|
|
|
|
|
2013-02-07 06:05:00 +04:00
|
|
|
|
2010-08-11 16:33:47 +04:00
|
|
|
/*!
|
2011-08-10 01:46:13 +04:00
|
|
|
\fn static int8 BUnicodeChar::Type(uint32 c)
|
|
|
|
\brief Gets the type of a character.
|
|
|
|
|
|
|
|
\returns A member of the \c unicode_char_category enum.
|
2014-06-12 00:48:17 +04:00
|
|
|
|
|
|
|
\since Haiku R1
|
2010-08-11 16:33:47 +04:00
|
|
|
*/
|
|
|
|
|
2013-02-07 06:05:00 +04:00
|
|
|
|
2010-08-11 16:33:47 +04:00
|
|
|
/*!
|
2011-08-10 01:46:13 +04:00
|
|
|
\fn uint32 BUnicodeChar::ToLower(uint32 c)
|
|
|
|
\brief Transforms \a c to lowercase.
|
|
|
|
|
|
|
|
\returns The lowercase version of the specified unicode character.
|
2014-06-12 00:48:17 +04:00
|
|
|
|
|
|
|
\since Haiku R1
|
2010-08-11 16:33:47 +04:00
|
|
|
*/
|
|
|
|
|
2013-02-07 06:05:00 +04:00
|
|
|
|
2010-08-11 16:33:47 +04:00
|
|
|
/*!
|
2011-08-10 01:46:13 +04:00
|
|
|
\fn uint32 BUnicodeChar::ToUpper(uint32 c)
|
|
|
|
\brief Transforms \a c to uppercase.
|
|
|
|
|
|
|
|
\returns The uppercase version of the specified unicode character.
|
2014-06-12 00:48:17 +04:00
|
|
|
|
|
|
|
\since Haiku R1
|
2010-08-11 16:33:47 +04:00
|
|
|
*/
|
|
|
|
|
2013-02-07 06:05:00 +04:00
|
|
|
|
2010-08-11 16:33:47 +04:00
|
|
|
/*!
|
2011-08-10 01:46:13 +04:00
|
|
|
\fn uint32 BUnicodeChar::ToTitle(uint32 c)
|
|
|
|
\brief Transforms \a c to title case.
|
|
|
|
|
|
|
|
\returns The title case version of the specified unicode character.
|
2014-06-12 00:48:17 +04:00
|
|
|
|
|
|
|
\since Haiku R1
|
2010-08-11 16:33:47 +04:00
|
|
|
*/
|
|
|
|
|
2013-02-07 06:05:00 +04:00
|
|
|
|
2010-08-11 16:33:47 +04:00
|
|
|
/*!
|
2011-08-10 01:46:13 +04:00
|
|
|
\fn int32 BUnicodeChar::DigitValue(uint32 c)
|
|
|
|
\brief Gets the numeric value of \a c.
|
|
|
|
|
|
|
|
\returns The numeric version of the specified unicode character.
|
2014-06-12 00:48:17 +04:00
|
|
|
|
|
|
|
\since Haiku R1
|
2010-08-11 16:33:47 +04:00
|
|
|
*/
|
|
|
|
|
2013-02-07 06:05:00 +04:00
|
|
|
|
2010-08-11 16:33:47 +04:00
|
|
|
/*!
|
2014-06-12 00:48:17 +04:00
|
|
|
\fn void BUnicodeChar::ToUTF8(uint32 c, char** out)
|
2013-02-07 06:05:00 +04:00
|
|
|
\brief Transform a character to UTF-8 encoding.
|
2010-08-11 16:33:47 +04:00
|
|
|
|
2013-02-07 06:05:00 +04:00
|
|
|
\returns The UTF-8 encoding of the specified unicode character.
|
2014-06-12 00:48:17 +04:00
|
|
|
|
|
|
|
\since Haiku R1
|
2010-08-11 16:33:47 +04:00
|
|
|
*/
|
|
|
|
|
2013-02-07 06:05:00 +04:00
|
|
|
|
2010-08-11 16:33:47 +04:00
|
|
|
/*!
|
2014-06-12 00:48:17 +04:00
|
|
|
\fn uint32 BUnicodeChar::FromUTF8(const char** in)
|
2013-02-07 06:05:00 +04:00
|
|
|
\brief Transform a UTF-8 string to a UTF-32 character.
|
2010-08-11 16:33:47 +04:00
|
|
|
|
2011-08-10 01:46:13 +04:00
|
|
|
If the string contains multiple characters, only the first one is used.
|
|
|
|
This function updates the \a in pointer so that it points to the next
|
|
|
|
character for the following call.
|
|
|
|
|
2013-02-07 06:05:00 +04:00
|
|
|
\returns The UTF-32 encoded version of \a in.
|
2014-06-12 00:48:17 +04:00
|
|
|
|
|
|
|
\since Haiku R1
|
2010-08-11 16:33:47 +04:00
|
|
|
*/
|
|
|
|
|
2013-02-07 06:05:00 +04:00
|
|
|
|
2010-08-11 16:33:47 +04:00
|
|
|
/*!
|
2014-06-12 00:48:17 +04:00
|
|
|
\fn size_t BUnicodeChar::UTF8StringLength(const char* string)
|
2011-08-10 01:46:13 +04:00
|
|
|
\brief Counts the characters in the given \c NUL terminated string.
|
|
|
|
|
2013-02-07 06:05:00 +04:00
|
|
|
\returns the number of UTF-8 characters in the \c NUL terminated string.
|
2010-08-11 16:33:47 +04:00
|
|
|
|
2011-08-10 01:46:13 +04:00
|
|
|
\sa BString::CountChars()
|
2014-06-12 00:48:17 +04:00
|
|
|
|
|
|
|
\since Haiku R1
|
2010-08-11 16:33:47 +04:00
|
|
|
*/
|
|
|
|
|
2013-02-07 06:05:00 +04:00
|
|
|
|
2010-08-11 16:33:47 +04:00
|
|
|
/*!
|
2014-06-12 00:48:17 +04:00
|
|
|
\fn size_t BUnicodeChar::UTF8StringLength(const char* string,
|
|
|
|
size_t maxLength)
|
2011-08-10 01:46:13 +04:00
|
|
|
\brief Counts the characters in the given string up to \a maxLength
|
|
|
|
characters.
|
|
|
|
|
2014-06-12 00:48:17 +04:00
|
|
|
\param string does not need to be \c NUL terminated if you specify a
|
|
|
|
\a maxLength that is shorter than the maximum length of the string.
|
|
|
|
\param maxLength The maximum length of the string in bytes.
|
2010-08-11 16:33:47 +04:00
|
|
|
|
2013-02-07 06:05:00 +04:00
|
|
|
\returns the number of UTF-8 characters in the \c NUL terminated string
|
2014-06-12 00:48:17 +04:00
|
|
|
up to \a maxLength characters.
|
|
|
|
|
|
|
|
\since Haiku R1
|
2010-08-11 16:33:47 +04:00
|
|
|
*/
|