diff --git a/headers/os/locale/UnicodeChar.h b/headers/os/locale/UnicodeChar.h
index d902a79c23..3031866aaf 100644
--- a/headers/os/locale/UnicodeChar.h
+++ b/headers/os/locale/UnicodeChar.h
@@ -8,6 +8,7 @@ enum unicode_char_category
 	// Non-category for unassigned and non-character code points.
 	B_UNICODE_UNASSIGNED				= 0,
 
+	B_UNICODE_GENERAL_OTHER_TYPES		= 0,	// Cn
 	B_UNICODE_UPPERCASE_LETTER			= 1,	// Lu
 	B_UNICODE_LOWERCASE_LETTER			= 2,	// Ll
 	B_UNICODE_TITLECASE_LETTER			= 3,	// Lt
@@ -37,152 +38,289 @@ enum unicode_char_category
 	B_UNICODE_OTHER_SYMBOL				= 27,	// So
 	B_UNICODE_INITIAL_PUNCTUATION		= 28,	// Pi
 	B_UNICODE_FINAL_PUNCTUATION			= 29,	// Pf
-	B_UNICODE_GENERAL_OTHER_TYPES		= 30,	// Cn
 
 	B_UNICODE_CATEGORY_COUNT
 };
 
 
-/**
- * This specifies the language directional property of a character set.
- */
+// This specifies the language directional property of a character set.
 
 enum unicode_char_direction {
-	B_UNICODE_LEFT_TO_RIGHT               = 0,
-	B_UNICODE_RIGHT_TO_LEFT               = 1,
-	B_UNICODE_EUROPEAN_NUMBER             = 2,
-	B_UNICODE_EUROPEAN_NUMBER_SEPARATOR   = 3,
-	B_UNICODE_EUROPEAN_NUMBER_TERMINATOR  = 4,
-	B_UNICODE_ARABIC_NUMBER               = 5,
-	B_UNICODE_COMMON_NUMBER_SEPARATOR     = 6,
-	B_UNICODE_BLOCK_SEPARATOR             = 7,
-	B_UNICODE_SEGMENT_SEPARATOR           = 8,
-	B_UNICODE_WHITE_SPACE_NEUTRAL         = 9,
-	B_UNICODE_OTHER_NEUTRAL               = 10,
-	B_UNICODE_LEFT_TO_RIGHT_EMBEDDING     = 11,
-	B_UNICODE_LEFT_TO_RIGHT_OVERRIDE      = 12,
-	B_UNICODE_RIGHT_TO_LEFT_ARABIC        = 13,
-	B_UNICODE_RIGHT_TO_LEFT_EMBEDDING     = 14,
-	B_UNICODE_RIGHT_TO_LEFT_OVERRIDE      = 15,
-	B_UNICODE_POP_DIRECTIONAL_FORMAT      = 16,
-	B_UNICODE_DIR_NON_SPACING_MARK        = 17,
-	B_UNICODE_BOUNDARY_NEUTRAL            = 18,
+	B_UNICODE_LEFT_TO_RIGHT					= 0,
+	B_UNICODE_RIGHT_TO_LEFT					= 1,
+	B_UNICODE_EUROPEAN_NUMBER				= 2,
+	B_UNICODE_EUROPEAN_NUMBER_SEPARATOR		= 3,
+	B_UNICODE_EUROPEAN_NUMBER_TERMINATOR	= 4,
+	B_UNICODE_ARABIC_NUMBER					= 5,
+	B_UNICODE_COMMON_NUMBER_SEPARATOR		= 6,
+	B_UNICODE_BLOCK_SEPARATOR				= 7,
+	B_UNICODE_SEGMENT_SEPARATOR				= 8,
+	B_UNICODE_WHITE_SPACE_NEUTRAL			= 9,
+	B_UNICODE_OTHER_NEUTRAL					= 10,
+	B_UNICODE_LEFT_TO_RIGHT_EMBEDDING		= 11,
+	B_UNICODE_LEFT_TO_RIGHT_OVERRIDE		= 12,
+	B_UNICODE_RIGHT_TO_LEFT_ARABIC			= 13,
+	B_UNICODE_RIGHT_TO_LEFT_EMBEDDING		= 14,
+	B_UNICODE_RIGHT_TO_LEFT_OVERRIDE		= 15,
+	B_UNICODE_POP_DIRECTIONAL_FORMAT		= 16,
+	B_UNICODE_DIR_NON_SPACING_MARK			= 17,
+	B_UNICODE_BOUNDARY_NEUTRAL				= 18,
 
 	B_UNICODE_DIRECTION_COUNT
 };
 
 
-/**
- * Script range as defined in the Unicode standard.
- */
+// Block ranges (called "scripts" here) as defined in the Unicode standard.
 
 enum unicode_char_script {
-	// Script names
-	B_UNICODE_BASIC_LATIN,
-	B_UNICODE_LATIN_1_SUPPLEMENT,
-	B_UNICODE_LATIN_EXTENDED_A,
-	B_UNICODE_LATIN_EXTENDED_B,
-	B_UNICODE_IPA_EXTENSIONS,
-	B_UNICODE_SPACING_MODIFIER_LETTERS,
-	B_UNICODE_COMBINING_DIACRITICAL_MARKS,
-	B_UNICODE_GREEK,
-	B_UNICODE_CYRILLIC,
-	B_UNICODE_ARMENIAN,
-	B_UNICODE_HEBREW,
-	B_UNICODE_ARABIC,
-	B_UNICODE_SYRIAC,
-	B_UNICODE_THAANA,
-	B_UNICODE_DEVANAGARI,
-	B_UNICODE_BENGALI,
-	B_UNICODE_GURMUKHI,
-	B_UNICODE_GUJARATI,
-	B_UNICODE_ORIYA,
-	B_UNICODE_TAMIL,
-	B_UNICODE_TELUGU,
-	B_UNICODE_KANNADA,
-	B_UNICODE_MALAYALAM,
-	B_UNICODE_SINHALA,
-	B_UNICODE_THAI,
-	B_UNICODE_LAO,
-	B_UNICODE_TIBETAN,
-	B_UNICODE_MYANMAR,
-	B_UNICODE_GEORGIAN,
-	B_UNICODE_HANGUL_JAMO,
-	B_UNICODE_ETHIOPIC,
-	B_UNICODE_CHEROKEE,
-	B_UNICODE_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS,
-	B_UNICODE_OGHAM,
-	B_UNICODE_RUNIC,
-	B_UNICODE_KHMER,
-	B_UNICODE_MONGOLIAN,
-	B_UNICODE_LATIN_EXTENDED_ADDITIONAL,
-	B_UNICODE_GREEK_EXTENDED,
-	B_UNICODE_GENERAL_PUNCTUATION,
-	B_UNICODE_SUPERSCRIPTS_AND_SUBSCRIPTS,
-	B_UNICODE_CURRENCY_SYMBOLS,
-	B_UNICODE_COMBINING_MARKS_FOR_SYMBOLS,
-	B_UNICODE_LETTERLIKE_SYMBOLS,
-	B_UNICODE_NUMBER_FORMS,
-	B_UNICODE_ARROWS,
-	B_UNICODE_MATHEMATICAL_OPERATORS,
-	B_UNICODE_MISCELLANEOUS_TECHNICAL,
-	B_UNICODE_CONTROL_PICTURES,
-	B_UNICODE_OPTICAL_CHARACTER_RECOGNITION,
-	B_UNICODE_ENCLOSED_ALPHANUMERICS,
-	B_UNICODE_BOX_DRAWING,
-	B_UNICODE_BLOCK_ELEMENTS,
-	B_UNICODE_GEOMETRIC_SHAPES,
-	B_UNICODE_MISCELLANEOUS_SYMBOLS,
-	B_UNICODE_DINGBATS,
-	B_UNICODE_BRAILLE_PATTERNS,
-	B_UNICODE_CJK_RADICALS_SUPPLEMENT,
-	B_UNICODE_KANGXI_RADICALS,
-	B_UNICODE_IDEOGRAPHIC_DESCRIPTION_CHARACTERS,
-	B_UNICODE_CJK_SYMBOLS_AND_PUNCTUATION,
-	B_UNICODE_HIRAGANA,
-	B_UNICODE_KATAKANA,
-	B_UNICODE_BOPOMOFO,
-	B_UNICODE_HANGUL_COMPATIBILITY_JAMO,
-	B_UNICODE_KANBUN,
-	B_UNICODE_BOPOMOFO_EXTENDED,
-	B_UNICODE_ENCLOSED_CJK_LETTERS_AND_MONTHS,
-	B_UNICODE_CJK_COMPATIBILITY,
-	B_UNICODE_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
-	B_UNICODE_CJK_UNIFIED_IDEOGRAPHS,
-	B_UNICODE_YI_SYLLABLES,
-	B_UNICODE_YI_RADICALS,
-	B_UNICODE_HANGUL_SYLLABLES,
-	B_UNICODE_HIGH_SURROGATES,
-	B_UNICODE_HIGH_PRIVATE_USE_SURROGATES,
-	B_UNICODE_LOW_SURROGATES,
-	B_UNICODE_PRIVATE_USE_AREA,
-	B_UNICODE_CJK_COMPATIBILITY_IDEOGRAPHS,
-	B_UNICODE_ALPHABETIC_PRESENTATION_FORMS,
-	B_UNICODE_ARABIC_PRESENTATION_FORMS_A,
-	B_UNICODE_COMBINING_HALF_MARKS,
-	B_UNICODE_CJK_COMPATIBILITY_FORMS,
-	B_UNICODE_SMALL_FORM_VARIANTS,
-	B_UNICODE_ARABIC_PRESENTATION_FORMS_B,
-	B_UNICODE_SPECIALS,
-	B_UNICODE_HALFWIDTH_AND_FULLWIDTH_FORMS,
+	// New No_Block value in Unicode 4.
+	B_UNICODE_NO_BLOCK								= 0, // [none] Special range
+	B_UNICODE_BASIC_LATIN							= 1, // [0000]
+	B_UNICODE_LATIN_1_SUPPLEMENT					= 2, // [0080]
+	B_UNICODE_LATIN_EXTENDED_A						= 3, // [0100]
+	B_UNICODE_LATIN_EXTENDED_B						= 4, // [0180]
+	B_UNICODE_IPA_EXTENSIONS						= 5, // [0250]
+	B_UNICODE_SPACING_MODIFIER_LETTERS				= 6, // [02B0]
+	B_UNICODE_COMBINING_DIACRITICAL_MARKS			= 7, // [0300]
+	B_UNICODE_GREEK									= 8, // [0370]
+	B_UNICODE_CYRILLIC								= 9, // [0400]
+	B_UNICODE_ARMENIAN								= 10, // [0530]
+	B_UNICODE_HEBREW								= 11, // [0590]
+	B_UNICODE_ARABIC								= 12, // [0600]
+	B_UNICODE_SYRIAC								= 13, // [0700]
+	B_UNICODE_THAANA								= 14, // [0780]
+	B_UNICODE_DEVANAGARI							= 15, // [0900]
+	B_UNICODE_BENGALI								= 16, // [0980]
+	B_UNICODE_GURMUKHI								= 17, // [0A00]
+	B_UNICODE_GUJARATI								= 18, // [0A80]
+	B_UNICODE_ORIYA									= 19, // [0B00]
+	B_UNICODE_TAMIL									= 20, // [0B80]
+	B_UNICODE_TELUGU								= 21, // [0C00]
+	B_UNICODE_KANNADA								= 22, // [0C80]
+	B_UNICODE_MALAYALAM								= 23, // [0D00]
+	B_UNICODE_SINHALA								= 24, // [0D80]
+	B_UNICODE_THAI									= 25, // [0E00]
+	B_UNICODE_LAO									= 26, // [0E80]
+	B_UNICODE_TIBETAN								= 27, // [0F00]
+	B_UNICODE_MYANMAR								= 28, // [1000]
+	B_UNICODE_GEORGIAN								= 29, // [10A0]
+	B_UNICODE_HANGUL_JAMO							= 30, // [1100]
+	B_UNICODE_ETHIOPIC								= 31, // [1200]
+	B_UNICODE_CHEROKEE								= 32, // [13A0]
+	B_UNICODE_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS	= 33, // [1400]
+	B_UNICODE_OGHAM									= 34, // [1680]
+	B_UNICODE_RUNIC									= 35, // [16A0]
+	B_UNICODE_KHMER									= 36, // [1780]
+	B_UNICODE_MONGOLIAN								= 37, // [1800]
+	B_UNICODE_LATIN_EXTENDED_ADDITIONAL				= 38, // [1E00]
+	B_UNICODE_GREEK_EXTENDED						= 39, // [1F00]
+	B_UNICODE_GENERAL_PUNCTUATION					= 40, // [2000]
+	B_UNICODE_SUPERSCRIPTS_AND_SUBSCRIPTS			= 41, // [2070]
+	B_UNICODE_CURRENCY_SYMBOLS						= 42, // [20A0]
+	B_UNICODE_COMBINING_MARKS_FOR_SYMBOLS			= 43, // [20D0]
+	B_UNICODE_LETTERLIKE_SYMBOLS					= 44, // [2100]
+	B_UNICODE_NUMBER_FORMS							= 45, // [2150]
+	B_UNICODE_ARROWS								= 46, // [2190]
+	B_UNICODE_MATHEMATICAL_OPERATORS				= 47, // [2200]
+	B_UNICODE_MISCELLANEOUS_TECHNICAL				= 48, // [2300]
+	B_UNICODE_CONTROL_PICTURES						= 49, // [2400]
+	B_UNICODE_OPTICAL_CHARACTER_RECOGNITION			= 50, // [2440]
+	B_UNICODE_ENCLOSED_ALPHANUMERICS				= 51, // [2460]
+	B_UNICODE_BOX_DRAWING							= 52, // [2500]
+	B_UNICODE_BLOCK_ELEMENTS						= 53, // [2580]
+	B_UNICODE_GEOMETRIC_SHAPES						= 54, // [25A0]
+	B_UNICODE_MISCELLANEOUS_SYMBOLS					= 55, // [2600]
+	B_UNICODE_DINGBATS								= 56, // [2700]
+	B_UNICODE_BRAILLE_PATTERNS						= 57, // [2800]
+	B_UNICODE_CJK_RADICALS_SUPPLEMENT				= 58, // [2E80]
+	B_UNICODE_KANGXI_RADICALS						= 59, // [2F00]
+	B_UNICODE_IDEOGRAPHIC_DESCRIPTION_CHARACTERS	= 60, // [2FF0]
+	B_UNICODE_CJK_SYMBOLS_AND_PUNCTUATION			= 61, // [3000]
+	B_UNICODE_HIRAGANA								= 62, // [3040]
+	B_UNICODE_KATAKANA								= 63, // [30A0]
+	B_UNICODE_BOPOMOFO								= 64, // [3100]
+	B_UNICODE_HANGUL_COMPATIBILITY_JAMO				= 65, // [3130]
+	B_UNICODE_KANBUN								= 66, // [3190]
+	B_UNICODE_BOPOMOFO_EXTENDED						= 67, // [31A0]
+	B_UNICODE_ENCLOSED_CJK_LETTERS_AND_MONTHS		= 68, // [3200]
+	B_UNICODE_CJK_COMPATIBILITY						= 69, // [3300]
+	B_UNICODE_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A	= 70, // [3400]
+	B_UNICODE_CJK_UNIFIED_IDEOGRAPHS				= 71, // [4E00]
+	B_UNICODE_YI_SYLLABLES							= 72, // [A000]
+	B_UNICODE_YI_RADICALS							= 73, // [A490]
+	B_UNICODE_HANGUL_SYLLABLES						= 74, // [AC00]
+	B_UNICODE_HIGH_SURROGATES						= 75, // [D800]
+	B_UNICODE_HIGH_PRIVATE_USE_SURROGATES			= 76, // [DB80]
+	B_UNICODE_LOW_SURROGATES						= 77, // [DC00]
+	B_UNICODE_PRIVATE_USE							= 78,
+	B_UNICODE_PRIVATE_USE_AREA = B_UNICODE_PRIVATE_USE, // [E000]
+	B_UNICODE_CJK_COMPATIBILITY_IDEOGRAPHS			= 79, // [F900]
+	B_UNICODE_ALPHABETIC_PRESENTATION_FORMS			= 80, // [FB00]
+	B_UNICODE_ARABIC_PRESENTATION_FORMS_A			= 81, // [FB50]
+	B_UNICODE_COMBINING_HALF_MARKS					= 82, // [FE20]
+	B_UNICODE_CJK_COMPATIBILITY_FORMS				= 83, // [FE30]
+	B_UNICODE_SMALL_FORM_VARIANTS					= 84, // [FE50]
+	B_UNICODE_ARABIC_PRESENTATION_FORMS_B			= 85, // [FE70]
+	B_UNICODE_SPECIALS								= 86, // [FFF0]
+	B_UNICODE_HALFWIDTH_AND_FULLWIDTH_FORMS			= 87, // [FF00]
 
-	B_UNICODE_SCRIPT_COUNT,
-	B_UNICODE_NO_SCRIPT = B_UNICODE_SCRIPT_COUNT
+	// New blocks in Unicode 3.1
+	B_UNICODE_OLD_ITALIC							= 88, // [10300]
+	B_UNICODE_GOTHIC								= 89, // [10330]
+	B_UNICODE_DESERET								= 90, // [10400]
+	B_UNICODE_BYZANTINE_MUSICAL_SYMBOLS				= 91, // [1D000]
+	B_UNICODE_MUSICAL_SYMBOLS						= 92, // [1D100]
+	B_UNICODE_MATHEMATICAL_ALPHANUMERIC_SYMBOLS		= 93, // [1D400]
+	B_UNICODE_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B	= 94, // [20000]
+	B_UNICODE_CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = 95, // [2F800]
+	B_UNICODE_TAGS									= 96, // [E0000]
+
+	// New blocks in Unicode 3.2
+	B_UNICODE_CYRILLIC_SUPPLEMENTARY				= 97,
+	B_UNICODE_CYRILLIC_SUPPLEMENT = B_UNICODE_CYRILLIC_SUPPLEMENTARY, // [0500]
+	B_UNICODE_TAGALOG								= 98, // [1700]
+	B_UNICODE_HANUNOO								= 99, // [1720]
+	B_UNICODE_BUHID									= 100, // [1740]
+	B_UNICODE_TAGBANWA								= 101, // [1760]
+	B_UNICODE_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A	= 102, // [27C0]
+	B_UNICODE_SUPPLEMENTAL_ARROWS_A					= 103, // [27F0]
+	B_UNICODE_SUPPLEMENTAL_ARROWS_B					= 104, // [2900]
+	B_UNICODE_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B	= 105, // [2980]
+	B_UNICODE_SUPPLEMENTAL_MATHEMATICAL_OPERATORS	= 106, // [2A00]
+	B_UNICODE_KATAKANA_PHONETIC_EXTENSIONS			= 107, // [31F0]
+	B_UNICODE_VARIATION_SELECTORS					= 108, // [FE00]
+	B_UNICODE_SUPPLEMENTARY_PRIVATE_USE_AREA_A		= 109, // [F0000]
+	B_UNICODE_SUPPLEMENTARY_PRIVATE_USE_AREA_B		= 110, // [100000]
+
+	// New blocks in Unicode 4
+	B_UNICODE_LIMBU									= 111, // [1900]
+	B_UNICODE_TAI_LE								= 112, // [1950]
+	B_UNICODE_KHMER_SYMBOLS							= 113, // [19E0]
+	B_UNICODE_PHONETIC_EXTENSIONS					= 114, // [1D00]
+	B_UNICODE_MISCELLANEOUS_SYMBOLS_AND_ARROWS		= 115, // [2B00]
+	B_UNICODE_YIJING_HEXAGRAM_SYMBOLS				= 116, // [4DC0]
+	B_UNICODE_LINEAR_B_SYLLABARY					= 117, // [10000]
+	B_UNICODE_LINEAR_B_IDEOGRAMS					= 118, // [10080]
+	B_UNICODE_AEGEAN_NUMBERS						= 119, // [10100]
+	B_UNICODE_UGARITIC								= 120, // [10380]
+	B_UNICODE_SHAVIAN								= 121, // [10450]
+	B_UNICODE_OSMANYA								= 122, // [10480]
+	B_UNICODE_CYPRIOT_SYLLABARY						= 123, // [10800]
+	B_UNICODE_TAI_XUAN_JING_SYMBOLS					= 124, // [1D300]
+	B_UNICODE_VARIATION_SELECTORS_SUPPLEMENT		= 125, // [E0100]
+
+	// New blocks in Unicode 4.1
+	B_UNICODE_ANCIENT_GREEK_MUSICAL_NOTATION		= 126, // [1D200]
+	B_UNICODE_ANCIENT_GREEK_NUMBERS					= 127, // [10140]
+	B_UNICODE_ARABIC_SUPPLEMENT						= 128, // [0750]
+	B_UNICODE_BUGINESE								= 129, // [1A00]
+	B_UNICODE_CJK_STROKES							= 130, // [31C0]
+	B_UNICODE_COMBINING_DIACRITICAL_MARKS_SUPPLEMENT = 131, // [1DC0]
+	B_UNICODE_COPTIC								= 132, // [2C80]
+	B_UNICODE_ETHIOPIC_EXTENDED						= 133, // [2D80]
+	B_UNICODE_ETHIOPIC_SUPPLEMENT					= 134, // [1380]
+	B_UNICODE_GEORGIAN_SUPPLEMENT					= 135, // [2D00]
+	B_UNICODE_GLAGOLITIC							= 136, // [2C00]
+	B_UNICODE_KHAROSHTHI							= 137, // [10A00]
+	B_UNICODE_MODIFIER_TONE_LETTERS					= 138, // [A700]
+	B_UNICODE_NEW_TAI_LUE							= 139, // [1980]
+	B_UNICODE_OLD_PERSIAN							= 140, // [103A0]
+	B_UNICODE_PHONETIC_EXTENSIONS_SUPPLEMENT		= 141, // [1D80]
+	B_UNICODE_SUPPLEMENTAL_PUNCTUATION				= 142, // [2E00]
+	B_UNICODE_SYLOTI_NAGRI							= 143, // [A800]
+	B_UNICODE_TIFINAGH								= 144, // [2D30]
+	B_UNICODE_VERTICAL_FORMS						= 145, // [FE10]
+
+	// New blocks in Unicode 5.0
+	B_UNICODE_NKO									= 146, // [07C0]
+	B_UNICODE_BALINESE								= 147, // [1B00]
+	B_UNICODE_LATIN_EXTENDED_C						= 148, // [2C60]
+	B_UNICODE_LATIN_EXTENDED_D						= 149, // [A720]
+	B_UNICODE_PHAGS_PA								= 150, // [A840]
+	B_UNICODE_PHOENICIAN							= 151, // [10900]
+	B_UNICODE_CUNEIFORM								= 152, // [12000]
+	B_UNICODE_CUNEIFORM_NUMBERS_AND_PUNCTUATION		= 153, // [12400]
+	B_UNICODE_COUNTING_ROD_NUMERALS					= 154, // [1D360]
+
+	// New blocks in Unicode 5.1
+	B_UNICODE_SUNDANESE								= 155, // [1B80]
+	B_UNICODE_LEPCHA								= 156, // [1C00]
+	B_UNICODE_OL_CHIKI								= 157, // [1C50]
+	B_UNICODE_CYRILLIC_EXTENDED_A					= 158, // [2DE0]
+	B_UNICODE_VAI									= 159, // [A500]
+	B_UNICODE_CYRILLIC_EXTENDED_B					= 160, // [A640]
+	B_UNICODE_SAURASHTRA							= 161, // [A880]
+	B_UNICODE_KAYAH_LI								= 162, // [A900]
+	B_UNICODE_REJANG								= 163, // [A930]
+	B_UNICODE_CHAM									= 164, // [AA00]
+	B_UNICODE_ANCIENT_SYMBOLS						= 165, // [10190]
+	B_UNICODE_PHAISTOS_DISC							= 166, // [101D0]
+	B_UNICODE_LYCIAN								= 167, // [10280]
+	B_UNICODE_CARIAN								= 168, // [102A0]
+	B_UNICODE_LYDIAN								= 169, // [10920]
+	B_UNICODE_MAHJONG_TILES							= 170, // [1F000]
+	B_UNICODE_DOMINO_TILES							= 171, // [1F030]
+
+	// New blocks in Unicode 5.2
+	B_UNICODE_SAMARITAN								= 172, // [0800]
+	B_UNICODE_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED = 173, // [18B0]
+	B_UNICODE_TAI_THAM								= 174, // [1A20]
+	B_UNICODE_VEDIC_EXTENSIONS						= 175, // [1CD0]
+	B_UNICODE_LISU									= 176, // [A4D0]
+	B_UNICODE_BAMUM									= 177, // [A6A0]
+	B_UNICODE_COMMON_INDIC_NUMBER_FORMS				= 178, // [A830]
+	B_UNICODE_DEVANAGARI_EXTENDED					= 179, // [A8E0]
+	B_UNICODE_HANGUL_JAMO_EXTENDED_A				= 180, // [A960]
+	B_UNICODE_JAVANESE								= 181, // [A980]
+	B_UNICODE_MYANMAR_EXTENDED_A					= 182, // [AA60]
+	B_UNICODE_TAI_VIET								= 183, // [AA80]
+	B_UNICODE_MEETEI_MAYEK							= 184, // [ABC0]
+	B_UNICODE_HANGUL_JAMO_EXTENDED_B				= 185, // [D7B0]
+	B_UNICODE_IMPERIAL_ARAMAIC						= 186, // [10840]
+	B_UNICODE_OLD_SOUTH_ARABIAN						= 187, // [10A60]
+	B_UNICODE_AVESTAN								= 188, // [10B00]
+	B_UNICODE_INSCRIPTIONAL_PARTHIAN				= 189, // [10B40]
+	B_UNICODE_INSCRIPTIONAL_PAHLAVI					= 190, // [10B60]
+	B_UNICODE_OLD_TURKIC							= 191, // [10C00]
+	B_UNICODE_RUMI_NUMERAL_SYMBOLS					= 192, // [10E60]
+	B_UNICODE_KAITHI								= 193, // [11080]
+	B_UNICODE_EGYPTIAN_HIEROGLYPHS					= 194, // [13000]
+	B_UNICODE_ENCLOSED_ALPHANUMERIC_SUPPLEMENT		= 195, // [1F100]
+	B_UNICODE_ENCLOSED_IDEOGRAPHIC_SUPPLEMENT		= 196, // [1F200]
+	B_UNICODE_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C	= 197, // [2A700]
+
+	// New blocks in Unicode 6.0
+	B_UNICODE_MANDAIC								= 198, // [0840]
+	B_UNICODE_BATAK									= 199, // [1BC0]
+	B_UNICODE_ETHIOPIC_EXTENDED_A					= 200, // [AB00]
+	B_UNICODE_BRAHMI								= 201, // [11000]
+	B_UNICODE_BAMUM_SUPPLEMENT						= 202, // [16800]
+	B_UNICODE_KANA_SUPPLEMENT						= 203, // [1B000]
+	B_UNICODE_PLAYING_CARDS							= 204, // [1F0A0]
+	B_UNICODE_MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS	= 205, // [1F300]
+	B_UNICODE_EMOTICONS								= 206, // [1F600]
+	B_UNICODE_TRANSPORT_AND_MAP_SYMBOLS				= 207, // [1F680]
+	B_UNICODE_ALCHEMICAL_SYMBOLS					= 208, // [1F700]
+	B_UNICODE_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D	= 209, // [2B740]
+
+	B_UNICODE_SCRIPT_COUNT							= 210,
+	B_UNICODE_NO_SCRIPT = B_UNICODE_SCRIPT_COUNT,
+
+	B_UNICODE_INVALID_CODE							= -1
 };
 
 
-/**
- * Values returned by the u_getCellWidth() function.
- */
+// East Asian Width constants.
 
-enum unicode_cell_width
+enum unicode_east_asian_width
 {
-    B_UNICODE_ZERO_WIDTH              = 0,
-    B_UNICODE_HALF_WIDTH              = 1,
-    B_UNICODE_FULL_WIDTH              = 2,
-    B_UNICODE_NEUTRAL_WIDTH           = 3,
-
-    B_UNICODE_CELL_WIDTH_COUNT
+	B_UNICODE_EA_NEUTRAL,   // [N]
+	B_UNICODE_EA_AMBIGUOUS, // [A]
+	B_UNICODE_EA_HALFWIDTH, // [H]
+	B_UNICODE_EA_FULLWIDTH, // [F]
+	B_UNICODE_EA_NARROW,	// [Na]
+	B_UNICODE_EA_WIDE,		// [W]
+	B_UNICODE_EA_COUNT
 };
 
 
@@ -209,6 +347,7 @@ class BUnicodeChar {
 		static uint32 ToUpper(uint32 c);
 		static uint32 ToTitle(uint32 c);
 		static int32 DigitValue(uint32 c);
+		static unicode_east_asian_width EastAsianWidth(uint32 c);
 
 		static void ToUTF8(uint32 c, char **out);
 		static uint32 FromUTF8(const char **in);
@@ -230,4 +369,4 @@ BUnicodeChar::FromUTF8(const char *in)
 }
 
 
-#endif	/* _UNICODE_CHAR_H_ */
+#endif	//  _UNICODE_CHAR_H_
diff --git a/src/kits/locale/UnicodeChar.cpp b/src/kits/locale/UnicodeChar.cpp
index a86192e782..242e16d5c8 100644
--- a/src/kits/locale/UnicodeChar.cpp
+++ b/src/kits/locale/UnicodeChar.cpp
@@ -1,234 +1,18 @@
-/* 
-** Copyright 2003, Axel Dörfler, axeld@pinc-software.de. All rights reserved.
-** Distributed under the terms of the OpenBeOS License.
-*/
-
-/* Reads the information out of the data files created by (an edited version of)
- * IBM's ICU genprops utility. The BUnicodeChar class is mostly the counterpart
- * to ICU's uchar module, but is not as huge or broad as that one.
+/*
+ * Copyright 2003, Axel Dörfler, axeld@pinc-software.de. All rights reserved.
+ * Distributed under the terms of the MIT License.
  *
- * Note, it probably won't be able to handle the output of the orginal genprops
- * tool and vice versa - only use the tool provided with this project to create
- * the Unicode property file.
- * However, the algorithmic idea behind the property file is still the same as
- * found in ICU - nothing important has been changed, so more recent versions
- * of genprops tool/data can probably be ported without too much effort.
+ * Authors:
+ *		Axel Dörfler, axeld@pinc-software.de
+ *		Siarzhuk Zharski, zharik@gmx.li
  *
- * In case no property file can be found it will still provide basic services
- * for the Latin-1 part of the character tables.
  */
 
 
-#include <OS.h>
-
 #include <UnicodeChar.h>
 
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-
-
-#define FLAG(n) ((uint32)1 << (n))
-enum {
-	UF_UPPERCASE		= FLAG(B_UNICODE_UPPERCASE_LETTER),
-	UF_LOWERCASE		= FLAG(B_UNICODE_LOWERCASE_LETTER),
-	UF_TITLECASE		= FLAG(B_UNICODE_TITLECASE_LETTER),
-	UF_MODIFIER_LETTER	= FLAG(B_UNICODE_MODIFIER_LETTER),
-	UF_OTHER_LETTER		= FLAG(B_UNICODE_OTHER_LETTER),
-	UF_DECIMAL_NUMBER	= FLAG(B_UNICODE_DECIMAL_DIGIT_NUMBER),
-	UF_OTHER_NUMBER		= FLAG(B_UNICODE_OTHER_NUMBER),
-	UF_LETTER_NUMBER	= FLAG(B_UNICODE_LETTER_NUMBER)
-};
-
-
-static uint32 gStaticProps32Table[] = {
-    /* 0x00 */	0x48f,		0x48f,		0x48f,		0x48f,
-    /* 0x04 */	0x48f,		0x48f,		0x48f,		0x48f,
-    /* 0x08 */	0x48f,		0x20c,		0x1ce,		0x20c,
-    /* 0x0c */	0x24d,		0x1ce,		0x48f,		0x48f,
-    /* 0x10 */	0x48f,		0x48f,		0x48f,		0x48f,
-    /* 0x14 */	0x48f,		0x48f,		0x48f,		0x48f,
-    /* 0x18 */	0x48f,		0x48f,		0x48f,		0x48f,
-    /* 0x1c */	0x1ce,		0x1ce,		0x1ce,		0x20c,
-    /* 0x20 */	0x24c,		0x297,		0x297,		0x117,
-    /* 0x24 */	0x119,		0x117,		0x297,		0x297,
-    /* 0x28 */	0x100a94,	0xfff00a95,	0x297,		0x118,
-    /* 0x2c */	0x197,		0x113,		0x197,		0xd7,
-    /* 0x30 */	0x89,		0x100089,	0x200089,	0x300089,
-    /* 0x34 */	0x400089,	0x500089,	0x600089,	0x700089,
-    /* 0x38 */	0x800089,	0x900089,	0x197,		0x297,
-    /* 0x3c */	0x200a98,	0x298,		0xffe00a98,	0x297,
-    /* 0x40 */	0x297,		0x2000001,	0x2000001,	0x2000001,
-    /* 0x44 */	0x2000001,	0x2000001,	0x2000001,	0x2000001,
-    /* 0x48 */	0x2000001,	0x2000001,	0x2000001,	0x2000001,
-    /* 0x4c */	0x2000001,	0x2000001,	0x2000001,	0x2000001,
-    /* 0x50 */	0x2000001,	0x2000001,	0x2000001,	0x2000001,
-    /* 0x54 */	0x2000001,	0x2000001,	0x2000001,	0x2000001,
-    /* 0x58 */	0x2000001,	0x2000001,	0x2000001,	0x200a94,
-    /* 0x5c */	0x297,		0xffe00a95,	0x29a,		0x296,
-    /* 0x60 */	0x29a,		0x2000002,	0x2000002,	0x2000002,
-    /* 0x64 */	0x2000002,	0x2000002,	0x2000002,	0x2000002,
-    /* 0x68 */	0x2000002,	0x2000002,	0x2000002,	0x2000002,
-    /* 0x6c */	0x2000002,	0x2000002,	0x2000002,	0x2000002,
-    /* 0x70 */	0x2000002,	0x2000002,	0x2000002,	0x2000002,
-    /* 0x74 */	0x2000002,	0x2000002,	0x2000002,	0x2000002,
-    /* 0x78 */	0x2000002,	0x2000002,	0x2000002,	0x200a94,
-    /* 0x7c */	0x298,		0xffe00a95,	0x298,		0x48f,
-    /* 0x80 */	0x48f,		0x48f,		0x48f,		0x48f,
-    /* 0x84 */	0x48f,		0x1ce,		0x48f,		0x48f,
-    /* 0x88 */	0x48f,		0x48f,		0x48f,		0x48f,
-    /* 0x8c */	0x48f,		0x48f,		0x48f,		0x48f,
-    /* 0x90 */	0x48f,		0x48f,		0x48f,		0x48f,
-    /* 0x94 */	0x48f,		0x48f,		0x48f,		0x48f,
-    /* 0x98 */	0x48f,		0x48f,		0x48f,		0x48f,
-    /* 0x9c */	0x48f,		0x48f,		0x48f,		0x48f
-};
-
-enum {
-    INDEX_STAGE_2_BITS,
-    INDEX_STAGE_3_BITS,
-    INDEX_EXCEPTIONS,
-    INDEX_STAGE_3_INDEX,
-    INDEX_PROPS,
-    INDEX_UCHARS
-};
-
-/* constants and macros for access to the data */
-enum {
-    EXC_UPPERCASE,
-    EXC_LOWERCASE,
-    EXC_TITLECASE,
-    EXC_DIGIT_VALUE,
-    EXC_NUMERIC_VALUE,
-    EXC_DENOMINATOR_VALUE,
-    EXC_MIRROR_MAPPING,
-    EXC_SPECIAL_CASING,
-    EXC_CASE_FOLDING
-};
-
-enum {
-    EXCEPTION_SHIFT	= 5,
-    BIDI_SHIFT,
-    MIRROR_SHIFT	= BIDI_SHIFT + 5,
-    VALUE_SHIFT		= 20,
-
-    VALUE_BITS		= 32 - VALUE_SHIFT
-};
-
-/* number of bits in an 8-bit integer value */
-#define EXC_GROUP 8
-static uint8 gFlagsOffset[256] = {
-	0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
-	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
-	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
-	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
-	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
-	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
-	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
-	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
-	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
-	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
-	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
-	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
-	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
-	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
-	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
-	4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
-};
-
-#ifdef UCHAR_VARIABLE_TRIE_BITS
-	// access values calculated from indices
-	static uint16_t stage23Bits, stage2Mask, stage3Mask;
-#	define sStage3Bits   indexes[INDEX_STAGE_3_BITS]
-#else
-    // Use hardcoded bit distribution for the trie table access
-#	define sStage23Bits  10
-#	define sStage2Mask   0x3f
-#	define sStage3Mask   0xf
-#	define sStage3Bits   4
-#endif
-
-
-/**	We need to change the char category for ISO 8 controls, since the
- *	genprops utility we got from IBM's ICU apparently changes it for
- *	some characters.
- */
-
-static inline bool
-isISO8Control(uint32 c)
-{
-	return ((uint32)c < 0x20 || (uint32)(c - 0x7f) <= 0x20);
-}
-
-
-static inline uint32
-getProperties(uint32 c)
-{
-	if (c > 0x10ffff)
-		return 0;
-
-	// TODO : Data from unicode
-
-	return c > 0x9f ? 0 : gStaticProps32Table[c];
-}
-
-
-static inline uint8
-getCategory(uint32 properties)
-{
-	return properties & 0x1f;
-}
-
-
-static inline bool
-propertyIsException(uint32 properties)
-{
-	return properties & (1UL << EXCEPTION_SHIFT);
-}
-
-
-static inline uint32
-getUnsignedValue(uint32 properties)
-{
-	return properties >> VALUE_SHIFT;
-}
-
-
-static inline uint32
-getSignedValue(uint32 properties)
-{
-	return (int32)properties >> VALUE_SHIFT;
-}
-
-
-static inline uint32 *
-getExceptions(uint32 properties)
-{
-	// TODO : data from unicode
-	return 0;
-}
-
-
-static inline bool
-haveExceptionValue(uint32 flags,int16 index)
-{
-	return flags & (1UL << index);
-}
-
-
-static inline void
-addExceptionOffset(uint32 &flags, int16 &index, uint32 **offset)
-{
-	if (index >= EXC_GROUP) {
-		*offset += gFlagsOffset[flags & ((1 << EXC_GROUP) - 1)];
-		flags >>= EXC_GROUP;
-		index -= EXC_GROUP;
-	}
-	*offset += gFlagsOffset[flags & ((1 << index) - 1)];
-}
-
-
-//	#pragma mark -
+#include <unicode/uchar.h>
+#include <unicode/utf8.h>
 
 
 BUnicodeChar::BUnicodeChar()
@@ -236,382 +20,244 @@ BUnicodeChar::BUnicodeChar()
 }
 
 
-bool 
-BUnicodeChar::IsAlpha(uint32 c)
-{
-	BUnicodeChar();
-	return (FLAG(getCategory(getProperties(c)))
-			& (UF_UPPERCASE | UF_LOWERCASE | UF_TITLECASE | UF_MODIFIER_LETTER | UF_OTHER_LETTER)
-		   ) != 0;
-}
-
-
-/** Returns the type code of the specified unicode character */
+// Returns the general category value for the code point.
 int8
 BUnicodeChar::Type(uint32 c)
 {
 	BUnicodeChar();
-	return (int8)getCategory(getProperties(c));
+	return u_charType(c);
 }
 
 
-bool 
-BUnicodeChar::IsLower(uint32 c)
+// Determines whether the specified code point is a letter character.
+// True for general categories "L" (letters).
+bool
+BUnicodeChar::IsAlpha(uint32 c)
 {
 	BUnicodeChar();
-    return getCategory(getProperties(c)) == B_UNICODE_LOWERCASE_LETTER;
+	return u_isalpha(c);
 }
 
 
-bool 
-BUnicodeChar::IsUpper(uint32 c)
-{
-	BUnicodeChar();
-	return getCategory(getProperties(c)) == B_UNICODE_UPPERCASE_LETTER;
-}
-
-
-bool 
-BUnicodeChar::IsTitle(uint32 c)
-{
-	BUnicodeChar();
-	return getCategory(getProperties(c)) == B_UNICODE_TITLECASE_LETTER;
-}
-
-
-bool 
-BUnicodeChar::IsDigit(uint32 c)
-{
-	BUnicodeChar();
-	return (FLAG(getCategory(getProperties(c)))
-			& (UF_DECIMAL_NUMBER | UF_OTHER_NUMBER | UF_LETTER_NUMBER)
-		   ) != 0;
-}
-
-
-bool 
+// Determines whether the specified code point is an alphanumeric character
+// (letter or digit).
+// True for characters with general categories
+// "L" (letters) and "Nd" (decimal digit numbers).
+bool
 BUnicodeChar::IsAlNum(uint32 c)
 {
 	BUnicodeChar();
-	return (FLAG(getCategory(getProperties(c)))
-			& (UF_DECIMAL_NUMBER | UF_OTHER_NUMBER | UF_LETTER_NUMBER | UF_UPPERCASE
-			   | UF_LOWERCASE | UF_TITLECASE | UF_MODIFIER_LETTER | UF_OTHER_LETTER)
-           ) != 0;
+	return u_isalnum(c);
 }
 
 
-bool 
+// Check if a code point has the Lowercase Unicode property (UCHAR_LOWERCASE).
+bool
+BUnicodeChar::IsLower(uint32 c)
+{
+	BUnicodeChar();
+	return u_isULowercase(c);
+}
+
+
+// Check if a code point has the Uppercase Unicode property (UCHAR_UPPERCASE).
+bool
+BUnicodeChar::IsUpper(uint32 c)
+{
+	BUnicodeChar();
+	return u_isUUppercase(c);
+}
+
+
+// Determines whether the specified code point is a titlecase letter.
+// True for general category "Lt" (titlecase letter).
+bool
+BUnicodeChar::IsTitle(uint32 c)
+{
+	BUnicodeChar();
+	return u_istitle(c);
+}
+
+
+// Determines whether the specified code point is a digit character.
+// True for characters with general category "Nd" (decimal digit numbers).
+// Beginning with Unicode 4, this is the same as
+// testing for the Numeric_Type of Decimal.
+bool
+BUnicodeChar::IsDigit(uint32 c)
+{
+	BUnicodeChar();
+	return u_isdigit(c);
+}
+
+
+// Determines whether the specified code point is a hexadecimal digit.
+// This is equivalent to u_digit(c, 16)>=0.
+// True for characters with general category "Nd" (decimal digit numbers)
+// as well as Latin letters a-f and A-F in both ASCII and Fullwidth ASCII.
+// (That is, for letters with code points
+// 0041..0046, 0061..0066, FF21..FF26, FF41..FF46.)
+bool
+BUnicodeChar::IsHexDigit(uint32 c)
+{
+	BUnicodeChar();
+	return u_isxdigit(c);
+}
+
+
+// Determines whether the specified code point is "defined",
+// which usually means that it is assigned a character.
+// True for general categories other than "Cn" (other, not assigned),
+// i.e., true for all code points mentioned in UnicodeData.txt.
+bool
 BUnicodeChar::IsDefined(uint32 c)
 {
 	BUnicodeChar();
-	return getProperties(c) != 0;
+	return u_isdefined(c);
 }
 
 
-/** Returns true if the specified unicode character is a base
- *	form character that can be used with a diacritic.
- *	This doesn't mean that the character has to be distinct,
- *	though.
- */
-
-bool 
+// Determines whether the specified code point is a base character.
+// True for general categories "L" (letters), "N" (numbers),
+// "Mc" (spacing combining marks), and "Me" (enclosing marks).
+bool
 BUnicodeChar::IsBase(uint32 c)
 {
 	BUnicodeChar();
-	return (FLAG(getCategory(getProperties(c)))
-			& (UF_DECIMAL_NUMBER | UF_OTHER_NUMBER | UF_LETTER_NUMBER 
-			   | UF_UPPERCASE | UF_LOWERCASE | UF_TITLECASE
-			   | UF_MODIFIER_LETTER | UF_OTHER_LETTER | FLAG(B_UNICODE_NON_SPACING_MARK)
-			   | FLAG(B_UNICODE_ENCLOSING_MARK) | FLAG(B_UNICODE_COMBINING_SPACING_MARK))
-		   ) != 0;
+	return u_isbase(c);
 }
 
 
-/** Returns true if the specified unicode character is a
- *	control character.
- */
-
-bool 
+// Determines whether the specified code point is a control character
+// (as defined by this function).
+// A control character is one of the following:
+// - ISO 8-bit control character (U+0000..U+001f and U+007f..U+009f)
+// - U_CONTROL_CHAR (Cc)
+// - U_FORMAT_CHAR (Cf)
+// - U_LINE_SEPARATOR (Zl)
+// - U_PARAGRAPH_SEPARATOR (Zp)
+bool
 BUnicodeChar::IsControl(uint32 c)
 {
 	BUnicodeChar();
-	return isISO8Control(c)
-			|| (FLAG(getCategory(getProperties(c)))
-				& (FLAG(B_UNICODE_CONTROL_CHAR) | FLAG(B_UNICODE_FORMAT_CHAR)
-					| FLAG(B_UNICODE_LINE_SEPARATOR) | FLAG(B_UNICODE_PARAGRAPH_SEPARATOR))
-			   ) != 0;
+	return u_iscntrl(c);
 }
 
 
-/** Returns true if the specified unicode character is a
- *	punctuation character.
- */
-
+// Determines whether the specified code point is a punctuation character.
+// True for characters with general categories "P" (punctuation).
 bool
 BUnicodeChar::IsPunctuation(uint32 c)
 {
 	BUnicodeChar();
-	return (FLAG(getCategory(getProperties(c)))
-			& (FLAG(B_UNICODE_DASH_PUNCTUATION)
-				| FLAG(B_UNICODE_START_PUNCTUATION)
-				| FLAG(B_UNICODE_END_PUNCTUATION)
-				| FLAG(B_UNICODE_CONNECTOR_PUNCTUATION)
-				| FLAG(B_UNICODE_OTHER_PUNCTUATION))
-			) != 0;
+	return u_ispunct(c);
 }
 
 
-/** Returns true if the specified unicode character is some
- *	kind of a space character.
- */
-
-bool 
+// Determine if the specified code point is a space character according to Java.
+// True for characters with general categories "Z" (separators),
+// which does not include control codes (e.g., TAB or Line Feed).
+bool
 BUnicodeChar::IsSpace(uint32 c)
 {
 	BUnicodeChar();
-	return (FLAG(getCategory(getProperties(c)))
-			& (FLAG(B_UNICODE_SPACE_SEPARATOR)
-				| FLAG(B_UNICODE_LINE_SEPARATOR)
-				| FLAG(B_UNICODE_PARAGRAPH_SEPARATOR))
-		   ) != 0;
+	return u_isJavaSpaceChar(c);
 }
 
 
-/** Returns true if the specified unicode character is a white
- *	space character.
- *	This is essentially the same as IsSpace(), but excludes all
- *	non-breakable spaces.
- */
-
-bool 
+// Determines if the specified code point is a whitespace character.
+// A character is considered to be a whitespace character if and only
+// if it satisfies one of the following criteria:
+// - It is a Unicode Separator character (categories "Z" = "Zs" or "Zl" or "Zp"),
+//		but is not also a non-breaking space (U+00A0 NBSP or U+2007 Figure Space
+//		or U+202F Narrow NBSP).
+// - It is U+0009 HORIZONTAL TABULATION.
+// - It is U+000A LINE FEED.
+// - It is U+000B VERTICAL TABULATION.
+// - It is U+000C FORM FEED.
+// - It is U+000D CARRIAGE RETURN.
+// - It is U+001C FILE SEPARATOR.
+// - It is U+001D GROUP SEPARATOR.
+// - It is U+001E RECORD SEPARATOR.
+// - It is U+001F UNIT SEPARATOR.
+bool
 BUnicodeChar::IsWhitespace(uint32 c)
 {
 	BUnicodeChar();
-	return (FLAG(getCategory(getProperties(c)))
-			& (FLAG(B_UNICODE_SPACE_SEPARATOR)
-				| FLAG(B_UNICODE_LINE_SEPARATOR)
-				| FLAG(B_UNICODE_PARAGRAPH_SEPARATOR))
-		   ) != 0 && c != 0xa0 && c != 0x202f && c != 0xfeff; // exclude non-breakable spaces
+	return u_isWhitespace(c);
 }
 
 
-/** Returns true if the specified unicode character is printable.
- */
-
-bool 
+// Determines whether the specified code point is printable according to ICU's
+// u_isprint(): true for general categories other than "C" (controls).
+bool
 BUnicodeChar::IsPrintable(uint32 c)
 {
 	BUnicodeChar();
-	return !isISO8Control(c)
-			&& (FLAG(getCategory(getProperties(c)))
-				& ~(FLAG(B_UNICODE_UNASSIGNED) | FLAG(B_UNICODE_CONTROL_CHAR)
-					| FLAG(B_UNICODE_FORMAT_CHAR) | FLAG(B_UNICODE_PRIVATE_USE_CHAR)
-					| FLAG(B_UNICODE_SURROGATE) | FLAG(B_UNICODE_GENERAL_OTHER_TYPES)
-					| FLAG(31))
-				   ) != 0;
+	return u_isprint(c);
 }
 
 
 //	#pragma mark -
 
-
-/** Transforms the specified unicode character to lowercase.
- */
-
-uint32 
+// Maps the specified code point to its lowercase equivalent via ICU's
+// u_tolower(). Per the ICU documentation, a code point without a lowercase
+// mapping is returned unchanged.
+uint32
 BUnicodeChar::ToLower(uint32 c)
 {
 	BUnicodeChar();
-
-	uint32 props = getProperties(c);
-
-	if (!propertyIsException(props)) {
-		if (FLAG(getCategory(props)) & (UF_UPPERCASE | UF_TITLECASE))
-			return c + getSignedValue(props);
-	} else {
-		uint32 *exceptions = getExceptions(props);
-		uint32 firstExceptionValue = *exceptions;
-
-		if (haveExceptionValue(firstExceptionValue, EXC_LOWERCASE)) {
-			int16 index = EXC_LOWERCASE;
-			addExceptionOffset(firstExceptionValue, index, &++exceptions);
-			return *exceptions;
-		}
-	}
-	// no mapping found, just return the character unchanged
-	return c;
+	return u_tolower(c);
 }
 
 
-/** Transforms the specified unicode character to uppercase.
- */
-
-uint32 
+// Maps the specified code point to its uppercase equivalent via ICU's
+// u_toupper(). Per the ICU documentation, a code point without an uppercase
+// mapping is returned unchanged.
+uint32
 BUnicodeChar::ToUpper(uint32 c)
 {
 	BUnicodeChar();
-
-	uint32 props = getProperties(c);
-
-	if (!propertyIsException(props)) {
-		if (getCategory(props) == B_UNICODE_LOWERCASE_LETTER)
-			return c - getSignedValue(props);
-	} else {
-		uint32 *exceptions = getExceptions(props);
-		uint32 firstExceptionValue = *exceptions;
-
-		if (haveExceptionValue(firstExceptionValue, EXC_UPPERCASE)) {
-			int16 index = EXC_UPPERCASE;
-			++exceptions;
-			addExceptionOffset(firstExceptionValue, index, &exceptions);
-			return *exceptions;
-		}
-    }
-	// no mapping found, just return the character unchanged
-	return c;
+	return u_toupper(c);
 }
 
 
-/** Transforms the specified unicode character to title case.
- */
-
-uint32 
+// Maps the specified code point to its titlecase equivalent via ICU's
+// u_totitle(). Per the ICU documentation, a code point without a titlecase
+// mapping is returned unchanged.
+uint32
 BUnicodeChar::ToTitle(uint32 c)
 {
 	BUnicodeChar();
-
-	uint32 props = getProperties(c);
-
-	if (!propertyIsException(props)) {
-		if (getCategory(props) == B_UNICODE_LOWERCASE_LETTER) {
-			// here, titlecase is the same as uppercase
-			return c - getSignedValue(props);
-		}
-	} else {
-		uint32 *exceptions = getExceptions(props);
-		uint32 firstExceptionValue = *exceptions;
-
-		if (haveExceptionValue(firstExceptionValue, EXC_TITLECASE)) {
-			int16 index = EXC_TITLECASE;
-			addExceptionOffset(firstExceptionValue, index, &++exceptions);
-			return (uint32)*exceptions;
-		} else if (haveExceptionValue(firstExceptionValue, EXC_UPPERCASE)) {
-			// here, titlecase is the same as uppercase
-			int16 index = EXC_UPPERCASE;
-			addExceptionOffset(firstExceptionValue, index, &++exceptions);
-			return *exceptions;
-		}
-	}
-	// no mapping found, just return the character unchanged
-	return c;
+	return u_totitle(c);
 }
 
 
-int32 
+// Returns the value of the specified code point as a digit in radix 10, as
+// computed by ICU's u_digit(). Per the ICU documentation the result is -1 if
+// the code point is not a valid digit in that radix.
+int32
 BUnicodeChar::DigitValue(uint32 c)
 {
 	BUnicodeChar();
+	return u_digit(c, 10);
+}
 
-	uint32 props = getProperties(c);
 
-	if (!propertyIsException(props)) {
-		if (getCategory(props) == B_UNICODE_DECIMAL_DIGIT_NUMBER)
-			return getSignedValue(props);
-	} else {
-		uint32 *exceptions = getExceptions(props);
-		uint32 firstExceptionValue = *exceptions;
-
-		if (haveExceptionValue(firstExceptionValue, EXC_DIGIT_VALUE)) {
-			int16 index = EXC_DIGIT_VALUE;
-			addExceptionOffset(firstExceptionValue, index, &++exceptions);
-
-			int32 value = (int32)(int16)*exceptions;
-				 // the digit value is in the lower 16 bits
-			if (value != -1)
-				return value;
-		}
-	}
-
-    // If there is no value in the properties table,
-    // then check for some special characters
-	switch (c) {
-		case 0x3007:	return 0;
-		case 0x4e00:	return 1;
-		case 0x4e8c:	return 2;
-		case 0x4e09:	return 3;
-		case 0x56d8:	return 4;
-		case 0x4e94:	return 5;
-		case 0x516d:	return 6;
-		case 0x4e03:	return 7;
-		case 0x516b:	return 8;
-		case 0x4e5d:	return 9;
-		default:		return -1;
-	}
+// Returns the East Asian Width property of the specified code point: the
+// value ICU reports for UCHAR_EAST_ASIAN_WIDTH, converted to
+// unicode_east_asian_width.
+// NOTE(review): the cast assumes the unicode_east_asian_width enumerators
+// carry the same numeric values as ICU's property values -- confirm against
+// the header.
+unicode_east_asian_width
+BUnicodeChar::EastAsianWidth(uint32 c)
+{
+	return (unicode_east_asian_width)u_getIntPropertyValue(c,
+			UCHAR_EAST_ASIAN_WIDTH);
 }
 
 
 void
 BUnicodeChar::ToUTF8(uint32 c, char **out)
 {
-	char *s = *out;
-
-	if (c < 0x80)
-		*(s++) = c;
-	else if (c < 0x800) {
-		*(s++) = 0xc0 | (c >> 6);
-		*(s++) = 0x80 | (c & 0x3f);
-	} else if (c < 0x10000) {
-		*(s++) = 0xe0 | (c >> 12);
-		*(s++) = 0x80 | ((c >> 6) & 0x3f);
-		*(s++) = 0x80 | (c & 0x3f);
-	} else if (c <= 0x10ffff) {
-		*(s++) = 0xf0 | (c >> 18);
-		*(s++) = 0x80 | ((c >> 12) & 0x3f);
-		*(s++) = 0x80 | ((c >> 6) & 0x3f);
-		*(s++) = 0x80 | (c & 0x3f);
-	}
-	*out = s;
+	int i = 0;
+	U8_APPEND_UNSAFE(*out, i, c);
 }
 
 
-uint32 
+// Decodes the UTF-8 sequence at *in, advances *in past the bytes consumed and
+// returns the decoded code point. Returns 0 and leaves *in untouched if *in
+// is NULL. U8_NEXT_UNSAFE does not validate its input, so the bytes at *in
+// must be well-formed UTF-8.
+uint32
 BUnicodeChar::FromUTF8(const char **in)
 {
-	uint8 *bytes = (uint8 *)*in;
-	if (bytes == NULL)
-		return 0;
-
-	int32 length;
-	uint8 mask = 0x1f;
-
-	switch (bytes[0] & 0xf0) {
-		case 0xc0:
-		case 0xd0:	length = 2; break;
-		case 0xe0:	length = 3; break;
-		case 0xf0:
-			mask = 0x0f;
-			length = 4;
-			break;
-		default:
-			// valid 1-byte character
-			// and invalid characters
-			(*in)++;
-			return bytes[0];
-	}
-	uint32 c = bytes[0] & mask;
-	int32 i = 1;
-	for (;i < length && (bytes[i] & 0x80) > 0;i++)
-		c = (c << 6) | (bytes[i] & 0x3f);
-
-	if (i < length) {
-		// invalid character
-		(*in)++;
-		return (uint32)bytes[0];
-	}
-	*in += length;
+	if (*in == NULL)
+		return 0;
+
+	int i = 0;
+	uint32 c = 0;
+	// U8_NEXT_UNSAFE post-increments i by the number of bytes it consumed;
+	// use that to move the caller's pointer to the next character.
+	U8_NEXT_UNSAFE(*in, i, c);
+	*in += i;
 	return c;
 }
 
+
 size_t
 BUnicodeChar::UTF8StringLength(const char *str)
 {
@@ -623,6 +269,7 @@ BUnicodeChar::UTF8StringLength(const char *str)
 	return len;
 }
 
+
 size_t
 BUnicodeChar::UTF8StringLength(const char *str, size_t maxLength)
 {
@@ -633,4 +280,3 @@ BUnicodeChar::UTF8StringLength(const char *str, size_t maxLength)
 	}
 	return len;
 }
-