* removed old genprops tool from our repo

git-svn-id: file:///srv/svn/repos/haiku/haiku/trunk@37722 a95241bf-73f2-0310-859d-f6bbb57e9c96
This commit is contained in:
Oliver Tappe 2010-07-23 19:54:16 +00:00
parent debe3b0970
commit 229c79e015
14 changed files with 0 additions and 15036 deletions

View File

@ -1,9 +0,0 @@
# Build description for the genprops tool located in src/tools/locale/genprops.
SubDir HAIKU_TOP src tools locale genprops ;
# Header setup for "locale" - presumably makes the locale kit's private and
# public headers available to the sources below; confirm against the
# UsePrivateHeaders/UsePublicHeaders rule definitions.
UsePrivateHeaders locale ;
UsePublicHeaders locale ;
# Builds the "genprops" application from the listed sources. The last
# argument ("be") is presumably the library to link against - TODO confirm
# against the Application rule definition.
Application genprops
: genprops.cpp store.cpp utf8.cpp PropertyFile.cpp
: be
;

View File

@ -1,71 +0,0 @@
/*
** Copyright 2003, Axel Dörfler, axeld@pinc-software.de. All rights reserved.
** Distributed under the terms of the OpenBeOS License.
*/
#include "PropertyFile.h"
#include "UnicodeProperties.h"
#include <Path.h>
#include <FindDirectory.h>
// Opens the file "name" inside "directory" with B_WRITE_ONLY | B_CREATE_FILE
// and then writes a UnicodePropertiesHeader to it.
//
// Returns the BFile::SetTo() result if that is negative. Otherwise the
// result of the header Write() call is returned unchanged, so callers should
// test the value with "< B_OK" rather than compare it against B_OK.
status_t
PropertyFile::SetTo(const char *directory, const char *name)
{
	BPath target(directory, name);

	const status_t openResult
		= BFile::SetTo(target.Path(), B_WRITE_ONLY | B_CREATE_FILE);
	if (openResult >= B_OK) {
		// header that precedes the property data in the file
		static UnicodePropertiesHeader header = {
			sizeof(UnicodePropertiesHeader),
			B_HOST_IS_BENDIAN,
			PROPERTIES_FORMAT,
			{ 3, 0, 0 }	// version (taken from the ICU data version)
		};

		return Write(&header, sizeof(header));
	}

	// opening/creating the file failed - hand the error on to the caller
	return openResult;
}
// Returns the file size as reported by GetSize() minus the size of the
// UnicodePropertiesHeader, or 0 if GetSize() fails.
off_t
PropertyFile::Size()
{
	off_t fileSize;
	const bool sizeKnown = GetSize(&fileSize) >= B_OK;
	if (!sizeKnown)
		return 0;

	// the header written by SetTo() is not counted
	return fileSize - sizeof(UnicodePropertiesHeader);
}
ssize_t
PropertyFile::WritePadding(size_t length)
{
static uint8 padding[16] = {
0xaa, 0xaa, 0xaa, 0xaa,
0xaa, 0xaa, 0xaa, 0xaa,
0xaa, 0xaa, 0xaa, 0xaa,
0xaa, 0xaa, 0xaa, 0xaa
};
ssize_t bytesWritten = (ssize_t)length;
while (length >= 16) {
ssize_t written = Write(padding, 16);
if (written < B_OK)
return written;
length -= 16;
}
if (length > 0) {
ssize_t written = Write(padding, length);
if (written < B_OK)
return written;
}
return bytesWritten;
}

View File

@ -1,24 +0,0 @@
/*
** Copyright 2003, Axel Dörfler, axeld@pinc-software.de. All rights reserved.
** Distributed under the terms of the OpenBeOS License.
*/
#ifndef PROPERTY_FILE_H
#define PROPERTY_FILE_H
#include <File.h>
// Write-only flavour of the PropertyFile class; the library contains a
// read-only version of it.
class PropertyFile : public BFile {
	public:
		// Opens "name" in "directory" for writing and writes the
		// properties header; test the result with "< B_OK".
		status_t	SetTo(const char* directory, const char* name);

		// File size without the properties header (0 if the size
		// cannot be retrieved).
		off_t		Size();

		// Writes "length" bytes of 0xaa filler.
		ssize_t		WritePadding(size_t length);
};
#endif /* PROPERTY_FILE_H */

View File

@ -1,821 +0,0 @@
# CaseFolding-2.txt
#
# Case Folding Properties
#
# This file is a supplement to the UnicodeData file.
# It provides a case folding mapping generated from the Unicode Character Database.
# If all characters are mapped according to this mapping, then
# case differences (according to UnicodeData.txt and SpecialCasing.txt)
# are eliminated.
#
# For information on case folding, see
# UTR #21 Case Mappings, at http://www.unicode.org/unicode/reports/tr21/
#
# These are informative character properties.
#
# Send comments to mark@unicode.org
#
# ================================================================================
# Format
# ================================================================================
# The entries in this file are in the following machine-readable format:
#
# <code>; <status>; <mapping>; # <name>
#
# The status is:
# L (for Lowercase) if the case mapping matches the standard 1-1 lowercase mapping
# E (for exception) if it does not.
#
# The mapping may consist of multiple characters.
# If so, they are separated by spaces.
#
# =================================================================
0041; L; 0061; #LATIN CAPITAL LETTER A
0042; L; 0062; #LATIN CAPITAL LETTER B
0043; L; 0063; #LATIN CAPITAL LETTER C
0044; L; 0064; #LATIN CAPITAL LETTER D
0045; L; 0065; #LATIN CAPITAL LETTER E
0046; L; 0066; #LATIN CAPITAL LETTER F
0047; L; 0067; #LATIN CAPITAL LETTER G
0048; L; 0068; #LATIN CAPITAL LETTER H
0049; L; 0069; #LATIN CAPITAL LETTER I
004A; L; 006A; #LATIN CAPITAL LETTER J
004B; L; 006B; #LATIN CAPITAL LETTER K
004C; L; 006C; #LATIN CAPITAL LETTER L
004D; L; 006D; #LATIN CAPITAL LETTER M
004E; L; 006E; #LATIN CAPITAL LETTER N
004F; L; 006F; #LATIN CAPITAL LETTER O
0050; L; 0070; #LATIN CAPITAL LETTER P
0051; L; 0071; #LATIN CAPITAL LETTER Q
0052; L; 0072; #LATIN CAPITAL LETTER R
0053; L; 0073; #LATIN CAPITAL LETTER S
0054; L; 0074; #LATIN CAPITAL LETTER T
0055; L; 0075; #LATIN CAPITAL LETTER U
0056; L; 0076; #LATIN CAPITAL LETTER V
0057; L; 0077; #LATIN CAPITAL LETTER W
0058; L; 0078; #LATIN CAPITAL LETTER X
0059; L; 0079; #LATIN CAPITAL LETTER Y
005A; L; 007A; #LATIN CAPITAL LETTER Z
00B5; E; 03BC; #MICRO SIGN
00C0; L; 00E0; #LATIN CAPITAL LETTER A WITH GRAVE
00C1; L; 00E1; #LATIN CAPITAL LETTER A WITH ACUTE
00C2; L; 00E2; #LATIN CAPITAL LETTER A WITH CIRCUMFLEX
00C3; L; 00E3; #LATIN CAPITAL LETTER A WITH TILDE
00C4; L; 00E4; #LATIN CAPITAL LETTER A WITH DIAERESIS
00C5; L; 00E5; #LATIN CAPITAL LETTER A WITH RING ABOVE
00C6; L; 00E6; #LATIN CAPITAL LETTER AE
00C7; L; 00E7; #LATIN CAPITAL LETTER C WITH CEDILLA
00C8; L; 00E8; #LATIN CAPITAL LETTER E WITH GRAVE
00C9; L; 00E9; #LATIN CAPITAL LETTER E WITH ACUTE
00CA; L; 00EA; #LATIN CAPITAL LETTER E WITH CIRCUMFLEX
00CB; L; 00EB; #LATIN CAPITAL LETTER E WITH DIAERESIS
00CC; L; 00EC; #LATIN CAPITAL LETTER I WITH GRAVE
00CD; L; 00ED; #LATIN CAPITAL LETTER I WITH ACUTE
00CE; L; 00EE; #LATIN CAPITAL LETTER I WITH CIRCUMFLEX
00CF; L; 00EF; #LATIN CAPITAL LETTER I WITH DIAERESIS
00D0; L; 00F0; #LATIN CAPITAL LETTER ETH
00D1; L; 00F1; #LATIN CAPITAL LETTER N WITH TILDE
00D2; L; 00F2; #LATIN CAPITAL LETTER O WITH GRAVE
00D3; L; 00F3; #LATIN CAPITAL LETTER O WITH ACUTE
00D4; L; 00F4; #LATIN CAPITAL LETTER O WITH CIRCUMFLEX
00D5; L; 00F5; #LATIN CAPITAL LETTER O WITH TILDE
00D6; L; 00F6; #LATIN CAPITAL LETTER O WITH DIAERESIS
00D8; L; 00F8; #LATIN CAPITAL LETTER O WITH STROKE
00D9; L; 00F9; #LATIN CAPITAL LETTER U WITH GRAVE
00DA; L; 00FA; #LATIN CAPITAL LETTER U WITH ACUTE
00DB; L; 00FB; #LATIN CAPITAL LETTER U WITH CIRCUMFLEX
00DC; L; 00FC; #LATIN CAPITAL LETTER U WITH DIAERESIS
00DD; L; 00FD; #LATIN CAPITAL LETTER Y WITH ACUTE
00DE; L; 00FE; #LATIN CAPITAL LETTER THORN
00DF; E; 0073 0073; #LATIN SMALL LETTER SHARP S
0100; L; 0101; #LATIN CAPITAL LETTER A WITH MACRON
0102; L; 0103; #LATIN CAPITAL LETTER A WITH BREVE
0104; L; 0105; #LATIN CAPITAL LETTER A WITH OGONEK
0106; L; 0107; #LATIN CAPITAL LETTER C WITH ACUTE
0108; L; 0109; #LATIN CAPITAL LETTER C WITH CIRCUMFLEX
010A; L; 010B; #LATIN CAPITAL LETTER C WITH DOT ABOVE
010C; L; 010D; #LATIN CAPITAL LETTER C WITH CARON
010E; L; 010F; #LATIN CAPITAL LETTER D WITH CARON
0110; L; 0111; #LATIN CAPITAL LETTER D WITH STROKE
0112; L; 0113; #LATIN CAPITAL LETTER E WITH MACRON
0114; L; 0115; #LATIN CAPITAL LETTER E WITH BREVE
0116; L; 0117; #LATIN CAPITAL LETTER E WITH DOT ABOVE
0118; L; 0119; #LATIN CAPITAL LETTER E WITH OGONEK
011A; L; 011B; #LATIN CAPITAL LETTER E WITH CARON
011C; L; 011D; #LATIN CAPITAL LETTER G WITH CIRCUMFLEX
011E; L; 011F; #LATIN CAPITAL LETTER G WITH BREVE
0120; L; 0121; #LATIN CAPITAL LETTER G WITH DOT ABOVE
0122; L; 0123; #LATIN CAPITAL LETTER G WITH CEDILLA
0124; L; 0125; #LATIN CAPITAL LETTER H WITH CIRCUMFLEX
0126; L; 0127; #LATIN CAPITAL LETTER H WITH STROKE
0128; L; 0129; #LATIN CAPITAL LETTER I WITH TILDE
012A; L; 012B; #LATIN CAPITAL LETTER I WITH MACRON
012C; L; 012D; #LATIN CAPITAL LETTER I WITH BREVE
012E; L; 012F; #LATIN CAPITAL LETTER I WITH OGONEK
0130; L; 0069; #LATIN CAPITAL LETTER I WITH DOT ABOVE
0131; E; 0069; #LATIN SMALL LETTER DOTLESS I
0132; L; 0133; #LATIN CAPITAL LIGATURE IJ
0134; L; 0135; #LATIN CAPITAL LETTER J WITH CIRCUMFLEX
0136; L; 0137; #LATIN CAPITAL LETTER K WITH CEDILLA
0139; L; 013A; #LATIN CAPITAL LETTER L WITH ACUTE
013B; L; 013C; #LATIN CAPITAL LETTER L WITH CEDILLA
013D; L; 013E; #LATIN CAPITAL LETTER L WITH CARON
013F; L; 0140; #LATIN CAPITAL LETTER L WITH MIDDLE DOT
0141; L; 0142; #LATIN CAPITAL LETTER L WITH STROKE
0143; L; 0144; #LATIN CAPITAL LETTER N WITH ACUTE
0145; L; 0146; #LATIN CAPITAL LETTER N WITH CEDILLA
0147; L; 0148; #LATIN CAPITAL LETTER N WITH CARON
0149; E; 02BC 006E; #LATIN SMALL LETTER N PRECEDED BY APOSTROPHE
014A; L; 014B; #LATIN CAPITAL LETTER ENG
014C; L; 014D; #LATIN CAPITAL LETTER O WITH MACRON
014E; L; 014F; #LATIN CAPITAL LETTER O WITH BREVE
0150; L; 0151; #LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
0152; L; 0153; #LATIN CAPITAL LIGATURE OE
0154; L; 0155; #LATIN CAPITAL LETTER R WITH ACUTE
0156; L; 0157; #LATIN CAPITAL LETTER R WITH CEDILLA
0158; L; 0159; #LATIN CAPITAL LETTER R WITH CARON
015A; L; 015B; #LATIN CAPITAL LETTER S WITH ACUTE
015C; L; 015D; #LATIN CAPITAL LETTER S WITH CIRCUMFLEX
015E; L; 015F; #LATIN CAPITAL LETTER S WITH CEDILLA
0160; L; 0161; #LATIN CAPITAL LETTER S WITH CARON
0162; L; 0163; #LATIN CAPITAL LETTER T WITH CEDILLA
0164; L; 0165; #LATIN CAPITAL LETTER T WITH CARON
0166; L; 0167; #LATIN CAPITAL LETTER T WITH STROKE
0168; L; 0169; #LATIN CAPITAL LETTER U WITH TILDE
016A; L; 016B; #LATIN CAPITAL LETTER U WITH MACRON
016C; L; 016D; #LATIN CAPITAL LETTER U WITH BREVE
016E; L; 016F; #LATIN CAPITAL LETTER U WITH RING ABOVE
0170; L; 0171; #LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
0172; L; 0173; #LATIN CAPITAL LETTER U WITH OGONEK
0174; L; 0175; #LATIN CAPITAL LETTER W WITH CIRCUMFLEX
0176; L; 0177; #LATIN CAPITAL LETTER Y WITH CIRCUMFLEX
0178; L; 00FF; #LATIN CAPITAL LETTER Y WITH DIAERESIS
0179; L; 017A; #LATIN CAPITAL LETTER Z WITH ACUTE
017B; L; 017C; #LATIN CAPITAL LETTER Z WITH DOT ABOVE
017D; L; 017E; #LATIN CAPITAL LETTER Z WITH CARON
017F; E; 0073; #LATIN SMALL LETTER LONG S
0181; L; 0253; #LATIN CAPITAL LETTER B WITH HOOK
0182; L; 0183; #LATIN CAPITAL LETTER B WITH TOPBAR
0184; L; 0185; #LATIN CAPITAL LETTER TONE SIX
0186; L; 0254; #LATIN CAPITAL LETTER OPEN O
0187; L; 0188; #LATIN CAPITAL LETTER C WITH HOOK
0189; L; 0256; #LATIN CAPITAL LETTER AFRICAN D
018A; L; 0257; #LATIN CAPITAL LETTER D WITH HOOK
018B; L; 018C; #LATIN CAPITAL LETTER D WITH TOPBAR
018E; L; 01DD; #LATIN CAPITAL LETTER REVERSED E
018F; L; 0259; #LATIN CAPITAL LETTER SCHWA
0190; L; 025B; #LATIN CAPITAL LETTER OPEN E
0191; L; 0192; #LATIN CAPITAL LETTER F WITH HOOK
0193; L; 0260; #LATIN CAPITAL LETTER G WITH HOOK
0194; L; 0263; #LATIN CAPITAL LETTER GAMMA
0196; L; 0269; #LATIN CAPITAL LETTER IOTA
0197; L; 0268; #LATIN CAPITAL LETTER I WITH STROKE
0198; L; 0199; #LATIN CAPITAL LETTER K WITH HOOK
019C; L; 026F; #LATIN CAPITAL LETTER TURNED M
019D; L; 0272; #LATIN CAPITAL LETTER N WITH LEFT HOOK
019F; L; 0275; #LATIN CAPITAL LETTER O WITH MIDDLE TILDE
01A0; L; 01A1; #LATIN CAPITAL LETTER O WITH HORN
01A2; L; 01A3; #LATIN CAPITAL LETTER OI
01A4; L; 01A5; #LATIN CAPITAL LETTER P WITH HOOK
01A6; L; 0280; #LATIN LETTER YR
01A7; L; 01A8; #LATIN CAPITAL LETTER TONE TWO
01A9; L; 0283; #LATIN CAPITAL LETTER ESH
01AC; L; 01AD; #LATIN CAPITAL LETTER T WITH HOOK
01AE; L; 0288; #LATIN CAPITAL LETTER T WITH RETROFLEX HOOK
01AF; L; 01B0; #LATIN CAPITAL LETTER U WITH HORN
01B1; L; 028A; #LATIN CAPITAL LETTER UPSILON
01B2; L; 028B; #LATIN CAPITAL LETTER V WITH HOOK
01B3; L; 01B4; #LATIN CAPITAL LETTER Y WITH HOOK
01B5; L; 01B6; #LATIN CAPITAL LETTER Z WITH STROKE
01B7; L; 0292; #LATIN CAPITAL LETTER EZH
01B8; L; 01B9; #LATIN CAPITAL LETTER EZH REVERSED
01BC; L; 01BD; #LATIN CAPITAL LETTER TONE FIVE
01C4; L; 01C6; #LATIN CAPITAL LETTER DZ WITH CARON
01C5; L; 01C6; #LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON
01C7; L; 01C9; #LATIN CAPITAL LETTER LJ
01C8; L; 01C9; #LATIN CAPITAL LETTER L WITH SMALL LETTER J
01CA; L; 01CC; #LATIN CAPITAL LETTER NJ
01CB; L; 01CC; #LATIN CAPITAL LETTER N WITH SMALL LETTER J
01CD; L; 01CE; #LATIN CAPITAL LETTER A WITH CARON
01CF; L; 01D0; #LATIN CAPITAL LETTER I WITH CARON
01D1; L; 01D2; #LATIN CAPITAL LETTER O WITH CARON
01D3; L; 01D4; #LATIN CAPITAL LETTER U WITH CARON
01D5; L; 01D6; #LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
01D7; L; 01D8; #LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE
01D9; L; 01DA; #LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON
01DB; L; 01DC; #LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE
01DE; L; 01DF; #LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON
01E0; L; 01E1; #LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON
01E2; L; 01E3; #LATIN CAPITAL LETTER AE WITH MACRON
01E4; L; 01E5; #LATIN CAPITAL LETTER G WITH STROKE
01E6; L; 01E7; #LATIN CAPITAL LETTER G WITH CARON
01E8; L; 01E9; #LATIN CAPITAL LETTER K WITH CARON
01EA; L; 01EB; #LATIN CAPITAL LETTER O WITH OGONEK
01EC; L; 01ED; #LATIN CAPITAL LETTER O WITH OGONEK AND MACRON
01EE; L; 01EF; #LATIN CAPITAL LETTER EZH WITH CARON
01F0; E; 006A 030C; #LATIN SMALL LETTER J WITH CARON
01F1; L; 01F3; #LATIN CAPITAL LETTER DZ
01F2; L; 01F3; #LATIN CAPITAL LETTER D WITH SMALL LETTER Z
01F4; L; 01F5; #LATIN CAPITAL LETTER G WITH ACUTE
01F6; L; 0195; #LATIN CAPITAL LETTER HWAIR
01F7; L; 01BF; #LATIN CAPITAL LETTER WYNN
01F8; L; 01F9; #LATIN CAPITAL LETTER N WITH GRAVE
01FA; L; 01FB; #LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE
01FC; L; 01FD; #LATIN CAPITAL LETTER AE WITH ACUTE
01FE; L; 01FF; #LATIN CAPITAL LETTER O WITH STROKE AND ACUTE
0200; L; 0201; #LATIN CAPITAL LETTER A WITH DOUBLE GRAVE
0202; L; 0203; #LATIN CAPITAL LETTER A WITH INVERTED BREVE
0204; L; 0205; #LATIN CAPITAL LETTER E WITH DOUBLE GRAVE
0206; L; 0207; #LATIN CAPITAL LETTER E WITH INVERTED BREVE
0208; L; 0209; #LATIN CAPITAL LETTER I WITH DOUBLE GRAVE
020A; L; 020B; #LATIN CAPITAL LETTER I WITH INVERTED BREVE
020C; L; 020D; #LATIN CAPITAL LETTER O WITH DOUBLE GRAVE
020E; L; 020F; #LATIN CAPITAL LETTER O WITH INVERTED BREVE
0210; L; 0211; #LATIN CAPITAL LETTER R WITH DOUBLE GRAVE
0212; L; 0213; #LATIN CAPITAL LETTER R WITH INVERTED BREVE
0214; L; 0215; #LATIN CAPITAL LETTER U WITH DOUBLE GRAVE
0216; L; 0217; #LATIN CAPITAL LETTER U WITH INVERTED BREVE
0218; L; 0219; #LATIN CAPITAL LETTER S WITH COMMA BELOW
021A; L; 021B; #LATIN CAPITAL LETTER T WITH COMMA BELOW
021C; L; 021D; #LATIN CAPITAL LETTER YOGH
021E; L; 021F; #LATIN CAPITAL LETTER H WITH CARON
0222; L; 0223; #LATIN CAPITAL LETTER OU
0224; L; 0225; #LATIN CAPITAL LETTER Z WITH HOOK
0226; L; 0227; #LATIN CAPITAL LETTER A WITH DOT ABOVE
0228; L; 0229; #LATIN CAPITAL LETTER E WITH CEDILLA
022A; L; 022B; #LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON
022C; L; 022D; #LATIN CAPITAL LETTER O WITH TILDE AND MACRON
022E; L; 022F; #LATIN CAPITAL LETTER O WITH DOT ABOVE
0230; L; 0231; #LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON
0232; L; 0233; #LATIN CAPITAL LETTER Y WITH MACRON
0345; E; 03B9; #COMBINING GREEK YPOGEGRAMMENI
0386; L; 03AC; #GREEK CAPITAL LETTER ALPHA WITH TONOS
0388; L; 03AD; #GREEK CAPITAL LETTER EPSILON WITH TONOS
0389; L; 03AE; #GREEK CAPITAL LETTER ETA WITH TONOS
038A; L; 03AF; #GREEK CAPITAL LETTER IOTA WITH TONOS
038C; L; 03CC; #GREEK CAPITAL LETTER OMICRON WITH TONOS
038E; L; 03CD; #GREEK CAPITAL LETTER UPSILON WITH TONOS
038F; L; 03CE; #GREEK CAPITAL LETTER OMEGA WITH TONOS
0390; E; 03B9 0308 0301; #GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
0391; L; 03B1; #GREEK CAPITAL LETTER ALPHA
0392; L; 03B2; #GREEK CAPITAL LETTER BETA
0393; L; 03B3; #GREEK CAPITAL LETTER GAMMA
0394; L; 03B4; #GREEK CAPITAL LETTER DELTA
0395; L; 03B5; #GREEK CAPITAL LETTER EPSILON
0396; L; 03B6; #GREEK CAPITAL LETTER ZETA
0397; L; 03B7; #GREEK CAPITAL LETTER ETA
0398; L; 03B8; #GREEK CAPITAL LETTER THETA
0399; L; 03B9; #GREEK CAPITAL LETTER IOTA
039A; L; 03BA; #GREEK CAPITAL LETTER KAPPA
039B; L; 03BB; #GREEK CAPITAL LETTER LAMDA
039C; L; 03BC; #GREEK CAPITAL LETTER MU
039D; L; 03BD; #GREEK CAPITAL LETTER NU
039E; L; 03BE; #GREEK CAPITAL LETTER XI
039F; L; 03BF; #GREEK CAPITAL LETTER OMICRON
03A0; L; 03C0; #GREEK CAPITAL LETTER PI
03A1; L; 03C1; #GREEK CAPITAL LETTER RHO
03A3; E; 03C2; #GREEK CAPITAL LETTER SIGMA
03A4; L; 03C4; #GREEK CAPITAL LETTER TAU
03A5; L; 03C5; #GREEK CAPITAL LETTER UPSILON
03A6; L; 03C6; #GREEK CAPITAL LETTER PHI
03A7; L; 03C7; #GREEK CAPITAL LETTER CHI
03A8; L; 03C8; #GREEK CAPITAL LETTER PSI
03A9; L; 03C9; #GREEK CAPITAL LETTER OMEGA
03AA; L; 03CA; #GREEK CAPITAL LETTER IOTA WITH DIALYTIKA
03AB; L; 03CB; #GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA
03B0; E; 03C5 0308 0301; #GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
03C2; L; 03C2; #GREEK SMALL LETTER FINAL SIGMA
03C3; E; 03C2; #GREEK SMALL LETTER SIGMA
03D0; E; 03B2; #GREEK BETA SYMBOL
03D1; E; 03B8; #GREEK THETA SYMBOL
03D5; E; 03C6; #GREEK PHI SYMBOL
03D6; E; 03C0; #GREEK PI SYMBOL
03DA; L; 03DB; #GREEK LETTER STIGMA
03DC; L; 03DD; #GREEK LETTER DIGAMMA
03DE; L; 03DF; #GREEK LETTER KOPPA
03E0; L; 03E1; #GREEK LETTER SAMPI
03E2; L; 03E3; #COPTIC CAPITAL LETTER SHEI
03E4; L; 03E5; #COPTIC CAPITAL LETTER FEI
03E6; L; 03E7; #COPTIC CAPITAL LETTER KHEI
03E8; L; 03E9; #COPTIC CAPITAL LETTER HORI
03EA; L; 03EB; #COPTIC CAPITAL LETTER GANGIA
03EC; L; 03ED; #COPTIC CAPITAL LETTER SHIMA
03EE; L; 03EF; #COPTIC CAPITAL LETTER DEI
03F0; E; 03BA; #GREEK KAPPA SYMBOL
03F1; E; 03C1; #GREEK RHO SYMBOL
03F2; E; 03C2; #GREEK LUNATE SIGMA SYMBOL
0400; L; 0450; #CYRILLIC CAPITAL LETTER IE WITH GRAVE
0401; L; 0451; #CYRILLIC CAPITAL LETTER IO
0402; L; 0452; #CYRILLIC CAPITAL LETTER DJE
0403; L; 0453; #CYRILLIC CAPITAL LETTER GJE
0404; L; 0454; #CYRILLIC CAPITAL LETTER UKRAINIAN IE
0405; L; 0455; #CYRILLIC CAPITAL LETTER DZE
0406; L; 0456; #CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
0407; L; 0457; #CYRILLIC CAPITAL LETTER YI
0408; L; 0458; #CYRILLIC CAPITAL LETTER JE
0409; L; 0459; #CYRILLIC CAPITAL LETTER LJE
040A; L; 045A; #CYRILLIC CAPITAL LETTER NJE
040B; L; 045B; #CYRILLIC CAPITAL LETTER TSHE
040C; L; 045C; #CYRILLIC CAPITAL LETTER KJE
040D; L; 045D; #CYRILLIC CAPITAL LETTER I WITH GRAVE
040E; L; 045E; #CYRILLIC CAPITAL LETTER SHORT U
040F; L; 045F; #CYRILLIC CAPITAL LETTER DZHE
0410; L; 0430; #CYRILLIC CAPITAL LETTER A
0411; L; 0431; #CYRILLIC CAPITAL LETTER BE
0412; L; 0432; #CYRILLIC CAPITAL LETTER VE
0413; L; 0433; #CYRILLIC CAPITAL LETTER GHE
0414; L; 0434; #CYRILLIC CAPITAL LETTER DE
0415; L; 0435; #CYRILLIC CAPITAL LETTER IE
0416; L; 0436; #CYRILLIC CAPITAL LETTER ZHE
0417; L; 0437; #CYRILLIC CAPITAL LETTER ZE
0418; L; 0438; #CYRILLIC CAPITAL LETTER I
0419; L; 0439; #CYRILLIC CAPITAL LETTER SHORT I
041A; L; 043A; #CYRILLIC CAPITAL LETTER KA
041B; L; 043B; #CYRILLIC CAPITAL LETTER EL
041C; L; 043C; #CYRILLIC CAPITAL LETTER EM
041D; L; 043D; #CYRILLIC CAPITAL LETTER EN
041E; L; 043E; #CYRILLIC CAPITAL LETTER O
041F; L; 043F; #CYRILLIC CAPITAL LETTER PE
0420; L; 0440; #CYRILLIC CAPITAL LETTER ER
0421; L; 0441; #CYRILLIC CAPITAL LETTER ES
0422; L; 0442; #CYRILLIC CAPITAL LETTER TE
0423; L; 0443; #CYRILLIC CAPITAL LETTER U
0424; L; 0444; #CYRILLIC CAPITAL LETTER EF
0425; L; 0445; #CYRILLIC CAPITAL LETTER HA
0426; L; 0446; #CYRILLIC CAPITAL LETTER TSE
0427; L; 0447; #CYRILLIC CAPITAL LETTER CHE
0428; L; 0448; #CYRILLIC CAPITAL LETTER SHA
0429; L; 0449; #CYRILLIC CAPITAL LETTER SHCHA
042A; L; 044A; #CYRILLIC CAPITAL LETTER HARD SIGN
042B; L; 044B; #CYRILLIC CAPITAL LETTER YERU
042C; L; 044C; #CYRILLIC CAPITAL LETTER SOFT SIGN
042D; L; 044D; #CYRILLIC CAPITAL LETTER E
042E; L; 044E; #CYRILLIC CAPITAL LETTER YU
042F; L; 044F; #CYRILLIC CAPITAL LETTER YA
0460; L; 0461; #CYRILLIC CAPITAL LETTER OMEGA
0462; L; 0463; #CYRILLIC CAPITAL LETTER YAT
0464; L; 0465; #CYRILLIC CAPITAL LETTER IOTIFIED E
0466; L; 0467; #CYRILLIC CAPITAL LETTER LITTLE YUS
0468; L; 0469; #CYRILLIC CAPITAL LETTER IOTIFIED LITTLE YUS
046A; L; 046B; #CYRILLIC CAPITAL LETTER BIG YUS
046C; L; 046D; #CYRILLIC CAPITAL LETTER IOTIFIED BIG YUS
046E; L; 046F; #CYRILLIC CAPITAL LETTER KSI
0470; L; 0471; #CYRILLIC CAPITAL LETTER PSI
0472; L; 0473; #CYRILLIC CAPITAL LETTER FITA
0474; L; 0475; #CYRILLIC CAPITAL LETTER IZHITSA
0476; L; 0477; #CYRILLIC CAPITAL LETTER IZHITSA WITH DOUBLE GRAVE ACCENT
0478; L; 0479; #CYRILLIC CAPITAL LETTER UK
047A; L; 047B; #CYRILLIC CAPITAL LETTER ROUND OMEGA
047C; L; 047D; #CYRILLIC CAPITAL LETTER OMEGA WITH TITLO
047E; L; 047F; #CYRILLIC CAPITAL LETTER OT
0480; L; 0481; #CYRILLIC CAPITAL LETTER KOPPA
048C; L; 048D; #CYRILLIC CAPITAL LETTER SEMISOFT SIGN
048E; L; 048F; #CYRILLIC CAPITAL LETTER ER WITH TICK
0490; L; 0491; #CYRILLIC CAPITAL LETTER GHE WITH UPTURN
0492; L; 0493; #CYRILLIC CAPITAL LETTER GHE WITH STROKE
0494; L; 0495; #CYRILLIC CAPITAL LETTER GHE WITH MIDDLE HOOK
0496; L; 0497; #CYRILLIC CAPITAL LETTER ZHE WITH DESCENDER
0498; L; 0499; #CYRILLIC CAPITAL LETTER ZE WITH DESCENDER
049A; L; 049B; #CYRILLIC CAPITAL LETTER KA WITH DESCENDER
049C; L; 049D; #CYRILLIC CAPITAL LETTER KA WITH VERTICAL STROKE
049E; L; 049F; #CYRILLIC CAPITAL LETTER KA WITH STROKE
04A0; L; 04A1; #CYRILLIC CAPITAL LETTER BASHKIR KA
04A2; L; 04A3; #CYRILLIC CAPITAL LETTER EN WITH DESCENDER
04A4; L; 04A5; #CYRILLIC CAPITAL LIGATURE EN GHE
04A6; L; 04A7; #CYRILLIC CAPITAL LETTER PE WITH MIDDLE HOOK
04A8; L; 04A9; #CYRILLIC CAPITAL LETTER ABKHASIAN HA
04AA; L; 04AB; #CYRILLIC CAPITAL LETTER ES WITH DESCENDER
04AC; L; 04AD; #CYRILLIC CAPITAL LETTER TE WITH DESCENDER
04AE; L; 04AF; #CYRILLIC CAPITAL LETTER STRAIGHT U
04B0; L; 04B1; #CYRILLIC CAPITAL LETTER STRAIGHT U WITH STROKE
04B2; L; 04B3; #CYRILLIC CAPITAL LETTER HA WITH DESCENDER
04B4; L; 04B5; #CYRILLIC CAPITAL LIGATURE TE TSE
04B6; L; 04B7; #CYRILLIC CAPITAL LETTER CHE WITH DESCENDER
04B8; L; 04B9; #CYRILLIC CAPITAL LETTER CHE WITH VERTICAL STROKE
04BA; L; 04BB; #CYRILLIC CAPITAL LETTER SHHA
04BC; L; 04BD; #CYRILLIC CAPITAL LETTER ABKHASIAN CHE
04BE; L; 04BF; #CYRILLIC CAPITAL LETTER ABKHASIAN CHE WITH DESCENDER
04C1; L; 04C2; #CYRILLIC CAPITAL LETTER ZHE WITH BREVE
04C3; L; 04C4; #CYRILLIC CAPITAL LETTER KA WITH HOOK
04C7; L; 04C8; #CYRILLIC CAPITAL LETTER EN WITH HOOK
04CB; L; 04CC; #CYRILLIC CAPITAL LETTER KHAKASSIAN CHE
04D0; L; 04D1; #CYRILLIC CAPITAL LETTER A WITH BREVE
04D2; L; 04D3; #CYRILLIC CAPITAL LETTER A WITH DIAERESIS
04D4; L; 04D5; #CYRILLIC CAPITAL LIGATURE A IE
04D6; L; 04D7; #CYRILLIC CAPITAL LETTER IE WITH BREVE
04D8; L; 04D9; #CYRILLIC CAPITAL LETTER SCHWA
04DA; L; 04DB; #CYRILLIC CAPITAL LETTER SCHWA WITH DIAERESIS
04DC; L; 04DD; #CYRILLIC CAPITAL LETTER ZHE WITH DIAERESIS
04DE; L; 04DF; #CYRILLIC CAPITAL LETTER ZE WITH DIAERESIS
04E0; L; 04E1; #CYRILLIC CAPITAL LETTER ABKHASIAN DZE
04E2; L; 04E3; #CYRILLIC CAPITAL LETTER I WITH MACRON
04E4; L; 04E5; #CYRILLIC CAPITAL LETTER I WITH DIAERESIS
04E6; L; 04E7; #CYRILLIC CAPITAL LETTER O WITH DIAERESIS
04E8; L; 04E9; #CYRILLIC CAPITAL LETTER BARRED O
04EA; L; 04EB; #CYRILLIC CAPITAL LETTER BARRED O WITH DIAERESIS
04EC; L; 04ED; #CYRILLIC CAPITAL LETTER E WITH DIAERESIS
04EE; L; 04EF; #CYRILLIC CAPITAL LETTER U WITH MACRON
04F0; L; 04F1; #CYRILLIC CAPITAL LETTER U WITH DIAERESIS
04F2; L; 04F3; #CYRILLIC CAPITAL LETTER U WITH DOUBLE ACUTE
04F4; L; 04F5; #CYRILLIC CAPITAL LETTER CHE WITH DIAERESIS
04F8; L; 04F9; #CYRILLIC CAPITAL LETTER YERU WITH DIAERESIS
0531; L; 0561; #ARMENIAN CAPITAL LETTER AYB
0532; L; 0562; #ARMENIAN CAPITAL LETTER BEN
0533; L; 0563; #ARMENIAN CAPITAL LETTER GIM
0534; L; 0564; #ARMENIAN CAPITAL LETTER DA
0535; L; 0565; #ARMENIAN CAPITAL LETTER ECH
0536; L; 0566; #ARMENIAN CAPITAL LETTER ZA
0537; L; 0567; #ARMENIAN CAPITAL LETTER EH
0538; L; 0568; #ARMENIAN CAPITAL LETTER ET
0539; L; 0569; #ARMENIAN CAPITAL LETTER TO
053A; L; 056A; #ARMENIAN CAPITAL LETTER ZHE
053B; L; 056B; #ARMENIAN CAPITAL LETTER INI
053C; L; 056C; #ARMENIAN CAPITAL LETTER LIWN
053D; L; 056D; #ARMENIAN CAPITAL LETTER XEH
053E; L; 056E; #ARMENIAN CAPITAL LETTER CA
053F; L; 056F; #ARMENIAN CAPITAL LETTER KEN
0540; L; 0570; #ARMENIAN CAPITAL LETTER HO
0541; L; 0571; #ARMENIAN CAPITAL LETTER JA
0542; L; 0572; #ARMENIAN CAPITAL LETTER GHAD
0543; L; 0573; #ARMENIAN CAPITAL LETTER CHEH
0544; L; 0574; #ARMENIAN CAPITAL LETTER MEN
0545; L; 0575; #ARMENIAN CAPITAL LETTER YI
0546; L; 0576; #ARMENIAN CAPITAL LETTER NOW
0547; L; 0577; #ARMENIAN CAPITAL LETTER SHA
0548; L; 0578; #ARMENIAN CAPITAL LETTER VO
0549; L; 0579; #ARMENIAN CAPITAL LETTER CHA
054A; L; 057A; #ARMENIAN CAPITAL LETTER PEH
054B; L; 057B; #ARMENIAN CAPITAL LETTER JHEH
054C; L; 057C; #ARMENIAN CAPITAL LETTER RA
054D; L; 057D; #ARMENIAN CAPITAL LETTER SEH
054E; L; 057E; #ARMENIAN CAPITAL LETTER VEW
054F; L; 057F; #ARMENIAN CAPITAL LETTER TIWN
0550; L; 0580; #ARMENIAN CAPITAL LETTER REH
0551; L; 0581; #ARMENIAN CAPITAL LETTER CO
0552; L; 0582; #ARMENIAN CAPITAL LETTER YIWN
0553; L; 0583; #ARMENIAN CAPITAL LETTER PIWR
0554; L; 0584; #ARMENIAN CAPITAL LETTER KEH
0555; L; 0585; #ARMENIAN CAPITAL LETTER OH
0556; L; 0586; #ARMENIAN CAPITAL LETTER FEH
0587; E; 0565 0582; #ARMENIAN SMALL LIGATURE ECH YIWN
1E00; L; 1E01; #LATIN CAPITAL LETTER A WITH RING BELOW
1E02; L; 1E03; #LATIN CAPITAL LETTER B WITH DOT ABOVE
1E04; L; 1E05; #LATIN CAPITAL LETTER B WITH DOT BELOW
1E06; L; 1E07; #LATIN CAPITAL LETTER B WITH LINE BELOW
1E08; L; 1E09; #LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE
1E0A; L; 1E0B; #LATIN CAPITAL LETTER D WITH DOT ABOVE
1E0C; L; 1E0D; #LATIN CAPITAL LETTER D WITH DOT BELOW
1E0E; L; 1E0F; #LATIN CAPITAL LETTER D WITH LINE BELOW
1E10; L; 1E11; #LATIN CAPITAL LETTER D WITH CEDILLA
1E12; L; 1E13; #LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW
1E14; L; 1E15; #LATIN CAPITAL LETTER E WITH MACRON AND GRAVE
1E16; L; 1E17; #LATIN CAPITAL LETTER E WITH MACRON AND ACUTE
1E18; L; 1E19; #LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW
1E1A; L; 1E1B; #LATIN CAPITAL LETTER E WITH TILDE BELOW
1E1C; L; 1E1D; #LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE
1E1E; L; 1E1F; #LATIN CAPITAL LETTER F WITH DOT ABOVE
1E20; L; 1E21; #LATIN CAPITAL LETTER G WITH MACRON
1E22; L; 1E23; #LATIN CAPITAL LETTER H WITH DOT ABOVE
1E24; L; 1E25; #LATIN CAPITAL LETTER H WITH DOT BELOW
1E26; L; 1E27; #LATIN CAPITAL LETTER H WITH DIAERESIS
1E28; L; 1E29; #LATIN CAPITAL LETTER H WITH CEDILLA
1E2A; L; 1E2B; #LATIN CAPITAL LETTER H WITH BREVE BELOW
1E2C; L; 1E2D; #LATIN CAPITAL LETTER I WITH TILDE BELOW
1E2E; L; 1E2F; #LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE
1E30; L; 1E31; #LATIN CAPITAL LETTER K WITH ACUTE
1E32; L; 1E33; #LATIN CAPITAL LETTER K WITH DOT BELOW
1E34; L; 1E35; #LATIN CAPITAL LETTER K WITH LINE BELOW
1E36; L; 1E37; #LATIN CAPITAL LETTER L WITH DOT BELOW
1E38; L; 1E39; #LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON
1E3A; L; 1E3B; #LATIN CAPITAL LETTER L WITH LINE BELOW
1E3C; L; 1E3D; #LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW
1E3E; L; 1E3F; #LATIN CAPITAL LETTER M WITH ACUTE
1E40; L; 1E41; #LATIN CAPITAL LETTER M WITH DOT ABOVE
1E42; L; 1E43; #LATIN CAPITAL LETTER M WITH DOT BELOW
1E44; L; 1E45; #LATIN CAPITAL LETTER N WITH DOT ABOVE
1E46; L; 1E47; #LATIN CAPITAL LETTER N WITH DOT BELOW
1E48; L; 1E49; #LATIN CAPITAL LETTER N WITH LINE BELOW
1E4A; L; 1E4B; #LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW
1E4C; L; 1E4D; #LATIN CAPITAL LETTER O WITH TILDE AND ACUTE
1E4E; L; 1E4F; #LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS
1E50; L; 1E51; #LATIN CAPITAL LETTER O WITH MACRON AND GRAVE
1E52; L; 1E53; #LATIN CAPITAL LETTER O WITH MACRON AND ACUTE
1E54; L; 1E55; #LATIN CAPITAL LETTER P WITH ACUTE
1E56; L; 1E57; #LATIN CAPITAL LETTER P WITH DOT ABOVE
1E58; L; 1E59; #LATIN CAPITAL LETTER R WITH DOT ABOVE
1E5A; L; 1E5B; #LATIN CAPITAL LETTER R WITH DOT BELOW
1E5C; L; 1E5D; #LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON
1E5E; L; 1E5F; #LATIN CAPITAL LETTER R WITH LINE BELOW
1E60; L; 1E61; #LATIN CAPITAL LETTER S WITH DOT ABOVE
1E62; L; 1E63; #LATIN CAPITAL LETTER S WITH DOT BELOW
1E64; L; 1E65; #LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE
1E66; L; 1E67; #LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE
1E68; L; 1E69; #LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE
1E6A; L; 1E6B; #LATIN CAPITAL LETTER T WITH DOT ABOVE
1E6C; L; 1E6D; #LATIN CAPITAL LETTER T WITH DOT BELOW
1E6E; L; 1E6F; #LATIN CAPITAL LETTER T WITH LINE BELOW
1E70; L; 1E71; #LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW
1E72; L; 1E73; #LATIN CAPITAL LETTER U WITH DIAERESIS BELOW
1E74; L; 1E75; #LATIN CAPITAL LETTER U WITH TILDE BELOW
1E76; L; 1E77; #LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW
1E78; L; 1E79; #LATIN CAPITAL LETTER U WITH TILDE AND ACUTE
1E7A; L; 1E7B; #LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS
1E7C; L; 1E7D; #LATIN CAPITAL LETTER V WITH TILDE
1E7E; L; 1E7F; #LATIN CAPITAL LETTER V WITH DOT BELOW
1E80; L; 1E81; #LATIN CAPITAL LETTER W WITH GRAVE
1E82; L; 1E83; #LATIN CAPITAL LETTER W WITH ACUTE
1E84; L; 1E85; #LATIN CAPITAL LETTER W WITH DIAERESIS
1E86; L; 1E87; #LATIN CAPITAL LETTER W WITH DOT ABOVE
1E88; L; 1E89; #LATIN CAPITAL LETTER W WITH DOT BELOW
1E8A; L; 1E8B; #LATIN CAPITAL LETTER X WITH DOT ABOVE
1E8C; L; 1E8D; #LATIN CAPITAL LETTER X WITH DIAERESIS
1E8E; L; 1E8F; #LATIN CAPITAL LETTER Y WITH DOT ABOVE
1E90; L; 1E91; #LATIN CAPITAL LETTER Z WITH CIRCUMFLEX
1E92; L; 1E93; #LATIN CAPITAL LETTER Z WITH DOT BELOW
1E94; L; 1E95; #LATIN CAPITAL LETTER Z WITH LINE BELOW
1E96; E; 0068 0331; #LATIN SMALL LETTER H WITH LINE BELOW
1E97; E; 0074 0308; #LATIN SMALL LETTER T WITH DIAERESIS
1E98; E; 0077 030A; #LATIN SMALL LETTER W WITH RING ABOVE
1E99; E; 0079 030A; #LATIN SMALL LETTER Y WITH RING ABOVE
1E9A; E; 0061 02BE; #LATIN SMALL LETTER A WITH RIGHT HALF RING
1E9B; E; 1E61; #LATIN SMALL LETTER LONG S WITH DOT ABOVE
1EA0; L; 1EA1; #LATIN CAPITAL LETTER A WITH DOT BELOW
1EA2; L; 1EA3; #LATIN CAPITAL LETTER A WITH HOOK ABOVE
1EA4; L; 1EA5; #LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE
1EA6; L; 1EA7; #LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE
1EA8; L; 1EA9; #LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE
1EAA; L; 1EAB; #LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE
1EAC; L; 1EAD; #LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW
1EAE; L; 1EAF; #LATIN CAPITAL LETTER A WITH BREVE AND ACUTE
1EB0; L; 1EB1; #LATIN CAPITAL LETTER A WITH BREVE AND GRAVE
1EB2; L; 1EB3; #LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE
1EB4; L; 1EB5; #LATIN CAPITAL LETTER A WITH BREVE AND TILDE
1EB6; L; 1EB7; #LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW
1EB8; L; 1EB9; #LATIN CAPITAL LETTER E WITH DOT BELOW
1EBA; L; 1EBB; #LATIN CAPITAL LETTER E WITH HOOK ABOVE
1EBC; L; 1EBD; #LATIN CAPITAL LETTER E WITH TILDE
1EBE; L; 1EBF; #LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE
1EC0; L; 1EC1; #LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE
1EC2; L; 1EC3; #LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE
1EC4; L; 1EC5; #LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE
1EC6; L; 1EC7; #LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW
1EC8; L; 1EC9; #LATIN CAPITAL LETTER I WITH HOOK ABOVE
1ECA; L; 1ECB; #LATIN CAPITAL LETTER I WITH DOT BELOW
1ECC; L; 1ECD; #LATIN CAPITAL LETTER O WITH DOT BELOW
1ECE; L; 1ECF; #LATIN CAPITAL LETTER O WITH HOOK ABOVE
1ED0; L; 1ED1; #LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE
1ED2; L; 1ED3; #LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE
1ED4; L; 1ED5; #LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE
1ED6; L; 1ED7; #LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE
1ED8; L; 1ED9; #LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW
1EDA; L; 1EDB; #LATIN CAPITAL LETTER O WITH HORN AND ACUTE
1EDC; L; 1EDD; #LATIN CAPITAL LETTER O WITH HORN AND GRAVE
1EDE; L; 1EDF; #LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE
1EE0; L; 1EE1; #LATIN CAPITAL LETTER O WITH HORN AND TILDE
1EE2; L; 1EE3; #LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW
1EE4; L; 1EE5; #LATIN CAPITAL LETTER U WITH DOT BELOW
1EE6; L; 1EE7; #LATIN CAPITAL LETTER U WITH HOOK ABOVE
1EE8; L; 1EE9; #LATIN CAPITAL LETTER U WITH HORN AND ACUTE
1EEA; L; 1EEB; #LATIN CAPITAL LETTER U WITH HORN AND GRAVE
1EEC; L; 1EED; #LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE
1EEE; L; 1EEF; #LATIN CAPITAL LETTER U WITH HORN AND TILDE
1EF0; L; 1EF1; #LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW
1EF2; L; 1EF3; #LATIN CAPITAL LETTER Y WITH GRAVE
1EF4; L; 1EF5; #LATIN CAPITAL LETTER Y WITH DOT BELOW
1EF6; L; 1EF7; #LATIN CAPITAL LETTER Y WITH HOOK ABOVE
1EF8; L; 1EF9; #LATIN CAPITAL LETTER Y WITH TILDE
1F08; L; 1F00; #GREEK CAPITAL LETTER ALPHA WITH PSILI
1F09; L; 1F01; #GREEK CAPITAL LETTER ALPHA WITH DASIA
1F0A; L; 1F02; #GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA
1F0B; L; 1F03; #GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA
1F0C; L; 1F04; #GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA
1F0D; L; 1F05; #GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA
1F0E; L; 1F06; #GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI
1F0F; L; 1F07; #GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI
1F18; L; 1F10; #GREEK CAPITAL LETTER EPSILON WITH PSILI
1F19; L; 1F11; #GREEK CAPITAL LETTER EPSILON WITH DASIA
1F1A; L; 1F12; #GREEK CAPITAL LETTER EPSILON WITH PSILI AND VARIA
1F1B; L; 1F13; #GREEK CAPITAL LETTER EPSILON WITH DASIA AND VARIA
1F1C; L; 1F14; #GREEK CAPITAL LETTER EPSILON WITH PSILI AND OXIA
1F1D; L; 1F15; #GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA
1F28; L; 1F20; #GREEK CAPITAL LETTER ETA WITH PSILI
1F29; L; 1F21; #GREEK CAPITAL LETTER ETA WITH DASIA
1F2A; L; 1F22; #GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA
1F2B; L; 1F23; #GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA
1F2C; L; 1F24; #GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA
1F2D; L; 1F25; #GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA
1F2E; L; 1F26; #GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI
1F2F; L; 1F27; #GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI
1F38; L; 1F30; #GREEK CAPITAL LETTER IOTA WITH PSILI
1F39; L; 1F31; #GREEK CAPITAL LETTER IOTA WITH DASIA
1F3A; L; 1F32; #GREEK CAPITAL LETTER IOTA WITH PSILI AND VARIA
1F3B; L; 1F33; #GREEK CAPITAL LETTER IOTA WITH DASIA AND VARIA
1F3C; L; 1F34; #GREEK CAPITAL LETTER IOTA WITH PSILI AND OXIA
1F3D; L; 1F35; #GREEK CAPITAL LETTER IOTA WITH DASIA AND OXIA
1F3E; L; 1F36; #GREEK CAPITAL LETTER IOTA WITH PSILI AND PERISPOMENI
1F3F; L; 1F37; #GREEK CAPITAL LETTER IOTA WITH DASIA AND PERISPOMENI
1F48; L; 1F40; #GREEK CAPITAL LETTER OMICRON WITH PSILI
1F49; L; 1F41; #GREEK CAPITAL LETTER OMICRON WITH DASIA
1F4A; L; 1F42; #GREEK CAPITAL LETTER OMICRON WITH PSILI AND VARIA
1F4B; L; 1F43; #GREEK CAPITAL LETTER OMICRON WITH DASIA AND VARIA
1F4C; L; 1F44; #GREEK CAPITAL LETTER OMICRON WITH PSILI AND OXIA
1F4D; L; 1F45; #GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA
1F50; E; 03C5 0313; #GREEK SMALL LETTER UPSILON WITH PSILI
1F52; E; 03C5 0313 0300; #GREEK SMALL LETTER UPSILON WITH PSILI AND VARIA
1F54; E; 03C5 0313 0301; #GREEK SMALL LETTER UPSILON WITH PSILI AND OXIA
1F56; E; 03C5 0313 0342; #GREEK SMALL LETTER UPSILON WITH PSILI AND PERISPOMENI
1F59; L; 1F51; #GREEK CAPITAL LETTER UPSILON WITH DASIA
1F5B; L; 1F53; #GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA
1F5D; L; 1F55; #GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA
1F5F; L; 1F57; #GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI
1F68; L; 1F60; #GREEK CAPITAL LETTER OMEGA WITH PSILI
1F69; L; 1F61; #GREEK CAPITAL LETTER OMEGA WITH DASIA
1F6A; L; 1F62; #GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA
1F6B; L; 1F63; #GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA
1F6C; L; 1F64; #GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA
1F6D; L; 1F65; #GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA
1F6E; L; 1F66; #GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI
1F6F; L; 1F67; #GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI
1F80; E; 1F00 03B9; #GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI
1F81; E; 1F01 03B9; #GREEK SMALL LETTER ALPHA WITH DASIA AND YPOGEGRAMMENI
1F82; E; 1F02 03B9; #GREEK SMALL LETTER ALPHA WITH PSILI AND VARIA AND YPOGEGRAMMENI
1F83; E; 1F03 03B9; #GREEK SMALL LETTER ALPHA WITH DASIA AND VARIA AND YPOGEGRAMMENI
1F84; E; 1F04 03B9; #GREEK SMALL LETTER ALPHA WITH PSILI AND OXIA AND YPOGEGRAMMENI
1F85; E; 1F05 03B9; #GREEK SMALL LETTER ALPHA WITH DASIA AND OXIA AND YPOGEGRAMMENI
1F86; E; 1F06 03B9; #GREEK SMALL LETTER ALPHA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI
1F87; E; 1F07 03B9; #GREEK SMALL LETTER ALPHA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI
1F88; E; 1F00 03B9; #GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI
1F89; E; 1F01 03B9; #GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI
1F8A; E; 1F02 03B9; #GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI
1F8B; E; 1F03 03B9; #GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI
1F8C; E; 1F04 03B9; #GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI
1F8D; E; 1F05 03B9; #GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI
1F8E; E; 1F06 03B9; #GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
1F8F; E; 1F07 03B9; #GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
1F90; E; 1F20 03B9; #GREEK SMALL LETTER ETA WITH PSILI AND YPOGEGRAMMENI
1F91; E; 1F21 03B9; #GREEK SMALL LETTER ETA WITH DASIA AND YPOGEGRAMMENI
1F92; E; 1F22 03B9; #GREEK SMALL LETTER ETA WITH PSILI AND VARIA AND YPOGEGRAMMENI
1F93; E; 1F23 03B9; #GREEK SMALL LETTER ETA WITH DASIA AND VARIA AND YPOGEGRAMMENI
1F94; E; 1F24 03B9; #GREEK SMALL LETTER ETA WITH PSILI AND OXIA AND YPOGEGRAMMENI
1F95; E; 1F25 03B9; #GREEK SMALL LETTER ETA WITH DASIA AND OXIA AND YPOGEGRAMMENI
1F96; E; 1F26 03B9; #GREEK SMALL LETTER ETA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI
1F97; E; 1F27 03B9; #GREEK SMALL LETTER ETA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI
1F98; E; 1F20 03B9; #GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI
1F99; E; 1F21 03B9; #GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI
1F9A; E; 1F22 03B9; #GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI
1F9B; E; 1F23 03B9; #GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI
1F9C; E; 1F24 03B9; #GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND PROSGEGRAMMENI
1F9D; E; 1F25 03B9; #GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI
1F9E; E; 1F26 03B9; #GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
1F9F; E; 1F27 03B9; #GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
1FA0; E; 1F60 03B9; #GREEK SMALL LETTER OMEGA WITH PSILI AND YPOGEGRAMMENI
1FA1; E; 1F61 03B9; #GREEK SMALL LETTER OMEGA WITH DASIA AND YPOGEGRAMMENI
1FA2; E; 1F62 03B9; #GREEK SMALL LETTER OMEGA WITH PSILI AND VARIA AND YPOGEGRAMMENI
1FA3; E; 1F63 03B9; #GREEK SMALL LETTER OMEGA WITH DASIA AND VARIA AND YPOGEGRAMMENI
1FA4; E; 1F64 03B9; #GREEK SMALL LETTER OMEGA WITH PSILI AND OXIA AND YPOGEGRAMMENI
1FA5; E; 1F65 03B9; #GREEK SMALL LETTER OMEGA WITH DASIA AND OXIA AND YPOGEGRAMMENI
1FA6; E; 1F66 03B9; #GREEK SMALL LETTER OMEGA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI
1FA7; E; 1F67 03B9; #GREEK SMALL LETTER OMEGA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI
1FA8; E; 1F60 03B9; #GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI
1FA9; E; 1F61 03B9; #GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI
1FAA; E; 1F62 03B9; #GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND PROSGEGRAMMENI
1FAB; E; 1F63 03B9; #GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI
1FAC; E; 1F64 03B9; #GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI
1FAD; E; 1F65 03B9; #GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI
1FAE; E; 1F66 03B9; #GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
1FAF; E; 1F67 03B9; #GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
1FB2; E; 1F70 03B9; #GREEK SMALL LETTER ALPHA WITH VARIA AND YPOGEGRAMMENI
1FB3; E; 03B1 03B9; #GREEK SMALL LETTER ALPHA WITH YPOGEGRAMMENI
1FB4; E; 03AC 03B9; #GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI
1FB6; E; 03B1 0342; #GREEK SMALL LETTER ALPHA WITH PERISPOMENI
1FB7; E; 03B1 0342 03B9; #GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI
1FB8; L; 1FB0; #GREEK CAPITAL LETTER ALPHA WITH VRACHY
1FB9; L; 1FB1; #GREEK CAPITAL LETTER ALPHA WITH MACRON
1FBA; L; 1F70; #GREEK CAPITAL LETTER ALPHA WITH VARIA
1FBB; L; 1F71; #GREEK CAPITAL LETTER ALPHA WITH OXIA
1FBC; E; 03B1 03B9; #GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI
1FBE; E; 03B9; #GREEK PROSGEGRAMMENI
1FC2; E; 1F74 03B9; #GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI
1FC3; E; 03B7 03B9; #GREEK SMALL LETTER ETA WITH YPOGEGRAMMENI
1FC4; E; 03AE 03B9; #GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI
1FC6; E; 03B7 0342; #GREEK SMALL LETTER ETA WITH PERISPOMENI
1FC7; E; 03B7 0342 03B9; #GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI
1FC8; L; 1F72; #GREEK CAPITAL LETTER EPSILON WITH VARIA
1FC9; L; 1F73; #GREEK CAPITAL LETTER EPSILON WITH OXIA
1FCA; L; 1F74; #GREEK CAPITAL LETTER ETA WITH VARIA
1FCB; L; 1F75; #GREEK CAPITAL LETTER ETA WITH OXIA
1FCC; E; 03B7 03B9; #GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI
1FD2; E; 03B9 0308 0300; #GREEK SMALL LETTER IOTA WITH DIALYTIKA AND VARIA
1FD3; E; 03B9 0308 0301; #GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA
1FD6; E; 03B9 0342; #GREEK SMALL LETTER IOTA WITH PERISPOMENI
1FD7; E; 03B9 0308 0342; #GREEK SMALL LETTER IOTA WITH DIALYTIKA AND PERISPOMENI
1FD8; L; 1FD0; #GREEK CAPITAL LETTER IOTA WITH VRACHY
1FD9; L; 1FD1; #GREEK CAPITAL LETTER IOTA WITH MACRON
1FDA; L; 1F76; #GREEK CAPITAL LETTER IOTA WITH VARIA
1FDB; L; 1F77; #GREEK CAPITAL LETTER IOTA WITH OXIA
1FE2; E; 03C5 0308 0300; #GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND VARIA
1FE3; E; 03C5 0308 0301; #GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA
1FE4; E; 03C1 0313; #GREEK SMALL LETTER RHO WITH PSILI
1FE6; E; 03C5 0342; #GREEK SMALL LETTER UPSILON WITH PERISPOMENI
1FE7; E; 03C5 0308 0342; #GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND PERISPOMENI
1FE8; L; 1FE0; #GREEK CAPITAL LETTER UPSILON WITH VRACHY
1FE9; L; 1FE1; #GREEK CAPITAL LETTER UPSILON WITH MACRON
1FEA; L; 1F7A; #GREEK CAPITAL LETTER UPSILON WITH VARIA
1FEB; L; 1F7B; #GREEK CAPITAL LETTER UPSILON WITH OXIA
1FEC; L; 1FE5; #GREEK CAPITAL LETTER RHO WITH DASIA
1FF2; E; 1F7C 03B9; #GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI
1FF3; E; 03C9 03B9; #GREEK SMALL LETTER OMEGA WITH YPOGEGRAMMENI
1FF4; E; 03CE 03B9; #GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI
1FF6; E; 03C9 0342; #GREEK SMALL LETTER OMEGA WITH PERISPOMENI
1FF7; E; 03C9 0342 03B9; #GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI
1FF8; L; 1F78; #GREEK CAPITAL LETTER OMICRON WITH VARIA
1FF9; L; 1F79; #GREEK CAPITAL LETTER OMICRON WITH OXIA
1FFA; L; 1F7C; #GREEK CAPITAL LETTER OMEGA WITH VARIA
1FFB; L; 1F7D; #GREEK CAPITAL LETTER OMEGA WITH OXIA
1FFC; E; 03C9 03B9; #GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI
2126; L; 03C9; #OHM SIGN
212A; L; 006B; #KELVIN SIGN
212B; L; 00E5; #ANGSTROM SIGN
2160; L; 2170; #ROMAN NUMERAL ONE
2161; L; 2171; #ROMAN NUMERAL TWO
2162; L; 2172; #ROMAN NUMERAL THREE
2163; L; 2173; #ROMAN NUMERAL FOUR
2164; L; 2174; #ROMAN NUMERAL FIVE
2165; L; 2175; #ROMAN NUMERAL SIX
2166; L; 2176; #ROMAN NUMERAL SEVEN
2167; L; 2177; #ROMAN NUMERAL EIGHT
2168; L; 2178; #ROMAN NUMERAL NINE
2169; L; 2179; #ROMAN NUMERAL TEN
216A; L; 217A; #ROMAN NUMERAL ELEVEN
216B; L; 217B; #ROMAN NUMERAL TWELVE
216C; L; 217C; #ROMAN NUMERAL FIFTY
216D; L; 217D; #ROMAN NUMERAL ONE HUNDRED
216E; L; 217E; #ROMAN NUMERAL FIVE HUNDRED
216F; L; 217F; #ROMAN NUMERAL ONE THOUSAND
24B6; L; 24D0; #CIRCLED LATIN CAPITAL LETTER A
24B7; L; 24D1; #CIRCLED LATIN CAPITAL LETTER B
24B8; L; 24D2; #CIRCLED LATIN CAPITAL LETTER C
24B9; L; 24D3; #CIRCLED LATIN CAPITAL LETTER D
24BA; L; 24D4; #CIRCLED LATIN CAPITAL LETTER E
24BB; L; 24D5; #CIRCLED LATIN CAPITAL LETTER F
24BC; L; 24D6; #CIRCLED LATIN CAPITAL LETTER G
24BD; L; 24D7; #CIRCLED LATIN CAPITAL LETTER H
24BE; L; 24D8; #CIRCLED LATIN CAPITAL LETTER I
24BF; L; 24D9; #CIRCLED LATIN CAPITAL LETTER J
24C0; L; 24DA; #CIRCLED LATIN CAPITAL LETTER K
24C1; L; 24DB; #CIRCLED LATIN CAPITAL LETTER L
24C2; L; 24DC; #CIRCLED LATIN CAPITAL LETTER M
24C3; L; 24DD; #CIRCLED LATIN CAPITAL LETTER N
24C4; L; 24DE; #CIRCLED LATIN CAPITAL LETTER O
24C5; L; 24DF; #CIRCLED LATIN CAPITAL LETTER P
24C6; L; 24E0; #CIRCLED LATIN CAPITAL LETTER Q
24C7; L; 24E1; #CIRCLED LATIN CAPITAL LETTER R
24C8; L; 24E2; #CIRCLED LATIN CAPITAL LETTER S
24C9; L; 24E3; #CIRCLED LATIN CAPITAL LETTER T
24CA; L; 24E4; #CIRCLED LATIN CAPITAL LETTER U
24CB; L; 24E5; #CIRCLED LATIN CAPITAL LETTER V
24CC; L; 24E6; #CIRCLED LATIN CAPITAL LETTER W
24CD; L; 24E7; #CIRCLED LATIN CAPITAL LETTER X
24CE; L; 24E8; #CIRCLED LATIN CAPITAL LETTER Y
24CF; L; 24E9; #CIRCLED LATIN CAPITAL LETTER Z
FB00; E; 0066 0066; #LATIN SMALL LIGATURE FF
FB01; E; 0066 0069; #LATIN SMALL LIGATURE FI
FB02; E; 0066 006C; #LATIN SMALL LIGATURE FL
FB03; E; 0066 0066 0069; #LATIN SMALL LIGATURE FFI
FB04; E; 0066 0066 006C; #LATIN SMALL LIGATURE FFL
FB05; E; 0073 0074; #LATIN SMALL LIGATURE LONG S T
FB06; E; 0073 0074; #LATIN SMALL LIGATURE ST
FB13; E; 0574 0576; #ARMENIAN SMALL LIGATURE MEN NOW
FB14; E; 0574 0565; #ARMENIAN SMALL LIGATURE MEN ECH
FB15; E; 0574 056B; #ARMENIAN SMALL LIGATURE MEN INI
FB16; E; 057E 0576; #ARMENIAN SMALL LIGATURE VEW NOW
FB17; E; 0574 056D; #ARMENIAN SMALL LIGATURE MEN XEH
FF21; L; FF41; #FULLWIDTH LATIN CAPITAL LETTER A
FF22; L; FF42; #FULLWIDTH LATIN CAPITAL LETTER B
FF23; L; FF43; #FULLWIDTH LATIN CAPITAL LETTER C
FF24; L; FF44; #FULLWIDTH LATIN CAPITAL LETTER D
FF25; L; FF45; #FULLWIDTH LATIN CAPITAL LETTER E
FF26; L; FF46; #FULLWIDTH LATIN CAPITAL LETTER F
FF27; L; FF47; #FULLWIDTH LATIN CAPITAL LETTER G
FF28; L; FF48; #FULLWIDTH LATIN CAPITAL LETTER H
FF29; L; FF49; #FULLWIDTH LATIN CAPITAL LETTER I
FF2A; L; FF4A; #FULLWIDTH LATIN CAPITAL LETTER J
FF2B; L; FF4B; #FULLWIDTH LATIN CAPITAL LETTER K
FF2C; L; FF4C; #FULLWIDTH LATIN CAPITAL LETTER L
FF2D; L; FF4D; #FULLWIDTH LATIN CAPITAL LETTER M
FF2E; L; FF4E; #FULLWIDTH LATIN CAPITAL LETTER N
FF2F; L; FF4F; #FULLWIDTH LATIN CAPITAL LETTER O
FF30; L; FF50; #FULLWIDTH LATIN CAPITAL LETTER P
FF31; L; FF51; #FULLWIDTH LATIN CAPITAL LETTER Q
FF32; L; FF52; #FULLWIDTH LATIN CAPITAL LETTER R
FF33; L; FF53; #FULLWIDTH LATIN CAPITAL LETTER S
FF34; L; FF54; #FULLWIDTH LATIN CAPITAL LETTER T
FF35; L; FF55; #FULLWIDTH LATIN CAPITAL LETTER U
FF36; L; FF56; #FULLWIDTH LATIN CAPITAL LETTER V
FF37; L; FF57; #FULLWIDTH LATIN CAPITAL LETTER W
FF38; L; FF58; #FULLWIDTH LATIN CAPITAL LETTER X
FF39; L; FF59; #FULLWIDTH LATIN CAPITAL LETTER Y
FF3A; L; FF5A; #FULLWIDTH LATIN CAPITAL LETTER Z

View File

@ -1,245 +0,0 @@
# Mirror.txt
# Informative properties for Unicode characters:
# This file lists characters that have the mirrored property
# where there is another Unicode character that typically has a glyph
# that is the mirror image of the original character's glyph.
# The file contains a list of lines with mappings from one code point
# to another one for character-based mirroring.
# Note that for "real" mirroring, a rendering engine needs to select
# appropriate alternative glyphs, and that many Unicode characters do not
# have a mirror-image Unicode character.
# Each mapping line contains two fields, separated by a semicolon (';').
# Each of the two fields contains a code point represented as a
# variable-length hexadecimal value with 1 to 6 digits.
# The mapping lines are listed in ascending order by the first field, the
# original code points.
28;29
29;28
3C;3E
3E;3C
5B;5D
5D;5B
7B;7D
7D;7B
AB;BB
BB;AB
2039;203A
203A;2039
2045;2046
2046;2045
207D;207E
207E;207D
208D;208E
208E;208D
2201;2201
2202;2202
2203;2203
2204;2204
2208;220B
2209;220C
220A;220D
220B;2208
220C;2209
220D;220A
2211;2211
2215;2216
2216;2215
221A;221A
221B;221B
221C;221C
221D;221D
221F;221F
2220;2220
2221;2221
2222;2222
2224;2224
2226;2226
222B;222B
222C;222C
222D;222D
222E;222E
222F;222F
2230;2230
2231;2231
2232;2232
2233;2233
2239;2239
223B;223B
223C;223D
223D;223C
223E;223E
223F;223F
2240;2240
2241;2241
2242;2242
2243;22CD
2244;2244
2245;2245
2246;2246
2247;2247
2248;2248
2249;2249
224A;224A
224B;224B
224C;224C
2252;2253
2253;2252
2254;2255
2255;2254
225F;225F
2260;2260
2262;2262
2264;2265
2265;2264
2266;2267
2267;2266
2268;2269
2269;2268
226A;226B
226B;226A
226E;226F
226F;226E
2270;2271
2271;2270
2272;2273
2273;2272
2274;2275
2275;2274
2276;2277
2277;2276
2278;2279
2279;2278
227A;227B
227B;227A
227C;227D
227D;227C
227E;227F
227F;227E
2280;2281
2281;2280
2282;2283
2283;2282
2284;2285
2285;2284
2286;2287
2287;2286
2288;2289
2289;2288
228A;228B
228B;228A
228C;228C
228F;2290
2290;228F
2291;2292
2292;2291
2298;2298
22A2;22A3
22A3;22A2
22A6;22A6
22A7;22A7
22A8;22A8
22A9;22A9
22AA;22AA
22AB;22AB
22AC;22AC
22AD;22AD
22AE;22AE
22AF;22AF
22B0;22B1
22B1;22B0
22B2;22B3
22B3;22B2
22B4;22B5
22B5;22B4
22B6;22B7
22B7;22B6
22B8;22B8
22BE;22BE
22BF;22BF
22C9;22CA
22CA;22C9
22CB;22CC
22CC;22CB
22CD;2243
22D0;22D1
22D1;22D0
22D6;22D7
22D7;22D6
22D8;22D9
22D9;22D8
22DA;22DB
22DB;22DA
22DC;22DD
22DD;22DC
22DE;22DF
22DF;22DE
22E0;22E1
22E1;22E0
22E2;22E3
22E3;22E2
22E4;22E5
22E5;22E4
22E6;22E7
22E7;22E6
22E8;22E9
22E9;22E8
22EA;22EB
22EB;22EA
22EC;22ED
22ED;22EC
22F0;22F1
22F1;22F0
2308;2309
2309;2308
230A;230B
230B;230A
2320;2320
2321;2321
2329;232A
232A;2329
3008;3009
3009;3008
300A;300B
300B;300A
300C;300C
300D;300D
300E;300E
300F;300F
3010;3011
3011;3010
3014;3014
3015;3015
3016;3017
3017;3016
3018;3019
3019;3018
301A;301B
301B;301A
# Mirrored-character mappings for characters that are missing the mirrored property:
# Not listed are characters that could have the mirrored property but would not
# have a mirror-image mapping.
# Mathematical Operators
# 2205;2349
# APL
# No APL symbol has the mirrored property!
# 2300;2349
# 2326;232B
# 232B;2326
# 233F;2340
# 2340;233F
# 2341;2342
# 2342;2341
# 2343;2344
# 2344;2343
# 2345;2346
# 2346;2345
# 2347;2348
# 2348;2347
# 2349;2205

View File

@ -1,219 +0,0 @@
# SpecialCasing-2.txt
#
# Special Casing Properties
#
# This file is a supplement to the UnicodeData file.
# It contains additional information about the casing of Unicode characters.
# (For compatibility, the UnicodeData.txt file only contains case mappings for
# characters where they are 1-1, and does not have locale-specific mappings.)
# These are informative character properties.
#
# Send comments to mark@unicode.org
#
# ================================================================================
# Format
# ================================================================================
# The entries in this file are in the following machine-readable format:
#
# <entry> := <case_mapping> <condition_list>? (<s>* "#" <comment>)?
#
# <case_mapping> := <source> <sep> <lower> <sep> <title> <sep> <upper> <sep>
#
# <source> := <code_point>
# <sep> := <s>* ";" <s>*
# <lower> := <code_point_list>
# <title> := <code_point_list>
# <upper> := <code_point_list>
# <code_point_list> := <code_point> (<s>+ <code_point>)*
# <code_point> := <hex><hex><hex><hex>
# <hex> := [0-9A-Fa-f]
# <s> := <space>
#
# <condition_list> := <locale>? (<s>+ <context>)*
# <locale> := <ISO_3166_code> ( "_" <ISO_639_code> )? ( "_" <variant> )?
# <ISO_3166_code> := 2-letter country code,
# as in http://www.unicode.org/unicode/onlinedat/countries.html
# <ISO_639_code> := 2-letter code,
# as in http://www.unicode.org/unicode/onlinedat/languages.html
# <context> := "FINAL" | "NON_FINAL" | "MODERN" | "NON_MODERN"
#
# A condition list overrides the normal behavior if any of the listed conditions is true.
# FINAL: The letter is not followed by a letter of category L* (e.g. Ll, Lt, Lu, Lm, or Lo).
# MODERN: The mapping is only used for modern text.
# Conditions preceded by "NON_" represent the negation of the condition
#
# New contexts may be added in the future.
# Parsers of this file must be prepared to deal with that situation.
# Additional whitespace around elements is optional. Blank lines are ignored in parsing.
# On any line, all text following "#" is a comment and is ignored in parsing.
# ================================================================================
# ================================================================================
# Unconditional mappings
# ================================================================================
# The German es-zed is special--the normal mapping is to SS.
# Note: the titlecase should never occur in practice. It is equal to titlecase(uppercase(<es-zed>))
00DF; 00DF; 0053 0073; 0053 0053; # LATIN SMALL LETTER SHARP S
# Ligatures
FB00; FB00; 0046 0066; 0046 0046; # LATIN SMALL LIGATURE FF
FB01; FB01; 0046 0069; 0046 0049; # LATIN SMALL LIGATURE FI
FB02; FB02; 0046 006C; 0046 004C; # LATIN SMALL LIGATURE FL
FB03; FB03; 0046 0066 0069; 0046 0046 0049; # LATIN SMALL LIGATURE FFI
FB04; FB04; 0046 0066 006C; 0046 0046 004C; # LATIN SMALL LIGATURE FFL
FB05; FB05; 0053 0074; 0053 0054; # LATIN SMALL LIGATURE LONG S T
FB06; FB06; 0053 0074; 0053 0054; # LATIN SMALL LIGATURE ST
0587; 0587; 0535 0582; 0535 0552; # ARMENIAN SMALL LIGATURE ECH YIWN
FB13; FB13; 0544 0576; 0544 0546; # ARMENIAN SMALL LIGATURE MEN NOW
FB14; FB14; 0544 0565; 0544 0535; # ARMENIAN SMALL LIGATURE MEN ECH
FB15; FB15; 0544 056B; 0544 053B; # ARMENIAN SMALL LIGATURE MEN INI
FB16; FB16; 054E 0576; 054E 0546; # ARMENIAN SMALL LIGATURE VEW NOW
FB17; FB17; 0544 056D; 0544 053D; # ARMENIAN SMALL LIGATURE MEN XEH
# No corresponding uppercase precomposed character
0149; 0149; 02BC 006E; 02BC 004E; # LATIN SMALL LETTER N PRECEDED BY APOSTROPHE
0390; 0390; 0399 0308 0301; 0399 0308 0301; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
03B0; 03B0; 03A5 0308 0301; 03A5 0308 0301; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
01F0; 01F0; 004A 030C; 004A 030C; # LATIN SMALL LETTER J WITH CARON
1E96; 1E96; 0048 0331; 0048 0331; # LATIN SMALL LETTER H WITH LINE BELOW
1E97; 1E97; 0054 0308; 0054 0308; # LATIN SMALL LETTER T WITH DIAERESIS
1E98; 1E98; 0057 030A; 0057 030A; # LATIN SMALL LETTER W WITH RING ABOVE
1E99; 1E99; 0059 030A; 0059 030A; # LATIN SMALL LETTER Y WITH RING ABOVE
1E9A; 1E9A; 0041 02BE; 0041 02BE; # LATIN SMALL LETTER A WITH RIGHT HALF RING
1F50; 1F50; 03A5 0313; 03A5 0313; # GREEK SMALL LETTER UPSILON WITH PSILI
1F52; 1F52; 03A5 0313 0300; 03A5 0313 0300; # GREEK SMALL LETTER UPSILON WITH PSILI AND VARIA
1F54; 1F54; 03A5 0313 0301; 03A5 0313 0301; # GREEK SMALL LETTER UPSILON WITH PSILI AND OXIA
1F56; 1F56; 03A5 0313 0342; 03A5 0313 0342; # GREEK SMALL LETTER UPSILON WITH PSILI AND PERISPOMENI
1FB6; 1FB6; 0391 0342; 0391 0342; # GREEK SMALL LETTER ALPHA WITH PERISPOMENI
1FC6; 1FC6; 0397 0342; 0397 0342; # GREEK SMALL LETTER ETA WITH PERISPOMENI
1FD2; 1FD2; 0399 0308 0300; 0399 0308 0300; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND VARIA
1FD3; 1FD3; 0399 0308 0301; 0399 0308 0301; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA
1FD6; 1FD6; 0399 0342; 0399 0342; # GREEK SMALL LETTER IOTA WITH PERISPOMENI
1FD7; 1FD7; 0399 0308 0342; 0399 0308 0342; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND PERISPOMENI
1FE2; 1FE2; 03A5 0308 0300; 03A5 0308 0300; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND VARIA
1FE3; 1FE3; 03A5 0308 0301; 03A5 0308 0301; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA
1FE4; 1FE4; 03A1 0313; 03A1 0313; # GREEK SMALL LETTER RHO WITH PSILI
1FE6; 1FE6; 03A5 0342; 03A5 0342; # GREEK SMALL LETTER UPSILON WITH PERISPOMENI
1FE7; 1FE7; 03A5 0308 0342; 03A5 0308 0342; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND PERISPOMENI
1FF6; 1FF6; 03A9 0342; 03A9 0342; # GREEK SMALL LETTER OMEGA WITH PERISPOMENI
# IMPORTANT-when capitalizing iota-subscript (0345)
# It MUST be in normalized form--moved to the end of any sequence of combining marks.
# This is because logically it represents a following base character!
# E.g. <iota_subscript> (<Mn> | <Mc> | <Me>)+ => (<Mn> | <Mc> | <Me>)+ <iota_subscript>
# It should never be the first character in a word, so in titlecasing it can be left as is.
# The following cases are already in the UnicodeData file, so are only commented here.
# 0345; 0345; 0345; 0399; # COMBINING GREEK YPOGEGRAMMENI
# All letters with YPOGEGRAMMENI (iota-subscript) or PROSGEGRAMMENI (iota adscript)
# have special uppercases.
# Note: characters with PROSGEGRAMMENI are actually titlecase, not uppercase!
1F80; 1F80; 1F88; 1F08 0399; # GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI
1F81; 1F81; 1F89; 1F09 0399; # GREEK SMALL LETTER ALPHA WITH DASIA AND YPOGEGRAMMENI
1F82; 1F82; 1F8A; 1F0A 0399; # GREEK SMALL LETTER ALPHA WITH PSILI AND VARIA AND YPOGEGRAMMENI
1F83; 1F83; 1F8B; 1F0B 0399; # GREEK SMALL LETTER ALPHA WITH DASIA AND VARIA AND YPOGEGRAMMENI
1F84; 1F84; 1F8C; 1F0C 0399; # GREEK SMALL LETTER ALPHA WITH PSILI AND OXIA AND YPOGEGRAMMENI
1F85; 1F85; 1F8D; 1F0D 0399; # GREEK SMALL LETTER ALPHA WITH DASIA AND OXIA AND YPOGEGRAMMENI
1F86; 1F86; 1F8E; 1F0E 0399; # GREEK SMALL LETTER ALPHA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI
1F87; 1F87; 1F8F; 1F0F 0399; # GREEK SMALL LETTER ALPHA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI
1F88; 1F80; 1F88; 1F08 0399; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI
1F89; 1F81; 1F89; 1F09 0399; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI
1F8A; 1F82; 1F8A; 1F0A 0399; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI
1F8B; 1F83; 1F8B; 1F0B 0399; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI
1F8C; 1F84; 1F8C; 1F0C 0399; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI
1F8D; 1F85; 1F8D; 1F0D 0399; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI
1F8E; 1F86; 1F8E; 1F0E 0399; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
1F8F; 1F87; 1F8F; 1F0F 0399; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
1F90; 1F90; 1F98; 1F28 0399; # GREEK SMALL LETTER ETA WITH PSILI AND YPOGEGRAMMENI
1F91; 1F91; 1F99; 1F29 0399; # GREEK SMALL LETTER ETA WITH DASIA AND YPOGEGRAMMENI
1F92; 1F92; 1F9A; 1F2A 0399; # GREEK SMALL LETTER ETA WITH PSILI AND VARIA AND YPOGEGRAMMENI
1F93; 1F93; 1F9B; 1F2B 0399; # GREEK SMALL LETTER ETA WITH DASIA AND VARIA AND YPOGEGRAMMENI
1F94; 1F94; 1F9C; 1F2C 0399; # GREEK SMALL LETTER ETA WITH PSILI AND OXIA AND YPOGEGRAMMENI
1F95; 1F95; 1F9D; 1F2D 0399; # GREEK SMALL LETTER ETA WITH DASIA AND OXIA AND YPOGEGRAMMENI
1F96; 1F96; 1F9E; 1F2E 0399; # GREEK SMALL LETTER ETA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI
1F97; 1F97; 1F9F; 1F2F 0399; # GREEK SMALL LETTER ETA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI
1F98; 1F90; 1F98; 1F28 0399; # GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI
1F99; 1F91; 1F99; 1F29 0399; # GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI
1F9A; 1F92; 1F9A; 1F2A 0399; # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI
1F9B; 1F93; 1F9B; 1F2B 0399; # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI
1F9C; 1F94; 1F9C; 1F2C 0399; # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND PROSGEGRAMMENI
1F9D; 1F95; 1F9D; 1F2D 0399; # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI
1F9E; 1F96; 1F9E; 1F2E 0399; # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
1F9F; 1F97; 1F9F; 1F2F 0399; # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
1FA0; 1FA0; 1FA8; 1F68 0399; # GREEK SMALL LETTER OMEGA WITH PSILI AND YPOGEGRAMMENI
1FA1; 1FA1; 1FA9; 1F69 0399; # GREEK SMALL LETTER OMEGA WITH DASIA AND YPOGEGRAMMENI
1FA2; 1FA2; 1FAA; 1F6A 0399; # GREEK SMALL LETTER OMEGA WITH PSILI AND VARIA AND YPOGEGRAMMENI
1FA3; 1FA3; 1FAB; 1F6B 0399; # GREEK SMALL LETTER OMEGA WITH DASIA AND VARIA AND YPOGEGRAMMENI
1FA4; 1FA4; 1FAC; 1F6C 0399; # GREEK SMALL LETTER OMEGA WITH PSILI AND OXIA AND YPOGEGRAMMENI
1FA5; 1FA5; 1FAD; 1F6D 0399; # GREEK SMALL LETTER OMEGA WITH DASIA AND OXIA AND YPOGEGRAMMENI
1FA6; 1FA6; 1FAE; 1F6E 0399; # GREEK SMALL LETTER OMEGA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI
1FA7; 1FA7; 1FAF; 1F6F 0399; # GREEK SMALL LETTER OMEGA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI
1FA8; 1FA0; 1FA8; 1F68 0399; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI
1FA9; 1FA1; 1FA9; 1F69 0399; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI
1FAA; 1FA2; 1FAA; 1F6A 0399; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND PROSGEGRAMMENI
1FAB; 1FA3; 1FAB; 1F6B 0399; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI
1FAC; 1FA4; 1FAC; 1F6C 0399; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI
1FAD; 1FA5; 1FAD; 1F6D 0399; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI
1FAE; 1FA6; 1FAE; 1F6E 0399; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
1FAF; 1FA7; 1FAF; 1F6F 0399; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
1FB3; 1FB3; 1FBC; 0391 0399; # GREEK SMALL LETTER ALPHA WITH YPOGEGRAMMENI
1FBC; 1FB3; 1FBC; 0391 0399; # GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI
1FC3; 1FC3; 1FCC; 0397 0399; # GREEK SMALL LETTER ETA WITH YPOGEGRAMMENI
1FCC; 1FC3; 1FCC; 0397 0399; # GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI
1FF3; 1FF3; 1FFC; 03A9 0399; # GREEK SMALL LETTER OMEGA WITH YPOGEGRAMMENI
1FFC; 1FF3; 1FFC; 03A9 0399; # GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI
# Some characters with YPOGEGRAMMENI also have no corresponding titlecases
1FB2; 1FB2; 1FBA 0345; 1FBA 0399; # GREEK SMALL LETTER ALPHA WITH VARIA AND YPOGEGRAMMENI
1FB4; 1FB4; 0386 0345; 0386 0399; # GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI
1FC2; 1FC2; 1FCA 0345; 1FCA 0399; # GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI
1FC4; 1FC4; 0389 0345; 0389 0399; # GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI
1FF2; 1FF2; 1FFA 0345; 1FFA 0399; # GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI
1FF4; 1FF4; 038F 0345; 038F 0399; # GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI
1FB7; 1FB7; 0391 0342 0345; 0391 0342 0399; # GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI
1FC7; 1FC7; 0397 0342 0345; 0397 0342 0399; # GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI
1FF7; 1FF7; 03A9 0342 0345; 03A9 0342 0399; # GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI
# ================================================================================
# Conditional mappings
# ================================================================================
# Special case for final form of sigma
03A3; 03C2; 03A3; 03A3; FINAL; # GREEK CAPITAL LETTER SIGMA
# Note: the following cases for non-final are already in the UnicodeData file.
# 03A3; 03C3; 03A3; 03A3; # GREEK CAPITAL LETTER SIGMA
# 03C3; 03C3; 03A3; 03A3; # GREEK SMALL LETTER SIGMA
# 03C2; 03C2; 03A3; 03A3; # GREEK SMALL LETTER FINAL SIGMA
# Note: the following cases are not included, since they would normalize in lowercasing
# 03C3; 03C2; 03A3; 03A3; FINAL; # GREEK SMALL LETTER SIGMA
# 03C2; 03C3; 03A3; 03A3; NON_FINAL; # GREEK SMALL LETTER FINAL SIGMA
# ================================================================================
# Locale-sensitive mappings
# ================================================================================
# Turkish
0049; 0131; 0049; 0049; TR; # LATIN CAPITAL LETTER I
0069; 0069; 0130; 0130; TR; # LATIN SMALL LETTER I
# Note: the following cases are already in the UnicodeData file.
# 0131; 0131; 0049; 0049; TR; # LATIN SMALL LETTER DOTLESS I
# 0130; 0069; 0130; 0130; TR; # LATIN CAPITAL LETTER I WITH DOT ABOVE

File diff suppressed because it is too large Load Diff

View File

@ -1,773 +0,0 @@
/*
*******************************************************************************
*
* Copyright (C) 1999-2001, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
*
* originally created by: Markus W. Scherer
*
* This program reads the Unicode character database text file,
* parses it, and extracts most of the properties for each character.
* It then writes a binary file containing the properties
* that is designed to be used directly for random-access to
* the properties of each Unicode character.
*
* adapted for use under BeOS by Axel Dörfler, axeld@pinc-software.de.
*/
#include "genprops.h"
#include "utf.h"
#include <UnicodeChar.h>
#include <Path.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// Global verbosity switch, off by default.
// NOTE(review): presumably enabled from the command line and consulted by the
// parsing/storing code - neither is visible in this part of the file; confirm.
bool gBeVerbose = false;
/* prototypes --------------------------------------------------------------- */
// Forward declarations of the file-local parsers; each one takes the path of
// its input file and returns a status_t.
// NOTE(review): judging by the names there is one parser per input data file
// (mirroring, special casing, case folding, main character database) -
// confirm against the definitions further down.
static status_t parseMirror(const char *filename);
static status_t parseSpecialCasing(const char *filename);
static status_t parseCaseFolding(const char *filename);
static status_t parseDB(const char *filename);
/* -------------------------------------------------------------------------- */
// Callback type invoked by parseDelimitedFile() once for every line that is
// neither empty nor a pure comment line.
//   context    - opaque pointer handed through unchanged from the caller
//   fields     - for each field i, fields[i][0] points at its first character
//                and fields[i][1] at its limit (the delimiter or the
//                terminating NUL); the fields are NOT NUL-terminated
//   fieldCount - number of entries in "fields"
// A return value < B_OK makes parseDelimitedFile() stop and return that value.
typedef status_t
UParseLineFn(void *context,char *fields[][2],int32 fieldCount);
/*
 * Reads the text file "filename" line by line, splits every data line into
 * "fieldCount" fields separated by "delimiter" and hands the field boundaries
 * to "lineFunction".
 *
 * Empty lines and lines starting with '#' are skipped; a '#' later in a line
 * starts a comment which is cut off together with the white space before it.
 *
 * Returns B_IO_ERROR if the file cannot be opened, B_ERROR if a line has too
 * few fields, otherwise the result of the last lineFunction call (B_OK when
 * no data line was processed). Parsing stops at the first negative result.
 */
status_t
parseDelimitedFile(const char *filename, char delimiter,
	char *fields[][2], int32 fieldCount,
	UParseLineFn *lineFunction, void *context)
{
	FILE *input = fopen(filename, "r");
	if (input == NULL) {
		fprintf(stderr, "*** Unable to open input file %s\n",filename);
		return B_IO_ERROR;
	}

	status_t result = B_OK;
	char buffer[300];

	while (fgets(buffer, sizeof(buffer), input) != NULL) {
		// strip any trailing CR/LF characters
		int32 used = strlen(buffer);
		while (used > 0
			&& (buffer[used - 1] == '\r' || buffer[used - 1] == '\n')) {
			buffer[--used] = '\0';
		}

		// nothing to do for empty lines and comment lines
		if (buffer[0] == '\0' || buffer[0] == '#')
			continue;

		// cut off an in-line comment including the white space in front of it
		char *comment = strchr(buffer, '#');
		if (comment != NULL) {
			while (comment > buffer
				&& (comment[-1] == ' ' || comment[-1] == '\t')) {
				comment--;
			}
			*comment = '\0';
		}

		// record start and limit of every field
		bool lineComplete = true;
		char *cursor = buffer;
		for (int32 index = 0; index < fieldCount; index++) {
			char *fieldLimit = cursor;
			while (*fieldLimit != delimiter && *fieldLimit != 0)
				fieldLimit++;

			fields[index][0] = cursor;
			fields[index][1] = fieldLimit;

			// continue behind the delimiter, if there is one
			cursor = fieldLimit;
			if (*cursor != 0) {
				cursor++;
			} else if (index + 1 < fieldCount) {
				fprintf(stderr, "*** too few fields in line %s\n", buffer);
				result = B_ERROR;
				lineComplete = false;
				break;
			}
		}
		if (!lineComplete)
			break;

		// let the caller process the line
		result = lineFunction(context, fields, fieldCount);
		if (result < B_OK)
			break;
	}

	fclose(input);
	return result;
}
// Returns a pointer to the first character of "s" that is neither a space
// nor a tab (this may be the terminating NUL).
static const char *
skipWhitespace(const char *s)
{
	return s + strspn(s, " \t");
}
/*
 * Parses a list of hexadecimal code points separated by spaces/tabs; the list
 * ends at a ';' or at the end of the string.
 *
 * s          - text to parse
 * dest       - receives the code points as a string; dest[0] holds the string
 *              length, the string itself starts at dest[1]
 * destSize   - capacity of dest
 * pFirst     - if not NULL, receives the first code point (0xffff if the
 *              list is empty)
 * pErrorCode - always reset to B_OK on entry; set to B_ERROR on a syntax
 *              error and to B_BAD_VALUE if the sequence does not fit
 *
 * Returns the number of code points, or -1 on error.
 */
static int32
parseCodePoints(const char *s,UChar *dest, int32 destSize,uint32 *pFirst,status_t *pErrorCode)
{
	int32 count = 0;

	*pErrorCode = B_OK;
	if (pFirst != NULL)
		*pFirst = 0xffff;

	// leave dest[0] for the length value
	int32 i = 1;
	for (;;) {
		s = skipWhitespace(s);
		if (*s == ';' || *s == 0) {
			dest[0] = (UChar)(i-1);
			return count;
		}

		/* read one code point */
		char *end;
		// Keep the full strtoul() result for the range check: narrowing it
		// to 32 bits first would let over-long values wrap around and pass
		// where unsigned long is wider than 32 bits.
		unsigned long parsed = strtoul(s, &end, 16);
		if (end <= s || (*end != ' ' && *end != '\t' && *end != ';') || parsed >= 0x110000) {
			fprintf(stderr, "genprops: syntax error parsing code point at %s\n", s);
			*pErrorCode = B_ERROR;
			return -1;
		}
		uint32 value = (uint32)parsed;

		// store the first code point
		if (++count == 1 && pFirst != NULL)
			*pFirst = value;

		// append it to the destination array
		// (the macro is expected to advance i - nothing else in this loop does)
		UTF_APPEND_CHAR(dest, i, destSize, value);

		// overflow?
		if (i >= destSize) {
			fprintf(stderr, "genprops: code point sequence too long at %s\n", s);
			*pErrorCode = B_BAD_VALUE;
			return -1;
		}

		// go to the following characters
		s = end;
	}
}
/* parser for Mirror.txt ---------------------------------------------------- */
#define MAX_MIRROR_COUNT 2000
static uint32 mirrorMappings[MAX_MIRROR_COUNT][2];
static int32 mirrorCount = 0;

// Line callback for Mirror.txt: stores the code point (field 0) and its
// mirrored counterpart (field 1) as the next entry of mirrorMappings.
// Both fields must consist of nothing but a hexadecimal number.
// Returns B_ERROR on a syntax error and B_BAD_VALUE once the table is full.
static status_t
mirrorLineFn(void *context,char *fields[][2],int32 fieldCount)
{
	uint32 *mapping = mirrorMappings[mirrorCount];
	char *parsedUpTo;

	// the character itself
	char *source = fields[0][0];
	mapping[0] = strtoul(source, &parsedUpTo, 16);
	if (parsedUpTo <= source || parsedUpTo != fields[0][1]) {
		fprintf(stderr, "genprops: syntax error in Mirror.txt field 0 at %s\n", fields[0][0]);
		return B_ERROR;
	}

	// the character it is mirrored to
	char *target = fields[1][0];
	mapping[1] = strtoul(target, &parsedUpTo, 16);
	if (parsedUpTo <= target || parsedUpTo != fields[1][1]) {
		fprintf(stderr, "genprops: syntax error in Mirror.txt field 1 at %s\n", fields[1][0]);
		return B_ERROR;
	}

	mirrorCount++;
	if (mirrorCount == MAX_MIRROR_COUNT) {
		fprintf(stderr, "genprops: too many mirror mappings\n");
		return B_BAD_VALUE;
	}

	return B_OK;
}
// Parses the mirror mapping file; every line is made of two ';' separated
// fields that are handed to mirrorLineFn().
static status_t
parseMirror(const char *filename)
{
	char *fieldBounds[2][2];
	const status_t result = parseDelimitedFile(filename, ';', fieldBounds, 2,
		mirrorLineFn, NULL);
	return result;
}
/* parser for SpecialCasing.txt --------------------------------------------- */
#define MAX_SPECIAL_CASING_COUNT 500
static SpecialCasing specialCasings[MAX_SPECIAL_CASING_COUNT];
static int32 specialCasingCount = 0;

// Line callback for SpecialCasing.txt.
// Field 0 is the code point, fields 1, 2 and 3 are the lowercase, titlecase
// and uppercase mappings, field 4 is an optional condition. Entries with a
// condition are only marked as "complex" and get no mappings stored.
// Returns B_ERROR/B_BAD_VALUE on malformed input or when the table is full.
static status_t
specialCasingLineFn(void *context,char *fields[][2],int32 fieldCount)
{
	SpecialCasing &entry = specialCasings[specialCasingCount];
	char *end;

	// get code point
	entry.code = strtoul(skipWhitespace(fields[0][0]), &end, 16);
	end = (char *)skipWhitespace(end);
	if (end <= fields[0][0] || end != fields[0][1]) {
		fprintf(stderr, "genprops: syntax error in SpecialCasing.txt field 0 at %s\n", fields[0][0]);
		return B_ERROR;
	}

	// is this a complex mapping?
	if (*skipWhitespace(fields[4][0]) != 0) {
		// there is some condition text in the fifth field
		entry.isComplex = true;

		// do not store any actual mappings for this
		entry.lowerCase[0] = 0;
		entry.upperCase[0] = 0;
		entry.titleCase[0] = 0;
	} else {
		// just set the "complex" flag and get the case mappings
		entry.isComplex = false;

		// parseCodePoints() resets the error code on entry, so a failure
		// must be detected before the next call would overwrite it.
		status_t errorCode = B_OK;
		parseCodePoints(fields[1][0], entry.lowerCase, 32, NULL, &errorCode);
		if (errorCode >= B_OK)
			parseCodePoints(fields[3][0], entry.upperCase, 32, NULL, &errorCode);
		if (errorCode >= B_OK)
			parseCodePoints(fields[2][0], entry.titleCase, 32, NULL, &errorCode);
		if (errorCode < B_OK) {
			fprintf(stderr, "genprops: error parsing special casing at %s\n", fields[0][0]);
			return errorCode;
		}
	}

	if (++specialCasingCount == MAX_SPECIAL_CASING_COUNT) {
		fprintf(stderr, "genprops: too many special casing mappings\n");
		return B_BAD_VALUE;
	}

	return B_OK;
}
static int
compareSpecialCasings(const void *left, const void *right)
{
return ((const SpecialCasing *)left)->code - ((const SpecialCasing *)right)->code;
}
// Parses SpecialCasing.txt into specialCasings, sorts the entries by code
// point and collapses several entries for the same code point into a single
// "complex" entry without mappings.
static status_t
parseSpecialCasing(const char *filename)
{
	char *fields[5][2];
	const status_t parseResult = parseDelimitedFile(filename, ';', fields, 5,
		specialCasingLineFn, NULL);
	if (parseResult < B_OK)
		return parseResult;

	// sort the special casing entries by code point
	if (specialCasingCount > 0) {
		qsort(specialCasings, specialCasingCount, sizeof(SpecialCasing),
			compareSpecialCasings);
	}

	// replace multiple entries for any code point by one "complex" one
	int32 removed = 0;
	for (int32 index = 1; index < specialCasingCount; index++) {
		SpecialCasing &earlier = specialCasings[index - 1];
		SpecialCasing &later = specialCasings[index];
		if (earlier.code != later.code)
			continue;

		// Duplicate code point: give the earlier entry a code that sorts it
		// to the end, and turn the later one into a complex entry.
		earlier.code = 0x7fffffff;
		later.isComplex = true;
		later.lowerCase[0] = 0;
		later.upperCase[0] = 0;
		later.titleCase[0] = 0;
		removed++;
	}

	if (removed == 0)
		return B_OK;

	// the removed entries are moved behind the valid ones by re-sorting
	qsort(specialCasings, specialCasingCount, sizeof(SpecialCasing),
		compareSpecialCasings);
	specialCasingCount -= removed;

	return B_OK;
}
/* parser for CaseFolding.txt ----------------------------------------------- */
#define MAX_CASE_FOLDING_COUNT 500
static CaseFolding caseFoldings[MAX_CASE_FOLDING_COUNT];
static int32 caseFoldingCount = 0;

// Line callback for CaseFolding.txt.
// Field 0 is the code point, field 1 the status letter (L, E, C, S, F or I)
// and field 2 the mapping. 'L' lines are ignored. An 'S' line directly
// following an 'F' line for the same code point (or vice versa) is merged
// into the previous entry instead of creating a new one. 'I' lines are stored
// as markers without any mapping.
static status_t
caseFoldingLineFn(void *context,char *fields[][2],int32 fieldCount)
{
	CaseFolding &entry = caseFoldings[caseFoldingCount];

	// get code point
	char *end;
	entry.code = strtoul(skipWhitespace(fields[0][0]), &end, 16);
	end = (char *)skipWhitespace(end);
	if (end <= fields[0][0] || end != fields[0][1]) {
		fprintf(stderr, "genprops: syntax error in CaseFolding.txt field 0 at %s\n", fields[0][0]);
		return B_ERROR;
	}

	// get the status of this mapping
	const char kind = *skipWhitespace(fields[1][0]);
	entry.status = kind;
	switch (kind) {
		case 'L':
		case 'E':
		case 'C':
		case 'S':
		case 'F':
		case 'I':
			break;
		default:
			fprintf(stderr, "genprops: unrecognized status field in CaseFolding.txt at %s\n", fields[0][0]);
			return B_ERROR;
	}

	// mappings that equal the UnicodeData.txt lowercase mappings are ignored
	if (kind == 'L')
		return B_OK;

	// get the mapping
	status_t errorCode;
	const int32 mapped = parseCodePoints(fields[2][0], entry.full, 32,
		&entry.simple, &errorCode);
	if (errorCode < B_OK) {
		fprintf(stderr, "genprops: error parsing CaseFolding.txt mapping at %s\n", fields[0][0]);
		return errorCode;
	}

	// there is a simple mapping only if there is exactly one code point
	if (mapped != 1)
		entry.simple = 0;

	// the entry directly before this one, if it is for the same code point
	CaseFolding *twin = NULL;
	if (caseFoldingCount > 0
		&& caseFoldings[caseFoldingCount - 1].code == entry.code) {
		twin = &caseFoldings[caseFoldingCount - 1];
	}

	switch (kind) {
		case 'S':
			// a full mapping came first: add the simple one to it
			if (twin != NULL && twin->status == 'F') {
				twin->simple = entry.simple;
				return B_OK;
			}
			break;

		case 'F':
			// a simple mapping came first: add the full one to it
			if (twin != NULL && twin->status == 'S') {
				memcpy(twin->full, entry.full, 32 * U_SIZEOF_UCHAR);
				return B_OK;
			}
			break;

		case 'I':
			// store only a marker for special handling for cases like
			// dotless i
			entry.simple = 0;
			entry.full[0] = 0;
			break;

		default:
			break;
	}

	caseFoldingCount++;
	if (caseFoldingCount == MAX_CASE_FOLDING_COUNT) {
		fprintf(stderr, "genprops: too many case folding mappings\n");
		return B_BAD_VALUE;
	}

	return B_OK;
}
// Parses the case folding file; every line is made of three ';' separated
// fields that are handed to caseFoldingLineFn().
static status_t
parseCaseFolding(const char *filename)
{
	char *fieldBounds[3][2];
	const status_t result = parseDelimitedFile(filename, ';', fieldBounds, 3,
		caseFoldingLineFn, NULL);
	return result;
}
/* parser for UnicodeData.txt ----------------------------------------------- */
// general categories
// Two-letter general category abbreviations as they appear in field 2 of
// UnicodeData.txt. unicodeDataLineFn() stores the index of the matching name
// as the character's general category, so the order of this table defines
// the numeric category values. Index 0 has no name (NULL) and is skipped by
// the lookup, which starts at index 1.
const char *const
genCategoryNames[B_UNICODE_CATEGORY_COUNT] = {
NULL,
"Lu", "Ll", "Lt", "Lm", "Lo", "Mn", "Me",
"Mc", "Nd", "Nl", "No",
"Zs", "Zl", "Zp",
"Cc", "Cf", "Co", "Cs",
"Pd", "Ps", "Pe", "Pc", "Po",
"Sm", "Sc", "Sk", "So",
"Pi", "Pf",
"Cn"
};
// BiDi category abbreviations as they appear in field 4 of UnicodeData.txt.
// unicodeDataLineFn() stores the index of the matching name as the
// character's BiDi value, so the order of this table defines those values.
const char *const
bidiNames[B_UNICODE_DIRECTION_COUNT] = {
"L", "R", "EN", "ES", "ET", "AN", "CS", "B", "S",
"WS", "ON", "LRE", "LRO", "AL", "RLE", "RLO", "PDF", "NSM", "BN"
};
// control code properties
// General category overrides for some common control characters:
// unicodeDataLineFn() replaces the category of a character parsed as
// B_UNICODE_CONTROL_CHAR with the one listed here if its code matches.
static const struct {
uint32 code;
uint8 generalCategory;
} controlProps[] = {
/* TAB */ { 0x9, B_UNICODE_SPACE_SEPARATOR },
/* VT */ { 0xb, B_UNICODE_SPACE_SEPARATOR },
/* LF */ { 0xa, B_UNICODE_PARAGRAPH_SEPARATOR },
/* FF */ { 0xc, B_UNICODE_LINE_SEPARATOR },
/* CR */ { 0xd, B_UNICODE_PARAGRAPH_SEPARATOR },
/* FS */ { 0x1c, B_UNICODE_PARAGRAPH_SEPARATOR },
/* GS */ { 0x1d, B_UNICODE_PARAGRAPH_SEPARATOR },
/* RS */ { 0x1e, B_UNICODE_PARAGRAPH_SEPARATOR },
/* US */ { 0x1f, B_UNICODE_SPACE_SEPARATOR },
/* NL */ { 0x85, B_UNICODE_PARAGRAPH_SEPARATOR }
};
// Code point ranges that UnicodeData.txt describes by a "<name, First>" /
// "<name, Last>" pair of lines: first and last code point, the properties
// value computed for the range and the area name (without the "<", ">" and
// the First/Last suffix).
// unicodeAreaIndex is the slot currently being filled; its "first" member is
// 0xffffffff as long as no "First" line has been seen for it.
// NOTE(review): the table has room for 32 entries only.
static struct {
uint32 first, last, props;
char name[80];
} unicodeAreas[32];
static int32 unicodeAreaIndex = 0;
/*
 * Line callback for UnicodeData.txt.
 *
 * Parses the 15 fields of one line into a Props structure, attaches the data
 * collected before from Mirror.txt, SpecialCasing.txt and CaseFolding.txt
 * (which are consumed in ascending code point order via static indices),
 * converts everything into a properties value with makeProps() and either
 * stores it for the single code point or records it as the start/end of a
 * "<..., First>"/"<..., Last>" area in unicodeAreas.
 *
 * Returns B_OK on success, B_ERROR on malformed input and B_BAD_VALUE if the
 * unicodeAreas table is exhausted.
 */
static status_t
unicodeDataLineFn(void *context,char *fields[][2],int32 fieldCount)
{
	static int32 mirrorIndex = 0, specialCasingIndex = 0, caseFoldingIndex = 0;
	Props props;
	char *end;
	uint32 value;

	// reset the properties
	memset(&props, 0, sizeof(Props));
	props.decimalDigitValue = props.digitValue = -1;
	props.numericValue = 0x80000000;

	// get the character code, field 0
	props.code = strtoul(fields[0][0], &end, 16);
	if (end <= fields[0][0] || end != fields[0][1]) {
		fprintf(stderr, "genprops: syntax error in field 0 at %s\n", fields[0][0]);
		return B_ERROR;
	}

	// get general category, field 2
	// (the field is NUL-terminated in place so that strcmp() can be used;
	// index 0 of the name table is NULL and therefore skipped)
	*fields[2][1] = 0;
	for (int i = 1;;) {
		if (!strcmp(fields[2][0], genCategoryNames[i])) {
			props.generalCategory = (uint8)i;
			break;
		}
		if (++i == B_UNICODE_CATEGORY_COUNT) {
			fprintf(stderr, "genprops: unknown general category \"%s\" at code 0x%lx\n", fields[2][0], props.code);
			return B_ERROR;
		}
	}

	// get canonical combining class, field 3
	props.canonicalCombining = (uint8)strtoul(fields[3][0], &end, 10);
	if (end <= fields[3][0] || end != fields[3][1]) {
		fprintf(stderr, "genprops: syntax error in field 3 at code 0x%lx\n", props.code);
		return B_ERROR;
	}

	// get BiDi category, field 4
	*fields[4][1] = 0;
	for (int i = 0;;) {
		if (!strcmp(fields[4][0], bidiNames[i])) {
			props.bidi = (uint8)i;
			break;
		}
		if (++i == B_UNICODE_DIRECTION_COUNT) {
			fprintf(stderr, "genprops: unknown BiDi category \"%s\" at code 0x%lx\n", fields[4][0], props.code);
			return B_ERROR;
		}
	}

	// decimal digit value, field 6
	if (fields[6][0] < fields[6][1]) {
		value = strtoul(fields[6][0], &end, 10);
		if (end != fields[6][1] || value > 0x7fff) {
			fprintf(stderr, "genprops: syntax error in field 6 at code 0x%lx\n", props.code);
			return B_ERROR;
		}
		props.decimalDigitValue = (int16)value;
	}

	// digit value, field 7
	if (fields[7][0] < fields[7][1]) {
		value = strtoul(fields[7][0], &end, 10);
		if (end != fields[7][1] || value > 0x7fff) {
			fprintf(stderr, "genprops: syntax error in field 7 at code 0x%lx\n", props.code);
			return B_ERROR;
		}
		props.digitValue = (int16)value;
	}

	// numeric value, field 8
	if (fields[8][0] < fields[8][1]) {
		char *s = fields[8][0];
		bool isNegative;

		// get a possible minus sign
		if (*s == '-') {
			isNegative = true;
			++s;
		} else
			isNegative = false;

		value = strtoul(s, &end, 10);
		if (value > 0 && *end == '/') {
			// field 8 may contain a fractional value, get the denominator
			props.denominator = strtoul(end+1, &end, 10);
			if (props.denominator == 0) {
				fprintf(stderr, "genprops: denominator is 0 in field 8 at code 0x%lx\n", props.code);
				return B_ERROR;
			}
		}
		if (end != fields[8][1] || value > 0x7fffffff) {
			fprintf(stderr, "genprops: syntax error in field 8 at code 0x%lx\n", props.code);
			return B_ERROR;
		}

		if (isNegative)
			props.numericValue = -(int32)value;
		else
			props.numericValue = (int32)value;
		props.hasNumericValue = true;
	}

	// get Mirrored flag, field 9
	if (*fields[9][0] == 'Y') {
		props.isMirrored = 1;
	} else if (fields[9][1] - fields[9][0] != 1 || *fields[9][0] != 'N') {
		fprintf(stderr, "genprops: syntax error in field 9 at code 0x%lx\n", props.code);
		return B_ERROR;
	}

	// get uppercase mapping, field 12
	value = strtoul(fields[12][0], &end, 16);
	if (end != fields[12][1]) {
		fprintf(stderr, "genprops: syntax error in field 12 at code 0x%lx\n", props.code);
		return B_ERROR;
	}
	props.upperCase = value;

	// get lowercase value, field 13
	value = strtoul(fields[13][0], &end, 16);
	if (end != fields[13][1]) {
		fprintf(stderr, "genprops: syntax error in field 13 at code 0x%lx\n", props.code);
		return B_ERROR;
	}
	props.lowerCase = value;

	// get titlecase value, field 14
	value = strtoul(fields[14][0], &end, 16);
	if (end != fields[14][1]) {
		fprintf(stderr, "genprops: syntax error in field 14 at code 0x%lx\n", props.code);
		return B_ERROR;
	}
	props.titleCase = value;

	// override properties for some common control characters
	if (props.generalCategory == B_UNICODE_CONTROL_CHAR) {
		for (uint32 i = 0; i < sizeof(controlProps) / sizeof(controlProps[0]); i++) {
			if (controlProps[i].code == props.code)
				props.generalCategory = controlProps[i].generalCategory;
		}
	}

	// set additional properties from previously parsed files
	if (mirrorIndex < mirrorCount && props.code == mirrorMappings[mirrorIndex][0])
		props.mirrorMapping = mirrorMappings[mirrorIndex++][1];

	if (specialCasingIndex < specialCasingCount && props.code == specialCasings[specialCasingIndex].code)
		props.specialCasing = specialCasings + specialCasingIndex++;
	else
		props.specialCasing = NULL;

	if (caseFoldingIndex < caseFoldingCount && props.code == caseFoldings[caseFoldingIndex].code) {
		props.caseFolding = caseFoldings + caseFoldingIndex++;

		// ignore "Common" mappings (simple==full) that map to the same code
		// point as the regular lowercase mapping
		if (props.caseFolding->status == 'C' && props.caseFolding->simple == props.lowerCase)
			props.caseFolding = NULL;
	} else
		props.caseFolding = NULL;

	value = makeProps(&props);

	if (*fields[1][0] == '<') {
		// first or last entry of a Unicode area
		size_t length = fields[1][1] - fields[1][0];

		if (length < 9) {
			/* name too short for an area name */
		} else if (!memcmp(", First>", fields[1][1]-8, 8)) {
			// set the current area
			if (unicodeAreas[unicodeAreaIndex].first == 0xffffffff) {
				// strip the leading '<' and the trailing ", First>"
				length -= 9;
				unicodeAreas[unicodeAreaIndex].first = props.code;
				unicodeAreas[unicodeAreaIndex].props = value;
				memcpy(unicodeAreas[unicodeAreaIndex].name, fields[1][0]+1, length);
				unicodeAreas[unicodeAreaIndex].name[length] = 0;
			} else {
				// error: a previous area is incomplete
				fprintf(stderr, "genprops: error - area \"%s\" is incomplete\n", unicodeAreas[unicodeAreaIndex].name);
				return B_ERROR;
			}
			return B_OK;
		} else if (!memcmp(", Last>", fields[1][1]-7, 7)) {
			// check that the current area matches, and complete it with the last code point
			// (strip the leading '<' and the trailing ", Last>")
			length -= 8;
			if (unicodeAreas[unicodeAreaIndex].props == value
				&& !memcmp(unicodeAreas[unicodeAreaIndex].name, fields[1][0]+1, length)
				&& unicodeAreas[unicodeAreaIndex].name[length] == 0
				&& unicodeAreas[unicodeAreaIndex].first < props.code) {
				// The slot behind the completed area is needed as the next
				// "unused" marker, so make sure it still lies in the table.
				const int32 areaCapacity = (int32)(sizeof(unicodeAreas) / sizeof(unicodeAreas[0]));
				if (unicodeAreaIndex + 1 >= areaCapacity) {
					fprintf(stderr, "genprops: too many Unicode areas\n");
					return B_BAD_VALUE;
				}

				unicodeAreas[unicodeAreaIndex].last = props.code;
				if (gBeVerbose) {
					printf("Unicode area U+%04lx..U+%04lx \"%s\"\n",
						unicodeAreas[unicodeAreaIndex].first,
						unicodeAreas[unicodeAreaIndex].last,
						unicodeAreas[unicodeAreaIndex].name);
				}
				unicodeAreas[++unicodeAreaIndex].first = 0xffffffff;
			} else {
				// error: different properties between first & last, different area name, first >= last
				fprintf(stderr, "genprops: error - Last of area \"%s\" is incorrect\n", unicodeAreas[unicodeAreaIndex].name);
				return B_ERROR;
			}
			return B_OK;
		} else {
			/* not an area name */
		}
	}

	// properties for a single code point
	// ### TODO: check that the code points (props.code) are in ascending order
	addProps(props.code, value);

	return B_OK;
}
/*
 * Set repeated properties for the areas: every area collected in
 * unicodeAreas gets its properties value applied to its whole code point
 * range via repeatProps().
 */
static void
repeatAreaProps()
{
	/*
	 * UnicodeData.txt before 3.0.1 did not contain the PUAs on
	 * planes 15 and 16.
	 * If that is the case, then we add them here, using the properties
	 * from the BMP PUA.
	 */
	uint32 puaProps = 0;
	bool hasPlane15PUA = false;
	bool hasPlane16PUA = false;

	for (int32 i = 0; i < unicodeAreaIndex; i++) {
		repeatProps(unicodeAreas[i].first, unicodeAreas[i].last, unicodeAreas[i].props);

		// remember the BMP PUA properties and which supplementary PUAs
		// the data file already provides
		if (unicodeAreas[i].first == 0xe000)
			puaProps = unicodeAreas[i].props;
		else if (unicodeAreas[i].first == 0xf0000)
			hasPlane15PUA = true;
		else if (unicodeAreas[i].first == 0x100000)
			hasPlane16PUA = true;
	}

	// add the missing supplementary PUAs, if there is a BMP PUA to copy from
	if (puaProps != 0) {
		if (!hasPlane15PUA)
			repeatProps(0xf0000, 0xffffd, puaProps);
		if (!hasPlane16PUA)
			repeatProps(0x100000, 0x10fffd, puaProps);
	}
}
// Parses UnicodeData.txt (15 ';' separated fields per line) using
// unicodeDataLineFn() and afterwards applies the properties of all collected
// areas to their ranges. Fails with B_ERROR if the file ends while an area is
// still waiting for its "Last" line.
static status_t
parseDB(const char *filename)
{
	// while unicodeAreas[unicodeAreaIndex] is unused, its first is a bogus value
	unicodeAreas[0].first = 0xffffffff;

	char *fields[15][2];
	const status_t parseResult = parseDelimitedFile(filename, ';', fields, 15,
		unicodeDataLineFn, NULL);
	if (parseResult < B_OK)
		return parseResult;

	const bool areaStillOpen
		= unicodeAreas[unicodeAreaIndex].first != 0xffffffff;
	if (areaStillOpen) {
		fprintf(stderr, "genprops: error - the last area \"%s\" from U+%04lx is incomplete\n",
			unicodeAreas[unicodeAreaIndex].name,
			unicodeAreas[unicodeAreaIndex].first);
		return B_ERROR;
	}

	repeatAreaProps();
	return B_OK;
}
int
main(int argc,char **argv)
{
const char *srcDir = "data", *destDir = ".";
// gBeVerbose = true;
if (argc >= 2 && argv[1])
srcDir = argv[1];
// prepare the filename beginning with the source dir
initStore();
BPath path(srcDir,"Mirror.txt");
status_t status = parseMirror(path.Path());
if (status < B_OK)
return -1;
path.SetTo(srcDir,"SpecialCasing.txt");
status = parseSpecialCasing(path.Path());
if (status < B_OK)
return -1;
path.SetTo(srcDir,"CaseFolding.txt");
status = parseCaseFolding(path.Path());
if (status < B_OK)
return -1;
path.SetTo(srcDir,"UnicodeData.txt");
status = parseDB(path.Path());
if (status < B_OK)
return -1;
// process parsed data
compactProps();
compactStage3();
compactStage2();
// write the properties data file
return generateData(destDir);
}

View File

@ -1,69 +0,0 @@
/*
*******************************************************************************
*
* Copyright (C) 1999-2001, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: genprops.h
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 1999dec13
* created by: Markus W. Scherer
*
* adapted for use under BeOS by Axel Dörfler, axeld@pinc-software.de.
*/
#ifndef __GENPROPS_H__
#define __GENPROPS_H__
#include <SupportDefs.h>
#include "utf.h"
// special casing data
// One entry of SpecialCasing.txt.
struct SpecialCasing {
	// the code point the mappings belong to
	uint32 code;
	// set for entries that carry no mappings (conditional or duplicate ones)
	bool isComplex;
	// the mappings; element 0 holds the length of the string that follows
	UChar lowerCase[32];
	UChar upperCase[32];
	UChar titleCase[32];
};
// case folding data
// One entry of CaseFolding.txt.
struct CaseFolding {
	// the code point the mappings belong to
	uint32 code;
	// single code point mapping, 0 if there is none
	uint32 simple;
	// status letter of the entry as read from the file
	char status;
	// full mapping; element 0 holds the length of the string that follows
	UChar full[32];
};
// character properties
// Everything collected for a single code point before it is converted into
// a properties value by makeProps().
struct Props {
	uint32 code;
	uint32 lowerCase;
	uint32 upperCase;
	uint32 titleCase;
	uint32 mirrorMapping;

	/* -1: no value */
	int16 decimalDigitValue;
	int16 digitValue;

	/* see hasNumericValue */
	int32 numericValue;
	/* 0: no value */
	uint32 denominator;

	uint8 generalCategory;
	uint8 canonicalCombining;
	uint8 bidi;
	uint8 isMirrored;
	uint8 hasNumericValue;

	// NULL if there is no entry for this code point
	SpecialCasing *specialCasing;
	CaseFolding *caseFolding;
};
// global flags
// enables additional progress output (e.g. the list of Unicode areas)
extern bool gBeVerbose;
// name tables
// abbreviations of the BiDi and general categories, indexed by their value
extern const char *const bidiNames[];
extern const char *const genCategoryNames[];
// prototypes
// NOTE(review): the functions below are not defined in genprops.cpp;
// presumably they are implemented in store.cpp - confirm there.
// initStore() is called once before any input file is parsed.
extern void initStore(void);
// converts the collected properties of a code point into a single value
// that can be passed to addProps()/repeatProps()
extern uint32 makeProps(Props *p);
// stores the properties value of a single code point
extern void addProps(uint32 c, uint32 props);
// stores the same properties value for the range first..last
extern void repeatProps(uint32 first, uint32 last, uint32 props);
// the compaction steps are run after all input files have been parsed, in
// the order compactProps(), compactStage3(), compactStage2()
extern void compactStage2(void);
extern void compactStage3(void);
extern void compactProps(void);
// writes the properties data file into dataDir
extern status_t generateData(const char *dataDir);
#endif

View File

@ -1,242 +0,0 @@
<html>
<head>
<meta HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=Latin-1">
<title>IBM's Public License - IBM's Classes for Unicode</title>
</head>
<body>
<b>
<p ALIGN="CENTER"><big>IBM PUBLIC LICENSE - IBM&#146;s Classes for Unicode VERSION 1.0</big></p>
</b><font size="2">
<p>THE ACCOMPANYING PROGRAM IS PROVIDED UNDER THE TERMS OF THIS IBM PUBLIC LICENSE
(&quot;AGREEMENT&quot;). ANY USE, REPRODUCTION OR DISTRIBUTION OF THE PROGRAM CONSTITUTES
RECIPIENT&#146;S ACCEPTANCE OF THIS AGREEMENT.</p>
<b>
<p>1. DEFINITIONS</p>
</b>
<p>&quot;Contribution&quot; means: </p>
<blockquote>
<blockquote>
<p>a) in the case of International Business Machines Corporation (&quot;IBM&quot;), the
Original Program, and </p>
<p>b) in the case of each Contributor, </p>
<blockquote>
<p>i) changes to the Program, and</p>
<p>ii) additions to the Program;</p>
</blockquote>
</blockquote>
<p>where such changes and/or additions to the Program originate from and are distributed
by that particular Contributor. A Contribution &#145;originates&#146; from a Contributor
if it was added to the Program by such Contributor itself or anyone acting on such
Contributor&#146;s behalf. Contributions do not include additions to the Program which:
(i) are separate modules of software distributed in conjunction with the Program under
their own license agreement, and (ii) are not derivative works of the Program.</p>
</blockquote>
<p>&quot;Contributor&quot; means IBM and any other entity that distributes the Program.</p>
<p>&quot;Licensed Patents &quot; mean patent claims licensable by a Contributor which are
necessarily infringed by the use or sale of its Contribution alone or when combined with
the Program. </p>
<p>&quot;Original Program&quot; means the original version of the software accompanying
this Agreement as released by IBM, including source code, object code and documentation,
if any.</p>
<p>&quot;Program&quot; means the Original Program and Contributions.</p>
<p>&quot;Recipient&quot; means anyone who receives the Program under this Agreement,
including all Contributors.</p>
<b>
<p>2. GRANT OF RIGHTS</p>
<blockquote>
<blockquote>
</b><p>a) Subject to the terms of this Agreement, each Contributor hereby grants Recipient
a non-exclusive, worldwide, royalty-free copyright license to<font COLOR="#ff0000"> </font>reproduce,
prepare derivative works of, publicly display, publicly perform, distribute and sublicense
the Contribution of such Contributor, if any, and such derivative works, in source code
and object code form.</p>
<p>b) Subject to the terms of this Agreement, each Contributor hereby grants Recipient a
non-exclusive, worldwide,<font COLOR="#008000"> </font>royalty-free patent license under
Licensed Patents to make, use, sell, offer to sell, import and otherwise transfer the
Contribution of such Contributor, if any, in source code and object code form. This patent
license shall apply to the combination of the Contribution and the Program if, at the time
the Contribution is added by the Contributor, such addition of the Contribution causes
such combination to be covered by the Licensed Patents. The patent license shall not apply
to any other combinations which include the Contribution. No hardware per se is licensed
hereunder. </p>
<p>c) Recipient understands that although each Contributor grants the licenses to its
Contributions set forth herein, no assurances are provided by any Contributor that the
Program does not infringe the patent or other intellectual property rights of any other
entity. Each Contributor disclaims any liability to Recipient for claims brought by any
other entity based on infringement of intellectual property rights or otherwise. As a
condition to exercising the rights and licenses granted hereunder, each Recipient hereby
assumes sole responsibility to secure any other intellectual property rights needed, if
any. For example, if a third party patent license is required to allow Recipient to
distribute the Program, it is Recipient&#146;s responsibility to acquire that license
before distributing the Program.</p>
<p>d) Each Contributor represents that to its knowledge it has sufficient copyright rights
in its Contribution, if any, to grant the copyright license set forth in this Agreement. </p>
</blockquote>
</blockquote>
<b>
<p>3. REQUIREMENTS</p>
</b>
<p>A Contributor may choose to distribute the Program in object code form under its own
license agreement, provided that:</p>
<blockquote>
<blockquote>
<p>a) it complies with the terms and conditions of this Agreement; and</p>
<p>b) its license agreement:</p>
<blockquote>
<p>i) effectively disclaims on behalf of all Contributors all warranties and conditions,
express and implied, including warranties or conditions of title and non-infringement, and
implied warranties or conditions of merchantability and fitness for a particular purpose; </p>
<p>ii) effectively excludes on behalf of all Contributors all liability for damages,
including direct, indirect, special, incidental and consequential damages, such as lost
profits; </p>
<p>iii) states that any provisions which differ from this Agreement are offered by that
Contributor alone and not by any other party; and</p>
<p>iv) states that source code for the Program is available from such Contributor, and
informs licensees how to obtain it in a reasonable manner on or through a medium
customarily used for software exchange.<font COLOR="#0000ff"> </p>
</font>
</blockquote>
</blockquote>
</blockquote>
<p>When the Program is made available in source code form:</p>
<blockquote>
<blockquote>
<p>a) it must be made available under this Agreement; and </p>
<p>b) a copy of this Agreement must be included with each copy of the Program. </p>
<font COLOR="#0000ff"><strike>
</blockquote>
</blockquote>
</strike></font>
<p>Each Contributor must include the following in a conspicuous location in the Program: </p>
<blockquote>
<p>Copyright <font FACE="Times New Roman">©</font><font COLOR="#ff0000"> </font>1999,
International Business Machines Corporation and others. All Rights Reserved. </p>
</blockquote>
<p>In addition, each Contributor must identify itself as the originator of its
Contribution, if any, in a manner that reasonably allows subsequent Recipients to identify
the originator of the Contribution. </p>
<b>
<p>4. COMMERCIAL DISTRIBUTION</p>
</b>
<p>Commercial distributors of software may accept certain responsibilities with respect to
end users, business partners and the like. While this license is intended to facilitate
the commercial use of the Program, the Contributor who includes the Program in a
commercial product offering should do so in a manner which does not create potential
liability for other Contributors. Therefore, if a Contributor includes the Program in a
commercial product offering, such Contributor (&quot;Commercial Contributor&quot;) hereby
agrees to defend and indemnify every other Contributor (&quot;Indemnified
Contributor&quot;) against any losses, damages and costs (collectively &quot;Losses&quot;)
arising from claims, lawsuits and other legal actions brought by a third party against the
Indemnified Contributor to the extent caused by the acts or omissions of such Commercial
Contributor in connection with its distribution of the Program in a commercial product
offering. The obligations in this section do not apply to any claims or Losses relating to
any actual or alleged intellectual property infringement. In order to qualify, an
Indemnified Contributor must: a) promptly notify the Commercial Contributor in writing of
such claim, and b) allow the Commercial Contributor to control, and cooperate with the
Commercial Contributor in, the defense and any related settlement negotiations. The
Indemnified Contributor may participate in any such claim at its own expense.</p>
<p>For example, a Contributor might include the Program in a commercial product offering,
Product X. That Contributor is then a Commercial Contributor. If that Commercial
Contributor then makes performance claims, or offers warranties related to Product X,
those performance claims and warranties are such Commercial Contributor&#146;s
responsibility alone. Under this section, the Commercial Contributor would have to defend
claims against the other Contributors related to those performance claims and warranties,
and if a court requires any other Contributor to pay any damages as a result, the
Commercial Contributor must pay those damages.</p>
<b>
<p>5. NO WARRANTY</p>
</b>
<p>EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, THE PROGRAM IS PROVIDED ON AN &quot;AS
IS&quot; BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED
INCLUDING, WITHOUT LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT,
MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is solely responsible
for determining the appropriateness of using and distributing the Program and assumes all
risks associated with its exercise of rights under this Agreement, including but not
limited to the risks and costs of program errors, compliance with applicable laws, damage
to or loss of data, programs or equipment, and unavailability or interruption of
operations. </p>
<b>
<p>6. DISCLAIMER OF LIABILITY</p>
</b>
<p>EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR ANY CONTRIBUTORS
SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR DISTRIBUTION OF THE PROGRAM
OR THE EXERCISE OF ANY RIGHTS GRANTED HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF
SUCH DAMAGES.</p>
<b>
<p>7. GENERAL</p>
</b>
<p>If any provision of this Agreement is invalid or unenforceable under applicable law, it
shall not affect the validity or enforceability of the remainder of the terms of this
Agreement, and without further action by the parties hereto, such provision shall be
reformed to the minimum extent necessary to make such provision valid and enforceable.</p>
<p>If Recipient institutes patent litigation against a Contributor with respect to a
patent applicable to software (including a cross-claim or counterclaim in a lawsuit), then
any patent licenses granted by that Contributor to such Recipient under this Agreement
shall terminate as of the date such litigation is filed. In addition, If Recipient
institutes patent litigation against any entity (including a cross-claim or counterclaim
in a lawsuit) alleging that the Program itself (excluding combinations of the Program with
other software or hardware) infringes such Recipient&#146;s patent(s), then such
Recipient&#146;s rights granted under Section 2(b) shall terminate as of the date such
litigation is filed. </p>
<p>All Recipient&#146;s rights under this Agreement shall terminate if it fails to comply
with any of the material terms or conditions of this Agreement and does not cure such
failure in a reasonable period of time after becoming aware of such noncompliance. If all
Recipient&#146;s rights under this Agreement terminate, Recipient agrees to cease use and
distribution of the Program as soon as reasonably practicable. However, Recipient's
obligations under this Agreement and any licenses granted by Recipient relating to the
Program shall continue and survive. </p>
<p>IBM may publish new versions (including revisions) of this Agreement from time to time.
Each new version of the Agreement will be given a distinguishing version number. The
Program (including Contributions) may always be distributed subject to the version of the
Agreement under which it was received. In addition, after a new version of the Agreement
is published, Contributor may elect to distribute the Program (including its
Contributions) under the new version. No one other than IBM has the right to modify this
Agreement. Except as expressly stated in Sections 2(a) and 2(b) above, Recipient receives
no rights or licenses to the intellectual property of any Contributor under this
Agreement, whether expressly, by implication, estoppel or otherwise. All rights in the
Program not expressly granted under this Agreement are reserved.</p>
<p>This Agreement is governed by the laws of the State of New York and the intellectual
property laws of the United States of America. No party to this Agreement will bring a
legal action under this Agreement more than one year after the cause of action arose. Each
party waives its rights to a jury trial in any resulting litigation. </p>
</font>
</body>
</html>

File diff suppressed because it is too large Load Diff

View File

@ -1,97 +0,0 @@
/*
*******************************************************************************
*
* Copyright (C) 1999-2001, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: utf.h
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 1999sep09
* created by: Markus W. Scherer
* adapted for use under BeOS by Axel Dörfler, axeld@pinc-software.de.
*/
#ifndef __UTF_H__
#define __UTF_H__
#include "UnicodeProperties.h"
#include <stddef.h>
/* Width of one code unit in bits; 8 selects the UTF-8 flavour of the macros. */
#define UTF_SIZE 8
/* Size of one code unit in bytes (UTF_SIZE / 8). */
#define U_SIZEOF_UCHAR (UTF_SIZE>>3)
/* Type holding one code point; an alias for the unsigned uint32 type. */
typedef uint32 UChar32;
/*
 * UTF_SAFE is defined unless the includer already did so. The unsuffixed
 * default macros at the end of this header refer to the _SAFE variants.
 */
#ifndef UTF_SAFE
# define UTF_SAFE
#endif
/* internal definitions ----------------------------------------------------- */

/*
 * Substitute values handed back for ill-formed input. They need one, two and
 * three bytes respectively when written as UTF-8.
 */
#define UTF8_ERROR_VALUE_1 0x15
#define UTF8_ERROR_VALUE_2 0x9f
#define UTF_ERROR_VALUE 0xffff

/** True if the code unit or code point lies in the surrogate range U+d800..U+dfff. */
#define UTF_IS_SURROGATE(unichar) \
    (((unichar)&0xfffff800)==0xd800)

/**
 * True if the 32-bit value is a real Unicode character: not above 0x10ffff,
 * not a surrogate, and not one of the 0xXXfffe/0xXXffff values.
 */
#define UTF_IS_UNICODE_CHAR(c) \
    ( (uint32_t)(c)<=0x10ffff \
      && !UTF_IS_SURROGATE(c) \
      && ((c)&0xfffe)!=0xfffe )

/**
 * True if the value is one of the error values the UTF macros return:
 * anything ending in 0xfffe/0xffff, or one of the two UTF-8 specific values.
 */
#define UTF_IS_ERROR(c) \
    ( ((c)&0xfffe)==0xfffe \
      || (c)==UTF8_ERROR_VALUE_1 \
      || (c)==UTF8_ERROR_VALUE_2 )

/**
 * Combined check: a valid Unicode character that is also not one of the
 * UTF-8 error values.
 */
#define UTF_IS_VALID(c) \
    ( UTF_IS_UNICODE_CHAR(c) \
      && (c)!=UTF8_ERROR_VALUE_1 \
      && (c)!=UTF8_ERROR_VALUE_2 )
#include "utf8.h"
/*
* ANSI C header:
* limits.h defines CHAR_MAX
*/
#include <limits.h>
/*
 * Encoding-neutral names for the append macros; both forward unchanged to the
 * UTF-8 implementations.
 */
#define UTF_APPEND_CHAR_SAFE(s, i, length, c) UTF8_APPEND_CHAR_SAFE(s, i, length, c)
#define UTF_APPEND_CHAR_UNSAFE(s, i, c) UTF8_APPEND_CHAR_UNSAFE(s, i, c)
/*
 * Define UChar to be compatible with char if possible.
 * Plain char is used when it can represent 0..255 (CHAR_MAX>=255);
 * otherwise the code unit type falls back to uint8.
 */
#if CHAR_MAX>=255
typedef char UChar;
#else
typedef uint8 UChar;
#endif
/*
 * Default (unsuffixed) macros: each selects the bounds-checking variant and,
 * where a strict flag exists, passes FALSE (no strict checks).
 *
 * Apart from UTF_APPEND_CHAR_SAFE this header defines no UTF_*_SAFE aliases,
 * so the defaults expand directly to the UTF8_*_SAFE macros from utf8.h.
 * Routing them through the undefined UTF_*_SAFE names would make every use
 * fail to compile.
 */
#define UTF_GET_CHAR(s, start, i, length, c) UTF8_GET_CHAR_SAFE(s, start, i, length, c, FALSE)
#define UTF_NEXT_CHAR(s, i, length, c) UTF8_NEXT_CHAR_SAFE(s, i, length, c, FALSE)
#define UTF_APPEND_CHAR(s, i, length, c) UTF_APPEND_CHAR_SAFE(s, i, length, c)
#define UTF_FWD_1(s, i, length) UTF8_FWD_1_SAFE(s, i, length)
#define UTF_FWD_N(s, i, length, n) UTF8_FWD_N_SAFE(s, i, length, n)
#define UTF_SET_CHAR_START(s, start, i) UTF8_SET_CHAR_START_SAFE(s, start, i)
#define UTF_PREV_CHAR(s, start, i, c) UTF8_PREV_CHAR_SAFE(s, start, i, c, FALSE)
#define UTF_BACK_1(s, start, i) UTF8_BACK_1_SAFE(s, start, i)
#define UTF_BACK_N(s, start, i, n) UTF8_BACK_N_SAFE(s, start, i, n)
#define UTF_SET_CHAR_LIMIT(s, start, i, length) UTF8_SET_CHAR_LIMIT_SAFE(s, start, i, length)
#endif /* __UTF_H__ */

View File

@ -1,309 +0,0 @@
/*
******************************************************************************
*
* Copyright (C) 1999-2001, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
* file name: utf_impl.c
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 1999sep13
* created by: Markus W. Scherer
*
* This file provides implementation functions for macros in the utfXX.h
* that would otherwise be too long as macros.
*/
/* set import/export definitions */
/*
 * Marks this translation unit as the UTF-8 implementation file.
 * NOTE(review): none of the headers shown here test U_UTF8_IMPL; presumably
 * it is a leftover from the original ICU export machinery - confirm before
 * relying on it.
 */
#ifndef U_UTF8_IMPL
# define U_UTF8_IMPL
#endif
#include <SupportDefs.h>
#include "utf.h"
/*
* This table could be replaced on many machines by
* a few lines of assembler code using an
* "index of first 0-bit from msb" instruction and
* one or two more integer instructions.
*
* For example, on an i386, do something like
* - MOV AL, leadByte
* - NOT AL (8-bit, leave b15..b8==0..0, reverse only b7..b0)
* - MOV AH, 0
* - BSR BX, AX (16-bit)
* - MOV AX, 6 (result)
* - JZ finish (ZF==1 if leadByte==0xff)
* - SUB AX, BX (result)
* -finish:
* (BSR: Bit Scan Reverse, scans for a 1-bit, starting from the MSB)
*/
/*
 * Lookup table indexed by a byte value: the number of trail bytes that
 * follow when that byte is used as a lead byte. Accessed through
 * UTF8_COUNT_TRAIL_BYTES() in utf8.h.
 */
uint8
utf8_countTrailBytes[256] = {
/* 0x00..0x7f: single-byte characters, no trail bytes */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* 0x80..0xbf: trail bytes, which cannot lead a sequence */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* 0xc0..0xdf: one trail byte */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
/* 0xe0..0xef: two trail bytes */
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
/* 0xf0..0xf7: three trail bytes */
3, 3, 3, 3, 3, 3, 3, 3,
/* 0xf8..0xfb: four trail bytes */
4, 4, 4, 4,
/* 0xfc..0xfd: five trail bytes */
5, 5,
0, 0 /* illegal bytes 0xfe and 0xff */
};
/*
 * Indexed by the number of trail bytes (0..3): the smallest value that
 * needs a sequence of that length. The strict checks below treat a decoded
 * value smaller than its entry as an irregular (overlong) sequence.
 */
static UChar32
utf8_minRegular[4]={ 0, 0x80, 0x800, 0x10000 };
/*
 * Error values returned by the functions below. Entry k is a value whose
 * UTF-8 form takes k+1 bytes, so an error result can stand in for the same
 * number of bytes as the input it replaces.
 */
static UChar32
utf8_errorValue[6]={
UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_2, UTF_ERROR_VALUE, 0x10ffff,
0x3ffffff, 0x7fffffff
};
/*
 * Decodes the trail bytes of a multi-byte UTF-8 sequence whose lead byte the
 * caller (UTF8_NEXT_CHAR_SAFE) has already read.
 *
 * s      - byte array being decoded
 * pi     - in: index of the byte following the lead byte;
 *          out: index following the bytes consumed by this call
 * length - limit index; bytes at or beyond it are not read
 * c      - the lead byte
 * strict - if set, additionally reject surrogates, sequences with 4 or 5
 *          trail bytes, values smaller than utf8_minRegular[count]
 *          (overlong forms) and values ending in 0xfffe/0xffff
 *
 * Returns the decoded value, or an entry of utf8_errorValue[] when the
 * sequence is ill-formed, truncated or (in strict mode) irregular.
 */
UChar32
utf8_nextCharSafeBody(const uint8 *s, int32 *pi, int32 length, UChar32 c, bool strict)
{
int32 i = *pi;
uint8 count = UTF8_COUNT_TRAIL_BYTES(c);
if (i + count <= length) {
uint8 trail, illegal = 0;
UTF8_MASK_LEAD_BYTE((c), count);
/* count==0 for illegally leading trail bytes and the illegal bytes 0xfe and 0xff */
switch(count) {
/* each branch falls through to the next one */
case 5:
trail=s[(i)++];
(c)=((c)<<6)|(trail&0x3f);
illegal|=(trail&0xc0)^0x80;
case 4:
trail=s[(i)++];
(c)=((c)<<6)|(trail&0x3f);
illegal|=(trail&0xc0)^0x80;
case 3:
trail=s[(i)++];
(c)=((c)<<6)|(trail&0x3f);
/*
 * Exactly two trail bytes (12 bits) are still to come here, so c holds
 * the final value shifted right by 12; c>=0x110 therefore means the
 * final value would exceed 0x10ffff.
 */
if(c<0x110) {
illegal|=(trail&0xc0)^0x80;
} else {
/* code point>0x10ffff, outside Unicode */
/* skip the two unread trail bytes so that i has advanced by count */
i+=2;
illegal=1;
break;
}
case 2:
trail=s[(i)++];
(c)=((c)<<6)|(trail&0x3f);
illegal|=(trail&0xc0)^0x80;
case 1:
trail=s[(i)++];
(c)=((c)<<6)|(trail&0x3f);
illegal|=(trail&0xc0)^0x80;
break;
case 0:
illegal=1;
/* no default branch to optimize switch() - all values are covered */
break;
}
/*
* All the error handling should return a value
* that needs count bytes so that UTF8_GET_CHAR_SAFE() works right.
*/
/* correct sequence - all trail bytes have (b7..b6)==(10)? */
if(illegal) {
/* error handling */
uint8 errorCount=count;
/* don't go beyond this sequence */
/* rewind to the first trail position, then re-advance over real trail bytes only */
(i)-=count;
while(count>0 && UTF8_IS_TRAIL(s[i])) {
++(i);
--count;
}
/* errorCount-count is the number of trail bytes actually skipped */
c=utf8_errorValue[errorCount-count];
} else if((strict) &&
(UTF_IS_SURROGATE(c) ||
count>=4 || (c)<utf8_minRegular[count] ||
((c)&0xfffe)==0xfffe)
) {
/* irregular sequence */
c=utf8_errorValue[count];
}
} else /* too few bytes left */ {
/* error handling */
int32 i0=i;
/* don't just set (i)=(length) in case there is an illegal sequence */
while((i)<(length) && UTF8_IS_TRAIL(s[i])) {
++(i);
}
c=utf8_errorValue[i-i0];
}
*pi=i;
return c;
}
/*
 * Writes the multi-byte UTF-8 form of c into s at index i, never writing at
 * or beyond index length.
 *
 * Called by UTF8_APPEND_CHAR_SAFE for values above 0x7f. If c is larger than
 * 0x10ffff, or its encoding does not fit, an error value is written instead
 * that fills at most the remaining room (capped at three bytes); with no
 * room left nothing is written.
 *
 * Returns the index following the last byte written.
 */
int32
utf8_appendCharSafeBody(uint8 *s, int32 i, int32 length, UChar32 c)
{
    /* bytes needed for a regular encoding of c; 0 means "cannot be encoded" */
    int32 needed = 0;
    if (c <= 0x7ff)
        needed = 2;
    else if ((uint32)c <= 0xffff)
        needed = 3;
    else if ((uint32)c <= 0x10ffff)
        needed = 4;

    if (needed != 0 && i + (needed - 1) < length) {
        uint8 *out = s + i;

        switch (needed) {
            case 2:
                out[0] = (uint8)((c >> 6) | 0xc0);
                out[1] = (uint8)((c & 0x3f) | 0x80);
                break;
            case 3:
                out[0] = (uint8)((c >> 12) | 0xe0);
                out[1] = (uint8)(((c >> 6) & 0x3f) | 0x80);
                out[2] = (uint8)((c & 0x3f) | 0x80);
                break;
            default:
                out[0] = (uint8)((c >> 18) | 0xf0);
                out[1] = (uint8)(((c >> 12) & 0x3f) | 0x80);
                out[2] = (uint8)(((c >> 6) & 0x3f) | 0x80);
                out[3] = (uint8)((c & 0x3f) | 0x80);
                break;
        }
        return i + needed;
    }

    /* c>0x10ffff or not enough space, write an error value */
    int32 room = length - i;
    if (room <= 0)
        return i;
    if (room > 3)
        room = 3;

    /* pick the error value whose encoding is exactly `room` bytes long */
    uint8 *tail = s + i;
    int32 offset = 0;
    UChar32 errorValue = utf8_errorValue[room - 1];
    UTF8_APPEND_CHAR_SAFE(tail, offset, room, errorValue);
    return i + offset;
}
/*
 * Decodes, backwards, the multi-byte UTF-8 sequence ending with a trail byte
 * that the caller (UTF8_PREV_CHAR_SAFE) has already read.
 *
 * s      - byte array being decoded
 * start  - lowest index that may be read
 * pi     - in: index of the trail byte already read;
 *          out: index of the lead byte if one was found whose sequence
 *          includes that trail byte, otherwise left unchanged
 * c      - the trail byte already read
 * strict - if set, additionally reject surrogates, sequences with 4 or 5
 *          trail bytes, values smaller than utf8_minRegular[count] and
 *          values ending in 0xfffe/0xffff
 *
 * Returns the decoded value, or an error value if no matching lead byte is
 * found or the sequence is irregular.
 */
UChar32
utf8_prevCharSafeBody(const uint8 *s, int32 start, int32 *pi, UChar32 c, bool strict) {
int32 i=*pi;
/* count: trail bytes seen so far; shift: bit position for the next byte read */
uint8 b, count=1, shift=6;
/* extract value bits from the last trail byte */
c&=0x3f;
for(;;) {
if(i<=start) {
/* no lead byte at all */
c=UTF8_ERROR_VALUE_1;
break;
}
/* read another previous byte */
b=s[--i];
if((uint8)(b-0x80)<0x7e) { /* 0x80<=b<0xfe */
if(b&0x40) {
/* lead byte, this will always end the loop */
uint8 shouldCount=UTF8_COUNT_TRAIL_BYTES(b);
if(count==shouldCount) {
/* set the new position */
*pi=i;
UTF8_MASK_LEAD_BYTE(b, count);
c|=(UChar32)b<<shift;
if( c>0x10ffff ||
(strict &&
(UTF_IS_SURROGATE(c) ||
count>=4 || c<utf8_minRegular[count] || (c&0xfffe)==0xfffe))
) {
/* irregular sequence */
c=utf8_errorValue[count];
} else {
/* exit with correct c */
}
} else {
/* the lead byte does not match the number of trail bytes */
/* only set the position to the lead byte if it would
include the trail byte that we started with */
if(count<shouldCount) {
*pi=i;
c=utf8_errorValue[count];
} else {
c=UTF8_ERROR_VALUE_1;
}
}
break;
} else if(count<5) {
/* trail byte */
c|=(UChar32)(b&0x3f)<<shift;
++count;
shift+=6;
} else {
/* more than 5 trail bytes is illegal */
c=UTF8_ERROR_VALUE_1;
break;
}
} else {
/* single-byte character precedes trailing bytes */
c=UTF8_ERROR_VALUE_1;
break;
}
}
return c;
}
/*
 * Finds the start of the sequence that contains the trail byte at index i.
 *
 * Looks backwards from i, over at most six bytes and never below start, for
 * a lead byte whose sequence is long enough to reach i. Returns that lead
 * byte's index, or i itself if there is none.
 */
int32
utf8_back1SafeBody(const uint8 *s, int32 start, int32 i)
{
    /* i had been decremented once before the function call */

    /* lowest index to inspect: at most the 6 bytes s[lowest]..s[i], inclusively */
    const int32 lowest = (i - 5 > start) ? i - 5 : start;

    for (int32 pos = i; ; --pos) {
        const uint8 byte = s[pos];

        /* a single-byte character or 0xfe/0xff ends the search */
        if (byte < 0x80 || byte >= 0xfe)
            return i;

        if (byte >= 0xc0) {
            /* lead byte: usable only if its sequence is long enough to include i */
            return (UTF8_COUNT_TRAIL_BYTES(byte) >= (i - pos)) ? pos : i;
        }

        /* trail byte: keep going unless the window is exhausted */
        if (pos <= lowest)
            return i;
    }

    /* not reached; i itself is returned above to be consistent with the FWD_1 macro */
    return i;
}

View File

@ -1,315 +0,0 @@
/*
*******************************************************************************
*
* Copyright (C) 1999-2001, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: utf8.h
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 1999sep13
* created by: Markus W. Scherer
*/
/**
* \file
* \brief C API: UTF-8 macros
*
* This file defines macros to deal with UTF-8 code units and code points.
* Signatures and semantics are the same as for the similarly named macros
* in utf16.h.
* utf8.h is included by utf.h after unicode/umachine.h
* and some common definitions.</p>
* <p><b>Usage:</b> ICU coding guidelines for if() statements should be followed when using these macros.
* Compound statements (curly braces {}) must be used for if-else-while...
* bodies and all macro statements should be terminated with semicolon.</p>
*/
/*
 * utf.h must be included first: it provides UChar32 and the error values
 * used below. In this port the header lives next to this file as "utf.h"
 * (utf8.cpp includes it under that name), not under ICU's "unicode/"
 * directory.
 */
#ifndef __UTF_H__
# include "utf.h"
#endif
#ifndef __UTF8_H__
#define __UTF8_H__
/* internal definitions ----------------------------------------------------- */
/*
 * Per-byte table of trail byte counts; the definition is in utf8.cpp.
 * Use UTF8_COUNT_TRAIL_BYTES() rather than indexing it directly.
 */
extern uint8
utf8_countTrailBytes[256];
/*
 * Count the trail bytes for a lead byte -
 * this macro should be used so that the assembler code
 * that is mentioned in utf_impl.c could be used here.
 *
 * The argument is parenthesized before the cast so that an expression
 * argument (a+b, x ? y : z, ...) is converted to uint8 as a whole; otherwise
 * the cast would bind to its first operand only and the index could leave
 * the 256-entry table.
 */
#define UTF8_COUNT_TRAIL_BYTES(leadByte) (utf8_countTrailBytes[(uint8)(leadByte)])
/* use a macro here, too - there may be a simpler way with some machines */
/*
 * Strips the length marker bits from a lead byte in place, keeping only its
 * low (6 - countTrailBytes) value bits. leadByte must be a modifiable
 * lvalue; the expression yields the masked value.
 */
#define UTF8_MASK_LEAD_BYTE(leadByte, countTrailBytes) ((leadByte)&=(1<<(6-(countTrailBytes)))-1)
/*
 * Out-of-line helpers behind the _SAFE macros below; implemented in utf8.cpp.
 */

/*
 * Decodes the rest of a multi-byte sequence after lead byte c; *pi is the
 * index after the lead byte on entry and is advanced past the bytes consumed.
 */
UChar32
utf8_nextCharSafeBody(const uint8 *s,int32 *pi,int32 length, UChar32 c,bool strict);
/*
 * Writes the multi-byte form of c (or an error value) at index i without
 * reaching index length; returns the index after the last byte written.
 */
int32
utf8_appendCharSafeBody(uint8 *s,int32 i,int32 length, UChar32 c);
/*
 * Decodes backwards the sequence ending with trail byte c at index *pi,
 * never reading below start.
 */
UChar32
utf8_prevCharSafeBody(const uint8 *s,int32 start,int32 *pi, UChar32 c,bool strict);
/*
 * Returns the index of the lead byte whose sequence includes the trail byte
 * at index i, or i itself if there is none.
 */
int32
utf8_back1SafeBody(const uint8 *s,int32 start,int32 i);
/*
* For the semantics of all of these macros, see utf16.h.
* The UTF-8 macros favor sequences more the shorter they are.
* Sometimes, only the single-byte case is covered by a macro,
* while longer sequences are handled by a function call.
*/
/* single-code point definitions -------------------------------------------- */
/* classes of code unit values */
/* byte with the top bit clear: a complete single-byte character */
#define UTF8_IS_SINGLE(uchar) (((uchar)&0x80)==0)
/* byte in 0xc0..0xfd: begins a multi-byte sequence (0xfe and 0xff are excluded) */
#define UTF8_IS_LEAD(uchar) ((uint8)((uchar)-0xc0)<0x3e)
/* byte in 0x80..0xbf: continues a multi-byte sequence */
#define UTF8_IS_TRAIL(uchar) (((uchar)&0xc0)==0x80)
/* number of code units per code point */
/* true if c does not fit into one byte, i.e. c>0x7f compared as unsigned */
#define UTF8_NEED_MULTIPLE_UCHAR(c) ((uint32)(c)>0x7f)
/*
* ICU does not deal with code points >0x10ffff
* unless necessary for advancing in the byte stream.
*
* These length macros take into account that for values >0x10ffff
* the "safe" append macros would write the error code point 0xffff
* with 3 bytes.
* Code point comparisons need to be in uint32 because UChar32
* may be a signed type, and negative values must be recognized.
*/
/*
 * UTF8_CHAR_LENGTH(c): number of bytes the append macros write for c.
 * The active (#if 1) variant never yields more than 4. The unsigned
 * subtraction (c)-0x10000 makes both 0x800..0xffff and every value above
 * 0x10ffff select 3, the latter matching the 3-byte error value 0xffff.
 * The disabled variant also reports the 5- and 6-byte forms.
 */
#if 1
# define UTF8_CHAR_LENGTH(c) \
((uint32)(c)<=0x7f ? 1 : \
((uint32)(c)<=0x7ff ? 2 : \
((uint32)((c)-0x10000)>0xfffff ? 3 : 4) \
) \
)
#else
# define UTF8_CHAR_LENGTH(c) \
((uint32)(c)<=0x7f ? 1 : \
((uint32)(c)<=0x7ff ? 2 : \
((uint32)(c)<=0xffff ? 3 : \
((uint32)(c)<=0x10ffff ? 4 : \
((uint32)(c)<=0x3ffffff ? 5 : \
((uint32)(c)<=0x7fffffff ? 6 : 3) \
) \
) \
) \
) \
)
#endif
/* longest sequence written for a code point up to 0x10ffff */
#define UTF8_MAX_CHAR_LENGTH 4
/* average number of code units compared to UTF-16 */
/* i.e. 2.5 bytes per UTF-16 code unit, in integer arithmetic; an estimate, not an upper bound */
#define UTF8_ARRAY_SIZE(size) ((5*(size))/2)
/*
 * Random-access read: stores in c the code point whose sequence contains
 * index i. Works on a private copy of the index, so i itself is not changed.
 * The copy is first moved back to the start of the sequence, then the
 * sequence is decoded forwards. The _UNSAFE form does no checking.
 */
#define UTF8_GET_CHAR_UNSAFE(s, i, c) { \
int32 __I=(int32)(i); \
UTF8_SET_CHAR_START_UNSAFE(s, __I); \
UTF8_NEXT_CHAR_UNSAFE(s, __I, c); \
}
/* Checked form: start and length bound the indices that may be read. */
#define UTF8_GET_CHAR_SAFE(s, start, i, length, c, strict) { \
int32 __I=(int32)(i); \
UTF8_SET_CHAR_START_SAFE(s, start, __I); \
UTF8_NEXT_CHAR_SAFE(s, __I, length, c, strict); \
}
/* definitions with forward iteration --------------------------------------- */
/*
* Read a Unicode scalar value from an array of UTF-8 bytes.
* Only values <=0x10ffff are accepted, and if an error occurs,
* then c will be set such that UTF_IS_ERROR(c).
* The _UNSAFE macro is fast and does not check for errors.
* The _SAFE macro checks for errors and optionally for
* irregular sequences, too, i.e., for sequences that
* are longer than necessary, such as <c0 80> instead of <0>.
* The strict checks also check for surrogates and
* for 0xXXXXfffe and 0xXXXXffff.
*/
/*
 * Unchecked forward read: stores the code point starting at index i in c and
 * advances i past it. Only lead bytes 0xc0..0xf4 are expanded (the
 * (c)-0xc0 < 0x35 test); any other byte value is returned as read. Trail
 * bytes are neither bounds-checked nor validated.
 */
#define UTF8_NEXT_CHAR_UNSAFE(s, i, c) { \
(c)=(s)[(i)++]; \
if((uint8)((c)-0xc0)<0x35) { \
uint8 __count=UTF8_COUNT_TRAIL_BYTES(c); \
UTF8_MASK_LEAD_BYTE(c, __count); \
switch(__count) { \
/* each following branch falls through to the next one */ \
case 3: \
(c)=((c)<<6)|((s)[(i)++]&0x3f); \
case 2: \
(c)=((c)<<6)|((s)[(i)++]&0x3f); \
case 1: \
(c)=((c)<<6)|((s)[(i)++]&0x3f); \
/* no other branches to optimize switch() */ \
break; \
} \
} \
}
/*
 * Unchecked append: writes c as UTF-8 at index i of s and advances i past
 * the bytes written. One branch per encoded length (1 to 4 bytes); neither
 * the buffer size nor the value of c is checked, and anything above 0xffff
 * is written in the 4-byte form.
 */
#define UTF8_APPEND_CHAR_UNSAFE(s, i, c) { \
    if((uint32)(c)<=0x7f) { \
        /* 1 byte */ \
        (s)[(i)++]=(uint8)(c); \
    } else if((uint32)(c)<=0x7ff) { \
        /* 2 bytes */ \
        (s)[(i)++]=(uint8)(((c)>>6)|0xc0); \
        (s)[(i)++]=(uint8)(((c)&0x3f)|0x80); \
    } else if((uint32)(c)<=0xffff) { \
        /* 3 bytes */ \
        (s)[(i)++]=(uint8)(((c)>>12)|0xe0); \
        (s)[(i)++]=(uint8)((((c)>>6)&0x3f)|0x80); \
        (s)[(i)++]=(uint8)(((c)&0x3f)|0x80); \
    } else { \
        /* 4 bytes */ \
        (s)[(i)++]=(uint8)(((c)>>18)|0xf0); \
        (s)[(i)++]=(uint8)((((c)>>12)&0x3f)|0x80); \
        (s)[(i)++]=(uint8)((((c)>>6)&0x3f)|0x80); \
        (s)[(i)++]=(uint8)(((c)&0x3f)|0x80); \
    } \
}
/*
 * Unchecked: advances i from a lead (or single) byte to the start of the
 * next sequence, using only the length implied by the byte at i.
 */
#define UTF8_FWD_1_UNSAFE(s, i) { \
    (i)+=1+UTF8_COUNT_TRAIL_BYTES((s)[i]); \
}
/*
 * Unchecked: advances i by n code points.
 * The counter is an int32, like the one in UTF8_FWD_N_SAFE; the ICU type
 * name UTextOffset is not declared by the headers of this port.
 */
#define UTF8_FWD_N_UNSAFE(s, i, n) { \
    int32 __N=(n); \
    while(__N>0) { \
        UTF8_FWD_1_UNSAFE(s, i); \
        --__N; \
    } \
}
/*
 * Unchecked: moves i backwards while it points at a trail byte, i.e. to the
 * first preceding byte that is not a trail byte. There is no lower bound.
 */
#define UTF8_SET_CHAR_START_UNSAFE(s, i) { \
    while(UTF8_IS_TRAIL((s)[i])) { --(i); } \
}
/*
 * Checked forward read: stores the code point starting at index i in c and
 * advances i.
 *
 * A single byte (<0x80) is returned as is; a lead byte is handed to
 * utf8_nextCharSafeBody(), which decodes and validates the trail bytes.
 * A byte that can neither stand alone nor lead a sequence (a stray trail
 * byte, 0xfe or 0xff) yields UTF8_ERROR_VALUE_1, so that UTF_IS_ERROR(c)
 * holds for every kind of malformed input, as documented above.
 */
#define UTF8_NEXT_CHAR_SAFE(s, i, length, c, strict) { \
    (c)=(s)[(i)++]; \
    if(!UTF8_IS_SINGLE(c)) { \
        if(UTF8_IS_LEAD(c)) { \
            (c)=utf8_nextCharSafeBody(s, &(i), (int32)(length), c, strict); \
        } else { \
            /* one offending byte consumed: report the one-byte error value */ \
            (c)=UTF8_ERROR_VALUE_1; \
        } \
    } \
}
/*
 * Checked append: writes c as UTF-8 at index i of s and updates i.
 * Values up to 0x7f are stored inline; note that this single-byte path does
 * not compare i with length. Everything else goes through
 * utf8_appendCharSafeBody(), which respects length.
 */
#define UTF8_APPEND_CHAR_SAFE(s, i, length, c) { \
if((uint32)(c)<=0x7f) { \
(s)[(i)++]=(uint8)(c); \
} else { \
(i)=utf8_appendCharSafeBody(s, (int32)(i), (int32)(length), c); \
} \
}
/*
 * Checked: advances i past one code point without decoding it.
 * After the first byte, at most as many bytes as the lead byte announces are
 * skipped, only while they really are trail bytes and only up to length.
 * The first byte itself is read without comparing i with length, so the
 * caller has to ensure i<length.
 */
#define UTF8_FWD_1_SAFE(s, i, length) { \
uint8 __b=(s)[(i)++]; \
if(UTF8_IS_LEAD(__b)) { \
uint8 __count=UTF8_COUNT_TRAIL_BYTES(__b); \
if((i)+__count>(length)) { \
__count=(uint8)((length)-(i)); \
} \
while(__count>0 && UTF8_IS_TRAIL((s)[i])) { \
++(i); \
--__count; \
} \
} \
}
/* Checked: advances i by up to n code points, stopping early once i reaches length. */
#define UTF8_FWD_N_SAFE(s, i, length, n) { \
int32 __N=(n); \
while(__N>0 && (i)<(length)) { \
UTF8_FWD_1_SAFE(s, i, length); \
--__N; \
} \
}
/*
 * Checked: if i points at a trail byte, moves it to the lead byte of the
 * sequence containing it (found by utf8_back1SafeBody(), never below start);
 * i is left alone when no such lead byte exists.
 */
#define UTF8_SET_CHAR_START_SAFE(s, start, i) { \
if(UTF8_IS_TRAIL((s)[(i)])) { \
(i)=utf8_back1SafeBody(s, start, (int32)(i)); \
} \
}
/* definitions with backward iteration -------------------------------------- */
/*
 * Unchecked backward read: decrements i to the start of the code point that
 * ends just before the original i and stores that code point in c.
 * Trail bytes are accumulated from the least significant end until a byte
 * >=0xc0 is met; there is no lower bound and no validation.
 */
#define UTF8_PREV_CHAR_UNSAFE(s, i, c) { \
(c)=(s)[--(i)]; \
if(UTF8_IS_TRAIL(c)) { \
uint8 __b, __count=1, __shift=6; \
\
/* c is a trail byte */ \
(c)&=0x3f; \
for(;;) { \
__b=(s)[--(i)]; \
if(__b>=0xc0) { \
UTF8_MASK_LEAD_BYTE(__b, __count); \
(c)|=(UChar32)__b<<__shift; \
break; \
} else { \
(c)|=(UChar32)(__b&0x3f)<<__shift; \
++__count; \
__shift+=6; \
} \
} \
} \
}
/*
 * Unchecked: moves i back to the first preceding byte that is not a trail
 * byte. There is no lower bound.
 */
#define UTF8_BACK_1_UNSAFE(s, i) { \
    while(UTF8_IS_TRAIL((s)[--(i)])) {} \
}
/*
 * Unchecked: moves i back by n code points.
 * The counter is an int32, like the one in UTF8_FWD_N_SAFE; the ICU type
 * name UTextOffset is not declared by the headers of this port.
 */
#define UTF8_BACK_N_UNSAFE(s, i, n) { \
    int32 __N=(n); \
    while(__N>0) { \
        UTF8_BACK_1_UNSAFE(s, i); \
        --__N; \
    } \
}
/*
 * Unchecked: moves i to the end of the code point that precedes or contains
 * it, by stepping back one code point and then forward over it.
 */
#define UTF8_SET_CHAR_LIMIT_UNSAFE(s, i) { \
    UTF8_BACK_1_UNSAFE(s, i); \
    UTF8_FWD_1_UNSAFE(s, i); \
}
/*
 * Checked backward read: decrements i and stores in c the code point that
 * ends just before the original i.
 *
 * A single byte (<0x80) is returned as is; a trail byte is handed to
 * utf8_prevCharSafeBody(), which looks for the matching lead byte without
 * reading below start. A lead byte with nothing after it, or 0xfe/0xff,
 * cannot end a sequence and yields UTF8_ERROR_VALUE_1, so that
 * UTF_IS_ERROR(c) holds for every kind of malformed input.
 */
#define UTF8_PREV_CHAR_SAFE(s, start, i, c, strict) { \
    (c)=(s)[--(i)]; \
    if(!UTF8_IS_SINGLE(c)) { \
        if(UTF8_IS_TRAIL((c))) { \
            (c)=utf8_prevCharSafeBody(s, start, &(i), c, strict); \
        } else { \
            /* one offending byte consumed: report the one-byte error value */ \
            (c)=UTF8_ERROR_VALUE_1; \
        } \
    } \
}
/*
 * Checked: moves i back by one code point. i is decremented once; if it then
 * points at a trail byte, utf8_back1SafeBody() looks for the lead byte of
 * that sequence without going below start. The initial decrement is not
 * compared with start, so the caller has to ensure i>start.
 */
#define UTF8_BACK_1_SAFE(s, start, i) { \
if(UTF8_IS_TRAIL((s)[--(i)])) { \
(i)=utf8_back1SafeBody(s, start, (int32)(i)); \
} \
}
/*
 * Checked: moves i back by up to n code points, stopping early once i
 * reaches start.
 * The counter is an int32, like the one in UTF8_FWD_N_SAFE; the ICU type
 * name UTextOffset is not declared by the headers of this port.
 */
#define UTF8_BACK_N_SAFE(s, start, i, n) { \
    int32 __N=(n); \
    while(__N>0 && (i)>(start)) { \
        UTF8_BACK_1_SAFE(s, start, i); \
        --__N; \
    } \
}
/*
 * Checked: if i lies strictly between start and length, moves it to the end
 * of the code point that precedes or contains it. It steps back one code
 * point, then forward by the length its first byte announces, and clamps the
 * result to length. Outside that range i is left unchanged.
 */
#define UTF8_SET_CHAR_LIMIT_SAFE(s, start, i, length) { \
if((start)<(i) && (i)<(length)) { \
UTF8_BACK_1_SAFE(s, start, i); \
(i)+=1+UTF8_COUNT_TRAIL_BYTES((s)[i]); \
if((i)>(length)) { \
(i)=(length); \
} \
} \
}
#endif