* removed old genprops tool from our repo
git-svn-id: file:///srv/svn/repos/haiku/haiku/trunk@37722 a95241bf-73f2-0310-859d-f6bbb57e9c96
This commit is contained in:
parent
debe3b0970
commit
229c79e015
@ -1,9 +0,0 @@
|
||||
# Build description for the genprops tool.
SubDir HAIKU_TOP src tools locale genprops ;

# Both the private and the public "locale" header sets are requested.
UsePrivateHeaders locale ;
UsePublicHeaders locale ;

# The genprops application: its sources, followed by "be"
# (presumably the library to link against - confirm with the Application rule).
Application genprops :
	genprops.cpp
	store.cpp
	utf8.cpp
	PropertyFile.cpp
	: be
	;
|
@ -1,71 +0,0 @@
|
||||
/*
|
||||
** Copyright 2003, Axel Dörfler, axeld@pinc-software.de. All rights reserved.
|
||||
** Distributed under the terms of the OpenBeOS License.
|
||||
*/
|
||||
|
||||
|
||||
#include "PropertyFile.h"
|
||||
#include "UnicodeProperties.h"
|
||||
|
||||
#include <Path.h>
|
||||
#include <FindDirectory.h>
|
||||
|
||||
|
||||
// Opens the file "name" inside "directory" for writing (creating it if
// necessary) and writes the Unicode properties header into it.
//
// Returns B_OK when the file was opened and the complete header was written.
// Otherwise returns the error reported by BFile::SetTo() or Write(), or
// B_IO_ERROR if only part of the header could be written.
//
// NOTE(review): B_ERASE_FILE is not passed, so an already existing file is
// presumably not truncated before the header is written - confirm that this
// is intended.
status_t
PropertyFile::SetTo(const char *directory, const char *name)
{
	BPath path(directory, name);
	status_t status = BFile::SetTo(path.Path(), B_WRITE_ONLY | B_CREATE_FILE);
	if (status < B_OK)
		return status;

	static const UnicodePropertiesHeader header = {
		sizeof(UnicodePropertiesHeader),
		B_HOST_IS_BENDIAN,
		PROPERTIES_FORMAT,
		{ 3, 0, 0 }	// version (taken from the ICU data version)
	};

	// Write() reports a byte count on success; translate that into a proper
	// status_t so that callers comparing against B_OK behave correctly.
	ssize_t written = Write(&header, sizeof(header));
	if (written < B_OK)
		return (status_t)written;
	if ((size_t)written != sizeof(header))
		return B_IO_ERROR;

	return B_OK;
}
|
||||
|
||||
|
||||
off_t
|
||||
PropertyFile::Size()
|
||||
{
|
||||
off_t size;
|
||||
if (GetSize(&size) < B_OK)
|
||||
return 0;
|
||||
|
||||
return size - sizeof(UnicodePropertiesHeader);
|
||||
}
|
||||
|
||||
|
||||
ssize_t
|
||||
PropertyFile::WritePadding(size_t length)
|
||||
{
|
||||
static uint8 padding[16] = {
|
||||
0xaa, 0xaa, 0xaa, 0xaa,
|
||||
0xaa, 0xaa, 0xaa, 0xaa,
|
||||
0xaa, 0xaa, 0xaa, 0xaa,
|
||||
0xaa, 0xaa, 0xaa, 0xaa
|
||||
};
|
||||
|
||||
ssize_t bytesWritten = (ssize_t)length;
|
||||
|
||||
while (length >= 16) {
|
||||
ssize_t written = Write(padding, 16);
|
||||
if (written < B_OK)
|
||||
return written;
|
||||
|
||||
length -= 16;
|
||||
}
|
||||
if (length > 0) {
|
||||
ssize_t written = Write(padding, length);
|
||||
if (written < B_OK)
|
||||
return written;
|
||||
}
|
||||
|
||||
return bytesWritten;
|
||||
}
|
||||
|
@ -1,24 +0,0 @@
|
||||
/*
|
||||
** Copyright 2003, Axel Dörfler, axeld@pinc-software.de. All rights reserved.
|
||||
** Distributed under the terms of the OpenBeOS License.
|
||||
*/
|
||||
#ifndef PROPERTY_FILE_H
|
||||
#define PROPERTY_FILE_H
|
||||
|
||||
|
||||
#include <File.h>
|
||||
|
||||
|
||||
// This is the write-only version of the PropertyFile class - the library
|
||||
// contains a read-only version of it
|
||||
|
||||
|
||||
class PropertyFile : public BFile {
|
||||
public:
|
||||
status_t SetTo(const char *directory, const char *name);
|
||||
|
||||
off_t Size();
|
||||
ssize_t WritePadding(size_t length);
|
||||
};
|
||||
|
||||
#endif /* PROPERTY_FILE_H */
|
@ -1,821 +0,0 @@
|
||||
# CaseFolding-2.txt
|
||||
#
|
||||
# Case Folding Properties
|
||||
#
|
||||
# This file is a supplement to the UnicodeData file.
|
||||
# It provides a case folding mapping generated from the Unicode Character Database.
|
||||
# If all characters are mapped according to this mapping, then
|
||||
# case differences (according to UnicodeData.txt and SpecialCasing.txt)
|
||||
# are eliminated.
|
||||
#
|
||||
# For information on case folding, see
|
||||
# UTR #21 Case Mappings, at http://www.unicode.org/unicode/reports/tr21/
|
||||
#
|
||||
# These are informative character properties.
|
||||
#
|
||||
# Send comments to mark@unicode.org
|
||||
#
|
||||
# ================================================================================
|
||||
# Format
|
||||
# ================================================================================
|
||||
# The entries in this file are in the following machine-readable format:
|
||||
#
|
||||
# <code>; <status>; <mapping>; # <name>
|
||||
#
|
||||
# The status is:
|
||||
# L (for Lowercase) if the case mapping matches the standard 1-1 lowercase mapping
|
||||
# E (for exception) if it does not.
|
||||
#
|
||||
# The mapping may consist of multiple characters.
|
||||
# If so, they are separated by spaces.
|
||||
#
|
||||
# =================================================================
|
||||
|
||||
0041; L; 0061; #LATIN CAPITAL LETTER A
|
||||
0042; L; 0062; #LATIN CAPITAL LETTER B
|
||||
0043; L; 0063; #LATIN CAPITAL LETTER C
|
||||
0044; L; 0064; #LATIN CAPITAL LETTER D
|
||||
0045; L; 0065; #LATIN CAPITAL LETTER E
|
||||
0046; L; 0066; #LATIN CAPITAL LETTER F
|
||||
0047; L; 0067; #LATIN CAPITAL LETTER G
|
||||
0048; L; 0068; #LATIN CAPITAL LETTER H
|
||||
0049; L; 0069; #LATIN CAPITAL LETTER I
|
||||
004A; L; 006A; #LATIN CAPITAL LETTER J
|
||||
004B; L; 006B; #LATIN CAPITAL LETTER K
|
||||
004C; L; 006C; #LATIN CAPITAL LETTER L
|
||||
004D; L; 006D; #LATIN CAPITAL LETTER M
|
||||
004E; L; 006E; #LATIN CAPITAL LETTER N
|
||||
004F; L; 006F; #LATIN CAPITAL LETTER O
|
||||
0050; L; 0070; #LATIN CAPITAL LETTER P
|
||||
0051; L; 0071; #LATIN CAPITAL LETTER Q
|
||||
0052; L; 0072; #LATIN CAPITAL LETTER R
|
||||
0053; L; 0073; #LATIN CAPITAL LETTER S
|
||||
0054; L; 0074; #LATIN CAPITAL LETTER T
|
||||
0055; L; 0075; #LATIN CAPITAL LETTER U
|
||||
0056; L; 0076; #LATIN CAPITAL LETTER V
|
||||
0057; L; 0077; #LATIN CAPITAL LETTER W
|
||||
0058; L; 0078; #LATIN CAPITAL LETTER X
|
||||
0059; L; 0079; #LATIN CAPITAL LETTER Y
|
||||
005A; L; 007A; #LATIN CAPITAL LETTER Z
|
||||
00B5; E; 03BC; #MICRO SIGN
|
||||
00C0; L; 00E0; #LATIN CAPITAL LETTER A WITH GRAVE
|
||||
00C1; L; 00E1; #LATIN CAPITAL LETTER A WITH ACUTE
|
||||
00C2; L; 00E2; #LATIN CAPITAL LETTER A WITH CIRCUMFLEX
|
||||
00C3; L; 00E3; #LATIN CAPITAL LETTER A WITH TILDE
|
||||
00C4; L; 00E4; #LATIN CAPITAL LETTER A WITH DIAERESIS
|
||||
00C5; L; 00E5; #LATIN CAPITAL LETTER A WITH RING ABOVE
|
||||
00C6; L; 00E6; #LATIN CAPITAL LETTER AE
|
||||
00C7; L; 00E7; #LATIN CAPITAL LETTER C WITH CEDILLA
|
||||
00C8; L; 00E8; #LATIN CAPITAL LETTER E WITH GRAVE
|
||||
00C9; L; 00E9; #LATIN CAPITAL LETTER E WITH ACUTE
|
||||
00CA; L; 00EA; #LATIN CAPITAL LETTER E WITH CIRCUMFLEX
|
||||
00CB; L; 00EB; #LATIN CAPITAL LETTER E WITH DIAERESIS
|
||||
00CC; L; 00EC; #LATIN CAPITAL LETTER I WITH GRAVE
|
||||
00CD; L; 00ED; #LATIN CAPITAL LETTER I WITH ACUTE
|
||||
00CE; L; 00EE; #LATIN CAPITAL LETTER I WITH CIRCUMFLEX
|
||||
00CF; L; 00EF; #LATIN CAPITAL LETTER I WITH DIAERESIS
|
||||
00D0; L; 00F0; #LATIN CAPITAL LETTER ETH
|
||||
00D1; L; 00F1; #LATIN CAPITAL LETTER N WITH TILDE
|
||||
00D2; L; 00F2; #LATIN CAPITAL LETTER O WITH GRAVE
|
||||
00D3; L; 00F3; #LATIN CAPITAL LETTER O WITH ACUTE
|
||||
00D4; L; 00F4; #LATIN CAPITAL LETTER O WITH CIRCUMFLEX
|
||||
00D5; L; 00F5; #LATIN CAPITAL LETTER O WITH TILDE
|
||||
00D6; L; 00F6; #LATIN CAPITAL LETTER O WITH DIAERESIS
|
||||
00D8; L; 00F8; #LATIN CAPITAL LETTER O WITH STROKE
|
||||
00D9; L; 00F9; #LATIN CAPITAL LETTER U WITH GRAVE
|
||||
00DA; L; 00FA; #LATIN CAPITAL LETTER U WITH ACUTE
|
||||
00DB; L; 00FB; #LATIN CAPITAL LETTER U WITH CIRCUMFLEX
|
||||
00DC; L; 00FC; #LATIN CAPITAL LETTER U WITH DIAERESIS
|
||||
00DD; L; 00FD; #LATIN CAPITAL LETTER Y WITH ACUTE
|
||||
00DE; L; 00FE; #LATIN CAPITAL LETTER THORN
|
||||
00DF; E; 0073 0073; #LATIN SMALL LETTER SHARP S
|
||||
0100; L; 0101; #LATIN CAPITAL LETTER A WITH MACRON
|
||||
0102; L; 0103; #LATIN CAPITAL LETTER A WITH BREVE
|
||||
0104; L; 0105; #LATIN CAPITAL LETTER A WITH OGONEK
|
||||
0106; L; 0107; #LATIN CAPITAL LETTER C WITH ACUTE
|
||||
0108; L; 0109; #LATIN CAPITAL LETTER C WITH CIRCUMFLEX
|
||||
010A; L; 010B; #LATIN CAPITAL LETTER C WITH DOT ABOVE
|
||||
010C; L; 010D; #LATIN CAPITAL LETTER C WITH CARON
|
||||
010E; L; 010F; #LATIN CAPITAL LETTER D WITH CARON
|
||||
0110; L; 0111; #LATIN CAPITAL LETTER D WITH STROKE
|
||||
0112; L; 0113; #LATIN CAPITAL LETTER E WITH MACRON
|
||||
0114; L; 0115; #LATIN CAPITAL LETTER E WITH BREVE
|
||||
0116; L; 0117; #LATIN CAPITAL LETTER E WITH DOT ABOVE
|
||||
0118; L; 0119; #LATIN CAPITAL LETTER E WITH OGONEK
|
||||
011A; L; 011B; #LATIN CAPITAL LETTER E WITH CARON
|
||||
011C; L; 011D; #LATIN CAPITAL LETTER G WITH CIRCUMFLEX
|
||||
011E; L; 011F; #LATIN CAPITAL LETTER G WITH BREVE
|
||||
0120; L; 0121; #LATIN CAPITAL LETTER G WITH DOT ABOVE
|
||||
0122; L; 0123; #LATIN CAPITAL LETTER G WITH CEDILLA
|
||||
0124; L; 0125; #LATIN CAPITAL LETTER H WITH CIRCUMFLEX
|
||||
0126; L; 0127; #LATIN CAPITAL LETTER H WITH STROKE
|
||||
0128; L; 0129; #LATIN CAPITAL LETTER I WITH TILDE
|
||||
012A; L; 012B; #LATIN CAPITAL LETTER I WITH MACRON
|
||||
012C; L; 012D; #LATIN CAPITAL LETTER I WITH BREVE
|
||||
012E; L; 012F; #LATIN CAPITAL LETTER I WITH OGONEK
|
||||
0130; L; 0069; #LATIN CAPITAL LETTER I WITH DOT ABOVE
|
||||
0131; E; 0069; #LATIN SMALL LETTER DOTLESS I
|
||||
0132; L; 0133; #LATIN CAPITAL LIGATURE IJ
|
||||
0134; L; 0135; #LATIN CAPITAL LETTER J WITH CIRCUMFLEX
|
||||
0136; L; 0137; #LATIN CAPITAL LETTER K WITH CEDILLA
|
||||
0139; L; 013A; #LATIN CAPITAL LETTER L WITH ACUTE
|
||||
013B; L; 013C; #LATIN CAPITAL LETTER L WITH CEDILLA
|
||||
013D; L; 013E; #LATIN CAPITAL LETTER L WITH CARON
|
||||
013F; L; 0140; #LATIN CAPITAL LETTER L WITH MIDDLE DOT
|
||||
0141; L; 0142; #LATIN CAPITAL LETTER L WITH STROKE
|
||||
0143; L; 0144; #LATIN CAPITAL LETTER N WITH ACUTE
|
||||
0145; L; 0146; #LATIN CAPITAL LETTER N WITH CEDILLA
|
||||
0147; L; 0148; #LATIN CAPITAL LETTER N WITH CARON
|
||||
0149; E; 02BC 006E; #LATIN SMALL LETTER N PRECEDED BY APOSTROPHE
|
||||
014A; L; 014B; #LATIN CAPITAL LETTER ENG
|
||||
014C; L; 014D; #LATIN CAPITAL LETTER O WITH MACRON
|
||||
014E; L; 014F; #LATIN CAPITAL LETTER O WITH BREVE
|
||||
0150; L; 0151; #LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
|
||||
0152; L; 0153; #LATIN CAPITAL LIGATURE OE
|
||||
0154; L; 0155; #LATIN CAPITAL LETTER R WITH ACUTE
|
||||
0156; L; 0157; #LATIN CAPITAL LETTER R WITH CEDILLA
|
||||
0158; L; 0159; #LATIN CAPITAL LETTER R WITH CARON
|
||||
015A; L; 015B; #LATIN CAPITAL LETTER S WITH ACUTE
|
||||
015C; L; 015D; #LATIN CAPITAL LETTER S WITH CIRCUMFLEX
|
||||
015E; L; 015F; #LATIN CAPITAL LETTER S WITH CEDILLA
|
||||
0160; L; 0161; #LATIN CAPITAL LETTER S WITH CARON
|
||||
0162; L; 0163; #LATIN CAPITAL LETTER T WITH CEDILLA
|
||||
0164; L; 0165; #LATIN CAPITAL LETTER T WITH CARON
|
||||
0166; L; 0167; #LATIN CAPITAL LETTER T WITH STROKE
|
||||
0168; L; 0169; #LATIN CAPITAL LETTER U WITH TILDE
|
||||
016A; L; 016B; #LATIN CAPITAL LETTER U WITH MACRON
|
||||
016C; L; 016D; #LATIN CAPITAL LETTER U WITH BREVE
|
||||
016E; L; 016F; #LATIN CAPITAL LETTER U WITH RING ABOVE
|
||||
0170; L; 0171; #LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
|
||||
0172; L; 0173; #LATIN CAPITAL LETTER U WITH OGONEK
|
||||
0174; L; 0175; #LATIN CAPITAL LETTER W WITH CIRCUMFLEX
|
||||
0176; L; 0177; #LATIN CAPITAL LETTER Y WITH CIRCUMFLEX
|
||||
0178; L; 00FF; #LATIN CAPITAL LETTER Y WITH DIAERESIS
|
||||
0179; L; 017A; #LATIN CAPITAL LETTER Z WITH ACUTE
|
||||
017B; L; 017C; #LATIN CAPITAL LETTER Z WITH DOT ABOVE
|
||||
017D; L; 017E; #LATIN CAPITAL LETTER Z WITH CARON
|
||||
017F; E; 0073; #LATIN SMALL LETTER LONG S
|
||||
0181; L; 0253; #LATIN CAPITAL LETTER B WITH HOOK
|
||||
0182; L; 0183; #LATIN CAPITAL LETTER B WITH TOPBAR
|
||||
0184; L; 0185; #LATIN CAPITAL LETTER TONE SIX
|
||||
0186; L; 0254; #LATIN CAPITAL LETTER OPEN O
|
||||
0187; L; 0188; #LATIN CAPITAL LETTER C WITH HOOK
|
||||
0189; L; 0256; #LATIN CAPITAL LETTER AFRICAN D
|
||||
018A; L; 0257; #LATIN CAPITAL LETTER D WITH HOOK
|
||||
018B; L; 018C; #LATIN CAPITAL LETTER D WITH TOPBAR
|
||||
018E; L; 01DD; #LATIN CAPITAL LETTER REVERSED E
|
||||
018F; L; 0259; #LATIN CAPITAL LETTER SCHWA
|
||||
0190; L; 025B; #LATIN CAPITAL LETTER OPEN E
|
||||
0191; L; 0192; #LATIN CAPITAL LETTER F WITH HOOK
|
||||
0193; L; 0260; #LATIN CAPITAL LETTER G WITH HOOK
|
||||
0194; L; 0263; #LATIN CAPITAL LETTER GAMMA
|
||||
0196; L; 0269; #LATIN CAPITAL LETTER IOTA
|
||||
0197; L; 0268; #LATIN CAPITAL LETTER I WITH STROKE
|
||||
0198; L; 0199; #LATIN CAPITAL LETTER K WITH HOOK
|
||||
019C; L; 026F; #LATIN CAPITAL LETTER TURNED M
|
||||
019D; L; 0272; #LATIN CAPITAL LETTER N WITH LEFT HOOK
|
||||
019F; L; 0275; #LATIN CAPITAL LETTER O WITH MIDDLE TILDE
|
||||
01A0; L; 01A1; #LATIN CAPITAL LETTER O WITH HORN
|
||||
01A2; L; 01A3; #LATIN CAPITAL LETTER OI
|
||||
01A4; L; 01A5; #LATIN CAPITAL LETTER P WITH HOOK
|
||||
01A6; L; 0280; #LATIN LETTER YR
|
||||
01A7; L; 01A8; #LATIN CAPITAL LETTER TONE TWO
|
||||
01A9; L; 0283; #LATIN CAPITAL LETTER ESH
|
||||
01AC; L; 01AD; #LATIN CAPITAL LETTER T WITH HOOK
|
||||
01AE; L; 0288; #LATIN CAPITAL LETTER T WITH RETROFLEX HOOK
|
||||
01AF; L; 01B0; #LATIN CAPITAL LETTER U WITH HORN
|
||||
01B1; L; 028A; #LATIN CAPITAL LETTER UPSILON
|
||||
01B2; L; 028B; #LATIN CAPITAL LETTER V WITH HOOK
|
||||
01B3; L; 01B4; #LATIN CAPITAL LETTER Y WITH HOOK
|
||||
01B5; L; 01B6; #LATIN CAPITAL LETTER Z WITH STROKE
|
||||
01B7; L; 0292; #LATIN CAPITAL LETTER EZH
|
||||
01B8; L; 01B9; #LATIN CAPITAL LETTER EZH REVERSED
|
||||
01BC; L; 01BD; #LATIN CAPITAL LETTER TONE FIVE
|
||||
01C4; L; 01C6; #LATIN CAPITAL LETTER DZ WITH CARON
|
||||
01C5; L; 01C6; #LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON
|
||||
01C7; L; 01C9; #LATIN CAPITAL LETTER LJ
|
||||
01C8; L; 01C9; #LATIN CAPITAL LETTER L WITH SMALL LETTER J
|
||||
01CA; L; 01CC; #LATIN CAPITAL LETTER NJ
|
||||
01CB; L; 01CC; #LATIN CAPITAL LETTER N WITH SMALL LETTER J
|
||||
01CD; L; 01CE; #LATIN CAPITAL LETTER A WITH CARON
|
||||
01CF; L; 01D0; #LATIN CAPITAL LETTER I WITH CARON
|
||||
01D1; L; 01D2; #LATIN CAPITAL LETTER O WITH CARON
|
||||
01D3; L; 01D4; #LATIN CAPITAL LETTER U WITH CARON
|
||||
01D5; L; 01D6; #LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
|
||||
01D7; L; 01D8; #LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE
|
||||
01D9; L; 01DA; #LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON
|
||||
01DB; L; 01DC; #LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE
|
||||
01DE; L; 01DF; #LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON
|
||||
01E0; L; 01E1; #LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON
|
||||
01E2; L; 01E3; #LATIN CAPITAL LETTER AE WITH MACRON
|
||||
01E4; L; 01E5; #LATIN CAPITAL LETTER G WITH STROKE
|
||||
01E6; L; 01E7; #LATIN CAPITAL LETTER G WITH CARON
|
||||
01E8; L; 01E9; #LATIN CAPITAL LETTER K WITH CARON
|
||||
01EA; L; 01EB; #LATIN CAPITAL LETTER O WITH OGONEK
|
||||
01EC; L; 01ED; #LATIN CAPITAL LETTER O WITH OGONEK AND MACRON
|
||||
01EE; L; 01EF; #LATIN CAPITAL LETTER EZH WITH CARON
|
||||
01F0; E; 006A 030C; #LATIN SMALL LETTER J WITH CARON
|
||||
01F1; L; 01F3; #LATIN CAPITAL LETTER DZ
|
||||
01F2; L; 01F3; #LATIN CAPITAL LETTER D WITH SMALL LETTER Z
|
||||
01F4; L; 01F5; #LATIN CAPITAL LETTER G WITH ACUTE
|
||||
01F6; L; 0195; #LATIN CAPITAL LETTER HWAIR
|
||||
01F7; L; 01BF; #LATIN CAPITAL LETTER WYNN
|
||||
01F8; L; 01F9; #LATIN CAPITAL LETTER N WITH GRAVE
|
||||
01FA; L; 01FB; #LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE
|
||||
01FC; L; 01FD; #LATIN CAPITAL LETTER AE WITH ACUTE
|
||||
01FE; L; 01FF; #LATIN CAPITAL LETTER O WITH STROKE AND ACUTE
|
||||
0200; L; 0201; #LATIN CAPITAL LETTER A WITH DOUBLE GRAVE
|
||||
0202; L; 0203; #LATIN CAPITAL LETTER A WITH INVERTED BREVE
|
||||
0204; L; 0205; #LATIN CAPITAL LETTER E WITH DOUBLE GRAVE
|
||||
0206; L; 0207; #LATIN CAPITAL LETTER E WITH INVERTED BREVE
|
||||
0208; L; 0209; #LATIN CAPITAL LETTER I WITH DOUBLE GRAVE
|
||||
020A; L; 020B; #LATIN CAPITAL LETTER I WITH INVERTED BREVE
|
||||
020C; L; 020D; #LATIN CAPITAL LETTER O WITH DOUBLE GRAVE
|
||||
020E; L; 020F; #LATIN CAPITAL LETTER O WITH INVERTED BREVE
|
||||
0210; L; 0211; #LATIN CAPITAL LETTER R WITH DOUBLE GRAVE
|
||||
0212; L; 0213; #LATIN CAPITAL LETTER R WITH INVERTED BREVE
|
||||
0214; L; 0215; #LATIN CAPITAL LETTER U WITH DOUBLE GRAVE
|
||||
0216; L; 0217; #LATIN CAPITAL LETTER U WITH INVERTED BREVE
|
||||
0218; L; 0219; #LATIN CAPITAL LETTER S WITH COMMA BELOW
|
||||
021A; L; 021B; #LATIN CAPITAL LETTER T WITH COMMA BELOW
|
||||
021C; L; 021D; #LATIN CAPITAL LETTER YOGH
|
||||
021E; L; 021F; #LATIN CAPITAL LETTER H WITH CARON
|
||||
0222; L; 0223; #LATIN CAPITAL LETTER OU
|
||||
0224; L; 0225; #LATIN CAPITAL LETTER Z WITH HOOK
|
||||
0226; L; 0227; #LATIN CAPITAL LETTER A WITH DOT ABOVE
|
||||
0228; L; 0229; #LATIN CAPITAL LETTER E WITH CEDILLA
|
||||
022A; L; 022B; #LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON
|
||||
022C; L; 022D; #LATIN CAPITAL LETTER O WITH TILDE AND MACRON
|
||||
022E; L; 022F; #LATIN CAPITAL LETTER O WITH DOT ABOVE
|
||||
0230; L; 0231; #LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON
|
||||
0232; L; 0233; #LATIN CAPITAL LETTER Y WITH MACRON
|
||||
0345; E; 03B9; #COMBINING GREEK YPOGEGRAMMENI
|
||||
0386; L; 03AC; #GREEK CAPITAL LETTER ALPHA WITH TONOS
|
||||
0388; L; 03AD; #GREEK CAPITAL LETTER EPSILON WITH TONOS
|
||||
0389; L; 03AE; #GREEK CAPITAL LETTER ETA WITH TONOS
|
||||
038A; L; 03AF; #GREEK CAPITAL LETTER IOTA WITH TONOS
|
||||
038C; L; 03CC; #GREEK CAPITAL LETTER OMICRON WITH TONOS
|
||||
038E; L; 03CD; #GREEK CAPITAL LETTER UPSILON WITH TONOS
|
||||
038F; L; 03CE; #GREEK CAPITAL LETTER OMEGA WITH TONOS
|
||||
0390; E; 03B9 0308 0301; #GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
|
||||
0391; L; 03B1; #GREEK CAPITAL LETTER ALPHA
|
||||
0392; L; 03B2; #GREEK CAPITAL LETTER BETA
|
||||
0393; L; 03B3; #GREEK CAPITAL LETTER GAMMA
|
||||
0394; L; 03B4; #GREEK CAPITAL LETTER DELTA
|
||||
0395; L; 03B5; #GREEK CAPITAL LETTER EPSILON
|
||||
0396; L; 03B6; #GREEK CAPITAL LETTER ZETA
|
||||
0397; L; 03B7; #GREEK CAPITAL LETTER ETA
|
||||
0398; L; 03B8; #GREEK CAPITAL LETTER THETA
|
||||
0399; L; 03B9; #GREEK CAPITAL LETTER IOTA
|
||||
039A; L; 03BA; #GREEK CAPITAL LETTER KAPPA
|
||||
039B; L; 03BB; #GREEK CAPITAL LETTER LAMDA
|
||||
039C; L; 03BC; #GREEK CAPITAL LETTER MU
|
||||
039D; L; 03BD; #GREEK CAPITAL LETTER NU
|
||||
039E; L; 03BE; #GREEK CAPITAL LETTER XI
|
||||
039F; L; 03BF; #GREEK CAPITAL LETTER OMICRON
|
||||
03A0; L; 03C0; #GREEK CAPITAL LETTER PI
|
||||
03A1; L; 03C1; #GREEK CAPITAL LETTER RHO
|
||||
03A3; E; 03C2; #GREEK CAPITAL LETTER SIGMA
|
||||
03A4; L; 03C4; #GREEK CAPITAL LETTER TAU
|
||||
03A5; L; 03C5; #GREEK CAPITAL LETTER UPSILON
|
||||
03A6; L; 03C6; #GREEK CAPITAL LETTER PHI
|
||||
03A7; L; 03C7; #GREEK CAPITAL LETTER CHI
|
||||
03A8; L; 03C8; #GREEK CAPITAL LETTER PSI
|
||||
03A9; L; 03C9; #GREEK CAPITAL LETTER OMEGA
|
||||
03AA; L; 03CA; #GREEK CAPITAL LETTER IOTA WITH DIALYTIKA
|
||||
03AB; L; 03CB; #GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA
|
||||
03B0; E; 03C5 0308 0301; #GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
|
||||
03C2; L; 03C2; #GREEK SMALL LETTER FINAL SIGMA
|
||||
03C3; E; 03C2; #GREEK SMALL LETTER SIGMA
|
||||
03D0; E; 03B2; #GREEK BETA SYMBOL
|
||||
03D1; E; 03B8; #GREEK THETA SYMBOL
|
||||
03D5; E; 03C6; #GREEK PHI SYMBOL
|
||||
03D6; E; 03C0; #GREEK PI SYMBOL
|
||||
03DA; L; 03DB; #GREEK LETTER STIGMA
|
||||
03DC; L; 03DD; #GREEK LETTER DIGAMMA
|
||||
03DE; L; 03DF; #GREEK LETTER KOPPA
|
||||
03E0; L; 03E1; #GREEK LETTER SAMPI
|
||||
03E2; L; 03E3; #COPTIC CAPITAL LETTER SHEI
|
||||
03E4; L; 03E5; #COPTIC CAPITAL LETTER FEI
|
||||
03E6; L; 03E7; #COPTIC CAPITAL LETTER KHEI
|
||||
03E8; L; 03E9; #COPTIC CAPITAL LETTER HORI
|
||||
03EA; L; 03EB; #COPTIC CAPITAL LETTER GANGIA
|
||||
03EC; L; 03ED; #COPTIC CAPITAL LETTER SHIMA
|
||||
03EE; L; 03EF; #COPTIC CAPITAL LETTER DEI
|
||||
03F0; E; 03BA; #GREEK KAPPA SYMBOL
|
||||
03F1; E; 03C1; #GREEK RHO SYMBOL
|
||||
03F2; E; 03C2; #GREEK LUNATE SIGMA SYMBOL
|
||||
0400; L; 0450; #CYRILLIC CAPITAL LETTER IE WITH GRAVE
|
||||
0401; L; 0451; #CYRILLIC CAPITAL LETTER IO
|
||||
0402; L; 0452; #CYRILLIC CAPITAL LETTER DJE
|
||||
0403; L; 0453; #CYRILLIC CAPITAL LETTER GJE
|
||||
0404; L; 0454; #CYRILLIC CAPITAL LETTER UKRAINIAN IE
|
||||
0405; L; 0455; #CYRILLIC CAPITAL LETTER DZE
|
||||
0406; L; 0456; #CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
|
||||
0407; L; 0457; #CYRILLIC CAPITAL LETTER YI
|
||||
0408; L; 0458; #CYRILLIC CAPITAL LETTER JE
|
||||
0409; L; 0459; #CYRILLIC CAPITAL LETTER LJE
|
||||
040A; L; 045A; #CYRILLIC CAPITAL LETTER NJE
|
||||
040B; L; 045B; #CYRILLIC CAPITAL LETTER TSHE
|
||||
040C; L; 045C; #CYRILLIC CAPITAL LETTER KJE
|
||||
040D; L; 045D; #CYRILLIC CAPITAL LETTER I WITH GRAVE
|
||||
040E; L; 045E; #CYRILLIC CAPITAL LETTER SHORT U
|
||||
040F; L; 045F; #CYRILLIC CAPITAL LETTER DZHE
|
||||
0410; L; 0430; #CYRILLIC CAPITAL LETTER A
|
||||
0411; L; 0431; #CYRILLIC CAPITAL LETTER BE
|
||||
0412; L; 0432; #CYRILLIC CAPITAL LETTER VE
|
||||
0413; L; 0433; #CYRILLIC CAPITAL LETTER GHE
|
||||
0414; L; 0434; #CYRILLIC CAPITAL LETTER DE
|
||||
0415; L; 0435; #CYRILLIC CAPITAL LETTER IE
|
||||
0416; L; 0436; #CYRILLIC CAPITAL LETTER ZHE
|
||||
0417; L; 0437; #CYRILLIC CAPITAL LETTER ZE
|
||||
0418; L; 0438; #CYRILLIC CAPITAL LETTER I
|
||||
0419; L; 0439; #CYRILLIC CAPITAL LETTER SHORT I
|
||||
041A; L; 043A; #CYRILLIC CAPITAL LETTER KA
|
||||
041B; L; 043B; #CYRILLIC CAPITAL LETTER EL
|
||||
041C; L; 043C; #CYRILLIC CAPITAL LETTER EM
|
||||
041D; L; 043D; #CYRILLIC CAPITAL LETTER EN
|
||||
041E; L; 043E; #CYRILLIC CAPITAL LETTER O
|
||||
041F; L; 043F; #CYRILLIC CAPITAL LETTER PE
|
||||
0420; L; 0440; #CYRILLIC CAPITAL LETTER ER
|
||||
0421; L; 0441; #CYRILLIC CAPITAL LETTER ES
|
||||
0422; L; 0442; #CYRILLIC CAPITAL LETTER TE
|
||||
0423; L; 0443; #CYRILLIC CAPITAL LETTER U
|
||||
0424; L; 0444; #CYRILLIC CAPITAL LETTER EF
|
||||
0425; L; 0445; #CYRILLIC CAPITAL LETTER HA
|
||||
0426; L; 0446; #CYRILLIC CAPITAL LETTER TSE
|
||||
0427; L; 0447; #CYRILLIC CAPITAL LETTER CHE
|
||||
0428; L; 0448; #CYRILLIC CAPITAL LETTER SHA
|
||||
0429; L; 0449; #CYRILLIC CAPITAL LETTER SHCHA
|
||||
042A; L; 044A; #CYRILLIC CAPITAL LETTER HARD SIGN
|
||||
042B; L; 044B; #CYRILLIC CAPITAL LETTER YERU
|
||||
042C; L; 044C; #CYRILLIC CAPITAL LETTER SOFT SIGN
|
||||
042D; L; 044D; #CYRILLIC CAPITAL LETTER E
|
||||
042E; L; 044E; #CYRILLIC CAPITAL LETTER YU
|
||||
042F; L; 044F; #CYRILLIC CAPITAL LETTER YA
|
||||
0460; L; 0461; #CYRILLIC CAPITAL LETTER OMEGA
|
||||
0462; L; 0463; #CYRILLIC CAPITAL LETTER YAT
|
||||
0464; L; 0465; #CYRILLIC CAPITAL LETTER IOTIFIED E
|
||||
0466; L; 0467; #CYRILLIC CAPITAL LETTER LITTLE YUS
|
||||
0468; L; 0469; #CYRILLIC CAPITAL LETTER IOTIFIED LITTLE YUS
|
||||
046A; L; 046B; #CYRILLIC CAPITAL LETTER BIG YUS
|
||||
046C; L; 046D; #CYRILLIC CAPITAL LETTER IOTIFIED BIG YUS
|
||||
046E; L; 046F; #CYRILLIC CAPITAL LETTER KSI
|
||||
0470; L; 0471; #CYRILLIC CAPITAL LETTER PSI
|
||||
0472; L; 0473; #CYRILLIC CAPITAL LETTER FITA
|
||||
0474; L; 0475; #CYRILLIC CAPITAL LETTER IZHITSA
|
||||
0476; L; 0477; #CYRILLIC CAPITAL LETTER IZHITSA WITH DOUBLE GRAVE ACCENT
|
||||
0478; L; 0479; #CYRILLIC CAPITAL LETTER UK
|
||||
047A; L; 047B; #CYRILLIC CAPITAL LETTER ROUND OMEGA
|
||||
047C; L; 047D; #CYRILLIC CAPITAL LETTER OMEGA WITH TITLO
|
||||
047E; L; 047F; #CYRILLIC CAPITAL LETTER OT
|
||||
0480; L; 0481; #CYRILLIC CAPITAL LETTER KOPPA
|
||||
048C; L; 048D; #CYRILLIC CAPITAL LETTER SEMISOFT SIGN
|
||||
048E; L; 048F; #CYRILLIC CAPITAL LETTER ER WITH TICK
|
||||
0490; L; 0491; #CYRILLIC CAPITAL LETTER GHE WITH UPTURN
|
||||
0492; L; 0493; #CYRILLIC CAPITAL LETTER GHE WITH STROKE
|
||||
0494; L; 0495; #CYRILLIC CAPITAL LETTER GHE WITH MIDDLE HOOK
|
||||
0496; L; 0497; #CYRILLIC CAPITAL LETTER ZHE WITH DESCENDER
|
||||
0498; L; 0499; #CYRILLIC CAPITAL LETTER ZE WITH DESCENDER
|
||||
049A; L; 049B; #CYRILLIC CAPITAL LETTER KA WITH DESCENDER
|
||||
049C; L; 049D; #CYRILLIC CAPITAL LETTER KA WITH VERTICAL STROKE
|
||||
049E; L; 049F; #CYRILLIC CAPITAL LETTER KA WITH STROKE
|
||||
04A0; L; 04A1; #CYRILLIC CAPITAL LETTER BASHKIR KA
|
||||
04A2; L; 04A3; #CYRILLIC CAPITAL LETTER EN WITH DESCENDER
|
||||
04A4; L; 04A5; #CYRILLIC CAPITAL LIGATURE EN GHE
|
||||
04A6; L; 04A7; #CYRILLIC CAPITAL LETTER PE WITH MIDDLE HOOK
|
||||
04A8; L; 04A9; #CYRILLIC CAPITAL LETTER ABKHASIAN HA
|
||||
04AA; L; 04AB; #CYRILLIC CAPITAL LETTER ES WITH DESCENDER
|
||||
04AC; L; 04AD; #CYRILLIC CAPITAL LETTER TE WITH DESCENDER
|
||||
04AE; L; 04AF; #CYRILLIC CAPITAL LETTER STRAIGHT U
|
||||
04B0; L; 04B1; #CYRILLIC CAPITAL LETTER STRAIGHT U WITH STROKE
|
||||
04B2; L; 04B3; #CYRILLIC CAPITAL LETTER HA WITH DESCENDER
|
||||
04B4; L; 04B5; #CYRILLIC CAPITAL LIGATURE TE TSE
|
||||
04B6; L; 04B7; #CYRILLIC CAPITAL LETTER CHE WITH DESCENDER
|
||||
04B8; L; 04B9; #CYRILLIC CAPITAL LETTER CHE WITH VERTICAL STROKE
|
||||
04BA; L; 04BB; #CYRILLIC CAPITAL LETTER SHHA
|
||||
04BC; L; 04BD; #CYRILLIC CAPITAL LETTER ABKHASIAN CHE
|
||||
04BE; L; 04BF; #CYRILLIC CAPITAL LETTER ABKHASIAN CHE WITH DESCENDER
|
||||
04C1; L; 04C2; #CYRILLIC CAPITAL LETTER ZHE WITH BREVE
|
||||
04C3; L; 04C4; #CYRILLIC CAPITAL LETTER KA WITH HOOK
|
||||
04C7; L; 04C8; #CYRILLIC CAPITAL LETTER EN WITH HOOK
|
||||
04CB; L; 04CC; #CYRILLIC CAPITAL LETTER KHAKASSIAN CHE
|
||||
04D0; L; 04D1; #CYRILLIC CAPITAL LETTER A WITH BREVE
|
||||
04D2; L; 04D3; #CYRILLIC CAPITAL LETTER A WITH DIAERESIS
|
||||
04D4; L; 04D5; #CYRILLIC CAPITAL LIGATURE A IE
|
||||
04D6; L; 04D7; #CYRILLIC CAPITAL LETTER IE WITH BREVE
|
||||
04D8; L; 04D9; #CYRILLIC CAPITAL LETTER SCHWA
|
||||
04DA; L; 04DB; #CYRILLIC CAPITAL LETTER SCHWA WITH DIAERESIS
|
||||
04DC; L; 04DD; #CYRILLIC CAPITAL LETTER ZHE WITH DIAERESIS
|
||||
04DE; L; 04DF; #CYRILLIC CAPITAL LETTER ZE WITH DIAERESIS
|
||||
04E0; L; 04E1; #CYRILLIC CAPITAL LETTER ABKHASIAN DZE
|
||||
04E2; L; 04E3; #CYRILLIC CAPITAL LETTER I WITH MACRON
|
||||
04E4; L; 04E5; #CYRILLIC CAPITAL LETTER I WITH DIAERESIS
|
||||
04E6; L; 04E7; #CYRILLIC CAPITAL LETTER O WITH DIAERESIS
|
||||
04E8; L; 04E9; #CYRILLIC CAPITAL LETTER BARRED O
|
||||
04EA; L; 04EB; #CYRILLIC CAPITAL LETTER BARRED O WITH DIAERESIS
|
||||
04EC; L; 04ED; #CYRILLIC CAPITAL LETTER E WITH DIAERESIS
|
||||
04EE; L; 04EF; #CYRILLIC CAPITAL LETTER U WITH MACRON
|
||||
04F0; L; 04F1; #CYRILLIC CAPITAL LETTER U WITH DIAERESIS
|
||||
04F2; L; 04F3; #CYRILLIC CAPITAL LETTER U WITH DOUBLE ACUTE
|
||||
04F4; L; 04F5; #CYRILLIC CAPITAL LETTER CHE WITH DIAERESIS
|
||||
04F8; L; 04F9; #CYRILLIC CAPITAL LETTER YERU WITH DIAERESIS
|
||||
0531; L; 0561; #ARMENIAN CAPITAL LETTER AYB
|
||||
0532; L; 0562; #ARMENIAN CAPITAL LETTER BEN
|
||||
0533; L; 0563; #ARMENIAN CAPITAL LETTER GIM
|
||||
0534; L; 0564; #ARMENIAN CAPITAL LETTER DA
|
||||
0535; L; 0565; #ARMENIAN CAPITAL LETTER ECH
|
||||
0536; L; 0566; #ARMENIAN CAPITAL LETTER ZA
|
||||
0537; L; 0567; #ARMENIAN CAPITAL LETTER EH
|
||||
0538; L; 0568; #ARMENIAN CAPITAL LETTER ET
|
||||
0539; L; 0569; #ARMENIAN CAPITAL LETTER TO
|
||||
053A; L; 056A; #ARMENIAN CAPITAL LETTER ZHE
|
||||
053B; L; 056B; #ARMENIAN CAPITAL LETTER INI
|
||||
053C; L; 056C; #ARMENIAN CAPITAL LETTER LIWN
|
||||
053D; L; 056D; #ARMENIAN CAPITAL LETTER XEH
|
||||
053E; L; 056E; #ARMENIAN CAPITAL LETTER CA
|
||||
053F; L; 056F; #ARMENIAN CAPITAL LETTER KEN
|
||||
0540; L; 0570; #ARMENIAN CAPITAL LETTER HO
|
||||
0541; L; 0571; #ARMENIAN CAPITAL LETTER JA
|
||||
0542; L; 0572; #ARMENIAN CAPITAL LETTER GHAD
|
||||
0543; L; 0573; #ARMENIAN CAPITAL LETTER CHEH
|
||||
0544; L; 0574; #ARMENIAN CAPITAL LETTER MEN
|
||||
0545; L; 0575; #ARMENIAN CAPITAL LETTER YI
|
||||
0546; L; 0576; #ARMENIAN CAPITAL LETTER NOW
|
||||
0547; L; 0577; #ARMENIAN CAPITAL LETTER SHA
|
||||
0548; L; 0578; #ARMENIAN CAPITAL LETTER VO
|
||||
0549; L; 0579; #ARMENIAN CAPITAL LETTER CHA
|
||||
054A; L; 057A; #ARMENIAN CAPITAL LETTER PEH
|
||||
054B; L; 057B; #ARMENIAN CAPITAL LETTER JHEH
|
||||
054C; L; 057C; #ARMENIAN CAPITAL LETTER RA
|
||||
054D; L; 057D; #ARMENIAN CAPITAL LETTER SEH
|
||||
054E; L; 057E; #ARMENIAN CAPITAL LETTER VEW
|
||||
054F; L; 057F; #ARMENIAN CAPITAL LETTER TIWN
|
||||
0550; L; 0580; #ARMENIAN CAPITAL LETTER REH
|
||||
0551; L; 0581; #ARMENIAN CAPITAL LETTER CO
|
||||
0552; L; 0582; #ARMENIAN CAPITAL LETTER YIWN
|
||||
0553; L; 0583; #ARMENIAN CAPITAL LETTER PIWR
|
||||
0554; L; 0584; #ARMENIAN CAPITAL LETTER KEH
|
||||
0555; L; 0585; #ARMENIAN CAPITAL LETTER OH
|
||||
0556; L; 0586; #ARMENIAN CAPITAL LETTER FEH
|
||||
0587; E; 0565 0582; #ARMENIAN SMALL LIGATURE ECH YIWN
|
||||
1E00; L; 1E01; #LATIN CAPITAL LETTER A WITH RING BELOW
|
||||
1E02; L; 1E03; #LATIN CAPITAL LETTER B WITH DOT ABOVE
|
||||
1E04; L; 1E05; #LATIN CAPITAL LETTER B WITH DOT BELOW
|
||||
1E06; L; 1E07; #LATIN CAPITAL LETTER B WITH LINE BELOW
|
||||
1E08; L; 1E09; #LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE
|
||||
1E0A; L; 1E0B; #LATIN CAPITAL LETTER D WITH DOT ABOVE
|
||||
1E0C; L; 1E0D; #LATIN CAPITAL LETTER D WITH DOT BELOW
|
||||
1E0E; L; 1E0F; #LATIN CAPITAL LETTER D WITH LINE BELOW
|
||||
1E10; L; 1E11; #LATIN CAPITAL LETTER D WITH CEDILLA
|
||||
1E12; L; 1E13; #LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW
|
||||
1E14; L; 1E15; #LATIN CAPITAL LETTER E WITH MACRON AND GRAVE
|
||||
1E16; L; 1E17; #LATIN CAPITAL LETTER E WITH MACRON AND ACUTE
|
||||
1E18; L; 1E19; #LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW
|
||||
1E1A; L; 1E1B; #LATIN CAPITAL LETTER E WITH TILDE BELOW
|
||||
1E1C; L; 1E1D; #LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE
|
||||
1E1E; L; 1E1F; #LATIN CAPITAL LETTER F WITH DOT ABOVE
|
||||
1E20; L; 1E21; #LATIN CAPITAL LETTER G WITH MACRON
|
||||
1E22; L; 1E23; #LATIN CAPITAL LETTER H WITH DOT ABOVE
|
||||
1E24; L; 1E25; #LATIN CAPITAL LETTER H WITH DOT BELOW
|
||||
1E26; L; 1E27; #LATIN CAPITAL LETTER H WITH DIAERESIS
|
||||
1E28; L; 1E29; #LATIN CAPITAL LETTER H WITH CEDILLA
|
||||
1E2A; L; 1E2B; #LATIN CAPITAL LETTER H WITH BREVE BELOW
|
||||
1E2C; L; 1E2D; #LATIN CAPITAL LETTER I WITH TILDE BELOW
|
||||
1E2E; L; 1E2F; #LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE
|
||||
1E30; L; 1E31; #LATIN CAPITAL LETTER K WITH ACUTE
|
||||
1E32; L; 1E33; #LATIN CAPITAL LETTER K WITH DOT BELOW
|
||||
1E34; L; 1E35; #LATIN CAPITAL LETTER K WITH LINE BELOW
|
||||
1E36; L; 1E37; #LATIN CAPITAL LETTER L WITH DOT BELOW
|
||||
1E38; L; 1E39; #LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON
|
||||
1E3A; L; 1E3B; #LATIN CAPITAL LETTER L WITH LINE BELOW
|
||||
1E3C; L; 1E3D; #LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW
|
||||
1E3E; L; 1E3F; #LATIN CAPITAL LETTER M WITH ACUTE
|
||||
1E40; L; 1E41; #LATIN CAPITAL LETTER M WITH DOT ABOVE
|
||||
1E42; L; 1E43; #LATIN CAPITAL LETTER M WITH DOT BELOW
|
||||
1E44; L; 1E45; #LATIN CAPITAL LETTER N WITH DOT ABOVE
|
||||
1E46; L; 1E47; #LATIN CAPITAL LETTER N WITH DOT BELOW
|
||||
1E48; L; 1E49; #LATIN CAPITAL LETTER N WITH LINE BELOW
|
||||
1E4A; L; 1E4B; #LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW
|
||||
1E4C; L; 1E4D; #LATIN CAPITAL LETTER O WITH TILDE AND ACUTE
|
||||
1E4E; L; 1E4F; #LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS
|
||||
1E50; L; 1E51; #LATIN CAPITAL LETTER O WITH MACRON AND GRAVE
|
||||
1E52; L; 1E53; #LATIN CAPITAL LETTER O WITH MACRON AND ACUTE
|
||||
1E54; L; 1E55; #LATIN CAPITAL LETTER P WITH ACUTE
|
||||
1E56; L; 1E57; #LATIN CAPITAL LETTER P WITH DOT ABOVE
|
||||
1E58; L; 1E59; #LATIN CAPITAL LETTER R WITH DOT ABOVE
|
||||
1E5A; L; 1E5B; #LATIN CAPITAL LETTER R WITH DOT BELOW
|
||||
1E5C; L; 1E5D; #LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON
|
||||
1E5E; L; 1E5F; #LATIN CAPITAL LETTER R WITH LINE BELOW
|
||||
1E60; L; 1E61; #LATIN CAPITAL LETTER S WITH DOT ABOVE
|
||||
1E62; L; 1E63; #LATIN CAPITAL LETTER S WITH DOT BELOW
|
||||
1E64; L; 1E65; #LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE
|
||||
1E66; L; 1E67; #LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE
|
||||
1E68; L; 1E69; #LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE
|
||||
1E6A; L; 1E6B; #LATIN CAPITAL LETTER T WITH DOT ABOVE
|
||||
1E6C; L; 1E6D; #LATIN CAPITAL LETTER T WITH DOT BELOW
|
||||
1E6E; L; 1E6F; #LATIN CAPITAL LETTER T WITH LINE BELOW
|
||||
1E70; L; 1E71; #LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW
|
||||
1E72; L; 1E73; #LATIN CAPITAL LETTER U WITH DIAERESIS BELOW
|
||||
1E74; L; 1E75; #LATIN CAPITAL LETTER U WITH TILDE BELOW
|
||||
1E76; L; 1E77; #LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW
|
||||
1E78; L; 1E79; #LATIN CAPITAL LETTER U WITH TILDE AND ACUTE
|
||||
1E7A; L; 1E7B; #LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS
|
||||
1E7C; L; 1E7D; #LATIN CAPITAL LETTER V WITH TILDE
|
||||
1E7E; L; 1E7F; #LATIN CAPITAL LETTER V WITH DOT BELOW
|
||||
1E80; L; 1E81; #LATIN CAPITAL LETTER W WITH GRAVE
|
||||
1E82; L; 1E83; #LATIN CAPITAL LETTER W WITH ACUTE
|
||||
1E84; L; 1E85; #LATIN CAPITAL LETTER W WITH DIAERESIS
|
||||
1E86; L; 1E87; #LATIN CAPITAL LETTER W WITH DOT ABOVE
|
||||
1E88; L; 1E89; #LATIN CAPITAL LETTER W WITH DOT BELOW
|
||||
1E8A; L; 1E8B; #LATIN CAPITAL LETTER X WITH DOT ABOVE
|
||||
1E8C; L; 1E8D; #LATIN CAPITAL LETTER X WITH DIAERESIS
|
||||
1E8E; L; 1E8F; #LATIN CAPITAL LETTER Y WITH DOT ABOVE
|
||||
1E90; L; 1E91; #LATIN CAPITAL LETTER Z WITH CIRCUMFLEX
|
||||
1E92; L; 1E93; #LATIN CAPITAL LETTER Z WITH DOT BELOW
|
||||
1E94; L; 1E95; #LATIN CAPITAL LETTER Z WITH LINE BELOW
|
||||
1E96; E; 0068 0331; #LATIN SMALL LETTER H WITH LINE BELOW
|
||||
1E97; E; 0074 0308; #LATIN SMALL LETTER T WITH DIAERESIS
|
||||
1E98; E; 0077 030A; #LATIN SMALL LETTER W WITH RING ABOVE
|
||||
1E99; E; 0079 030A; #LATIN SMALL LETTER Y WITH RING ABOVE
|
||||
1E9A; E; 0061 02BE; #LATIN SMALL LETTER A WITH RIGHT HALF RING
|
||||
1E9B; E; 1E61; #LATIN SMALL LETTER LONG S WITH DOT ABOVE
|
||||
1EA0; L; 1EA1; #LATIN CAPITAL LETTER A WITH DOT BELOW
|
||||
1EA2; L; 1EA3; #LATIN CAPITAL LETTER A WITH HOOK ABOVE
|
||||
1EA4; L; 1EA5; #LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE
|
||||
1EA6; L; 1EA7; #LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE
|
||||
1EA8; L; 1EA9; #LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE
|
||||
1EAA; L; 1EAB; #LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE
|
||||
1EAC; L; 1EAD; #LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW
|
||||
1EAE; L; 1EAF; #LATIN CAPITAL LETTER A WITH BREVE AND ACUTE
|
||||
1EB0; L; 1EB1; #LATIN CAPITAL LETTER A WITH BREVE AND GRAVE
|
||||
1EB2; L; 1EB3; #LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE
|
||||
1EB4; L; 1EB5; #LATIN CAPITAL LETTER A WITH BREVE AND TILDE
|
||||
1EB6; L; 1EB7; #LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW
|
||||
1EB8; L; 1EB9; #LATIN CAPITAL LETTER E WITH DOT BELOW
|
||||
1EBA; L; 1EBB; #LATIN CAPITAL LETTER E WITH HOOK ABOVE
|
||||
1EBC; L; 1EBD; #LATIN CAPITAL LETTER E WITH TILDE
|
||||
1EBE; L; 1EBF; #LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE
|
||||
1EC0; L; 1EC1; #LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE
|
||||
1EC2; L; 1EC3; #LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE
|
||||
1EC4; L; 1EC5; #LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE
|
||||
1EC6; L; 1EC7; #LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW
|
||||
1EC8; L; 1EC9; #LATIN CAPITAL LETTER I WITH HOOK ABOVE
|
||||
1ECA; L; 1ECB; #LATIN CAPITAL LETTER I WITH DOT BELOW
|
||||
1ECC; L; 1ECD; #LATIN CAPITAL LETTER O WITH DOT BELOW
|
||||
1ECE; L; 1ECF; #LATIN CAPITAL LETTER O WITH HOOK ABOVE
|
||||
1ED0; L; 1ED1; #LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE
|
||||
1ED2; L; 1ED3; #LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE
|
||||
1ED4; L; 1ED5; #LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE
|
||||
1ED6; L; 1ED7; #LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE
|
||||
1ED8; L; 1ED9; #LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW
|
||||
1EDA; L; 1EDB; #LATIN CAPITAL LETTER O WITH HORN AND ACUTE
|
||||
1EDC; L; 1EDD; #LATIN CAPITAL LETTER O WITH HORN AND GRAVE
|
||||
1EDE; L; 1EDF; #LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE
|
||||
1EE0; L; 1EE1; #LATIN CAPITAL LETTER O WITH HORN AND TILDE
|
||||
1EE2; L; 1EE3; #LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW
|
||||
1EE4; L; 1EE5; #LATIN CAPITAL LETTER U WITH DOT BELOW
|
||||
1EE6; L; 1EE7; #LATIN CAPITAL LETTER U WITH HOOK ABOVE
|
||||
1EE8; L; 1EE9; #LATIN CAPITAL LETTER U WITH HORN AND ACUTE
|
||||
1EEA; L; 1EEB; #LATIN CAPITAL LETTER U WITH HORN AND GRAVE
|
||||
1EEC; L; 1EED; #LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE
|
||||
1EEE; L; 1EEF; #LATIN CAPITAL LETTER U WITH HORN AND TILDE
|
||||
1EF0; L; 1EF1; #LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW
|
||||
1EF2; L; 1EF3; #LATIN CAPITAL LETTER Y WITH GRAVE
|
||||
1EF4; L; 1EF5; #LATIN CAPITAL LETTER Y WITH DOT BELOW
|
||||
1EF6; L; 1EF7; #LATIN CAPITAL LETTER Y WITH HOOK ABOVE
|
||||
1EF8; L; 1EF9; #LATIN CAPITAL LETTER Y WITH TILDE
|
||||
1F08; L; 1F00; #GREEK CAPITAL LETTER ALPHA WITH PSILI
|
||||
1F09; L; 1F01; #GREEK CAPITAL LETTER ALPHA WITH DASIA
|
||||
1F0A; L; 1F02; #GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA
|
||||
1F0B; L; 1F03; #GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA
|
||||
1F0C; L; 1F04; #GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA
|
||||
1F0D; L; 1F05; #GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA
|
||||
1F0E; L; 1F06; #GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI
|
||||
1F0F; L; 1F07; #GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI
|
||||
1F18; L; 1F10; #GREEK CAPITAL LETTER EPSILON WITH PSILI
|
||||
1F19; L; 1F11; #GREEK CAPITAL LETTER EPSILON WITH DASIA
|
||||
1F1A; L; 1F12; #GREEK CAPITAL LETTER EPSILON WITH PSILI AND VARIA
|
||||
1F1B; L; 1F13; #GREEK CAPITAL LETTER EPSILON WITH DASIA AND VARIA
|
||||
1F1C; L; 1F14; #GREEK CAPITAL LETTER EPSILON WITH PSILI AND OXIA
|
||||
1F1D; L; 1F15; #GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA
|
||||
1F28; L; 1F20; #GREEK CAPITAL LETTER ETA WITH PSILI
|
||||
1F29; L; 1F21; #GREEK CAPITAL LETTER ETA WITH DASIA
|
||||
1F2A; L; 1F22; #GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA
|
||||
1F2B; L; 1F23; #GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA
|
||||
1F2C; L; 1F24; #GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA
|
||||
1F2D; L; 1F25; #GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA
|
||||
1F2E; L; 1F26; #GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI
|
||||
1F2F; L; 1F27; #GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI
|
||||
1F38; L; 1F30; #GREEK CAPITAL LETTER IOTA WITH PSILI
|
||||
1F39; L; 1F31; #GREEK CAPITAL LETTER IOTA WITH DASIA
|
||||
1F3A; L; 1F32; #GREEK CAPITAL LETTER IOTA WITH PSILI AND VARIA
|
||||
1F3B; L; 1F33; #GREEK CAPITAL LETTER IOTA WITH DASIA AND VARIA
|
||||
1F3C; L; 1F34; #GREEK CAPITAL LETTER IOTA WITH PSILI AND OXIA
|
||||
1F3D; L; 1F35; #GREEK CAPITAL LETTER IOTA WITH DASIA AND OXIA
|
||||
1F3E; L; 1F36; #GREEK CAPITAL LETTER IOTA WITH PSILI AND PERISPOMENI
|
||||
1F3F; L; 1F37; #GREEK CAPITAL LETTER IOTA WITH DASIA AND PERISPOMENI
|
||||
1F48; L; 1F40; #GREEK CAPITAL LETTER OMICRON WITH PSILI
|
||||
1F49; L; 1F41; #GREEK CAPITAL LETTER OMICRON WITH DASIA
|
||||
1F4A; L; 1F42; #GREEK CAPITAL LETTER OMICRON WITH PSILI AND VARIA
|
||||
1F4B; L; 1F43; #GREEK CAPITAL LETTER OMICRON WITH DASIA AND VARIA
|
||||
1F4C; L; 1F44; #GREEK CAPITAL LETTER OMICRON WITH PSILI AND OXIA
|
||||
1F4D; L; 1F45; #GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA
|
||||
1F50; E; 03C5 0313; #GREEK SMALL LETTER UPSILON WITH PSILI
|
||||
1F52; E; 03C5 0313 0300; #GREEK SMALL LETTER UPSILON WITH PSILI AND VARIA
|
||||
1F54; E; 03C5 0313 0301; #GREEK SMALL LETTER UPSILON WITH PSILI AND OXIA
|
||||
1F56; E; 03C5 0313 0342; #GREEK SMALL LETTER UPSILON WITH PSILI AND PERISPOMENI
|
||||
1F59; L; 1F51; #GREEK CAPITAL LETTER UPSILON WITH DASIA
|
||||
1F5B; L; 1F53; #GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA
|
||||
1F5D; L; 1F55; #GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA
|
||||
1F5F; L; 1F57; #GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI
|
||||
1F68; L; 1F60; #GREEK CAPITAL LETTER OMEGA WITH PSILI
|
||||
1F69; L; 1F61; #GREEK CAPITAL LETTER OMEGA WITH DASIA
|
||||
1F6A; L; 1F62; #GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA
|
||||
1F6B; L; 1F63; #GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA
|
||||
1F6C; L; 1F64; #GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA
|
||||
1F6D; L; 1F65; #GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA
|
||||
1F6E; L; 1F66; #GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI
|
||||
1F6F; L; 1F67; #GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI
|
||||
1F80; E; 1F00 03B9; #GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI
|
||||
1F81; E; 1F01 03B9; #GREEK SMALL LETTER ALPHA WITH DASIA AND YPOGEGRAMMENI
|
||||
1F82; E; 1F02 03B9; #GREEK SMALL LETTER ALPHA WITH PSILI AND VARIA AND YPOGEGRAMMENI
|
||||
1F83; E; 1F03 03B9; #GREEK SMALL LETTER ALPHA WITH DASIA AND VARIA AND YPOGEGRAMMENI
|
||||
1F84; E; 1F04 03B9; #GREEK SMALL LETTER ALPHA WITH PSILI AND OXIA AND YPOGEGRAMMENI
|
||||
1F85; E; 1F05 03B9; #GREEK SMALL LETTER ALPHA WITH DASIA AND OXIA AND YPOGEGRAMMENI
|
||||
1F86; E; 1F06 03B9; #GREEK SMALL LETTER ALPHA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI
|
||||
1F87; E; 1F07 03B9; #GREEK SMALL LETTER ALPHA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI
|
||||
1F88; E; 1F00 03B9; #GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI
|
||||
1F89; E; 1F01 03B9; #GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI
|
||||
1F8A; E; 1F02 03B9; #GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI
|
||||
1F8B; E; 1F03 03B9; #GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI
|
||||
1F8C; E; 1F04 03B9; #GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI
|
||||
1F8D; E; 1F05 03B9; #GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI
|
||||
1F8E; E; 1F06 03B9; #GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
|
||||
1F8F; E; 1F07 03B9; #GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
|
||||
1F90; E; 1F20 03B9; #GREEK SMALL LETTER ETA WITH PSILI AND YPOGEGRAMMENI
|
||||
1F91; E; 1F21 03B9; #GREEK SMALL LETTER ETA WITH DASIA AND YPOGEGRAMMENI
|
||||
1F92; E; 1F22 03B9; #GREEK SMALL LETTER ETA WITH PSILI AND VARIA AND YPOGEGRAMMENI
|
||||
1F93; E; 1F23 03B9; #GREEK SMALL LETTER ETA WITH DASIA AND VARIA AND YPOGEGRAMMENI
|
||||
1F94; E; 1F24 03B9; #GREEK SMALL LETTER ETA WITH PSILI AND OXIA AND YPOGEGRAMMENI
|
||||
1F95; E; 1F25 03B9; #GREEK SMALL LETTER ETA WITH DASIA AND OXIA AND YPOGEGRAMMENI
|
||||
1F96; E; 1F26 03B9; #GREEK SMALL LETTER ETA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI
|
||||
1F97; E; 1F27 03B9; #GREEK SMALL LETTER ETA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI
|
||||
1F98; E; 1F20 03B9; #GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI
|
||||
1F99; E; 1F21 03B9; #GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI
|
||||
1F9A; E; 1F22 03B9; #GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI
|
||||
1F9B; E; 1F23 03B9; #GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI
|
||||
1F9C; E; 1F24 03B9; #GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND PROSGEGRAMMENI
|
||||
1F9D; E; 1F25 03B9; #GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI
|
||||
1F9E; E; 1F26 03B9; #GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
|
||||
1F9F; E; 1F27 03B9; #GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
|
||||
1FA0; E; 1F60 03B9; #GREEK SMALL LETTER OMEGA WITH PSILI AND YPOGEGRAMMENI
|
||||
1FA1; E; 1F61 03B9; #GREEK SMALL LETTER OMEGA WITH DASIA AND YPOGEGRAMMENI
|
||||
1FA2; E; 1F62 03B9; #GREEK SMALL LETTER OMEGA WITH PSILI AND VARIA AND YPOGEGRAMMENI
|
||||
1FA3; E; 1F63 03B9; #GREEK SMALL LETTER OMEGA WITH DASIA AND VARIA AND YPOGEGRAMMENI
|
||||
1FA4; E; 1F64 03B9; #GREEK SMALL LETTER OMEGA WITH PSILI AND OXIA AND YPOGEGRAMMENI
|
||||
1FA5; E; 1F65 03B9; #GREEK SMALL LETTER OMEGA WITH DASIA AND OXIA AND YPOGEGRAMMENI
|
||||
1FA6; E; 1F66 03B9; #GREEK SMALL LETTER OMEGA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI
|
||||
1FA7; E; 1F67 03B9; #GREEK SMALL LETTER OMEGA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI
|
||||
1FA8; E; 1F60 03B9; #GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI
|
||||
1FA9; E; 1F61 03B9; #GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI
|
||||
1FAA; E; 1F62 03B9; #GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND PROSGEGRAMMENI
|
||||
1FAB; E; 1F63 03B9; #GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI
|
||||
1FAC; E; 1F64 03B9; #GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI
|
||||
1FAD; E; 1F65 03B9; #GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI
|
||||
1FAE; E; 1F66 03B9; #GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
|
||||
1FAF; E; 1F67 03B9; #GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
|
||||
1FB2; E; 1F70 03B9; #GREEK SMALL LETTER ALPHA WITH VARIA AND YPOGEGRAMMENI
|
||||
1FB3; E; 03B1 03B9; #GREEK SMALL LETTER ALPHA WITH YPOGEGRAMMENI
|
||||
1FB4; E; 03AC 03B9; #GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI
|
||||
1FB6; E; 03B1 0342; #GREEK SMALL LETTER ALPHA WITH PERISPOMENI
|
||||
1FB7; E; 03B1 0342 03B9; #GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI
|
||||
1FB8; L; 1FB0; #GREEK CAPITAL LETTER ALPHA WITH VRACHY
|
||||
1FB9; L; 1FB1; #GREEK CAPITAL LETTER ALPHA WITH MACRON
|
||||
1FBA; L; 1F70; #GREEK CAPITAL LETTER ALPHA WITH VARIA
|
||||
1FBB; L; 1F71; #GREEK CAPITAL LETTER ALPHA WITH OXIA
|
||||
1FBC; E; 03B1 03B9; #GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI
|
||||
1FBE; E; 03B9; #GREEK PROSGEGRAMMENI
|
||||
1FC2; E; 1F74 03B9; #GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI
|
||||
1FC3; E; 03B7 03B9; #GREEK SMALL LETTER ETA WITH YPOGEGRAMMENI
|
||||
1FC4; E; 03AE 03B9; #GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI
|
||||
1FC6; E; 03B7 0342; #GREEK SMALL LETTER ETA WITH PERISPOMENI
|
||||
1FC7; E; 03B7 0342 03B9; #GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI
|
||||
1FC8; L; 1F72; #GREEK CAPITAL LETTER EPSILON WITH VARIA
|
||||
1FC9; L; 1F73; #GREEK CAPITAL LETTER EPSILON WITH OXIA
|
||||
1FCA; L; 1F74; #GREEK CAPITAL LETTER ETA WITH VARIA
|
||||
1FCB; L; 1F75; #GREEK CAPITAL LETTER ETA WITH OXIA
|
||||
1FCC; E; 03B7 03B9; #GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI
|
||||
1FD2; E; 03B9 0308 0300; #GREEK SMALL LETTER IOTA WITH DIALYTIKA AND VARIA
|
||||
1FD3; E; 03B9 0308 0301; #GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA
|
||||
1FD6; E; 03B9 0342; #GREEK SMALL LETTER IOTA WITH PERISPOMENI
|
||||
1FD7; E; 03B9 0308 0342; #GREEK SMALL LETTER IOTA WITH DIALYTIKA AND PERISPOMENI
|
||||
1FD8; L; 1FD0; #GREEK CAPITAL LETTER IOTA WITH VRACHY
|
||||
1FD9; L; 1FD1; #GREEK CAPITAL LETTER IOTA WITH MACRON
|
||||
1FDA; L; 1F76; #GREEK CAPITAL LETTER IOTA WITH VARIA
|
||||
1FDB; L; 1F77; #GREEK CAPITAL LETTER IOTA WITH OXIA
|
||||
1FE2; E; 03C5 0308 0300; #GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND VARIA
|
||||
1FE3; E; 03C5 0308 0301; #GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA
|
||||
1FE4; E; 03C1 0313; #GREEK SMALL LETTER RHO WITH PSILI
|
||||
1FE6; E; 03C5 0342; #GREEK SMALL LETTER UPSILON WITH PERISPOMENI
|
||||
1FE7; E; 03C5 0308 0342; #GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND PERISPOMENI
|
||||
1FE8; L; 1FE0; #GREEK CAPITAL LETTER UPSILON WITH VRACHY
|
||||
1FE9; L; 1FE1; #GREEK CAPITAL LETTER UPSILON WITH MACRON
|
||||
1FEA; L; 1F7A; #GREEK CAPITAL LETTER UPSILON WITH VARIA
|
||||
1FEB; L; 1F7B; #GREEK CAPITAL LETTER UPSILON WITH OXIA
|
||||
1FEC; L; 1FE5; #GREEK CAPITAL LETTER RHO WITH DASIA
|
||||
1FF2; E; 1F7C 03B9; #GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI
|
||||
1FF3; E; 03C9 03B9; #GREEK SMALL LETTER OMEGA WITH YPOGEGRAMMENI
|
||||
1FF4; E; 03CE 03B9; #GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI
|
||||
1FF6; E; 03C9 0342; #GREEK SMALL LETTER OMEGA WITH PERISPOMENI
|
||||
1FF7; E; 03C9 0342 03B9; #GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI
|
||||
1FF8; L; 1F78; #GREEK CAPITAL LETTER OMICRON WITH VARIA
|
||||
1FF9; L; 1F79; #GREEK CAPITAL LETTER OMICRON WITH OXIA
|
||||
1FFA; L; 1F7C; #GREEK CAPITAL LETTER OMEGA WITH VARIA
|
||||
1FFB; L; 1F7D; #GREEK CAPITAL LETTER OMEGA WITH OXIA
|
||||
1FFC; E; 03C9 03B9; #GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI
|
||||
2126; L; 03C9; #OHM SIGN
|
||||
212A; L; 006B; #KELVIN SIGN
|
||||
212B; L; 00E5; #ANGSTROM SIGN
|
||||
2160; L; 2170; #ROMAN NUMERAL ONE
|
||||
2161; L; 2171; #ROMAN NUMERAL TWO
|
||||
2162; L; 2172; #ROMAN NUMERAL THREE
|
||||
2163; L; 2173; #ROMAN NUMERAL FOUR
|
||||
2164; L; 2174; #ROMAN NUMERAL FIVE
|
||||
2165; L; 2175; #ROMAN NUMERAL SIX
|
||||
2166; L; 2176; #ROMAN NUMERAL SEVEN
|
||||
2167; L; 2177; #ROMAN NUMERAL EIGHT
|
||||
2168; L; 2178; #ROMAN NUMERAL NINE
|
||||
2169; L; 2179; #ROMAN NUMERAL TEN
|
||||
216A; L; 217A; #ROMAN NUMERAL ELEVEN
|
||||
216B; L; 217B; #ROMAN NUMERAL TWELVE
|
||||
216C; L; 217C; #ROMAN NUMERAL FIFTY
|
||||
216D; L; 217D; #ROMAN NUMERAL ONE HUNDRED
|
||||
216E; L; 217E; #ROMAN NUMERAL FIVE HUNDRED
|
||||
216F; L; 217F; #ROMAN NUMERAL ONE THOUSAND
|
||||
24B6; L; 24D0; #CIRCLED LATIN CAPITAL LETTER A
|
||||
24B7; L; 24D1; #CIRCLED LATIN CAPITAL LETTER B
|
||||
24B8; L; 24D2; #CIRCLED LATIN CAPITAL LETTER C
|
||||
24B9; L; 24D3; #CIRCLED LATIN CAPITAL LETTER D
|
||||
24BA; L; 24D4; #CIRCLED LATIN CAPITAL LETTER E
|
||||
24BB; L; 24D5; #CIRCLED LATIN CAPITAL LETTER F
|
||||
24BC; L; 24D6; #CIRCLED LATIN CAPITAL LETTER G
|
||||
24BD; L; 24D7; #CIRCLED LATIN CAPITAL LETTER H
|
||||
24BE; L; 24D8; #CIRCLED LATIN CAPITAL LETTER I
|
||||
24BF; L; 24D9; #CIRCLED LATIN CAPITAL LETTER J
|
||||
24C0; L; 24DA; #CIRCLED LATIN CAPITAL LETTER K
|
||||
24C1; L; 24DB; #CIRCLED LATIN CAPITAL LETTER L
|
||||
24C2; L; 24DC; #CIRCLED LATIN CAPITAL LETTER M
|
||||
24C3; L; 24DD; #CIRCLED LATIN CAPITAL LETTER N
|
||||
24C4; L; 24DE; #CIRCLED LATIN CAPITAL LETTER O
|
||||
24C5; L; 24DF; #CIRCLED LATIN CAPITAL LETTER P
|
||||
24C6; L; 24E0; #CIRCLED LATIN CAPITAL LETTER Q
|
||||
24C7; L; 24E1; #CIRCLED LATIN CAPITAL LETTER R
|
||||
24C8; L; 24E2; #CIRCLED LATIN CAPITAL LETTER S
|
||||
24C9; L; 24E3; #CIRCLED LATIN CAPITAL LETTER T
|
||||
24CA; L; 24E4; #CIRCLED LATIN CAPITAL LETTER U
|
||||
24CB; L; 24E5; #CIRCLED LATIN CAPITAL LETTER V
|
||||
24CC; L; 24E6; #CIRCLED LATIN CAPITAL LETTER W
|
||||
24CD; L; 24E7; #CIRCLED LATIN CAPITAL LETTER X
|
||||
24CE; L; 24E8; #CIRCLED LATIN CAPITAL LETTER Y
|
||||
24CF; L; 24E9; #CIRCLED LATIN CAPITAL LETTER Z
|
||||
FB00; E; 0066 0066; #LATIN SMALL LIGATURE FF
|
||||
FB01; E; 0066 0069; #LATIN SMALL LIGATURE FI
|
||||
FB02; E; 0066 006C; #LATIN SMALL LIGATURE FL
|
||||
FB03; E; 0066 0066 0069; #LATIN SMALL LIGATURE FFI
|
||||
FB04; E; 0066 0066 006C; #LATIN SMALL LIGATURE FFL
|
||||
FB05; E; 0073 0074; #LATIN SMALL LIGATURE LONG S T
|
||||
FB06; E; 0073 0074; #LATIN SMALL LIGATURE ST
|
||||
FB13; E; 0574 0576; #ARMENIAN SMALL LIGATURE MEN NOW
|
||||
FB14; E; 0574 0565; #ARMENIAN SMALL LIGATURE MEN ECH
|
||||
FB15; E; 0574 056B; #ARMENIAN SMALL LIGATURE MEN INI
|
||||
FB16; E; 057E 0576; #ARMENIAN SMALL LIGATURE VEW NOW
|
||||
FB17; E; 0574 056D; #ARMENIAN SMALL LIGATURE MEN XEH
|
||||
FF21; L; FF41; #FULLWIDTH LATIN CAPITAL LETTER A
|
||||
FF22; L; FF42; #FULLWIDTH LATIN CAPITAL LETTER B
|
||||
FF23; L; FF43; #FULLWIDTH LATIN CAPITAL LETTER C
|
||||
FF24; L; FF44; #FULLWIDTH LATIN CAPITAL LETTER D
|
||||
FF25; L; FF45; #FULLWIDTH LATIN CAPITAL LETTER E
|
||||
FF26; L; FF46; #FULLWIDTH LATIN CAPITAL LETTER F
|
||||
FF27; L; FF47; #FULLWIDTH LATIN CAPITAL LETTER G
|
||||
FF28; L; FF48; #FULLWIDTH LATIN CAPITAL LETTER H
|
||||
FF29; L; FF49; #FULLWIDTH LATIN CAPITAL LETTER I
|
||||
FF2A; L; FF4A; #FULLWIDTH LATIN CAPITAL LETTER J
|
||||
FF2B; L; FF4B; #FULLWIDTH LATIN CAPITAL LETTER K
|
||||
FF2C; L; FF4C; #FULLWIDTH LATIN CAPITAL LETTER L
|
||||
FF2D; L; FF4D; #FULLWIDTH LATIN CAPITAL LETTER M
|
||||
FF2E; L; FF4E; #FULLWIDTH LATIN CAPITAL LETTER N
|
||||
FF2F; L; FF4F; #FULLWIDTH LATIN CAPITAL LETTER O
|
||||
FF30; L; FF50; #FULLWIDTH LATIN CAPITAL LETTER P
|
||||
FF31; L; FF51; #FULLWIDTH LATIN CAPITAL LETTER Q
|
||||
FF32; L; FF52; #FULLWIDTH LATIN CAPITAL LETTER R
|
||||
FF33; L; FF53; #FULLWIDTH LATIN CAPITAL LETTER S
|
||||
FF34; L; FF54; #FULLWIDTH LATIN CAPITAL LETTER T
|
||||
FF35; L; FF55; #FULLWIDTH LATIN CAPITAL LETTER U
|
||||
FF36; L; FF56; #FULLWIDTH LATIN CAPITAL LETTER V
|
||||
FF37; L; FF57; #FULLWIDTH LATIN CAPITAL LETTER W
|
||||
FF38; L; FF58; #FULLWIDTH LATIN CAPITAL LETTER X
|
||||
FF39; L; FF59; #FULLWIDTH LATIN CAPITAL LETTER Y
|
||||
FF3A; L; FF5A; #FULLWIDTH LATIN CAPITAL LETTER Z
|
||||
|
||||
|
@ -1,245 +0,0 @@
|
||||
# Mirror.txt
|
||||
|
||||
# Informative properties for Unicode characters:
|
||||
# This file lists characters that have the mirrored property
|
||||
# where there is another Unicode character that typically has a glyph
|
||||
# that is the mirror image of the original character's glyph.
|
||||
|
||||
# The file contains a list of lines with mappings from one code point
|
||||
# to another one for character-based mirroring.
|
||||
# Note that for "real" mirroring, a rendering engine needs to select
|
||||
# appropriate alternative glyphs, and that many Unicode characters do not
|
||||
# have a mirror-image Unicode character.
|
||||
|
||||
# Each mapping line contains two fields, separated by a semicolon (';').
|
||||
# Each of the two fields contains a code point represented as a
|
||||
# variable-length hexadecimal value with 1 to 6 digits.
|
||||
# The mapping lines are listed in ascending order by the first field, the
|
||||
# original code points.
|
||||
|
||||
28;29
|
||||
29;28
|
||||
3C;3E
|
||||
3E;3C
|
||||
5B;5D
|
||||
5D;5B
|
||||
7B;7D
|
||||
7D;7B
|
||||
AB;BB
|
||||
BB;AB
|
||||
2039;203A
|
||||
203A;2039
|
||||
2045;2046
|
||||
2046;2045
|
||||
207D;207E
|
||||
207E;207D
|
||||
208D;208E
|
||||
208E;208D
|
||||
2201;2201
|
||||
2202;2202
|
||||
2203;2203
|
||||
2204;2204
|
||||
2208;220B
|
||||
2209;220C
|
||||
220A;220D
|
||||
220B;2208
|
||||
220C;2209
|
||||
220D;220A
|
||||
2211;2211
|
||||
2215;2216
|
||||
2216;2215
|
||||
221A;221A
|
||||
221B;221B
|
||||
221C;221C
|
||||
221D;221D
|
||||
221F;221F
|
||||
2220;2220
|
||||
2221;2221
|
||||
2222;2222
|
||||
2224;2224
|
||||
2226;2226
|
||||
222B;222B
|
||||
222C;222C
|
||||
222D;222D
|
||||
222E;222E
|
||||
222F;222F
|
||||
2230;2230
|
||||
2231;2231
|
||||
2232;2232
|
||||
2233;2233
|
||||
2239;2239
|
||||
223B;223B
|
||||
223C;223D
|
||||
223D;223C
|
||||
223E;223E
|
||||
223F;223F
|
||||
2240;2240
|
||||
2241;2241
|
||||
2242;2242
|
||||
2243;22CD
|
||||
2244;2244
|
||||
2245;2245
|
||||
2246;2246
|
||||
2247;2247
|
||||
2248;2248
|
||||
2249;2249
|
||||
224A;224A
|
||||
224B;224B
|
||||
224C;224C
|
||||
2252;2253
|
||||
2253;2252
|
||||
2254;2255
|
||||
2255;2254
|
||||
225F;225F
|
||||
2260;2260
|
||||
2262;2262
|
||||
2264;2265
|
||||
2265;2264
|
||||
2266;2267
|
||||
2267;2266
|
||||
2268;2269
|
||||
2269;2268
|
||||
226A;226B
|
||||
226B;226A
|
||||
226E;226F
|
||||
226F;226E
|
||||
2270;2271
|
||||
2271;2270
|
||||
2272;2273
|
||||
2273;2272
|
||||
2274;2275
|
||||
2275;2274
|
||||
2276;2277
|
||||
2277;2276
|
||||
2278;2279
|
||||
2279;2278
|
||||
227A;227B
|
||||
227B;227A
|
||||
227C;227D
|
||||
227D;227C
|
||||
227E;227F
|
||||
227F;227E
|
||||
2280;2281
|
||||
2281;2280
|
||||
2282;2283
|
||||
2283;2282
|
||||
2284;2285
|
||||
2285;2284
|
||||
2286;2287
|
||||
2287;2286
|
||||
2288;2289
|
||||
2289;2288
|
||||
228A;228B
|
||||
228B;228A
|
||||
228C;228C
|
||||
228F;2290
|
||||
2290;228F
|
||||
2291;2292
|
||||
2292;2291
|
||||
2298;2298
|
||||
22A2;22A3
|
||||
22A3;22A2
|
||||
22A6;22A6
|
||||
22A7;22A7
|
||||
22A8;22A8
|
||||
22A9;22A9
|
||||
22AA;22AA
|
||||
22AB;22AB
|
||||
22AC;22AC
|
||||
22AD;22AD
|
||||
22AE;22AE
|
||||
22AF;22AF
|
||||
22B0;22B1
|
||||
22B1;22B0
|
||||
22B2;22B3
|
||||
22B3;22B2
|
||||
22B4;22B5
|
||||
22B5;22B4
|
||||
22B6;22B7
|
||||
22B7;22B6
|
||||
22B8;22B8
|
||||
22BE;22BE
|
||||
22BF;22BF
|
||||
22C9;22CA
|
||||
22CA;22C9
|
||||
22CB;22CC
|
||||
22CC;22CB
|
||||
22CD;2243
|
||||
22D0;22D1
|
||||
22D1;22D0
|
||||
22D6;22D7
|
||||
22D7;22D6
|
||||
22D8;22D9
|
||||
22D9;22D8
|
||||
22DA;22DB
|
||||
22DB;22DA
|
||||
22DC;22DD
|
||||
22DD;22DC
|
||||
22DE;22DF
|
||||
22DF;22DE
|
||||
22E0;22E1
|
||||
22E1;22E0
|
||||
22E2;22E3
|
||||
22E3;22E2
|
||||
22E4;22E5
|
||||
22E5;22E4
|
||||
22E6;22E7
|
||||
22E7;22E6
|
||||
22E8;22E9
|
||||
22E9;22E8
|
||||
22EA;22EB
|
||||
22EB;22EA
|
||||
22EC;22ED
|
||||
22ED;22EC
|
||||
22F0;22F1
|
||||
22F1;22F0
|
||||
2308;2309
|
||||
2309;2308
|
||||
230A;230B
|
||||
230B;230A
|
||||
2320;2320
|
||||
2321;2321
|
||||
2329;232A
|
||||
232A;2329
|
||||
3008;3009
|
||||
3009;3008
|
||||
300A;300B
|
||||
300B;300A
|
||||
300C;300C
|
||||
300D;300D
|
||||
300E;300E
|
||||
300F;300F
|
||||
3010;3011
|
||||
3011;3010
|
||||
3014;3014
|
||||
3015;3015
|
||||
3016;3017
|
||||
3017;3016
|
||||
3018;3019
|
||||
3019;3018
|
||||
301A;301B
|
||||
301B;301A
|
||||
|
||||
# Mirrored-character mappings for characters that are missing the mirrored property:
|
||||
# Not listed are characters that could have the mirrored property but would not
|
||||
# have a mirror-image mapping.
|
||||
|
||||
# Mathematical Operators
|
||||
# 2205;2349
|
||||
|
||||
# APL
|
||||
# No APL symbol has the mirrored property!
|
||||
# 2300;2349
|
||||
# 2326;232B
|
||||
# 232B;2326
|
||||
# 233F;2340
|
||||
# 2340;233F
|
||||
# 2341;2342
|
||||
# 2342;2341
|
||||
# 2343;2344
|
||||
# 2344;2343
|
||||
# 2345;2346
|
||||
# 2346;2345
|
||||
# 2347;2348
|
||||
# 2348;2347
|
||||
# 2349;2205
|
@ -1,219 +0,0 @@
|
||||
# SpecialCasing-2.txt
|
||||
#
|
||||
# Special Casing Properties
|
||||
#
|
||||
# This file is a supplement to the UnicodeData file.
|
||||
# It contains additional information about the casing of Unicode characters.
|
||||
# (For compatibility, the UnicodeData.txt file only contains case mappings for
|
||||
# characters where they are 1-1, and does not have locale-specific mappings.)
|
||||
# These are informative character properties.
|
||||
#
|
||||
# Send comments to mark@unicode.org
|
||||
#
|
||||
# ================================================================================
|
||||
# Format
|
||||
# ================================================================================
|
||||
# The entries in this file are in the following machine-readable format:
|
||||
#
|
||||
# <entry> := <case_mapping> <condition_list>? (<s>* "#" <comment>)?
|
||||
#
|
||||
# <case_mapping> := <source> <sep> <lower> <sep> <title> <sep> <upper> <sep>
|
||||
#
|
||||
# <source> := <code_point>
|
||||
# <sep> := <s>* ";" <s>*
|
||||
# <lower> := <code_point_list>
|
||||
# <title> := <code_point_list>
|
||||
# <upper> := <code_point_list>
|
||||
# <code_point_list> := <code_point> (<s>+ <code_point>)*
|
||||
# <code_point> := <hex><hex><hex><hex>
|
||||
# <hex> := [0-9A-Fa-f]
|
||||
# <s> := <space>
|
||||
#
|
||||
# <condition_list> := <locale>? (<s>+ <context>)*
|
||||
# <locale> := <ISO_3166_code> ( "_" <ISO_639_code> )? ( "_" <variant> )?
|
||||
# <ISO_3166_code> := 2-letter country code,
|
||||
# as in http://www.unicode.org/unicode/onlinedat/countries.html
|
||||
# <ISO_639_code> := 2-letter language code,
|
||||
# as in http://www.unicode.org/unicode/onlinedat/languages.html
|
||||
# <context> := "FINAL" | "NON_FINAL" | "MODERN" | "NON_MODERN"
|
||||
#
|
||||
# A condition list overrides the normal behavior if any of the listed conditions is true.
|
||||
# FINAL: The letter is not followed by a letter of category L* (e.g. Ll, Lt, Lu, Lm, or Lo).
|
||||
# MODERN: The mapping is only used for modern text.
|
||||
# A condition preceded by "NON_" represents the negation of that condition.
|
||||
#
|
||||
# New contexts may be added in the future.
|
||||
# Parsers of this file must be prepared to deal with that situation.
|
||||
# Additional whitespace around elements is optional. Blank lines are ignored in parsing.
|
||||
# On any line, all text following "#" is a comment and is ignored in parsing.
|
||||
# ================================================================================
|
||||
|
||||
# ================================================================================
|
||||
# Unconditional mappings
|
||||
# ================================================================================
|
||||
|
||||
# The German es-zed is special--the normal mapping is to SS.
|
||||
# Note: the titlecase should never occur in practice. It is equal to titlecase(uppercase(<es-zed>))
|
||||
|
||||
00DF; 00DF; 0053 0073; 0053 0053; # LATIN SMALL LETTER SHARP S
|
||||
|
||||
# Ligatures
|
||||
|
||||
FB00; FB00; 0046 0066; 0046 0046; # LATIN SMALL LIGATURE FF
|
||||
FB01; FB01; 0046 0069; 0046 0049; # LATIN SMALL LIGATURE FI
|
||||
FB02; FB02; 0046 006C; 0046 004C; # LATIN SMALL LIGATURE FL
|
||||
FB03; FB03; 0046 0066 0069; 0046 0046 0049; # LATIN SMALL LIGATURE FFI
|
||||
FB04; FB04; 0046 0066 006C; 0046 0046 004C; # LATIN SMALL LIGATURE FFL
|
||||
FB05; FB05; 0053 0074; 0053 0054; # LATIN SMALL LIGATURE LONG S T
|
||||
FB06; FB06; 0053 0074; 0053 0054; # LATIN SMALL LIGATURE ST
|
||||
|
||||
0587; 0587; 0535 0582; 0535 0552; # ARMENIAN SMALL LIGATURE ECH YIWN
|
||||
FB13; FB13; 0544 0576; 0544 0546; # ARMENIAN SMALL LIGATURE MEN NOW
|
||||
FB14; FB14; 0544 0565; 0544 0535; # ARMENIAN SMALL LIGATURE MEN ECH
|
||||
FB15; FB15; 0544 056B; 0544 053B; # ARMENIAN SMALL LIGATURE MEN INI
|
||||
FB16; FB16; 054E 0576; 054E 0546; # ARMENIAN SMALL LIGATURE VEW NOW
|
||||
FB17; FB17; 0544 056D; 0544 053D; # ARMENIAN SMALL LIGATURE MEN XEH
|
||||
|
||||
# No corresponding uppercase precomposed character
|
||||
|
||||
0149; 0149; 02BC 006E; 02BC 004E; # LATIN SMALL LETTER N PRECEDED BY APOSTROPHE
|
||||
0390; 0390; 0399 0308 0301; 0399 0308 0301; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
|
||||
03B0; 03B0; 03A5 0308 0301; 03A5 0308 0301; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
|
||||
01F0; 01F0; 004A 030C; 004A 030C; # LATIN SMALL LETTER J WITH CARON
|
||||
1E96; 1E96; 0048 0331; 0048 0331; # LATIN SMALL LETTER H WITH LINE BELOW
|
||||
1E97; 1E97; 0054 0308; 0054 0308; # LATIN SMALL LETTER T WITH DIAERESIS
|
||||
1E98; 1E98; 0057 030A; 0057 030A; # LATIN SMALL LETTER W WITH RING ABOVE
|
||||
1E99; 1E99; 0059 030A; 0059 030A; # LATIN SMALL LETTER Y WITH RING ABOVE
|
||||
1E9A; 1E9A; 0041 02BE; 0041 02BE; # LATIN SMALL LETTER A WITH RIGHT HALF RING
|
||||
1F50; 1F50; 03A5 0313; 03A5 0313; # GREEK SMALL LETTER UPSILON WITH PSILI
|
||||
1F52; 1F52; 03A5 0313 0300; 03A5 0313 0300; # GREEK SMALL LETTER UPSILON WITH PSILI AND VARIA
|
||||
1F54; 1F54; 03A5 0313 0301; 03A5 0313 0301; # GREEK SMALL LETTER UPSILON WITH PSILI AND OXIA
|
||||
1F56; 1F56; 03A5 0313 0342; 03A5 0313 0342; # GREEK SMALL LETTER UPSILON WITH PSILI AND PERISPOMENI
|
||||
1FB6; 1FB6; 0391 0342; 0391 0342; # GREEK SMALL LETTER ALPHA WITH PERISPOMENI
|
||||
1FC6; 1FC6; 0397 0342; 0397 0342; # GREEK SMALL LETTER ETA WITH PERISPOMENI
|
||||
1FD2; 1FD2; 0399 0308 0300; 0399 0308 0300; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND VARIA
|
||||
1FD3; 1FD3; 0399 0308 0301; 0399 0308 0301; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA
|
||||
1FD6; 1FD6; 0399 0342; 0399 0342; # GREEK SMALL LETTER IOTA WITH PERISPOMENI
|
||||
1FD7; 1FD7; 0399 0308 0342; 0399 0308 0342; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND PERISPOMENI
|
||||
1FE2; 1FE2; 03A5 0308 0300; 03A5 0308 0300; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND VARIA
|
||||
1FE3; 1FE3; 03A5 0308 0301; 03A5 0308 0301; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA
|
||||
1FE4; 1FE4; 03A1 0313; 03A1 0313; # GREEK SMALL LETTER RHO WITH PSILI
|
||||
1FE6; 1FE6; 03A5 0342; 03A5 0342; # GREEK SMALL LETTER UPSILON WITH PERISPOMENI
|
||||
1FE7; 1FE7; 03A5 0308 0342; 03A5 0308 0342; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND PERISPOMENI
|
||||
1FF6; 1FF6; 03A9 0342; 03A9 0342; # GREEK SMALL LETTER OMEGA WITH PERISPOMENI
|
||||
|
||||
# IMPORTANT-when capitalizing iota-subscript (0345)
|
||||
# It MUST be in normalized form--moved to the end of any sequence of combining marks.
|
||||
# This is because logically it represents a following base character!
|
||||
# E.g. <iota_subscript> (<Mn> | <Mc> | <Me>)+ => (<Mn> | <Mc> | <Me>)+ <iota_subscript>
|
||||
# It should never be the first character in a word, so in titlecasing it can be left as is.
|
||||
|
||||
# The following cases are already in the UnicodeData file, so are only commented here.
|
||||
|
||||
# 0345; 0345; 0345; 0399; # COMBINING GREEK YPOGEGRAMMENI
|
||||
|
||||
# All letters with YPOGEGRAMMENI (iota-subscript) or PROSGEGRAMMENI (iota adscript)
|
||||
# have special uppercases.
|
||||
# Note: characters with PROSGEGRAMMENI are actually titlecase, not uppercase!
|
||||
|
||||
1F80; 1F80; 1F88; 1F08 0399; # GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI
|
||||
1F81; 1F81; 1F89; 1F09 0399; # GREEK SMALL LETTER ALPHA WITH DASIA AND YPOGEGRAMMENI
|
||||
1F82; 1F82; 1F8A; 1F0A 0399; # GREEK SMALL LETTER ALPHA WITH PSILI AND VARIA AND YPOGEGRAMMENI
|
||||
1F83; 1F83; 1F8B; 1F0B 0399; # GREEK SMALL LETTER ALPHA WITH DASIA AND VARIA AND YPOGEGRAMMENI
|
||||
1F84; 1F84; 1F8C; 1F0C 0399; # GREEK SMALL LETTER ALPHA WITH PSILI AND OXIA AND YPOGEGRAMMENI
|
||||
1F85; 1F85; 1F8D; 1F0D 0399; # GREEK SMALL LETTER ALPHA WITH DASIA AND OXIA AND YPOGEGRAMMENI
|
||||
1F86; 1F86; 1F8E; 1F0E 0399; # GREEK SMALL LETTER ALPHA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI
|
||||
1F87; 1F87; 1F8F; 1F0F 0399; # GREEK SMALL LETTER ALPHA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI
|
||||
1F88; 1F80; 1F88; 1F08 0399; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI
|
||||
1F89; 1F81; 1F89; 1F09 0399; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI
|
||||
1F8A; 1F82; 1F8A; 1F0A 0399; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI
|
||||
1F8B; 1F83; 1F8B; 1F0B 0399; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI
|
||||
1F8C; 1F84; 1F8C; 1F0C 0399; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI
|
||||
1F8D; 1F85; 1F8D; 1F0D 0399; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI
|
||||
1F8E; 1F86; 1F8E; 1F0E 0399; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
|
||||
1F8F; 1F87; 1F8F; 1F0F 0399; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
|
||||
1F90; 1F90; 1F98; 1F28 0399; # GREEK SMALL LETTER ETA WITH PSILI AND YPOGEGRAMMENI
|
||||
1F91; 1F91; 1F99; 1F29 0399; # GREEK SMALL LETTER ETA WITH DASIA AND YPOGEGRAMMENI
|
||||
1F92; 1F92; 1F9A; 1F2A 0399; # GREEK SMALL LETTER ETA WITH PSILI AND VARIA AND YPOGEGRAMMENI
|
||||
1F93; 1F93; 1F9B; 1F2B 0399; # GREEK SMALL LETTER ETA WITH DASIA AND VARIA AND YPOGEGRAMMENI
|
||||
1F94; 1F94; 1F9C; 1F2C 0399; # GREEK SMALL LETTER ETA WITH PSILI AND OXIA AND YPOGEGRAMMENI
|
||||
1F95; 1F95; 1F9D; 1F2D 0399; # GREEK SMALL LETTER ETA WITH DASIA AND OXIA AND YPOGEGRAMMENI
|
||||
1F96; 1F96; 1F9E; 1F2E 0399; # GREEK SMALL LETTER ETA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI
|
||||
1F97; 1F97; 1F9F; 1F2F 0399; # GREEK SMALL LETTER ETA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI
|
||||
1F98; 1F90; 1F98; 1F28 0399; # GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI
|
||||
1F99; 1F91; 1F99; 1F29 0399; # GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI
|
||||
1F9A; 1F92; 1F9A; 1F2A 0399; # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI
|
||||
1F9B; 1F93; 1F9B; 1F2B 0399; # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI
|
||||
1F9C; 1F94; 1F9C; 1F2C 0399; # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND PROSGEGRAMMENI
|
||||
1F9D; 1F95; 1F9D; 1F2D 0399; # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI
|
||||
1F9E; 1F96; 1F9E; 1F2E 0399; # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
|
||||
1F9F; 1F97; 1F9F; 1F2F 0399; # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
|
||||
1FA0; 1FA0; 1FA8; 1F68 0399; # GREEK SMALL LETTER OMEGA WITH PSILI AND YPOGEGRAMMENI
|
||||
1FA1; 1FA1; 1FA9; 1F69 0399; # GREEK SMALL LETTER OMEGA WITH DASIA AND YPOGEGRAMMENI
|
||||
1FA2; 1FA2; 1FAA; 1F6A 0399; # GREEK SMALL LETTER OMEGA WITH PSILI AND VARIA AND YPOGEGRAMMENI
|
||||
1FA3; 1FA3; 1FAB; 1F6B 0399; # GREEK SMALL LETTER OMEGA WITH DASIA AND VARIA AND YPOGEGRAMMENI
|
||||
1FA4; 1FA4; 1FAC; 1F6C 0399; # GREEK SMALL LETTER OMEGA WITH PSILI AND OXIA AND YPOGEGRAMMENI
|
||||
1FA5; 1FA5; 1FAD; 1F6D 0399; # GREEK SMALL LETTER OMEGA WITH DASIA AND OXIA AND YPOGEGRAMMENI
|
||||
1FA6; 1FA6; 1FAE; 1F6E 0399; # GREEK SMALL LETTER OMEGA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI
|
||||
1FA7; 1FA7; 1FAF; 1F6F 0399; # GREEK SMALL LETTER OMEGA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI
|
||||
1FA8; 1FA0; 1FA8; 1F68 0399; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI
|
||||
1FA9; 1FA1; 1FA9; 1F69 0399; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI
|
||||
1FAA; 1FA2; 1FAA; 1F6A 0399; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND PROSGEGRAMMENI
|
||||
1FAB; 1FA3; 1FAB; 1F6B 0399; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI
|
||||
1FAC; 1FA4; 1FAC; 1F6C 0399; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI
|
||||
1FAD; 1FA5; 1FAD; 1F6D 0399; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI
|
||||
1FAE; 1FA6; 1FAE; 1F6E 0399; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
|
||||
1FAF; 1FA7; 1FAF; 1F6F 0399; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
|
||||
|
||||
1FB3; 1FB3; 1FBC; 0391 0399; # GREEK SMALL LETTER ALPHA WITH YPOGEGRAMMENI
|
||||
1FBC; 1FB3; 1FBC; 0391 0399; # GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI
|
||||
1FC3; 1FC3; 1FCC; 0397 0399; # GREEK SMALL LETTER ETA WITH YPOGEGRAMMENI
|
||||
1FCC; 1FC3; 1FCC; 0397 0399; # GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI
|
||||
1FF3; 1FF3; 1FFC; 03A9 0399; # GREEK SMALL LETTER OMEGA WITH YPOGEGRAMMENI
|
||||
1FFC; 1FF3; 1FFC; 03A9 0399; # GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI
|
||||
|
||||
# Some characters with YPOGEGRAMMENI also have no corresponding titlecases
|
||||
|
||||
1FB2; 1FB2; 1FBA 0345; 1FBA 0399; # GREEK SMALL LETTER ALPHA WITH VARIA AND YPOGEGRAMMENI
|
||||
1FB4; 1FB4; 0386 0345; 0386 0399; # GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI
|
||||
1FC2; 1FC2; 1FCA 0345; 1FCA 0399; # GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI
|
||||
1FC4; 1FC4; 0389 0345; 0389 0399; # GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI
|
||||
1FF2; 1FF2; 1FFA 0345; 1FFA 0399; # GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI
|
||||
1FF4; 1FF4; 038F 0345; 038F 0399; # GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI
|
||||
|
||||
1FB7; 1FB7; 0391 0342 0345; 0391 0342 0399; # GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI
|
||||
1FC7; 1FC7; 0397 0342 0345; 0397 0342 0399; # GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI
|
||||
1FF7; 1FF7; 03A9 0342 0345; 03A9 0342 0399; # GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI
|
||||
|
||||
# ================================================================================
|
||||
# Conditional mappings
|
||||
# ================================================================================
|
||||
|
||||
# Special case for final form of sigma
|
||||
|
||||
03A3; 03C2; 03A3; 03A3; FINAL; # GREEK CAPITAL LETTER SIGMA
|
||||
|
||||
# Note: the following cases for non-final are already in the UnicodeData file.
|
||||
|
||||
# 03A3; 03C3; 03A3; 03A3; # GREEK CAPITAL LETTER SIGMA
|
||||
# 03C3; 03C3; 03A3; 03A3; # GREEK SMALL LETTER SIGMA
|
||||
# 03C2; 03C2; 03A3; 03A3; # GREEK SMALL LETTER FINAL SIGMA
|
||||
|
||||
# Note: the following cases are not included, since they would normalize in lowercasing
|
||||
|
||||
# 03C3; 03C2; 03A3; 03A3; FINAL; # GREEK SMALL LETTER SIGMA
|
||||
# 03C2; 03C3; 03A3; 03A3; NON_FINAL; # GREEK SMALL LETTER FINAL SIGMA
|
||||
|
||||
# ================================================================================
|
||||
# Locale-sensitive mappings
|
||||
# ================================================================================
|
||||
|
||||
# Turkish
|
||||
|
||||
0049; 0131; 0049; 0049; TR; # LATIN CAPITAL LETTER I
|
||||
0069; 0069; 0130; 0130; TR; # LATIN SMALL LETTER I
|
||||
|
||||
# Note: the following cases are already in the UnicodeData file.
|
||||
|
||||
# 0131; 0131; 0049; 0049; TR; # LATIN SMALL LETTER DOTLESS I
|
||||
# 0130; 0069; 0130; 0130; TR; # LATIN CAPITAL LETTER I WITH DOT ABOVE
|
File diff suppressed because it is too large
Load Diff
@ -1,773 +0,0 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 1999-2001, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
*
|
||||
* originally created by: Markus W. Scherer
|
||||
*
|
||||
* This program reads the Unicode character database text file,
|
||||
* parses it, and extracts most of the properties for each character.
|
||||
* It then writes a binary file containing the properties
|
||||
* that is designed to be used directly for random-access to
|
||||
* the properties of each Unicode character.
|
||||
*
|
||||
* adapted for use under BeOS by Axel Dörfler, axeld@pinc-software.de.
|
||||
*/
|
||||
|
||||
|
||||
#include "genprops.h"
|
||||
#include "utf.h"
|
||||
|
||||
#include <UnicodeChar.h>
|
||||
#include <Path.h>
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
|
||||
bool gBeVerbose = false;
|
||||
|
||||
|
||||
/* prototypes --------------------------------------------------------------- */
|
||||
|
||||
static status_t parseMirror(const char *filename);
|
||||
static status_t parseSpecialCasing(const char *filename);
|
||||
static status_t parseCaseFolding(const char *filename);
|
||||
static status_t parseDB(const char *filename);
|
||||
|
||||
/* -------------------------------------------------------------------------- */
|
||||
|
||||
typedef status_t
|
||||
UParseLineFn(void *context,char *fields[][2],int32 fieldCount);
|
||||
|
||||
|
||||
/*
 * Reads the text file "filename" line by line and splits every data line
 * into "fieldCount" fields separated by "delimiter".
 *
 * Empty lines and lines starting with '#' are skipped; an in-line '#'
 * comment is cut off together with the blanks in front of it.
 * For every field i, fields[i][0] points to its first character and
 * fields[i][1] to the delimiter (or the terminating NUL) that ends it;
 * the fields are not NUL-terminated individually.
 * "lineFunction" is called once per data line with "context" passed through.
 *
 * Returns B_IO_ERROR if the file cannot be opened, B_ERROR if a line has
 * too few fields, and otherwise the result of the last "lineFunction" call
 * (parsing stops at the first negative result), or B_OK if it was never
 * called.
 */
status_t
parseDelimitedFile(const char *filename, char delimiter,
	char *fields[][2], int32 fieldCount,
	UParseLineFn *lineFunction, void *context)
{
	FILE *input = fopen(filename,"r");
	if (input == NULL) {
		fprintf(stderr, "*** Unable to open input file %s\n",filename);
		return B_IO_ERROR;
	}

	status_t result = B_OK;
	char buffer[300];

	while (fgets(buffer, sizeof(buffer), input) != NULL) {
		// strip the line terminator(s)
		char *tail = buffer + strlen(buffer);
		while (tail > buffer && (tail[-1] == '\r' || tail[-1] == '\n'))
			*--tail = '\0';

		// nothing to do for empty lines and pure comment lines
		if (buffer[0] == '\0' || buffer[0] == '#')
			continue;

		// cut off an in-line comment, including the blanks preceding it
		char *comment = strchr(buffer, '#');
		if (comment != NULL) {
			while (comment > buffer && (comment[-1] == ' ' || comment[-1] == '\t'))
				comment--;

			*comment = '\0';
		}

		// record the boundaries of every field
		bool tooFewFields = false;
		char *cursor = buffer;
		for (int32 index = 0; index < fieldCount; index++) {
			char *fieldEnd = cursor;
			while (*fieldEnd != delimiter && *fieldEnd != 0)
				fieldEnd++;

			fields[index][0] = cursor;
			fields[index][1] = fieldEnd;

			if (*fieldEnd != 0) {
				// step over the delimiter to the next field
				cursor = fieldEnd + 1;
				continue;
			}

			// the line ended here - that is only fine for the last field
			cursor = fieldEnd;
			if (index + 1 < fieldCount) {
				fprintf(stderr, "*** too few fields in line %s\n", buffer);
				tooFewFields = true;
				break;
			}
		}

		if (tooFewFields) {
			result = B_ERROR;
			break;
		}

		// hand the line over to the caller's handler
		result = lineFunction(context, fields, fieldCount);
		if (result < B_OK)
			break;
	}

	fclose(input);
	return result;
}
|
||||
|
||||
|
||||
/*
 * Returns a pointer to the first character of "s" that is neither a
 * space nor a tab (this may be the terminating NUL).
 */
static const char *
skipWhitespace(const char *s)
{
	return s + strspn(s, " \t");
}
|
||||
|
||||
|
||||
/*
|
||||
* parse a list of code points
|
||||
* store them as a string in dest[destSize] with the string length in dest[0]
|
||||
* set the first code point in *pFirst
|
||||
* return the number of code points
|
||||
*/
|
||||
|
||||
static int32
|
||||
parseCodePoints(const char *s,UChar *dest, int32 destSize,uint32 *pFirst,status_t *pErrorCode)
|
||||
{
|
||||
int32 i,count = 0;
|
||||
*pErrorCode = B_OK;
|
||||
|
||||
if (pFirst != NULL)
|
||||
*pFirst = 0xffff;
|
||||
|
||||
// leave dest[0] for the length value
|
||||
for (i = 1;;) {
|
||||
s = skipWhitespace(s);
|
||||
if (*s == ';' || *s == 0) {
|
||||
dest[0] = (UChar)(i-1);
|
||||
return count;
|
||||
}
|
||||
|
||||
/* read one code point */
|
||||
char *end;
|
||||
uint32 value = strtoul(s, &end, 16);
|
||||
if (end <= s || (*end != ' ' && *end != '\t' && *end != ';') || value >= 0x110000) {
|
||||
fprintf(stderr, "genprops: syntax error parsing code point at %s\n", s);
|
||||
*pErrorCode = B_ERROR;
|
||||
return -1;
|
||||
}
|
||||
|
||||
// store the first code point
|
||||
if (++count == 1 && pFirst != NULL)
|
||||
*pFirst = value;
|
||||
|
||||
// append it to the destination array
|
||||
UTF_APPEND_CHAR(dest, i, destSize, value);
|
||||
|
||||
// overflow?
|
||||
if (i >= destSize) {
|
||||
fprintf(stderr, "genprops: code point sequence too long at at %s\n", s);
|
||||
*pErrorCode = B_BAD_VALUE;
|
||||
return -1;
|
||||
}
|
||||
|
||||
// go to the following characters
|
||||
s = end;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* parser for Mirror.txt ---------------------------------------------------- */
|
||||
|
||||
#define MAX_MIRROR_COUNT 2000
|
||||
|
||||
static uint32 mirrorMappings[MAX_MIRROR_COUNT][2];
|
||||
static int32 mirrorCount = 0;
|
||||
|
||||
|
||||
/*
 * Line callback for Mirror.txt: stores the code point of field 0 and its
 * mirror code point of field 1 (both hexadecimal) in the next free
 * mirrorMappings slot.
 * Returns B_ERROR on a syntax error, and B_BAD_VALUE once the table has
 * been filled up to MAX_MIRROR_COUNT entries.
 */
static status_t
mirrorLineFn(void *context,char *fields[][2],int32 fieldCount)
{
	uint32 *const mapping = mirrorMappings[mirrorCount];
	char *stop;

	// the code point itself; it must span the whole field
	char *const sourceText = fields[0][0];
	mapping[0] = strtoul(sourceText, &stop, 16);
	if (stop <= sourceText || stop != fields[0][1]) {
		fprintf(stderr, "genprops: syntax error in Mirror.txt field 0 at %s\n", fields[0][0]);
		return B_ERROR;
	}

	// the code point it is mirrored to
	char *const mirrorText = fields[1][0];
	mapping[1] = strtoul(mirrorText, &stop, 16);
	if (stop <= mirrorText || stop != fields[1][1]) {
		fprintf(stderr, "genprops: syntax error in Mirror.txt field 1 at %s\n", fields[1][0]);
		return B_ERROR;
	}

	mirrorCount++;
	if (mirrorCount != MAX_MIRROR_COUNT)
		return B_OK;

	fprintf(stderr, "genprops: too many mirror mappings\n");
	return B_BAD_VALUE;
}
|
||||
|
||||
|
||||
/*
 * Parses the mirror mapping file "filename"; every data line consists of
 * two ';'-separated fields that are handled by mirrorLineFn().
 */
static status_t
parseMirror(const char *filename)
{
	const int32 kFieldCount = 2;
	char *fields[kFieldCount][2];

	status_t status = parseDelimitedFile(filename, ';', fields, kFieldCount,
		mirrorLineFn, NULL);
	return status;
}
|
||||
|
||||
|
||||
/* parser for SpecialCasing.txt --------------------------------------------- */
|
||||
|
||||
#define MAX_SPECIAL_CASING_COUNT 500
|
||||
|
||||
static SpecialCasing specialCasings[MAX_SPECIAL_CASING_COUNT];
|
||||
static int32 specialCasingCount = 0;
|
||||
|
||||
|
||||
/*
 * Line callback for SpecialCasing.txt. The fields are used as:
 *   0 - code point, 1 - lowercase, 2 - titlecase, 3 - uppercase mapping,
 *   4 - optional condition text
 * The result is stored in the next free specialCasings slot. Entries with a
 * condition are only marked "complex" and get empty mappings.
 * Returns B_ERROR/B_BAD_VALUE on parse errors, and B_BAD_VALUE once the
 * table has been filled up to MAX_SPECIAL_CASING_COUNT entries.
 */
static status_t
specialCasingLineFn(void *context,char *fields[][2],int32 fieldCount)
{
	SpecialCasing &casing = specialCasings[specialCasingCount];
	char *end;

	// get code point
	casing.code = strtoul(skipWhitespace(fields[0][0]), &end, 16);
	end = (char *)skipWhitespace(end);
	if (end <= fields[0][0] || end != fields[0][1]) {
		fprintf(stderr, "genprops: syntax error in SpecialCasing.txt field 0 at %s\n", fields[0][0]);
		return B_ERROR;
	}

	// is this a complex mapping?
	if (*skipWhitespace(fields[4][0]) != 0) {
		// there is some condition text in the fifth field
		casing.isComplex = true;

		// do not store any actual mappings for this
		casing.lowerCase[0] = 0;
		casing.upperCase[0] = 0;
		casing.titleCase[0] = 0;
	} else {
		// unconditional mapping: clear the "complex" flag and get the
		// case mappings
		casing.isComplex = false;

		// parseCodePoints() resets the error code on entry, so it has to
		// be checked after every call - otherwise an error in an earlier
		// field would be hidden by a later, successful call.
		status_t errorCode = B_OK;
		parseCodePoints(fields[1][0], casing.lowerCase, 32, NULL, &errorCode);
		if (errorCode >= B_OK)
			parseCodePoints(fields[3][0], casing.upperCase, 32, NULL, &errorCode);
		if (errorCode >= B_OK)
			parseCodePoints(fields[2][0], casing.titleCase, 32, NULL, &errorCode);

		if (errorCode < B_OK) {
			fprintf(stderr, "genprops: error parsing special casing at %s\n", fields[0][0]);
			return errorCode;
		}
	}

	if (++specialCasingCount == MAX_SPECIAL_CASING_COUNT) {
		fprintf(stderr, "genprops: too many special casing mappings\n");
		return B_BAD_VALUE;
	}
	return B_OK;
}
|
||||
|
||||
|
||||
static int
|
||||
compareSpecialCasings(const void *left, const void *right)
|
||||
{
|
||||
return ((const SpecialCasing *)left)->code - ((const SpecialCasing *)right)->code;
|
||||
}
|
||||
|
||||
|
||||
/*
 * Parses the special casing file "filename" into specialCasings, sorts the
 * entries by code point, and collapses several entries for the same code
 * point into a single "complex" entry without mappings.
 */
static status_t
parseSpecialCasing(const char *filename)
{
	char *fields[5][2];
	const status_t parseStatus = parseDelimitedFile(filename, ';', fields, 5, specialCasingLineFn, NULL);
	if (parseStatus < B_OK)
		return parseStatus;

	// bring the entries into code point order
	if (specialCasingCount > 0)
		qsort(specialCasings, specialCasingCount, sizeof(SpecialCasing), compareSpecialCasings);

	// Look at every pair of neighbours: if both describe the same code
	// point, the first one is marked for removal, and the second one is
	// turned into a "complex" entry.
	int32 removed = 0;
	for (int32 index = 0; index + 1 < specialCasingCount; index++) {
		SpecialCasing &current = specialCasings[index];
		SpecialCasing &next = specialCasings[index + 1];
		if (current.code != next.code)
			continue;

		// this value moves the entry to the end in the sort below
		current.code = 0x7fffffff;

		next.isComplex = true;
		next.lowerCase[0] = 0;
		next.upperCase[0] = 0;
		next.titleCase[0] = 0;
		removed++;
	}

	if (removed == 0)
		return B_OK;

	// the removed entries are sorted to the end, and then cut off
	qsort(specialCasings, specialCasingCount, sizeof(SpecialCasing), compareSpecialCasings);
	specialCasingCount -= removed;
	return B_OK;
}
|
||||
|
||||
|
||||
/* parser for CaseFolding.txt ----------------------------------------------- */
|
||||
|
||||
#define MAX_CASE_FOLDING_COUNT 500
|
||||
|
||||
static CaseFolding caseFoldings[MAX_CASE_FOLDING_COUNT];
|
||||
static int32 caseFoldingCount = 0;
|
||||
|
||||
|
||||
/*
 * Line callback for CaseFolding.txt. The fields are used as:
 *   0 - code point, 1 - status letter, 2 - mapping (list of code points)
 * Lines with status 'L' are ignored. A simple ('S') and a full ('F')
 * mapping following each other for the same code point are merged into
 * one entry; an 'I' entry is stored as a marker without mappings.
 * Returns B_ERROR/B_BAD_VALUE on parse errors, and B_BAD_VALUE once the
 * table has been filled up to MAX_CASE_FOLDING_COUNT entries.
 */
static status_t
caseFoldingLineFn(void *context,char *fields[][2],int32 fieldCount)
{
	CaseFolding &entry = caseFoldings[caseFoldingCount];

	// the code point has to span the whole first field
	char *parsedEnd;
	entry.code = strtoul(skipWhitespace(fields[0][0]), &parsedEnd, 16);
	parsedEnd = (char *)skipWhitespace(parsedEnd);
	if (parsedEnd <= fields[0][0] || parsedEnd != fields[0][1]) {
		fprintf(stderr, "genprops: syntax error in CaseFolding.txt field 0 at %s\n", fields[0][0]);
		return B_ERROR;
	}

	// the status is the first non-blank character of the second field
	const char kind = *skipWhitespace(fields[1][0]);
	entry.status = kind;

	switch (kind) {
		case 'L':
		case 'E':
		case 'C':
		case 'S':
		case 'F':
		case 'I':
			break;

		default:
			fprintf(stderr, "genprops: unrecognized status field in CaseFolding.txt at %s\n", fields[0][0]);
			return B_ERROR;
	}

	// mappings identical to the UnicodeData.txt lowercase mappings are
	// not stored at all
	if (kind == 'L')
		return B_OK;

	// read the mapping itself
	status_t errorCode;
	const int32 mappingLength = parseCodePoints(fields[2][0], entry.full, 32, &entry.simple, &errorCode);
	if (errorCode < B_OK) {
		fprintf(stderr, "genprops: error parsing CaseFolding.txt mapping at %s\n", fields[0][0]);
		return errorCode;
	}

	// only a mapping to exactly one code point counts as a simple mapping
	if (mappingLength != 1)
		entry.simple = 0;

	// the entry stored directly before this one, if it has the same code
	CaseFolding *previous = NULL;
	if (caseFoldingCount > 0 && caseFoldings[caseFoldingCount - 1].code == entry.code)
		previous = &caseFoldings[caseFoldingCount - 1];

	if (kind == 'S') {
		// add the simple mapping to a preceding full mapping
		if (previous != NULL && previous->status == 'F') {
			previous->simple = entry.simple;
			return B_OK;
		}
	} else if (kind == 'F') {
		// add the full mapping to a preceding simple mapping
		if (previous != NULL && previous->status == 'S') {
			memcpy(previous->full, entry.full, 32 * U_SIZEOF_UCHAR);
			return B_OK;
		}
	} else if (kind == 'I') {
		// keep only a marker for special handling for cases like dotless i
		entry.simple = 0;
		entry.full[0] = 0;
	}

	caseFoldingCount++;
	if (caseFoldingCount != MAX_CASE_FOLDING_COUNT)
		return B_OK;

	fprintf(stderr, "genprops: too many case folding mappings\n");
	return B_BAD_VALUE;
}
|
||||
|
||||
|
||||
/*
 * Parses the case folding file "filename"; every data line consists of
 * three ';'-separated fields that are handled by caseFoldingLineFn().
 */
static status_t
parseCaseFolding(const char *filename)
{
	const int32 kFieldCount = 3;
	char *fields[kFieldCount][2];

	status_t status = parseDelimitedFile(filename, ';', fields, kFieldCount,
		caseFoldingLineFn, NULL);
	return status;
}
|
||||
|
||||
|
||||
/* parser for UnicodeData.txt ----------------------------------------------- */
|
||||
|
||||
// general categories
|
||||
const char *const
|
||||
genCategoryNames[B_UNICODE_CATEGORY_COUNT] = {
|
||||
NULL,
|
||||
"Lu", "Ll", "Lt", "Lm", "Lo", "Mn", "Me",
|
||||
"Mc", "Nd", "Nl", "No",
|
||||
"Zs", "Zl", "Zp",
|
||||
"Cc", "Cf", "Co", "Cs",
|
||||
"Pd", "Ps", "Pe", "Pc", "Po",
|
||||
"Sm", "Sc", "Sk", "So",
|
||||
"Pi", "Pf",
|
||||
"Cn"
|
||||
};
|
||||
|
||||
const char *const
|
||||
bidiNames[B_UNICODE_DIRECTION_COUNT] = {
|
||||
"L", "R", "EN", "ES", "ET", "AN", "CS", "B", "S",
|
||||
"WS", "ON", "LRE", "LRO", "AL", "RLE", "RLO", "PDF", "NSM", "BN"
|
||||
};
|
||||
|
||||
// control code properties
|
||||
static const struct {
|
||||
uint32 code;
|
||||
uint8 generalCategory;
|
||||
} controlProps[] = {
|
||||
/* TAB */ { 0x9, B_UNICODE_SPACE_SEPARATOR },
|
||||
/* VT */ { 0xb, B_UNICODE_SPACE_SEPARATOR },
|
||||
/* LF */ { 0xa, B_UNICODE_PARAGRAPH_SEPARATOR },
|
||||
/* FF */ { 0xc, B_UNICODE_LINE_SEPARATOR },
|
||||
/* CR */ { 0xd, B_UNICODE_PARAGRAPH_SEPARATOR },
|
||||
/* FS */ { 0x1c, B_UNICODE_PARAGRAPH_SEPARATOR },
|
||||
/* GS */ { 0x1d, B_UNICODE_PARAGRAPH_SEPARATOR },
|
||||
/* RS */ { 0x1e, B_UNICODE_PARAGRAPH_SEPARATOR },
|
||||
/* US */ { 0x1f, B_UNICODE_SPACE_SEPARATOR },
|
||||
/* NL */ { 0x85, B_UNICODE_PARAGRAPH_SEPARATOR }
|
||||
};
|
||||
|
||||
static struct {
|
||||
uint32 first, last, props;
|
||||
char name[80];
|
||||
} unicodeAreas[32];
|
||||
|
||||
static int32 unicodeAreaIndex = 0;
|
||||
|
||||
|
||||
/*
 * Line callback for UnicodeData.txt, called with the 15 fields of a line.
 *
 * The fields used are: 0 - code point, 1 - name (only to detect
 * "<..., First>"/"<..., Last>" area markers), 2 - general category,
 * 3 - canonical combining class, 4 - BiDi category, 6 - decimal digit
 * value, 7 - digit value, 8 - numeric value (optionally a fraction),
 * 9 - mirrored flag, 12/13/14 - upper-/lower-/titlecase mapping.
 *
 * The previously parsed mirror, special casing, and case folding tables
 * are consumed in order using static indexes: an entry is attached to the
 * properties only if its code equals the code of the current line.
 *
 * The properties are packed with makeProps(), and either stored for the
 * single code point via addProps(), or recorded in unicodeAreas if the
 * line is the first/last entry of an area.
 * Returns B_ERROR on any syntax or consistency error.
 */
static status_t
unicodeDataLineFn(void *context,char *fields[][2],int32 fieldCount)
{
	static int32 mirrorIndex = 0, specialCasingIndex = 0, caseFoldingIndex = 0;
	const int32 kMaxAreaCount = sizeof(unicodeAreas) / sizeof(unicodeAreas[0]);
	Props props;
	char *end;
	uint32 value;

	// reset the properties
	memset(&props, 0, sizeof(Props));
	props.decimalDigitValue = props.digitValue = -1;
	props.numericValue = 0x80000000;

	// get the character code, field 0
	props.code = strtoul(fields[0][0], &end, 16);
	if (end <= fields[0][0] || end != fields[0][1]) {
		fprintf(stderr, "genprops: syntax error in field 0 at %s\n", fields[0][0]);
		return B_ERROR;
	}

	// get general category, field 2
	// (the field is terminated in place for the string comparison; index 0
	// of the name table is NULL and therefore skipped)
	*fields[2][1] = 0;
	for (int i = 1;;) {
		if (!strcmp(fields[2][0], genCategoryNames[i])) {
			props.generalCategory = (uint8)i;
			break;
		}
		if (++i == B_UNICODE_CATEGORY_COUNT) {
			fprintf(stderr, "genprops: unknown general category \"%s\" at code 0x%lx\n", fields[2][0], props.code);
			return B_ERROR;
		}
	}

	// get canonical combining class, field 3
	props.canonicalCombining = (uint8)strtoul(fields[3][0], &end, 10);
	if (end <= fields[3][0] || end != fields[3][1]) {
		fprintf(stderr, "genprops: syntax error in field 3 at code 0x%lx\n", props.code);
		return B_ERROR;
	}

	// get BiDi category, field 4
	*fields[4][1] = 0;
	for (int i = 0;;) {
		if (!strcmp(fields[4][0], bidiNames[i])) {
			props.bidi = (uint8)i;
			break;
		}
		if (++i == B_UNICODE_DIRECTION_COUNT) {
			fprintf(stderr, "genprops: unknown BiDi category \"%s\" at code 0x%lx\n", fields[4][0], props.code);
			return B_ERROR;
		}
	}

	// decimal digit value, field 6
	if (fields[6][0] < fields[6][1]) {
		value = strtoul(fields[6][0], &end, 10);
		if (end != fields[6][1] || value > 0x7fff) {
			fprintf(stderr, "genprops: syntax error in field 6 at code 0x%lx\n", props.code);
			return B_ERROR;
		}
		props.decimalDigitValue = (int16)value;
	}

	// digit value, field 7
	if (fields[7][0] < fields[7][1]) {
		value = strtoul(fields[7][0], &end, 10);
		if (end != fields[7][1] || value > 0x7fff) {
			fprintf(stderr, "genprops: syntax error in field 7 at code 0x%lx\n", props.code);
			return B_ERROR;
		}
		props.digitValue = (int16)value;
	}

	// numeric value, field 8
	if (fields[8][0] < fields[8][1]) {
		char *s = fields[8][0];
		bool isNegative;

		// get a possible minus sign
		if (*s == '-') {
			isNegative = true;
			++s;
		} else
			isNegative = false;

		value = strtoul(s, &end, 10);
		if (value > 0 && *end == '/') {
			// field 8 may contain a fractional value, get the denominator
			props.denominator = strtoul(end+1, &end, 10);
			if (props.denominator == 0) {
				fprintf(stderr, "genprops: denominator is 0 in field 8 at code 0x%lx\n", props.code);
				return B_ERROR;
			}
		}
		if (end != fields[8][1] || value > 0x7fffffff) {
			fprintf(stderr, "genprops: syntax error in field 8 at code 0x%lx\n", props.code);
			return B_ERROR;
		}

		if (isNegative)
			props.numericValue = -(int32)value;
		else
			props.numericValue = (int32)value;

		props.hasNumericValue = true;
	}

	// get Mirrored flag, field 9
	if (*fields[9][0] == 'Y') {
		props.isMirrored = 1;
	} else if (fields[9][1] - fields[9][0] != 1 || *fields[9][0] != 'N') {
		fprintf(stderr, "genprops: syntax error in field 9 at code 0x%lx\n", props.code);
		return B_ERROR;
	}

	// get uppercase mapping, field 12
	value = strtoul(fields[12][0], &end, 16);
	if (end != fields[12][1]) {
		fprintf(stderr, "genprops: syntax error in field 12 at code 0x%lx\n", props.code);
		return B_ERROR;
	}
	props.upperCase = value;

	// get lowercase value, field 13
	value = strtoul(fields[13][0], &end, 16);
	if (end != fields[13][1]) {
		fprintf(stderr, "genprops: syntax error in field 13 at code 0x%lx\n", props.code);
		return B_ERROR;
	}
	props.lowerCase = value;

	// get titlecase value, field 14
	value = strtoul(fields[14][0], &end, 16);
	if (end != fields[14][1]) {
		fprintf(stderr, "genprops: syntax error in field 14 at code 0x%lx\n", props.code);
		return B_ERROR;
	}
	props.titleCase = value;

	// override properties for some common control characters
	if (props.generalCategory == B_UNICODE_CONTROL_CHAR) {
		for (uint32 i = 0; i < sizeof(controlProps) / sizeof(controlProps[0]); i++) {
			if (controlProps[i].code == props.code)
				props.generalCategory = controlProps[i].generalCategory;
		}
	}

	// set additional properties from previously parsed files
	if (mirrorIndex < mirrorCount && props.code == mirrorMappings[mirrorIndex][0])
		props.mirrorMapping = mirrorMappings[mirrorIndex++][1];

	if (specialCasingIndex < specialCasingCount && props.code == specialCasings[specialCasingIndex].code)
		props.specialCasing = specialCasings + specialCasingIndex++;
	else
		props.specialCasing = NULL;

	if (caseFoldingIndex < caseFoldingCount && props.code == caseFoldings[caseFoldingIndex].code) {
		props.caseFolding = caseFoldings + caseFoldingIndex++;

		// ignore "Common" mappings (simple==full) that map to the same code
		// point as the regular lowercase mapping
		if (props.caseFolding->status == 'C' && props.caseFolding->simple == props.lowerCase)
			props.caseFolding = NULL;
	} else
		props.caseFolding = NULL;

	value = makeProps(&props);

	if (*fields[1][0] == '<') {
		// first or last entry of a Unicode area
		size_t length = fields[1][1] - fields[1][0];

		if (length < 9) {
			/* name too short for an area name */
		} else if (!memcmp(", First>", fields[1][1]-8, 8)) {
			// set the current area
			if (unicodeAreas[unicodeAreaIndex].first == 0xffffffff) {
				// length of the name without '<' and ", First>"
				length -= 9;

				// the name has to fit into the area's buffer, including
				// the terminating NUL
				if (length >= sizeof(unicodeAreas[unicodeAreaIndex].name)) {
					fprintf(stderr, "genprops: error - area name too long at code 0x%lx\n", props.code);
					return B_ERROR;
				}

				unicodeAreas[unicodeAreaIndex].first = props.code;
				unicodeAreas[unicodeAreaIndex].props = value;
				memcpy(unicodeAreas[unicodeAreaIndex].name, fields[1][0]+1, length);
				unicodeAreas[unicodeAreaIndex].name[length] = 0;
			} else {
				// error: a previous area is incomplete
				fprintf(stderr, "genprops: error - area \"%s\" is incomplete\n", unicodeAreas[unicodeAreaIndex].name);
				return B_ERROR;
			}
			return B_OK;
		} else if (!memcmp(", Last>", fields[1][1]-7, 7)) {
			// check that the current area matches, and complete it with the last code point
			// (length of the name without '<' and ", Last>"; a name that
			// would not fit into the buffer cannot match the stored one)
			length -= 8;
			if (unicodeAreas[unicodeAreaIndex].props == value
				&& length < sizeof(unicodeAreas[unicodeAreaIndex].name)
				&& !memcmp(unicodeAreas[unicodeAreaIndex].name, fields[1][0]+1, length)
				&& unicodeAreas[unicodeAreaIndex].name[length] == 0
				&& unicodeAreas[unicodeAreaIndex].first < props.code) {

				unicodeAreas[unicodeAreaIndex].last = props.code;
				if (gBeVerbose) {
					printf("Unicode area U+%04lx..U+%04lx \"%s\"\n",
						unicodeAreas[unicodeAreaIndex].first,
						unicodeAreas[unicodeAreaIndex].last,
						unicodeAreas[unicodeAreaIndex].name);
				}

				// the slot following the last complete area always has to
				// exist, as it holds the "unused" marker
				if (unicodeAreaIndex + 1 >= kMaxAreaCount) {
					fprintf(stderr, "genprops: error - too many Unicode areas\n");
					return B_ERROR;
				}
				unicodeAreas[++unicodeAreaIndex].first = 0xffffffff;
			} else {
				// error: different properties between first & last, different area name, first >= last
				fprintf(stderr, "genprops: error - Last of area \"%s\" is incorrect\n", unicodeAreas[unicodeAreaIndex].name);
				return B_ERROR;
			}
			return B_OK;
		} else {
			/* not an area name */
		}
	}

	// properties for a single code point
	// ### TODO: check that the code points (props.code) are in ascending order
	addProps(props.code, value);
	return B_OK;
}
|
||||
|
||||
|
||||
/* set repeated properties for the areas */
|
||||
|
||||
static void
|
||||
repeatAreaProps()
|
||||
{
|
||||
uint32 puaProps;
|
||||
int32 i;
|
||||
bool hasPlane15PUA, hasPlane16PUA;
|
||||
|
||||
/*
|
||||
* UnicodeData.txt before 3.0.1 did not contain the PUAs on
|
||||
* planes 15 and 16.
|
||||
* If that is the case, then we add them here, using the properties
|
||||
* from the BMP PUA.
|
||||
*/
|
||||
puaProps = 0;
|
||||
hasPlane15PUA = hasPlane16PUA = false;
|
||||
|
||||
for (i = 0;i < unicodeAreaIndex;i++) {
|
||||
repeatProps(unicodeAreas[i].first,unicodeAreas[i].last,unicodeAreas[i].props);
|
||||
|
||||
if (unicodeAreas[i].first == 0xe000)
|
||||
puaProps = unicodeAreas[i].props;
|
||||
else if (unicodeAreas[i].first == 0xf0000)
|
||||
hasPlane15PUA = false;
|
||||
else if (unicodeAreas[i].first == 0x100000)
|
||||
hasPlane16PUA = true;
|
||||
}
|
||||
|
||||
if (puaProps != 0) {
|
||||
if (!hasPlane15PUA)
|
||||
repeatProps(0xf0000, 0xffffd, puaProps);
|
||||
|
||||
if (!hasPlane16PUA)
|
||||
repeatProps(0x100000, 0x10fffd, puaProps);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
 * Parses the Unicode database file "filename" by passing it, together with
 * unicodeDataLineFn, to parseDelimitedFile() (';' delimiter, 15 fields).
 *
 * Returns the parser's status if it is an error, B_ERROR if the last area
 * slot is still marked as in use after parsing (i.e. an area was left
 * incomplete), and B_OK otherwise, after repeatAreaProps() has been run.
 */
static status_t
parseDB(const char *filename)
{
	char *fields[15][2];

	// An unused unicodeAreas[unicodeAreaIndex] slot is recognized by this
	// bogus "first" value.
	unicodeAreas[0].first = 0xffffffff;

	const status_t parseStatus = parseDelimitedFile(filename, ';', fields, 15,
		unicodeDataLineFn, NULL);
	if (parseStatus < B_OK)
		return parseStatus;

	const bool lastAreaComplete
		= unicodeAreas[unicodeAreaIndex].first == 0xffffffff;
	if (!lastAreaComplete) {
		fprintf(stderr, "genprops: error - the last area \"%s\" from U+%04lx is incomplete\n",
			unicodeAreas[unicodeAreaIndex].name,
			unicodeAreas[unicodeAreaIndex].first);
		return B_ERROR;
	}

	repeatAreaProps();
	return B_OK;
}
|
||||
|
||||
|
||||
int
|
||||
main(int argc,char **argv)
|
||||
{
|
||||
const char *srcDir = "data", *destDir = ".";
|
||||
|
||||
// gBeVerbose = true;
|
||||
if (argc >= 2 && argv[1])
|
||||
srcDir = argv[1];
|
||||
|
||||
// prepare the filename beginning with the source dir
|
||||
|
||||
initStore();
|
||||
|
||||
BPath path(srcDir,"Mirror.txt");
|
||||
status_t status = parseMirror(path.Path());
|
||||
if (status < B_OK)
|
||||
return -1;
|
||||
|
||||
path.SetTo(srcDir,"SpecialCasing.txt");
|
||||
status = parseSpecialCasing(path.Path());
|
||||
if (status < B_OK)
|
||||
return -1;
|
||||
|
||||
path.SetTo(srcDir,"CaseFolding.txt");
|
||||
status = parseCaseFolding(path.Path());
|
||||
if (status < B_OK)
|
||||
return -1;
|
||||
|
||||
path.SetTo(srcDir,"UnicodeData.txt");
|
||||
status = parseDB(path.Path());
|
||||
if (status < B_OK)
|
||||
return -1;
|
||||
|
||||
// process parsed data
|
||||
compactProps();
|
||||
compactStage3();
|
||||
compactStage2();
|
||||
|
||||
// write the properties data file
|
||||
return generateData(destDir);
|
||||
}
|
@ -1,69 +0,0 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 1999-2001, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
* file name: genprops.h
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 1999dec13
|
||||
* created by: Markus W. Scherer
|
||||
*
|
||||
* adapted for use under BeOS by Axel Dörfler, axeld@pinc-software.de.
|
||||
*/
|
||||
|
||||
#ifndef __GENPROPS_H__
|
||||
#define __GENPROPS_H__
|
||||
|
||||
#include <SupportDefs.h>
|
||||
#include "utf.h"
|
||||
|
||||
|
||||
// special casing data
|
||||
struct SpecialCasing {
|
||||
uint32 code;
|
||||
bool isComplex;
|
||||
UChar lowerCase[32], upperCase[32], titleCase[32];
|
||||
};
|
||||
|
||||
// case folding data
|
||||
struct CaseFolding {
|
||||
uint32 code, simple;
|
||||
char status;
|
||||
UChar full[32];
|
||||
};
|
||||
|
||||
// character properties
|
||||
struct Props {
|
||||
uint32 code, lowerCase, upperCase, titleCase, mirrorMapping;
|
||||
int16 decimalDigitValue, digitValue; /* -1: no value */
|
||||
int32 numericValue; /* see hasNumericValue */
|
||||
uint32 denominator; /* 0: no value */
|
||||
uint8 generalCategory, canonicalCombining, bidi, isMirrored, hasNumericValue;
|
||||
SpecialCasing *specialCasing;
|
||||
CaseFolding *caseFolding;
|
||||
};
|
||||
|
||||
// global flags
|
||||
extern bool gBeVerbose;
|
||||
|
||||
// name tables
|
||||
extern const char *const bidiNames[];
|
||||
extern const char *const genCategoryNames[];
|
||||
|
||||
// prototypes
|
||||
extern void initStore(void);
|
||||
extern uint32 makeProps(Props *p);
|
||||
extern void addProps(uint32 c, uint32 props);
|
||||
extern void repeatProps(uint32 first, uint32 last, uint32 props);
|
||||
extern void compactStage2(void);
|
||||
extern void compactStage3(void);
|
||||
extern void compactProps(void);
|
||||
extern status_t generateData(const char *dataDir);
|
||||
|
||||
#endif
|
||||
|
@ -1,242 +0,0 @@
|
||||
<html>
|
||||
|
||||
<head>
|
||||
<meta HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=Latin-1">
|
||||
<title>IBM's Public License - IBM's Classes for Unicode</title>
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<b>
|
||||
|
||||
<p ALIGN="CENTER"><big>IBM PUBLIC LICENSE - IBM’s Classes for Unicode VERSION 1.0</big></p>
|
||||
</b><font size="2">
|
||||
|
||||
<p>THE ACCOMPANYING PROGRAM IS PROVIDED UNDER THE TERMS OF THIS IBM PUBLIC LICENSE
|
||||
("AGREEMENT"). ANY USE, REPRODUCTION OR DISTRIBUTION OF THE PROGRAM CONSTITUTES
|
||||
RECIPIENT’S ACCEPTANCE OF THIS AGREEMENT.</p>
|
||||
<b>
|
||||
|
||||
<p>1. DEFINITIONS</p>
|
||||
</b>
|
||||
|
||||
<p>"Contribution" means: </p>
|
||||
|
||||
<blockquote>
|
||||
<blockquote>
|
||||
<p>a) in the case of International Business Machines Corporation ("IBM"), the
|
||||
Original Program, and </p>
|
||||
<p>b) in the case of each Contributor, </p>
|
||||
<blockquote>
|
||||
<p>i) changes to the Program, and</p>
|
||||
<p>ii) additions to the Program;</p>
|
||||
</blockquote>
|
||||
</blockquote>
|
||||
<p>where such changes and/or additions to the Program originate from and are distributed
|
||||
by that particular Contributor. A Contribution ‘originates’ from a Contributor
|
||||
if it was added to the Program by such Contributor itself or anyone acting on such
|
||||
Contributor’s behalf. Contributions do not include additions to the Program which:
|
||||
(i) are separate modules of software distributed in conjunction with the Program under
|
||||
their own license agreement, and (ii) are not derivative works of the Program.</p>
|
||||
</blockquote>
|
||||
|
||||
<p>"Contributor" means IBM and any other entity that distributes the Program.</p>
|
||||
|
||||
<p>"Licensed Patents " mean patent claims licensable by a Contributor which are
|
||||
necessarily infringed by the use or sale of its Contribution alone or when combined with
|
||||
the Program. </p>
|
||||
|
||||
<p>"Original Program" means the original version of the software accompanying
|
||||
this Agreement as released by IBM, including source code, object code and documentation,
|
||||
if any.</p>
|
||||
|
||||
<p>"Program" means the Original Program and Contributions.</p>
|
||||
|
||||
<p>"Recipient" means anyone who receives the Program under this Agreement,
|
||||
including all Contributors.</p>
|
||||
<b>
|
||||
|
||||
<p>2. GRANT OF RIGHTS</p>
|
||||
|
||||
<blockquote>
|
||||
<blockquote>
|
||||
</b><p>a) Subject to the terms of this Agreement, each Contributor hereby grants Recipient
|
||||
a non-exclusive, worldwide, royalty-free copyright license to<font COLOR="#ff0000"> </font>reproduce,
|
||||
prepare derivative works of, publicly display, publicly perform, distribute and sublicense
|
||||
the Contribution of such Contributor, if any, and such derivative works, in source code
|
||||
and object code form.</p>
|
||||
<p>b) Subject to the terms of this Agreement, each Contributor hereby grants Recipient a
|
||||
non-exclusive, worldwide,<font COLOR="#008000"> </font>royalty-free patent license under
|
||||
Licensed Patents to make, use, sell, offer to sell, import and otherwise transfer the
|
||||
Contribution of such Contributor, if any, in source code and object code form. This patent
|
||||
license shall apply to the combination of the Contribution and the Program if, at the time
|
||||
the Contribution is added by the Contributor, such addition of the Contribution causes
|
||||
such combination to be covered by the Licensed Patents. The patent license shall not apply
|
||||
to any other combinations which include the Contribution. No hardware per se is licensed
|
||||
hereunder. </p>
|
||||
<p>c) Recipient understands that although each Contributor grants the licenses to its
|
||||
Contributions set forth herein, no assurances are provided by any Contributor that the
|
||||
Program does not infringe the patent or other intellectual property rights of any other
|
||||
entity. Each Contributor disclaims any liability to Recipient for claims brought by any
|
||||
other entity based on infringement of intellectual property rights or otherwise. As a
|
||||
condition to exercising the rights and licenses granted hereunder, each Recipient hereby
|
||||
assumes sole responsibility to secure any other intellectual property rights needed, if
|
||||
any. For example, if a third party patent license is required to allow Recipient to
|
||||
distribute the Program, it is Recipient’s responsibility to acquire that license
|
||||
before distributing the Program.</p>
|
||||
<p>d) Each Contributor represents that to its knowledge it has sufficient copyright rights
|
||||
in its Contribution, if any, to grant the copyright license set forth in this Agreement. </p>
|
||||
</blockquote>
|
||||
</blockquote>
|
||||
<b>
|
||||
|
||||
<p>3. REQUIREMENTS</p>
|
||||
</b>
|
||||
|
||||
<p>A Contributor may choose to distribute the Program in object code form under its own
|
||||
license agreement, provided that:</p>
|
||||
|
||||
<blockquote>
|
||||
<blockquote>
|
||||
<p>a) it complies with the terms and conditions of this Agreement; and</p>
|
||||
<p>b) its license agreement:</p>
|
||||
<blockquote>
|
||||
<p>i) effectively disclaims on behalf of all Contributors all warranties and conditions,
|
||||
express and implied, including warranties or conditions of title and non-infringement, and
|
||||
implied warranties or conditions of merchantability and fitness for a particular purpose; </p>
|
||||
<p>ii) effectively excludes on behalf of all Contributors all liability for damages,
|
||||
including direct, indirect, special, incidental and consequential damages, such as lost
|
||||
profits; </p>
|
||||
<p>iii) states that any provisions which differ from this Agreement are offered by that
|
||||
Contributor alone and not by any other party; and</p>
|
||||
<p>iv) states that source code for the Program is available from such Contributor, and
|
||||
informs licensees how to obtain it in a reasonable manner on or through a medium
|
||||
customarily used for software exchange.<font COLOR="#0000ff"> </p>
|
||||
</font>
|
||||
</blockquote>
|
||||
</blockquote>
|
||||
</blockquote>
|
||||
|
||||
<p>When the Program is made available in source code form:</p>
|
||||
|
||||
<blockquote>
|
||||
<blockquote>
|
||||
<p>a) it must be made available under this Agreement; and </p>
|
||||
<p>b) a copy of this Agreement must be included with each copy of the Program. </p>
|
||||
<font COLOR="#0000ff"><strike>
|
||||
</blockquote>
|
||||
</blockquote>
|
||||
</strike></font>
|
||||
|
||||
<p>Each Contributor must include the following in a conspicuous location in the Program: </p>
|
||||
|
||||
<blockquote>
|
||||
<p>Copyright <font FACE="Times New Roman">©</font><font COLOR="#ff0000"> </font>1999,
|
||||
International Business Machines Corporation and others. All Rights Reserved. </p>
|
||||
</blockquote>
|
||||
|
||||
<p>In addition, each Contributor must identify itself as the originator of its
|
||||
Contribution, if any, in a manner that reasonably allows subsequent Recipients to identify
|
||||
the originator of the Contribution. </p>
|
||||
<b>
|
||||
|
||||
<p>4. COMMERCIAL DISTRIBUTION</p>
|
||||
</b>
|
||||
|
||||
<p>Commercial distributors of software may accept certain responsibilities with respect to
|
||||
end users, business partners and the like. While this license is intended to facilitate
|
||||
the commercial use of the Program, the Contributor who includes the Program in a
|
||||
commercial product offering should do so in a manner which does not create potential
|
||||
liability for other Contributors. Therefore, if a Contributor includes the Program in a
|
||||
commercial product offering, such Contributor ("Commercial Contributor") hereby
|
||||
agrees to defend and indemnify every other Contributor ("Indemnified
|
||||
Contributor") against any losses, damages and costs (collectively "Losses")
|
||||
arising from claims, lawsuits and other legal actions brought by a third party against the
|
||||
Indemnified Contributor to the extent caused by the acts or omissions of such Commercial
|
||||
Contributor in connection with its distribution of the Program in a commercial product
|
||||
offering. The obligations in this section do not apply to any claims or Losses relating to
|
||||
any actual or alleged intellectual property infringement. In order to qualify, an
|
||||
Indemnified Contributor must: a) promptly notify the Commercial Contributor in writing of
|
||||
such claim, and b) allow the Commercial Contributor to control, and cooperate with the
|
||||
Commercial Contributor in, the defense and any related settlement negotiations. The
|
||||
Indemnified Contributor may participate in any such claim at its own expense.</p>
|
||||
|
||||
<p>For example, a Contributor might include the Program in a commercial product offering,
|
||||
Product X. That Contributor is then a Commercial Contributor. If that Commercial
|
||||
Contributor then makes performance claims, or offers warranties related to Product X,
|
||||
those performance claims and warranties are such Commercial Contributor’s
|
||||
responsibility alone. Under this section, the Commercial Contributor would have to defend
|
||||
claims against the other Contributors related to those performance claims and warranties,
|
||||
and if a court requires any other Contributor to pay any damages as a result, the
|
||||
Commercial Contributor must pay those damages.</p>
|
||||
<b>
|
||||
|
||||
<p>5. NO WARRANTY</p>
|
||||
</b>
|
||||
|
||||
<p>EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, THE PROGRAM IS PROVIDED ON AN "AS
|
||||
IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED
|
||||
INCLUDING, WITHOUT LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT,
|
||||
MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is solely responsible
|
||||
for determining the appropriateness of using and distributing the Program and assumes all
|
||||
risks associated with its exercise of rights under this Agreement, including but not
|
||||
limited to the risks and costs of program errors, compliance with applicable laws, damage
|
||||
to or loss of data, programs or equipment, and unavailability or interruption of
|
||||
operations. </p>
|
||||
<b>
|
||||
|
||||
<p>6. DISCLAIMER OF LIABILITY</p>
|
||||
</b>
|
||||
|
||||
<p>EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR ANY CONTRIBUTORS
|
||||
SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON
|
||||
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR DISTRIBUTION OF THE PROGRAM
|
||||
OR THE EXERCISE OF ANY RIGHTS GRANTED HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
SUCH DAMAGES.</p>
|
||||
<b>
|
||||
|
||||
<p>7. GENERAL</p>
|
||||
</b>
|
||||
|
||||
<p>If any provision of this Agreement is invalid or unenforceable under applicable law, it
|
||||
shall not affect the validity or enforceability of the remainder of the terms of this
|
||||
Agreement, and without further action by the parties hereto, such provision shall be
|
||||
reformed to the minimum extent necessary to make such provision valid and enforceable.</p>
|
||||
|
||||
<p>If Recipient institutes patent litigation against a Contributor with respect to a
|
||||
patent applicable to software (including a cross-claim or counterclaim in a lawsuit), then
|
||||
any patent licenses granted by that Contributor to such Recipient under this Agreement
|
||||
shall terminate as of the date such litigation is filed. In addition, If Recipient
|
||||
institutes patent litigation against any entity (including a cross-claim or counterclaim
|
||||
in a lawsuit) alleging that the Program itself (excluding combinations of the Program with
|
||||
other software or hardware) infringes such Recipient’s patent(s), then such
|
||||
Recipient’s rights granted under Section 2(b) shall terminate as of the date such
|
||||
litigation is filed. </p>
|
||||
|
||||
<p>All Recipient’s rights under this Agreement shall terminate if it fails to comply
|
||||
with any of the material terms or conditions of this Agreement and does not cure such
|
||||
failure in a reasonable period of time after becoming aware of such noncompliance. If all
|
||||
Recipient’s rights under this Agreement terminate, Recipient agrees to cease use and
|
||||
distribution of the Program as soon as reasonably practicable. However, Recipient's
|
||||
obligations under this Agreement and any licenses granted by Recipient relating to the
|
||||
Program shall continue and survive. </p>
|
||||
|
||||
<p>IBM may publish new versions (including revisions) of this Agreement from time to time.
|
||||
Each new version of the Agreement will be given a distinguishing version number. The
|
||||
Program (including Contributions) may always be distributed subject to the version of the
|
||||
Agreement under which it was received. In addition, after a new version of the Agreement
|
||||
is published, Contributor may elect to distribute the Program (including its
|
||||
Contributions) under the new version. No one other than IBM has the right to modify this
|
||||
Agreement. Except as expressly stated in Sections 2(a) and 2(b) above, Recipient receives
|
||||
no rights or licenses to the intellectual property of any Contributor under this
|
||||
Agreement, whether expressly, by implication, estoppel or otherwise. All rights in the
|
||||
Program not expressly granted under this Agreement are reserved.</p>
|
||||
|
||||
<p>This Agreement is governed by the laws of the State of New York and the intellectual
|
||||
property laws of the United States of America. No party to this Agreement will bring a
|
||||
legal action under this Agreement more than one year after the cause of action arose. Each
|
||||
party waives its rights to a jury trial in any resulting litigation. </p>
|
||||
</font>
|
||||
</body>
|
||||
</html>
|
File diff suppressed because it is too large
Load Diff
@ -1,97 +0,0 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 1999-2001, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
* file name: utf.h
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 1999sep09
|
||||
* created by: Markus W. Scherer
|
||||
* adapted for use under BeOS by Axel Dörfler, axeld@pinc-software.de.
|
||||
*/
|
||||
|
||||
#ifndef __UTF_H__
|
||||
#define __UTF_H__
|
||||
|
||||
#include "UnicodeProperties.h"
|
||||
#include <stddef.h>
|
||||
|
||||
|
||||
#define UTF_SIZE 8
|
||||
#define U_SIZEOF_UCHAR (UTF_SIZE>>3)
|
||||
|
||||
|
||||
typedef uint32 UChar32;
|
||||
|
||||
#ifndef UTF_SAFE
|
||||
# define UTF_SAFE
|
||||
#endif
|
||||
|
||||
/* internal definitions ----------------------------------------------------- */

/* values that are returned in place of a code point in case of an error */
#define UTF8_ERROR_VALUE_1	0x15
#define UTF8_ERROR_VALUE_2	0x9f
#define UTF_ERROR_VALUE		0xffff

/** Is this code unit or code point a surrogate (U+d800..U+dfff)? */
#define UTF_IS_SURROGATE(unichar) \
	(((unichar) & 0xfffff800) == 0xd800)

/** Is a given 32-bit code point/Unicode scalar value
 *  actually a valid Unicode (abstract) character?
 *  Rejects values above U+10ffff, surrogates, and every value whose
 *  lower 16 bits are 0xfffe or 0xffff.
 */
#define UTF_IS_UNICODE_CHAR(c) \
	((uint32_t)(c) <= 0x10ffff \
		&& !UTF_IS_SURROGATE(c) && ((c) & 0xfffe) != 0xfffe)

/** Is a given 32-bit code an error value
 *  as returned by one of the macros for any UTF?
 */
#define UTF_IS_ERROR(c) \
	(((c) & 0xfffe) == 0xfffe \
		|| (c) == UTF8_ERROR_VALUE_1 || (c) == UTF8_ERROR_VALUE_2)

/** This is a combined macro: Is c a valid Unicode value _and_ not an
 *  error code?
 */
#define UTF_IS_VALID(c) \
	(UTF_IS_UNICODE_CHAR(c) \
		&& (c) != UTF8_ERROR_VALUE_1 && (c) != UTF8_ERROR_VALUE_2)
|
||||
|
||||
#include "utf8.h"
|
||||
|
||||
/*
|
||||
* ANSI C header:
|
||||
* limits.h defines CHAR_MAX
|
||||
*/
|
||||
#include <limits.h>
|
||||
|
||||
/* the generic append macros map to the UTF-8 implementations */
#define UTF_APPEND_CHAR_SAFE(s, i, length, c) \
	UTF8_APPEND_CHAR_SAFE(s, i, length, c)
#define UTF_APPEND_CHAR_UNSAFE(s, i, c) \
	UTF8_APPEND_CHAR_UNSAFE(s, i, c)
|
||||
|
||||
|
||||
/* Define UChar to be compatible with char if possible. */
|
||||
#if CHAR_MAX>=255
|
||||
typedef char UChar;
|
||||
#else
|
||||
typedef uint8 UChar;
|
||||
#endif
|
||||
|
||||
|
||||
/* The default macros map to the "safe" variants; those that accept a
 * strictness flag are called with FALSE.
 * NOTE(review): of the UTF_*_SAFE targets, the visible part of this header
 * only defines UTF_APPEND_CHAR_SAFE - confirm that the others are defined
 * before any of these macros is used.
 */

/* random access */
#define UTF_GET_CHAR(s, start, i, length, c) \
	UTF_GET_CHAR_SAFE(s, start, i, length, c, FALSE)

/* forward iteration */
#define UTF_NEXT_CHAR(s, i, length, c) \
	UTF_NEXT_CHAR_SAFE(s, i, length, c, FALSE)
#define UTF_APPEND_CHAR(s, i, length, c) \
	UTF_APPEND_CHAR_SAFE(s, i, length, c)
#define UTF_FWD_1(s, i, length) \
	UTF_FWD_1_SAFE(s, i, length)
#define UTF_FWD_N(s, i, length, n) \
	UTF_FWD_N_SAFE(s, i, length, n)
#define UTF_SET_CHAR_START(s, start, i) \
	UTF_SET_CHAR_START_SAFE(s, start, i)

/* backward iteration */
#define UTF_PREV_CHAR(s, start, i, c) \
	UTF_PREV_CHAR_SAFE(s, start, i, c, FALSE)
#define UTF_BACK_1(s, start, i) \
	UTF_BACK_1_SAFE(s, start, i)
#define UTF_BACK_N(s, start, i, n) \
	UTF_BACK_N_SAFE(s, start, i, n)
#define UTF_SET_CHAR_LIMIT(s, start, i, length) \
	UTF_SET_CHAR_LIMIT_SAFE(s, start, i, length)
|
||||
|
||||
#endif /* __UTF_H__ */
|
@ -1,309 +0,0 @@
|
||||
/*
|
||||
******************************************************************************
|
||||
*
|
||||
* Copyright (C) 1999-2001, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
******************************************************************************
|
||||
* file name: utf_impl.c
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 1999sep13
|
||||
* created by: Markus W. Scherer
|
||||
*
|
||||
* This file provides implementation functions for macros in the utfXX.h
|
||||
* that would otherwise be too long as macros.
|
||||
*/
|
||||
|
||||
/* set import/export definitions */
|
||||
#ifndef U_UTF8_IMPL
|
||||
# define U_UTF8_IMPL
|
||||
#endif
|
||||
|
||||
#include <SupportDefs.h>
|
||||
#include "utf.h"
|
||||
|
||||
/*
|
||||
* This table could be replaced on many machines by
|
||||
* a few lines of assembler code using an
|
||||
* "index of first 0-bit from msb" instruction and
|
||||
* one or two more integer instructions.
|
||||
*
|
||||
* For example, on an i386, do something like
|
||||
* - MOV AL, leadByte
|
||||
* - NOT AL (8-bit, leave b15..b8==0..0, reverse only b7..b0)
|
||||
* - MOV AH, 0
|
||||
* - BSR BX, AX (16-bit)
|
||||
* - MOV AX, 6 (result)
|
||||
* - JZ finish (ZF==1 if leadByte==0xff)
|
||||
* - SUB AX, BX (result)
|
||||
* -finish:
|
||||
* (BSR: Bit Scan Reverse, scans for a 1-bit, starting from the MSB)
|
||||
*/
|
||||
|
||||
/* Number of trail bytes that follow a given lead byte, indexed by the byte
 * value. The count is 0 for single bytes, for trail bytes that illegally
 * appear in lead position, and for the illegal bytes 0xfe and 0xff.
 */
uint8
utf8_countTrailBytes[256] = {
	/* 0x00..0x3f */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

	/* 0x40..0x7f */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

	/* 0x80..0xbf */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

	/* 0xc0..0xdf */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

	/* 0xe0..0xef */
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,

	/* 0xf0..0xf7 */
	3, 3, 3, 3, 3, 3, 3, 3,
	/* 0xf8..0xfb */
	4, 4, 4, 4,
	/* 0xfc..0xfd */
	5, 5,
	/* illegal bytes 0xfe and 0xff */
	0, 0
};
|
||||
|
||||
/* Indexed by the number of trail bytes: a decoded value below the entry is
 * treated as an irregular sequence by the strict checks.
 */
static UChar32 utf8_minRegular[4] = {
	0, 0x80, 0x800, 0x10000
};

/* Error values, indexed by a trail byte count (0..5). */
static UChar32 utf8_errorValue[6] = {
	UTF8_ERROR_VALUE_1,
	UTF8_ERROR_VALUE_2,
	UTF_ERROR_VALUE,
	0x10ffff,
	0x3ffffff,
	0x7fffffff
};
|
||||
|
||||
|
||||
/* Decodes the trail bytes of a multi-byte sequence.
 *
 * s      - the text
 * pi     - in: index of the byte that follows the lead byte;
 *          out: index behind the bytes that were consumed
 * length - limit index of the text
 * c      - the lead byte (its trail byte count is looked up here)
 * strict - if set, surrogates, sequences with 4 or more trail bytes, values
 *          below utf8_minRegular[] and values whose low 16 bits are
 *          0xfffe/0xffff are replaced by an error value as well
 *
 * Returns the decoded code point, or an entry of utf8_errorValue[].
 */
UChar32
utf8_nextCharSafeBody(const uint8 *s, int32 *pi, int32 length, UChar32 c, bool strict)
{
	int32 pos = *pi;
	uint8 trailCount = UTF8_COUNT_TRAIL_BYTES(c);

	if (pos + trailCount > length) {
		/* too few bytes left: skip only over actual trail bytes instead of
		 * jumping to the end, in case there is an illegal sequence */
		const int32 first = pos;
		while (pos < length && UTF8_IS_TRAIL(s[pos]))
			++pos;

		*pi = pos;
		return utf8_errorValue[pos - first];
	}

	UTF8_MASK_LEAD_BYTE(c, trailCount);

	/* trailCount == 0 for illegally leading trail bytes and the illegal
	 * bytes 0xfe and 0xff */
	uint8 malformed = trailCount == 0 ? 1 : 0;

	for (uint8 remaining = trailCount; remaining > 0; --remaining) {
		const uint8 trail = s[pos++];
		c = (c << 6) | (trail & 0x3f);

		if (remaining == 3 && c >= 0x110) {
			/* code point > 0x10ffff, outside Unicode; account for the two
			 * bytes that are not read anymore so that pos has advanced by
			 * trailCount bytes in total */
			pos += 2;
			malformed = 1;
			break;
		}

		/* a correct trail byte has (b7..b6) == (10) */
		malformed |= (trail & 0xc0) ^ 0x80;
	}

	/*
	 * All the error handling should return a value
	 * that needs count bytes so that UTF8_GET_CHAR_SAFE() works right.
	 */
	if (malformed) {
		const uint8 expected = trailCount;

		/* don't go beyond this sequence */
		pos -= trailCount;
		while (trailCount > 0 && UTF8_IS_TRAIL(s[pos])) {
			++pos;
			--trailCount;
		}
		c = utf8_errorValue[expected - trailCount];
	} else if (strict
		&& (UTF_IS_SURROGATE(c)
			|| trailCount >= 4 || c < utf8_minRegular[trailCount]
			|| (c & 0xfffe) == 0xfffe)) {
		/* irregular sequence */
		c = utf8_errorValue[trailCount];
	}

	*pi = pos;
	return c;
}
|
||||
|
||||
|
||||
/* Writes the code point c as a 2 to 4 byte sequence to s, starting at
 * index i; "length" is the limit index of the buffer.
 *
 * If c is above 0x10ffff, or the sequence does not fit, an error value is
 * written that is chosen by the number of remaining bytes (at most 3).
 * Nothing is written if no byte is left.
 *
 * Returns the index behind the bytes that were written.
 */
int32
utf8_appendCharSafeBody(uint8 *s, int32 i, int32 length, UChar32 c)
{
	/* pick the sequence layout: number of trail bytes and lead byte marker */
	int32 trailBytes = 0;
	uint8 leadMarker = 0;

	if (c <= 0x7ff) {
		trailBytes = 1;
		leadMarker = 0xc0;
	} else if ((uint32)c <= 0xffff) {
		trailBytes = 2;
		leadMarker = 0xe0;
	} else if ((uint32)c <= 0x10ffff) {
		trailBytes = 3;
		leadMarker = 0xf0;
	}

	if (trailBytes != 0 && i + trailBytes < length) {
		s[i++] = (uint8)((c >> (6 * trailBytes)) | leadMarker);
		for (int32 shift = 6 * (trailBytes - 1); shift >= 0; shift -= 6)
			s[i++] = (uint8)(((c >> shift) & 0x3f) | 0x80);

		return i;
	}

	/* c>0x10ffff or not enough space, write an error value */
	length -= i;
	if (length > 0) {
		if (length > 3)
			length = 3;

		s += i;
		int32 offset = 0;
		c = utf8_errorValue[length - 1];
		UTF8_APPEND_CHAR_SAFE(s, offset, length, c);
		i = i + offset;
	}

	return i;
}
|
||||
|
||||
/* Decodes a multi-byte sequence backwards.
 *
 * s      - the text
 * start  - lowest index that may be read
 * pi     - in: index of the trail byte passed in c; out: index of the lead
 *          byte, but only if the lead byte's sequence includes that trail
 *          byte - it is left untouched otherwise
 * c      - the last trail byte of the sequence
 * strict - if set, surrogates, sequences with 4 or more trail bytes, values
 *          below utf8_minRegular[] and values whose low 16 bits are
 *          0xfffe/0xffff are replaced by an error value as well
 *
 * Returns the decoded code point, or an error value.
 */
UChar32
utf8_prevCharSafeBody(const uint8 *s, int32 start, int32 *pi, UChar32 c, bool strict)
{
	int32 i = *pi;
	uint8 b, count = 1, shift = 6;

	/* extract value bits from the last trail byte */
	c &= 0x3f;

	for (;;) {
		if (i <= start) {
			/* no lead byte at all */
			c = UTF8_ERROR_VALUE_1;
			break;
		}

		/* read another previous byte */
		b = s[--i];

		if ((uint8)(b - 0x80) >= 0x7e) {
			/* not 0x80<=b<0xfe:
			 * single-byte character precedes trailing bytes */
			c = UTF8_ERROR_VALUE_1;
			break;
		}

		if ((b & 0x40) == 0) {
			/* trail byte */
			if (count >= 5) {
				/* more than 5 trail bytes is illegal */
				c = UTF8_ERROR_VALUE_1;
				break;
			}

			c |= (UChar32)(b & 0x3f) << shift;
			++count;
			shift += 6;
			continue;
		}

		/* lead byte, this will always end the loop */
		uint8 shouldCount = UTF8_COUNT_TRAIL_BYTES(b);

		if (count == shouldCount) {
			/* set the new position */
			*pi = i;
			UTF8_MASK_LEAD_BYTE(b, count);
			c |= (UChar32)b << shift;

			if (c > 0x10ffff
				|| (strict
					&& (UTF_IS_SURROGATE(c)
						|| count >= 4 || c < utf8_minRegular[count]
						|| (c & 0xfffe) == 0xfffe))) {
				/* irregular sequence */
				c = utf8_errorValue[count];
			}
			/* else: exit with correct c */
		} else if (count < shouldCount) {
			/* The lead byte does not match the number of trail bytes, but
			 * its sequence would include the trail byte that we started
			 * with: set the position to the lead byte. */
			*pi = i;
			c = utf8_errorValue[count];
		} else {
			/* the lead byte's sequence ends before our trail byte */
			c = UTF8_ERROR_VALUE_1;
		}
		break;
	}

	return c;
}
|
||||
|
||||
/* Searches backwards from s[i] for the lead byte of the sequence that s[i]
 * belongs to; i had been decremented once before the function call.
 *
 * Returns the index of that lead byte if its sequence is long enough to
 * include i, and i itself otherwise. Never reads below "start", and at most
 * the 6 bytes up to and including s[i].
 */
int32
utf8_back1SafeBody(const uint8 *s, int32 start, int32 i)
{
	/* lowest index that may be inspected */
	const int32 lowest = (i - 5 > start) ? i - 5 : start;

	for (int32 pos = i; ; --pos) {
		const uint8 b = s[pos];

		if ((uint8)(b - 0x80) >= 0x7e) {
			/* not 0x80<=b<0xfe */
			break;
		}

		if (b >= 0xc0) {
			/* lead byte: is its sequence long enough to include i? */
			if (UTF8_COUNT_TRAIL_BYTES(b) >= (i - pos))
				return pos;
			break;
		}

		/* trail byte: continue unless the lower limit has been reached */
		if (pos <= lowest)
			break;
	}

	/* return i itself to be consistent with the FWD_1 macro */
	return i;
}
|
@ -1,315 +0,0 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 1999-2001, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
* file name: utf8.h
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 1999sep13
|
||||
* created by: Markus W. Scherer
|
||||
*/
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C API: UTF-8 macros
|
||||
*
|
||||
* <p>This file defines macros to deal with UTF-8 code units and code points.
|
||||
* Signatures and semantics are the same as for the similarly named macros
|
||||
* in utf16.h.
|
||||
* utf8.h is included by utf.h after unicode/umachine.h
|
||||
* and some common definitions.</p>
|
||||
* <p><b>Usage:</b> ICU coding guidelines for if() statements should be followed when using these macros.
|
||||
* Compound statements (curly braces {}) must be used for if-else-while...
|
||||
* bodies and all macro statements should be terminated with semicolon.</p>
|
||||
*/
|
||||
|
||||
|
||||
/* utf.h must be included first. */
|
||||
#ifndef __UTF_H__
|
||||
# include "unicode/utf.h"
|
||||
#endif
|
||||
|
||||
#ifndef __UTF8_H__
|
||||
#define __UTF8_H__
|
||||
|
||||
/* internal definitions ----------------------------------------------------- */
|
||||
|
||||
/* Table with one entry per possible byte value; this header reads it only through UTF8_COUNT_TRAIL_BYTES(). */
extern uint8
|
||||
utf8_countTrailBytes[256];
|
||||
|
||||
/*
 * Count the trail bytes for a lead byte -
 * this macro should be used so that the assembler code
 * that is mentioned in utf_impl.c could be used here.
 *
 * The argument is parenthesized before the cast so that a compound
 * expression (for example b+1 or a conditional) is converted to uint8
 * as a whole; the table index is then always within 0..255.
 */
#define UTF8_COUNT_TRAIL_BYTES(leadByte) (utf8_countTrailBytes[(uint8)(leadByte)])
|
||||
|
||||
/* use a macro here, too - there may be a simpler way with some machines */
|
||||
/* Clears, in place, everything in leadByte except its low (6-countTrailBytes) bits. */
#define UTF8_MASK_LEAD_BYTE(leadByte, countTrailBytes) \
    ((leadByte) &= ((1 << (6 - (countTrailBytes))) - 1))
|
||||
|
||||
/* Out-of-line part of UTF8_NEXT_CHAR_SAFE; that macro calls it only after reading a lead byte into c. */
UChar32
|
||||
utf8_nextCharSafeBody(const uint8 *s,int32 *pi,int32 length, UChar32 c,bool strict);
|
||||
|
||||
/* Out-of-line part of UTF8_APPEND_CHAR_SAFE, used for every c above 0x7f; the macro stores the result as the new index. */
int32
|
||||
utf8_appendCharSafeBody(uint8 *s,int32 i,int32 length, UChar32 c);
|
||||
|
||||
/* Out-of-line part of UTF8_PREV_CHAR_SAFE; that macro calls it only after reading a trail byte into c. */
UChar32
|
||||
utf8_prevCharSafeBody(const uint8 *s,int32 start,int32 *pi, UChar32 c,bool strict);
|
||||
|
||||
/* Used by UTF8_SET_CHAR_START_SAFE and UTF8_BACK_1_SAFE when s[i] is a trail byte; the result becomes the new i. */
int32
|
||||
utf8_back1SafeBody(const uint8 *s,int32 start,int32 i);
|
||||
|
||||
/*
|
||||
* For the semantics of all of these macros, see utf16.h.
|
||||
* The UTF-8 macros favor short sequences: the shorter a sequence is, the more of its handling is done inline.
|
||||
* Sometimes, only the single-byte case is covered by a macro,
|
||||
* while longer sequences are handled by a function call.
|
||||
*/
|
||||
|
||||
/* single-code point definitions -------------------------------------------- */
|
||||
|
||||
/* classes of code unit values */
|
||||
/* Non-zero when bit 7 of uchar is clear, i.e. the code unit is a complete one-byte character. */
#define UTF8_IS_SINGLE(uchar) \
    (((uchar) & 0x80) == 0)
|
||||
/* Non-zero for the byte values 0xc0..0xfd; the uint8 cast turns the range check into a single comparison. */
#define UTF8_IS_LEAD(uchar) \
    ((uint8)((uchar) - 0xc0) < 0x3e)
|
||||
/* Non-zero when the top two bits of uchar are 10, i.e. for the byte values 0x80..0xbf. */
#define UTF8_IS_TRAIL(uchar) \
    (((uchar) & 0xc0) == 0x80)
|
||||
|
||||
/* number of code units per code point */
|
||||
/* Non-zero when c does not fit into one byte; the uint32 cast makes negative values count as large ones. */
#define UTF8_NEED_MULTIPLE_UCHAR(c) \
    ((uint32)(c) > 0x7f)
|
||||
|
||||
/*
|
||||
* ICU does not deal with code points >0x10ffff
|
||||
* unless necessary for advancing in the byte stream.
|
||||
*
|
||||
* These length macros take into account that for values >0x10ffff
|
||||
* the "safe" append macros would write the error code point 0xffff
|
||||
* with 3 bytes.
|
||||
* Code point comparisons need to be in uint32 because UChar32
|
||||
* may be a signed type, and negative values must be recognized.
|
||||
*/
|
||||
/*
 * Number of bytes used to encode code point c.
 * Enabled variant: 1, 2 or 4 bytes for the ranges up to 0x7f, up to 0x7ff
 * and 0x10000..0x10ffff; everything else, including values above 0x10ffff
 * and negative values, yields 3.
 * Disabled variant: additionally reports 5 and 6 byte lengths up to
 * 0x7fffffff and yields 3 only beyond that.
 */
#if 1
#   define UTF8_CHAR_LENGTH(c) \
        ((uint32)(c) <= 0x7f ? 1 : \
         (uint32)(c) <= 0x7ff ? 2 : \
         (uint32)((c) - 0x10000) > 0xfffff ? 3 : 4)
#else
#   define UTF8_CHAR_LENGTH(c) \
        ((uint32)(c) <= 0x7f ? 1 : \
         (uint32)(c) <= 0x7ff ? 2 : \
         (uint32)(c) <= 0xffff ? 3 : \
         (uint32)(c) <= 0x10ffff ? 4 : \
         (uint32)(c) <= 0x3ffffff ? 5 : \
         (uint32)(c) <= 0x7fffffff ? 6 : 3)
#endif
|
||||
|
||||
/* Equals the largest value the enabled (#if 1) variant of UTF8_CHAR_LENGTH can yield. */
#define UTF8_MAX_CHAR_LENGTH 4
|
||||
|
||||
/* average number of code units compared to UTF-16 */
|
||||
/* Scales size by 5/2 using integer arithmetic (the result is truncated). */
#define UTF8_ARRAY_SIZE(size) \
    ((5 * (size)) / 2)
|
||||
|
||||
/*
 * Read into c the code point whose encoding contains the byte at s[i].
 * The work is done on a private copy of i, so the caller's index is not
 * changed. Built from the _UNSAFE helpers.
 */
#define UTF8_GET_CHAR_UNSAFE(s, i, c) { \
    int32 __I = (int32)(i); \
    UTF8_SET_CHAR_START_UNSAFE(s, __I); \
    UTF8_NEXT_CHAR_UNSAFE(s, __I, c); \
}
|
||||
|
||||
/*
 * Read into c the code point whose encoding contains the byte at s[i],
 * using the _SAFE helpers with the limits start and length.
 * The work is done on a private copy of i, so the caller's index is not
 * changed. strict is forwarded to UTF8_NEXT_CHAR_SAFE.
 */
#define UTF8_GET_CHAR_SAFE(s, start, i, length, c, strict) { \
    int32 __I = (int32)(i); \
    UTF8_SET_CHAR_START_SAFE(s, start, __I); \
    UTF8_NEXT_CHAR_SAFE(s, __I, length, c, strict); \
}
|
||||
|
||||
/* definitions with forward iteration --------------------------------------- */
|
||||
|
||||
/*
|
||||
* Read a Unicode scalar value from an array of UTF-8 bytes.
|
||||
* Only values <=0x10ffff are accepted, and if an error occurs,
|
||||
* then c will be set such that UTF_IS_ERROR(c).
|
||||
* The _UNSAFE macro is fast and does not check for errors.
|
||||
* The _SAFE macro checks for errors and optionally for
|
||||
* irregular sequences, too, i.e., for sequences that
|
||||
* are longer than necessary, such as <c0 80> instead of <0>.
|
||||
* The strict checks also check for surrogates and
|
||||
* for 0xXXXXfffe and 0xXXXXffff.
|
||||
*/
|
||||
/*
 * Read the code point that starts at s[i] into c and move i past it.
 * A first byte in 0xc0..0xf4 is treated as a lead byte; the table then
 * decides how many (1 to 3) following bytes are folded in, six bits each.
 * Nothing is validated.
 */
#define UTF8_NEXT_CHAR_UNSAFE(s, i, c) { \
    (c) = (s)[(i)++]; \
    if ((uint8)((c) - 0xc0) < 0x35) { \
        uint8 __count = UTF8_COUNT_TRAIL_BYTES(c); \
        UTF8_MASK_LEAD_BYTE(c, __count); \
        switch (__count) { \
        /* each following branch falls through to the next one */ \
        case 3: \
            (c) = ((c) << 6) | ((s)[(i)++] & 0x3f); \
        case 2: \
            (c) = ((c) << 6) | ((s)[(i)++] & 0x3f); \
        case 1: \
            (c) = ((c) << 6) | ((s)[(i)++] & 0x3f); \
            /* no other branches to optimize switch() */ \
            break; \
        } \
    } \
}
|
||||
|
||||
/*
 * Write code point c as 1 to 4 bytes at s[i] and advance i accordingly.
 * The first byte depends on the magnitude of c; the trailing bytes are
 * shared between the branches, so a longer form falls out of the nested
 * blocks into the stores of the shorter ones.
 * Neither c nor the available space in s is checked.
 */
#define UTF8_APPEND_CHAR_UNSAFE(s, i, c) { \
    if ((uint32)(c) <= 0x7f) { \
        (s)[(i)++] = (uint8)(c); \
    } else { \
        if ((uint32)(c) <= 0x7ff) { \
            (s)[(i)++] = (uint8)(((c) >> 6) | 0xc0); \
        } else { \
            if ((uint32)(c) <= 0xffff) { \
                (s)[(i)++] = (uint8)(((c) >> 12) | 0xe0); \
            } else { \
                (s)[(i)++] = (uint8)(((c) >> 18) | 0xf0); \
                (s)[(i)++] = (uint8)((((c) >> 12) & 0x3f) | 0x80); \
            } \
            /* second-to-last byte of every 3- and 4-byte form */ \
            (s)[(i)++] = (uint8)((((c) >> 6) & 0x3f) | 0x80); \
        } \
        /* last byte of every multi-byte form */ \
        (s)[(i)++] = (uint8)(((c) & 0x3f) | 0x80); \
    } \
}
|
||||
|
||||
/*
 * Advance i by one byte plus the trail-byte count that the table holds
 * for s[i]. The following bytes are not inspected.
 */
#define UTF8_FWD_1_UNSAFE(s, i) { \
    (i) += 1 + UTF8_COUNT_TRAIL_BYTES((s)[i]); \
}
|
||||
|
||||
/*
 * Advance i by n code points by applying UTF8_FWD_1_UNSAFE n times.
 * Does nothing when n is zero or negative; the end of the buffer is not
 * checked.
 *
 * The counter is declared as int32, like the one in UTF8_FWD_N_SAFE and
 * like every offset in the function prototypes of this header.
 * NOTE(review): it used to be UTextOffset, ICU's older name for a 32-bit
 * offset - confirm that no build defines UTextOffset differently.
 */
#define UTF8_FWD_N_UNSAFE(s, i, n) { \
    int32 __N = (n); \
    while (__N > 0) { \
        UTF8_FWD_1_UNSAFE(s, i); \
        --__N; \
    } \
}
|
||||
|
||||
/*
 * Move i backwards until s[i] is no longer a trail byte.
 * There is no lower bound on i.
 */
#define UTF8_SET_CHAR_START_UNSAFE(s, i) { \
    while (UTF8_IS_TRAIL((s)[i])) { \
        --(i); \
    } \
}
|
||||
|
||||
/*
 * Read the byte at s[i] into c and advance i by one. If that byte is a
 * lead byte, utf8_nextCharSafeBody() takes over: it receives the address
 * of i, the length and the strict flag, and its result replaces c.
 * Any byte that is not a lead byte is left in c unchanged.
 */
#define UTF8_NEXT_CHAR_SAFE(s, i, length, c, strict) { \
    (c) = (s)[(i)++]; \
    if (UTF8_IS_LEAD(c)) { \
        (c) = utf8_nextCharSafeBody(s, &(i), (int32)(length), c, strict); \
    } \
}
|
||||
|
||||
/*
 * Append code point c at s[i]. Values up to 0x7f are stored directly as
 * one byte; note that this path does not consult length. Everything else
 * is delegated to utf8_appendCharSafeBody(), whose return value becomes
 * the new i.
 */
#define UTF8_APPEND_CHAR_SAFE(s, i, length, c) { \
    if ((uint32)(c) <= 0x7f) { \
        (s)[(i)++] = (uint8)(c); \
    } else { \
        (i) = utf8_appendCharSafeBody(s, (int32)(i), (int32)(length), c); \
    } \
}
|
||||
|
||||
/*
 * Advance i past the sequence that starts at s[i].
 * After a lead byte, at most as many bytes are skipped as the table
 * announces, the skip is cut short so that i does not pass length, and it
 * stops at the first byte that is not a trail byte.
 * Any other first byte advances i by exactly one.
 */
#define UTF8_FWD_1_SAFE(s, i, length) { \
    uint8 __b = (s)[(i)++]; \
    if (UTF8_IS_LEAD(__b)) { \
        uint8 __count = UTF8_COUNT_TRAIL_BYTES(__b); \
        if ((i) + __count > (length)) { \
            /* fewer bytes are left than the lead byte announces */ \
            __count = (uint8)((length) - (i)); \
        } \
        for (; __count > 0 && UTF8_IS_TRAIL((s)[i]); --__count) { \
            ++(i); \
        } \
    } \
}
|
||||
|
||||
/*
 * Advance i by up to n code points with UTF8_FWD_1_SAFE, stopping early
 * once i has reached length. Does nothing when n is zero or negative.
 */
#define UTF8_FWD_N_SAFE(s, i, length, n) { \
    int32 __N = (n); \
    for (; __N > 0 && (i) < (length); --__N) { \
        UTF8_FWD_1_SAFE(s, i, length); \
    } \
}
|
||||
|
||||
/*
 * If s[i] is a trail byte, replace i with the result of
 * utf8_back1SafeBody(s, start, i); otherwise leave i alone.
 */
#define UTF8_SET_CHAR_START_SAFE(s, start, i) { \
    if (UTF8_IS_TRAIL((s)[(i)])) { \
        (i) = utf8_back1SafeBody(s, start, (int32)(i)); \
    } \
}
|
||||
|
||||
/* definitions with backward iteration -------------------------------------- */
|
||||
|
||||
/*
 * Step i back by one code point and store that code point in c.
 * Starting from the byte before i, trail bytes contribute six bits each
 * at increasing shifts until a byte >= 0xc0 is met; that byte is masked as
 * a lead byte and supplies the topmost bits.
 * There is no lower bound on i and nothing is validated.
 */
#define UTF8_PREV_CHAR_UNSAFE(s, i, c) { \
    (c) = (s)[--(i)]; \
    if (UTF8_IS_TRAIL(c)) { \
        uint8 __b, __count = 1, __shift = 6; \
        /* c is a trail byte */ \
        (c) &= 0x3f; \
        for (;;) { \
            __b = (s)[--(i)]; \
            if (__b >= 0xc0) { \
                /* lead byte reached: the code point is complete */ \
                UTF8_MASK_LEAD_BYTE(__b, __count); \
                (c) |= (UChar32)__b << __shift; \
                break; \
            } \
            /* one more trail byte */ \
            (c) |= (UChar32)(__b & 0x3f) << __shift; \
            ++__count; \
            __shift += 6; \
        } \
    } \
}
|
||||
|
||||
/*
 * Decrement i at least once and keep decrementing while s[i] is a trail
 * byte. There is no lower bound on i.
 */
#define UTF8_BACK_1_UNSAFE(s, i) { \
    do { \
        --(i); \
    } while (UTF8_IS_TRAIL((s)[i])); \
}
|
||||
|
||||
/*
 * Move i back by n code points by applying UTF8_BACK_1_UNSAFE n times.
 * Does nothing when n is zero or negative; the start of the buffer is not
 * checked.
 *
 * The counter is declared as int32, like the one in UTF8_FWD_N_SAFE and
 * like every offset in the function prototypes of this header.
 * NOTE(review): it used to be UTextOffset, ICU's older name for a 32-bit
 * offset - confirm that no build defines UTextOffset differently.
 */
#define UTF8_BACK_N_UNSAFE(s, i, n) { \
    int32 __N = (n); \
    while (__N > 0) { \
        UTF8_BACK_1_UNSAFE(s, i); \
        --__N; \
    } \
}
|
||||
|
||||
/*
 * Step back one code point with UTF8_BACK_1_UNSAFE, then forward one code
 * point with UTF8_FWD_1_UNSAFE. No bounds are checked.
 */
#define UTF8_SET_CHAR_LIMIT_UNSAFE(s, i) { \
    UTF8_BACK_1_UNSAFE(s, i); \
    UTF8_FWD_1_UNSAFE(s, i); \
}
|
||||
|
||||
/*
 * Decrement i and read s[i] into c. If that byte is a trail byte,
 * utf8_prevCharSafeBody() takes over: it receives start, the address of i
 * and the strict flag, and its result replaces c.
 * Any other byte is left in c unchanged.
 */
#define UTF8_PREV_CHAR_SAFE(s, start, i, c, strict) { \
    (c) = (s)[--(i)]; \
    if (UTF8_IS_TRAIL((c))) { \
        (c) = utf8_prevCharSafeBody(s, start, &(i), c, strict); \
    } \
}
|
||||
|
||||
/*
 * Decrement i; if the byte now at s[i] is a trail byte, replace i with
 * the result of utf8_back1SafeBody(s, start, i).
 */
#define UTF8_BACK_1_SAFE(s, start, i) { \
    if (UTF8_IS_TRAIL((s)[--(i)])) { \
        (i) = utf8_back1SafeBody(s, start, (int32)(i)); \
    } \
}
|
||||
|
||||
/*
 * Move i back by up to n code points with UTF8_BACK_1_SAFE, stopping
 * early once i has reached start. Does nothing when n is zero or negative.
 *
 * The counter is declared as int32, like the one in UTF8_FWD_N_SAFE and
 * like every offset in the function prototypes of this header.
 * NOTE(review): it used to be UTextOffset, ICU's older name for a 32-bit
 * offset - confirm that no build defines UTextOffset differently.
 */
#define UTF8_BACK_N_SAFE(s, start, i, n) { \
    int32 __N = (n); \
    while (__N > 0 && (i) > (start)) { \
        UTF8_BACK_1_SAFE(s, start, i); \
        --__N; \
    } \
}
|
||||
|
||||
/*
 * Only acts when start < i < length. It then steps back one code point
 * with UTF8_BACK_1_SAFE, advances by one byte plus the trail-byte count
 * the table holds for s[i], and clamps the result to length.
 */
#define UTF8_SET_CHAR_LIMIT_SAFE(s, start, i, length) { \
    if ((start) < (i) && (i) < (length)) { \
        UTF8_BACK_1_SAFE(s, start, i); \
        (i) += 1 + UTF8_COUNT_TRAIL_BYTES((s)[i]); \
        if ((i) > (length)) { \
            /* the announced sequence would run past the end */ \
            (i) = (length); \
        } \
    } \
}
|
||||
|
||||
#endif
|
Loading…
Reference in New Issue
Block a user