Required IDNA/Unicode properties files and generation.

This commit is contained in:
Chris Young 2014-05-30 20:00:49 +01:00 committed by Daniel Silverstone
parent a4940bb56c
commit e5d5e68eb5
4 changed files with 5370 additions and 0 deletions

View File

@ -0,0 +1,318 @@
# DerivedJoiningType-5.2.0.txt
# Date: 2009-05-28, 20:37:39 GMT [MD]
#
# Unicode Character Database
# Copyright (c) 1991-2009 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# For documentation, see http://www.unicode.org/reports/tr44/
# ================================================
# Type T is derived, as described in ArabicShaping.txt
# All code points not explicitly listed for Joining_Type
# have the value Non_Joining (U).
# @missing: 0000..10FFFF; Non_Joining
# ================================================
# Joining_Type=Join_Causing
0640 ; C # Lm ARABIC TATWEEL
07FA ; C # Lm NKO LAJANYALAN
200D ; C # Cf ZERO WIDTH JOINER
# Total code points: 3
# ================================================
# Joining_Type=Dual_Joining
0626 ; D # Lo ARABIC LETTER YEH WITH HAMZA ABOVE
0628 ; D # Lo ARABIC LETTER BEH
062A..062E ; D # Lo [5] ARABIC LETTER TEH..ARABIC LETTER KHAH
0633..063F ; D # Lo [13] ARABIC LETTER SEEN..ARABIC LETTER FARSI YEH WITH THREE DOTS ABOVE
0641..0647 ; D # Lo [7] ARABIC LETTER FEH..ARABIC LETTER HEH
0649..064A ; D # Lo [2] ARABIC LETTER ALEF MAKSURA..ARABIC LETTER YEH
066E..066F ; D # Lo [2] ARABIC LETTER DOTLESS BEH..ARABIC LETTER DOTLESS QAF
0678..0687 ; D # Lo [16] ARABIC LETTER HIGH HAMZA YEH..ARABIC LETTER TCHEHEH
069A..06BF ; D # Lo [38] ARABIC LETTER SEEN WITH DOT BELOW AND DOT ABOVE..ARABIC LETTER TCHEH WITH DOT ABOVE
06C1..06C2 ; D # Lo [2] ARABIC LETTER HEH GOAL..ARABIC LETTER HEH GOAL WITH HAMZA ABOVE
06CC ; D # Lo ARABIC LETTER FARSI YEH
06CE ; D # Lo ARABIC LETTER YEH WITH SMALL V
06D0..06D1 ; D # Lo [2] ARABIC LETTER E..ARABIC LETTER YEH WITH THREE DOTS BELOW
06FA..06FC ; D # Lo [3] ARABIC LETTER SHEEN WITH DOT BELOW..ARABIC LETTER GHAIN WITH DOT BELOW
06FF ; D # Lo ARABIC LETTER HEH WITH INVERTED V
0712..0714 ; D # Lo [3] SYRIAC LETTER BETH..SYRIAC LETTER GAMAL GARSHUNI
071A..071D ; D # Lo [4] SYRIAC LETTER HETH..SYRIAC LETTER YUDH
071F..0727 ; D # Lo [9] SYRIAC LETTER KAPH..SYRIAC LETTER REVERSED PE
0729 ; D # Lo SYRIAC LETTER QAPH
072B ; D # Lo SYRIAC LETTER SHIN
072D..072E ; D # Lo [2] SYRIAC LETTER PERSIAN BHETH..SYRIAC LETTER PERSIAN GHAMAL
074E..0758 ; D # Lo [11] SYRIAC LETTER SOGDIAN KHAPH..ARABIC LETTER HAH WITH THREE DOTS POINTING UPWARDS BELOW
075C..076A ; D # Lo [15] ARABIC LETTER SEEN WITH FOUR DOTS ABOVE..ARABIC LETTER LAM WITH BAR
076D..0770 ; D # Lo [4] ARABIC LETTER SEEN WITH TWO DOTS VERTICALLY ABOVE..ARABIC LETTER SEEN WITH SMALL ARABIC LETTER TAH AND TWO DOTS
0772 ; D # Lo ARABIC LETTER HAH WITH SMALL ARABIC LETTER TAH ABOVE
0775..0777 ; D # Lo [3] ARABIC LETTER FARSI YEH WITH EXTENDED ARABIC-INDIC DIGIT TWO ABOVE..ARABIC LETTER FARSI YEH WITH EXTENDED ARABIC-INDIC DIGIT FOUR BELOW
077A..077F ; D # Lo [6] ARABIC LETTER YEH BARREE WITH EXTENDED ARABIC-INDIC DIGIT TWO ABOVE..ARABIC LETTER KAF WITH TWO DOTS ABOVE
07CA..07EA ; D # Lo [33] NKO LETTER A..NKO LETTER JONA RA
# Total code points: 188
# ================================================
# Joining_Type=Right_Joining
0622..0625 ; R # Lo [4] ARABIC LETTER ALEF WITH MADDA ABOVE..ARABIC LETTER ALEF WITH HAMZA BELOW
0627 ; R # Lo ARABIC LETTER ALEF
0629 ; R # Lo ARABIC LETTER TEH MARBUTA
062F..0632 ; R # Lo [4] ARABIC LETTER DAL..ARABIC LETTER ZAIN
0648 ; R # Lo ARABIC LETTER WAW
0671..0673 ; R # Lo [3] ARABIC LETTER ALEF WASLA..ARABIC LETTER ALEF WITH WAVY HAMZA BELOW
0675..0677 ; R # Lo [3] ARABIC LETTER HIGH HAMZA ALEF..ARABIC LETTER U WITH HAMZA ABOVE
0688..0699 ; R # Lo [18] ARABIC LETTER DDAL..ARABIC LETTER REH WITH FOUR DOTS ABOVE
06C0 ; R # Lo ARABIC LETTER HEH WITH YEH ABOVE
06C3..06CB ; R # Lo [9] ARABIC LETTER TEH MARBUTA GOAL..ARABIC LETTER VE
06CD ; R # Lo ARABIC LETTER YEH WITH TAIL
06CF ; R # Lo ARABIC LETTER WAW WITH DOT ABOVE
06D2..06D3 ; R # Lo [2] ARABIC LETTER YEH BARREE..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE
06D5 ; R # Lo ARABIC LETTER AE
06EE..06EF ; R # Lo [2] ARABIC LETTER DAL WITH INVERTED V..ARABIC LETTER REH WITH INVERTED V
0710 ; R # Lo SYRIAC LETTER ALAPH
0715..0719 ; R # Lo [5] SYRIAC LETTER DALATH..SYRIAC LETTER ZAIN
071E ; R # Lo SYRIAC LETTER YUDH HE
0728 ; R # Lo SYRIAC LETTER SADHE
072A ; R # Lo SYRIAC LETTER RISH
072C ; R # Lo SYRIAC LETTER TAW
072F ; R # Lo SYRIAC LETTER PERSIAN DHALATH
074D ; R # Lo SYRIAC LETTER SOGDIAN ZHAIN
0759..075B ; R # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW AND SMALL TAH..ARABIC LETTER REH WITH STROKE
076B..076C ; R # Lo [2] ARABIC LETTER REH WITH TWO DOTS VERTICALLY ABOVE..ARABIC LETTER REH WITH HAMZA ABOVE
0771 ; R # Lo ARABIC LETTER REH WITH SMALL ARABIC LETTER TAH AND TWO DOTS
0773..0774 ; R # Lo [2] ARABIC LETTER ALEF WITH EXTENDED ARABIC-INDIC DIGIT TWO ABOVE..ARABIC LETTER ALEF WITH EXTENDED ARABIC-INDIC DIGIT THREE ABOVE
0778..0779 ; R # Lo [2] ARABIC LETTER WAW WITH EXTENDED ARABIC-INDIC DIGIT TWO ABOVE..ARABIC LETTER WAW WITH EXTENDED ARABIC-INDIC DIGIT THREE ABOVE
# Total code points: 74
# ================================================
# Joining_Type=Transparent
00AD ; T # Cf SOFT HYPHEN
0300..036F ; T # Mn [112] COMBINING GRAVE ACCENT..COMBINING LATIN SMALL LETTER X
0483..0487 ; T # Mn [5] COMBINING CYRILLIC TITLO..COMBINING CYRILLIC POKRYTIE
0488..0489 ; T # Me [2] COMBINING CYRILLIC HUNDRED THOUSANDS SIGN..COMBINING CYRILLIC MILLIONS SIGN
0591..05BD ; T # Mn [45] HEBREW ACCENT ETNAHTA..HEBREW POINT METEG
05BF ; T # Mn HEBREW POINT RAFE
05C1..05C2 ; T # Mn [2] HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT
05C4..05C5 ; T # Mn [2] HEBREW MARK UPPER DOT..HEBREW MARK LOWER DOT
05C7 ; T # Mn HEBREW POINT QAMATS QATAN
0610..061A ; T # Mn [11] ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL KASRA
064B..065E ; T # Mn [20] ARABIC FATHATAN..ARABIC FATHA WITH TWO DOTS
0670 ; T # Mn ARABIC LETTER SUPERSCRIPT ALEF
06D6..06DC ; T # Mn [7] ARABIC SMALL HIGH LIGATURE SAD WITH LAM WITH ALEF MAKSURA..ARABIC SMALL HIGH SEEN
06DE ; T # Me ARABIC START OF RUB EL HIZB
06DF..06E4 ; T # Mn [6] ARABIC SMALL HIGH ROUNDED ZERO..ARABIC SMALL HIGH MADDA
06E7..06E8 ; T # Mn [2] ARABIC SMALL HIGH YEH..ARABIC SMALL HIGH NOON
06EA..06ED ; T # Mn [4] ARABIC EMPTY CENTRE LOW STOP..ARABIC SMALL LOW MEEM
070F ; T # Cf SYRIAC ABBREVIATION MARK
0711 ; T # Mn SYRIAC LETTER SUPERSCRIPT ALAPH
0730..074A ; T # Mn [27] SYRIAC PTHAHA ABOVE..SYRIAC BARREKH
07A6..07B0 ; T # Mn [11] THAANA ABAFILI..THAANA SUKUN
07EB..07F3 ; T # Mn [9] NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE
0816..0819 ; T # Mn [4] SAMARITAN MARK IN..SAMARITAN MARK DAGESH
081B..0823 ; T # Mn [9] SAMARITAN MARK EPENTHETIC YUT..SAMARITAN VOWEL SIGN A
0825..0827 ; T # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U
0829..082D ; T # Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA
0900..0902 ; T # Mn [3] DEVANAGARI SIGN INVERTED CANDRABINDU..DEVANAGARI SIGN ANUSVARA
093C ; T # Mn DEVANAGARI SIGN NUKTA
0941..0948 ; T # Mn [8] DEVANAGARI VOWEL SIGN U..DEVANAGARI VOWEL SIGN AI
094D ; T # Mn DEVANAGARI SIGN VIRAMA
0951..0955 ; T # Mn [5] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI VOWEL SIGN CANDRA LONG E
0962..0963 ; T # Mn [2] DEVANAGARI VOWEL SIGN VOCALIC L..DEVANAGARI VOWEL SIGN VOCALIC LL
0981 ; T # Mn BENGALI SIGN CANDRABINDU
09BC ; T # Mn BENGALI SIGN NUKTA
09C1..09C4 ; T # Mn [4] BENGALI VOWEL SIGN U..BENGALI VOWEL SIGN VOCALIC RR
09CD ; T # Mn BENGALI SIGN VIRAMA
09E2..09E3 ; T # Mn [2] BENGALI VOWEL SIGN VOCALIC L..BENGALI VOWEL SIGN VOCALIC LL
0A01..0A02 ; T # Mn [2] GURMUKHI SIGN ADAK BINDI..GURMUKHI SIGN BINDI
0A3C ; T # Mn GURMUKHI SIGN NUKTA
0A41..0A42 ; T # Mn [2] GURMUKHI VOWEL SIGN U..GURMUKHI VOWEL SIGN UU
0A47..0A48 ; T # Mn [2] GURMUKHI VOWEL SIGN EE..GURMUKHI VOWEL SIGN AI
0A4B..0A4D ; T # Mn [3] GURMUKHI VOWEL SIGN OO..GURMUKHI SIGN VIRAMA
0A51 ; T # Mn GURMUKHI SIGN UDAAT
0A70..0A71 ; T # Mn [2] GURMUKHI TIPPI..GURMUKHI ADDAK
0A75 ; T # Mn GURMUKHI SIGN YAKASH
0A81..0A82 ; T # Mn [2] GUJARATI SIGN CANDRABINDU..GUJARATI SIGN ANUSVARA
0ABC ; T # Mn GUJARATI SIGN NUKTA
0AC1..0AC5 ; T # Mn [5] GUJARATI VOWEL SIGN U..GUJARATI VOWEL SIGN CANDRA E
0AC7..0AC8 ; T # Mn [2] GUJARATI VOWEL SIGN E..GUJARATI VOWEL SIGN AI
0ACD ; T # Mn GUJARATI SIGN VIRAMA
0AE2..0AE3 ; T # Mn [2] GUJARATI VOWEL SIGN VOCALIC L..GUJARATI VOWEL SIGN VOCALIC LL
0B01 ; T # Mn ORIYA SIGN CANDRABINDU
0B3C ; T # Mn ORIYA SIGN NUKTA
0B3F ; T # Mn ORIYA VOWEL SIGN I
0B41..0B44 ; T # Mn [4] ORIYA VOWEL SIGN U..ORIYA VOWEL SIGN VOCALIC RR
0B4D ; T # Mn ORIYA SIGN VIRAMA
0B56 ; T # Mn ORIYA AI LENGTH MARK
0B62..0B63 ; T # Mn [2] ORIYA VOWEL SIGN VOCALIC L..ORIYA VOWEL SIGN VOCALIC LL
0B82 ; T # Mn TAMIL SIGN ANUSVARA
0BC0 ; T # Mn TAMIL VOWEL SIGN II
0BCD ; T # Mn TAMIL SIGN VIRAMA
0C3E..0C40 ; T # Mn [3] TELUGU VOWEL SIGN AA..TELUGU VOWEL SIGN II
0C46..0C48 ; T # Mn [3] TELUGU VOWEL SIGN E..TELUGU VOWEL SIGN AI
0C4A..0C4D ; T # Mn [4] TELUGU VOWEL SIGN O..TELUGU SIGN VIRAMA
0C55..0C56 ; T # Mn [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK
0C62..0C63 ; T # Mn [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL
0CBC ; T # Mn KANNADA SIGN NUKTA
0CBF ; T # Mn KANNADA VOWEL SIGN I
0CC6 ; T # Mn KANNADA VOWEL SIGN E
0CCC..0CCD ; T # Mn [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA
0CE2..0CE3 ; T # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL
0D41..0D44 ; T # Mn [4] MALAYALAM VOWEL SIGN U..MALAYALAM VOWEL SIGN VOCALIC RR
0D4D ; T # Mn MALAYALAM SIGN VIRAMA
0D62..0D63 ; T # Mn [2] MALAYALAM VOWEL SIGN VOCALIC L..MALAYALAM VOWEL SIGN VOCALIC LL
0DCA ; T # Mn SINHALA SIGN AL-LAKUNA
0DD2..0DD4 ; T # Mn [3] SINHALA VOWEL SIGN KETTI IS-PILLA..SINHALA VOWEL SIGN KETTI PAA-PILLA
0DD6 ; T # Mn SINHALA VOWEL SIGN DIGA PAA-PILLA
0E31 ; T # Mn THAI CHARACTER MAI HAN-AKAT
0E34..0E3A ; T # Mn [7] THAI CHARACTER SARA I..THAI CHARACTER PHINTHU
0E47..0E4E ; T # Mn [8] THAI CHARACTER MAITAIKHU..THAI CHARACTER YAMAKKAN
0EB1 ; T # Mn LAO VOWEL SIGN MAI KAN
0EB4..0EB9 ; T # Mn [6] LAO VOWEL SIGN I..LAO VOWEL SIGN UU
0EBB..0EBC ; T # Mn [2] LAO VOWEL SIGN MAI KON..LAO SEMIVOWEL SIGN LO
0EC8..0ECD ; T # Mn [6] LAO TONE MAI EK..LAO NIGGAHITA
0F18..0F19 ; T # Mn [2] TIBETAN ASTROLOGICAL SIGN -KHYUD PA..TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS
0F35 ; T # Mn TIBETAN MARK NGAS BZUNG NYI ZLA
0F37 ; T # Mn TIBETAN MARK NGAS BZUNG SGOR RTAGS
0F39 ; T # Mn TIBETAN MARK TSA -PHRU
0F71..0F7E ; T # Mn [14] TIBETAN VOWEL SIGN AA..TIBETAN SIGN RJES SU NGA RO
0F80..0F84 ; T # Mn [5] TIBETAN VOWEL SIGN REVERSED I..TIBETAN MARK HALANTA
0F86..0F87 ; T # Mn [2] TIBETAN SIGN LCI RTAGS..TIBETAN SIGN YANG RTAGS
0F90..0F97 ; T # Mn [8] TIBETAN SUBJOINED LETTER KA..TIBETAN SUBJOINED LETTER JA
0F99..0FBC ; T # Mn [36] TIBETAN SUBJOINED LETTER NYA..TIBETAN SUBJOINED LETTER FIXED-FORM RA
0FC6 ; T # Mn TIBETAN SYMBOL PADMA GDAN
102D..1030 ; T # Mn [4] MYANMAR VOWEL SIGN I..MYANMAR VOWEL SIGN UU
1032..1037 ; T # Mn [6] MYANMAR VOWEL SIGN AI..MYANMAR SIGN DOT BELOW
1039..103A ; T # Mn [2] MYANMAR SIGN VIRAMA..MYANMAR SIGN ASAT
103D..103E ; T # Mn [2] MYANMAR CONSONANT SIGN MEDIAL WA..MYANMAR CONSONANT SIGN MEDIAL HA
1058..1059 ; T # Mn [2] MYANMAR VOWEL SIGN VOCALIC L..MYANMAR VOWEL SIGN VOCALIC LL
105E..1060 ; T # Mn [3] MYANMAR CONSONANT SIGN MON MEDIAL NA..MYANMAR CONSONANT SIGN MON MEDIAL LA
1071..1074 ; T # Mn [4] MYANMAR VOWEL SIGN GEBA KAREN I..MYANMAR VOWEL SIGN KAYAH EE
1082 ; T # Mn MYANMAR CONSONANT SIGN SHAN MEDIAL WA
1085..1086 ; T # Mn [2] MYANMAR VOWEL SIGN SHAN E ABOVE..MYANMAR VOWEL SIGN SHAN FINAL Y
108D ; T # Mn MYANMAR SIGN SHAN COUNCIL EMPHATIC TONE
109D ; T # Mn MYANMAR VOWEL SIGN AITON AI
135F ; T # Mn ETHIOPIC COMBINING GEMINATION MARK
1712..1714 ; T # Mn [3] TAGALOG VOWEL SIGN I..TAGALOG SIGN VIRAMA
1732..1734 ; T # Mn [3] HANUNOO VOWEL SIGN I..HANUNOO SIGN PAMUDPOD
1752..1753 ; T # Mn [2] BUHID VOWEL SIGN I..BUHID VOWEL SIGN U
1772..1773 ; T # Mn [2] TAGBANWA VOWEL SIGN I..TAGBANWA VOWEL SIGN U
17B4..17B5 ; T # Cf [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA
17B7..17BD ; T # Mn [7] KHMER VOWEL SIGN I..KHMER VOWEL SIGN UA
17C6 ; T # Mn KHMER SIGN NIKAHIT
17C9..17D3 ; T # Mn [11] KHMER SIGN MUUSIKATOAN..KHMER SIGN BATHAMASAT
17DD ; T # Mn KHMER SIGN ATTHACAN
180B..180D ; T # Mn [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE
18A9 ; T # Mn MONGOLIAN LETTER ALI GALI DAGALGA
1920..1922 ; T # Mn [3] LIMBU VOWEL SIGN A..LIMBU VOWEL SIGN U
1927..1928 ; T # Mn [2] LIMBU VOWEL SIGN E..LIMBU VOWEL SIGN O
1932 ; T # Mn LIMBU SMALL LETTER ANUSVARA
1939..193B ; T # Mn [3] LIMBU SIGN MUKPHRENG..LIMBU SIGN SA-I
1A17..1A18 ; T # Mn [2] BUGINESE VOWEL SIGN I..BUGINESE VOWEL SIGN U
1A56 ; T # Mn TAI THAM CONSONANT SIGN MEDIAL LA
1A58..1A5E ; T # Mn [7] TAI THAM SIGN MAI KANG LAI..TAI THAM CONSONANT SIGN SA
1A60 ; T # Mn TAI THAM SIGN SAKOT
1A62 ; T # Mn TAI THAM VOWEL SIGN MAI SAT
1A65..1A6C ; T # Mn [8] TAI THAM VOWEL SIGN I..TAI THAM VOWEL SIGN OA BELOW
1A73..1A7C ; T # Mn [10] TAI THAM VOWEL SIGN OA ABOVE..TAI THAM SIGN KHUEN-LUE KARAN
1A7F ; T # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT
1B00..1B03 ; T # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG
1B34 ; T # Mn BALINESE SIGN REREKAN
1B36..1B3A ; T # Mn [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA
1B3C ; T # Mn BALINESE VOWEL SIGN LA LENGA
1B42 ; T # Mn BALINESE VOWEL SIGN PEPET
1B6B..1B73 ; T # Mn [9] BALINESE MUSICAL SYMBOL COMBINING TEGEH..BALINESE MUSICAL SYMBOL COMBINING GONG
1B80..1B81 ; T # Mn [2] SUNDANESE SIGN PANYECEK..SUNDANESE SIGN PANGLAYAR
1BA2..1BA5 ; T # Mn [4] SUNDANESE CONSONANT SIGN PANYAKRA..SUNDANESE VOWEL SIGN PANYUKU
1BA8..1BA9 ; T # Mn [2] SUNDANESE VOWEL SIGN PAMEPET..SUNDANESE VOWEL SIGN PANEULEUNG
1C2C..1C33 ; T # Mn [8] LEPCHA VOWEL SIGN E..LEPCHA CONSONANT SIGN T
1C36..1C37 ; T # Mn [2] LEPCHA SIGN RAN..LEPCHA SIGN NUKTA
1CD0..1CD2 ; T # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA
1CD4..1CE0 ; T # Mn [13] VEDIC SIGN YAJURVEDIC MIDLINE SVARITA..VEDIC TONE RIGVEDIC KASHMIRI INDEPENDENT SVARITA
1CE2..1CE8 ; T # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL
1CED ; T # Mn VEDIC SIGN TIRYAK
1DC0..1DE6 ; T # Mn [39] COMBINING DOTTED GRAVE ACCENT..COMBINING LATIN SMALL LETTER Z
1DFD..1DFF ; T # Mn [3] COMBINING ALMOST EQUAL TO BELOW..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
200B ; T # Cf ZERO WIDTH SPACE
200E..200F ; T # Cf [2] LEFT-TO-RIGHT MARK..RIGHT-TO-LEFT MARK
202A..202E ; T # Cf [5] LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-LEFT OVERRIDE
2060..2064 ; T # Cf [5] WORD JOINER..INVISIBLE PLUS
206A..206F ; T # Cf [6] INHIBIT SYMMETRIC SWAPPING..NOMINAL DIGIT SHAPES
20D0..20DC ; T # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE
20DD..20E0 ; T # Me [4] COMBINING ENCLOSING CIRCLE..COMBINING ENCLOSING CIRCLE BACKSLASH
20E1 ; T # Mn COMBINING LEFT RIGHT ARROW ABOVE
20E2..20E4 ; T # Me [3] COMBINING ENCLOSING SCREEN..COMBINING ENCLOSING UPWARD POINTING TRIANGLE
20E5..20F0 ; T # Mn [12] COMBINING REVERSE SOLIDUS OVERLAY..COMBINING ASTERISK ABOVE
2CEF..2CF1 ; T # Mn [3] COPTIC COMBINING NI ABOVE..COPTIC COMBINING SPIRITUS LENIS
2DE0..2DFF ; T # Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS
302A..302F ; T # Mn [6] IDEOGRAPHIC LEVEL TONE MARK..HANGUL DOUBLE DOT TONE MARK
3099..309A ; T # Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
A66F ; T # Mn COMBINING CYRILLIC VZMET
A670..A672 ; T # Me [3] COMBINING CYRILLIC TEN MILLIONS SIGN..COMBINING CYRILLIC THOUSAND MILLIONS SIGN
A67C..A67D ; T # Mn [2] COMBINING CYRILLIC KAVYKA..COMBINING CYRILLIC PAYEROK
A6F0..A6F1 ; T # Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS
A802 ; T # Mn SYLOTI NAGRI SIGN DVISVARA
A806 ; T # Mn SYLOTI NAGRI SIGN HASANTA
A80B ; T # Mn SYLOTI NAGRI SIGN ANUSVARA
A825..A826 ; T # Mn [2] SYLOTI NAGRI VOWEL SIGN U..SYLOTI NAGRI VOWEL SIGN E
A8C4 ; T # Mn SAURASHTRA SIGN VIRAMA
A8E0..A8F1 ; T # Mn [18] COMBINING DEVANAGARI DIGIT ZERO..COMBINING DEVANAGARI SIGN AVAGRAHA
A926..A92D ; T # Mn [8] KAYAH LI VOWEL UE..KAYAH LI TONE CALYA PLOPHU
A947..A951 ; T # Mn [11] REJANG VOWEL SIGN I..REJANG CONSONANT SIGN R
A980..A982 ; T # Mn [3] JAVANESE SIGN PANYANGGA..JAVANESE SIGN LAYAR
A9B3 ; T # Mn JAVANESE SIGN CECAK TELU
A9B6..A9B9 ; T # Mn [4] JAVANESE VOWEL SIGN WULU..JAVANESE VOWEL SIGN SUKU MENDUT
A9BC ; T # Mn JAVANESE VOWEL SIGN PEPET
AA29..AA2E ; T # Mn [6] CHAM VOWEL SIGN AA..CHAM VOWEL SIGN OE
AA31..AA32 ; T # Mn [2] CHAM VOWEL SIGN AU..CHAM VOWEL SIGN UE
AA35..AA36 ; T # Mn [2] CHAM CONSONANT SIGN LA..CHAM CONSONANT SIGN WA
AA43 ; T # Mn CHAM CONSONANT SIGN FINAL NG
AA4C ; T # Mn CHAM CONSONANT SIGN FINAL M
AAB0 ; T # Mn TAI VIET MAI KANG
AAB2..AAB4 ; T # Mn [3] TAI VIET VOWEL I..TAI VIET VOWEL U
AAB7..AAB8 ; T # Mn [2] TAI VIET MAI KHIT..TAI VIET VOWEL IA
AABE..AABF ; T # Mn [2] TAI VIET VOWEL AM..TAI VIET TONE MAI EK
AAC1 ; T # Mn TAI VIET TONE MAI THO
ABE5 ; T # Mn MEETEI MAYEK VOWEL SIGN ANAP
ABE8 ; T # Mn MEETEI MAYEK VOWEL SIGN UNAP
ABED ; T # Mn MEETEI MAYEK APUN IYEK
FB1E ; T # Mn HEBREW POINT JUDEO-SPANISH VARIKA
FE00..FE0F ; T # Mn [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16
FE20..FE26 ; T # Mn [7] COMBINING LIGATURE LEFT HALF..COMBINING CONJOINING MACRON
FEFF ; T # Cf ZERO WIDTH NO-BREAK SPACE
FFF9..FFFB ; T # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTATION TERMINATOR
101FD ; T # Mn PHAISTOS DISC SIGN COMBINING OBLIQUE STROKE
10A01..10A03 ; T # Mn [3] KHAROSHTHI VOWEL SIGN I..KHAROSHTHI VOWEL SIGN VOCALIC R
10A05..10A06 ; T # Mn [2] KHAROSHTHI VOWEL SIGN E..KHAROSHTHI VOWEL SIGN O
10A0C..10A0F ; T # Mn [4] KHAROSHTHI VOWEL LENGTH MARK..KHAROSHTHI SIGN VISARGA
10A38..10A3A ; T # Mn [3] KHAROSHTHI SIGN BAR ABOVE..KHAROSHTHI SIGN DOT BELOW
10A3F ; T # Mn KHAROSHTHI VIRAMA
11080..11081 ; T # Mn [2] KAITHI SIGN CANDRABINDU..KAITHI SIGN ANUSVARA
110B3..110B6 ; T # Mn [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI
110B9..110BA ; T # Mn [2] KAITHI SIGN VIRAMA..KAITHI SIGN NUKTA
110BD ; T # Cf KAITHI NUMBER SIGN
1D167..1D169 ; T # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3
1D173..1D17A ; T # Cf [8] MUSICAL SYMBOL BEGIN BEAM..MUSICAL SYMBOL END PHRASE
1D17B..1D182 ; T # Mn [8] MUSICAL SYMBOL COMBINING ACCENT..MUSICAL SYMBOL COMBINING LOURE
1D185..1D18B ; T # Mn [7] MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE
1D1AA..1D1AD ; T # Mn [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO
1D242..1D244 ; T # Mn [3] COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME
E0001 ; T # Cf LANGUAGE TAG
E0020..E007F ; T # Cf [96] TAG SPACE..CANCEL TAG
E0100..E01EF ; T # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
# Total code points: 1308
# EOF

View File

@ -0,0 +1,96 @@
#!/usr/bin/perl
#
# Copyright 2014 Chris Young <chris@unsatisfactorysoftware.co.uk>
#
# This file is part of NetSurf, http://www.netsurf-browser.org/
#
# NetSurf is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; version 2 of the License.
#
# NetSurf is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
print <<HEADER;
/* This file is generated by idna-derived-props-gen.pl
* DO NOT EDIT BY HAND
*/
#ifndef _NETSURF_UTILS_IDNA_PROPS_H_
#define _NETSURF_UTILS_IDNA_PROPS_H_
typedef enum idna_property {
IDNA_P_PVALID = 1,
IDNA_P_CONTEXTJ = 2,
IDNA_P_CONTEXTO = 3,
IDNA_P_DISALLOWED = 4,
IDNA_P_UNASSIGNED = 5
} idna_property;
typedef enum idna_unicode_jt {
IDNA_UNICODE_JT_U = 0,
IDNA_UNICODE_JT_C = 1,
IDNA_UNICODE_JT_D = 2,
IDNA_UNICODE_JT_R = 3,
IDNA_UNICODE_JT_T = 4,
IDNA_UNICODE_JT_L = 5
} idna_unicode_jt;
typedef struct idna_table {
int32_t start;
int32_t end;
union p {
idna_property property;
idna_unicode_jt jt;
} p;
} idna_table;
idna_table idna_derived[] = {
HEADER
open(CSVFILE, "idna-tables-5.2.0-properties.csv");
$line = <CSVFILE>; # discard header line
while($line = <CSVFILE>) {
@items = split(/\,/, $line);
@codepoints = split(/-/, $items[0]);
if($#codepoints == 0) { $codepoints[1] = $codepoints[0]; }
print "\t{ 0x" . $codepoints[0] . ", 0x" . $codepoints[1] . ", .p.property = IDNA_P_" . $items[1] . " },\n";
}
close(CSVFILE);
print <<HEADER;
{ 0, 0, .p.property = 0}
};
idna_table idna_joiningtype[] = {
HEADER
open(TXTFILE, "DerivedJoiningType.txt");
while($line = <TXTFILE>) {
chop($line);
if(substr($line, 0, 1) eq '#') {next;}
if(length($line) == 0) {next;}
@items = split(/;/, $line);
@codepoints = split(/\./, $items[0]);
if($#codepoints == 0) { $codepoints[2] = $codepoints[0]; }
print "\t{ 0x" . $codepoints[0] . ", 0x" . $codepoints[2] . ", .p.jt = IDNA_UNICODE_JT_" . substr($items[1], 1, 1) . " },\n";
}
close(TXTFILE);
print <<HEADER;
{ 0, 0, .p.jt = 0}
};
#endif
HEADER

File diff suppressed because it is too large Load Diff

2634
utils/idna_props.h Normal file

File diff suppressed because it is too large Load Diff