Commit a761fbf2 authored by Eli Zaretskii
Browse files

Import new data files from Unicode 9.0.0beta

* admin/unidata/UnicodeData.txt:
* admin/unidata/Blocks.txt:
* admin/unidata/BidiMirroring.txt:
* admin/unidata/BidiBrackets.txt: Update from Unicode 9.0.0beta.
* admin/unidata/unidata-gen.el (unidata-gen-files): Bind
'coding-system-for-read' to 'utf-8, as various Unicode data files
now actually use non-ASCII characters.
(unidata-setup-list, unidata-get-name): Support the new Tangut
Ideographs block.

* lisp/international/characters.el (standard-case-table): Add new
characters from Unicode 9.0.0.
(standard-category-table): Add Arabic block U+08A0..U+08FF.  Add
Cyrillic Extended-C block.
(char-width-table): Update ranges per Unicode 9.0.0.
* lisp/international/fontset.el (script-representative-chars): Add
new scripts defined by Unicode 9.0.0.
(otf-script-alist): Add new OTF script tags.
* lisp/international/mule-cmds.el (ucs-names): Update ranges per
Unicode 9.0.0 additions.
parent 06aad394
# BidiBrackets-8.0.0.txt
# Date: 2015-01-20, 19:00:00 GMT [AG, LI, KW]
# BidiBrackets-9.0.0.txt
# Date: 2016-01-21, 22:00:00 GMT [AG, LI, KW]
# © 2016 Unicode®, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#
# Unicode Character Database
# For documentation, see http://www.unicode.org/reports/tr44/
#
# Bidi_Paired_Bracket and Bidi_Paired_Bracket_Type Properties
#
# This file is a normative contributory data file in the Unicode
# Character Database.
#
# Copyright (c) 1991-2015 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#
# Bidi_Paired_Bracket is a normative property of type Miscellaneous,
# which establishes a mapping between characters that are treated as
# bracket pairs by the Unicode Bidirectional Algorithm.
......
# BidiMirroring-8.0.0.txt
# Date: 2015-01-20, 18:30:00 GMT [KW, LI]
# BidiMirroring-9.0.0.txt
# Date: 2016-01-21, 22:00:00 GMT [KW, LI]
# © 2016 Unicode®, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#
# Unicode Character Database
# For documentation, see http://www.unicode.org/reports/tr44/
#
# Bidi_Mirroring_Glyph Property
#
# This file is an informative contributory data file in the
# Unicode Character Database.
#
# Copyright (c) 1991-2015 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#
# This data file lists characters that have the Bidi_Mirrored=Yes property
# value, for which there is another Unicode character that typically has a glyph
# that is the mirror image of the original character's glyph.
#
# The repertoire covered by the file is Unicode 8.0.0.
# The repertoire covered by the file is Unicode 9.0.0.
#
# The file contains a list of lines with mappings from one code point
# to another one for character-based mirroring.
......
# Blocks-8.0.0.txt
# Date: 2014-11-10, 23:04:00 GMT [KW]
# Blocks-9.0.0.txt
# Date: 2016-02-05, 23:48:00 GMT [KW]
# © 2016 Unicode®, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#
# Unicode Character Database
# Copyright (c) 1991-2014 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# For documentation, see http://www.unicode.org/reports/tr44/
#
# Format:
......@@ -93,6 +93,7 @@
1BC0..1BFF; Batak
1C00..1C4F; Lepcha
1C50..1C7F; Ol Chiki
1C80..1C8F; Cyrillic Extended-C
1CC0..1CCF; Sundanese Supplement
1CD0..1CFF; Vedic Extensions
1D00..1D7F; Phonetic Extensions
......@@ -209,6 +210,7 @@ FFF0..FFFF; Specials
10400..1044F; Deseret
10450..1047F; Shavian
10480..104AF; Osmanya
104B0..104FF; Osage
10500..1052F; Elbasan
10530..1056F; Caucasian Albanian
10600..1077F; Linear A
......@@ -243,13 +245,17 @@ FFF0..FFFF; Specials
11280..112AF; Multani
112B0..112FF; Khudawadi
11300..1137F; Grantha
11400..1147F; Newa
11480..114DF; Tirhuta
11580..115FF; Siddham
11600..1165F; Modi
11660..1167F; Mongolian Supplement
11680..116CF; Takri
11700..1173F; Ahom
118A0..118FF; Warang Citi
11AC0..11AFF; Pau Cin Hau
11C00..11C6F; Bhaiksuki
11C70..11CBF; Marchen
12000..123FF; Cuneiform
12400..1247F; Cuneiform Numbers and Punctuation
12480..1254F; Early Dynastic Cuneiform
......@@ -260,6 +266,9 @@ FFF0..FFFF; Specials
16AD0..16AFF; Bassa Vah
16B00..16B8F; Pahawh Hmong
16F00..16F9F; Miao
16FE0..16FFF; Ideographic Symbols and Punctuation
17000..187FF; Tangut
18800..18AFF; Tangut Components
1B000..1B0FF; Kana Supplement
1BC00..1BC9F; Duployan
1BCA0..1BCAF; Shorthand Format Controls
......@@ -270,7 +279,9 @@ FFF0..FFFF; Specials
1D360..1D37F; Counting Rod Numerals
1D400..1D7FF; Mathematical Alphanumeric Symbols
1D800..1DAAF; Sutton SignWriting
1E000..1E02F; Glagolitic Supplement
1E800..1E8DF; Mende Kikakui
1E900..1E95F; Adlam
1EE00..1EEFF; Arabic Mathematical Alphabetic Symbols
1F000..1F02F; Mahjong Tiles
1F030..1F09F; Domino Tiles
......
This diff is collapsed.
......@@ -101,6 +101,7 @@
(let* ((table (list nil))
(tail table)
(block-names '(("^<CJK Ideograph" . CJK\ IDEOGRAPH)
("^<Tangut Ideograph" . TANGUT\ IDEOGRAPH)
("^<Hangul Syllable" . HANGUL\ SYLLABLE)
("^<.*High Surrogate" . HIGH\ SURROGATE)
("^<.*Low Surrogate" . LOW\ SURROGATE)
......@@ -779,6 +780,8 @@ Property value is a symbol `o' (Open), `c' (Close), or `n' (None)."
(aref (aref jamo-name-table 2) (1- T)))))))
((eq sym 'CJK\ IDEOGRAPH)
(format "%s-%04X" sym char))
((eq sym 'TANGUT\ IDEOGRAPH)
(format "%s-%04X" sym char))
((eq sym 'CJK\ COMPATIBILITY\ IDEOGRAPH)
(format "%s-%04X" sym char))
((eq sym 'HIGH\ SURROGATE)
......@@ -1302,6 +1305,7 @@ Property value is a symbol `o' (Open), `c' (Close), or `n' (None)."
unidata-text-file (or (pop command-line-args-left)
(expand-file-name "unidata.txt"))))
(let ((coding-system-for-write 'utf-8-unix)
(coding-system-for-read 'utf-8)
(charprop-file (expand-file-name "charprop.el" dest-dir))
(unidata-dir data-dir))
(dolist (elt unidata-prop-alist)
......
......@@ -290,6 +290,7 @@ with L, LRE, or LRO Unicode bidi character type.")
(map-charset-chars #'modify-category-entry (car charsets) ?b)
(setq charsets (cdr charsets))))
(modify-category-entry '(#x600 . #x6ff) ?b)
(modify-category-entry '(#x8a0 . #x8ff) ?b)
(modify-category-entry '(#xfb50 . #xfdff) ?b)
(modify-category-entry '(#xfe70 . #xfefe) ?b)
......@@ -700,6 +701,7 @@ with L, LRE, or LRO Unicode bidi character type.")
(set-case-syntax-pair ? ?ɜ tbl)
(set-case-syntax-pair ? ?ɡ tbl)
(set-case-syntax-pair ? ?ɬ tbl)
(set-case-syntax-pair ? ?ɪ tbl)
(set-case-syntax-pair ? ?ʞ tbl)
(set-case-syntax-pair ? ?ʇ tbl)
(set-case-syntax-pair ? ?ʝ tbl)
......@@ -810,6 +812,9 @@ with L, LRE, or LRO Unicode bidi character type.")
(set-case-syntax-pair c (+ c #x1C60) tbl)
(setq c (1+ c)))
;; Cyrillic Extended-C
(modify-category-entry '(#x1C80 . #x1C8F) ?y)
;; general punctuation
(setq c #x2000)
(while (<= c #x200b)
......@@ -903,6 +908,12 @@ with L, LRE, or LRO Unicode bidi character type.")
(set-case-syntax-pair c (+ c 28) tbl)
(setq c (1+ c)))
;; Osage
(setq c #x104B0)
(while (<= c #x104D3)
(set-case-syntax-pair c (+ c 40) tbl)
(setq c (1+ c)))
;; Old Hungarian
(setq c #x10c80)
(while (<= c #x10cb2)
......@@ -915,6 +926,12 @@ with L, LRE, or LRO Unicode bidi character type.")
(set-case-syntax-pair c (+ c #x20) tbl)
(setq c (1+ c)))
;; Adlam
(setq c #x1e900)
(while (<= c #x1e921)
(set-case-syntax-pair c (+ c #x22) tbl)
(setq c (1+ c)))
;; Combining diacritics
(modify-category-entry '(#x300 . #x362) ?^)
;; Combining marks
......@@ -1183,7 +1200,40 @@ with L, LRE, or LRO Unicode bidi character type.")
;; 2: East Asian Wide and Full-width characters.
(let ((l '((#x1100 . #x115F)
(#x231A . #x231B)
(#x2329 . #x232A)
(#x23E9 . #x23EC)
(#x23F0 . #x23F0)
(#x23F3 . #x23F3)
(#x25FD . #x25FE)
(#x2614 . #x2615)
(#x2648 . #x2653)
(#x267F . #x267F)
(#x2693 . #x2693)
(#x26A1 . #x26A1)
(#x26AA . #x26AB)
(#x26BD . #x26BE)
(#x26C4 . #x26C5)
(#x26CE . #x26CE)
(#x26D4 . #x26D4)
(#x26EA . #x26EA)
(#x26F2 . #x26F3)
(#x26F5 . #x26F5)
(#x26FA . #x26FA)
(#x26FD . #x26FD)
(#x2705 . #x2705)
(#x270A . #x270B)
(#x2728 . #x2728)
(#x274C . #x274C)
(#x274E . #x274E)
(#x2753 . #x2755)
(#x2757 . #x2757)
(#x2795 . #x2797)
(#x27B0 . #x27B0)
(#x27BF . #x27BF)
(#x2B1B . #x2B1C)
(#x2B50 . #x2B50)
(#x2B55 . #x2B55)
(#x2E80 . #x303E)
(#x3040 . #xA4CF)
(#xAC00 . #xD7A3)
......@@ -1191,6 +1241,46 @@ with L, LRE, or LRO Unicode bidi character type.")
(#xFE30 . #xFE6F)
(#xFF01 . #xFF60)
(#xFFE0 . #xFFE6)
(#x16FE0 . #x16FE0)
(#x17000 . #x187EC)
(#x18800 . #x18AF2)
(#x1F18E . #x1F18E)
(#x1F191 . #x1F19A)
(#x1F200 . #x1F202)
(#x1F210 . #x1F23B)
(#x1F300 . #x1F320)
(#x1F32D . #x1F335)
(#x1F337 . #x1F37C)
(#x1F37E . #x1F393)
(#x1F3A0 . #x1F3CA)
(#x1F3CF . #x1F3D3)
(#x1F3E0 . #x1F3F0)
(#x1F3F4 . #x1F3F4)
(#x1F3F8 . #x1F3FA)
(#x1F3FB . #x1F3FF)
(#x1F440 . #x1F440)
(#x1F442 . #x1F4FC)
(#x1F4FF . #x1F53D)
(#x1F54B . #x1F54E)
(#x1F550 . #x1F567)
(#x1F57A . #x1F57A)
(#x1F595 . #x1F596)
(#x1F5A4 . #x1F5A4)
(#x1F5FB . #x1F5FF)
(#x1F600 . #x1F64F)
(#x1F680 . #x1F6C5)
(#x1F6CC . #x1F6CC)
(#x1F6D0 . #x1F6D2)
(#x1F6EB . #x1F6EC)
(#x1F6F4 . #x1F6F6)
(#x1F910 . #x1F91E)
(#x1F920 . #x1F927)
(#x1F930 . #x1F930)
(#x1F933 . #x1F93E)
(#x1F940 . #x1F94B)
(#x1F950 . #x1F95E)
(#x1F980 . #x1F991)
(#x1F9C0 . #x1F9C0)
(#x20000 . #x2FFFF)
(#x30000 . #x3FFFF))))
(dolist (elt l)
......
......@@ -205,6 +205,7 @@
(deseret #x10400)
(shavian #x10450)
(osmanya #x10480)
(osage #x104B0)
(elbasan #x10500)
(caucasian-albanian #x10530)
(linear-a #x10600)
......@@ -220,17 +221,22 @@
(khojki #x11200)
(khudawadi #x112B0)
(grantha #x11305)
(newa #x11400)
(tirhuta #x11481)
(siddham #x11580)
(modi #x11600)
(takri #x11680)
(warang-citi #x118A1)
(pau-cin-hau #x11AC0)
(bhaiksuki #x11C00)
(marchen #x11C72)
(cuneiform #x12000)
(cuneiform-numbers-and-punctuation #x12400)
(mro #x16A40)
(bassa-vah #x16AD0)
(pahawh-hmong #x16B11)
(tangut #x17000)
(tangut-components #x18800)
(duployan-shorthand #x1BC20)
(byzantine-musical-symbol #x1D000)
(musical-symbol #x1D100)
......@@ -238,31 +244,38 @@
(tai-xuan-jing-symbol #x1D300)
(counting-rod-numeral #x1D360)
(mende-kikakui #x1E810)
(adlam #x1E900)
(mahjong-tile #x1F000)
(domino-tile #x1F030)))
(defvar otf-script-alist)
;; The below was synchronized with the latest Jan 3, 2013 version of
;; The below was synchronized with the latest Feb 25, 2016 version of
;; https://www.microsoft.com/typography/otspec/scripttags.htm.
(setq otf-script-alist
'((arab . arabic)
'((adlm . adlam)
(ahom . ahom)
(hluw . anatolian)
(arab . arabic)
(armi . aramaic)
(armn . armenian)
(avst . avestan)
(bali . balinese)
(bamu . bamum)
(bass . bassa-vah)
(batk . batak)
(bng2 . bengali)
(beng . bengali)
(bhks . bhaiksuki)
(bopo . bopomofo)
(brai . braille)
(brah . brahmi)
(brai . braille)
(bugi . buginese)
(buhd . buhid)
(byzm . byzantine-musical-symbol)
(cans . canadian-aboriginal)
(cari . carian)
(aghb . caucasian-albanian)
(cakm . chakma)
(cham . cham)
(cher . cherokee)
......@@ -273,11 +286,14 @@
(dsrt . deseret)
(deva . devanagari)
(dev2 . devanagari)
(dupl . duployan-shorthand)
(egyp . egyptian)
(elba . elbasan)
(ethi . ethiopic)
(geor . georgian)
(glag . glagolitic)
(goth . gothic)
(gran . grantha)
(grek . greek)
(gujr . gujarati)
(gjr2 . gujarati)
......@@ -287,6 +303,7 @@
(hang . hangul)
(jamo . hangul)
(hano . hanunoo)
(hatr . hatran)
(hebr . hebrew)
(phli . inscriptional-pahlavi)
(prti . inscriptional-parthian)
......@@ -298,43 +315,67 @@
(kali . kayah-li)
(khar . kharoshthi)
(khmr . khmer)
(khoj . khojki)
(sind . khudawadi)
(lao\ . lao)
(latn . latin)
(lepc . lepcha)
(limb . limbu)
(lina . linear_a)
(linb . linear_b)
(lisu . lisu)
(lyci . lycian)
(lydi . lydian)
(mahj . mahajani)
(marc . marchen)
(mlym . malayalam)
(mlm2 . malayalam)
(mand . mandaic)
(mani . manichaean)
(math . mathematical)
(mtei . meetei-mayek)
(mend . mende-kikakui)
(merc . meroitic)
(mero . meroitic)
(plrd . miao)
(modi . modi)
(mong . mongolian)
(mroo . mro)
(mult . multani)
(musc . musical-symbol)
(mym2 . burmese)
(mymr . burmese)
(nbat . nabataean)
(newa . newa)
(nko\ . nko)
(ogam . ogham)
(olck . ol-chiki)
(ital . old_italic)
(xpeo . old_persian)
(narb . old-north-arabian)
(perm . old-permic)
(sarb . old-south-arabian)
(orkh . old-turkic)
(orya . oriya)
(ory2 . oriya)
(osge . osage)
(osma . osmanya)
(hmng . pahawh-hmong)
(palm . palmyrene)
(pauc . pau-cin-hau)
(phag . phags-pa)
(phli . inscriptional-pahlavi)
(phnx . phoenician)
(phlp . psalter-pahlavi)
(prti . inscriptional-parthian)
(rjng . rejang)
(runr . runic)
(samr . samaritan)
(saur . saurashtra)
(shrd . sharada)
(shaw . shavian)
(sidd . siddham)
(sgnw . sutton-sign-writing)
(sinh . sinhala)
(sora . sora-sompeng)
(sund . sundanese)
......@@ -349,14 +390,17 @@
(takr . takri)
(taml . tamil)
(tml2 . tamil)
(tang . tangut)
(telu . telugu)
(tel2 . telugu)
(thaa . thaana)
(thai . thai)
(tibt . tibetan)
(tfng . tifinagh)
(tirh . tirhuta)
(ugar . ugaritic)
(vai\ . vai)
(wara . warang-citi)
(yi\ \ . yi)))
;; Set standard fontname specification of characters in the default
......
......@@ -2939,7 +2939,10 @@ on encoding."
(#x14400 . #x14646)
;; (#x14647 . #x167FF) unused
(#x16800 . #x16F9F)
;; (#x16FA0 . #x1AFFF) unused
(#x16FE0 . #x16FE0)
;; (#x17000 . #x187FF) Tangut Ideographs
;; (#x18800 . #x18AFF) Tangut Components
;; (#x18B00 . #x1AFFF) unused
(#x1B000 . #x1B0FF)
;; (#x1B100 . #x1BBFF) unused
(#x1BC00 . #x1BCAF)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment