Commit 8802474a authored by Eli Zaretskii

Assign correct general-category and names to surrogates

* admin/unidata/unidata-gen.el (unidata-setup-list): Don't ignore
surrogates.  This avoids assigning them the default
general-category of 'Cn', i.e. unassigned codepoints.
(unidata-get-name): Give surrogates synthetic names.
parent b5919771
......@@ -102,7 +102,8 @@
(tail table)
(block-names '(("^<CJK Ideograph" . CJK\ IDEOGRAPH)
("^<Hangul Syllable" . HANGUL\ SYLLABLE)
("^<.*Surrogate" . nil)
("^<.*High Surrogate" . HIGH\ SURROGATE)
("^<.*Low Surrogate" . LOW\ SURROGATE)
("^<.*Private Use" . PRIVATE\ USE)))
val char name)
(setq unidata-text-file (expand-file-name unidata-text-file unidata-dir))
......@@ -137,11 +138,8 @@
(if (string-match (caar l) block-name)
(setq name (cdar l) l nil)
(setq l (cdr l))))
(if (not name)
;; As this is a surrogate pair range, ignore it.
(setq val nil)
(setcar val (cons first char))
(setcar (cdr val) name))))
(setcar val (cons first char))
(setcar (cdr val) name)))
(when val
(setcdr tail (list val))
......@@ -783,6 +781,10 @@ Property value is a symbol `o' (Open), `c' (Close), or `n' (None)."
(format "%s-%04X" sym char))
(format "%s-%04X" sym char))
((eq sym 'HIGH\ SURROGATE)
(format "%s-%04X" sym char))
((eq sym 'LOW\ SURROGATE)
(format "%s-%04X" sym char))
(format "%s-%d" sym (+ (- char #xe0100) 17))))))))
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment