Commit 42ea0349 authored by Dave Love
Browse files

(unicode-data): Check that `unicodedata-file' exists.
parent 4fb82d62
...@@ -1228,170 +1228,172 @@ looked up from it." ...@@ -1228,170 +1228,172 @@ looked up from it."
"Return a list of Unicode data for unicode CHAR. "Return a list of Unicode data for unicode CHAR.
Each element is a list of a property description and the property value. Each element is a list of a property description and the property value.
The list is null if CHAR isn't found in `unicodedata-file'." The list is null if CHAR isn't found in `unicodedata-file'."
(if unicodedata-file (when unicodedata-file
(save-excursion (unless (file-exists-p unicodedata-file)
(set-buffer (find-file-noselect unicodedata-file)) (error "`unicodedata-file' %s not found" unicodedata-file))
(goto-char (point-min)) (save-excursion
(let ((hex (format "%04X" char)) (set-buffer (find-file-noselect unicodedata-file))
found first last) (goto-char (point-min))
(if (re-search-forward (concat "^" hex) nil t) (let ((hex (format "%04X" char))
(setq found t) found first last)
;; It's not listed explicitly. Look for ranges, e.g. CJK (if (re-search-forward (concat "^" hex) nil t)
;; ideographs, and check whether it's in one of them. (setq found t)
(while (and (re-search-forward "^\\([^;]+\\);[^;]+First>;" nil t) ;; It's not listed explicitly. Look for ranges, e.g. CJK
(>= char (setq first ;; ideographs, and check whether it's in one of them.
(string-to-number (match-string 1) 16))) (while (and (re-search-forward "^\\([^;]+\\);[^;]+First>;" nil t)
(progn (>= char (setq first
(forward-line 1) (string-to-number (match-string 1) 16)))
(looking-at "^\\([^;]+\\);[^;]+Last>;") (progn
(> char (forward-line 1)
(setq last (looking-at "^\\([^;]+\\);[^;]+Last>;")
(string-to-number (match-string 1) 16)))))) (> char
(if (and (>= char first) (setq last
(<= char last)) (string-to-number (match-string 1) 16))))))
(setq found t))) (if (and (>= char first)
(if found (<= char last))
(let ((fields (mapcar (lambda (elt) (setq found t)))
(if (> (length elt) 0) (if found
elt)) (let ((fields (mapcar (lambda (elt)
(cdr (split-string (if (> (length elt) 0)
(buffer-substring elt))
(line-beginning-position) (cdr (split-string
(line-end-position)) (buffer-substring
";"))))) (line-beginning-position)
;; The length depends on whether the last field was empty. (line-end-position))
(unless (or (= 13 (length fields)) ";")))))
(= 14 (length fields))) ;; The length depends on whether the last field was empty.
(error "Invalid contents in %s" unicodedata-file)) (unless (or (= 13 (length fields))
;; The field names and values lists are slightly (= 14 (length fields)))
;; modified from Mule-UCS unidata.el. (error "Invalid contents in %s" unicodedata-file))
(list ;; The field names and values lists are slightly
(list "Name" (let ((name (nth 0 fields))) ;; modified from Mule-UCS unidata.el.
;; Check for <..., First>, <..., Last> (list
(if (string-match "\\`\\(<[^,]+\\)," name) (list "Name" (let ((name (nth 0 fields)))
(concat (match-string 1 name) ">") ;; Check for <..., First>, <..., Last>
name))) (if (string-match "\\`\\(<[^,]+\\)," name)
(list "Category" (concat (match-string 1 name) ">")
(cdr (assoc name)))
(nth 1 fields) (list "Category"
'(("Lu" . "uppercase letter") (cdr (assoc
("Ll" . "lowercase letter") (nth 1 fields)
("Lt" . "titlecase letter") '(("Lu" . "uppercase letter")
("Mn" . "non-spacing mark") ("Ll" . "lowercase letter")
("Mc" . "spacing-combining mark") ("Lt" . "titlecase letter")
("Me" . "enclosing mark") ("Mn" . "non-spacing mark")
("Nd" . "decimal digit") ("Mc" . "spacing-combining mark")
("Nl" . "letter number") ("Me" . "enclosing mark")
("No" . "other number") ("Nd" . "decimal digit")
("Zs" . "space separator") ("Nl" . "letter number")
("Zl" . "line separator") ("No" . "other number")
("Zp" . "paragraph separator") ("Zs" . "space separator")
("Cc" . "other control") ("Zl" . "line separator")
("Cf" . "other format") ("Zp" . "paragraph separator")
("Cs" . "surrogate") ("Cc" . "other control")
("Co" . "private use") ("Cf" . "other format")
("Cn" . "not assigned") ("Cs" . "surrogate")
("Lm" . "modifier letter") ("Co" . "private use")
("Lo" . "other letter") ("Cn" . "not assigned")
("Pc" . "connector punctuation") ("Lm" . "modifier letter")
("Pd" . "dash punctuation") ("Lo" . "other letter")
("Ps" . "open punctuation") ("Pc" . "connector punctuation")
("Pe" . "close punctuation") ("Pd" . "dash punctuation")
("Pi" . "initial-quotation punctuation") ("Ps" . "open punctuation")
("Pf" . "final-quotation punctuation") ("Pe" . "close punctuation")
("Po" . "other punctuation") ("Pi" . "initial-quotation punctuation")
("Sm" . "math symbol") ("Pf" . "final-quotation punctuation")
("Sc" . "currency symbol") ("Po" . "other punctuation")
("Sk" . "modifier symbol") ("Sm" . "math symbol")
("So" . "other symbol"))))) ("Sc" . "currency symbol")
(list "Combining class" ("Sk" . "modifier symbol")
(cdr (assoc ("So" . "other symbol")))))
(string-to-number (nth 2 fields)) (list "Combining class"
'((0 . "Spacing") (cdr (assoc
(1 . "Overlays and interior") (string-to-number (nth 2 fields))
(7 . "Nuktas") '((0 . "Spacing")
(8 . "Hiragana/Katakana voicing marks") (1 . "Overlays and interior")
(9 . "Viramas") (7 . "Nuktas")
(10 . "Start of fixed position classes") (8 . "Hiragana/Katakana voicing marks")
(199 . "End of fixed position classes") (9 . "Viramas")
(200 . "Below left attached") (10 . "Start of fixed position classes")
(202 . "Below attached") (199 . "End of fixed position classes")
(204 . "Below right attached") (200 . "Below left attached")
(208 . "Left attached (reordrant around \ (202 . "Below attached")
(204 . "Below right attached")
(208 . "Left attached (reordrant around \
single base character)") single base character)")
(210 . "Right attached") (210 . "Right attached")
(212 . "Above left attached") (212 . "Above left attached")
(214 . "Above attached") (214 . "Above attached")
(216 . "Above right attached") (216 . "Above right attached")
(218 . "Below left") (218 . "Below left")
(220 . "Below") (220 . "Below")
(222 . "Below right") (222 . "Below right")
(224 . "Left (reordrant around single base \ (224 . "Left (reordrant around single base \
character)") character)")
(226 . "Right") (226 . "Right")
(228 . "Above left") (228 . "Above left")
(230 . "Above") (230 . "Above")
(232 . "Above right") (232 . "Above right")
(233 . "Double below") (233 . "Double below")
(234 . "Double above") (234 . "Double above")
(240 . "Below (iota subscript)"))))) (240 . "Below (iota subscript)")))))
(list "Bidi category" (list "Bidi category"
(cdr (assoc (cdr (assoc
(nth 3 fields) (nth 3 fields)
'(("L" . "Left-to-Right") '(("L" . "Left-to-Right")
("LRE" . "Left-to-Right Embedding") ("LRE" . "Left-to-Right Embedding")
("LRO" . "Left-to-Right Override") ("LRO" . "Left-to-Right Override")
("R" . "Right-to-Left") ("R" . "Right-to-Left")
("AL" . "Right-to-Left Arabic") ("AL" . "Right-to-Left Arabic")
("RLE" . "Right-to-Left Embedding") ("RLE" . "Right-to-Left Embedding")
("RLO" . "Right-to-Left Override") ("RLO" . "Right-to-Left Override")
("PDF" . "Pop Directional Format") ("PDF" . "Pop Directional Format")
("EN" . "European Number") ("EN" . "European Number")
("ES" . "European Number Separator") ("ES" . "European Number Separator")
("ET" . "European Number Terminator") ("ET" . "European Number Terminator")
("AN" . "Arabic Number") ("AN" . "Arabic Number")
("CS" . "Common Number Separator") ("CS" . "Common Number Separator")
("NSM" . "Non-Spacing Mark") ("NSM" . "Non-Spacing Mark")
("BN" . "Boundary Neutral") ("BN" . "Boundary Neutral")
("B" . "Paragraph Separator") ("B" . "Paragraph Separator")
("S" . "Segment Separator") ("S" . "Segment Separator")
("WS" . "Whitespace") ("WS" . "Whitespace")
("ON" . "Other Neutrals"))))) ("ON" . "Other Neutrals")))))
(list "Decomposition" (list "Decomposition"
(if (nth 4 fields) (if (nth 4 fields)
(let* ((parts (split-string (nth 4 fields))) (let* ((parts (split-string (nth 4 fields)))
(info (car parts))) (info (car parts)))
(if (string-match "\\`<\\(.+\\)>\\'" info) (if (string-match "\\`<\\(.+\\)>\\'" info)
(setq info (match-string 1 info)) (setq info (match-string 1 info))
(setq info nil)) (setq info nil))
(if info (setq parts (cdr parts))) (if info (setq parts (cdr parts)))
(setq parts (mapconcat (setq parts (mapconcat
(lambda (arg) (lambda (arg)
(string (string-to-number arg 16))) (string (string-to-number arg 16)))
parts " ")) parts " "))
(concat info parts)))) (concat info parts))))
(list "Decimal digit value" (list "Decimal digit value"
(if (nth 5 fields) (if (nth 5 fields)
(string-to-number (nth 5 fields)))) (string-to-number (nth 5 fields))))
(list "Digit value" (list "Digit value"
(if (nth 6 fields) (if (nth 6 fields)
(string-to-number (nth 6 fields)))) (string-to-number (nth 6 fields))))
(list "Numeric value" (list "Numeric value"
(if (nth 7 fields) (if (nth 7 fields)
(string-to-number (nth 6 fields)))) (string-to-number (nth 6 fields))))
(list "Mirrored" (list "Mirrored"
(if (equal "Y" (nth 8 fields)) (if (equal "Y" (nth 8 fields))
"yes")) "yes"))
(list "Old name" (nth 9 fields)) (list "Old name" (nth 9 fields))
(list "ISO 10646 comment" (nth 10 fields)) (list "ISO 10646 comment" (nth 10 fields))
(list "Uppercase" (and (nth 11 fields) (list "Uppercase" (and (nth 11 fields)
(string (string-to-number (string (string-to-number
(nth 11 fields) 16)))) (nth 11 fields) 16))))
(list "Lowercase" (and (nth 12 fields) (list "Lowercase" (and (nth 12 fields)
(string (string-to-number (string (string-to-number
(nth 12 fields) 16)))) (nth 12 fields) 16))))
(list "Titlecase" (and (nth 13 fields) (list "Titlecase" (and (nth 13 fields)
(string (string-to-number (string (string-to-number
(nth 13 fields) 16))))))))))) (nth 13 fields) 16)))))))))))
;;; mule-diag.el ends here ;;; mule-diag.el ends here
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment