Commit 96c2c098 authored by Mark Oteiza's avatar Mark Oteiza

Make ucs-names a hash table (Bug#28302)

* etc/NEWS: Mention the type change.
* lisp/descr-text.el (describe-char): Use gethash to access ucs-names.
Hardcode BEL's name into the function instead of needlessly mapping
over the hash table in the spirit of rassoc.
* lisp/international/mule-cmds.el (ucs-names): Fix variable and
function docstrings.  Initialize a hash table for ucs-names--the
number of entries is 42845 here.  Switch to hash-table
getters/setters.
(mule--ucs-names-annotation): Use hash-table getter.
(char-from-name): Upcase the string if ignore-case is truthy.
* lisp/leim/quail/latin-ltx.el: Use maphash instead of dolist.
parent e6a2b4c2
......@@ -1154,6 +1154,9 @@ table implementation. This uses a new bytecode op 'switch', which
isn't compatible with previous Emacs versions. This functionality can
be disabled by setting 'byte-compile-cond-use-jump-table' to nil.
---
** The alist 'ucs-names' is now a hash table.
---
** 'C-up', 'C-down', 'C-left' and 'C-right' are now defined in term
mode to send the same escape sequences that xterm does. This makes
......
......@@ -617,16 +617,16 @@ relevant to POS."
(list
(let* ((names (ucs-names))
(name
(or (when (= char 7)
(or (when (= char ?\a)
;; Special case for "BELL" which is
;; apparently the only char which
;; doesn't have a new name and whose
;; old-name is shadowed by a newer char
;; with that name (bug#25641).
(car (rassoc char names)))
"BELL (BEL)")
(get-char-code-property char 'name)
(get-char-code-property char 'old-name))))
(if (and name (assoc-string name names))
(if (and name (gethash name names))
(format
"type \"C-x 8 RET %x\" or \"C-x 8 RET %s\""
char name)
......
......@@ -2923,10 +2923,10 @@ on encoding."
(make-obsolete-variable 'nonascii-translation-table "do not use it." "23.1")
(defvar ucs-names nil
"Alist of cached (CHAR-NAME . CHAR-CODE) pairs.")
"Hash table of cached CHAR-NAME keys to CHAR-CODE values.")
(defun ucs-names ()
"Return alist of (CHAR-NAME . CHAR-CODE) pairs cached in `ucs-names'."
"Return table of CHAR-NAME keys and CHAR-CODE values cached in `ucs-names'."
(or ucs-names
(let ((ranges
'((#x0000 . #x33FF)
......@@ -2954,38 +2954,39 @@ on encoding."
;; (#x20000 . #xDFFFF) CJK Ideograph Extension A, B, etc, unused
(#xE0000 . #xE01FF)))
(gc-cons-threshold 10000000)
names)
(dolist (range ranges)
(let ((c (car range))
(end (cdr range)))
(while (<= c end)
(names (make-hash-table :size 42943 :test #'equal)))
(dolist (range ranges)
(let ((c (car range))
(end (cdr range)))
(while (<= c end)
(let ((new-name (get-char-code-property c 'name))
(old-name (get-char-code-property c 'old-name)))
;; In theory this code could end up pushing an "old-name" that
;; shadows a "new-name" but in practice every time an
;; `old-name' conflicts with a `new-name', the newer one has a
;; higher code, so it gets pushed later!
(if new-name (push (cons new-name c) names))
(if old-name (push (cons old-name c) names))
(setq c (1+ c))))))
;; Special case for "BELL" which is apparently the only char which
;; doesn't have a new name and whose old-name is shadowed by a newer
;; char with that name.
(setq ucs-names `(("BELL (BEL)" . 7) ,@names)))))
;; In theory this code could end up pushing an "old-name" that
;; shadows a "new-name" but in practice every time an
;; `old-name' conflicts with a `new-name', the newer one has a
;; higher code, so it gets pushed later!
(if new-name (puthash new-name c names))
(if old-name (puthash old-name c names))
(setq c (1+ c))))))
;; Special case for "BELL" which is apparently the only char which
;; doesn't have a new name and whose old-name is shadowed by a newer
;; char with that name.
(puthash "BELL (BEL)" ?\a names)
(setq ucs-names names))))
(defun mule--ucs-names-annotation (name)
;; FIXME: It would be much better to add this annotation before rather than
;; after the char name, so the annotations are aligned.
;; FIXME: The default behavior of displaying annotations in italics
;; doesn't work well here.
(let ((char (assoc name ucs-names)))
(when char (format " (%c)" (cdr char)))))
(let ((char (gethash name ucs-names)))
(when char (format " (%c)" char))))
(defun char-from-name (string &optional ignore-case)
"Return a character as a number from its Unicode name STRING.
If optional IGNORE-CASE is non-nil, ignore case in STRING.
Return nil if STRING does not name a character."
(or (cdr (assoc-string string (ucs-names) ignore-case))
(or (gethash (if ignore-case (upcase string) string) (ucs-names))
(let ((minus (string-match-p "-[0-9A-F]+\\'" string)))
(when minus
;; Parse names like "VARIATION SELECTOR-17" and "CJK
......
......@@ -75,20 +75,20 @@ system, including many technical ones. Examples:
(`(,seq ,re)
(let ((count 0)
(re (eval re t)))
(dolist (pair (ucs-names))
(let ((name (car pair))
(char (cdr pair)))
(when (and (characterp char) ;; Ignore char-ranges.
(string-match re name))
(let ((keys (if (stringp seq)
(replace-match seq nil nil name)
(funcall seq name char))))
(if (listp keys)
(dolist (x keys)
(setq count (1+ count))
(push (list x char) newrules))
(setq count (1+ count))
(push (list keys char) newrules))))))
(maphash
(lambda (name char)
(when (and (characterp char) ;; Ignore char-ranges.
(string-match re name))
(let ((keys (if (stringp seq)
(replace-match seq nil nil name)
(funcall seq name char))))
(if (listp keys)
(dolist (x keys)
(setq count (1+ count))
(push (list x char) newrules))
(setq count (1+ count))
(push (list keys char) newrules)))))
(ucs-names))
;; (message "latin-ltx: %d mappings for %S" count re)
))))
(setq newrules (delete-dups newrules))
......@@ -206,7 +206,7 @@ system, including many technical ones. Examples:
((lambda (name char)
(let* ((base (concat (match-string 1 name) (match-string 3 name)))
(basechar (cdr (assoc base (ucs-names)))))
(basechar (gethash base (ucs-names))))
(when (latin-ltx--ascii-p basechar)
(string (if (match-end 2) ?^ ?_) basechar))))
"\\(.*\\)SU\\(?:B\\|\\(PER\\)\\)SCRIPT \\(.*\\)")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment