Add C interface for Unicode character property table.

2011-07-06 Kenichi Handa <>
* unidata/unidata-gen.el (unidata-dir): New variable.
(unidata-setup-list): Expand unidata-text-file in unidata-dir.
(unidata-prop-alist): INDEX element may be a function. New
optional element VAL-LIST (for general-category and bidi-class).
New entry `mirroring'.
(unidata-prop-default, unidata-prop-val-list): New subst.
(unidata-get-character, unidata-put-character): Delete them.
(unidata-gen-table-character): New arg IGNORE. Adjusted for the
above changes.
(unidata-get-symbol, unidata-get-integer, unidata-get-numeric)
(unidata-put-symbol, unidata-put-integer, unidata-put-numeric):
Delete them.
(unidata-encode-val): Assume that the first element of VAL-LIST is
a cons (nil . 0).
(unidata-gen-table): Change argument DEFAULT-VALUE to VAL-LIST.
Always store the encoded value.
(unidata-gen-table-symbol): New args DEFAULT-VALUE and VAL-LIST.
Set the 1st and the 2nd extra slots to index numbers for C
(unidata-gen-table-integer): Likewise.
(unidata-gen-table-numeric): Likewise.
(unidata-gen-table-name): New arg IGNORE.
(unidata-gen-table-decomposition): Likewise.
(unidata-describe-general-category): Add the case nil to the
description alist.
(unidata-gen-mirroring-list): New function.
(unidata-gen-files): New arg DATA-DIR. Adjusted for the change of
unidata-prop-alist. Handle the case of storing multiple
char-tables in a file.
* unidata/ (${DSTDIR}/charprop.el): New arg to unidata-gen-files.
2011-05-21 Glenn Morris <>
* bzrmerge.el (bzrmerge-resolve): Suppress prompts about file-locals.
......@@ -33,9 +33,10 @@ unidata.txt: UnicodeData.txt
${DSTDIR}/charprop.el: unidata-gen.elc unidata.txt
ELC=`/bin/pwd`/unidata-gen.elc; \
DATA=`/bin/pwd`/unidata.txt; \
DATADIR=`/bin/pwd`; \
DATA=unidata.txt; \
cd ${DSTDIR}; \
${RUNEMACS} -batch --load $${ELC} -f unidata-gen-files $${DATA}
${RUNEMACS} -batch --load $${ELC} -f unidata-gen-files $${DATADIR} $${DATA}
../../src/biditype.h: UnicodeData.txt
gawk -F";" -f biditype.awk $< > $@
This diff is collapsed.
2011-07-06 Kenichi Handa <>
* international/characters.el (build-unicode-category-table):
Delete it.
(unicode-category-table): Set it by
unicode-property-table-internal.
* international/mule-cmds.el (char-code-property-alist): Moved to
src/chartab.c.
(get-char-code-property): Call unicode-property-table-internal to
load a file. Call get-unicode-property-internal where necessary.
(put-char-code-property): Call unicode-property-table-internal to
load a file. Call put-unicode-property-internal where necessary.
(char-code-property-description): Call
unicode-property-table-internal to load a file.
* international/charprop.el:
* international/uni-bidi.el:
* international/uni-category.el:
* international/uni-combining.el:
* international/uni-comment.el:
* international/uni-decimal.el:
* international/uni-decomposition.el:
* international/uni-digit.el:
* international/uni-lowercase.el:
* international/uni-mirrored.el:
* international/uni-name.el:
* international/uni-numeric.el:
* international/uni-old-name.el:
* international/uni-titlecase.el:
* international/uni-uppercase.el: Regenerate.
* loadup.el: Load international/charprop.el before
2011-06-22 Richard Stallman <>
* mail/sendmail.el (mail-bury): If Rmail is in use, return nicely
......@@ -1206,22 +1206,8 @@ Setup char-width-table appropriate for non-CJK language environment."
;;; Setting unicode-category-table.
;; This macro is to build unicode-category-table at compile time so
;; that C code can access the table efficiently.
(defmacro build-unicode-category-table ()
(let ((table (make-char-table 'unicode-category-table nil)))
(dotimes (i #x110000)
(if (or (< i #xD800)
(and (>= i #xF900) (< i #x30000))
(and (>= i #xE0000) (< i #xE0200)))
(aset table i (get-char-code-property i 'general-category))))
(set-char-table-range table '(#xE000 . #xF8FF) 'Co)
(set-char-table-range table '(#xF0000 . #xFFFFD) 'Co)
(set-char-table-range table '(#x100000 . #x10FFFD) 'Co)
(optimize-char-table table 'eq)
(setq unicode-category-table (build-unicode-category-table))
(setq unicode-category-table
(unicode-property-table-internal 'general-category))
(map-char-table #'(lambda (key val)
(if (and val
(or (and (/= (aref (symbol-name val) 0) ?M)
;; Copyright (C) 1991-2010 Unicode, Inc.
;; This file was generated from the Unicode data file at
;; See lisp/international/README for the copyright and permission notice.
;; Automatically generated by unidata-gen.el.
;; FILE: uni-name.el
(define-char-code-property 'name "uni-name.el"
"Unicode character name.
......@@ -45,7 +41,7 @@ Property value is an integer or a floating point.")
;; FILE: uni-mirrored.el
(define-char-code-property 'mirrored "uni-mirrored.el"
"Unicode bidi mirrored flag.
Property value is a symbol `Y' or `N'.")
Property value is a symbol `Y' or `N'. See also the property `mirroring'.")
;; FILE: uni-old-name.el
(define-char-code-property 'old-name "uni-old-name.el"
"Unicode old names as published in Unicode 1.0.
......@@ -66,6 +62,11 @@ Property value is a character.")
(define-char-code-property 'titlecase "uni-titlecase.el"
"Unicode simple titlecase mapping.
Property value is a character.")
;; FILE: uni-mirrored.el
(define-char-code-property 'mirroring "uni-mirrored.el"
"Unicode bidi-mirroring characters.
Property value is a character that has the corresponding mirroring image,
or nil for non-mirrored character.")
;; Local Variables:
;; coding: utf-8
;; no-byte-compile: t
......@@ -2709,16 +2709,6 @@ See also `locale-charset-language-names', `locale-language-names',
;;; Character property
;; Each element has the form (PROP . TABLE).
;; PROP is a symbol representing a character property.
;; TABLE is a char-table containing the property value for each character.
;; TABLE may be a name of file to load to build a char-table.
;; Don't modify this variable directly but use `define-char-code-property'.
(defvar char-code-property-alist nil
"Alist of character property name vs char-table containing property values.
Internal use only.")
(put 'char-code-property-table 'char-table-extra-slots 5)
(defun define-char-code-property (name table &optional docstring)
......@@ -2770,32 +2760,23 @@ See also the documentation of `get-char-code-property' and
(defun get-char-code-property (char propname)
"Return the value of CHAR's PROPNAME property."
(let ((slot (assq propname char-code-property-alist)))
(if slot
(let (table value func)
(if (stringp (cdr slot))
(load (cdr slot) nil t))
(setq table (cdr slot)
value (aref table char)
func (char-table-extra-slot table 1))
(let ((table (unicode-property-table-internal propname)))
(if table
(let ((func (char-table-extra-slot table 1)))
(if (functionp func)
(setq value (funcall func char value table)))
(funcall func char (aref table char) table)
(get-unicode-property-internal table char)))
(plist-get (aref char-code-property-table char) propname))))
(defun put-char-code-property (char propname value)
"Store CHAR's PROPNAME property with VALUE.
It can be retrieved with `(get-char-code-property CHAR PROPNAME)'."
(let ((slot (assq propname char-code-property-alist)))
(if slot
(let (table func)
(if (stringp (cdr slot))
(load (cdr slot) nil t))
(setq table (cdr slot)
func (char-table-extra-slot table 2))
(let ((table (unicode-property-table-internal propname)))
(if table
(let ((func (char-table-extra-slot table 2)))
(if (functionp func)
(funcall func char value table)
(aset table char value)))
(put-unicode-property-internal table char value)))
(let* ((plist (aref char-code-property-table char))
(x (plist-put plist propname value)))
(or (eq x plist)
......@@ -2805,13 +2786,9 @@ It can be retrieved with `(get-char-code-property CHAR PROPNAME)'."
(defun char-code-property-description (prop value)
"Return a description string of character property PROP's value VALUE.
If there's no description string for VALUE, return nil."
(let ((slot (assq prop char-code-property-alist)))
(if slot
(let (table func)
(if (stringp (cdr slot))
(load (cdr slot) nil t))
(setq table (cdr slot)
func (char-table-extra-slot table 3))
(let ((table (unicode-property-table-internal prop)))
(if table
(let ((func (char-table-extra-slot table 3)))
(if (functionp func)
(funcall func value))))))
