Commit c805dec0 authored by Kenichi Handa's avatar Kenichi Handa

Add C interface for Unicode character property table.

parent 5c62d133
2011-07-06 Kenichi Handa <handa@m17n.org>
* unidata/unidata-gen.el (unidata-dir): New variable.
(unidata-setup-list): Expand unidata-text-file in unidata-dir.
(unidata-prop-alist): INDEX element may be a function. New
optional element VAL-LIST (for general-category and bidi-class).
New entry `mirroring'.
(unidata-prop-default, unidata-prop-val-list): New subst.
(unidata-get-character, unidata-put-character): Delete them.
(unidata-gen-table-character): New arg IGNORE. Adjusted for the
above changes.
(unidata-get-symbol, unidata-get-integer, unidata-get-numeric)
(unidata-put-symbol, unidata-put-integer, unidata-put-numeric):
Delete them.
(unidata-encode-val): Assume that the first element of VAL-LIST is
a cons (nil . 0).
(unidata-gen-table): Change argument DEFAULT-VALUE to VAL-LIST.
Always store the encoded value.
(unidata-gen-table-symbol): New args DEFAULT-VALUE and VAL-LIST.
Set the 1st and the 2nd extra slots to index numbers for C
functions.
(unidata-gen-table-integer): Likewise.
(unidata-gen-table-numeric): Likewise.
(unidata-gen-table-name): New arg IGNORE.
(unidata-gen-table-decomposition): Likewise.
(unidata-describe-general-category): Add the case nil to the
description alist.
(unidata-gen-mirroring-list): New function.
(unidata-gen-files): New arg DATA-DIR. Adjusted for the change of
unidata-prop-alist. Handle the case of storing multiple
char-tables in a file.
* unidata/Makefile.in (${DSTDIR}/charprop.el): New arg to
unidata-gen-files.
2011-05-21 Glenn Morris <rgm@gnu.org>
* bzrmerge.el (bzrmerge-resolve): Suppress prompts about file-locals.
......
......@@ -33,9 +33,10 @@ unidata.txt: UnicodeData.txt
${DSTDIR}/charprop.el: unidata-gen.elc unidata.txt
ELC=`/bin/pwd`/unidata-gen.elc; \
DATA=`/bin/pwd`/unidata.txt; \
DATADIR=`/bin/pwd`; \
DATA=unidata.txt; \
cd ${DSTDIR}; \
${RUNEMACS} -batch --load $${ELC} -f unidata-gen-files $${DATA}
${RUNEMACS} -batch --load $${ELC} -f unidata-gen-files $${DATADIR} $${DATA}
../../src/biditype.h: UnicodeData.txt
gawk -F";" -f biditype.awk $< > $@
......
This diff is collapsed.
2011-07-06 Kenichi Handa <handa@m17n.org>
* international/characters.el (build-unicode-category-table):
Delete it.
(unicode-category-table): Set it by
unicode-property-table-internal.
* international/mule-cmds.el (char-code-property-alist): Moved to
src/chartab.c.
(get-char-code-property): Call unicode-property-table-internal to
load a file. Call get-unicode-property-internal where necessary.
(put-char-code-property): Call unicode-property-table-internal to
load a file. Call put-unicode-property-internal where necessary.
(char-code-property-description): Call
unicode-property-table-internal to load a file.
* international/charprop.el:
* international/uni-bidi.el:
* international/uni-category.el:
* international/uni-combining.el:
* international/uni-comment.el:
* international/uni-decimal.el:
* international/uni-decomposition.el:
* international/uni-digit.el:
* international/uni-lowercase.el:
* international/uni-mirrored.el:
* international/uni-name.el:
* international/uni-numeric.el:
* international/uni-old-name.el:
* international/uni-titlecase.el:
* international/uni-uppercase.el: Regenerate.
* loadup.el: Load international/charprop.el before
international/characters.
2011-06-22 Richard Stallman <rms@gnu.org>
* mail/sendmail.el (mail-bury): If Rmail is in use, return nicely
......
......@@ -1206,22 +1206,8 @@ Setup char-width-table appropriate for non-CJK language environment."
;;; Setting unicode-category-table.
;; This macro is to build unicode-category-table at compile time so
;; that C code can access the table efficiently.
(defmacro build-unicode-category-table ()
(let ((table (make-char-table 'unicode-category-table nil)))
(dotimes (i #x110000)
(if (or (< i #xD800)
(and (>= i #xF900) (< i #x30000))
(and (>= i #xE0000) (< i #xE0200)))
(aset table i (get-char-code-property i 'general-category))))
(set-char-table-range table '(#xE000 . #xF8FF) 'Co)
(set-char-table-range table '(#xF0000 . #xFFFFD) 'Co)
(set-char-table-range table '(#x100000 . #x10FFFD) 'Co)
(optimize-char-table table 'eq)
table))
(setq unicode-category-table (build-unicode-category-table))
(setq unicode-category-table
(unicode-property-table-internal 'general-category))
(map-char-table #'(lambda (key val)
(if (and val
(or (and (/= (aref (symbol-name val) 0) ?M)
......
;; Copyright (C) 1991-2010 Unicode, Inc.
;; This file was generated from the Unicode data file at
;; http://www.unicode.org/Public/UNIDATA/UnicodeData.txt.
;; See lisp/international/README for the copyright and permission notice.
;; Automatically generated by unidata-gen.el.
;; FILE: uni-name.el
(define-char-code-property 'name "uni-name.el"
"Unicode character name.
......@@ -45,7 +41,7 @@ Property value is an integer or a floating point.")
;; FILE: uni-mirrored.el
(define-char-code-property 'mirrored "uni-mirrored.el"
"Unicode bidi mirrored flag.
Property value is a symbol `Y' or `N'.")
Property value is a symbol `Y' or `N'. See also the property `mirroring'.")
;; FILE: uni-old-name.el
(define-char-code-property 'old-name "uni-old-name.el"
"Unicode old names as published in Unicode 1.0.
......@@ -66,6 +62,11 @@ Property value is a character.")
(define-char-code-property 'titlecase "uni-titlecase.el"
"Unicode simple titlecase mapping.
Property value is a character.")
;; FILE: uni-mirrored.el
(define-char-code-property 'mirroring "uni-mirrored.el"
"Unicode bidi-mirroring characters.
Property value is a character that has the corresponding mirroring image,
or nil for non-mirrored character.")
;; Local Variables:
;; coding: utf-8
;; no-byte-compile: t
......
......@@ -2709,16 +2709,6 @@ See also `locale-charset-language-names', `locale-language-names',
;;; Character property
;; Each element has the form (PROP . TABLE).
;; PROP is a symbol representing a character property.
;; TABLE is a char-table containing the property value for each character.
;; TABLE may be a name of file to load to build a char-table.
;; Don't modify this variable directly but use `define-char-code-property'.
(defvar char-code-property-alist nil
"Alist of character property name vs char-table containing property values.
Internal use only.")
(put 'char-code-property-table 'char-table-extra-slots 5)
(defun define-char-code-property (name table &optional docstring)
......@@ -2770,32 +2760,23 @@ See also the documentation of `get-char-code-property' and
(defun get-char-code-property (char propname)
"Return the value of CHAR's PROPNAME property."
(let ((slot (assq propname char-code-property-alist)))
(if slot
(let (table value func)
(if (stringp (cdr slot))
(load (cdr slot) nil t))
(setq table (cdr slot)
value (aref table char)
func (char-table-extra-slot table 1))
(let ((table (unicode-property-table-internal propname)))
(if table
(let ((func (char-table-extra-slot table 1)))
(if (functionp func)
(setq value (funcall func char value table)))
value)
(funcall func char (aref table char) table)
(get-unicode-property-internal table char)))
(plist-get (aref char-code-property-table char) propname))))
(defun put-char-code-property (char propname value)
"Store CHAR's PROPNAME property with VALUE.
It can be retrieved with `(get-char-code-property CHAR PROPNAME)'."
(let ((slot (assq propname char-code-property-alist)))
(if slot
(let (table func)
(if (stringp (cdr slot))
(load (cdr slot) nil t))
(setq table (cdr slot)
func (char-table-extra-slot table 2))
(let ((table (unicode-property-table-internal propname)))
(if table
(let ((func (char-table-extra-slot table 2)))
(if (functionp func)
(funcall func char value table)
(aset table char value)))
(put-unicode-property-internal table char value)))
(let* ((plist (aref char-code-property-table char))
(x (plist-put plist propname value)))
(or (eq x plist)
......@@ -2805,13 +2786,9 @@ It can be retrieved with `(get-char-code-property CHAR PROPNAME)'."
(defun char-code-property-description (prop value)
"Return a description string of character property PROP's value VALUE.
If there's no description string for VALUE, return nil."
(let ((slot (assq prop char-code-property-alist)))
(if slot
(let (table func)
(if (stringp (cdr slot))
(load (cdr slot) nil t))
(setq table (cdr slot)
func (char-table-extra-slot table 3))
(let ((table (unicode-property-table-internal prop)))
(if table
(let ((func (char-table-extra-slot table 3)))
(if (functionp func)
(funcall func value))))))
......
......@@ -123,11 +123,11 @@
;; multilingual text.
(load "international/mule-cmds")
(load "case-table")
(load "international/characters")
(load "composite")
;; This file doesn't exist when building a development version of Emacs
;; from the repository. It is generated just after temacs is built.
(load "international/charprop.el" t)
(load "international/characters")
(load "composite")
;; Load language-specific files.
(load "language/chinese")
......
2011-07-06 Kenichi Handa <handa@m17n.org>
* character.h (unicode_category_t): New enum type.
* chartab.c (uniprop_decoder_t, uniprop_encoder_t): New types.
(Qchar_code_property_table): New variable.
(UNIPROP_TABLE_P, UNIPROP_GET_DECODER)
(UNIPROP_COMPRESSED_FORM_P): New macros.
(char_table_ascii): Uncompress the compressed values.
(sub_char_table_ref): New arg is_uniprop. Callers changed.
Uncompress the compressed values.
(sub_char_table_ref_and_range): Likewise.
(char_table_ref_and_range): Uncompress the compressed values.
(sub_char_table_set): New arg is_uniprop. Callers changed.
Uncompress the compressed values.
(sub_char_table_set_range): Args changed. Callers changed.
(char_table_set_range): Adjusted for the above change.
(map_sub_char_table): Delete args default_val and parent. Add arg
top. Give decoded values to a Lisp function.
(map_char_table): Adjusted for the above change. Give decoded
values to a Lisp function. Gcpro more variables.
(uniprop_table_uncompress)
(uniprop_decode_value_run_length): New functions.
(uniprop_decoder, uniprop_decoder_count): New variables.
(uniprop_get_decoder, uniprop_encode_value_character)
(uniprop_encode_value_run_length, uniprop_encode_value_numeric):
New functions.
(uniprop_encoder, uniprop_encoder_count): New variables.
(uniprop_get_encoder, uniprop_table)
(Funicode_property_table_internal, Fget_unicode_property_internal)
(Fput_unicode_property_internal): New functions.
(syms_of_chartab): DEFSYM Qchar_code_property_table, defsubr
Sunicode_property_table_internal, Sget_unicode_property_internal,
and Sput_unicode_property_internal. Defvar_lisp
char-code-property-alist.
* composite.c (CHAR_COMPOSABLE_P): Adjusted for the change of
Vunicode_category_table.
* font.c (font_range): Adjusted for the change of
Vunicode_category_table.
2011-06-22 Paul Eggert <eggert@cs.ucla.edu>
Fixes for GLYPH_DEBUG found by GCC 4.6.0 static checking.
......
......@@ -597,6 +597,45 @@ along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
: (c) <= 0xDFFF ? 2 \
: 0)
/* Unicode general category of a character, as a small integer code.

   The enumerators appear in exactly the same order as the value list
   (the 8th element) of the `general-category' entry of
   unidata-prop-alist in admin/unidata/unidata-gen.el.  The two lists
   must be kept in sync: do not insert, remove or reorder members
   here without making the matching change there.  */
typedef enum
{
  /* Code 0: no category is known for the character.  */
  UNICODE_CATEGORY_UNKNOWN = 0,

  /* L*: letters.  */
  UNICODE_CATEGORY_Lu, UNICODE_CATEGORY_Ll, UNICODE_CATEGORY_Lt,
  UNICODE_CATEGORY_Lm, UNICODE_CATEGORY_Lo,

  /* M*: marks.  */
  UNICODE_CATEGORY_Mn, UNICODE_CATEGORY_Mc, UNICODE_CATEGORY_Me,

  /* N*: numbers.  */
  UNICODE_CATEGORY_Nd, UNICODE_CATEGORY_Nl, UNICODE_CATEGORY_No,

  /* P*: punctuation.  */
  UNICODE_CATEGORY_Pc, UNICODE_CATEGORY_Pd, UNICODE_CATEGORY_Ps,
  UNICODE_CATEGORY_Pe, UNICODE_CATEGORY_Pi, UNICODE_CATEGORY_Pf,
  UNICODE_CATEGORY_Po,

  /* S*: symbols.  */
  UNICODE_CATEGORY_Sm, UNICODE_CATEGORY_Sc, UNICODE_CATEGORY_Sk,
  UNICODE_CATEGORY_So,

  /* Z*: separators.  */
  UNICODE_CATEGORY_Zs, UNICODE_CATEGORY_Zl, UNICODE_CATEGORY_Zp,

  /* C*: other (control, format, surrogate, private use, unassigned).  */
  UNICODE_CATEGORY_Cc, UNICODE_CATEGORY_Cf, UNICODE_CATEGORY_Cs,
  UNICODE_CATEGORY_Co, UNICODE_CATEGORY_Cn
} unicode_category_t;
extern int char_resolve_modifier_mask (int);
extern int char_string (unsigned, unsigned char *);
......
This diff is collapsed.
......@@ -976,9 +976,8 @@ static int _work_char;
((C) > ' ' \
&& ((C) == 0x200C || (C) == 0x200D \
|| (_work_val = CHAR_TABLE_REF (Vunicode_category_table, (C)), \
(SYMBOLP (_work_val) \
&& (_work_char = SDATA (SYMBOL_NAME (_work_val))[0]) != 'C' \
&& _work_char != 'Z'))))
(INTEGERP (_work_val) \
&& (XINT (_work_val) <= UNICODE_CATEGORY_So)))))
/* Update cmp_it->stop_pos to the next position after CHARPOS (and
BYTEPOS) where character composition may happen. If BYTEPOS is
......
......@@ -1773,7 +1773,11 @@ extern int face_change_count;
/* Data type for describing the bidirectional character types. The
first 7 must be at the beginning, because they are the only values
valid in the `bidi_type' member of `struct glyph'; we only reserve
3 bits for it, so we cannot use there values larger than 7. */
3 bits for it, so we cannot use there values larger than 7.
The order of members must be in sync with the 8th element of the
member of unidata-prop-alist (in admin/unidata/unidata-gen.el) for
Unicode character property `bidi-class'. */
typedef enum {
UNKNOWN_BT = 0,
STRONG_L, /* strong left-to-right */
......
......@@ -3739,8 +3739,9 @@ font_range (EMACS_INT pos, EMACS_INT *limit, struct window *w, struct face *face
else
FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, string, pos, pos_byte);
category = CHAR_TABLE_REF (Vunicode_category_table, c);
if (EQ (category, QCf)
|| CHAR_VARIATION_SELECTOR_P (c))
if (INTEGERP (category)
&& (XINT (category) == UNICODE_CATEGORY_Cf
|| CHAR_VARIATION_SELECTOR_P (c)))
continue;
if (NILP (font_object))
{
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment