Commit ad826124 authored by Eli Zaretskii
Browse files

Fix bug #16043 with crashes when displaying new bidi control characters.

 src/bidi.c (bidi_get_type, bidi_get_category): Handle the isolate
 directional control characters.  Update type and category
 determination according to the UBA from Unicode v6.3.
 (bidi_category_t): New category EXPLICIT_FORMATTING.
 src/dispextern.h (bidi_type_t): Update to include new bidirectional
 properties introduced with Unicode v6.3.

 admin/unidata/unidata-gen.el (unidata-prop-alist): Update bidi-class
 to include the new isolate-related classes introduced with Unicode
 v6.3.
 (unidata-encode-val): Accept an additional optional argument, a
 warning message to emit when UnicodeData.txt defines bidi-class
 values that are not in unidata-prop-alist.  Add a comment
 explaining what maintainers should do if/when such a warning ever
 appears.
 (unidata-gen-table): Call unidata-encode-val with 3rd arg non-nil
 when generating uni-bidi.el.
parent 456760a5
2013-12-04 Eli Zaretskii <eliz@gnu.org>
* unidata/unidata-gen.el (unidata-prop-alist): Update bidi-class
to include the new isolate-related classes introduced with Unicode
v6.3.
(unidata-encode-val): Accept an additional optional argument, a
warning message to emit when UnicodeData.txt defines bidi-class
values that are not in unidata-prop-alist. Add a comment
explaining what maintainers should do if/when such a warning ever
appears.
(unidata-gen-table): Call unidata-encode-val with 3rd arg non-nil
when generating uni-bidi.el.
2013-12-01 Glenn Morris <rgm@gnu.org> 2013-12-01 Glenn Morris <rgm@gnu.org>
* unidata/Makefile.in (${DSTDIR}/charprop.el): * unidata/Makefile.in (${DSTDIR}/charprop.el):
......
...@@ -194,8 +194,8 @@ Property value is an integer." ...@@ -194,8 +194,8 @@ Property value is an integer."
4 unidata-gen-table-symbol "uni-bidi.el" 4 unidata-gen-table-symbol "uni-bidi.el"
"Unicode bidi class. "Unicode bidi class.
Property value is one of the following symbols: Property value is one of the following symbols:
L, LRE, LRO, R, AL, RLE, RLO, PDF, EN, ES, ET, L, LRE, LRO, LRI, R, AL, RLE, RLO, RLI, FSI, PDF, PDI,
AN, CS, NSM, BN, B, S, WS, ON" EN, ES, ET, AN, CS, NSM, BN, B, S, WS, ON"
unidata-describe-bidi-class unidata-describe-bidi-class
;; The assignment of default values to blocks of code points ;; The assignment of default values to blocks of code points
;; follows the file DerivedBidiClass.txt from the Unicode ;; follows the file DerivedBidiClass.txt from the Unicode
...@@ -205,7 +205,8 @@ Property value is one of the following symbols: ...@@ -205,7 +205,8 @@ Property value is one of the following symbols:
(#xFB1D #xFB4F R) (#x10800 #x10FFF R) (#x1E800 #x1EFFF R)) (#xFB1D #xFB4F R) (#x10800 #x10FFF R) (#x1E800 #x1EFFF R))
;; The order of elements must be in sync with bidi_type_t in ;; The order of elements must be in sync with bidi_type_t in
;; src/dispextern.h. ;; src/dispextern.h.
(L R EN AN BN B AL LRE LRO RLE RLO PDF ES ET CS NSM S WS ON)) (L R EN AN BN B AL LRE LRO RLE RLO PDF LRI RLI FSI PDI
ES ET CS NSM S WS ON))
(decomposition (decomposition
5 unidata-gen-table-decomposition "uni-decomposition.el" 5 unidata-gen-table-decomposition "uni-decomposition.el"
"Unicode decomposition mapping. "Unicode decomposition mapping.
...@@ -397,12 +398,17 @@ is the character itself."))) ...@@ -397,12 +398,17 @@ is the character itself.")))
;; If VAL is one of VALn, just return n. ;; If VAL is one of VALn, just return n.
;; Otherwise, VAL-LIST is modified to this: ;; Otherwise, VAL-LIST is modified to this:
;; ((nil . 0) (VAL1 . 1) (VAL2 . 2) ... (VAL . n+1)) ;; ((nil . 0) (VAL1 . 1) (VAL2 . 2) ... (VAL . n+1))
;;
;; WARN is an optional warning to display when the value list is
;; extended, for property values that need to be in sync with other
;; parts of Emacs; currently only used for bidi-class.
(defun unidata-encode-val (val-list val) (defun unidata-encode-val (val-list val &optional warn)
(let ((slot (assoc val val-list)) (let ((slot (assoc val val-list))
val-code) val-code)
(if slot (if slot
(cdr slot) (cdr slot)
(if warn (message warn val))
(setq val-code (length val-list)) (setq val-code (length val-list))
(nconc val-list (list (cons val val-code))) (nconc val-list (list (cons val val-code)))
val-code))) val-code)))
...@@ -413,6 +419,16 @@ is the character itself."))) ...@@ -413,6 +419,16 @@ is the character itself.")))
(let ((table (make-char-table 'char-code-property-table)) (let ((table (make-char-table 'char-code-property-table))
(prop-idx (unidata-prop-index prop)) (prop-idx (unidata-prop-index prop))
(vec (make-vector 128 0)) (vec (make-vector 128 0))
;; When this warning is printed, there's a need to make the
;; following changes:
;; (1) update unidata-prop-alist with the new bidi-class values;
;; (2) extend bidi_type_t enumeration on src/dispextern.h to
;; include the new classes;
;; (3) possibly update the assertion in bidi.c:bidi_check_type; and
;; (4) possibly update the switch cases in
;; bidi.c:bidi_get_type and bidi.c:bidi_get_category.
(bidi-warning "\
** Found new bidi-class '%s', please update bidi.c and dispextern.h")
tail elt range val val-code idx slot tail elt range val val-code idx slot
prev-range-data) prev-range-data)
(setq val-list (cons nil (copy-sequence val-list))) (setq val-list (cons nil (copy-sequence val-list)))
...@@ -438,7 +454,9 @@ is the character itself."))) ...@@ -438,7 +454,9 @@ is the character itself.")))
(setq elt (car tail) tail (cdr tail)) (setq elt (car tail) tail (cdr tail))
(setq range (car elt) (setq range (car elt)
val (funcall val-func (nth prop-idx elt))) val (funcall val-func (nth prop-idx elt)))
(setq val-code (if val (unidata-encode-val val-list val))) (setq val-code (if val (unidata-encode-val val-list val
(and (eq prop 'bidi-class)
bidi-warning))))
(if (consp range) (if (consp range)
(when val-code (when val-code
(set-char-table-range table range val-code) (set-char-table-range table range val-code)
...@@ -486,7 +504,9 @@ is the character itself."))) ...@@ -486,7 +504,9 @@ is the character itself.")))
(setq new-val (funcall val-func (nth prop-idx elt))) (setq new-val (funcall val-func (nth prop-idx elt)))
(if (not (eq val new-val)) (if (not (eq val new-val))
(setq val new-val (setq val new-val
val-code (if val (unidata-encode-val val-list val)))) val-code (if val (unidata-encode-val
val-list val (and (eq prop 'bidi-class)
bidi-warning)))))
(if val-code (if val-code
(aset vec (- range start) val-code)) (aset vec (- range start) val-code))
(setq tail (cdr tail))) (setq tail (cdr tail)))
......
2013-12-04 Eli Zaretskii <eliz@gnu.org>
* bidi.c (bidi_get_type, bidi_get_category): Handle the isolate
directional control characters. Update type and category
determination according to the UBA from Unicode v6.3.
(bidi_category_t): New category EXPLICIT_FORMATTING.
* dispextern.h (bidi_type_t): Update to include new bidirectional
properties introduced with Unicode v6.3. (Bug#16043)
2013-12-04 Martin Rudalics <rudalics@gmx.at> 2013-12-04 Martin Rudalics <rudalics@gmx.at>
* xterm.c (XTflash): Fix coordinate of bottom area to flash * xterm.c (XTflash): Fix coordinate of bottom area to flash
......
...@@ -76,7 +76,8 @@ typedef enum { ...@@ -76,7 +76,8 @@ typedef enum {
UNKNOWN_BC, UNKNOWN_BC,
NEUTRAL, NEUTRAL,
WEAK, WEAK,
STRONG STRONG,
EXPLICIT_FORMATTING
} bidi_category_t; } bidi_category_t;
/* UAX#9 says to search only for L, AL, or R types of characters, and /* UAX#9 says to search only for L, AL, or R types of characters, and
...@@ -115,13 +116,9 @@ bidi_get_type (int ch, bidi_dir_t override) ...@@ -115,13 +116,9 @@ bidi_get_type (int ch, bidi_dir_t override)
if (default_type == UNKNOWN_BT) if (default_type == UNKNOWN_BT)
emacs_abort (); emacs_abort ();
if (override == NEUTRAL_DIR)
return default_type;
switch (default_type) switch (default_type)
{ {
/* Although UAX#9 does not tell, it doesn't make sense to case WEAK_BN:
override NEUTRAL_B and LRM/RLM characters. */
case NEUTRAL_B: case NEUTRAL_B:
case LRE: case LRE:
case LRO: case LRO:
...@@ -129,20 +126,20 @@ bidi_get_type (int ch, bidi_dir_t override) ...@@ -129,20 +126,20 @@ bidi_get_type (int ch, bidi_dir_t override)
case RLO: case RLO:
case PDF: case PDF:
return default_type; return default_type;
/* FIXME: The isolate controls are treated as BN until we add
support for UBA v6.3. */
case LRI:
case RLI:
case FSI:
case PDI:
return WEAK_BN;
default: default:
switch (ch) if (override == L2R)
{ return STRONG_L;
case LRM_CHAR: else if (override == R2L)
case RLM_CHAR: return STRONG_R;
return default_type; else
default: return default_type;
if (override == L2R) /* X6 */
return STRONG_L;
else if (override == R2L)
return STRONG_R;
else
emacs_abort (); /* can't happen: handled above */
}
} }
} }
...@@ -163,12 +160,7 @@ bidi_get_category (bidi_type_t type) ...@@ -163,12 +160,7 @@ bidi_get_category (bidi_type_t type)
case STRONG_L: case STRONG_L:
case STRONG_R: case STRONG_R:
case STRONG_AL: case STRONG_AL:
case LRE:
case LRO:
case RLE:
case RLO:
return STRONG; return STRONG;
case PDF: /* ??? really?? */
case WEAK_EN: case WEAK_EN:
case WEAK_ES: case WEAK_ES:
case WEAK_ET: case WEAK_ET:
...@@ -176,12 +168,30 @@ bidi_get_category (bidi_type_t type) ...@@ -176,12 +168,30 @@ bidi_get_category (bidi_type_t type)
case WEAK_CS: case WEAK_CS:
case WEAK_NSM: case WEAK_NSM:
case WEAK_BN: case WEAK_BN:
/* FIXME */
case LRI:
case RLI:
case FSI:
case PDI:
return WEAK; return WEAK;
case NEUTRAL_B: case NEUTRAL_B:
case NEUTRAL_S: case NEUTRAL_S:
case NEUTRAL_WS: case NEUTRAL_WS:
case NEUTRAL_ON: case NEUTRAL_ON:
return NEUTRAL; return NEUTRAL;
case LRE:
case LRO:
case RLE:
case RLO:
case PDF:
#if 0
/* FIXME: This awaits implementation of isolate support. */
case LRI:
case RLI:
case FSI:
case PDI:
#endif
return EXPLICIT_FORMATTING;
default: default:
emacs_abort (); emacs_abort ();
} }
......
...@@ -1895,6 +1895,10 @@ typedef enum { ...@@ -1895,6 +1895,10 @@ typedef enum {
RLE, /* right-to-left embedding */ RLE, /* right-to-left embedding */
RLO, /* right-to-left override */ RLO, /* right-to-left override */
PDF, /* pop directional format */ PDF, /* pop directional format */
LRI, /* left-to-right isolate */
RLI, /* right-to-left isolate */
FSI, /* first strong isolate */
PDI, /* pop directional isolate */
WEAK_ES, /* european number separator */ WEAK_ES, /* european number separator */
WEAK_ET, /* european number terminator */ WEAK_ET, /* european number terminator */
WEAK_CS, /* common separator */ WEAK_CS, /* common separator */
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment