Commit 7a84eee5 authored by Kenichi Handa's avatar Kenichi Handa
Browse files

Improve the encoding by compound-text-with-extensions.

parent faa28da9
2010-08-06 Kenichi Handa <handa@m17n.org>
* international/mule.el (define-charset): Store NAME as :base
property.
(ctext-non-standard-encodings-table): Pay attention to charset
aliases.
(ctext-pre-write-conversion): Sort ctext-standard-encodings by the
current priority. Force using the designation of the specific
charset by adding `charset' text property. Improve the whole
algorithm.
2010-08-04 Kenichi Handa <handa@m17n.org> 2010-08-04 Kenichi Handa <handa@m17n.org>
* language/cyrillic.el: Don't add "microsoft-cp1251" to * language/cyrillic.el: Don't add "microsoft-cp1251" to
......
...@@ -282,6 +282,7 @@ attribute." ...@@ -282,6 +282,7 @@ attribute."
(plist-put props :short-name (symbol-name name))) (plist-put props :short-name (symbol-name name)))
(or (plist-get props :long-name) (or (plist-get props :long-name)
(plist-put props :long-name (plist-get props :short-name))) (plist-put props :long-name (plist-get props :short-name)))
(plist-put props :base name)
;; We can probably get a worthwhile amount in purespace. ;; We can probably get a worthwhile amount in purespace.
(setq props (setq props
(mapcar (lambda (elt) (mapcar (lambda (elt)
...@@ -1535,11 +1536,13 @@ of `ctext-non-standard-encodings-alist'.") ...@@ -1535,11 +1536,13 @@ of `ctext-non-standard-encodings-alist'.")
(let* ((slot (assoc elt ctext-non-standard-encodings-alist)) (let* ((slot (assoc elt ctext-non-standard-encodings-alist))
(charset (nth 3 slot))) (charset (nth 3 slot)))
(if (charsetp charset) (if (charsetp charset)
(setcar tail (cons charset slot)) (setcar tail
(cons (plist-get (charset-plist charset) :base) slot))
(setcar tail (cons (car charset) slot)) (setcar tail (cons (car charset) slot))
(dolist (cs (cdr charset)) (dolist (cs (cdr charset))
(setcdr tail (setcdr tail
(cons (cons (car cs) slot) (cdr tail))) (cons (cons (plist-get (charset-plist (car cs)) :base) slot)
(cdr tail)))
(setq tail (cdr tail)))) (setq tail (cdr tail))))
(setq tail (cdr tail)))) (setq tail (cdr tail))))
table)) table))
...@@ -1559,74 +1562,56 @@ in-place." ...@@ -1559,74 +1562,56 @@ in-place."
(setq from 1 to (point-max))) (setq from 1 to (point-max)))
(save-restriction (save-restriction
(narrow-to-region from to) (narrow-to-region from to)
(goto-char from)
(let ((encoding-table (ctext-non-standard-encodings-table)) (let ((encoding-table (ctext-non-standard-encodings-table))
(charset-list ctext-standard-encodings) (charset-list (sort-charsets
(copy-sequence ctext-standard-encodings)))
(end-pos (make-marker))
last-coding-system-used last-coding-system-used
last-pos last-encoding-info last-pos charset encoding-info)
encoding-info end-pos ch charset)
(dolist (elt encoding-table) (dolist (elt encoding-table)
(push (car elt) charset-list)) (push (car elt) charset-list))
(goto-char (setq last-pos from))
(setq end-pos (point-marker)) (setq end-pos (point-marker))
(while (re-search-forward "[^\000-\177]+" nil t) (while (re-search-forward "[^\0-\177]+" nil t)
;; Found a sequence of non-ASCII characters. ;; Found a sequence of non-ASCII characters.
(setq last-pos (match-beginning 0)
ch (char-after last-pos)
charset (char-charset ch charset-list)
last-encoding-info
(if charset
(or (cdr (assq charset encoding-table))
charset)
'utf-8))
(set-marker end-pos (match-end 0)) (set-marker end-pos (match-end 0))
(goto-char (1+ last-pos)) (goto-char (match-beginning 0))
(while (marker-position end-pos) (setq last-pos (point)
(if (< (point) end-pos) charset (char-charset (following-char) charset-list))
(progn (forward-char 1)
(setq charset (char-charset (following-char) charset-list) (while (and (< (point) end-pos)
encoding-info (eq charset (char-charset (following-char) charset-list)))
(if charset (forward-char 1))
(or (cdr (assq charset encoding-table)) (if charset
charset) (if (setq encoding-info (cdr (assq charset encoding-table)))
'utf-8)) ;; Encode this range using an extended segment.
(forward-char 1)) (let ((encoding-name (car encoding-info))
(setq encoding-info nil) (coding-system (nth 1 encoding-info))
(set-marker end-pos nil)) (noctets (nth 2 encoding-info))
(unless (eq last-encoding-info encoding-info) len)
(cond ((consp last-encoding-info) (encode-coding-region last-pos (point) coding-system)
;; Encode the previous range using an extended (setq len (+ (length encoding-name) 1
;; segment. (- (point) last-pos)))
(let ((encoding-name (car last-encoding-info)) ;; According to the spec of CTEXT, it is not
(coding-system (nth 1 last-encoding-info)) ;; necessary to produce this extra designation
(noctets (nth 2 last-encoding-info)) ;; sequence, but some buggy application
len) ;; (e.g. crxvt-gb) requires it.
(encode-coding-region last-pos (point) coding-system) (insert "\e(B")
(setq len (+ (length encoding-name) 1 (save-excursion
(- (point) last-pos))) (goto-char last-pos)
;; According to the spec of CTEXT, it is not (insert (format "\e%%/%d" noctets))
;; necessary to produce this extra designation (insert-byte (+ (/ len 128) 128) 1)
;; sequence, but some buggy application (insert-byte (+ (% len 128) 128) 1)
;; (e.g. crxvt-gb) requires it. (insert encoding-name)
(insert "\e(B") (insert 2)))
(save-excursion ;; Encode this range as characters in CHARSET.
(goto-char last-pos) (put-text-property last-pos (point) 'charset charset))
(insert (format "\e%%/%d" noctets)) ;; Encode this range using UTF-8 encoding extention.
(insert-byte (+ (/ len 128) 128) 1) (encode-coding-region last-pos (point) 'mule-utf-8)
(insert-byte (+ (% len 128) 128) 1) (save-excursion
(insert encoding-name) (goto-char last-pos)
(insert 2)))) (insert "\e%G"))
((eq last-encoding-info 'utf-8) (insert "\e%@")))
;; Encode the previous range using UTF-8 encoding
;; extention.
(encode-coding-region last-pos (point) 'mule-utf-8)
(save-excursion
(goto-char last-pos)
(insert "\e%G"))
(insert "\e%@"))
(t
(put-text-property last-pos (point) 'charset charset)))
(setq last-pos (point)
last-encoding-info encoding-info))))
(goto-char (point-min))))) (goto-char (point-min)))))
;; Must return nil, as build_annotations_2 expects that. ;; Must return nil, as build_annotations_2 expects that.
nil) nil)
......
2010-08-06 Kenichi Handa <handa@m17n.org>
* charset.c: Include <stdlib.h>
(struct charset_sort_data): New struct.
(charset_compare): New function.
(Fsort_charsets): New funciton.
(syms_of_charset): Declare Fsort_charsets as a Lisp function.
* coding.c (decode_coding_iso_2022): Fix checking of dimension
number in CTEXT extended segment.
2010-08-01 Juanma Barranquero <lekktu@gmail.com> 2010-08-01 Juanma Barranquero <lekktu@gmail.com>
   
* w32fns.c (syms_of_w32fns) <x-max-tooltip-size>: Fix typo in docstring. * w32fns.c (syms_of_w32fns) <x-max-tooltip-size>: Fix typo in docstring.
......
...@@ -28,6 +28,7 @@ along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */ ...@@ -28,6 +28,7 @@ along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
#include <config.h> #include <config.h>
#include <stdio.h> #include <stdio.h>
#include <stdlib.h>
#include <unistd.h> #include <unistd.h>
#include <ctype.h> #include <ctype.h>
#include <sys/types.h> #include <sys/types.h>
...@@ -2139,23 +2140,22 @@ that case, find the charset from what supported by that coding system. */) ...@@ -2139,23 +2140,22 @@ that case, find the charset from what supported by that coding system. */)
charset = CHAR_CHARSET (XINT (ch)); charset = CHAR_CHARSET (XINT (ch));
else else
{ {
Lisp_Object charset_list;
if (CONSP (restriction)) if (CONSP (restriction))
{ {
for (charset_list = Qnil; CONSP (restriction); int c = XFASTINT (ch);
restriction = XCDR (restriction))
for (; CONSP (restriction); restriction = XCDR (restriction))
{ {
int id; struct charset *charset;
CHECK_CHARSET_GET_ID (XCAR (restriction), id); CHECK_CHARSET_GET_CHARSET (XCAR (restriction), charset);
charset_list = Fcons (make_number (id), charset_list); if (ENCODE_CHAR (charset, c) != CHARSET_INVALID_CODE (charset))
return XCAR (restriction);
} }
charset_list = Fnreverse (charset_list); return Qnil;
} }
else restriction = coding_system_charset_list (restriction);
charset_list = coding_system_charset_list (restriction); charset = char_charset (XINT (ch), restriction, NULL);
charset = char_charset (XINT (ch), charset_list, NULL);
if (! charset) if (! charset)
return Qnil; return Qnil;
} }
...@@ -2312,6 +2312,69 @@ Return charset identification number of CHARSET. */) ...@@ -2312,6 +2312,69 @@ Return charset identification number of CHARSET. */)
return make_number (id); return make_number (id);
} }
struct charset_sort_data
{
Lisp_Object charset;
int id;
int priority;
};
static int
charset_compare (const void *d1, const void *d2)
{
const struct charset_sort_data *data1 = d1, *data2 = d2;
return (data1->priority - data2->priority);
}
DEFUN ("sort-charsets", Fsort_charsets, Ssort_charsets, 1, 1, 0,
doc: /* Sort charset list CHARSETS by a priority of each charset.
Return the sorted list. CHARSETS is modified by side effects.
See also `charset-priority-list' and `set-charset-priority'. */)
(Lisp_Object charsets)
{
Lisp_Object len = Flength (charsets);
int n = XFASTINT (len), i, j, done;
Lisp_Object tail, elt, attrs;
struct charset_sort_data *sort_data;
int id, min_id, max_id;
USE_SAFE_ALLOCA;
if (n == 0)
return Qnil;
SAFE_ALLOCA (sort_data, struct charset_sort_data *, sizeof (*sort_data) * n);
for (tail = charsets, i = 0; CONSP (tail); tail = XCDR (tail), i++)
{
elt = XCAR (tail);
CHECK_CHARSET_GET_ATTR (elt, attrs);
sort_data[i].charset = elt;
sort_data[i].id = id = XINT (CHARSET_ATTR_ID (attrs));
if (i == 0)
min_id = max_id = id;
else if (id < min_id)
min_id = id;
else if (id > max_id)
max_id = id;
}
for (done = 0, tail = Vcharset_ordered_list, i = 0;
done < n && CONSP (tail); tail = XCDR (tail), i++)
{
elt = XCAR (tail);
id = XFASTINT (elt);
if (id >= min_id && id <= max_id)
for (j = 0; j < n; j++)
if (sort_data[j].id == id)
{
sort_data[j].priority = i;
done++;
}
}
qsort (sort_data, n, sizeof *sort_data, charset_compare);
for (i = 0, tail = charsets; CONSP (tail); tail = XCDR (tail), i++)
XSETCAR (tail, sort_data[i].charset);
SAFE_FREE ();
return charsets;
}
void void
init_charset () init_charset ()
...@@ -2414,6 +2477,7 @@ syms_of_charset () ...@@ -2414,6 +2477,7 @@ syms_of_charset ()
defsubr (&Scharset_priority_list); defsubr (&Scharset_priority_list);
defsubr (&Sset_charset_priority); defsubr (&Sset_charset_priority);
defsubr (&Scharset_id_internal); defsubr (&Scharset_id_internal);
defsubr (&Ssort_charsets);
DEFVAR_LISP ("charset-map-path", &Vcharset_map_path, DEFVAR_LISP ("charset-map-path", &Vcharset_map_path,
doc: /* *List of directories to search for charset map files. */); doc: /* *List of directories to search for charset map files. */);
......
...@@ -3935,7 +3935,7 @@ decode_coding_iso_2022 (coding) ...@@ -3935,7 +3935,7 @@ decode_coding_iso_2022 (coding)
int size; int size;
ONE_MORE_BYTE (dim); ONE_MORE_BYTE (dim);
if (dim < 0 || dim > 4) if (dim < '0' || dim > '4')
goto invalid_code; goto invalid_code;
ONE_MORE_BYTE (M); ONE_MORE_BYTE (M);
if (M < 128) if (M < 128)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment