Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Open sidebar
emacs
emacs
Commits
7a84eee5
Commit
7a84eee5
authored
Aug 06, 2010
by
Kenichi Handa
Browse files
Improve the encoding by compound-text-with-extensions.
parent
faa28da9
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
146 additions
and
75 deletions
+146
-75
lisp/ChangeLog
lisp/ChangeLog
+11
-0
lisp/international/mule.el
lisp/international/mule.el
+48
-63
src/ChangeLog
src/ChangeLog
+11
-0
src/charset.c
src/charset.c
+75
-11
src/coding.c
src/coding.c
+1
-1
No files found.
lisp/ChangeLog
View file @
7a84eee5
2010-08-06 Kenichi Handa <handa@m17n.org>
* international/mule.el (define-charset): Store NAME as :base
property.
(ctext-non-standard-encodings-table): Pay attention to charset
aliases.
(ctext-pre-write-conversion): Sort ctext-standard-encodings by the
current priority. Force using the designation of the specific
charset by adding `charset' text property. Improve the whole
algorithm.
2010-08-04 Kenichi Handa <handa@m17n.org>
* language/cyrillic.el: Don't add "microsoft-cp1251" to
...
...
lisp/international/mule.el
View file @
7a84eee5
...
...
@@ -282,6 +282,7 @@ attribute."
(plist-put props :short-name (symbol-name name)))
(or (plist-get props :long-name)
(plist-put props :long-name (plist-get props :short-name)))
(plist-put props :base name)
;; We can probably get a worthwhile amount in purespace.
(setq props
(mapcar (lambda (elt)
...
...
@@ -1535,11 +1536,13 @@ of `ctext-non-standard-encodings-alist'.")
(let* ((slot (assoc elt ctext-non-standard-encodings-alist))
(charset (nth 3 slot)))
(if (charsetp charset)
(setcar tail (cons charset slot))
(setcar tail
(cons (plist-get (charset-plist charset) :base) slot))
(setcar tail (cons (car charset) slot))
(dolist (cs (cdr charset))
(setcdr tail
(cons (cons (car cs) slot) (cdr tail)))
(cons (cons (plist-get (charset-plist (car cs)) :base) slot)
(cdr tail)))
(setq tail (cdr tail))))
(setq tail (cdr tail))))
table))
...
...
@@ -1559,74 +1562,56 @@ in-place."
(setq from 1 to (point-max)))
(save-restriction
(narrow-to-region from to)
(goto-char from)
(let ((encoding-table (ctext-non-standard-encodings-table))
(charset-list ctext-standard-encodings)
(charset-list (sort-charsets
(copy-sequence ctext-standard-encodings)))
(end-pos (make-marker))
last-coding-system-used
last-pos last-encoding-info
encoding-info end-pos ch charset)
last-pos charset encoding-info)
(dolist (elt encoding-table)
(push (car elt) charset-list))
(goto-char (setq last-pos from))
(setq end-pos (point-marker))
(while (re-search-forward "[^\0
00
-\177]+" nil t)
(while (re-search-forward "[^\0-\177]+" nil t)
;; Found a sequence of non-ASCII characters.
(setq last-pos (match-beginning 0)
ch (char-after last-pos)
charset (char-charset ch charset-list)
last-encoding-info
(if charset
(or (cdr (assq charset encoding-table))
charset)
'utf-8))
(set-marker end-pos (match-end 0))
(goto-char (1+ last-pos))
(while (marker-position end-pos)
(if (< (point) end-pos)
(progn
(setq charset (char-charset (following-char) charset-list)
encoding-info
(if charset
(or (cdr (assq charset encoding-table))
charset)
'utf-8))
(forward-char 1))
(setq encoding-info nil)
(set-marker end-pos nil))
(unless (eq last-encoding-info encoding-info)
(cond ((consp last-encoding-info)
;; Encode the previous range using an extended
;; segment.
(let ((encoding-name (car last-encoding-info))
(coding-system (nth 1 last-encoding-info))
(noctets (nth 2 last-encoding-info))
len)
(encode-coding-region last-pos (point) coding-system)
(setq len (+ (length encoding-name) 1
(- (point) last-pos)))
;; According to the spec of CTEXT, it is not
;; necessary to produce this extra designation
;; sequence, but some buggy application
;; (e.g. crxvt-gb) requires it.
(insert "\e(B")
(save-excursion
(goto-char last-pos)
(insert (format "\e%%/%d" noctets))
(insert-byte (+ (/ len 128) 128) 1)
(insert-byte (+ (% len 128) 128) 1)
(insert encoding-name)
(insert 2))))
((eq last-encoding-info 'utf-8)
;; Encode the previous range using UTF-8 encoding
;; extention.
(encode-coding-region last-pos (point) 'mule-utf-8)
(save-excursion
(goto-char last-pos)
(insert "\e%G"))
(insert "\e%@"))
(t
(put-text-property last-pos (point) 'charset charset)))
(setq last-pos (point)
last-encoding-info encoding-info))))
(goto-char (match-beginning 0))
(setq last-pos (point)
charset (char-charset (following-char) charset-list))
(forward-char 1)
(while (and (< (point) end-pos)
(eq charset (char-charset (following-char) charset-list)))
(forward-char 1))
(if charset
(if (setq encoding-info (cdr (assq charset encoding-table)))
;; Encode this range using an extended segment.
(let ((encoding-name (car encoding-info))
(coding-system (nth 1 encoding-info))
(noctets (nth 2 encoding-info))
len)
(encode-coding-region last-pos (point) coding-system)
(setq len (+ (length encoding-name) 1
(- (point) last-pos)))
;; According to the spec of CTEXT, it is not
;; necessary to produce this extra designation
;; sequence, but some buggy application
;; (e.g. crxvt-gb) requires it.
(insert "\e(B")
(save-excursion
(goto-char last-pos)
(insert (format "\e%%/%d" noctets))
(insert-byte (+ (/ len 128) 128) 1)
(insert-byte (+ (% len 128) 128) 1)
(insert encoding-name)
(insert 2)))
;; Encode this range as characters in CHARSET.
(put-text-property last-pos (point) 'charset charset))
;; Encode this range using UTF-8 encoding extention.
(encode-coding-region last-pos (point) 'mule-utf-8)
(save-excursion
(goto-char last-pos)
(insert "\e%G"))
(insert "\e%@")))
(goto-char (point-min)))))
;; Must return nil, as build_annotations_2 expects that.
nil)
...
...
src/ChangeLog
View file @
7a84eee5
2010-08-06 Kenichi Handa <handa@m17n.org>
* charset.c: Include <stdlib.h>
(struct charset_sort_data): New struct.
(charset_compare): New function.
(Fsort_charsets): New funciton.
(syms_of_charset): Declare Fsort_charsets as a Lisp function.
* coding.c (decode_coding_iso_2022): Fix checking of dimension
number in CTEXT extended segment.
2010-08-01 Juanma Barranquero <lekktu@gmail.com>
* w32fns.c (syms_of_w32fns) <x-max-tooltip-size>: Fix typo in docstring.
...
...
src/charset.c
View file @
7a84eee5
...
...
@@ -28,6 +28,7 @@ along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
#include <config.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <ctype.h>
#include <sys/types.h>
...
...
@@ -2139,23 +2140,22 @@ that case, find the charset from what supported by that coding system. */)
charset
=
CHAR_CHARSET
(
XINT
(
ch
));
else
{
Lisp_Object
charset_list
;
if
(
CONSP
(
restriction
))
{
for
(
charset_list
=
Qnil
;
CONSP
(
restriction
);
restriction
=
XCDR
(
restriction
))
int
c
=
XFASTINT
(
ch
);
for
(;
CONSP
(
restriction
);
restriction
=
XCDR
(
restriction
))
{
int
id
;
struct
charset
*
charset
;
CHECK_CHARSET_GET_ID
(
XCAR
(
restriction
),
id
);
charset_list
=
Fcons
(
make_number
(
id
),
charset_list
);
CHECK_CHARSET_GET_CHARSET
(
XCAR
(
restriction
),
charset
);
if
(
ENCODE_CHAR
(
charset
,
c
)
!=
CHARSET_INVALID_CODE
(
charset
))
return
XCAR
(
restriction
);
}
charset_list
=
Fnreverse
(
charset_list
)
;
return
Qnil
;
}
else
charset_list
=
coding_system_charset_list
(
restriction
);
charset
=
char_charset
(
XINT
(
ch
),
charset_list
,
NULL
);
restriction
=
coding_system_charset_list
(
restriction
);
charset
=
char_charset
(
XINT
(
ch
),
restriction
,
NULL
);
if
(
!
charset
)
return
Qnil
;
}
...
...
@@ -2312,6 +2312,69 @@ Return charset identification number of CHARSET. */)
return
make_number
(
id
);
}
struct
charset_sort_data
{
Lisp_Object
charset
;
int
id
;
int
priority
;
};
static
int
charset_compare
(
const
void
*
d1
,
const
void
*
d2
)
{
const
struct
charset_sort_data
*
data1
=
d1
,
*
data2
=
d2
;
return
(
data1
->
priority
-
data2
->
priority
);
}
DEFUN
(
"sort-charsets"
,
Fsort_charsets
,
Ssort_charsets
,
1
,
1
,
0
,
doc
:
/* Sort charset list CHARSETS by a priority of each charset.
Return the sorted list. CHARSETS is modified by side effects.
See also `charset-priority-list' and `set-charset-priority'. */
)
(
Lisp_Object
charsets
)
{
Lisp_Object
len
=
Flength
(
charsets
);
int
n
=
XFASTINT
(
len
),
i
,
j
,
done
;
Lisp_Object
tail
,
elt
,
attrs
;
struct
charset_sort_data
*
sort_data
;
int
id
,
min_id
,
max_id
;
USE_SAFE_ALLOCA
;
if
(
n
==
0
)
return
Qnil
;
SAFE_ALLOCA
(
sort_data
,
struct
charset_sort_data
*
,
sizeof
(
*
sort_data
)
*
n
);
for
(
tail
=
charsets
,
i
=
0
;
CONSP
(
tail
);
tail
=
XCDR
(
tail
),
i
++
)
{
elt
=
XCAR
(
tail
);
CHECK_CHARSET_GET_ATTR
(
elt
,
attrs
);
sort_data
[
i
].
charset
=
elt
;
sort_data
[
i
].
id
=
id
=
XINT
(
CHARSET_ATTR_ID
(
attrs
));
if
(
i
==
0
)
min_id
=
max_id
=
id
;
else
if
(
id
<
min_id
)
min_id
=
id
;
else
if
(
id
>
max_id
)
max_id
=
id
;
}
for
(
done
=
0
,
tail
=
Vcharset_ordered_list
,
i
=
0
;
done
<
n
&&
CONSP
(
tail
);
tail
=
XCDR
(
tail
),
i
++
)
{
elt
=
XCAR
(
tail
);
id
=
XFASTINT
(
elt
);
if
(
id
>=
min_id
&&
id
<=
max_id
)
for
(
j
=
0
;
j
<
n
;
j
++
)
if
(
sort_data
[
j
].
id
==
id
)
{
sort_data
[
j
].
priority
=
i
;
done
++
;
}
}
qsort
(
sort_data
,
n
,
sizeof
*
sort_data
,
charset_compare
);
for
(
i
=
0
,
tail
=
charsets
;
CONSP
(
tail
);
tail
=
XCDR
(
tail
),
i
++
)
XSETCAR
(
tail
,
sort_data
[
i
].
charset
);
SAFE_FREE
();
return
charsets
;
}
void
init_charset
()
...
...
@@ -2414,6 +2477,7 @@ syms_of_charset ()
defsubr
(
&
Scharset_priority_list
);
defsubr
(
&
Sset_charset_priority
);
defsubr
(
&
Scharset_id_internal
);
defsubr
(
&
Ssort_charsets
);
DEFVAR_LISP
(
"charset-map-path"
,
&
Vcharset_map_path
,
doc:
/* *List of directories to search for charset map files. */
);
...
...
src/coding.c
View file @
7a84eee5
...
...
@@ -3935,7 +3935,7 @@ decode_coding_iso_2022 (coding)
int size;
ONE_MORE_BYTE (dim);
if (dim <
0
|| dim >
4
)
if (dim <
'0'
|| dim >
'4'
)
goto invalid_code;
ONE_MORE_BYTE (M);
if (M < 128)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment