Commit 835cbadb authored by Eli Zaretskii's avatar Eli Zaretskii
Browse files

(non-standard-icccm-encodings-alist, non-standard-designations-alist): New

(ctext-post-read-conversion, ctext-pre-write-conversion): New functions.
parent 23e16093
......@@ -1284,6 +1284,161 @@ ARG is a list of coding categories ordered by priority."
(setq coding-category-list (append arg current-list))
;;; X selections
(defvar non-standard-icccm-encodings-alist
'(("ISO8859-15" . latin-iso8859-15)
("ISO8859-14" . latin-iso8859-14)
("KOI8-R" . koi8-r)
("BIG5-0" . big5))
"Alist of font charset names defined by XLFD, and the corresponding Emacs
charsets or coding systems.")
;; Functions to support "Non-Standard Character Set Encodings" defined
;; by the ICCCM spec. We support that by converting the leading
;; sequence of the ``extended segment'' to the corresponding ISO-2022
;; sequences (if the leading sequence names an Emacs charset), or decode
;; the segment (if it names a coding system). Encoding does the reverse.
(defun ctext-post-read-conversion (len)
"Decode LEN characters encoded as Compound Text with Extended Segments."
(buffer-disable-undo) ; minimize consing due to insertions and deletions
(narrow-to-region (point) (+ (point) len))
(let ((pt (point-marker))
(oldpt (point-marker))
(newpt (make-marker))
(modified-p (buffer-modified-p))
(case-fold-search nil)
encoding textlen chset)
(while (re-search-forward
nil 'move)
(set-marker newpt (point))
(set-marker pt (match-beginning 0))
(setq encoding (match-string 3))
(setq textlen (- (+ (* (- (aref (match-string 2) 0) 128) 128)
(- (aref (match-string 2) 1) 128))
(1+ (length encoding))))
chset (cdr (assoc-ignore-case encoding
(cond ((null chset)
;; This charset is not supported--leave this extended
;; segment unaltered and skip over it.
(goto-char (+ (point) textlen)))
((charsetp chset)
;; If it's a charset, replace the leading escape sequence
;; with a standard ISO-2022 sequence. We will decode all
;; such segments later, in one go, when we exit the loop
;; or find an extended segment that names a coding
;; system, not a charset.
(concat "\\1"
(if (= 0 (charset-iso-graphic-plane chset))
;; GL charsets
(if (= 1 (charset-dimension chset)) "(" "$(")
;; GR charsets
(if (= 96 (charset-chars chset))
(if (= 1 (charset-dimension chset)) ")" "$)")))
(string (charset-iso-final-char chset)))
(goto-char (+ (point) textlen)))
((coding-system-p chset)
;; If it's a coding system, we need to decode the segment
;; right away. But first, decode what we've skipped
;; across until now.
(when (> pt oldpt)
(decode-coding-region oldpt pt 'ctext-no-compositions))
(delete-region pt newpt)
(set-marker newpt (+ newpt textlen))
(decode-coding-region pt newpt chset)
(goto-char newpt)
(set-marker oldpt newpt))))
;; Decode what's left.
(when (> (point) oldpt)
(decode-coding-region oldpt (point) 'ctext-no-compositions))
;; This buffer started as unibyte, because the string we get from
;; the X selection is a unibyte string. We must now make it
;; multibyte, so that the decoded text is inserted as multibyte
;; into its buffer.
(set-buffer-multibyte t)
(set-buffer-modified-p modified-p)
(- (point-max) (point-min)))))
(defvar non-standard-designations-alist
'(("$(0" . (big5 "big5-0" 2))
("$(1" . (big5 "big5-0" 2))
("-V" . (t "iso8859-10" 1))
("-Y" . (t "iso8859-13" 1))
("-_" . (t "iso8859-14" 1))
("-b" . (t "iso8859-15" 1))
("-f" . (t "iso8859-16" 1)))
"Alist of ctext control sequences that introduce character sets which
are not in the list of approved ICCCM encodings, and the corresponding
coding system, identifier string, and number of octets per encoded
Each element has the form (CTLSEQ . (ENCODING CHARSET NOCTETS)). CTLSEQ
is the control sequence (sans the leading ESC) that introduces the character
set in the text encoded by compound-text. ENCODING is a coding system
symbol; if it is t, it means that the ctext coding system already encodes
the text correctly, and only the leading control sequence needs to be altered.
If ENCODING is a coding system, we need to re-encode the text with that
coding system. CHARSET is the ICCCM name of the charset we need to put into
the leading control sequence. NOCTETS is the number of octets (bytes) that
encode each character in this charset. NOCTETS can be 0 (meaning the number
of octets per character is variable), 1, 2, 3, or 4.")
(defun ctext-pre-write-conversion (from to)
"Encode characters between FROM and TO as Compound Text w/Extended Segments."
(buffer-disable-undo) ; minimize consing due to insertions and deletions
(narrow-to-region from to)
(encode-coding-region from to 'ctext-no-compositions)
;; Replace ISO-2022 charset designations with extended segments, for
;; those charsets that are not part of the official X registry.
(goto-char (point-min))
(let ((newpt (make-marker))
(case-fold-search nil)
pt desig encode-info encoding chset noctets textlen)
(set-buffer-multibyte nil)
(while (re-search-forward "\e\\(\$([01]\\|-[VY_bf]\\)" nil 'move)
(setq desig (match-string 1)
pt (point-marker)
encode-info (cdr (assoc desig non-standard-designations-alist))
encoding (car encode-info)
chset (cadr encode-info)
noctets (car (cddr encode-info)))
(skip-chars-forward "^\e")
(set-marker newpt (point))
((eq encoding t) ; only the leading sequence needs to be changed
(setq textlen (+ (- newpt pt) (length chset) 1))
(replace-match (format "\e%%/%d%c%c%s"
(+ (/ textlen 128) 128)
(+ (% textlen 128) 128)
t t))
((coding-system-p encoding) ; need to recode the entire segment...
(set-marker pt (match-beginning 0))
(decode-coding-region pt newpt 'ctext-no-compositions)
(set-buffer-multibyte t)
(encode-coding-region pt newpt encoding)
(set-buffer-multibyte nil)
(setq textlen (+ (- newpt pt) (length chset) 1))
(goto-char pt)
(insert (format "\e%%/%d%c%c%s"
(+ (/ textlen 128) 128)
(+ (% textlen 128) 128)
(goto-char newpt))))
(set-buffer-multibyte t)
;;; FILE I/O
(defcustom auto-coding-alist
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment