Commit 65a0e5fe authored by Kenichi Handa's avatar Kenichi Handa
Browse files

Many name changes: utf-16-{be,le} -> utf-16{be,le}.

(mule-utf-16-le, utf-16-le, mule-utf-16-be, utf-16-be): New coding
system aliases for backward compatibility.
parent 9ef9b28e
......@@ -26,12 +26,18 @@
;; Support for UTF-16, which is a two-byte encoding (modulo
;; surrogates) of Unicode, written either in little or big endian
;; order: coding-systems `mule-utf-16-le' and `mule-utf-16-be'.
;; (utf-16-le is used by the DozeN'T clipboard, for instance.) The
;; data are preceded by a two-byte signature which identifies their
;; byte sex. These are used by the coding-category-utf-16-{b,l}e code
;; to identify the coding, but ignored on decoding.
;; order and either with or without the leading BOM (a two-byte
;; signature which identifies their byte sex).
;;
;; We provide these base coding systems.
;; name endian BOM
;; ---- ------ ---
;; mule-utf-16le little no
;; mule-utf-16be big no
;; mule-utf-16le-with-signature little yes
;; mule-utf-16be-with-signature big yes
;; mule-utf-16 both yes
;;
;; Note that un-decodable sequences aren't (yet?) preserved as raw
;; bytes, as they are with utf-8, so reading and writing as utf-16 can
;; corrupt data.
......@@ -112,7 +118,7 @@
(r1 %= 96)
(r1 += (r2 + 32)))))))))))))
(defconst utf-16-le-decode-loop
(defconst utf-16le-decode-loop
`(loop
(read r3 r4)
(r1 = (r4 <8 r3))
......@@ -121,7 +127,7 @@
(write-multibyte-character r0 r1)
(repeat)))
(defconst utf-16-be-decode-loop
(defconst utf-16be-decode-loop
`(loop
(read r3 r4)
(r1 = (r3 <8 r4))
......@@ -132,35 +138,35 @@
)
;; CCL program that decodes little-endian UTF-16 carrying no signature
;; bytes.  Its body is nothing but the shared little-endian decode loop,
;; spliced into the backquoted program with `,'.
(define-ccl-program ccl-decode-mule-utf-16-le
(define-ccl-program ccl-decode-mule-utf-16le
`(2 ; 2 bytes -> 1 to 4 bytes
,utf-16-le-decode-loop)
,utf-16le-decode-loop)
"Decode UTF-16LE (little endian without signature bytes).
Basic decoding is done into the charsets ascii, latin-iso8859-1 and
mule-unicode-*. Un-representable Unicode characters are decoded as
U+fffd. The result is run through the translation-table named
`utf-translation-table-for-decode'.")
;; CCL program that decodes big-endian UTF-16 carrying no signature
;; bytes.  Its body is nothing but the shared big-endian decode loop,
;; spliced into the backquoted program with `,'.
(define-ccl-program ccl-decode-mule-utf-16-be
(define-ccl-program ccl-decode-mule-utf-16be
`(2 ; 2 bytes -> 1 to 4 bytes
,utf-16-be-decode-loop)
,utf-16be-decode-loop)
"Decode UTF-16BE (big endian without signature bytes).
Basic decoding is done into the charsets ascii, latin-iso8859-1 and
mule-unicode-*. Un-representable Unicode characters are
decoded as U+fffd. The result is run through the translation-table of
name `utf-translation-table-for-decode'.")
(define-ccl-program ccl-decode-mule-utf-16-le-with-signature
(define-ccl-program ccl-decode-mule-utf-16le-with-signature
`(2
((read r3 r4)
,utf-16-le-decode-loop))
"Like ccl-decode-utf-16-le but skip the first 2-byte BOM.")
,utf-16le-decode-loop))
"Like ccl-decode-mule-utf-16le but skip the first 2-byte BOM.")
(define-ccl-program ccl-decode-mule-utf-16-be-with-signature
(define-ccl-program ccl-decode-mule-utf-16be-with-signature
`(2
((read r3 r4)
,utf-16-be-decode-loop))
"Like ccl-decode-utf-16-be but skip the first 2-byte BOM.")
,utf-16be-decode-loop))
"Like ccl-decode-mule-utf-16be but skip the first 2-byte BOM.")
(define-ccl-program ccl-decode-mule-utf-16
`(2
......@@ -172,7 +178,7 @@ name `utf-translation-table-for-decode'.")
;; function.
(,@utf-16-decode-ucs
(write-multibyte-character r0 r1)
,utf-16-le-decode-loop)
,utf-16le-decode-loop)
((if (r1 == #xFEFF)
;; R1 is a BOM for big endian, but we can't keep that
;; character in the output because it can't be
......@@ -184,12 +190,12 @@ name `utf-translation-table-for-decode'.")
(,@utf-16-decode-ucs
(translate-character utf-translation-table-for-decode r0 r1)))
(write-multibyte-character r0 r1)
,utf-16-be-decode-loop))))
"Like ccl-decode-utf-16-be/le but check the first BOM.")
,utf-16be-decode-loop))))
"Like ccl-decode-mule-utf-16be/le but check the first BOM.")
(makunbound 'utf-16-decode-ucs) ; done with it
(makunbound 'utf-16-le-decode-loop)
(makunbound 'utf-16-be-decode-loop)
(makunbound 'utf-16le-decode-loop)
(makunbound 'utf-16be-decode-loop)
(eval-and-compile
(defconst utf-16-decode-to-ucs
......@@ -216,7 +222,7 @@ name `utf-translation-table-for-decode'.")
(r0 = (r3 + #xe000))
(r0 = #xfffd))))))))))
(defconst utf-16-le-encode-loop
(defconst utf-16le-encode-loop
`(loop
(read-multibyte-character r0 r1)
(lookup-character utf-subst-table-for-encode r0 r1)
......@@ -227,7 +233,7 @@ name `utf-translation-table-for-decode'.")
(write (r0 >> 8))
(repeat)))
(defconst utf-16-be-encode-loop
(defconst utf-16be-encode-loop
`(loop
(read-multibyte-character r0 r1)
(lookup-character utf-subst-table-for-encode r0 r1)
......@@ -239,9 +245,10 @@ name `utf-translation-table-for-decode'.")
(repeat)))
)
(define-ccl-program ccl-encode-mule-utf-16-le
(define-ccl-program ccl-encode-mule-utf-16le
`(1
,utf-16-le-encode-loop)
,utf-16le-encode-loop)
"Encode to UTF-16LE (little endian without signature).
Characters from the charsets ascii, eight-bit-control,
eight-bit-graphic, latin-iso8859-1 and mule-unicode-* are encoded
......@@ -249,9 +256,9 @@ after translation through the translation-table of name
`utf-translation-table-for-encode'.
Others are encoded as U+FFFD.")
(define-ccl-program ccl-encode-mule-utf-16-be
(define-ccl-program ccl-encode-mule-utf-16be
`(1
,utf-16-be-encode-loop)
,utf-16be-encode-loop)
"Encode to UTF-16BE (big endian without signature).
Characters from the charsets ascii, eight-bit-control,
eight-bit-graphic, latin-iso8859-1 and mule-unicode-* are encoded
......@@ -259,11 +266,11 @@ after translation through the translation-table named
`utf-translation-table-for-encode'.
Others are encoded as U+FFFD.")
(define-ccl-program ccl-encode-mule-utf-16-le-with-signature
(define-ccl-program ccl-encode-mule-utf-16le-with-signature
`(1
((write #xFF)
(write #xFE)
,utf-16-le-encode-loop))
,utf-16le-encode-loop))
"Encode to UTF-16 (little endian with signature).
Characters from the charsets ascii, eight-bit-control,
eight-bit-graphic, latin-iso8859-1 and mule-unicode-* are encoded
......@@ -271,11 +278,11 @@ after translation through the translation-table of name
`utf-translation-table-for-encode'.
Others are encoded as U+FFFD.")
(define-ccl-program ccl-encode-mule-utf-16-be-with-signature
(define-ccl-program ccl-encode-mule-utf-16be-with-signature
`(1
((write #xFE)
(write #xFF)
,utf-16-be-encode-loop))
,utf-16be-encode-loop))
"Encode to UTF-16 (big endian with signature).
Characters from the charsets ascii, eight-bit-control,
eight-bit-graphic, latin-iso8859-1 and mule-unicode-* are encoded
......@@ -284,8 +291,8 @@ after translation through the translation-table named
Others are encoded as U+FFFD.")
(makunbound 'utf-16-decode-to-ucs)
(makunbound 'utf-16-le-encode-loop)
(makunbound 'utf-16-be-encode-loop)
(makunbound 'utf-16le-encode-loop)
(makunbound 'utf-16be-encode-loop)
(defun mule-utf-16-post-read-conversion (length)
(when (> length 0)
......@@ -295,17 +302,17 @@ Others are encoded as U+FFFD.")
(setq last-coding-system-used
(coding-system-change-text-conversion
last-coding-system-used
'mule-utf-16-le-with-signature))
'mule-utf-16le-with-signature))
(setq length (1- length)))
((= char (decode-char 'ucs #xFFFF))
(delete-char 1)
(setq last-coding-system-used
(coding-system-change-text-conversion
last-coding-system-used
'mule-utf-16-be-with-signature))
'mule-utf-16be-with-signature))
(setq length (1- length)))
(t
(setq last-coding-system-used 'mule-utf-16-be)))))
(setq last-coding-system-used 'mule-utf-16be)))))
length)
(let ((doc "
......@@ -324,13 +331,13 @@ On encoding (e.g. writing a file), Emacs characters not belonging to
any of the character sets listed above are encoded into the byte
sequence representing U+FFFD (REPLACEMENT CHARACTER)."))
(make-coding-system
'mule-utf-16-le 4
'mule-utf-16le 4
?u ; Mule-UCS uses ?U, but code-pages uses that for koi8-u.
(concat
"Little endian UTF-16 encoding for Emacs-supported Unicode characters."
"UTF-16LE encoding for Emacs-supported Unicode characters."
doc)
'(ccl-decode-mule-utf-16-le . ccl-encode-mule-utf-16-le)
'(ccl-decode-mule-utf-16le . ccl-encode-mule-utf-16le)
'((safe-charsets
ascii
eight-bit-control
......@@ -346,12 +353,12 @@ sequence representing U+FFFD (REPLACEMENT CHARACTER)."))
utf-translate-cjk)))
(make-coding-system
'mule-utf-16-be 4 ?u
'mule-utf-16be 4 ?u
(concat
"Big endian UTF-16 encoding for Emacs-supported Unicode characters."
"UTF-16BE encoding for Emacs-supported Unicode characters."
doc)
'(ccl-decode-mule-utf-16-be . ccl-encode-mule-utf-16-be)
'(ccl-decode-mule-utf-16be . ccl-encode-mule-utf-16be)
'((safe-charsets
ascii
eight-bit-control
......@@ -367,13 +374,13 @@ sequence representing U+FFFD (REPLACEMENT CHARACTER)."))
utf-translate-cjk)))
(make-coding-system
'mule-utf-16-le-with-signature 4 ?u
'mule-utf-16le-with-signature 4 ?u
(concat
"Little endian UTF-16 (with BOM) for Emacs-supported Unicode characters."
doc)
'(ccl-decode-mule-utf-16-le-with-signature
. ccl-encode-mule-utf-16-le-with-signature)
'(ccl-decode-mule-utf-16le-with-signature
. ccl-encode-mule-utf-16le-with-signature)
'((safe-charsets
ascii
eight-bit-control
......@@ -390,13 +397,13 @@ sequence representing U+FFFD (REPLACEMENT CHARACTER)."))
utf-translate-cjk)))
(make-coding-system
'mule-utf-16-be-with-signature 4 ?u
'mule-utf-16be-with-signature 4 ?u
(concat
"Big endian UTF-16 (with BOM) for Emacs-supported Unicode characters."
doc)
'(ccl-decode-mule-utf-16-be-with-signature
. ccl-encode-mule-utf-16-be-with-signature)
'(ccl-decode-mule-utf-16be-with-signature
. ccl-encode-mule-utf-16be-with-signature)
'((safe-charsets
ascii
eight-bit-control
......@@ -418,7 +425,7 @@ sequence representing U+FFFD (REPLACEMENT CHARACTER)."))
"UTF-16 (with or without BOM) for Emacs-supported Unicode characters."
doc)
'(ccl-decode-mule-utf-16 . ccl-encode-mule-utf-16-be-with-signature)
'(ccl-decode-mule-utf-16 . ccl-encode-mule-utf-16be-with-signature)
'((safe-charsets
ascii
eight-bit-control
......@@ -436,12 +443,18 @@ sequence representing U+FFFD (REPLACEMENT CHARACTER)."))
(post-read-conversion . mule-utf-16-post-read-conversion)))
)
;; Short `utf-16*' names: each one is an alias of the `mule-utf-16*'
;; coding system named as its second argument.
;; NOTE(review): the hyphenated `utf-16-le'/`utf-16-be' names are aliased
;; again further down with a different target; this looks like old and
;; new diff lines shown together -- confirm against the real file.
(define-coding-system-alias 'utf-16-le 'mule-utf-16-le)
(define-coding-system-alias 'utf-16-be 'mule-utf-16-be)
(define-coding-system-alias 'utf-16-le-with-signature
'mule-utf-16-le-with-signature)
(define-coding-system-alias 'utf-16-be-with-signature
'mule-utf-16-be-with-signature)
(define-coding-system-alias 'utf-16le 'mule-utf-16le)
(define-coding-system-alias 'utf-16be 'mule-utf-16be)
(define-coding-system-alias 'utf-16le-with-signature
'mule-utf-16le-with-signature)
(define-coding-system-alias 'utf-16be-with-signature
'mule-utf-16be-with-signature)
(define-coding-system-alias 'utf-16 'mule-utf-16)
;; For backward compatibility.
;; The old hyphenated names, with and without the `mule-' prefix, are
;; kept as aliases of the *-with-signature coding systems, not of the
;; BOM-less `mule-utf-16le'/`mule-utf-16be'.
(define-coding-system-alias 'mule-utf-16-le 'mule-utf-16le-with-signature)
(define-coding-system-alias 'utf-16-le 'mule-utf-16le-with-signature)
(define-coding-system-alias 'mule-utf-16-be 'mule-utf-16be-with-signature)
(define-coding-system-alias 'utf-16-be 'mule-utf-16be-with-signature)
;;; utf-16.el ends here
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment