Commit c912b478 authored by Katsumi Yamaoka
Browse files

Decode extra numeric entities.

mm-util.el (mm-extra-numeric-entities): New variable.
mm-url.el (mm-url-decode-entities):
mm-decode.el (mm-shr): Use it to decode extra numeric entities.
parent 674c5ccf
2010-12-07 Katsumi Yamaoka <yamaoka@jpl.org>
* mm-util.el (mm-extra-numeric-entities): New variable.
* mm-url.el (mm-url-decode-entities):
* mm-decode.el (mm-shr): Use it to decode extra numeric entities.
2010-12-07 Stefan Monnier <monnier@iro.umontreal.ca>
 
* message.el: Use completion-at-point.
......
......@@ -1699,7 +1699,7 @@ If RECURSIVE, search recursively."
(when handle
(mm-with-part handle
(buffer-string))))))
shr-inhibit-images shr-blocked-images charset)
shr-inhibit-images shr-blocked-images charset char)
(if (and (boundp 'gnus-summary-buffer)
(buffer-name gnus-summary-buffer))
(with-current-buffer gnus-summary-buffer
......@@ -1714,13 +1714,25 @@ If RECURSIVE, search recursively."
(narrow-to-region (point) (point))
(shr-insert-document
(mm-with-part handle
(when (and charset
(setq charset (mm-charset-to-coding-system charset))
(not (eq charset 'ascii)))
(insert (prog1
(mm-decode-coding-string (buffer-string) charset)
(erase-buffer)
(mm-enable-multibyte))))
(insert (prog1
(if (and charset
(setq charset
(mm-charset-to-coding-system charset))
(not (eq charset 'ascii)))
(mm-decode-coding-string (buffer-string) charset)
(mm-string-as-multibyte (buffer-string)))
(erase-buffer)
(mm-enable-multibyte)))
(goto-char (point-min))
(setq case-fold-search t)
(while (re-search-forward
"&#\\(?:x\\([89][0-9a-f]\\)\\|\\(1[2-5][0-9]\\)\\);" nil t)
(when (setq char
(cdr (assq (if (match-beginning 1)
(string-to-number (match-string 1) 16)
(string-to-number (match-string 2)))
mm-extra-numeric-entities)))
(replace-match (char-to-string char))))
(libxml-parse-html-region (point-min) (point-max))))
(mm-handle-set-undisplayer
handle
......
......@@ -365,16 +365,19 @@ If FOLLOW-REFRESH is non-nil, redirect refresh url in META."
(defun mm-url-decode-entities ()
"Decode all HTML entities."
(goto-char (point-min))
(while (re-search-forward "&\\(#[0-9]+\\|#x[0-9a-f]+\\|[a-z]+[0-9]*\\);" nil t)
(while (re-search-forward "&\\(#[0-9]+\\|#x[0-9a-f]+\\|[a-z]+[0-9]*\\);"
nil t)
(let* ((entity (match-string 1))
(elem (if (eq (aref entity 0) ?\#)
(let ((c (mm-ucs-to-char
;; Hex number: &#x3212
(if (eq (aref entity 1) ?x)
(string-to-number (substring entity 2)
16)
;; Decimal number: &#23
(string-to-number (substring entity 1))))))
(let ((c
;; Hex number: &#x3212
(if (eq (aref entity 1) ?x)
(string-to-number (substring entity 2)
16)
;; Decimal number: &#23
(string-to-number (substring entity 1)))))
(setq c (or (cdr (assq c mm-extra-numeric-entities))
(mm-ucs-to-char c)))
(if (mm-char-or-char-int-p c) c ?#))
(or (cdr (assq (intern entity)
mm-url-html-entities))
......
......@@ -866,6 +866,21 @@ variable is set, it overrides the default priority."
Setting it to nil is useful on Emacsen supporting Unicode if sending
mail with multiple parts is preferred to sending a Unicode one.")
(defvar mm-extra-numeric-entities
  ;; Each source pair is (ENTITY-NUMBER . UCS-CODE-POINT).  The code
  ;; points are turned into characters once, when the variable is
  ;; defined, so that lookups with `assq' yield a character directly.
  ;; The pairs coincide with the Windows-1252 assignments for the
  ;; range #x80-#x9F; numbers that have no entry are left out.
  (mapcar
   (lambda (pair)
     (let ((code (car pair))
	   (ucs (cdr pair)))
       (cons code (mm-ucs-to-char ucs))))
   '((#x80 . #x20AC)			; euro sign
     (#x82 . #x201A)			; single low-9 quotation mark
     (#x83 . #x0192)			; latin small letter f with hook
     (#x84 . #x201E)			; double low-9 quotation mark
     (#x85 . #x2026)			; horizontal ellipsis
     (#x86 . #x2020)			; dagger
     (#x87 . #x2021)			; double dagger
     (#x88 . #x02C6)			; modifier letter circumflex accent
     (#x89 . #x2030)			; per mille sign
     (#x8A . #x0160)			; latin capital letter s with caron
     (#x8B . #x2039)			; single left-pointing angle quote
     (#x8C . #x0152)			; latin capital ligature oe
     (#x8E . #x017D)			; latin capital letter z with caron
     (#x91 . #x2018)			; left single quotation mark
     (#x92 . #x2019)			; right single quotation mark
     (#x93 . #x201C)			; left double quotation mark
     (#x94 . #x201D)			; right double quotation mark
     (#x95 . #x2022)			; bullet
     (#x96 . #x2013)			; en dash
     (#x97 . #x2014)			; em dash
     (#x98 . #x02DC)			; small tilde
     (#x99 . #x2122)			; trade mark sign
     (#x9A . #x0161)			; latin small letter s with caron
     (#x9B . #x203A)			; single right-pointing angle quote
     (#x9C . #x0153)			; latin small ligature oe
     (#x9E . #x017E)			; latin small letter z with caron
     (#x9F . #x0178)))			; latin capital letter y with diaeresis
  "*Alist of numeric entity codes and the characters they stand for.
Each element looks like (CODE . CHAR).  The codes listed here are ones
whose intended character is not the ISO 10646 character having the
same number.  This table is used for decoding such extra numeric
entities to characters, mainly in html messages; for example,
\"&#128;\" is decoded to the euro sign.")
;;; Internal variables:
;;; Functions:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment