Commit 738746ba authored by Kenichi Handa's avatar Kenichi Handa
Browse files

(search-unencodable-char): New

(select-safe-coding-system): Show unencodable characters.
(unencodable-char-position): Deleted, and implemented by C in
parent 8030369c
......@@ -548,6 +548,27 @@ For invalid characters, CHARs are actually strings."
(setq chars (cons (list charset 1 char) chars))))))))
(nreverse chars)))
(defun search-unencodable-char (coding-system)
"Search forward from point for a character that is not encodable.
It asks which coding system to check.
If such a character is found, set point after that character.
Otherwise, don't move point.
When called from a program, the value is a position of the found character,
or nil if all characters are encodable."
(list (let ((default (or buffer-file-coding-system 'us-ascii)))
(format "Coding-system (default, %s): " default)
(let ((pos (unencodable-char-position (point) (point-max) coding-system)))
(if pos
(goto-char (1+ pos))
(message "All following characters are encodable by %s" coding-system))
(defvar last-coding-system-specified nil
"Most recent coding system explicitly specified by the user when asked.
This variable is set whenever Emacs asks the user which coding system
......@@ -655,7 +676,30 @@ and TO is ignored."
;; If all the defaults failed, ask a user.
(when (or (not coding-system) (consp coding-system))
;; At first, change each coding system to the corresponding
;; At first, record at most 11 problematic characters and their
;; positions for each default.
(if (stringp from)
(mapc #'(lambda (coding)
(setcdr coding
(mapcar #'(lambda (pos)
(cons pos (aref from pos)))
0 (length from) (car coding) 11 from))))
(mapc #'(lambda (coding)
(setcdr coding
(mapcar #'(lambda (pos)
(cons pos (char-after pos)))
from to (car coding) 11))))
;; If 11 unencodable characters were found, mark the last one as nil.
(mapc #'(lambda (coding)
(if (> (length coding) 11)
(setcdr (car (last coding)) nil)))
;; Change each safe coding system to the corresponding
;; mime-charset name if it is also a coding system. Such a name
;; is more friendly to users.
(let ((l codings)
......@@ -676,14 +720,13 @@ and TO is ignored."
(coding-system-category elt)))
(push elt l))))
(let ((window-configuration (current-window-configuration)))
;; Make sure the offending buffer is displayed.
(unless (stringp from)
(when (and default-coding-system (not (stringp from)))
(pop-to-buffer bufname)
(goto-char (unencodable-char-position
from to (mapcar #'car default-coding-system))))
(goto-char (apply 'min (mapcar #'(lambda (x) (car (cadr x)))
;; Then ask users to select one from CODINGS.
(with-output-to-temp-buffer "*Warning*"
......@@ -715,8 +758,45 @@ and TO is ignored."
but it is not recommended for encoding text in this context,
e.g., for sending an email message.\n")
(insert "\
However, none of them safely encodes the target text.
However, each of them encountered these problematic characters:\n")
#'(lambda (coding)
(insert (format " %s:" (car coding)))
(dolist (elt (cdr coding))
(insert " ")
(if (stringp from)
(insert (or (cdr elt) "..."))
(if (cdr elt)
(cdr elt)
:type 'help-xref
"mouse-2, RET: jump to this character"
#'(lambda (bufname pos)
(when (buffer-live-p (get-buffer bufname))
(pop-to-buffer bufname)
(goto-char pos)))
'help-args (list bufname (car elt)))
:type 'help-xref
"mouse-2, RET: next unencodable character"
#'(lambda (bufname pos coding)
(when (buffer-live-p (get-buffer bufname))
(pop-to-buffer bufname)
(if (< (point) pos)
(goto-char pos)
(forward-char 1)
(search-unencodable-char coding)
(forward-char -1))))
'help-args (list bufname (car elt)
(car coding))))))
(insert "\n"))
(insert "\
The first problematic character is at point in the displayed buffer,\n"
(substitute-command-keys "\
and \\[universal-argument] \\[what-cursor-position] will give information about it.\n"))))
......@@ -744,7 +824,8 @@ one of the following safe coding systems, or edit the buffer:\n")
(car (car safe-names)))))
(setq last-coding-system-specified (intern name)
coding-system last-coding-system-specified)))
(kill-buffer "*Warning*"))))
(kill-buffer "*Warning*")
(set-window-configuration window-configuration)))
(if (vectorp (coding-system-eol-type coding-system))
(let ((eol (coding-system-eol-type buffer-file-coding-system)))
......@@ -780,46 +861,6 @@ and try again)? " coding-system auto-cs))
(error "Save aborted")))))
(defun unencodable-char-position (start end coding-system)
"Return position of first un-encodable character in a region.
START and END specfiy the region and CODING-SYSTEM specifies the
encoding to check. Return nil if CODING-SYSTEM does encode the region.
CODING-SYSTEM may also be a list of coding systems, in which case return
the first position not encodable by any of them.
This function is fairly slow."
;; Use recursive calls in the binary chop below, since we're
;; O(logN), and the call overhead shouldn't be a bottleneck.
(unless enable-multibyte-characters
(error "Unibyte buffer"))
;; Recurse if list of coding systems.
(if (consp coding-system)
(let ((end end) res)
(dolist (elt coding-system (and res (>= res 0) res))
(let ((pos (unencodable-char-position start end elt)))
(if pos
(setq end pos
res pos)))))
;; Skip ASCII initially.
(goto-char start)
(skip-chars-forward "\000-\177" end)
(setq start (point))
(unless (= start end)
(setq coding-system (coding-system-base coding-system)) ; canonicalize
(let ((codings (find-coding-systems-region start end)))
(unless (or (equal codings '(undecided))
(memq coding-system
(find-coding-systems-region start end)))
;; Binary chop.
(if (= start (1- end))
(or (unencodable-char-position start (/ (+ start end) 2)
(unencodable-char-position (/ (+ start end) 2) end
(setq select-safe-coding-system-function 'select-safe-coding-system)
(defun select-message-coding-system ()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment