Commit 502522b2 authored by Gerd Moellmann's avatar Gerd Moellmann
Browse files

(auto-coding-regexp-alist): New user-option.

(auto-coding-from-file-contents): New function.
(set-auto-coding): Use it to determine a coding system.
parent dbcf3c03
......@@ -1237,6 +1237,20 @@ and the contents of `file-coding-system-alist'."
:type '(repeat (cons (regexp :tag "File name regexp")
(symbol :tag "Coding system"))))
(defcustom auto-coding-regexp-alist
'(("^BABYL OPTIONS:[ \t]*-\\*-[ \t]*rmail[ \t]*-\\*-" . no-conversion))
"Alist of patterns vs corresponding coding systems.
Each element looks like (REGEXP . CODING-SYSTEM).
A file whose first bytes match REGEXP is decoded by CODING-SYSTEM on reading.
The settings in this alist take priority over `coding:' tags
in the file (see the function `set-auto-coding')
and the contents of `file-coding-system-alist'."
:group 'files
:group 'mule
:type '(repeat (cons (regexp :tag "Regexp")
(symbol :tag "Coding system"))))
(defvar set-auto-coding-for-load nil
"Non-nil means look for `load-coding' property instead of `coding'.
This is used for loading and byte-compiling Emacs Lisp files.")
......@@ -1252,106 +1266,123 @@ This is used for loading and byte-compiling Emacs Lisp files.")
(setq alist (cdr alist))))
coding-system))
(defun auto-coding-from-file-contents (size)
"Determine a coding system from the contents of the current buffer.
The current buffer contains SIZE bytes starting at point.
Value is either a coding system or nil."
(save-excursion
(let ((alist auto-coding-regexp-alist)
coding-system)
(while (and alist (not coding-system))
(let ((regexp (car (car alist))))
(when (re-search-forward regexp (+ (point) size) t)
(setq coding-system (cdr (car alist)))))
(setq alist (cdr alist)))
coding-system)))
(defun set-auto-coding (filename size)
"Return coding system for a file FILENAME of which SIZE bytes follow point.
These bytes should include at least the first 1k of the file
and the last 3k of the file, but the middle may be omitted.
It checks FILENAME against the variable `auto-coding-alist'.
If FILENAME doesn't match any entries in the variable,
it checks for a `coding:' tag in the first one or two lines following
point. If no `coding:' tag is found, it checks for local variables
list in the last 3K bytes out of the SIZE bytes.
It checks FILENAME against the variable `auto-coding-alist'. If
FILENAME doesn't match any entries in the variable, it checks the
contents of the current buffer following point against
`auto-coding-regexp-alist'. If no match is found, it checks for a
`coding:' tag in the first one or two lines following point. If no
`coding:' tag is found, it checks for local variables list in the last
3K bytes out of the SIZE bytes.
The return value is the specified coding system,
or nil if nothing specified.
The variable `set-auto-coding-function' (which see) is set to this
function by default."
(let ((coding-system (auto-coding-alist-lookup filename)))
(or coding-system
(let* ((case-fold-search t)
(head-start (point))
(head-end (+ head-start (min size 1024)))
(tail-start (+ head-start (max (- size 3072) 0)))
(tail-end (+ head-start size))
coding-system head-found tail-found pos)
;; Try a short cut by searching for the string "coding:"
;; and for "unibyte:" at the head and tail of SIZE bytes.
(setq head-found (or (search-forward "coding:" head-end t)
(search-forward "unibyte:" head-end t)))
(if (and head-found (> head-found tail-start))
;; Head and tail are overlapped.
(setq tail-found head-found)
(goto-char tail-start)
(setq tail-found (or (search-forward "coding:" tail-end t)
(search-forward "unibyte:" tail-end t))))
;; At first check the head.
(when head-found
(or (auto-coding-alist-lookup filename)
(auto-coding-from-file-contents size)
(let* ((case-fold-search t)
(head-start (point))
(head-end (+ head-start (min size 1024)))
(tail-start (+ head-start (max (- size 3072) 0)))
(tail-end (+ head-start size))
coding-system head-found tail-found pos)
;; Try a short cut by searching for the string "coding:"
;; and for "unibyte:" at the head and tail of SIZE bytes.
(setq head-found (or (search-forward "coding:" head-end t)
(search-forward "unibyte:" head-end t)))
(if (and head-found (> head-found tail-start))
;; Head and tail are overlapped.
(setq tail-found head-found)
(goto-char tail-start)
(setq tail-found (or (search-forward "coding:" tail-end t)
(search-forward "unibyte:" tail-end t))))
;; At first check the head.
(when head-found
(goto-char head-start)
(setq pos (re-search-forward "[\n\r]" head-end t))
(if (and pos
(= (char-after head-start) ?#)
(= (char-after (1+ head-start)) ?!))
;; If the file begins with "#!" (exec interpreter magic),
;; look for coding frobs in the first two lines. You cannot
;; necessarily put them in the first line of such a file
;; without screwing up the interpreter invocation.
(setq pos (search-forward "\n" head-end t)))
(if pos (setq head-end pos))
(when (< head-found head-end)
(goto-char head-start)
(setq pos (re-search-forward "[\n\r]" head-end t))
(if (and pos
(= (char-after head-start) ?#)
(= (char-after (1+ head-start)) ?!))
;; If the file begins with "#!" (exec interpreter magic),
;; look for coding frobs in the first two lines. You cannot
;; necessarily put them in the first line of such a file
;; without screwing up the interpreter invocation.
(setq pos (search-forward "\n" head-end t)))
(if pos (setq head-end pos))
(when (< head-found head-end)
(goto-char head-start)
(when (and set-auto-coding-for-load
(re-search-forward
"-\\*-\\(.*;\\)?[ \t]*unibyte:[ \t]*\\([^ ;]+\\)"
head-end t))
(setq coding-system 'raw-text))
(when (and (not coding-system)
(re-search-forward
"-\\*-\\(.*;\\)?[ \t]*coding:[ \t]*\\([^ ;]+\\)"
head-end t))
(setq coding-system (intern (match-string 2)))
(or (coding-system-p coding-system)
(setq coding-system nil)))))
;; If no coding: tag in the head, check the tail.
(when (and tail-found (not coding-system))
(goto-char tail-start)
(search-forward "\n\^L" nil t)
(if (re-search-forward
"^\\(.*\\)[ \t]*Local Variables:[ \t]*\\(.*\\)$" tail-end t)
;; The prefix is what comes before "local variables:" in its
;; line. The suffix is what comes after "local variables:"
;; in its line.
(let* ((prefix (regexp-quote (match-string 1)))
(suffix (regexp-quote (match-string 2)))
(re-coding
(concat
"^" prefix
"[ \t]*coding[ \t]*:[ \t]*\\([^ \t]+\\)[ \t]*"
suffix "$"))
(re-unibyte
(concat
"^" prefix
"[ \t]*unibyte[ \t]*:[ \t]*\\([^ \t]+\\)[ \t]*"
suffix "$"))
(re-end
(concat "^" prefix "[ \t]*end *:[ \t]*" suffix "$"))
(pos (point)))
(re-search-forward re-end tail-end 'move)
(setq tail-end (point))
(goto-char pos)
(when (and set-auto-coding-for-load
(re-search-forward re-unibyte tail-end t))
(setq coding-system 'raw-text))
(when (and (not coding-system)
(re-search-forward re-coding tail-end t))
(setq coding-system (intern (match-string 1)))
(or (coding-system-p coding-system)
(setq coding-system nil))))))
coding-system))))
(when (and set-auto-coding-for-load
(re-search-forward
"-\\*-\\(.*;\\)?[ \t]*unibyte:[ \t]*\\([^ ;]+\\)"
head-end t))
(setq coding-system 'raw-text))
(when (and (not coding-system)
(re-search-forward
"-\\*-\\(.*;\\)?[ \t]*coding:[ \t]*\\([^ ;]+\\)"
head-end t))
(setq coding-system (intern (match-string 2)))
(or (coding-system-p coding-system)
(setq coding-system nil)))))
;; If no coding: tag in the head, check the tail.
(when (and tail-found (not coding-system))
(goto-char tail-start)
(search-forward "\n\^L" nil t)
(if (re-search-forward
"^\\(.*\\)[ \t]*Local Variables:[ \t]*\\(.*\\)$" tail-end t)
;; The prefix is what comes before "local variables:" in its
;; line. The suffix is what comes after "local variables:"
;; in its line.
(let* ((prefix (regexp-quote (match-string 1)))
(suffix (regexp-quote (match-string 2)))
(re-coding
(concat
"^" prefix
"[ \t]*coding[ \t]*:[ \t]*\\([^ \t]+\\)[ \t]*"
suffix "$"))
(re-unibyte
(concat
"^" prefix
"[ \t]*unibyte[ \t]*:[ \t]*\\([^ \t]+\\)[ \t]*"
suffix "$"))
(re-end
(concat "^" prefix "[ \t]*end *:[ \t]*" suffix "$"))
(pos (point)))
(re-search-forward re-end tail-end 'move)
(setq tail-end (point))
(goto-char pos)
(when (and set-auto-coding-for-load
(re-search-forward re-unibyte tail-end t))
(setq coding-system 'raw-text))
(when (and (not coding-system)
(re-search-forward re-coding tail-end t))
(setq coding-system (intern (match-string 1)))
(or (coding-system-p coding-system)
(setq coding-system nil))))))
coding-system)))
(setq set-auto-coding-function 'set-auto-coding)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment