latexenc.el 7.98 KB
Newer Older
Eli Zaretskii's avatar
Eli Zaretskii committed
1
;;; latexenc.el --- guess correct coding system in LaTeX files -*-coding: iso-2022-7bit -*-
Thien-Thi Nguyen's avatar
Thien-Thi Nguyen committed
2

3
;; Copyright (C) 2005, 2006 Free Software Foundation, Inc.
Thien-Thi Nguyen's avatar
Thien-Thi Nguyen committed
4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21

;; Author: Arne J,Ax(Brgensen <arne@arnested.dk>
;; Keywords: mule, coding system, latex

;; This file is part of GNU Emacs.

;; GNU Emacs is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 2, or (at your option)
;; any later version.

;; GNU Emacs is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.

;; You should have received a copy of the GNU General Public License
;; along with GNU Emacs; see the file COPYING.  If not, write to the
Lute Kamstra's avatar
Lute Kamstra committed
22 23
;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
;; Boston, MA 02110-1301, USA.
Thien-Thi Nguyen's avatar
Thien-Thi Nguyen committed
24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49

;;; Commentary:

;; This code tries to guess the correct coding system of a LaTeX file.

;; First it searches for a \inputencoding{...} or
;; \usepackage[...]{inputenc} line in the file and looks up the ... in
;; `latex-inputenc-coding-alist' to find the corresponding coding
;; system.

;; If this fails it will search for AUCTeX's TeX-master or tex-mode's
;; tex-main-file variable in the local variables section and visit
;; that file to get the coding system from the master file. This check
;; can be disabled by setting `latexenc-dont-use-TeX-master-flag' to
;; t.

;; If we have still not found a coding system we will try to use the
;; standard tex-mode's `tex-guess-main-file' and get the coding system
;; from the main file. This check can be disabled by setting
;; `latexenc-dont-use-tex-guess-main-file-flag' to t.

;; The functionality is enabled by adding the function
;; `latexenc-find-file-coding-system' to `file-coding-system-alist'
;; like this

;; (add-to-list 'file-coding-system-alist
Stefan Monnier's avatar
Stefan Monnier committed
50
;; 	     '("\\.\\(tex\\|ltx\\|dtx\\|drv\\)\\'" . latexenc-find-file-coding-system))
Thien-Thi Nguyen's avatar
Thien-Thi Nguyen committed
51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80

;;; Code:

;;;###autoload
(defcustom latex-inputenc-coding-alist
  '(("ansinew" . windows-1252) ; MS Windows ANSI encoding, extension of Latin-1
    ("applemac" . mac-roman)
    ("ascii" . us-ascii)
    ("cp1250" . windows-1250) ; MS Windows encoding, codepage 1250
    ("cp1252" . windows-1252) ; synonym of ansinew
    ("cp1257" . cp1257)
    ("cp437de" . cp437) ; IBM code page 437 (German version): 225 is \ss
    ("cp437" . cp437) ; IBM code page 437: 225 is \beta
    ("cp850" . cp850) ; IBM code page 850
    ("cp852" . cp852) ; IBM code page 852
    ;; ("cp858" . undecided) ; IBM code page 850 but with a euro symbol
    ("cp865" . cp865) ; IBM code page 865
    ;; The DECMultinational charaterset used by the OpenVMS system
    ;; ("decmulti" . undecided)
    ("latin1" . iso-8859-1)
    ("latin2" . iso-8859-2)
    ("latin3" . iso-8859-3)
    ("latin4" . iso-8859-4)
    ("latin5" . iso-8859-5)
    ("latin9" . iso-8859-15)
    ;; ("latin10" . undecided)
    ;; ("macce" . undecided) ; Apple Central European
    ("next" . next) ; The Next encoding
    ("utf8" . utf-8)
    ("utf8x" . utf-8)) ; used by the Unicode LaTeX package
81
  "Mapping from LaTeX encodings in \"inputenc.sty\" to Emacs coding systems.
82
LaTeX encodings are specified with \"\\usepackage[encoding]{inputenc}\".
Thien-Thi Nguyen's avatar
Thien-Thi Nguyen committed
83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123
Used by the function `latexenc-find-file-coding-system'."
  :group 'files
  :group 'mule
  :type '(alist :key-type (string :tag "LaTeX input encoding")
		:value-type (coding-system :tag "Coding system")))

;;;###autoload
(defun latexenc-inputenc-to-coding-system (inputenc)
  "Return the corresponding coding-system for the specified input encoding.
Return nil if no matching coding system can be found."
  (cdr (assoc inputenc latex-inputenc-coding-alist)))

;;;###autoload
(defun latexenc-coding-system-to-inputenc (cs)
  "Return the corresponding input encoding for the specified coding system.
Return nil if no matching input encoding can be found."
  (let (result)
    (catch 'result
      (dolist (elem latex-inputenc-coding-alist result)
	(let ((elem-cs (cdr elem)))
	  (when (and (coding-system-p elem-cs)
		     (coding-system-p cs)
		     (eq (coding-system-base cs) (coding-system-base elem-cs)))
	    (setq result (car elem))
	    (throw 'result result)))))))

(defvar latexenc-dont-use-TeX-master-flag nil
  "Non-nil means don't follow TeX-master to find the coding system.")

(defvar latexenc-dont-use-tex-guess-main-file-flag nil
  "Non-nil means don't use tex-guessmain-file to find the coding system.")

;;;###autoload
(defun latexenc-find-file-coding-system (arg-list)
  "Determine the coding system of a LaTeX file if it uses \"inputenc.sty\".
The mapping from LaTeX's \"inputenc.sty\" encoding names to Emacs
coding system names is determined from `latex-inputenc-coding-alist'."
  (if (eq (car arg-list) 'insert-file-contents)
      (save-excursion
        ;; try to find the coding system in this file
        (goto-char (point-min))
124 125 126 127 128 129 130 131 132 133
	(if (catch 'cs
	      (let ((case-fold-search nil))
		(while (search-forward "inputenc" nil t)
		  (goto-char (match-beginning 0))
		  (beginning-of-line)
		  (if (or (looking-at "[^%\n]*\\\\usepackage\\[\\([^]]*\\)\\]{\\([^}]*,\\)?inputenc\\(,[^}]*\\)?}")
			  (looking-at "[^%\n]*\\\\inputencoding{\\([^}]*\\)}"))
		      (throw 'cs t)
		    (goto-char (match-end 0))))))
	    (let* ((match (match-string 1))
134 135 136 137 138 139
		   (sym (or (latexenc-inputenc-to-coding-system match)
                            (intern match))))
	      (cond
               ((coding-system-p sym) sym)
               ((and (require 'code-pages nil t) (coding-system-p sym)) sym)
               (t 'undecided)))
Thien-Thi Nguyen's avatar
Thien-Thi Nguyen committed
140
          ;; else try to find it in the master/main file
141 142 143 144
          (let ((default-directory (file-name-directory (nth 1 arg-list)))
                latexenc-main-file)
            ;; Is there a TeX-master or tex-main-file in the local variables
            ;; section?
Thien-Thi Nguyen's avatar
Thien-Thi Nguyen committed
145 146
            (unless latexenc-dont-use-TeX-master-flag
              (goto-char (point-max))
147 148
	      (search-backward "\n\^L" (max (- (point-max) 3000) (point-min))
                               'move)
149
	      (search-forward "Local Variables:" nil t)
150 151 152 153 154 155 156 157 158
              (when (re-search-forward
                     "^%+ *\\(TeX-master\\|tex-main-file\\): *\"\\(.+\\)\""
                     nil t)
                (let ((file (match-string 2)))
                  (dolist (ext `("" ,(if (boundp 'TeX-default-extension)
                                         (concat "." TeX-default-extension)
                                       "")
                                 ".tex" ".ltx" ".dtx" ".drv"))
                    (if (and (null latexenc-main-file) ;Stop at first.
Eli Zaretskii's avatar
Eli Zaretskii committed
159
                             (file-exists-p (concat file ext)))
160
                        (setq latexenc-main-file (concat file ext)))))))
Thien-Thi Nguyen's avatar
Thien-Thi Nguyen committed
161 162
            ;; try tex-modes tex-guess-main-file
            (when (and (not latexenc-dont-use-tex-guess-main-file-flag)
163 164 165 166 167
                       (not latexenc-main-file))
              ;; Use a separate `when' so the byte-compiler sees the fboundp.
              (when (fboundp 'tex-guess-main-file)
                (let ((tex-start-of-header "\\\\document\\(style\\|class\\)"))
                  (setq latexenc-main-file (tex-guess-main-file)))))
Thien-Thi Nguyen's avatar
Thien-Thi Nguyen committed
168 169
            ;; if we found a master/main file get the coding system from it
            (if (and latexenc-main-file
170
                     (file-regular-p latexenc-main-file)
Thien-Thi Nguyen's avatar
Thien-Thi Nguyen committed
171 172 173
                     (file-readable-p latexenc-main-file))
                (let* ((latexenc-dont-use-tex-guess-main-file-flag t)
                       (latexenc-dont-use-TeX-master-flag t)
174 175 176 177 178
                       (latexenc-main-buffer
                        (find-file-noselect latexenc-main-file t)))
                  (coding-system-base   ;Disregard the EOL part of the CS.
                   (with-current-buffer latexenc-main-buffer
                     (or coding-system-for-write buffer-file-coding-system))))
Thien-Thi Nguyen's avatar
Thien-Thi Nguyen committed
179 180 181
              'undecided))))
    'undecided))

182

Thien-Thi Nguyen's avatar
Thien-Thi Nguyen committed
183 184
(provide 'latexenc)

Miles Bader's avatar
Miles Bader committed
185
;; arch-tag: f971bc3e-1fec-4609-8f2f-73dd41ab22e1
Thien-Thi Nguyen's avatar
Thien-Thi Nguyen committed
186
;;; latexenc.el ends here