rfc2231.el 8.81 KB
Newer Older
1
;;; rfc2231.el --- Functions for decoding rfc2231 headers
Gerd Moellmann's avatar
Gerd Moellmann committed
2

3 4
;; Copyright (C) 1998, 1999, 2000, 2002, 2003, 2004, 2005,
;;   2006 Free Software Foundation, Inc.
Gerd Moellmann's avatar
Gerd Moellmann committed
5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20

;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org>
;; This file is part of GNU Emacs.

;; GNU Emacs is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 2, or (at your option)
;; any later version.

;; GNU Emacs is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the
;; GNU General Public License for more details.

;; You should have received a copy of the GNU General Public License
;; along with GNU Emacs; see the file COPYING.  If not, write to the
Lute Kamstra's avatar
Lute Kamstra committed
21 22
;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
;; Boston, MA 02110-1301, USA.
Gerd Moellmann's avatar
Gerd Moellmann committed
23 24 25 26 27

;;; Commentary:

;;; Code:

28
(eval-when-compile (require 'cl))
Gerd Moellmann's avatar
Gerd Moellmann committed
29
(require 'ietf-drums)
30 31 32 33
(require 'rfc2047)
(autoload 'mm-encode-body "mm-bodies")
(autoload 'mail-header-remove-whitespace "mail-parse")
(autoload 'mail-header-remove-comments "mail-parse")
Gerd Moellmann's avatar
Gerd Moellmann committed
34 35 36 37 38

(defun rfc2231-get-value (ct attribute)
  "Return the value of ATTRIBUTE from CT."
  (cdr (assq attribute (cdr ct))))

39 40 41 42 43
(defun rfc2231-parse-qp-string (string)
  "Parse QP-encoded string using `rfc2231-parse-string'.
N.B.  This is in violation with RFC2047, but it seem to be in common use."
  (rfc2231-parse-string (rfc2047-decode-string string)))

44
(defun rfc2231-parse-string (string &optional signal-error)
Gerd Moellmann's avatar
Gerd Moellmann committed
45 46
  "Parse STRING and return a list.
The list will be on the form
47 48 49 50
 `(name (attribute . value) (attribute . value)...)'.

If the optional SIGNAL-ERROR is non-nil, signal an error when this
function fails in parsing of parameters."
Gerd Moellmann's avatar
Gerd Moellmann committed
51 52 53 54 55 56 57
  (with-temp-buffer
    (let ((ttoken (ietf-drums-token-to-list ietf-drums-text-token))
	  (stoken (ietf-drums-token-to-list ietf-drums-tspecials))
	  (ntoken (ietf-drums-token-to-list "0-9"))
	  (prev-value "")
	  display-name mailbox c display-string parameters
	  attribute value type subtype number encoded
58
	  prev-attribute prev-encoded)
59 60 61 62
      ;; Some mailer (e.g. Thunderbird 1.5) doesn't terminate each
      ;; line with semicolon when folding a long parameter value.
      (while (string-match "\\([^\t\n\r ;]\\)[\t ]*\r?\n[\t ]+" string)
	(setq string (replace-match "\\1;\n " nil nil string)))
Gerd Moellmann's avatar
Gerd Moellmann committed
63 64 65 66
      (ietf-drums-init (mail-header-remove-whitespace
			(mail-header-remove-comments string)))
      (let ((table (copy-syntax-table ietf-drums-syntax-table)))
	(modify-syntax-entry ?\' "w" table)
67 68 69
	(modify-syntax-entry ?* " " table)
	(modify-syntax-entry ?\; " " table)
	(modify-syntax-entry ?= " " table)
Gerd Moellmann's avatar
Gerd Moellmann committed
70 71 72 73 74 75 76 77 78 79
	;; The following isn't valid, but one should be liberal
	;; in what one receives.
	(modify-syntax-entry ?\: "w" table)
	(set-syntax-table table))
      (setq c (char-after))
      (when (and (memq c ttoken)
		 (not (memq c stoken)))
	(setq type (downcase (buffer-substring
			      (point) (progn (forward-sexp 1) (point)))))
	;; Do the params
80 81 82
	(condition-case err
	    (progn
	      (while (not (eobp))
Gerd Moellmann's avatar
Gerd Moellmann committed
83
		(setq c (char-after))
84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127
		(unless (eq c ?\;)
		  (error "Invalid header: %s" string))
		(forward-char 1)
		;; If c in nil, then this is an invalid header, but
		;; since elm generates invalid headers on this form,
		;; we allow it.
		(when (setq c (char-after))
		  (if (and (memq c ttoken)
			   (not (memq c stoken)))
		      (setq attribute
			    (intern
			     (downcase
			      (buffer-substring
			       (point) (progn (forward-sexp 1) (point))))))
		    (error "Invalid header: %s" string))
		  (setq c (char-after))
		  (when (eq c ?*)
		    (forward-char 1)
		    (setq c (char-after))
		    (if (not (memq c ntoken))
			(setq encoded t
			      number nil)
		      (setq number
			    (string-to-number
			     (buffer-substring
			      (point) (progn (forward-sexp 1) (point)))))
		      (setq c (char-after))
		      (when (eq c ?*)
			(setq encoded t)
			(forward-char 1)
			(setq c (char-after)))))
		  ;; See if we have any previous continuations.
		  (when (and prev-attribute
			     (not (eq prev-attribute attribute)))
		    (push (cons prev-attribute
				(if prev-encoded
				    (rfc2231-decode-encoded-string prev-value)
				  prev-value))
			  parameters)
		    (setq prev-attribute nil
			  prev-value ""
			  prev-encoded nil))
		  (unless (eq c ?=)
		    (error "Invalid header: %s" string))
Gerd Moellmann's avatar
Gerd Moellmann committed
128
		  (forward-char 1)
129 130 131 132 133 134 135 136 137 138 139 140 141
		  (setq c (char-after))
		  (cond
		   ((eq c ?\")
		    (setq value (buffer-substring (1+ (point))
						  (progn
						    (forward-sexp 1)
						    (1- (point))))))
		   ((and (or (memq c ttoken)
			     ;; EXTENSION: Support non-ascii chars.
			     (> c ?\177))
			 (not (memq c stoken)))
		    (setq value
			  (buffer-substring
142 143 144 145 146 147 148 149 150 151 152
			   (point)
			   (progn
			     (forward-sexp)
			     ;; We might not have reached at the end of
			     ;; the value because of non-ascii chars,
			     ;; so we should jump over them if any.
			     (while (and (not (eobp))
					 (> (char-after) ?\177))
			       (forward-char 1)
			       (forward-sexp))
			     (point)))))
153 154 155 156 157 158 159 160 161 162 163
		   (t
		    (error "Invalid header: %s" string)))
		  (if number
		      (setq prev-attribute attribute
			    prev-value (concat prev-value value)
			    prev-encoded encoded)
		    (push (cons attribute
				(if encoded
				    (rfc2231-decode-encoded-string value)
				  value))
			  parameters))))
Gerd Moellmann's avatar
Gerd Moellmann committed
164

165 166 167 168 169 170 171 172 173 174 175 176 177
	      ;; Take care of any final continuations.
	      (when prev-attribute
		(push (cons prev-attribute
			    (if prev-encoded
				(rfc2231-decode-encoded-string prev-value)
			      prev-value))
		      parameters)))
	  (error
	   (setq parameters nil)
	   (if signal-error
	       (signal (car err) (cdr err))
	     ;;(message "%s" (error-message-string err))
	     )))
Gerd Moellmann's avatar
Gerd Moellmann committed
178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197

	(when type
	  `(,type ,@(nreverse parameters)))))))

(defun rfc2231-decode-encoded-string (string)
  "Decode an RFC2231-encoded string.
These look like \"us-ascii'en-us'This%20is%20%2A%2A%2Afun%2A%2A%2A\"."
  (with-temp-buffer
    (let ((elems (split-string string "'")))
      ;; The encoded string may contain zero to two single-quote
      ;; marks.  This should give us the encoded word stripped
      ;; of any preceding values.
      (insert (car (last elems)))
      (goto-char (point-min))
      (while (search-forward "%" nil t)
	(insert
	 (prog1
	     (string-to-number (buffer-substring (point) (+ (point) 2)) 16)
	   (delete-region (1- (point)) (+ (point) 2)))))
      ;; Encode using the charset, if any.
198 199 200
      (when (and (mm-multibyte-p)
		 (> (length elems) 1)
		 (not (equal (intern (downcase (car elems))) 'us-ascii)))
Gerd Moellmann's avatar
Gerd Moellmann committed
201
	(mm-decode-coding-region (point-min) (point-max)
202
				 (intern (downcase (car elems)))))
Gerd Moellmann's avatar
Gerd Moellmann committed
203 204 205
      (buffer-string))))

(defun rfc2231-encode-string (param value)
206 207 208
  "Return and PARAM=VALUE string encoded according to RFC2231.
Use `mml-insert-parameter' or `mml-insert-parameter-string' to insert
the result of this function."
Gerd Moellmann's avatar
Gerd Moellmann committed
209 210 211 212 213
  (let ((control (ietf-drums-token-to-list ietf-drums-no-ws-ctl-token))
	(tspecial (ietf-drums-token-to-list ietf-drums-tspecials))
	(special (ietf-drums-token-to-list "*'%\n\t"))
	(ascii (ietf-drums-token-to-list ietf-drums-text-token))
	(num -1)
214
	;; Don't make lines exceeding 76 column.
215
	(limit (- 74 (length param)))
Gerd Moellmann's avatar
Gerd Moellmann committed
216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233
	spacep encodep charsetp charset broken)
    (with-temp-buffer
      (insert value)
      (goto-char (point-min))
      (while (not (eobp))
	(cond
	 ((or (memq (following-char) control)
	      (memq (following-char) tspecial)
	      (memq (following-char) special))
	  (setq encodep t))
	 ((eq (following-char) ? )
	  (setq spacep t))
	 ((not (memq (following-char) ascii))
	  (setq charsetp t)))
	(forward-char 1))
      (when charsetp
	(setq charset (mm-encode-body)))
      (cond
234 235 236 237 238
       ((or encodep charsetp
	    (progn
	      (end-of-line)
	      (> (current-column) (if spacep (- limit 2) limit))))
	(setq limit (- limit 6))
Gerd Moellmann's avatar
Gerd Moellmann committed
239
	(goto-char (point-min))
240
	(insert (symbol-name (or charset 'us-ascii)) "''")
Gerd Moellmann's avatar
Gerd Moellmann committed
241 242 243 244 245 246 247
	(while (not (eobp))
	  (if (or (not (memq (following-char) ascii))
		  (memq (following-char) control)
		  (memq (following-char) tspecial)
		  (memq (following-char) special)
		  (eq (following-char) ? ))
	      (progn
248 249 250
		(when (>= (current-column) (1- limit))
		  (insert ";\n")
		  (setq broken t))
Gerd Moellmann's avatar
Gerd Moellmann committed
251 252
		(insert "%" (format "%02x" (following-char)))
		(delete-char 1))
253 254 255
	    (when (> (current-column) limit)
	      (insert ";\n")
	      (setq broken t))
Gerd Moellmann's avatar
Gerd Moellmann committed
256 257 258 259 260
	    (forward-char 1)))
	(goto-char (point-min))
	(if (not broken)
	    (insert param "*=")
	  (while (not (eobp))
261
	    (insert (if (>= num 0) " " "")
262
		    param "*" (format "%d" (incf num)) "*=")
Gerd Moellmann's avatar
Gerd Moellmann committed
263 264 265
	    (forward-line 1))))
       (spacep
	(goto-char (point-min))
266
	(insert "\n " param "=\"")
Gerd Moellmann's avatar
Gerd Moellmann committed
267 268 269 270
	(goto-char (point-max))
	(insert "\""))
       (t
	(goto-char (point-min))
271
	(insert "\n " param "=")))
Gerd Moellmann's avatar
Gerd Moellmann committed
272 273 274 275
      (buffer-string))))

(provide 'rfc2231)

Miles Bader's avatar
Miles Bader committed
276
;;; arch-tag: c3ab751d-d108-406a-b301-68882ad8cd63
Gerd Moellmann's avatar
Gerd Moellmann committed
277
;;; rfc2231.el ends here