rfc2231.el 9.96 KB
Newer Older
1
;;; rfc2231.el --- Functions for decoding rfc2231 headers
Gerd Moellmann's avatar
Gerd Moellmann committed
2

3 4
;; Copyright (C) 1998, 1999, 2000, 2002, 2003, 2004, 2005,
;;   2006 Free Software Foundation, Inc.
Gerd Moellmann's avatar
Gerd Moellmann committed
5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20

;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org>
;; This file is part of GNU Emacs.

;; GNU Emacs is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 2, or (at your option)
;; any later version.

;; GNU Emacs is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the
;; GNU General Public License for more details.

;; You should have received a copy of the GNU General Public License
;; along with GNU Emacs; see the file COPYING.  If not, write to the
Lute Kamstra's avatar
Lute Kamstra committed
21 22
;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
;; Boston, MA 02110-1301, USA.
Gerd Moellmann's avatar
Gerd Moellmann committed
23 24 25 26 27

;;; Commentary:

;;; Code:

28
(eval-when-compile (require 'cl))
Gerd Moellmann's avatar
Gerd Moellmann committed
29
(require 'ietf-drums)
30 31 32 33
(require 'rfc2047)
(autoload 'mm-encode-body "mm-bodies")
(autoload 'mail-header-remove-whitespace "mail-parse")
(autoload 'mail-header-remove-comments "mail-parse")
Gerd Moellmann's avatar
Gerd Moellmann committed
34 35 36 37 38

(defun rfc2231-get-value (ct attribute)
  "Return the value of ATTRIBUTE from CT."
  (cdr (assq attribute (cdr ct))))

39 40 41 42 43
(defun rfc2231-parse-qp-string (string)
  "Parse QP-encoded string using `rfc2231-parse-string'.
N.B.  This is in violation with RFC2047, but it seem to be in common use."
  (rfc2231-parse-string (rfc2047-decode-string string)))

44
(defun rfc2231-parse-string (string &optional signal-error)
Gerd Moellmann's avatar
Gerd Moellmann committed
45 46
  "Parse STRING and return a list.
The list will be on the form
47 48 49
 `(name (attribute . value) (attribute . value)...)'.

If the optional SIGNAL-ERROR is non-nil, signal an error when this
50 51
function fails in parsing of parameters.  Otherwise, this function
must never cause a Lisp error."
Gerd Moellmann's avatar
Gerd Moellmann committed
52 53 54 55
  (with-temp-buffer
    (let ((ttoken (ietf-drums-token-to-list ietf-drums-text-token))
	  (stoken (ietf-drums-token-to-list ietf-drums-tspecials))
	  (ntoken (ietf-drums-token-to-list "0-9"))
56 57
	  c type attribute encoded number prev-attribute vals
	  prev-encoded parameters value)
58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88
      (ietf-drums-init
       (condition-case nil
	   (mail-header-remove-whitespace
	    (mail-header-remove-comments string))
	 ;; The most likely cause of an error is unbalanced parentheses
	 ;; or double-quotes.  If all parentheses and double-quotes are
	 ;; quoted meaninglessly with backslashes, removing them might
	 ;; make it parseable.  Let's try...
	 (error
	  (let (mod)
	    (when (and (string-match "\\\\\"" string)
		       (not (string-match "\\`\"\\|[^\\]\"" string)))
	      (setq string (mm-replace-in-string string "\\\\\"" "\"")
		    mod t))
	    (when (and (string-match "\\\\(" string)
		       (string-match "\\\\)" string)
		       (not (string-match "\\`(\\|[^\\][()]" string)))
	      (setq string (mm-replace-in-string string "\\\\\\([()]\\)" "\\1")
		    mod t))
	    (or (and mod
		     (ignore-errors
		       (mail-header-remove-whitespace
			(mail-header-remove-comments string))))
		;; Finally, attempt to extract only type.
		(if (string-match
		     (concat "\\`[\t\n ]*\\([^" ietf-drums-tspecials "\t\n ]+"
			     "\\(/[^" ietf-drums-tspecials
			     "\t\n ]+\\)?\\)\\([\t\n ;]\\|\\'\\)")
		     string)
		    (match-string 1 string)
		  ""))))))
Gerd Moellmann's avatar
Gerd Moellmann committed
89 90
      (let ((table (copy-syntax-table ietf-drums-syntax-table)))
	(modify-syntax-entry ?\' "w" table)
91 92 93
	(modify-syntax-entry ?* " " table)
	(modify-syntax-entry ?\; " " table)
	(modify-syntax-entry ?= " " table)
Gerd Moellmann's avatar
Gerd Moellmann committed
94 95 96 97 98 99
	;; The following isn't valid, but one should be liberal
	;; in what one receives.
	(modify-syntax-entry ?\: "w" table)
	(set-syntax-table table))
      (setq c (char-after))
      (when (and (memq c ttoken)
100 101 102 103 104 105
		 (not (memq c stoken))
		 (setq type (ignore-errors
			      (downcase
			       (buffer-substring (point) (progn
							   (forward-sexp 1)
							   (point)))))))
Gerd Moellmann's avatar
Gerd Moellmann committed
106
	;; Do the params
107 108 109
	(condition-case err
	    (progn
	      (while (not (eobp))
Gerd Moellmann's avatar
Gerd Moellmann committed
110
		(setq c (char-after))
111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126
		(unless (eq c ?\;)
		  (error "Invalid header: %s" string))
		(forward-char 1)
		;; If c in nil, then this is an invalid header, but
		;; since elm generates invalid headers on this form,
		;; we allow it.
		(when (setq c (char-after))
		  (if (and (memq c ttoken)
			   (not (memq c stoken)))
		      (setq attribute
			    (intern
			     (downcase
			      (buffer-substring
			       (point) (progn (forward-sexp 1) (point))))))
		    (error "Invalid header: %s" string))
		  (setq c (char-after))
127 128
		  (if (eq c ?*)
		      (progn
129
			(forward-char 1)
130 131 132 133 134 135 136 137 138 139 140 141 142 143 144
			(setq c (char-after))
			(if (not (memq c ntoken))
			    (setq encoded t
				  number nil)
			  (setq number
				(string-to-number
				 (buffer-substring
				  (point) (progn (forward-sexp 1) (point)))))
			  (setq c (char-after))
			  (when (eq c ?*)
			    (setq encoded t)
			    (forward-char 1)
			    (setq c (char-after)))))
		    (setq number nil
			  encoded nil))
145 146 147
		  ;; See if we have any previous continuations.
		  (when (and prev-attribute
			     (not (eq prev-attribute attribute)))
148 149
		    (setq vals
			  (mapconcat 'cdr (sort vals 'car-less-than-car) ""))
150 151
		    (push (cons prev-attribute
				(if prev-encoded
152 153
				    (rfc2231-decode-encoded-string vals)
				  vals))
154 155
			  parameters)
		    (setq prev-attribute nil
156
			  vals nil
157 158 159
			  prev-encoded nil))
		  (unless (eq c ?=)
		    (error "Invalid header: %s" string))
Gerd Moellmann's avatar
Gerd Moellmann committed
160
		  (forward-char 1)
161 162 163 164 165 166
		  (setq c (char-after))
		  (cond
		   ((eq c ?\")
		    (setq value (buffer-substring (1+ (point))
						  (progn
						    (forward-sexp 1)
167 168 169 170
						    (1- (point)))))
		    (when encoded
		      (setq value (mapconcat (lambda (c) (format "%%%02x" c))
					     value ""))))
171 172 173 174 175 176
		   ((and (or (memq c ttoken)
			     ;; EXTENSION: Support non-ascii chars.
			     (> c ?\177))
			 (not (memq c stoken)))
		    (setq value
			  (buffer-substring
177 178
			   (point)
			   (progn
Miles Bader's avatar
Miles Bader committed
179 180 181 182 183 184
			     ;; Jump over asterisk, non-ASCII
			     ;; and non-boundary characters.
			     (while (and c
					 (or (eq c ?*)
					     (> c ?\177)
					     (not (eq (char-syntax c) ? ))))
185
			       (forward-char 1)
Miles Bader's avatar
Miles Bader committed
186
			       (setq c (char-after)))
187
			     (point)))))
188 189 190
		   (t
		    (error "Invalid header: %s" string)))
		  (if number
191 192 193 194
		      (progn
			(push (cons number value) vals)
			(setq prev-attribute attribute
			      prev-encoded encoded))
195 196 197 198 199
		    (push (cons attribute
				(if encoded
				    (rfc2231-decode-encoded-string value)
				  value))
			  parameters))))
Gerd Moellmann's avatar
Gerd Moellmann committed
200

201 202
	      ;; Take care of any final continuations.
	      (when prev-attribute
203
		(setq vals (mapconcat 'cdr (sort vals 'car-less-than-car) ""))
204 205
		(push (cons prev-attribute
			    (if prev-encoded
206 207
				(rfc2231-decode-encoded-string vals)
			      vals))
208 209 210 211 212 213 214
		      parameters)))
	  (error
	   (setq parameters nil)
	   (if signal-error
	       (signal (car err) (cdr err))
	     ;;(message "%s" (error-message-string err))
	     )))
Gerd Moellmann's avatar
Gerd Moellmann committed
215

216
	(cons type (nreverse parameters))))))
Gerd Moellmann's avatar
Gerd Moellmann committed
217 218 219

(defun rfc2231-decode-encoded-string (string)
  "Decode an RFC2231-encoded string.
220 221 222 223 224 225 226 227 228 229
These look like:
 \"us-ascii'en-us'This%20is%20%2A%2A%2Afun%2A%2A%2A\",
 \"us-ascii''This%20is%20%2A%2A%2Afun%2A%2A%2A\",
 \"'en-us'This%20is%20%2A%2A%2Afun%2A%2A%2A\",
 \"''This%20is%20%2A%2A%2Afun%2A%2A%2A\", or
 \"This is ***fun***\"."
  (string-match "\\`\\(\\([^']+\\)?'\\([^']+\\)?'\\)?\\(.+\\)" string)
  (let ((coding-system (mm-charset-to-coding-system (match-string 2 string)))
	;;(language (match-string 3 string))
	(value (match-string 4 string)))
230
    (mm-with-unibyte-buffer
231
      (insert value)
Gerd Moellmann's avatar
Gerd Moellmann committed
232 233 234 235 236 237
      (goto-char (point-min))
      (while (search-forward "%" nil t)
	(insert
	 (prog1
	     (string-to-number (buffer-substring (point) (+ (point) 2)) 16)
	   (delete-region (1- (point)) (+ (point) 2)))))
238
      ;; Decode using the charset, if any.
239 240 241
      (if (memq coding-system '(nil ascii))
	  (buffer-string)
	(mm-decode-coding-string (buffer-string) coding-system)))))
Gerd Moellmann's avatar
Gerd Moellmann committed
242 243

(defun rfc2231-encode-string (param value)
244 245 246
  "Return and PARAM=VALUE string encoded according to RFC2231.
Use `mml-insert-parameter' or `mml-insert-parameter-string' to insert
the result of this function."
Gerd Moellmann's avatar
Gerd Moellmann committed
247 248 249 250 251
  (let ((control (ietf-drums-token-to-list ietf-drums-no-ws-ctl-token))
	(tspecial (ietf-drums-token-to-list ietf-drums-tspecials))
	(special (ietf-drums-token-to-list "*'%\n\t"))
	(ascii (ietf-drums-token-to-list ietf-drums-text-token))
	(num -1)
252
	;; Don't make lines exceeding 76 column.
253
	(limit (- 74 (length param)))
Gerd Moellmann's avatar
Gerd Moellmann committed
254
	spacep encodep charsetp charset broken)
255
    (mm-with-multibyte-buffer
Gerd Moellmann's avatar
Gerd Moellmann committed
256 257 258 259 260 261 262 263 264 265 266 267 268 269 270
      (insert value)
      (goto-char (point-min))
      (while (not (eobp))
	(cond
	 ((or (memq (following-char) control)
	      (memq (following-char) tspecial)
	      (memq (following-char) special))
	  (setq encodep t))
	 ((eq (following-char) ? )
	  (setq spacep t))
	 ((not (memq (following-char) ascii))
	  (setq charsetp t)))
	(forward-char 1))
      (when charsetp
	(setq charset (mm-encode-body)))
271
      (mm-disable-multibyte)
Gerd Moellmann's avatar
Gerd Moellmann committed
272
      (cond
273 274 275 276 277
       ((or encodep charsetp
	    (progn
	      (end-of-line)
	      (> (current-column) (if spacep (- limit 2) limit))))
	(setq limit (- limit 6))
Gerd Moellmann's avatar
Gerd Moellmann committed
278
	(goto-char (point-min))
279
	(insert (symbol-name (or charset 'us-ascii)) "''")
Gerd Moellmann's avatar
Gerd Moellmann committed
280 281 282 283 284 285 286
	(while (not (eobp))
	  (if (or (not (memq (following-char) ascii))
		  (memq (following-char) control)
		  (memq (following-char) tspecial)
		  (memq (following-char) special)
		  (eq (following-char) ? ))
	      (progn
287 288 289
		(when (>= (current-column) (1- limit))
		  (insert ";\n")
		  (setq broken t))
Gerd Moellmann's avatar
Gerd Moellmann committed
290 291
		(insert "%" (format "%02x" (following-char)))
		(delete-char 1))
292 293 294
	    (when (> (current-column) limit)
	      (insert ";\n")
	      (setq broken t))
Gerd Moellmann's avatar
Gerd Moellmann committed
295 296 297 298 299
	    (forward-char 1)))
	(goto-char (point-min))
	(if (not broken)
	    (insert param "*=")
	  (while (not (eobp))
300
	    (insert (if (>= num 0) " " "")
301
		    param "*" (format "%d" (incf num)) "*=")
Gerd Moellmann's avatar
Gerd Moellmann committed
302 303 304
	    (forward-line 1))))
       (spacep
	(goto-char (point-min))
305
	(insert param "=\"")
Gerd Moellmann's avatar
Gerd Moellmann committed
306 307 308 309
	(goto-char (point-max))
	(insert "\""))
       (t
	(goto-char (point-min))
310
	(insert param "=")))
Gerd Moellmann's avatar
Gerd Moellmann committed
311 312 313 314
      (buffer-string))))

(provide 'rfc2231)

Miles Bader's avatar
Miles Bader committed
315
;;; arch-tag: c3ab751d-d108-406a-b301-68882ad8cd63
Gerd Moellmann's avatar
Gerd Moellmann committed
316
;;; rfc2231.el ends here