shr.el 64.2 KB
Newer Older
1 2
;;; shr.el --- Simple HTML Renderer

Paul Eggert's avatar
Paul Eggert committed
3
;; Copyright (C) 2010-2015 Free Software Foundation, Inc.
4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32

;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org>
;; Keywords: html

;; This file is part of GNU Emacs.

;; GNU Emacs is free software: you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.

;; GNU Emacs is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.

;; You should have received a copy of the GNU General Public License
;; along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.

;;; Commentary:

;; This package takes a HTML parse tree (as provided by
;; libxml-parse-html-region) and renders it in the current buffer.  It
;; does not do CSS, JavaScript or anything advanced: It's geared
;; towards rendering typical short snippets of HTML, like what you'd
;; find in HTML email and the like.

;;; Code:

33
(eval-when-compile (require 'cl))
34
(eval-when-compile (require 'url))      ;For url-filename's setf handler.
35
(require 'browse-url)
36 37
(require 'subr-x)
(require 'dom)
38

39 40
(defgroup shr nil
  "Simple HTML Renderer"
41 42
  :version "25.1"
  :group 'web)
43 44 45 46 47 48 49 50 51 52 53 54 55 56 57

(defcustom shr-max-image-proportion 0.9
  "How big pictures displayed are in relation to the window they're in.
A value of 0.7 means that they are allowed to take up 70% of the
width and height of the window.  If they are larger than this,
and Emacs supports it, then the images will be rescaled down to
fit these criteria."
  :version "24.1"
  :group 'shr
  :type 'float)

(defcustom shr-blocked-images nil
  "Images that have URLs matching this regexp will be blocked."
  :version "24.1"
  :group 'shr
58
  :type '(choice (const nil) regexp))
59

60 61 62 63 64 65
(defcustom shr-use-fonts nil
  "If non-nil, use proportional fonts for text."
  :version "25.1"
  :group 'shr
  :type 'boolean)

66 67 68
(defcustom shr-table-horizontal-line nil
  "Character used to draw horizontal table lines.
If nil, don't draw horizontal table lines."
69
  :group 'shr
Glenn Morris's avatar
Glenn Morris committed
70
  :type '(choice (const nil) character))
71

72
(defcustom shr-table-vertical-line ?\s
73
  "Character used to draw vertical table lines."
74
  :group 'shr
75
  :type 'character)
76

77
(defcustom shr-table-corner ?\s
78
  "Character used to draw table corners."
79
  :group 'shr
80
  :type 'character)
81 82

(defcustom shr-hr-line ?-
83
  "Character used to draw hr lines."
84
  :group 'shr
85
  :type 'character)
86

87
(defcustom shr-width nil
88 89 90 91
  "Frame width to use for rendering.
May either be an integer specifying a fixed width in characters,
or nil, meaning that the full width of the window should be
used."
92
  :version "25.1"
93 94
  :type '(choice (integer :tag "Fixed width in characters")
		 (const   :tag "Use the width of the window" nil))
95 96
  :group 'shr)

97 98 99 100 101
(defcustom shr-bullet "* "
  "Bullet used for unordered lists.
Alternative suggestions are:
- \"  \"
- \"  \""
102
  :version "24.4"
103 104 105
  :type 'string
  :group 'shr)

106 107 108 109 110 111
(defcustom shr-external-browser 'browse-url-default-browser
  "Function used to launch an external browser."
  :version "24.4"
  :group 'shr
  :type 'function)

112
(defcustom shr-image-animate t
Glenn Morris's avatar
Glenn Morris committed
113
  "Non nil means that images that can be animated will be."
114 115 116 117
  :version "24.4"
  :group 'shr
  :type 'boolean)

118 119 120 121 122
(defvar shr-content-function nil
  "If bound, this should be a function that will return the content.
This is used for cid: URLs, and the function is called with the
cid: URL as the argument.")

123 124 125
(defvar shr-put-image-function 'shr-put-image
  "Function called to put image and alt string.")

126 127 128 129
(defface shr-strike-through '((t (:strike-through t)))
  "Font for <s> elements."
  :group 'shr)

130
(defface shr-link
131
  '((t (:inherit link)))
132
  "Font for link elements."
133 134
  :group 'shr)

135 136 137
(defvar shr-inhibit-images nil
  "If non-nil, inhibit loading images.")

138 139
;;; Internal variables.

140 141
(defvar shr-folding-mode nil)
(defvar shr-start nil)
142
(defvar shr-indentation 0)
143
(defvar shr-internal-width nil)
144
(defvar shr-list-mode nil)
145
(defvar shr-content-cache nil)
146
(defvar shr-kinsoku-shorten nil)
147
(defvar shr-table-depth 0)
148
(defvar shr-stylesheet nil)
149
(defvar shr-base nil)
150 151
(defvar shr-depth 0)
(defvar shr-warning nil)
152
(defvar shr-ignore-cache nil)
153
(defvar shr-external-rendering-functions nil)
154
(defvar shr-target-id nil)
155
(defvar shr-inhibit-decoration nil)
156
(defvar shr-table-separator-length 1)
157 158 159
(defvar shr-table-separator-pixel-width 0)
(defvar shr-table-id nil)
(defvar shr-current-font nil)
160
(defvar shr-internal-bullet nil)
161

162 163 164 165
(defvar shr-map
  (let ((map (make-sparse-keymap)))
    (define-key map "a" 'shr-show-alt-text)
    (define-key map "i" 'shr-browse-image)
166
    (define-key map "z" 'shr-zoom-image)
167 168
    (define-key map [?\t] 'shr-next-link)
    (define-key map [?\M-\t] 'shr-previous-link)
169
    (define-key map [follow-link] 'mouse-face)
170
    (define-key map [mouse-2] 'shr-browse-url)
171
    (define-key map "I" 'shr-insert-image)
172
    (define-key map "w" 'shr-copy-url)
173
    (define-key map "u" 'shr-copy-url)
174
    (define-key map "v" 'shr-browse-url)
175
    (define-key map "o" 'shr-save-contents)
176 177 178
    (define-key map "\r" 'shr-browse-url)
    map))

179
;; Public functions and commands.
180 181
(declare-function libxml-parse-html-region "xml.c"
		  (start end &optional base-url))
182

183 184 185
(defun shr-render-buffer (buffer)
  "Display the HTML rendering of the current buffer."
  (interactive (list (current-buffer)))
186 187
  (or (fboundp 'libxml-parse-html-region)
      (error "This function requires Emacs to be compiled with libxml2"))
188 189 190
  (pop-to-buffer "*html*")
  (erase-buffer)
  (shr-insert-document
191
   (with-current-buffer buffer
192 193
     (libxml-parse-html-region (point-min) (point-max))))
  (goto-char (point-min)))
194

195
;;;###autoload
196 197 198 199 200 201 202 203 204 205 206
(defun shr-render-region (begin end &optional buffer)
  "Display the HTML rendering of the region between BEGIN and END."
  (interactive "r")
  (unless (fboundp 'libxml-parse-html-region)
    (error "This function requires Emacs to be compiled with libxml2"))
  (with-current-buffer (or buffer (current-buffer))
    (let ((dom (libxml-parse-html-region begin end)))
      (delete-region begin end)
      (goto-char begin)
      (shr-insert-document dom))))

207 208
;;;###autoload
(defun shr-insert-document (dom)
209 210 211
  "Render the parsed document DOM into the current buffer.
DOM should be a parse tree as generated by
`libxml-parse-html-region' or similar."
212
  (setq shr-content-cache nil)
213
  (let ((start (point))
214
	(shr-start nil)
215
	(shr-base nil)
216
	(shr-depth 0)
217
	(shr-table-id 0)
218
	(shr-warning nil)
219
	(shr-table-separator-pixel-width (shr-string-pixel-width "-"))
220 221
	(shr-internal-bullet (cons shr-bullet
				   (shr-string-pixel-width shr-bullet)))
222 223 224 225 226 227 228 229
	(shr-internal-width (or (and shr-width
				     (if (not shr-use-fonts)
					 shr-width
				       (* shr-width (frame-char-width))))
				(if (not shr-use-fonts)
				    (- (window-width) 2)
				  (- (window-pixel-width)
				     (* (frame-fringe-width) 2))))))
230
    (shr-descend dom)
231
    (shr-fill-lines start (point))
232 233 234
    (shr-remove-trailing-whitespace start (point))
    (when shr-warning
      (message "%s" shr-warning))))
235 236

(defun shr-remove-trailing-whitespace (start end)
237 238 239 240 241 242
  (let ((width (window-width)))
    (save-restriction
      (narrow-to-region start end)
      (goto-char start)
      (while (not (eobp))
	(end-of-line)
243
	(when (> (shr-previous-newline-padding-width (current-column)) width)
244 245 246 247
	  (dolist (overlay (overlays-at (point)))
	    (when (overlay-get overlay 'before-string)
	      (overlay-put overlay 'before-string nil))))
	(forward-line 1)))))
248

249
(defun shr-copy-url (&optional image-url)
250
  "Copy the URL under point to the kill ring.
251 252 253
If IMAGE-URL (the prefix) is non-nil, or there is no link under
point, but there is an image under point then copy the URL of the
image under point instead.
254 255
If called twice, then try to fetch the URL and see whether it
redirects somewhere else."
256 257 258
  (interactive "P")
  (let ((url (or (get-text-property (point) 'shr-url)
		 (get-text-property (point) 'image-url))))
259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275
    (cond
     ((not url)
      (message "No URL under point"))
     ;; Resolve redirected URLs.
     ((equal url (car kill-ring))
      (url-retrieve
       url
       (lambda (a)
	 (when (and (consp a)
		    (eq (car a) :redirect))
	   (with-temp-buffer
	     (insert (cadr a))
	     (goto-char (point-min))
	     ;; Remove common tracking junk from the URL.
	     (when (re-search-forward ".utm_.*" nil t)
	       (replace-match "" t t))
	     (message "Copied %s" (buffer-string))
276 277
	     (copy-region-as-kill (point-min) (point-max)))))
       nil t))
278 279 280
     ;; Copy the URL to the kill ring.
     (t
      (with-temp-buffer
281
	(insert (url-encode-url url))
282
	(copy-region-as-kill (point-min) (point-max))
283
	(message "Copied %s" (buffer-string)))))))
284

285 286 287
(defun shr-next-link ()
  "Skip to the next link."
  (interactive)
288
  (let ((skip (text-property-any (point) (point-max) 'help-echo nil)))
289 290 291
    (if (or (eobp)
	    (not (setq skip (text-property-not-all skip (point-max)
						   'help-echo nil))))
292 293 294 295 296 297 298 299 300 301 302
	(message "No next link")
      (goto-char skip)
      (message "%s" (get-text-property (point) 'help-echo)))))

(defun shr-previous-link ()
  "Skip to the previous link."
  (interactive)
  (let ((start (point))
	(found nil))
    ;; Skip past the current link.
    (while (and (not (bobp))
303
		(get-text-property (point) 'help-echo))
304 305 306
      (forward-char -1))
    ;; Find the previous link.
    (while (and (not (bobp))
307
		(not (setq found (get-text-property (point) 'help-echo))))
308 309 310 311 312 313 314
      (forward-char -1))
    (if (not found)
	(progn
	  (message "No previous link")
	  (goto-char start))
      ;; Put point at the start of the link.
      (while (and (not (bobp))
315
		  (get-text-property (point) 'help-echo))
316 317 318 319
	(forward-char -1))
      (forward-char 1)
      (message "%s" (get-text-property (point) 'help-echo)))))

320 321 322 323 324 325
(defun shr-show-alt-text ()
  "Show the ALT text of the image under point."
  (interactive)
  (let ((text (get-text-property (point) 'shr-alt)))
    (if (not text)
	(message "No image under point")
326
      (message "%s" (shr-fill-text text)))))
327

328 329 330 331 332
(defun shr-browse-image (&optional copy-url)
  "Browse the image under point.
If COPY-URL (the prefix if called interactively) is non-nil, copy
the URL of the image to the kill buffer instead."
  (interactive "P")
333
  (let ((url (get-text-property (point) 'image-url)))
334 335 336 337 338 339 340 341 342
    (cond
     ((not url)
      (message "No image under point"))
     (copy-url
      (with-temp-buffer
	(insert url)
	(copy-region-as-kill (point-min) (point-max))
	(message "Copied %s" url)))
     (t
343
      (message "Browsing %s..." url)
344
      (browse-url url)))))
345

346 347 348
(defun shr-insert-image ()
  "Insert the image under point into the buffer."
  (interactive)
349
  (let ((url (get-text-property (point) 'image-url)))
350 351 352 353 354
    (if (not url)
	(message "No image under point")
      (message "Inserting %s..." url)
      (url-retrieve url 'shr-image-fetched
		    (list (current-buffer) (1- (point)) (point-marker))
355
		    t t))))
356

357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390
(defun shr-zoom-image ()
  "Toggle the image size.
The size will be rotated between the default size, the original
size, and full-buffer size."
  (interactive)
  (let ((url (get-text-property (point) 'image-url))
	(size (get-text-property (point) 'image-size))
	(buffer-read-only nil))
    (if (not url)
	(message "No image under point")
      ;; Delete the old picture.
      (while (get-text-property (point) 'image-url)
	(forward-char -1))
      (forward-char 1)
      (let ((start (point)))
	(while (get-text-property (point) 'image-url)
	  (forward-char 1))
	(forward-char -1)
	(put-text-property start (point) 'display nil)
	(when (> (- (point) start) 2)
	  (delete-region start (1- (point)))))
      (message "Inserting %s..." url)
      (url-retrieve url 'shr-image-fetched
		    (list (current-buffer) (1- (point)) (point-marker)
			  (list (cons 'size
				      (cond ((or (eq size 'default)
						 (null size))
					     'original)
					    ((eq size 'original)
					     'full)
					    ((eq size 'full)
					     'default)))))
		    t))))

391 392
;;; Utility functions.

393 394 395 396 397
(defsubst shr-generic (dom)
  (dolist (sub (dom-children dom))
    (if (stringp sub)
	(shr-insert sub)
      (shr-descend sub))))
398

399
(defun shr-descend (dom)
400 401 402 403
  (let ((function
	 (or
	  ;; Allow other packages to override (or provide) rendering
	  ;; of elements.
404 405 406
	  (cdr (assq (dom-tag dom) shr-external-rendering-functions))
	  (intern (concat "shr-tag-" (symbol-name (dom-tag dom))) obarray)))
	(style (dom-attr dom 'style))
407
	(shr-stylesheet shr-stylesheet)
408
	(shr-depth (1+ shr-depth))
409
	(start (point)))
410 411 412
    ;; shr uses about 12 frames per nested node.
    (if (> shr-depth (/ max-specpdl-size 12))
	(setq shr-warning "Too deeply nested to render properly; consider increasing `max-specpdl-size'")
413
      (when style
414 415 416 417 418 419 420
	(if (string-match "color\\|display\\|border-collapse" style)
	    (setq shr-stylesheet (nconc (shr-parse-style style)
					shr-stylesheet))
	  (setq style nil)))
      ;; If we have a display:none, then just ignore this part of the DOM.
      (unless (equal (cdr (assq 'display shr-stylesheet)) "none")
	(if (fboundp function)
421 422
	    (funcall function dom)
	  (shr-generic dom))
423
	(when (and shr-target-id
424
		   (equal (dom-attr dom 'id) shr-target-id))
425 426 427 428 429 430 431 432 433 434 435
	  ;; If the element was empty, we don't have anything to put the
	  ;; anchor on.  So just insert a dummy character.
	  (when (= start (point))
	    (insert "*"))
	  (put-text-property start (1+ start) 'shr-target-id shr-target-id))
	;; If style is set, then this node has set the color.
	(when style
	  (shr-colorize-region
	   start (point)
	   (cdr (assq 'color shr-stylesheet))
	   (cdr (assq 'background-color shr-stylesheet))))))))
436

437
(defun shr-fill-text (text)
438 439 440 441 442
  (if (zerop (length text))
      text
    (with-temp-buffer
      (let ((shr-indentation 0)
	    (shr-start nil)
443 444
	    (shr-internal-width (- (window-pixel-width)
				   (* (frame-fringe-width) 2))))
445 446
	(shr-insert text)
	(buffer-string)))))
447

448
(define-inline shr-char-breakable-p (char)
449
  "Return non-nil if a line can be broken before and after CHAR."
450 451
  (inline-quote (aref fill-find-break-point-function-table ,char)))
(define-inline shr-char-nospace-p (char)
452
  "Return non-nil if no space is required before and after CHAR."
453
  (inline-quote (aref fill-nospace-between-words-table ,char)))
454 455 456 457 458

;; KINSOKU is a Japanese word meaning a rule that should not be violated.
;; In Emacs, it is a term used for characters, e.g. punctuation marks,
;; parentheses, and so on, that should not be placed in the beginning
;; of a line or the end of a line.
459
(define-inline shr-char-kinsoku-bol-p (char)
460
  "Return non-nil if a line ought not to begin with CHAR."
461 462 463 464
  (inline-letevals (char)
    (inline-quote (and (not (eq ,char ?'))
                       (aref (char-category-set ,char) ?>)))))
(define-inline shr-char-kinsoku-eol-p (char)
465
  "Return non-nil if a line ought not to end with CHAR."
466
  (inline-quote (aref (char-category-set ,char) ?<)))
467 468 469
(unless (shr-char-kinsoku-bol-p (make-char 'japanese-jisx0208 33 35))
  (load "kinsoku" nil t))

470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491
(defun shr-pixel-column ()
  (if (not shr-use-fonts)
      (current-column)
    (if (not (get-buffer-window (current-buffer)))
	(save-window-excursion
	  (set-window-buffer nil (current-buffer))
	  (car (window-text-pixel-size nil (line-beginning-position) (point))))
      (car (window-text-pixel-size nil (line-beginning-position) (point))))))

(defun shr-pixel-region ()
  (- (shr-pixel-column)
     (save-excursion
       (goto-char (mark))
       (shr-pixel-column))))

(defun shr-string-pixel-width (string)
  (if (not shr-use-fonts)
      (length string)
    (with-temp-buffer
      (insert string)
      (shr-pixel-column))))

492
(defun shr-insert (text)
493 494 495
  (when (and (not (bolp))
	     (get-text-property (1- (point)) 'image-url))
    (insert "\n"))
496 497 498 499
  (cond
   ((eq shr-folding-mode 'none)
    (insert text))
   (t
500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525
    (let ((font-start (point)))
      (when (and (string-match "\\`[ \t\n\r ]" text)
		 (not (bolp))
		 (not (eq (char-after (1- (point))) ? )))
	(insert " "))
      (let ((start (point))
	    (bolp (bolp)))
	(insert text)
	(save-restriction
	  (narrow-to-region start (point))
	  (goto-char start)
	  (when (looking-at "[ \t\n\r ]+")
	    (replace-match "" t t))
	  (while (re-search-forward "[ \t\n\r ]+" nil t)
	    (replace-match " " t t))
	  (goto-char (point-max)))
	;; We may have removed everything we inserted if if was just
	;; spaces.
	(unless (= font-start (point))
	  ;; Mark all lines that should possibly be folded afterwards.
	  (when bolp
	    (shr-mark-fill start))
	  (when shr-use-fonts
	    (put-text-property font-start (point)
			       'face
			       (or shr-current-font 'variable-pitch)))))))))
526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555

(defun shr-fill-lines (start end)
  (if (<= shr-internal-width 0)
      nil
    (save-restriction
      (narrow-to-region start end)
      (goto-char start)
      (when (get-text-property (point) 'shr-indentation)
	(shr-fill-line))
      (while (setq start (next-single-property-change start 'shr-indentation))
	(goto-char start)
	(when (bolp)
	  (shr-fill-line)))
      (goto-char (point-max)))))

(defun shr-vertical-motion (column)
  (if (not shr-use-fonts)
      (move-to-column column)
    (unless (eolp)
      (forward-char 1))
    (vertical-motion (cons (/ column (frame-char-width)) 0))
    (unless (eolp)
      (forward-char 1))))

(defun shr-fill-line ()
  (let ((shr-indentation (get-text-property (point) 'shr-indentation))
	(continuation (get-text-property
		       (point) 'shr-continuation-indentation))
	start)
    (put-text-property (point) (1+ (point)) 'shr-indentation nil)
556 557 558 559 560 561
    (let ((face (get-text-property (point) 'face))
	  (background-start (point)))
      (shr-indent)
      (when face
	(put-text-property background-start (point) 'face
			   `,(shr-face-background face))))
562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580
    (setq start (point))
    (setq shr-indentation (or continuation shr-indentation))
    (shr-vertical-motion shr-internal-width)
    (when (looking-at " $")
      (delete-region (point) (line-end-position)))
    (while (not (eolp))
      ;; We have to do some folding.  First find the first
      ;; previous point suitable for folding.
      (if (or (not (shr-find-fill-point (line-beginning-position)))
	      (= (point) start))
	  ;; We had unbreakable text (for this width), so just go to
	  ;; the first space and carry on.
	  (progn
	    (beginning-of-line)
	    (skip-chars-forward " ")
	    (search-forward " " (line-end-position) 'move)))
      ;; Success; continue.
      (when (= (preceding-char) ?\s)
	(delete-char -1))
581 582 583 584 585 586 587
      (let ((face (get-text-property (point) 'face))
	    (background-start (point)))
	(insert "\n")
	(shr-indent)
	(when face
	  (put-text-property background-start (point) 'face
			     `,(shr-face-background face))))
588 589 590 591 592 593
      (setq start (point))
      (shr-vertical-motion shr-internal-width)
      (when (looking-at " $")
	(delete-region (point) (line-end-position))))))

(defun shr-find-fill-point (start)
594
  (let ((bp (point))
595
	(end (point))
596
	failed)
597
    (while (not (or (setq failed (<= (point) start))
598 599 600 601
		    (eq (preceding-char) ? )
		    (eq (following-char) ? )
		    (shr-char-breakable-p (preceding-char))
		    (shr-char-breakable-p (following-char))
602 603 604
		    (and (shr-char-kinsoku-bol-p (preceding-char))
			 (shr-char-breakable-p (following-char))
			 (not (shr-char-kinsoku-bol-p (following-char))))
605 606
		    (shr-char-kinsoku-eol-p (following-char))
		    (bolp)))
607 608
      (backward-char 1))
    (if failed
609
	;; There's no breakable point, so we give it up.
610 611 612
	(let (found)
	  (goto-char bp)
	  (unless shr-kinsoku-shorten
613 614 615
	    (while (setq found (re-search-forward
				"\\(\\c>\\)\\| \\|\\c<\\|\\c|"
				(line-end-position) 'move)))
616 617
	    (if (and found
		     (not (match-beginning 1)))
618
		(goto-char (match-beginning 0)))))
619 620
      (or
       (eolp)
621 622 623 624 625 626 627
       ;; Don't put kinsoku-bol characters at the beginning of a line,
       ;; or kinsoku-eol characters at the end of a line.
       (cond
	(shr-kinsoku-shorten
	 (while (and (not (memq (preceding-char) (list ?\C-@ ?\n ? )))
		     (shr-char-kinsoku-eol-p (preceding-char)))
	   (backward-char 1))
628
	 (when (setq failed (<= (point) start))
629 630 631 632
	   ;; There's no breakable point that doesn't violate kinsoku,
	   ;; so we look for the second best position.
	   (while (and (progn
			 (forward-char 1)
633
			 (<= (point) end))
634 635 636 637 638
		       (progn
			 (setq bp (point))
			 (shr-char-kinsoku-eol-p (following-char)))))
	   (goto-char bp)))
	((shr-char-kinsoku-eol-p (preceding-char))
639 640 641 642 643 644 645 646 647
	 ;; Find backward the point where kinsoku-eol characters begin.
	 (let ((count 4))
	   (while
	       (progn
		 (backward-char 1)
		 (and (> (setq count (1- count)) 0)
		      (not (memq (preceding-char) (list ?\C-@ ?\n ? )))
		      (or (shr-char-kinsoku-eol-p (preceding-char))
			  (shr-char-kinsoku-bol-p (following-char)))))))
648
	 (when (setq failed (<= (point) start))
649 650 651 652 653
	   ;; There's no breakable point that doesn't violate kinsoku,
	   ;; so we go to the second best position.
	   (if (looking-at "\\(\\c<+\\)\\c<")
	       (goto-char (match-end 1))
	     (forward-char 1))))
654
	((shr-char-kinsoku-bol-p (following-char))
655 656 657 658 659
	 ;; Find forward the point where kinsoku-bol characters end.
	 (let ((count 4))
	   (while (progn
		    (forward-char 1)
		    (and (>= (setq count (1- count)) 0)
660
			 (shr-char-kinsoku-bol-p (following-char))
661
			 (shr-char-breakable-p (following-char))))))))
662 663 664
       (when (eq (following-char) ? )
	 (forward-char 1))))
    (not failed)))
665

666 667 668 669
(defun shr-parse-base (url)
  ;; Always chop off anchors.
  (when (string-match "#.*" url)
    (setq url (substring url 0 (match-beginning 0))))
Ivan Shmakov's avatar
Ivan Shmakov committed
670 671
  ;; NB: <base href="" > URI may itself be relative to the document s URI
  (setq url (shr-expand-url url))
672 673 674 675 676 677 678 679 680 681 682 683 684 685 686
  (let* ((parsed (url-generic-parse-url url))
	 (local (url-filename parsed)))
    (setf (url-filename parsed) "")
    ;; Chop off the bit after the last slash.
    (when (string-match "\\`\\(.*/\\)[^/]+\\'" local)
      (setq local (match-string 1 local)))
    ;; Always make the local bit end with a slash.
    (when (and (not (zerop (length local)))
	       (not (eq (aref local (1- (length local))) ?/)))
      (setq local (concat local "/")))
    (list (url-recreate-url parsed)
	  local
	  (url-type parsed)
	  url)))

687 688
(autoload 'url-expand-file-name "url-expand")

Glenn Morris's avatar
Glenn Morris committed
689 690
;; FIXME This needs some tests writing.
;; Does it even need to exist, given that url-expand-file-name does?
691 692 693
(defun shr-expand-url (url &optional base)
  (setq base
	(if base
Ivan Shmakov's avatar
Ivan Shmakov committed
694
	    ;; shr-parse-base should never call this with non-nil base!
695 696 697 698 699 700 701 702
	    (shr-parse-base base)
	  ;; Bound by the parser.
	  shr-base))
  (when (zerop (length url))
    (setq url nil))
  (cond ((or (not url)
	     (not base)
	     (string-match "\\`[a-z]*:" url))
Ivan Shmakov's avatar
Ivan Shmakov committed
703 704
	 ;; Absolute or empty URI
	 (or url (nth 3 base)))
705 706 707 708 709 710 711 712 713 714 715 716
	((eq (aref url 0) ?/)
	 (if (and (> (length url) 1)
		  (eq (aref url 1) ?/))
	     ;; //host...; just use the protocol
	     (concat (nth 2 base) ":" url)
	   ;; Just use the host name part.
	   (concat (car base) url)))
	((eq (aref url 0) ?#)
	 ;; A link to an anchor.
	 (concat (nth 3 base) url))
	(t
	 ;; Totally relative.
717
	 (url-expand-file-name url (concat (car base) (cadr base))))))
718

719 720 721
(defun shr-ensure-newline ()
  (unless (zerop (current-column))
    (insert "\n")))
722 723 724

(defun shr-ensure-paragraph ()
  (unless (bobp)
725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750
    (let ((prefix (get-text-property (line-beginning-position)
				     'shr-prefix-length)))
      (cond
       ((and (bolp)
	     (save-excursion
	       (forward-line -1)
	       (looking-at " *$")))
	;; We're already at a new paragraph; do nothing.
	)
       ((and prefix
	     (= prefix (- (point) (line-beginning-position))))
	;; Do nothing; we're at the start of a <li>.
	)
       ((save-excursion
	  (beginning-of-line)
	  ;; If the current line is totally blank, and doesn't even
	  ;; have any face properties set, then delete the blank
	  ;; space.
	  (and (looking-at " *$")
	       (not (get-text-property (point) 'face))
	       (not (= (next-single-property-change (point) 'face nil
						    (line-end-position))
		       (line-end-position)))))
	(delete-region (match-beginning 0) (match-end 0)))
       (t
	(insert "\n\n"))))))
751

752
(defun shr-indent ()
753
  (when (> shr-indentation 0)
754 755 756 757 758 759
    (insert
     (if (not shr-use-fonts)
	 (make-string shr-indentation ?\s)
       (propertize " "
		   'display
		   `(space :width (,shr-indentation)))))))
760

761
(defun shr-fontize-dom (dom &rest types)
762
  (let ((start (point)))
763
    (shr-generic dom)
764
    (dolist (type types)
765
      (shr-add-font start (point) type))))
766

767 768 769
;; Add face to the region, but avoid putting the font properties on
;; blank text at the start of the line, and the newline at the end, to
;; avoid ugliness.
770
(defun shr-add-font (start end type)
771 772 773 774 775 776 777 778 779 780
  (unless shr-inhibit-decoration
    (save-excursion
      (goto-char start)
      (while (< (point) end)
	(when (bolp)
	  (skip-chars-forward " "))
	(add-face-text-property (point) (min (line-end-position) end) type t)
	(if (< (line-end-position) end)
	    (forward-line 1)
	  (goto-char end))))))
781

782 783 784 785 786 787
(defun shr-mouse-browse-url (ev)
  "Browse the URL under the mouse cursor."
  (interactive "e")
  (mouse-set-point ev)
  (shr-browse-url))

788
(defun shr-browse-url (&optional external mouse-event)
789 790
  "Browse the URL under point.
If EXTERNAL, browse the URL using `shr-external-browser'."
791 792
  (interactive (list current-prefix-arg last-nonmenu-event))
  (mouse-set-point mouse-event)
793
  (let ((url (get-text-property (point) 'shr-url)))
794 795 796 797
    (cond
     ((not url)
      (message "No link under point"))
     ((string-match "^mailto:" url)
798
      (browse-url-mail url))
799
     (t
800 801 802
      (if external
	  (funcall shr-external-browser url)
	(browse-url url))))))
803

804 805 806 807 808 809 810
(defun shr-save-contents (directory)
  "Save the contents from URL in a file."
  (interactive "DSave contents of URL to directory: ")
  (let ((url (get-text-property (point) 'shr-url)))
    (if (not url)
	(message "No link under point")
      (url-retrieve (shr-encode-url url)
811 812
		    'shr-store-contents (list url directory)
		    nil t))))
813 814 815 816 817 818 819 820 821

(defun shr-store-contents (status url directory)
  (unless (plist-get status :error)
    (when (or (search-forward "\n\n" nil t)
	      (search-forward "\r\n\r\n" nil t))
      (write-region (point) (point-max)
		    (expand-file-name (file-name-nondirectory url)
				      directory)))))

822
(defun shr-image-fetched (status buffer start end &optional flags)
823 824 825 826 827 828
  (let ((image-buffer (current-buffer)))
    (when (and (buffer-name buffer)
	       (not (plist-get status :error)))
      (url-store-in-cache image-buffer)
      (when (or (search-forward "\n\n" nil t)
		(search-forward "\r\n\r\n" nil t))
829
	(let ((data (shr-parse-image-data)))
830 831 832
	  (with-current-buffer buffer
	    (save-excursion
	      (let ((alt (buffer-substring start end))
833
		    (properties (text-properties-at start))
834 835 836
		    (inhibit-read-only t))
		(delete-region start end)
		(goto-char start)
837 838 839 840 841 842
		(funcall shr-put-image-function data alt flags)
		(while properties
		  (let ((type (pop properties))
			(value (pop properties)))
		    (unless (memq type '(display image-size))
		      (put-text-property start (point) type value))))))))))
843
    (kill-buffer image-buffer)))
844

845 846 847 848 849 850 851 852 853 854 855
(defun shr-image-from-data (data)
  "Return an image from the data: URI content DATA."
  (when (string-match
	 "\\(\\([^/;,]+\\(/[^;,]+\\)?\\)\\(;[^;,]+\\)*\\)?,\\(.*\\)"
	 data)
    (let ((param (match-string 4 data))
	  (payload (url-unhex-string (match-string 5 data))))
      (when (string-match "^.*\\(;[ \t]*base64\\)$" param)
	(setq payload (base64-decode-string payload)))
      payload)))

856 857 858 859
;; Behind display-graphic-p test.
(declare-function image-size "image.c" (spec &optional pixels frame))
(declare-function image-animate "image" (image &optional index limit))

860 861 862 863
(defun shr-put-image (spec alt &optional flags)
  "Insert image SPEC with a string ALT.  Return image.
SPEC is either an image data blob, or a list where the first
element is the data blob and the second element is the content-type."
864
  (if (display-graphic-p)
865
      (let* ((size (cdr (assq 'size flags)))
866 867 868 869 870
	     (data (if (consp spec)
		       (car spec)
		     spec))
	     (content-type (and (consp spec)
				(cadr spec)))
871 872 873
	     (start (point))
	     (image (cond
		     ((eq size 'original)
874
		      (create-image data nil t :ascent 100
875
				    :format content-type))
876 877
		     ((eq content-type 'image/svg+xml)
		      (create-image data 'svg t :ascent 100))
878 879
		     ((eq size 'full)
		      (ignore-errors
880
			(shr-rescale-image data content-type)))
881 882
		     (t
		      (ignore-errors
883
			(shr-rescale-image data content-type))))))
884
        (when image
885 886 887 888 889
	  ;; When inserting big-ish pictures, put them at the
	  ;; beginning of the line.
	  (when (and (> (current-column) 0)
		     (> (car (image-size image t)) 400))
	    (insert "\n"))
890
	  (if (eq size 'original)
891
	      (insert-sliced-image image (or alt "*") nil 20 1)
892 893
	    (insert-image image (or alt "*")))
	  (put-text-property start (point) 'image-size size)
894 895
	  (when (and shr-image-animate
                     (cond ((fboundp 'image-multi-frame-p)
896 897 898
		       ;; Only animate multi-frame things that specify a
		       ;; delay; eg animated gifs as opposed to
		       ;; multi-page tiffs.  FIXME?
899 900 901 902
                            (cdr (image-multi-frame-p image)))
                           ((fboundp 'image-animated-p)
                            (image-animated-p image))))
            (image-animate image nil 60)))
903
	image)
904
    (insert alt)))
905

906 907 908 909
(defun shr-rescale-image (data &optional content-type)
  "Rescale DATA, if too big, to fit the current buffer."
  (if (not (and (fboundp 'imagemagick-types)
                (get-buffer-window (current-buffer))))
910 911 912 913 914 915 916 917 918
      (create-image data nil t :ascent 100)
    (let ((edges (window-inside-pixel-edges
		  (get-buffer-window (current-buffer)))))
      (create-image
       data 'imagemagick t
       :ascent 100
       :max-width (truncate (* shr-max-image-proportion
			       (- (nth 2 edges) (nth 0 edges))))
       :max-height (truncate (* shr-max-image-proportion
919
				(- (nth 3 edges) (nth 1 edges))))
920
       :format content-type))))
921

Glenn Morris's avatar
Glenn Morris committed
922 923 924
;; url-cache-extract autoloads url-cache.
(declare-function url-cache-create-filename "url-cache" (url))
(autoload 'mm-disable-multibyte "mm-util")
925
(autoload 'browse-url-mail "browse-url")
Glenn Morris's avatar
Glenn Morris committed
926

927 928 929 930 931
(defun shr-get-image-data (url)
  "Get image data for URL.
Return a string with image data."
  (with-temp-buffer
    (mm-disable-multibyte)
932
    (when (ignore-errors
933
	    (url-cache-extract (url-cache-create-filename (shr-encode-url url)))
934 935 936
	    t)
      (when (or (search-forward "\n\n" nil t)
		(search-forward "\r\n\r\n" nil t))
937 938 939
	(shr-parse-image-data)))))

(defun shr-parse-image-data ()
940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955
  (let ((data (buffer-substring (point) (point-max)))
	(content-type
	 (save-excursion
	   (save-restriction
	     (narrow-to-region (point-min) (point))
	     (let ((content-type (mail-fetch-field "content-type")))
	       (and content-type
		    ;; Remove any comments in the type string.
		    (intern (replace-regexp-in-string ";.*" "" content-type)
			    obarray)))))))
    ;; SVG images may contain references to further images that we may
    ;; want to block.  So special-case these by parsing the XML data
    ;; and remove the blocked bits.
    (when (eq content-type 'image/svg+xml)
      (setq data
	    (shr-dom-to-xml
956
	     (libxml-parse-xml-region (point) (point-max)))))
957
    (list data content-type)))
958

959 960 961 962
(defun shr-image-displayer (content-function)
  "Return a function to display an image.
CONTENT-FUNCTION is a function to retrieve an image for a cid url that
is an argument.  The function to be returned takes three arguments URL,
Paul Eggert's avatar
Paul Eggert committed
963
START, and END.  Note that START and END should be markers."
964
  `(lambda (url start end)
965 966 967 968 969 970 971
     (when url
       (if (string-match "\\`cid:" url)
	   ,(when content-function
	      `(let ((image (funcall ,content-function
				     (substring url (match-end 0)))))
		 (when image
		   (goto-char start)
972
		   (funcall shr-put-image-function
973
			    image (buffer-substring start end))
974
		   (delete-region (point) end))))
975 976
	 (url-retrieve url 'shr-image-fetched
		       (list (current-buffer) start end)
977
		       t t)))))
978

979
(defun shr-heading (dom &rest types)
980
  (shr-ensure-paragraph)
981
  (apply #'shr-fontize-dom dom types)
982 983
  (shr-ensure-paragraph))

984
(defun shr-urlify (start url &optional title)
985
  (shr-add-font start (point) 'shr-link)
986 987 988
  (add-text-properties
   start (point)
   (list 'shr-url url
989
	 'help-echo (let ((iri (or (ignore-errors
990 991 992 993 994
				     (decode-coding-string
				      (url-unhex-string url)
				      'utf-8 t))
				   url)))
		      (if title (format "%s (%s)" iri title) iri))
995
	 'follow-link t
996
	 'mouse-face 'highlight
997
	 'keymap shr-map)))
998 999 1000 1001 1002

(defun shr-encode-url (url)
  "Encode URL."
  (browse-url-url-encode-chars