Add a new, somewhat experimental "readability" command to eww

* net/eww.el (eww-readable): New command and keystroke.

* net/shr.el (shr-retransform-dom): New function.
parent 816cad6e
......@@ -133,6 +133,12 @@ result of the calculation into the current buffer.
*** New minor mode global-eldoc-mode
*** eldoc-documentation-function now defaults to nil
** eww
*** A new command `R' (`eww-readable') will try do identify the main
textual parts of a web page and display only that, leaving menus and
the like off the page.
** Message mode
*** text/html messages that contain inline image parts will be
......
2014-11-02 Lars Magne Ingebrigtsen <larsi@gnus.org>
* net/eww.el (eww-readable): New command and keystroke.
* net/shr.el (shr-retransform-dom): New function.
* net/eww.el (eww-display-html): Set `eww-current-source' in the
correct buffer.
(eww-view-source): Use it.
......
......@@ -402,6 +402,7 @@ word(s) will be searched for via `eww-search-prefix'."
(setq-local eww-contents-url nil))
(defun eww-view-source ()
"View the HTML source code of the current page."
(interactive)
(let ((buf (get-buffer-create "*eww-source*"))
(source eww-current-source))
......@@ -413,6 +414,60 @@ word(s) will be searched for via `eww-search-prefix'."
(html-mode)))
(view-buffer buf)))
(defun eww-readable ()
"View the main \"readable\" parts of the current web page.
This command uses heuristics to find the parts of the web page that
contains the main textual portion, leaving out navigation menus and
the like."
(interactive)
(let* ((source eww-current-source)
(dom (shr-transform-dom
(with-temp-buffer
(insert source)
(libxml-parse-html-region (point-min) (point-max))))))
(eww-score-readability dom)
(eww-display-html 'utf-8 nil (shr-retransform-dom
(eww-highest-readability dom)))
(setq eww-current-source source)))
(defun eww-score-readability (node)
(let ((score -1))
(cond
((memq (car node) '(script head))
(setq score -2))
((eq (car node) 'meta)
(setq score -1))
((eq (car node) 'a)
(setq score (- (length (split-string
(or (cdr (assoc 'text (cdr node))) ""))))))
(t
(dolist (elem (cdr node))
(cond
((eq (car elem) 'text)
(setq score (+ score (length (split-string (cdr elem))))))
((consp (cdr elem))
(setq score (+ score
(or (cdr (assoc :eww-readability-score (cdr elem)))
(eww-score-readability elem)))))))))
;; Cache the score of the node to avoid recomputing all the time.
(setcdr node (cons (cons :eww-readability-score score) (cdr node)))
score))
(defun eww-highest-readability (node)
(let ((result node)
highest)
(dolist (elem (cdr node))
(when (and (consp (cdr elem))
(> (or (cdr (assoc
:eww-readability-score
(setq highest
(eww-highest-readability elem))))
most-negative-fixnum)
(or (cdr (assoc :eww-readability-score (cdr result)))
most-negative-fixnum)))
(setq result highest)))
result))
(defvar eww-mode-map
(let ((map (make-sparse-keymap)))
(suppress-keymap map)
......@@ -435,6 +490,7 @@ word(s) will be searched for via `eww-search-prefix'."
(define-key map "w" 'eww-copy-page-url)
(define-key map "C" 'url-cookie-list)
(define-key map "v" 'eww-view-source)
(define-key map "R" 'eww-readable)
(define-key map "H" 'eww-list-histories)
(define-key map "b" 'eww-add-bookmark)
......
......@@ -370,6 +370,26 @@ size, and full-buffer size."
(push (shr-transform-dom sub) result)))
(nreverse result)))
(defun shr-retransform-dom (dom)
"Transform the shr DOM back into the libxml DOM."
(let ((tag (car dom))
(attributes nil)
(text nil)
(sub-nodes nil))
(dolist (elem (cdr dom))
(cond
((eq (car elem) 'text)
(setq text (cdr elem)))
((not (consp (cdr elem)))
(push (cons (intern (substring (symbol-name (car elem)) 1) obarray)
(cdr elem))
attributes))
(t
(push (shr-retransform-dom elem) sub-nodes))))
(append (list tag (nreverse attributes))
(nreverse sub-nodes)
(and text (list text)))))
(defsubst shr-generic (cont)
(dolist (sub cont)
(cond
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment