Commit 8d3bb7be authored by Michal Nazarewicz

Handle quotation marks and apostrophes in ‘sgml-quote’

To be able to use text in an HTML argument, quotation marks need
to be replaced with an appropriate character reference.  Make
‘sgml-quote’ do that.

While at it, fix entities not being unquoted if they lack a closing
semicolon (e.g. ‘&amp’) and occur at the very end of a region.
Even though unlikely, make ‘sgml-quote’ handle this scenario.

* lisp/textmodes/sgml-mode.el (sgml-quote): Handle quotation marks and
apostrophes.  Match entities lacking semicolon at the end of regions.
* test/lisp/textmodes/sgml-mode-tests.el (sgml-quote-works): New test
case for ‘sgml-quote’ function.
parent 358da456
......@@ -208,6 +208,13 @@ Can be controlled via the new variable 'footnote-align-to-fn-text'.
formats (e.g. "black" => "#000000" => "rgb(0, 0, 0)") has been added,
bound to 'C-c C-f'.
** SGML mode
---
*** 'sgml-quote' now handles double quotes and apostrophes
when escaping text and in addition all numeric entities when
unescaping text.
** Dired
+++
......
......@@ -1241,8 +1241,11 @@ See `sgml-tag-alist' for info about attribute rules."
(defun sgml-quote (start end &optional unquotep)
"Quote SGML text in region START ... END.
Only &, < and > are quoted, the rest is left untouched.
With prefix argument UNQUOTEP, unquote the region."
Only &, <, >, ' and \" characters are quoted, the rest is left
untouched. This is sufficient to use quoted text as SGML argument.
With prefix argument UNQUOTEP, unquote the region. All numeric entities,
\"amp\", \"lt\", \"gt\" and \"quot\" named entities are unquoted."
(interactive "r\nP")
(save-restriction
(narrow-to-region start end)
......@@ -1250,14 +1253,23 @@ With prefix argument UNQUOTEP, unquote the region."
(if unquotep
;; FIXME: We should unquote other named character references as well.
(while (re-search-forward
"\\(&\\(amp\\|\\(l\\|\\(g\\)\\)t\\)\\)[][<>&;\n\t \"%!'(),/=?]"
"\\(&\\(amp\\|quot\\|lt\\|gt\\|#\\([0-9]+\\|[xX][0-9a-fA-F]+\\)\\)\\)\\([][<>&;\n\t \"%!'(),/=?]\\|$\\)"
nil t)
(replace-match (if (match-end 4) ">" (if (match-end 3) "<" "&")) t t
nil (if (eq (char-before (match-end 0)) ?\;) 0 1)))
(while (re-search-forward "[&<>]" nil t)
(replace-match
(string
(or (cdr (assq (char-after (match-beginning 2))
'((?a . ?&) (?q . ?\") (?l . ?<) (?g . ?>))))
(let ((num (match-string 3)))
(if (or (eq ?x (aref num 0)) (eq ?X (aref num 0)))
(string-to-number (substring num 1) 16)
(string-to-number num 10)))))
t t nil (if (eq (char-before (match-end 0)) ?\;) 0 1)))
(while (re-search-forward "[&<>\"']" nil t)
(replace-match (cdr (assq (char-before) '((?& . "&amp;")
(?< . "&lt;")
(?> . "&gt;"))))
(?> . "&gt;")
(?\" . "&#34;")
(?' . "&#39;"))))
t t)))))
(defun sgml-pretty-print (beg end)
......
......@@ -131,5 +131,35 @@ The point is set to the beginning of the buffer."
(sgml-delete-tag 1)
(should (string= "Winter is comin'" (buffer-string)))))
(ert-deftest sgml-quote-works ()
  ;; Exercise `sgml-quote' over the whole buffer in both directions:
  ;; quoting (no third argument) and unquoting (third argument non-nil).
  (let ((plain "Foo<Bar> \"Baz\" 'Qux'\n"))
    (with-temp-buffer
      ;; Round trip: quote the plain text, check the escaped form, then
      ;; unquote it and expect the original text back.
      (insert plain)
      (sgml-quote (point-min) (point-max))
      (should (string= "Foo&lt;Bar&gt; &#34;Baz&#34; &#39;Qux&#39;\n"
                       (buffer-string)))
      (sgml-quote (point-min) (point-max) t)
      (should (string= plain (buffer-string)))
      ;; Unquote-only cases, each given as (ESCAPED . EXPECTED):
      ;;  1. the same text written with a mix of named, decimal and
      ;;     hexadecimal (lower- and upper-case "x") references;
      ;;  2. references lacking the closing semicolon, the last of which
      ;;     sits at the very end of the region.
      (dolist (pair (list (cons "Foo&lt;Bar&gt; &#34;Baz&quot; &#x27;Qux&#X27;\n"
                                plain)
                          (cons "&amp&amp" "&&")))
        (erase-buffer)
        (insert (car pair))
        (sgml-quote (point-min) (point-max) t)
        (should (string= (cdr pair) (buffer-string))))
      ;; Double quoting: the buffer still holds "&&" from the last case
      ;; above.  Quote it twice, then unquote it twice; the two layers of
      ;; escaping must unwind back to the same text.
      (dolist (unquotep '(nil nil t t))
        (sgml-quote (point-min) (point-max) unquotep))
      (should (string= "&&" (buffer-string))))))
(provide 'sgml-mode-tests)
;;; sgml-mode-tests.el ends here
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment