Commit b71d4ce0 authored by Mattias Engdegård, committed by Eli Zaretskii

Handle raw bytes, and LF in ranges, in rx `any' argument strings

* lisp/emacs-lisp/rx.el (rx-check-any-string): Rewrite to handle raw bytes
in unibyte strings and accept LF as range endpoints (Bug#33205).
* test/lisp/emacs-lisp/rx-tests.el: Add tests for the above.
parent fb10834a
Pipeline #399 failed with stage
in 12 seconds
...@@ -449,28 +449,35 @@ Only both edges of each range is checked." ...@@ -449,28 +449,35 @@ Only both edges of each range is checked."
(defun rx-check-any-string (str) (defun rx-check-any-string (str)
"Check string argument STR for Rx `any'." "Turn the `any' argument string STR into a list of characters.
(let ((i 0) The original order is not preserved. Ranges, \"A-Z\", become pairs, (?A . ?Z)."
c1 c2 l) (let ((decode-char
(if (= 0 (length str)) ;; Make sure raw bytes are decoded as such, to avoid confusion with
(error "String arg for Rx `any' must not be empty")) ;; U+0080..U+00FF.
(while (string-match ".-." str i) (if (multibyte-string-p str)
;; string before range: convert it to characters #'identity
(if (< i (match-beginning 0)) (lambda (c) (if (<= #x80 c #xff)
(setq l (nconc (+ c #x3fff00)
l c))))
(append (substring str i (match-beginning 0)) nil)))) (len (length str))
;; range (i 0)
(setq i (match-end 0) (ret nil))
c1 (aref str (match-beginning 0)) (if (= 0 len)
c2 (aref str (1- i))) (error "String arg for Rx `any' must not be empty"))
(cond (while (< i len)
((< c1 c2) (setq l (nconc l (list (cons c1 c2))))) (cond ((and (< i (- len 2))
((= c1 c2) (setq l (nconc l (list c1)))))) (= (aref str (+ i 1)) ?-))
;; rest? ;; Range.
(if (< i (length str)) (let ((start (funcall decode-char (aref str i)))
(setq l (nconc l (append (substring str i) nil)))) (end (funcall decode-char (aref str (+ i 2)))))
l)) (cond ((< start end) (push (cons start end) ret))
((= start end) (push start ret)))
(setq i (+ i 3))))
(t
;; Single character.
(push (funcall decode-char (aref str i)) ret)
(setq i (+ i 1)))))
ret))
(defun rx-check-any (arg) (defun rx-check-any (arg)
......
...@@ -33,6 +33,28 @@ ...@@ -33,6 +33,28 @@
(number-sequence ?< ?\]) (number-sequence ?< ?\])
(number-sequence ?- ?:)))))) (number-sequence ?- ?:))))))
(ert-deftest rx-char-any-range-nl ()
"Test character alternatives with LF as a range endpoint."
;; LF (\n) as the lower endpoint of a range: the generated bracket
;; expression is expected to keep the range as written.
(should (equal (rx (any "\n-\r"))
"[\n-\r]"))
;; LF (\n) as the upper endpoint of a range.
(should (equal (rx (any "\a-\n"))
"[\a-\n]")))
(ert-deftest rx-char-any-raw-byte ()
"Test raw bytes in character alternatives."
;; Separate raw characters: the `any' string lists the bytes \326 and
;; \333 individually, interleaved with the ASCII letters A and B.
;; The subject starts with "X", so the first match is expected at
;; index 1 (the \326 byte).
(should (equal (string-match-p (rx (any "\326A\333B"))
"X\326\333")
1))
;; Range of raw characters, unibyte: the `any' string is written with
;; octal escapes only.  The subject begins with the character ÿ and
;; the letter A, neither of which is expected to match; the first
;; match is expected at index 2 (the \310 byte).
(should (equal (string-match-p (rx (any "\200-\377"))
"ÿA\310B")
2))
;; Range of raw characters, multibyte: the `any' string mixes the
;; non-ASCII character Å with octal-escaped bytes and the range
;; \326-\377.  In the subject, "X" and "Y" are not in the set, so the
;; first match is expected at index 2 (the \355 byte, which lies
;; inside the range).
(should (equal (string-match-p (rx (any "Å\211\326-\377\177"))
"XY\355\177\327")
2)))
(ert-deftest rx-pcase () (ert-deftest rx-pcase ()
(should (equal (pcase "a 1 2 3 1 1 b" (should (equal (pcase "a 1 2 3 1 1 b"
((rx (let u (+ digit)) space ((rx (let u (+ digit)) space
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment