Commit 19b1cefa authored by Juri Linkov's avatar Juri Linkov

* lisp/char-fold.el (char-fold-to-regexp): Implement arg LAX (bug#36398).

* test/lisp/char-fold-tests.el (char-fold--test-multi-lax): New test.
parent 4a754df8
Pipeline #2246 failed with stage
in 51 minutes and 20 seconds
......@@ -148,12 +148,18 @@ Exceptionally for the space character (32), ALIST is ignored.")
(make-list n (or (aref char-fold-table ?\s) " ")))))
;;;###autoload
(defun char-fold-to-regexp (string &optional _lax from)
(defun char-fold-to-regexp (string &optional lax from)
"Return a regexp matching anything that char-folds into STRING.
Any character in STRING that has an entry in
`char-fold-table' is replaced with that entry (which is a
regexp) and other characters are `regexp-quote'd.
When LAX is non-nil, then the final character also matches ligatures
partially, for instance, the search string \"f\" will match \"fi\",
so when typing the search string in isearch while the cursor is on
a ligature, the search won't try to immediately advance to the next
complete match, but will stay on the partially matched ligature.
If the resulting regexp would be too long for Emacs to handle,
just return the result of calling `regexp-quote' on STRING.
......@@ -183,36 +189,40 @@ from which to start."
;; Long string. The regexp would probably be too long.
(alist (unless (> end 50)
(aref multi-char-table c))))
(push (let ((matched-entries nil)
(max-length 0))
(dolist (entry alist)
(let* ((suffix (car entry))
(len-suf (length suffix)))
(when (eq (compare-strings suffix 0 nil
string (1+ i) (+ i 1 len-suf)
nil)
t)
(push (cons len-suf (cdr entry)) matched-entries)
(setq max-length (max max-length len-suf)))))
;; If no suffixes matched, just go on.
(if (not matched-entries)
regexp
(push (if (and lax alist (= (1+ i) end))
(concat "\\(?:" regexp "\\|"
(mapconcat (lambda (entry)
(cdr entry)) alist "\\|") "\\)")
(let ((matched-entries nil)
(max-length 0))
(dolist (entry alist)
(let* ((suffix (car entry))
(len-suf (length suffix)))
(when (eq (compare-strings suffix 0 nil
string (1+ i) (+ i 1 len-suf)
nil)
t)
(push (cons len-suf (cdr entry)) matched-entries)
(setq max-length (max max-length len-suf)))))
;; If no suffixes matched, just go on.
(if (not matched-entries)
regexp
;;; If N suffixes match, we "branch" out into N+1 executions for the
;;; length of the longest match. This means "fix" will match "fix" but
;;; not "fⅸ", but it's necessary to keep the regexp size from scaling
;;; exponentially. See https://lists.gnu.org/r/emacs-devel/2015-11/msg02562.html
(let ((subs (substring string (1+ i) (+ i 1 max-length))))
;; `i' is still going to inc by 1 below.
(setq i (+ i max-length))
(concat
"\\(?:"
(mapconcat (lambda (entry)
(let ((length (car entry))
(suffix-regexp (cdr entry)))
(concat suffix-regexp
(char-fold-to-regexp subs nil length))))
`((0 . ,regexp) . ,matched-entries) "\\|")
"\\)"))))
(let ((subs (substring string (1+ i) (+ i 1 max-length))))
;; `i' is still going to inc by 1 below.
(setq i (+ i max-length))
(concat
"\\(?:"
(mapconcat (lambda (entry)
(let ((length (car entry))
(suffix-regexp (cdr entry)))
(concat suffix-regexp
(char-fold-to-regexp subs nil length))))
`((0 . ,regexp) . ,matched-entries) "\\|")
"\\)")))))
out))))
(setq i (1+ i)))
(when (> spaces 0)
......
......@@ -82,6 +82,14 @@
(set-char-table-extra-slot char-fold-table 0 multi)
(char-fold--test-match-exactly (car it) (cdr it)))))
(ert-deftest char-fold--test-multi-lax ()
(dolist (it '(("f" . "fi") ("f" . "ff")))
(with-temp-buffer
(insert (cdr it))
(goto-char (point-min))
(should (search-forward-regexp
(char-fold-to-regexp (car it) 'lax) nil 'noerror)))))
(ert-deftest char-fold--test-fold-to-regexp ()
(let ((char-fold-table (make-char-table 'char-fold-table))
(multi (make-char-table 'char-fold-table)))
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment