Commit 3fd74915 authored by Mattias Engdegård's avatar Mattias Engdegård

Optimise more inputs to `regexp-opt' (bug#36444)

Use a more precise test to determine whether the input to `regexp-opt'
is safe to optimise when KEEP-ORDER is non-nil, permitting more inputs
to be optimised than before.  For example, ("good" "goal" "go") is now

* lisp/emacs-lisp/regexp-opt.el (regexp-opt):
More precise test for whether the list is safe w.r.t. KEEP-ORDER.
(regexp-opt--contains-prefix): Remove.

* test/lisp/emacs-lisp/regexp-opt-tests.el: Use lexical-binding.
(regexp-opt-test--permutation, regexp-opt-test--factorial)
(regexp-opt-test--permutations, regexp-opt-test--match-all)
(regexp-opt-test--check-perm, regexp-opt-test--explain-perm)
(regexp-opt-keep-order): Test KEEP-ORDER.
parent 2bc90e0c
Pipeline #2244 failed with stage
in 51 minutes and 32 seconds
......@@ -140,21 +140,34 @@ usually more efficient than that of a simplified version:
(completion-ignore-case nil)
(completion-regexp-list nil)
(open (cond ((stringp paren) paren) (paren "\\(")))
(sorted-strings (delete-dups
(sort (copy-sequence strings) 'string-lessp)))
;; No strings: return an unmatchable regexp.
((null strings)
(concat (or open "\\(?:") regexp-unmatchable "\\)"))
;; If we cannot reorder, give up all attempts at
;; optimisation. There is room for improvement (Bug#34641).
((and keep-order (regexp-opt--contains-prefix sorted-strings))
(concat (or open "\\(?:")
(mapconcat #'regexp-quote strings "\\|")
;; The algorithm will generate a pattern that matches
;; longer strings in the list before shorter. If the
;; list order matters, then no string must come after a
;; proper prefix of that string. To check this, verify
;; that a straight or-pattern matches each string
;; entirely.
((and keep-order
(let* ((case-fold-search nil)
(alts (mapconcat #'regexp-quote strings "\\|")))
(and (let ((s strings))
(while (and s
(string-match alts (car s))
(= (match-end 0) (length (car s))))
(setq s (cdr s)))
;; If we exited early, we found evidence that
;; regexp-opt-group cannot be used.
(concat (or open "\\(?:") alts "\\)")))))
(regexp-opt-group sorted-strings (or open t) (not open))))))
(delete-dups (sort (copy-sequence strings) 'string-lessp))
(or open t) (not open))))))
(cond ((eq paren 'words)
(concat "\\<" re "\\>"))
((eq paren 'symbols)
......@@ -339,21 +352,6 @@ never matches anything."
(concat "[" all "]")))))))
(defun regexp-opt--contains-prefix (strings)
"Whether STRINGS contains a proper prefix of one of its other elements.
STRINGS must be a list of sorted strings without duplicates."
(let ((s strings))
;; In a lexicographically sorted list, a string always immediately
;; succeeds one of its prefixes.
(while (and (cdr s)
(not (string-equal
(car s)
(substring (cadr s) 0 (min (length (car s))
(length (cadr s)))))))
(setq s (cdr s)))
(cdr s)))
(provide 'regexp-opt)
;;; regexp-opt.el ends here
;;; regexp-opt-tests.el --- Tests for regexp-opt.el
;;; regexp-opt-tests.el --- Tests for regexp-opt.el -*- lexical-binding: t -*-
;; Copyright (C) 2013-2019 Free Software Foundation, Inc.
......@@ -25,6 +25,66 @@
(require 'regexp-opt)
(defun regexp-opt-test--permutation (n list)
"The Nth permutation of LIST, 0 ≤ N < (length LIST)!."
(let ((len (length list))
(perm-list nil))
(dotimes (i len)
(let* ((d (- len i))
(k (mod n d)))
(push (nth k list) perm-list)
(setq list (append (butlast list (- (length list) k))
(nthcdr (1+ k) list)))
(setq n (/ n d))))
(nreverse perm-list)))
(defun regexp-opt-test--factorial (n)
(apply #'* (number-sequence 1 n)))
(defun regexp-opt-test--permutations (list)
"All permutations of LIST."
(mapcar (lambda (i) (regexp-opt-test--permutation i list))
(number-sequence 0 (1- (regexp-opt-test--factorial (length list))))))
(defun regexp-opt-test--match-all (words re)
(mapcar (lambda (w) (and (string-match re w)
(match-string 0 w)))
(defun regexp-opt-test--check-perm (perm)
(let* ((ref-re (mapconcat #'regexp-quote perm "\\|"))
(opt-re (regexp-opt perm nil t))
(ref (regexp-opt-test--match-all perm ref-re))
(opt (regexp-opt-test--match-all perm opt-re)))
(equal opt ref)))
(defun regexp-opt-test--explain-perm (perm)
(let* ((ref-re (mapconcat #'regexp-quote perm "\\|"))
(opt-re (regexp-opt perm nil t))
(ref (regexp-opt-test--match-all perm ref-re))
(opt (regexp-opt-test--match-all perm opt-re)))
(concat "\n"
(format "Naïve regexp: %s\n" ref-re)
(format "Optimised regexp: %s\n" opt-re)
(format "Got: %s\n" opt)
(format "Expected: %s\n" ref))))
(put 'regexp-opt-test--check-perm 'ert-explainer 'regexp-opt-test--explain-perm)
(ert-deftest regexp-opt-keep-order ()
"Check that KEEP-ORDER works."
(dolist (perm (regexp-opt-test--permutations '("abc" "bca" "cab")))
(should (regexp-opt-test--check-perm perm)))
(dolist (perm (regexp-opt-test--permutations '("abc" "ab" "bca" "bc")))
(should (regexp-opt-test--check-perm perm)))
(dolist (perm (regexp-opt-test--permutations '("abxy" "cdxy")))
(should (regexp-opt-test--check-perm perm)))
(dolist (perm (regexp-opt-test--permutations '("afgx" "bfgx" "afgy" "bfgy")))
(should (regexp-opt-test--check-perm perm)))
(dolist (perm (regexp-opt-test--permutations '("a" "ab" "ac" "abc")))
(should (regexp-opt-test--check-perm perm))))
(ert-deftest regexp-opt-charset ()
(should (equal (regexp-opt-charset '(?a ?b ?a)) "[ab]"))
(should (equal (regexp-opt-charset '(?D ?d ?B ?a ?b ?C ?7 ?a ?c ?A))
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment