Commit b02b54a8 authored by Gerd Moellmann's avatar Gerd Moellmann
Browse files

(regexp-opt-try-suffix): New function.

(regexp-opt-group): Use it to get common suffixes in STRINGS.
If STRINGS is nil, return "" rather than nil.
parent 57f707e7
1999-10-08 Simon Marshall <simon@gnu.org>
* emacs-lisp/regexp-opt.el (regexp-opt-try-suffix): New function.
(regexp-opt-group): Use it to get common suffixes in STRINGS.
If STRINGS is nil, return "" rather than nil.
1999-10-08 Gerd Moellmann <gerd@gnu.org>
* Makefile (compile): Compile files one by one. Set load-path to
......
;;; regexp-opt.el --- generate efficient regexps to match strings.
;; Copyright (C) 1994, 1995, 1996, 1997 Free Software Foundation, Inc.
;; Copyright (C) 1994, 95, 96, 97, 98, 1999 Free Software Foundation, Inc.
;; Author: Simon Marshall <simon@gnu.org>
;; Keywords: strings, regexps
;; Version: 1.07
;; This file is part of GNU Emacs.
......@@ -24,7 +25,7 @@
;;; Commentary:
;; The "opt" in "regexp-opt" stands for "optim\\(al\\|i\\(se\\|ze\\)\\)".
;; The "opt" in "regexp-opt" stands for "optim\\(al\\|i[sz]e\\)".
;;
;; This package generates a regexp from a given list of strings (which matches
;; one of those strings) so that the regexp generated by:
......@@ -47,6 +48,17 @@
;;
;; Searching using the above example `regexp-opt' regexp takes approximately
;; two-thirds of the time taken using the equivalent `mapconcat' regexp.
;;
;; Note that this package will also find common suffix strings if this does not
;; increase the number of grouping constructs. For example:
;;
;; (regexp-opt '("these" "those"))
;; => "th[eo]se"
;;
;; but:
;;
;; (regexp-opt '("barfly" "housefly"))
;; => "barfly\\|housefly" rather than "\\(bar\\|house\\)fly"
;; Since this package was written to produce efficient regexps, not regexps
;; efficiently, it is probably not a good idea to in-line too many calls in
......@@ -72,13 +84,12 @@
;; your code for such changes to have effect in your code.
;; Originally written for font-lock.el, from an idea from Stig's hl319.el, with
;; thanks for ideas also to Michael Ernst, Bob Glickstein and Dan Nicolaescu.
;; Please don't tell me that it doesn't produce optimal regexps; I know that
;; already. For example, the above explanation for the meaning of "opt" would
;; be more efficient as "optim\\(al\\|i[sz]e\\)", but this requires complex
;; forward looking. But (ideas or) code to improve things (are) is welcome.
;; thanks for ideas also to Michael Ernst, Bob Glickstein, Dan Nicolaescu and
;; Stefan Monnier.
;; No doubt `regexp-opt' doesn't always produce optimal regexps, so code, ideas
;; or any other information to improve things are welcome.
;;; Code:
;;; Code.
;;;###autoload
(defun regexp-opt (strings &optional paren)
......@@ -128,9 +139,9 @@ in REGEXP."
;; If LAX non-nil, don't output parentheses if it doesn't require them.
;; Merges keywords to avoid backtracking in Emacs' regexp matcher.
;;
;; The basic idea is to find the shortest common prefix, remove it and
;; recurse. If there is no prefix, we divide the list into two so that (at
;; least) one half will have at least a one-character common prefix.
;; The basic idea is to find the shortest common prefix or suffix, remove it
;; and recurse. If there is no prefix, we divide the list into two so that
;; (at least) one half will have at least a one-character common prefix.
;;
;; Also we delay the addition of grouping parenthesis as long as possible
;; until we're sure we need them, and try to remove one-character sequences
......@@ -139,11 +150,15 @@ in REGEXP."
(let* ((open-group (if paren "\\(" ""))
(close-group (if paren "\\)" ""))
(open-charset (if lax "" open-group))
(close-charset (if lax "" close-group)))
(close-charset (if lax "" close-group))
(open-presuf open-charset)
(close-presuf close-charset))
(cond
;; Protect against user-stupidity... could call error here
((null strings)
nil)
;;
;; If there are no strings, just return the empty string.
((= (length strings) 0)
"")
;;
;; If there is only one string, just return it.
((= (length strings) 1)
(if (= (length (car strings)) 1)
......@@ -157,7 +172,7 @@ in REGEXP."
close-charset))
;;
;; If all are one-character strings, just return a character set.
((= (length strings) (apply '+ (mapcar 'length strings)))
((= (apply 'max (mapcar 'length strings)) 1)
(concat open-charset
(regexp-opt-charset strings)
close-charset))
......@@ -165,17 +180,30 @@ in REGEXP."
;; We have a list of different length strings.
(t
(let ((prefix (try-completion "" (mapcar 'list strings)))
(suffix (regexp-opt-try-suffix strings))
(letters (let ((completion-regexp-list '("^.$")))
(all-completions "" (mapcar 'list strings)))))
(cond
;;
;; If there is a common prefix, remove it and recurse on the suffixes.
((> (length prefix) 0)
(let* ((length (length prefix))
(suffixes (mapcar (lambda (s) (substring s length)) strings)))
(concat open-group
(let* ((end (length prefix))
(suffixes (mapcar (lambda (s) (substring s end)) strings)))
(concat open-presuf
(regexp-quote prefix) (regexp-opt-group suffixes t t)
close-group)))
close-presuf)))
;;
;; If there is a common suffix, remove it and recurse on the prefixes.
((> (length suffix) (if lax
0
(- (apply 'max (mapcar 'length strings)) 2)))
(let* ((end (- (length suffix)))
(prefixes (sort (mapcar (lambda (s) (substring s 0 end))
strings)
'string-lessp)))
(concat open-presuf
(regexp-opt-group prefixes t t) (regexp-quote suffix)
close-presuf)))
;;
;; If there are several one-character strings, remove them and recurse
;; on the rest (first so the final regexp finds the longest match).
......@@ -235,6 +263,16 @@ in REGEXP."
(concat "[" dash caret "]")
(concat "[" bracket charset caret dash "]"))))
(defun regexp-opt-try-suffix (strings)
;;
;; Return common suffix of each string in STRINGS. See `try-completion'.
;;
(let* ((chars (mapcar (lambda (s) (mapcar 'identity s)) strings))
(srahc (mapcar 'reverse chars))
(sgnirts (mapcar (lambda (c) (mapconcat 'char-to-string c "")) srahc))
(xiffus (try-completion "" (mapcar 'list sgnirts))))
(mapconcat 'char-to-string (reverse (mapcar 'identity xiffus)) "")))
(provide 'regexp-opt)
;;; regexp-opt.el ends here
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment