thingatpt.el 15.9 KB
Newer Older
1
;;; thingatpt.el --- get the `thing' at point
Richard M. Stallman's avatar
Richard M. Stallman committed
2

Glenn Morris's avatar
Glenn Morris committed
3
;; Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 2000,
4 5
;;   2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008
;;   Free Software Foundation, Inc.
Richard M. Stallman's avatar
Richard M. Stallman committed
6 7

;; Author: Mike Williams <mikew@gopher.dosli.govt.nz>
8
;; Maintainer: FSF
Richard M. Stallman's avatar
Richard M. Stallman committed
9
;; Keywords: extensions, matching, mouse
Richard M. Stallman's avatar
Richard M. Stallman committed
10 11 12 13
;; Created: Thu Mar 28 13:48:23 1991

;; This file is part of GNU Emacs.

14
;; GNU Emacs is free software: you can redistribute it and/or modify
Richard M. Stallman's avatar
Richard M. Stallman committed
15
;; it under the terms of the GNU General Public License as published by
16 17
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.
Richard M. Stallman's avatar
Richard M. Stallman committed
18 19 20 21 22 23

;; GNU Emacs is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.

24 25 26
;; You should have received a copy of the GNU General Public License
;; along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.

Richard M. Stallman's avatar
Richard M. Stallman committed
27
;;; Commentary:
Erik Naggum's avatar
Erik Naggum committed
28

29 30
;; This file provides routines for getting the "thing" at the location of
;; point, whatever that "thing" happens to be.  The "thing" is defined by
Richard M. Stallman's avatar
Richard M. Stallman committed
31
;; its beginning and end positions in the buffer.
Richard M. Stallman's avatar
Richard M. Stallman committed
32 33
;;
;; The function bounds-of-thing-at-point finds the beginning and end
34
;; positions by moving first forward to the end of the "thing", and then
Richard M. Stallman's avatar
Richard M. Stallman committed
35
;; backwards to the beginning.  By default, it uses the corresponding
36
;; forward-"thing" operator (eg. forward-word, forward-line).
Richard M. Stallman's avatar
Richard M. Stallman committed
37 38
;;
;; Special cases are allowed for using properties associated with the named
39
;; "thing":
Richard M. Stallman's avatar
Richard M. Stallman committed
40
;;
41
;;   forward-op		Function to call to skip forward over a "thing" (or
Richard M. Stallman's avatar
Richard M. Stallman committed
42
;;                      with a negative argument, backward).
43
;;
44 45
;;   beginning-op	Function to call to skip to the beginning of a "thing".
;;   end-op		Function to call to skip to the end of a "thing".
Richard M. Stallman's avatar
Richard M. Stallman committed
46 47 48 49 50 51
;;
;; Reliance on existing operators means that many `things' can be accessed
;; without further code:  eg.
;;     (thing-at-point 'line)
;;     (thing-at-point 'page)

Erik Naggum's avatar
Erik Naggum committed
52
;;; Code:
Richard M. Stallman's avatar
Richard M. Stallman committed
53 54 55

(provide 'thingatpt)

Erik Naggum's avatar
Erik Naggum committed
56
;; Basic movement
Richard M. Stallman's avatar
Richard M. Stallman committed
57 58

;;;###autoload
59
(defun forward-thing (thing &optional n)
60
  "Move forward to the end of the Nth next THING."
61 62
  (let ((forward-op (or (get thing 'forward-op)
			(intern-soft (format "forward-%s" thing)))))
63
    (if (functionp forward-op)
64 65
	(funcall forward-op (or n 1))
      (error "Can't determine how to move over a %s" thing))))
Richard M. Stallman's avatar
Richard M. Stallman committed
66

Erik Naggum's avatar
Erik Naggum committed
67
;; General routines
Richard M. Stallman's avatar
Richard M. Stallman committed
68 69

;;;###autoload
70 71 72 73
(defun bounds-of-thing-at-point (thing)
  "Determine the start and end buffer locations for the THING at point.
THING is a symbol which specifies the kind of syntactic entity you want.
Possibilities include `symbol', `list', `sexp', `defun', `filename', `url',
74
`email', `word', `sentence', `whitespace', `line', `page' and others.
75 76 77 78 79 80

See the file `thingatpt.el' for documentation on how to define
a symbol as a valid THING.

The value is a cons cell (START . END) giving the start and end positions
of the textual entity that was found."
81 82 83 84 85 86
  (if (get thing 'bounds-of-thing-at-point)
      (funcall (get thing 'bounds-of-thing-at-point))
    (let ((orig (point)))
      (condition-case nil
	  (save-excursion
	    ;; Try moving forward, then back.
87 88 89 90 91 92 93
            (funcall ;; First move to end.
             (or (get thing 'end-op)
                 (lambda () (forward-thing thing 1))))
            (funcall ;; Then move to beg.
             (or (get thing 'beginning-op)
                 (lambda () (forward-thing thing -1))))
	    (let ((beg (point)))
94 95 96 97 98
	      (if (not (and beg (> beg orig)))
		  ;; If that brings us all the way back to ORIG,
		  ;; it worked.  But END may not be the real end.
		  ;; So find the real end that corresponds to BEG.
		  (let ((real-end
99 100 101
			 (progn
			   (funcall
			    (or (get thing 'end-op)
102
                                (lambda () (forward-thing thing 1))))
103 104 105 106 107 108
			   (point))))
		    (if (and beg real-end (<= beg orig) (<= orig real-end))
			(cons beg real-end)))
		(goto-char orig)
		;; Try a second time, moving backward first and then forward,
		;; so that we can find a thing that ends at ORIG.
109 110 111 112 113 114 115 116
                (funcall ;; First, move to beg.
                 (or (get thing 'beginning-op)
                     (lambda () (forward-thing thing -1))))
                (funcall ;; Then move to end.
                 (or (get thing 'end-op)
                     (lambda () (forward-thing thing 1))))
		(let ((end (point))
                      (real-beg
117 118 119
		       (progn
			 (funcall
			  (or (get thing 'beginning-op)
120
                              (lambda () (forward-thing thing -1))))
121 122 123 124
			 (point))))
		  (if (and real-beg end (<= real-beg orig) (<= orig end))
		      (cons real-beg end))))))
	(error nil)))))
Richard M. Stallman's avatar
Richard M. Stallman committed
125 126

;;;###autoload
127 128 129 130
(defun thing-at-point (thing)
  "Return the THING at point.
THING is a symbol which specifies the kind of syntactic entity you want.
Possibilities include `symbol', `list', `sexp', `defun', `filename', `url',
131
`email', `word', `sentence', `whitespace', `line', `page' and others.
132 133 134

See the file `thingatpt.el' for documentation on how to define
a symbol as a valid THING."
135 136 137
  (if (get thing 'thing-at-point)
      (funcall (get thing 'thing-at-point))
    (let ((bounds (bounds-of-thing-at-point thing)))
138
      (if bounds
139
	  (buffer-substring (car bounds) (cdr bounds))))))
Richard M. Stallman's avatar
Richard M. Stallman committed
140

Erik Naggum's avatar
Erik Naggum committed
141
;; Go to beginning/end
Richard M. Stallman's avatar
Richard M. Stallman committed
142

143 144 145
(defun beginning-of-thing (thing)
  (let ((bounds (bounds-of-thing-at-point thing)))
    (or bounds (error "No %s here" thing))
Richard M. Stallman's avatar
Richard M. Stallman committed
146 147
    (goto-char (car bounds))))

148 149 150
(defun end-of-thing (thing)
  (let ((bounds (bounds-of-thing-at-point thing)))
    (or bounds (error "No %s here" thing))
Richard M. Stallman's avatar
Richard M. Stallman committed
151 152
    (goto-char (cdr bounds))))

153
;;  Special cases
Richard M. Stallman's avatar
Richard M. Stallman committed
154

155
;;  Lines
156 157 158 159 160

;; bolp will be false when you click on the last line in the buffer
;; and it has no final newline.

(put 'line 'beginning-op
161
     (lambda () (if (bolp) (forward-line -1) (beginning-of-line))))
162

163
;;  Sexps
Richard M. Stallman's avatar
Richard M. Stallman committed
164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179

(defun in-string-p ()
  (let ((orig (point)))
    (save-excursion
      (beginning-of-defun)
      (nth 3 (parse-partial-sexp (point) orig)))))

(defun end-of-sexp ()
  (let ((char-syntax (char-syntax (char-after (point)))))
    (if (or (eq char-syntax ?\))
	    (and (eq char-syntax ?\") (in-string-p)))
	(forward-char 1)
      (forward-sexp 1))))

(put 'sexp 'end-op 'end-of-sexp)

180 181 182 183 184 185 186 187 188
(defun beginning-of-sexp ()
  (let ((char-syntax (char-syntax (char-before (point)))))
    (if (or (eq char-syntax ?\()
	    (and (eq char-syntax ?\") (in-string-p)))
	(forward-char -1)
      (forward-sexp -1))))

(put 'sexp 'beginning-op 'beginning-of-sexp)

189
;;  Lists
Richard M. Stallman's avatar
Richard M. Stallman committed
190

191
(put 'list 'end-op (lambda () (up-list 1)))
Richard M. Stallman's avatar
Richard M. Stallman committed
192 193
(put 'list 'beginning-op 'backward-sexp)

194
;;  Filenames and URLs  www.com/foo%32bar
Richard M. Stallman's avatar
Richard M. Stallman committed
195

196
(defvar thing-at-point-file-name-chars "-~/[:alnum:]_.${}#%,:"
Richard M. Stallman's avatar
Richard M. Stallman committed
197 198
  "Characters allowable in filenames.")

199
(put 'filename 'end-op
200 201 202
     (lambda ()
       (re-search-forward (concat "\\=[" thing-at-point-file-name-chars "]*")
			  nil t)))
Richard M. Stallman's avatar
Richard M. Stallman committed
203
(put 'filename 'beginning-op
204 205 206 207 208
     (lambda ()
       (if (re-search-backward (concat "[^" thing-at-point-file-name-chars "]")
			       nil t)
	   (forward-char)
	 (goto-char (point-min)))))
209

210
(defvar thing-at-point-url-path-regexp
211
  "[^]\t\n \"'<>[^`{}]*[^]\t\n \"'<>[^`{}.,;]+"
212
  "A regular expression probably matching the host and filename or e-mail part of a URL.")
213 214 215 216 217 218 219

(defvar thing-at-point-short-url-regexp
  (concat "[-A-Za-z0-9.]+" thing-at-point-url-path-regexp)
  "A regular expression probably matching a URL without an access scheme.
Hostname matching is stricter in this case than for
``thing-at-point-url-regexp''.")

220
(defvar thing-at-point-uri-schemes
221
  ;; Officials from http://www.iana.org/assignments/uri-schemes.html
222 223 224 225 226 227
  '("ftp://" "http://" "gopher://" "mailto:" "news:" "nntp:"
    "telnet://" "wais://" "file:/" "prospero:" "z39.50s:" "z39.50r:"
    "cid:" "mid:" "vemmi:" "service:" "imap:" "nfs:" "acap:" "rtsp:"
    "tip:" "pop:" "data:" "dav:" "opaquelocktoken:" "sip:" "tel:" "fax:"
    "modem:" "ldap:" "https://" "soap.beep:" "soap.beeps:" "urn:" "go:"
    "afs:" "tn3270:" "mailserver:"
228 229 230
    "crid:" "dict:" "dns:" "dtn:" "h323:" "im:" "info:" "ipp:"
    "iris.beep:" "mtqp:" "mupdate:" "pres:" "sips:" "snmp:" "tag:"
    "tftp:" "xmlrpc.beep:" "xmlrpc.beeps:" "xmpp:"
231
  ;; Compatibility
232
    "snews:" "irc:" "mms://" "mmsh://")
233
  "Uniform Resource Identifier (URI) Schemes.")
234

235
(defvar thing-at-point-url-regexp
236 237
  (concat "\\<\\(" (mapconcat 'identity thing-at-point-uri-schemes "\\|") "\\)"
          thing-at-point-url-path-regexp)
238 239 240 241 242 243 244 245 246
  "A regular expression probably matching a complete URL.")

(defvar thing-at-point-markedup-url-regexp
  "<URL:[^>]+>"
  "A regular expression matching a URL marked up per RFC1738.
This may contain whitespace (including newlines) .")

(put 'url 'bounds-of-thing-at-point 'thing-at-point-bounds-of-url-at-point)
(defun thing-at-point-bounds-of-url-at-point ()
247 248 249
  (let ((strip (thing-at-point-looking-at
			 thing-at-point-markedup-url-regexp))) ;; (url "") short
    (if (or strip
250
	    (thing-at-point-looking-at thing-at-point-url-regexp)
251
	    ;; Access scheme omitted?
252 253 254
	    ;; (setq short (thing-at-point-looking-at
	    ;;     	 thing-at-point-short-url-regexp))
            )
255 256
	(let ((beginning (match-beginning 0))
	      (end (match-end 0)))
257 258 259
	  (when strip
            (setq beginning (+ beginning 5))
            (setq end (- end 1)))
260 261 262 263 264
	  (cons beginning end)))))

(put 'url 'thing-at-point 'thing-at-point-url-at-point)
(defun thing-at-point-url-at-point ()
  "Return the URL around or before point.
265 266 267 268 269 270

Search backwards for the start of a URL ending at or after point.  If
no URL found, return nil.  The access scheme will be prepended if
absent: \"mailto:\" if the string contains \"@\", \"ftp://\" if it
starts with \"ftp\" and not \"ftp:/\", or \"http://\" by default."

271 272 273 274 275 276 277 278 279 280 281 282
  (let ((url "") short strip)
    (if (or (setq strip (thing-at-point-looking-at
			 thing-at-point-markedup-url-regexp))
	    (thing-at-point-looking-at thing-at-point-url-regexp)
	    ;; Access scheme omitted?
	    (setq short (thing-at-point-looking-at
			 thing-at-point-short-url-regexp)))
	(progn
	  (setq url (buffer-substring-no-properties (match-beginning 0)
						    (match-end 0)))
	  (and strip (setq url (substring url 5 -1))) ; Drop "<URL:" & ">"
	  ;; strip whitespace
283
	  (while (string-match "[ \t\n\r]+" url)
284
	    (setq url (replace-match "" t t url)))
285 286 287 288
	  (and short (setq url (concat (cond ((string-match "^[a-zA-Z]+:" url)
					       ;; already has a URL scheme.
					       "")
					     ((string-match "@" url)
289 290 291 292 293 294
                                              "mailto:")
					     ;; e.g. ftp.swiss... or ftp-swiss...
                                             ((string-match "^ftp" url)
                                              "ftp://")
                                             (t "http://"))
                                       url)))
295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333
	  (if (string-equal "" url)
	      nil
	    url)))))

;; The normal thingatpt mechanism doesn't work for complex regexps.
;; This should work for almost any regexp wherever we are in the
;; match.  To do a perfect job for any arbitrary regexp would mean
;; testing every position before point.  Regexp searches won't find
;; matches that straddle the start position so we search forwards once
;; and then back repeatedly and then back up a char at a time.

(defun thing-at-point-looking-at (regexp)
  "Return non-nil if point is in or just after a match for REGEXP.
Set the match data from the earliest such match ending at or after
point."
  (save-excursion
    (let ((old-point (point)) match)
      (and (looking-at regexp)
	   (>= (match-end 0) old-point)
	   (setq match (point)))
      ;; Search back repeatedly from end of next match.
      ;; This may fail if next match ends before this match does.
      (re-search-forward regexp nil 'limit)
      (while (and (re-search-backward regexp nil t)
		  (or (> (match-beginning 0) old-point)
		      (and (looking-at regexp)	; Extend match-end past search start
			   (>= (match-end 0) old-point)
			   (setq match (point))))))
      (if (not match) nil
	(goto-char match)
	;; Back up a char at a time in case search skipped
	;; intermediate match straddling search start pos.
	(while (and (not (bobp))
		    (progn (backward-char 1) (looking-at regexp))
		    (>= (match-end 0) old-point)
		    (setq match (point))))
	(goto-char match)
	(looking-at regexp)))))

334
(put 'url 'end-op
335 336 337 338 339
     (lambda ()
       (let ((bounds (thing-at-point-bounds-of-url-at-point)))
         (if bounds
             (goto-char (cdr bounds))
           (error "No URL here")))))
340
(put 'url 'beginning-op
341 342 343 344 345
     (lambda ()
       (let ((bounds (thing-at-point-bounds-of-url-at-point)))
         (if bounds
             (goto-char (car bounds))
           (error "No URL here")))))
Richard M. Stallman's avatar
Richard M. Stallman committed
346

347 348
;;   Email addresses
(defvar thing-at-point-email-regexp
349
  "<?[-+_.~a-zA-Z][-+_.~:a-zA-Z0-9]*@[-.a-zA-Z0-9]+>?"
350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373
  "A regular expression probably matching an email address.
This does not match the real name portion, only the address, optionally
with angle brackets.")

;; Haven't set 'forward-op on 'email nor defined 'forward-email' because
;; not sure they're actually needed, and URL seems to skip them too.
;; Note that (end-of-thing 'email) and (beginning-of-thing 'email)
;; work automagically, though.

(put 'email 'bounds-of-thing-at-point
     (lambda ()
       (let ((thing (thing-at-point-looking-at thing-at-point-email-regexp)))
         (if thing
             (let ((beginning (match-beginning 0))
                   (end (match-end 0)))
               (cons beginning end))))))

(put 'email 'thing-at-point
     (lambda ()
       (let ((boundary-pair (bounds-of-thing-at-point 'email)))
         (if boundary-pair
             (buffer-substring-no-properties
              (car boundary-pair) (cdr boundary-pair))))))

374
;;  Whitespace
Richard M. Stallman's avatar
Richard M. Stallman committed
375

376
(defun forward-whitespace (arg)
Richard M. Stallman's avatar
Richard M. Stallman committed
377
  (interactive "p")
378
  (if (natnump arg)
379
      (re-search-forward "[ \t]+\\|\n" nil 'move arg)
380
    (while (< arg 0)
381
      (if (re-search-backward "[ \t]+\\|\n" nil 'move)
Richard M. Stallman's avatar
Richard M. Stallman committed
382 383
	  (or (eq (char-after (match-beginning 0)) 10)
	      (skip-chars-backward " \t")))
384
      (setq arg (1+ arg)))))
Richard M. Stallman's avatar
Richard M. Stallman committed
385

386
;;  Buffer
Richard M. Stallman's avatar
Richard M. Stallman committed
387

388 389
(put 'buffer 'end-op (lambda () (goto-char (point-max))))
(put 'buffer 'beginning-op (lambda () (goto-char (point-min))))
Richard M. Stallman's avatar
Richard M. Stallman committed
390

391
;;  Symbols
Richard M. Stallman's avatar
Richard M. Stallman committed
392

393
(defun forward-symbol (arg)
Richard M. Stallman's avatar
Richard M. Stallman committed
394
  (interactive "p")
395
  (if (natnump arg)
396
      (re-search-forward "\\(\\sw\\|\\s_\\)+" nil 'move arg)
397
    (while (< arg 0)
398
      (if (re-search-backward "\\(\\sw\\|\\s_\\)+" nil 'move)
Richard M. Stallman's avatar
Richard M. Stallman committed
399
	  (skip-syntax-backward "w_"))
400
      (setq arg (1+ arg)))))
Richard M. Stallman's avatar
Richard M. Stallman committed
401

402
;;  Syntax blocks
403 404 405 406

(defun forward-same-syntax (&optional arg)
  (interactive "p")
  (while (< arg 0)
407
    (skip-syntax-backward
408 409 410 411 412 413
     (char-to-string (char-syntax (char-after (1- (point))))))
    (setq arg (1+ arg)))
  (while (> arg 0)
    (skip-syntax-forward (char-to-string (char-syntax (char-after (point)))))
    (setq arg (1- arg))))

414
;;  Aliases
Richard M. Stallman's avatar
Richard M. Stallman committed
415 416 417 418

(defun word-at-point () (thing-at-point 'word))
(defun sentence-at-point () (thing-at-point 'sentence))

419
(defun read-from-whole-string (str)
420
  "Read a Lisp expression from STR.
421 422
Signal an error if the entire string was not used."
  (let* ((read-data (read-from-string str))
423
	 (more-left
Richard M. Stallman's avatar
Richard M. Stallman committed
424
	  (condition-case nil
Richard M. Stallman's avatar
Richard M. Stallman committed
425
	      ;; The call to `ignore' suppresses a compiler warning.
426
	      (progn (ignore (read-from-string (substring str (cdr read-data))))
Richard M. Stallman's avatar
Richard M. Stallman committed
427 428 429 430 431 432
		     t)
	    (end-of-file nil))))
    (if more-left
	(error "Can't read whole string")
      (car read-data))))

433 434
(defun form-at-point (&optional thing pred)
  (let ((sexp (condition-case nil
435
		  (read-from-whole-string (thing-at-point (or thing 'sexp)))
Richard M. Stallman's avatar
Richard M. Stallman committed
436
		(error nil))))
437
    (if (or (not pred) (funcall pred sexp)) sexp)))
Richard M. Stallman's avatar
Richard M. Stallman committed
438

Dave Love's avatar
Dave Love committed
439
;;;###autoload
440 441 442
(defun sexp-at-point ()
  "Return the sexp at point, or nil if none is found."
  (form-at-point 'sexp))
Dave Love's avatar
Dave Love committed
443
;;;###autoload
444
(defun symbol-at-point ()
445
  "Return the symbol at point, or nil if none is found."
446 447
  (let ((thing (thing-at-point 'symbol)))
    (if thing (intern thing))))
Dave Love's avatar
Dave Love committed
448
;;;###autoload
449 450 451
(defun number-at-point ()
  "Return the number at point, or nil if none is found."
  (form-at-point 'sexp 'numberp))
Dave Love's avatar
Dave Love committed
452
;;;###autoload
453 454 455
(defun list-at-point ()
  "Return the Lisp list at point, or nil if none is found."
  (form-at-point 'list 'listp))
Richard M. Stallman's avatar
Richard M. Stallman committed
456

457
;; arch-tag: bb65a163-dae2-4055-aedc-fe11f497f698
458
;;; thingatpt.el ends here