cc-engine.el 484 KB
Newer Older
1
;;; cc-engine.el --- core syntax guessing engine for CC mode -*- coding: utf-8 -*-
Richard M. Stallman's avatar
Richard M. Stallman committed
2

Paul Eggert's avatar
Paul Eggert committed
3
;; Copyright (C) 1985, 1987, 1992-2019 Free Software Foundation, Inc.
Richard M. Stallman's avatar
Richard M. Stallman committed
4

Alan Mackenzie's avatar
Alan Mackenzie committed
5 6
;; Authors:    2001- Alan Mackenzie
;;             1998- Martin Stjernholm
7
;;             1992-1999 Barry A. Warsaw
8 9
;;             1987 Dave Detlefs
;;             1987 Stewart Clamen
Richard M. Stallman's avatar
Richard M. Stallman committed
10
;;             1985 Richard M. Stallman
Barry A. Warsaw's avatar
Barry A. Warsaw committed
11
;; Maintainer: bug-cc-mode@gnu.org
Richard M. Stallman's avatar
Richard M. Stallman committed
12
;; Created:    22-Apr-1997 (split from cc-mode.el)
13 14
;; Keywords:   c languages
;; Package:    cc-mode
Richard M. Stallman's avatar
Richard M. Stallman committed
15 16 17

;; This file is part of GNU Emacs.

18
;; GNU Emacs is free software: you can redistribute it and/or modify
Richard M. Stallman's avatar
Richard M. Stallman committed
19
;; it under the terms of the GNU General Public License as published by
20 21
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.
Richard M. Stallman's avatar
Richard M. Stallman committed
22 23 24 25 26 27 28

;; GNU Emacs is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.

;; You should have received a copy of the GNU General Public License
29
;; along with GNU Emacs.  If not, see <https://www.gnu.org/licenses/>.
Richard M. Stallman's avatar
Richard M. Stallman committed
30

31 32
;;; Commentary:

33 34 35 36 37 38 39 40 41 42
;; The functions which have docstring documentation can be considered
;; part of an API which other packages can use in CC Mode buffers.
;; Otoh, undocumented functions and functions with the documentation
;; in comments are considered purely internal and can change semantics
;; or even disappear in the future.
;;
;; (This policy applies to CC Mode as a whole, not just this file.  It
;; probably also applies to many other Emacs packages, but here it's
;; clearly spelled out.)

43 44 45 46
;; Hidden buffer changes
;;
;; Various functions in CC Mode use text properties for caching and
;; syntactic markup purposes, and those of them that might modify such
47 48 49 50
;; properties but still don't modify the buffer in a visible way are
;; said to do "hidden buffer changes".  They should be used within
;; `c-save-buffer-state' or a similar function that saves and restores
;; buffer modifiedness, disables buffer change hooks, etc.
51
;;
52 53
;; Interactive functions are assumed to not do hidden buffer changes,
;; except in the specific parts of them that do real changes.
54
;;
55 56
;; Lineup functions are assumed to do hidden buffer changes.  They
;; must not do real changes, though.
57
;;
58 59 60 61 62 63 64 65 66 67 68 69
;; All other functions that do hidden buffer changes have that noted
;; in their doc string or comment.
;;
;; The intention with this system is to avoid wrapping every leaf
;; function that does hidden buffer changes inside
;; `c-save-buffer-state'.  It should be used as near the top of the
;; interactive functions as possible.
;;
;; Functions called during font locking are allowed to do hidden
;; buffer changes since the font-lock package runs them in a context
;; similar to `c-save-buffer-state' (in fact, that function is heavily
;; inspired by `save-buffer-state' in the font-lock package).
70 71 72 73 74 75 76

;; Use of text properties
;;
;; CC Mode uses several text properties internally to mark up various
;; positions, e.g. to improve speed and to eliminate glitches in
;; interactive refontification.
;;
77 78 79
;; Note: This doc is for internal use only.  Other packages should not
;; assume that these text properties are used as described here.
;;
80 81 82 83
;; 'category
;;   Used for "indirection".  With its help, some other property can
;;   be cheaply and easily switched on or off everywhere it occurs.
;;
84
;; 'syntax-table
85
;;   Used to modify the syntax of some characters.  It is used to
Alan Mackenzie's avatar
Alan Mackenzie committed
86 87 88
;;   mark the "<" and ">" of angle bracket parens with paren syntax, to
;;   "hide" obtrusive characters in preprocessor lines, and to mark C++
;;   raw strings to enable their fontification.
89 90 91 92 93 94 95 96 97 98 99 100 101 102
;;
;;   This property is used on single characters and is therefore
;;   always treated as front and rear nonsticky (or start and end open
;;   in XEmacs vocabulary).  It's therefore installed on
;;   `text-property-default-nonsticky' if that variable exists (Emacs
;;   >= 21).
;;
;; 'c-is-sws and 'c-in-sws
;;   Used by `c-forward-syntactic-ws' and `c-backward-syntactic-ws' to
;;   speed them up.  See the comment blurb before `c-put-is-sws'
;;   below for further details.
;;
;; 'c-type
;;   This property is used on single characters to mark positions with
103 104
;;   special syntactic relevance of various sorts.  Its primary use is
;;   to avoid glitches when multiline constructs are refontified
105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132
;;   interactively (on font lock decoration level 3).  It's cleared in
;;   a region before it's fontified and is then put on relevant chars
;;   in that region as they are encountered during the fontification.
;;   The value specifies the kind of position:
;;
;;     'c-decl-arg-start
;;  	 Put on the last char of the token preceding each declaration
;;  	 inside a declaration style arglist (typically in a function
;;  	 prototype).
;;
;;     'c-decl-end
;;  	 Put on the last char of the token preceding a declaration.
;;  	 This is used in cases where declaration boundaries can't be
;;  	 recognized simply by looking for a token like ";" or "}".
;;  	 `c-type-decl-end-used' must be set if this is used (see also
;;  	 `c-find-decl-spots').
;;
;;     'c-<>-arg-sep
;;  	 Put on the commas that separate arguments in angle bracket
;;  	 arglists like C++ template arglists.
;;
;;     'c-decl-id-start and 'c-decl-type-start
;;  	 Put on the last char of the token preceding each declarator
;;  	 in the declarator list of a declaration.  They are also used
;;  	 between the identifiers in cases like enum declarations.
;;  	 'c-decl-type-start is used when the declarators are types,
;;  	 'c-decl-id-start otherwise.
;;
133 134
;;     'c-not-decl
;;       Put on the brace which introduces a brace list and on the commas
135
;;       which separate the elements within it.
136
;;
137 138 139 140
;; 'c-awk-NL-prop
;;   Used in AWK mode to mark the various kinds of newlines.  See
;;   cc-awk.el.

141 142
;;; Code:

Barry A. Warsaw's avatar
Barry A. Warsaw committed
143
(eval-when-compile
Gerd Moellmann's avatar
Gerd Moellmann committed
144
  (let ((load-path
Gerd Moellmann's avatar
Gerd Moellmann committed
145 146 147
	 (if (and (boundp 'byte-compile-dest-file)
		  (stringp byte-compile-dest-file))
	     (cons (file-name-directory byte-compile-dest-file) load-path)
Gerd Moellmann's avatar
Gerd Moellmann committed
148
	   load-path)))
149
    (load "cc-bytecomp" nil t)))
Gerd Moellmann's avatar
Gerd Moellmann committed
150 151

(cc-require 'cc-defs)
152
(cc-require-when-compile 'cc-langs)
Gerd Moellmann's avatar
Gerd Moellmann committed
153
(cc-require 'cc-vars)
154

155 156
(eval-when-compile (require 'cl))

Gerd Moellmann's avatar
Gerd Moellmann committed
157

158 159 160 161
;; Make declarations for all the `c-lang-defvar' variables in cc-langs.

(defmacro c-declare-lang-variables ()
  ;; Expand to a `progn' which, for every entry in the cdr of
  ;; `c-lang-variable-inits', declares the entry's variable with
  ;; `defvar' (initial value nil) and makes it buffer local.
  ;;
  ;; The car of an entry is the variable name.  Element 2 of the
  ;; entry, when non-nil, is passed to `defvar' as the doc string.
  `(progn
     ,@(c--mapcan (lambda (init)
                    `(,(if (elt init 2)
                           `(defvar ,(car init) nil ,(elt init 2))
                         `(defvar ,(car init) nil))
                      (make-variable-buffer-local ',(car init))))
                  (cdr c-lang-variable-inits))))
168 169 170 171 172 173 174 175 176
(c-declare-lang-variables)


;;; Internal state variables.

;; Internal state of hungry delete key feature
(defvar c-hungry-delete-key nil)
(make-variable-buffer-local 'c-hungry-delete-key)

177 178 179 180 181 182 183
;; The electric flag (toggled by `c-toggle-electric-state').
;; If t, electric actions (like automatic reindentation, and (if
;; c-auto-newline is also set) auto newlining) will happen when an electric
;; key like `{' is pressed (or an electric keyword like `else').
(defvar c-electric-flag t)
(make-variable-buffer-local 'c-electric-flag)

184 185 186 187
;; Internal state of auto newline feature.
(defvar c-auto-newline nil)
(make-variable-buffer-local 'c-auto-newline)

188
;; Included in the mode line to indicate the active submodes.
189 190
;; (defvar c-submode-indicators nil)
;; (make-variable-buffer-local 'c-submode-indicators)
191

192 193 194 195 196 197 198 199 200
(defun c-calculate-state (arg prevstate)
  ;; Compute the new boolean state from PREVSTATE (t or nil) and the
  ;; prefix argument ARG:
  ;;   - ARG nil or numerically zero: toggle PREVSTATE;
  ;;   - ARG negative: the state becomes nil;
  ;;   - ARG positive: the state becomes t.
  ;; ARG may be any raw prefix argument; it is converted with
  ;; `prefix-numeric-value'.
  (let ((count (and arg (prefix-numeric-value arg))))
    (cond
     ((null count) (not prevstate))
     ((zerop count) (not prevstate))
     (t (> count 0)))))

Martin Stjernholm's avatar
Martin Stjernholm committed
201 202 203 204 205 206 207 208 209 210 211

;; Basic handling of preprocessor directives.

;; This is a dynamically bound cache used together with
;; `c-query-macro-start' and `c-query-and-set-macro-start'.  It only
;; works as long as point doesn't cross a macro boundary.
(defvar c-macro-start 'unknown)

(defsubst c-query-and-set-macro-start ()
  ;; Return the start position of the macro containing point, or nil
  ;; if point isn't in a macro, using `c-macro-start' as a cache.
  ;;
  ;; While `c-macro-start' still holds a symbol (e.g. its initial
  ;; value `unknown', or nil from an earlier query), the answer is
  ;; computed with `c-beginning-of-macro' and stored back into
  ;; `c-macro-start'.  Point is preserved.
  (if (symbolp c-macro-start)
      (setq c-macro-start (save-excursion
                            (c-save-buffer-state ()
                              (and (c-beginning-of-macro)
                                   (point)))))
    c-macro-start))

(defsubst c-query-macro-start ()
  ;; Return the start position of the macro containing point, or nil
  ;; if point isn't in a macro.
  ;;
  ;; A non-symbol value in `c-macro-start' is returned as is.
  ;; Otherwise the position is computed with `c-beginning-of-macro';
  ;; unlike `c-query-and-set-macro-start', the result is NOT stored
  ;; in `c-macro-start'.  Point is preserved.
  (if (symbolp c-macro-start)
      (save-excursion
        (c-save-buffer-state ()
          (and (c-beginning-of-macro)
               (point))))
    c-macro-start))

225 226 227 228 229 230 231 232 233 234 235 236
;; One element macro cache to cope with continual movement within very large
;; CPP macros.
(defvar c-macro-cache nil)
(make-variable-buffer-local 'c-macro-cache)
;; Nil or cons of the bounds of the most recent CPP form probed by
;; `c-beginning-of-macro', `c-end-of-macro' or `c-syntactic-end-of-macro'.
;; The cdr will be nil if we know only the start of the CPP form.
(defvar c-macro-cache-start-pos nil)
(make-variable-buffer-local 'c-macro-cache-start-pos)
;; The starting position from where we determined `c-macro-cache'.
(defvar c-macro-cache-syntactic nil)
(make-variable-buffer-local 'c-macro-cache-syntactic)
237 238 239 240
;; Either nil, or the syntactic end of the macro currently represented by
;; `c-macro-cache'.
(defvar c-macro-cache-no-comment nil)
(make-variable-buffer-local 'c-macro-cache-no-comment)
241 242
;; Either nil, or the position of a comment which is open at the end of the
;; macro represented by `c-macro-cache'.
243

244
(defun c-invalidate-macro-cache (beg _end)
  ;; Called from a before-change function.  If the change region is before or
  ;; in the macro characterized by `c-macro-cache' etc., nullify it
  ;; appropriately.  BEG and END are the standard before-change-functions
  ;; parameters.  END isn't used.
  (cond
   ;; Nothing cached, nothing to invalidate.
   ((null c-macro-cache))
   ;; The change starts at or before the cached macro start: the whole
   ;; cache is unreliable.
   ((<= beg (car c-macro-cache))
    (setq c-macro-cache nil
          c-macro-cache-start-pos nil
          c-macro-cache-syntactic nil
          c-macro-cache-no-comment nil))
   ;; The change starts inside a macro whose end is known: keep the start,
   ;; forget the end and everything derived from it.
   ((and (cdr c-macro-cache)
         (< beg (cdr c-macro-cache)))
    (setcdr c-macro-cache nil)
    (setq c-macro-cache-start-pos beg
          c-macro-cache-syntactic nil
          c-macro-cache-no-comment nil))
   ;; Only the macro start is known, and the change starts before the
   ;; position up to which the macro was known to extend.  Text between BEG
   ;; and that position may stop being part of the macro, so pull
   ;; `c-macro-cache-start-pos' back to BEG and drop the derived caches.
   ((and (not (cdr c-macro-cache))
         c-macro-cache-start-pos
         (< beg c-macro-cache-start-pos))
    (setq c-macro-cache-start-pos beg
          c-macro-cache-syntactic nil
          c-macro-cache-no-comment nil))))
262

263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280
(defun c-macro-is-genuine-p ()
  ;; Decide whether the apparent CPP construct at point is a real one.
  ;; The doubtful case is when point's line starts at the beginning of
  ;; a narrowed region: the "#" might then be, say, the second
  ;; character of a "##" operator.  In that case the buffer is widened
  ;; temporarily and the real line start is matched against
  ;; `c-anchored-cpp-prefix'.  Return t for a genuine macro, nil
  ;; otherwise.  Point is left where it was.
  (let ((orig (point))
        genuine)
    (beginning-of-line)
    (setq genuine
          (cond
           ;; The line doesn't start at the narrowing boundary.
           ((not (eq (point) (point-min))) t)
           ;; The boundary is the real start of the buffer.
           ((= (point) 1) t)
           (t
            (save-restriction
              (widen)
              (beginning-of-line)
              (and (looking-at c-anchored-cpp-prefix)
                   (eq (match-beginning 1) orig))))))
    (goto-char orig)
    genuine))

Martin Stjernholm's avatar
Martin Stjernholm committed
281 282 283 284 285
(defun c-beginning-of-macro (&optional lim)
  "Go to the beginning of a preprocessor directive.
Leave point at the beginning of the directive and return t if in one,
otherwise return nil and leave point unchanged.

Optional LIM is a backward limit for the search; text before it is
not considered.

Note that this function might do hidden buffer changes.  See the
comment at the start of cc-engine.el for more info."
  (let ((here (point)))
    ;; Languages without a preprocessor have no `c-opt-cpp-prefix'.
    (when c-opt-cpp-prefix
      (if (and (car c-macro-cache)
               (>= (point) (car c-macro-cache))
               ;; Point is inside the cached macro: either before its
               ;; known end, or before the furthest position already
               ;; known to belong to it.
               (or (and (cdr c-macro-cache)
                        (<= (point) (cdr c-macro-cache)))
                   (<= (point) c-macro-cache-start-pos)))
          ;; Cache hit.  If the cached start lies before the limit the
          ;; result is nil and point doesn't move.
          (unless (< (car c-macro-cache) (or lim (point-min)))
            (goto-char (max (or lim (point-min)) (car c-macro-cache)))
            (setq c-macro-cache-start-pos
                  (max c-macro-cache-start-pos here))
            t)
        ;; Cache miss: flush the cache and search afresh.
        (setq c-macro-cache nil
              c-macro-cache-start-pos nil
              c-macro-cache-syntactic nil
              c-macro-cache-no-comment nil)

        (save-restriction
          (if lim (narrow-to-region lim (point-max)))
          (beginning-of-line)
          ;; Step back over lines continued with a backslash.
          (while (eq (char-before (1- (point))) ?\\)
            (forward-line -1))
          (back-to-indentation)
          (if (and (<= (point) here)
                   (save-match-data (looking-at c-opt-cpp-start))
                   (c-macro-is-genuine-p))
              (progn
                ;; Remember the start; the end isn't known yet.
                (setq c-macro-cache (cons (point) nil)
                      c-macro-cache-start-pos here
                      c-macro-cache-syntactic nil)
                t)
            (goto-char here)
            nil))))))
Martin Stjernholm's avatar
Martin Stjernholm committed
321

322
(defun c-end-of-macro (&optional lim)
  "Go to the end of a preprocessor directive.
More accurately, move the point to the end of the closest following
line that doesn't end with a line continuation backslash - no check is
done that the point is inside a cpp directive to begin with.

If LIM is provided, it is a limit position at which point is left
if the end of the macro doesn't occur earlier.

Note that this function might do hidden buffer changes.  See the
comment at the start of cc-engine.el for more info."
  (save-restriction
    (if lim (narrow-to-region (point-min) lim))
    (if (and (cdr c-macro-cache)
             (<= (point) (cdr c-macro-cache))
             (>= (point) (car c-macro-cache)))
        ;; Point is inside the cached macro and its end is known.
        (goto-char (cdr c-macro-cache))
      ;; Keep the cache only if point is within the part of the cached
      ;; macro already known to belong to it; otherwise flush it.
      (unless (and (car c-macro-cache)
                   (<= (point) c-macro-cache-start-pos)
                   (>= (point) (car c-macro-cache)))
        (setq c-macro-cache nil
              c-macro-cache-start-pos nil
              c-macro-cache-syntactic nil
              c-macro-cache-no-comment nil))
      ;; Move over lines for as long as they end in a backslash.
      (while (progn
               (end-of-line)
               (when (and (eq (char-before) ?\\)
                          (not (eobp)))
                 (forward-char)
                 t)))
      ;; Record the end of the macro in the cache.
      ;; NOTE(review): `bolp' directly after `end-of-line' can only hold
      ;; on an empty line, so the end looks like it is rarely cached
      ;; here -- confirm that this is intended.
      (when (and (car c-macro-cache)
                 (bolp)
                 (not (eq (char-before (1- (point))) ?\\)))
        (setcdr c-macro-cache (point))
        (setq c-macro-cache-syntactic nil)))))
Martin Stjernholm's avatar
Martin Stjernholm committed
357

358 359 360 361 362 363 364 365 366 367 368 369 370
(defun c-syntactic-end-of-macro ()
  ;; Go to the end of a CPP directive, or a "safe" pos just before.
  ;;
  ;; This is normally the end of the next non-escaped line.  A "safe"
  ;; position is one not within a string or comment.  (The EOL on a line
  ;; comment is NOT "safe").
  ;;
  ;; This function must only be called from the beginning of a CPP construct.
  ;;
  ;; The result is cached in `c-macro-cache-syntactic'; the position
  ;; reached is returned.
  ;;
  ;; Note that this function might do hidden buffer changes.  See the comment
  ;; at the start of cc-engine.el for more info.
  (let* ((here (point))
         (there (progn (c-end-of-macro) (point)))
         s)                             ; parse state at THERE
    (if c-macro-cache-syntactic
        (goto-char c-macro-cache-syntactic)
      (setq s (parse-partial-sexp here there))
      ;; While THERE is inside a literal, pull it back to just before
      ;; the start of that literal (element 8 of the state) and reparse.
      (while (and (or (nth 3 s)  ; in a string
                      (and (nth 4 s) ; in a comment (maybe at end of line comment)
                           (not (eq (nth 7 s) 'syntax-table)))) ; Not a pseudo comment
                  (> there here))       ; No infinite loops, please.
        (setq there (1- (nth 8 s)))
        (setq s (parse-partial-sexp here there)))
      ;; `parse-partial-sexp' has left point at THERE.
      (setq c-macro-cache-syntactic (point)))
    (point)))

(defun c-no-comment-end-of-macro ()
  ;; Go to the start of the comment which is open at the end of the current
  ;; CPP directive, or to the end of that directive.  For this purpose, open
  ;; strings are ignored.
  ;;
  ;; This function must only be called from the beginning of a CPP construct.
  ;;
  ;; The result is cached in `c-macro-cache-no-comment'; the position
  ;; reached is returned.
  ;;
  ;; Note that this function might do hidden buffer changes.  See the comment
  ;; at the start of cc-engine.el for more info.
  (let* ((here (point))
         (there (progn (c-end-of-macro) (point)))
         s)                             ; parse state at THERE
    (if c-macro-cache-no-comment
        (goto-char c-macro-cache-no-comment)
      (setq s (parse-partial-sexp here there))
      ;; An unterminated string is disregarded by restarting the parse
      ;; just after its opening delimiter.
      (while (and (nth 3 s)      ; in a string
                  (> there here))       ; No infinite loops, please.
        (setq here (1+ (nth 8 s)))
        (setq s (parse-partial-sexp here there)))
      ;; If a comment is still open at THERE, go to where it starts.
      (when (and (nth 4 s)
                 (not (eq (nth 7 s) 'syntax-table))) ; no pseudo comments.
        (goto-char (nth 8 s)))
      (setq c-macro-cache-no-comment (point)))
    (point)))

409 410 411 412 413 414 415 416 417 418 419 420
(defun c-forward-over-cpp-define-id ()
  ;; Point is assumed to be at the "#" introducing a preprocessor
  ;; directive.  If the text there matches `c-opt-cpp-macro-define-id'
  ;; (a "#define" or whatever that regexp specifies, together with the
  ;; identifier being defined), move point to the end of the match and
  ;; return non-nil.  Otherwise return nil without moving point.
  ;;
  ;; This function might do hidden buffer changes.
  (and c-opt-cpp-macro-define-id
       (looking-at c-opt-cpp-macro-define-id)
       (goto-char (match-end 0))))

Martin Stjernholm's avatar
Martin Stjernholm committed
421 422 423
(defun c-forward-to-cpp-define-body ()
  ;; Assuming point is at the "#" that introduces a preprocessor
  ;; directive, it's moved forward to the start of the definition body
  ;; if it's a "#define" (or whatever c-opt-cpp-macro-define
  ;; specifies).  Non-nil is returned in this case, in all other cases
  ;; nil is returned and point isn't moved.
  ;;
  ;; This function might do hidden buffer changes.
  (when (and c-opt-cpp-macro-define-start
             (looking-at c-opt-cpp-macro-define-start)
             ;; A match reaching the end of the line means there is no
             ;; body on this line to go to.
             (not (= (match-end 0) (c-point 'eol))))
    (goto-char (match-end 0))))

434 435 436

;;; Basic utility functions.

437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455
(defun c-delq-from-dotted-list (elt dlist)
  ;; Remove every occurrence of ELT (compared with `eq') from the
  ;; possibly dotted list DLIST and return the result.  An ELT in the
  ;; final cdr of a dotted list is left alone.
  ;;
  ;; The list is modified destructively, except that leading matches
  ;; are merely skipped.  Hence always use the return value:
  ;; (setq DLIST (c-delq-from-dotted-list ELT DLIST)).
  ;;
  ;; This function is needed in Emacs < 24.5, and possibly XEmacs, because
  ;; `delq' throws an error in these versions when given a dotted list.

  ;; Skip matching elements at the head; no cons is altered for these.
  (while (and (consp dlist)
              (eq (car dlist) elt))
    (setq dlist (cdr dlist)))
  ;; Splice out matching elements further down.
  (let ((keep dlist))
    (while (and (consp keep)
                (consp (cdr keep)))
      (if (eq (car (cdr keep)) elt)
          (setcdr keep (cdr (cdr keep)))
        (setq keep (cdr keep)))))
  dlist)

456
(defun c-syntactic-content (from to paren-level)
  ;; Return the given region as a string where all syntactic
  ;; whitespace is removed or, where necessary, replaced with a single
  ;; space.  If PAREN-LEVEL is given then all parens in the region are
  ;; collapsed to "()", "[]" etc.
  ;;
  ;; This function might do hidden buffer changes.

  (save-excursion
    (save-restriction
      (narrow-to-region from to)
      (goto-char from)
      ;; PARTS is the list of collected substrings, headed by a dummy
      ;; nil element; TAIL is always its last cons.
      (let* ((parts (list nil)) (tail parts) pos in-paren)

        (while (re-search-forward c-syntactic-ws-start to t)
          (goto-char (setq pos (match-beginning 0)))
          (c-forward-syntactic-ws)
          (if (= (point) pos)
              ;; No syntactic whitespace here after all; step on.
              (forward-char)

            (when paren-level
              (save-excursion
                ;; IN-PAREN is set when the scan from FROM towards POS
                ;; stops one paren level down.  POS becomes the place
                ;; where that scan stopped.
                (setq in-paren (= (car (parse-partial-sexp from pos 1)) 1)
                      pos (point))))

            ;; Keep a single space only when symbol/word constituents
            ;; sit on both sides of the removed whitespace.
            (if (and (> pos from)
                     (< (point) to)
                     (looking-at "\\w\\|\\s_")
                     (save-excursion
                       (goto-char (1- pos))
                       (looking-at "\\w\\|\\s_")))
                (progn
                  (setcdr tail (list (buffer-substring-no-properties from pos)
                                     " "))
                  (setq tail (cddr tail)))
              (setcdr tail (list (buffer-substring-no-properties from pos)))
              (setq tail (cdr tail)))

            (when in-paren
              ;; Scan to where the paren is left again, and add the
              ;; character passed last.
              (when (= (car (parse-partial-sexp pos to -1)) -1)
                (setcdr tail (list (buffer-substring-no-properties
                                    (1- (point)) (point))))
                (setq tail (cdr tail))))

            (setq from (point))))

        ;; Whatever remains after the last whitespace run.
        (setcdr tail (list (buffer-substring-no-properties from to)))
        (apply 'concat (cdr parts))))))

(defun c-shift-line-indentation (shift-amt)
  ;; Shift the indentation of the current line with the specified
  ;; amount (positive inwards).  The buffer is modified only if
  ;; SHIFT-AMT isn't equal to zero.
  ;;
  ;; POS below is point's distance from the end of the buffer; that
  ;; distance isn't affected by the change to the indentation, so it's
  ;; used to find the original place in the text again afterwards.
  (let ((pos (- (point-max) (point)))
	;; Rebind the macro start cache so that the value set by
	;; `c-query-and-set-macro-start' stays local to this call.
	(c-macro-start c-macro-start)
	tmp-char-inserted)
    (if (zerop shift-amt)
	nil
      ;; If we're on an empty line inside a macro, we take the point
      ;; to be at the current indentation and shift it to the
      ;; appropriate column. This way we don't treat the extra
      ;; whitespace out to the line continuation as indentation.
      (when (and (c-query-and-set-macro-start)
		 (looking-at "[ \t]*\\\\$")
		 (save-excursion
		   (skip-chars-backward " \t")
		   (bolp)))
	;; Temporary placeholder char marking where the indentation ends.
	(insert ?x)
	(backward-char)
	(setq tmp-char-inserted t))
      ;; The placeholder is removed again even if reindenting fails.
      (unwind-protect
	  (let ((col (current-indentation)))
	    (delete-region (c-point 'bol) (c-point 'boi))
	    (beginning-of-line)
	    (indent-to (+ col shift-amt)))
	(when tmp-char-inserted
	  (delete-char 1))))
    ;; If initial point was within line's indentation and we're not on
    ;; a line with a line continuation in a macro, position after the
    ;; indentation.  Else stay at same point in text.
    (if (and (< (point) (c-point 'boi))
	     (not tmp-char-inserted))
	(back-to-indentation)
      (if (> (- (point-max) pos) (point))
	  (goto-char (- (point-max) pos))))))
541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559

(defsubst c-keyword-sym (keyword)
  ;; Return non-nil if the string KEYWORD is a known keyword.  More
  ;; precisely, the value is the symbol for the keyword in
  ;; `c-keywords-obarray'.
  ;;
  ;; The lookup uses `intern-soft', so it never adds a symbol to the
  ;; obarray; a string that isn't in it simply gives nil.
  (intern-soft keyword c-keywords-obarray))

(defsubst c-keyword-member (keyword-sym lang-constant)
  ;; Return non-nil if the symbol KEYWORD-SYM, as returned by
  ;; `c-keyword-sym', is a member of LANG-CONSTANT, which is the name
  ;; of a language constant that ends with "-kwds".  If KEYWORD-SYM is
  ;; nil then the result is nil.
  ;;
  ;; The membership test is a lookup of LANG-CONSTANT in the property
  ;; list of KEYWORD-SYM.
  (get keyword-sym lang-constant))

;; String syntax chars, suitable for skip-syntax-(forward|backward).
;; The generic string delimiter class `|' is included only when
;; `c-emacs-features' contains `gen-string-delim'.
(defconst c-string-syntax (if (memq 'gen-string-delim c-emacs-features)
                              "\"|"
                            "\""))

560
;; Regexp matching string limit syntax.
561 562 563 564
;; Matches a char with string quote syntax, and also one with generic
;; string delimiter syntax when `c-emacs-features' contains
;; `gen-string-delim'.
(defconst c-string-limit-regexp (if (memq 'gen-string-delim c-emacs-features)
                                    "\\s\"\\|\\s|"
                                  "\\s\""))

565 566 567 568
;; Regexp matching WS followed by string limit syntax.  The WS is any
;; run of spaces and tabs (possibly empty); the string limit itself
;; is submatch 1.
(defconst c-ws*-string-limit-regexp
  (concat "[ \t]*\\(" c-string-limit-regexp "\\)"))

569 570
;; Holds formatted error strings for the few cases where parse errors
;; are reported.
571
;; Nil, or the formatted message for the latest parse error.  Each
;; buffer has its own value.
(defvar c-parsing-error nil)
(make-variable-buffer-local 'c-parsing-error)

(defun c-echo-parsing-error (&optional quiet)
  ;; Report the pending parse error in `c-parsing-error', if there is
  ;; one, through `c-benign-error'.  Nothing is reported when QUIET is
  ;; non-nil or when `c-report-syntactic-errors' is nil.  The value of
  ;; `c-parsing-error' is returned in every case.
  (if (and (not quiet)
           c-report-syntactic-errors
           c-parsing-error)
      (c-benign-error "%s" c-parsing-error))
  c-parsing-error)

;; Faces given to comments and string literals.  This is used in some
;; situations to speed up recognition; it isn't mandatory that font
;; locking is in use.  This variable is extended with the face in
;; `c-doc-face-name' when fontification is activated in cc-fonts.el.
583
;; List of the faces which mark comments and string literals.
;; `font-lock-comment-delimiter-face' is included only if that face
;; exists.
(defvar c-literal-faces
  (append '(font-lock-comment-face font-lock-string-face)
          (when (facep 'font-lock-comment-delimiter-face)
            ;; New in Emacs 22.
            '(font-lock-comment-delimiter-face))))

(defsubst c-put-c-type-property (pos value)
  ;; Put a c-type property with the given value at POS.
  ;; This is a thin wrapper around `c-put-char-property'.
  (c-put-char-property pos 'c-type value))

(defun c-clear-c-type-property (from to value)
  ;; Remove all occurrences of the c-type property that has the given
  ;; value in the region between FROM and TO.  VALUE is assumed to not
  ;; be nil.
  ;;
  ;; Note: This assumes that c-type is put on single chars only; it's
  ;; very inefficient if matching properties cover large regions.
  (save-excursion
    (goto-char from)
    (while (progn
             ;; Clear the property on the char at point if it has
             ;; VALUE, then jump to the next place where c-type
             ;; changes (or to TO if there is none).
             (when (eq (get-text-property (point) 'c-type) value)
               (c-clear-char-property (point) 'c-type))
             (goto-char (c-next-single-property-change (point) 'c-type nil to))
             (< (point) to)))))
Martin Stjernholm's avatar
Martin Stjernholm committed
607

608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647

;; Some debug tools to visualize various special positions.  This
;; debug code isn't as portable as the rest of CC Mode.

(cc-bytecomp-defun overlays-in)
(cc-bytecomp-defun overlay-get)
(cc-bytecomp-defun overlay-start)
(cc-bytecomp-defun overlay-end)
(cc-bytecomp-defun delete-overlay)
(cc-bytecomp-defun overlay-put)
(cc-bytecomp-defun make-overlay)

(defun c-debug-add-face (beg end face)
  ;; Debug aid: show FACE on the region BEG..END by means of an
  ;; overlay.  Overlays in the region which already carry FACE are
  ;; deleted, and the new overlay is widened to span them as well.
  (c-save-buffer-state ()
    (dolist (ov (overlays-in beg end))
      (when (eq (overlay-get ov 'face) face)
        (setq beg (min beg (overlay-start ov))
              end (max end (overlay-end ov)))
        (delete-overlay ov)))
    (overlay-put (make-overlay beg end) 'face face)))

(defun c-debug-remove-face (beg end face)
  ;; Debug aid: stop showing FACE on the region BEG..END.  Every
  ;; overlay in the region carrying FACE is deleted.  For the parts of
  ;; those overlays which stuck out before BEG or after END, new
  ;; overlays with FACE are created.
  (c-save-buffer-state ((lo beg) (hi end))
    (dolist (ov (overlays-in beg end))
      (when (eq (overlay-get ov 'face) face)
        (setq lo (min lo (overlay-start ov))
              hi (max hi (overlay-end ov)))
        (delete-overlay ov)))
    (if (< lo beg)
        (overlay-put (make-overlay lo beg) 'face face))
    (if (> hi end)
        (overlay-put (make-overlay end hi) 'face face))))


;; `c-beginning-of-statement-1' and accompanying stuff.
Gerd Moellmann's avatar
Gerd Moellmann committed
648

649 650 651 652
;; KLUDGE ALERT: c-maybe-labelp is used to pass information between
;; c-crosses-statement-barrier-p and c-beginning-of-statement-1.  A
;; better way should be implemented, but this will at least shut up
;; the byte compiler.
653
(defvar c-maybe-labelp)
654

655 656
;; New awk-compatible version of c-beginning-of-statement-1, ACM 2002/6/22

657 658 659 660 661 662 663 664 665 666 667
;; Macros used internally in c-beginning-of-statement-1 for the
;; automaton actions.
(defmacro c-bos-push-state ()
  ;; Push the pair (STATE . SAVED-POS) onto STACK.  These are
  ;; variables of the form in which the macro is used.
  '(push (cons state saved-pos) stack))
(defmacro c-bos-pop-state (&optional do-if-done)
  ;; Pop the top entry of STACK into STATE and SAVED-POS.  If entries
  ;; remain on STACK afterwards, the value is t.  Otherwise (the entry
  ;; popped was the last one, or STACK was empty already) evaluate
  ;; DO-IF-DONE, set PRE-STMT-FOUND to t and throw nil to the tag
  ;; `loop'.
  ;;
  ;; The test is on the value of the `setq', i.e. on the new STACK.
  `(if (setq state (car (car stack))
             saved-pos (cdr (car stack))
             stack (cdr stack))
       t
     ,do-if-done
     (setq pre-stmt-found t)
     (throw 'loop nil)))
(defmacro c-bos-pop-state-and-retry ()
  ;; Pop the top entry of STACK into STATE and SAVED-POS, then throw
  ;; the remaining STACK to the tag `loop'.  PRE-STMT-FOUND is set to t
  ;; exactly when nothing remains on STACK after the pop; it is
  ;; computed before STACK itself is updated.
  '(throw 'loop (setq state (car (car stack))
                      saved-pos (cdr (car stack))
                      pre-stmt-found (not (cdr stack))
                      ;; Throw nil if stack is empty, else throw non-nil.
                      stack (cdr stack))))
(defmacro c-bos-save-pos ()
  ;; Record POS, TOK, PTOK and PPTOK, in that order, as a vector in
  ;; SAVED-POS.  `c-bos-restore-pos' reads them back.
  '(setq saved-pos (vector pos tok ptok pptok)))
(defmacro c-bos-restore-pos ()
  ;; Unless the position held in SAVED-POS is START, set POS, TOK,
  ;; PTOK and PPTOK from elements 0 to 3 of SAVED-POS, move point to
  ;; POS and set SYM to nil.
  '(if (eq (elt saved-pos 0) start)
       nil
     (setq pos (elt saved-pos 0))
     (setq tok (elt saved-pos 1))
     (setq ptok (elt saved-pos 2))
     (setq pptok (elt saved-pos 3))
     (goto-char pos)
     (setq sym nil)))
(defmacro c-bos-save-error-info (missing got)
  ;; Store POS together with the values of MISSING and GOT as a vector
  ;; in SAVED-POS.  `c-bos-report-error' formats its message from
  ;; these three elements.
  `(setq saved-pos (vector pos ,missing ,got)))
(defmacro c-bos-report-error ()
  ;; Unless NOERROR is non-nil, set `c-parsing-error' to a message
  ;; built from SAVED-POS: element 1 is the construct that is missing,
  ;; element 2 the one that was found, and element 0 the buffer
  ;; position from which the line number is computed.
  '(unless noerror
     (setq c-parsing-error
           (format-message
            "No matching `%s' found for `%s' on line %d"
            (elt saved-pos 1)
            (elt saved-pos 2)
            (1+ (count-lines (point-min)
                             (c-point 'bol (elt saved-pos 0))))))))
697 698

(defun c-beginning-of-statement-1 (&optional lim ignore-labels
					     noerror comma-delim hit-lim)
  "Move to the start of the current statement or declaration, or to
the previous one if already at the beginning of one.  Only
statements/declarations on the same level are considered, i.e. don't
move into or out of sexps (not even normal expression parentheses).

If point is already at the earliest statement within braces or parens,
this function doesn't move back into any whitespace preceding it; it
returns `same' in this case.

Stop at statement continuation tokens like \"else\", \"catch\",
\"finally\" and the \"while\" in \"do ... while\" if the start point
is within the continuation.  If starting at such a token, move to the
corresponding statement start.  If at the beginning of a statement,
move to the closest containing statement if there is any.  This might
also stop at a continuation clause.

Labels are treated as part of the following statements if
IGNORE-LABELS is non-nil.  (FIXME: Doesn't work if we stop at a known
statement start keyword.)  Otherwise, each label is treated as a
separate statement.

Macros are ignored \(i.e. skipped over) unless point is within one, in
which case the content of the macro is treated as normal code.  Aside
from any normal statement starts found in it, stop at the first token
of the content in the macro, i.e. the expression of an \"#if\" or the
start of the definition in a \"#define\".  Also stop at start of
macros before leaving them.

Return:
`label'         if stopped at a label or \"case...:\" or \"default:\";
`same'          if stopped at the beginning of the current statement;
`up'            if stepped to a containing statement;
`previous'      if stepped to a preceding statement;
`beginning'     if stepped from a statement continuation clause to
                its start clause;
`macro'         if stepped to a macro start; or
nil             if HIT-LIM is non-nil, and we hit the limit.
Note that `same' and not `label' is returned if stopped at the same
label without crossing the colon character.

LIM may be given to limit the search.  If the search hits the limit,
point will be left at the closest following token, or at the start
position if that is less.  If HIT-LIM is non-nil, nil is returned in
this case, otherwise `same'.

NOERROR turns off error logging to `c-parsing-error'.

Normally only `;' and virtual semicolons are considered to delimit
statements, but if COMMA-DELIM is non-nil then `,' is treated
as a delimiter too.

Note that this function might do hidden buffer changes.  See the
comment at the start of cc-engine.el for more info."

  ;; The bulk of this function is a pushdown automaton that looks at statement
  ;; boundaries and the tokens (such as "while") in c-opt-block-stmt-key.  Its
  ;; purpose is to keep track of nested statements, ensuring that such
  ;; statements are skipped over in their entirety (somewhat akin to what C-M-p
  ;; does with nested braces/brackets/parentheses).
  ;;
  ;; Note: The position of a boundary is the following token.
  ;;
  ;; Beginning with the current token (the one following point), move back one
  ;; sexp at a time (where a sexp is, more or less, either a token or the
  ;; entire contents of a brace/bracket/paren pair).  Each time a statement
  ;; boundary is crossed or a "while"-like token is found, update the state of
  ;; the PDA.  Stop at the beginning of a statement when the stack (holding
  ;; nested statement info) is empty and the position has been moved.
  ;;
  ;; The following variables constitute the PDA:
  ;;
  ;; sym:    This is either the "while"-like token (e.g. 'for) we've just
  ;;         scanned back over, 'boundary if we've just gone back over a
  ;;         statement boundary, or nil otherwise.
  ;; state:  takes one of the values (nil else else-boundary while
  ;;         while-boundary catch catch-boundary).
  ;;         nil means "no "while"-like token yet scanned".
  ;;         'else, for example, means "just gone back over an else".
  ;;         'else-boundary means "just gone back over a statement boundary
  ;;         immediately after having gone back over an else".
  ;; saved-pos: A vector of either saved positions (tok ptok pptok, etc.) or
  ;;         of error reporting information.
  ;; stack:  The stack onto which the PDA pushes its state.  Each entry
  ;;         consists of a saved value of state and saved-pos.  An entry is
  ;;         pushed when we move back over a "continuation" token (e.g. else)
  ;;         and popped when we encounter the corresponding opening token
  ;;         (e.g. if).
  ;;
  ;;
  ;; The following diagram briefly outlines the PDA.
  ;;
  ;; Common state:
  ;;   "else": Push state, goto state `else'.
  ;;   "while": Push state, goto state `while'.
  ;;   "catch" or "finally": Push state, goto state `catch'.
  ;;   boundary: Pop state.
  ;;   other: Do nothing special.
  ;;
  ;; State `else':
  ;;   boundary: Goto state `else-boundary'.
  ;;   other: Error, pop state, retry token.
  ;;
  ;; State `else-boundary':
  ;;   "if": Pop state.
  ;;   boundary: Error, pop state.
  ;;   other: See common state.
  ;;
  ;; State `while':
  ;;   boundary: Save position, goto state `while-boundary'.
  ;;   other: Pop state, retry token.
  ;;
  ;; State `while-boundary':
  ;;   "do": Pop state.
  ;;   boundary: Restore position if it's not at start, pop state. [*see below]
  ;;   other: See common state.
  ;;
  ;; State `catch':
  ;;   boundary: Goto state `catch-boundary'.
  ;;   other: Error, pop state, retry token.
  ;;
  ;; State `catch-boundary':
  ;;   "try": Pop state.
  ;;   "catch": Goto state `catch'.
  ;;   boundary: Error, pop state.
  ;;   other: See common state.
  ;;
  ;; [*] In the `while-boundary' state, we had pushed a 'while state, and were
  ;; searching for a "do" which would have opened a do-while.  If we didn't
  ;; find it, we discard the analysis done since the "while", go back to this
  ;; token in the buffer and restart the scanning there, this time WITHOUT
  ;; pushing the 'while state onto the stack.
  ;;
  ;; In addition to the above there is some special handling of labels
  ;; and macros.

  (let ((case-fold-search nil)
	(start (point))
	macro-start
	(delims (if comma-delim '(?\; ?,) '(?\;)))
	(c-stmt-delim-chars (if comma-delim
				c-stmt-delim-chars-with-comma
			      c-stmt-delim-chars))
	c-maybe-labelp after-case:-pos saved
	;; Current position.
	pos
	;; Position of last stmt boundary character (e.g. ;).
	boundary-pos
	;; Non-nil when a construct has been found which delimits the search
	;; for a statement start, e.g. an opening brace or a macro start, or a
	;; keyword like `if' when the PDA stack is empty.
	pre-stmt-found
	;; The position of the last sexp or bound that follows the
	;; first found colon, i.e. the start of the nonlabel part of
	;; the statement.  It's `start' if a colon is found just after
	;; the start.
	after-labels-pos
	;; Like `after-labels-pos', but the first such position inside
	;; a label, i.e. the start of the last label before the start
	;; of the nonlabel part of the statement.
	last-label-pos
	;; The last position where a label is possible provided the
	;; statement started there.  It's nil as long as no invalid
	;; label content has been found (according to
	;; `c-nonlabel-token-key').  It's `start' if no valid label
	;; content was found in the label.  Note that we might still
	;; regard it a label if it starts with `c-label-kwds'.
	label-good-pos
	;; Putative positions of the components of a bitfield declaration,
	;; e.g. "int foo : NUM_FOO_BITS ;"
	bitfield-type-pos bitfield-id-pos bitfield-size-pos
	;; Symbol just scanned back over (e.g. 'while or 'boundary).
	;; See above.
	sym
	;; Current state in the automaton.  See above.
	state
	;; Current saved positions.  See above.
	saved-pos
	;; Stack of conses (state . saved-pos).
	stack
	;; Regexp which matches "for", "if", etc.
	(cond-key (or c-opt-block-stmt-key
		      "a\\`"))	; Doesn't match anything.
	;; Return value.
	(ret 'same)
	;; Positions of the last three sexps or bounds we've stopped at.
	tok ptok pptok)

    (save-restriction
      (setq lim (if lim
		    (max lim (point-min))
		  (point-min)))
      (widen)

      (if (save-excursion
	    (and (c-beginning-of-macro)
		 (/= (point) start)))
	  (setq macro-start (point)))

      ;; Try to skip back over unary operator characters, to register
      ;; that we've moved.
      (while (progn
	       (setq pos (point))
	       (c-backward-syntactic-ws)
	       ;; Protect post-++/-- operators just before a virtual semicolon.
	       (and (not (c-at-vsemi-p))
		    (/= (skip-chars-backward "-+!*&~@`#") 0))))

      ;; Skip back over any semicolon here.  If it was a bare semicolon, we're
      ;; done.  Later on we ignore the boundaries for statements that don't
      ;; contain any sexp.  The only thing that is affected is that the error
      ;; checking is a little less strict, and we really don't bother.
      (if (and (memq (char-before) delims)
	       (progn (forward-char -1)
		      (setq saved (point))
		      (c-backward-syntactic-ws)
		      (or (memq (char-before) delims)
			  (memq (char-before) '(?: nil))
			  (eq (char-syntax (char-before)) ?\()
			  (c-at-vsemi-p))))
	  (setq ret 'previous
		pos saved)

	;; Begin at start and not pos to detect macros if we stand
	;; directly after the #.
	(goto-char start)
	(if (looking-at "\\<\\|\\W")
	    ;; Record this as the first token if not starting inside it.
	    (setq tok start))

	;; The following while loop goes back one sexp (balanced parens,
	;; etc. with contents, or symbol or suchlike) each iteration.  This
	;; movement is accomplished with a call to c-backward-sexp approx 170
	;; lines below.
	;;
	;; The loop is exited only by throwing nil to the (catch 'loop ...):
	;; 1. On reaching the start of a macro;
	;; 2. On having passed a stmt boundary with the PDA stack empty;
	;; 3. Going backwards past the search limit.
	;; 4. On reaching the start of an Objective C method def;
	;; 5. From macro `c-bos-pop-state'; when the stack is empty;
	;; 6. From macro `c-bos-pop-state-and-retry' when the stack is empty.
	(while
	    (catch 'loop ;; Throw nil to break, non-nil to continue.
	      (cond
	       ;; Are we in a macro, just after the opening #?
	       ((save-excursion
		  (and macro-start	; Always NIL for AWK.
		       (progn (skip-chars-backward " \t")
			      (eq (char-before) ?#))
		       (progn (setq saved (1- (point)))
			      (beginning-of-line)
			      (not (eq (char-before (1- (point))) ?\\)))
		       (looking-at c-opt-cpp-start)
		       (progn (skip-chars-forward " \t")
			      (eq (point) saved))))
		(goto-char saved)
		(if (and (c-forward-to-cpp-define-body)
			 (progn (c-forward-syntactic-ws start)
				(< (point) start)))
		    ;; Stop at the first token in the content of the macro.
		    (setq pos (point)
			  ignore-labels t) ; Avoid the label check on exit.
		  (setq pos saved
			ret 'macro
			ignore-labels t))
		(setq pre-stmt-found t)
		(throw 'loop nil))	; 1. Start of macro.

	       ;; Do a round through the automaton if we've just passed a
	       ;; statement boundary or passed a "while"-like token.
	       ((or sym
		    (and (looking-at cond-key)
			 (setq sym (intern (match-string 1)))))

		(when (and (< pos start) (null stack))
		  (setq pre-stmt-found t)
		  (throw 'loop nil))	; 2. Statement boundary.

		;; The PDA state handling.
                ;;
                ;; Refer to the description of the PDA in the opening
                ;; comments.  In the following OR form, the first leaf
                ;; attempts to handle one of the specific actions detailed
                ;; (e.g., finding token "if" whilst in state `else-boundary').
                ;; We drop through to the second leaf (which handles common
                ;; state) if no specific handler is found in the first cond.
                ;; If a parsing error is detected (e.g. an "else" with no
                ;; preceding "if"), we throw to the enclosing catch.
                ;;
                ;; Note that the (eq state 'else) means
		;; "we've just passed an else", NOT "we're looking for an
		;; else".
		(or (cond
		     ((eq state 'else)
		      (if (eq sym 'boundary)
			  (setq state 'else-boundary)
			(c-bos-report-error)
			(c-bos-pop-state-and-retry)))

		     ((eq state 'else-boundary)
		      (cond ((eq sym 'if)
			     (c-bos-pop-state (setq ret 'beginning)))
			    ((eq sym 'boundary)
			     (c-bos-report-error)
			     (c-bos-pop-state))))

		     ((eq state 'while)
		      (if (and (eq sym 'boundary)
			       ;; Since this can cause backtracking we do a
			       ;; little more careful analysis to avoid it:
			       ;; If there's a label in front of the while
			       ;; it can't be part of a do-while.
			       (not after-labels-pos))
			  (progn (c-bos-save-pos)
				 (setq state 'while-boundary))
			(c-bos-pop-state-and-retry))) ; Can't be a do-while

		     ((eq state 'while-boundary)
		      (cond ((eq sym 'do)
			     (c-bos-pop-state (setq ret 'beginning)))
			    ((eq sym 'boundary) ; isn't a do-while
			     (c-bos-restore-pos) ; the position of the while
			     (c-bos-pop-state)))) ; no longer searching for do.

		     ((eq state 'catch)
		      (if (eq sym 'boundary)
			  (setq state 'catch-boundary)
			(c-bos-report-error)
			(c-bos-pop-state-and-retry)))

		     ((eq state 'catch-boundary)
		      (cond
		       ((eq sym 'try)
			(c-bos-pop-state (setq ret 'beginning)))
		       ((eq sym 'catch)
			(setq state 'catch))
		       ((eq sym 'boundary)
			(c-bos-report-error)
			(c-bos-pop-state)))))

		    ;; This is state common.  We get here when the previous
		    ;; cond statement found no particular state handler.
		    (cond ((eq sym 'boundary)
			   ;; If we have a boundary at the start
			   ;; position we push a frame to go to the
			   ;; previous statement.
			   (if (>= pos start)
			       (c-bos-push-state)
			     (c-bos-pop-state)))
			  ((eq sym 'else)
			   (c-bos-push-state)
			   (c-bos-save-error-info 'if 'else)
			   (setq state 'else))
			  ((eq sym 'while)
			   ;; Is this a real while, or a do-while?
			   ;; The next `when' triggers unless we are SURE that
			   ;; the `while' is not the tail end of a `do-while'.
			   (when (or (not pptok)
				     (memq (char-after pptok) delims)
				     ;; The following kludge is to prevent
				     ;; infinite recursion when called from
				     ;; c-awk-after-if-for-while-condition-p,
				     ;; or the like.
				     (and (eq (point) start)
					  (c-vsemi-status-unknown-p))
				     (c-at-vsemi-p pptok))
			     ;; Since this can cause backtracking we do a
			     ;; little more careful analysis to avoid it: If
			     ;; the while isn't followed by a (possibly
			     ;; virtual) semicolon it can't be a do-while.
			     (c-bos-push-state)
			     (setq state 'while)))
			  ((memq sym '(catch finally))
			   (c-bos-push-state)
			   (c-bos-save-error-info 'try sym)
			   (setq state 'catch))))

		(when c-maybe-labelp
		  ;; We're either past a statement boundary or at the
		  ;; start of a statement, so throw away any label data
		  ;; for the previous one.
		  (setq after-labels-pos nil
			last-label-pos nil
			c-maybe-labelp nil))))

	      ;; Step to the previous sexp, but not if we crossed a
	      ;; boundary, since that doesn't consume an sexp.
	      (if (eq sym 'boundary)
		  (when (>= (point) lim)
		    (setq ret 'previous))

                ;; HERE IS THE SINGLE PLACE INSIDE THE PDA LOOP WHERE WE MOVE
		;; BACKWARDS THROUGH THE SOURCE.

		(c-backward-syntactic-ws)
		(let ((before-sws-pos (point))
		      ;; The end position of the area to search for statement
		      ;; barriers in this round.
		      (maybe-after-boundary-pos pos)
		      comma-delimited)

		  ;; Go back over exactly one logical sexp, taking proper
		  ;; account of macros and escaped EOLs.
		  (while
		      (progn
			(setq comma-delimited (and (not comma-delim)
						   (eq (char-before) ?\,)))
			(unless (c-safe (c-backward-sexp) t)
			  ;; Give up if we hit an unbalanced block.  Since the
			  ;; stack won't be empty the code below will report a
			  ;; suitable error.
			  (setq pre-stmt-found t)
			  (throw 'loop nil))
			(cond
			 ;; Have we moved into a macro?
			 ((and (not macro-start)
			       (c-beginning-of-macro))
			  ;; Have we crossed a statement boundary?  If not,
			  ;; keep going back until we find one or a "real" sexp.
			  (and
			   (save-excursion
			     (c-end-of-macro)
			     (not (c-crosses-statement-barrier-p
				   (point) maybe-after-boundary-pos)))
			   (setq maybe-after-boundary-pos (point))))
			 ;; Have we just gone back over an escaped NL?  This
			 ;; doesn't count as a sexp.
			 ((looking-at "\\\\$")))))

		  ;; Have we crossed a statement boundary?
		  (setq boundary-pos
			(cond
			 ;; Are we at a macro beginning?
			 ((and (not macro-start)
			       c-opt-cpp-prefix
			       (looking-at c-opt-cpp-prefix))
			  (save-excursion
			    (c-end-of-macro)
			    (c-crosses-statement-barrier-p
			     (point) maybe-after-boundary-pos)))
			 ;; Just gone back over a brace block?
			 ((and
			   (eq (char-after) ?{)
			   (not comma-delimited)
			   (not (c-looking-at-inexpr-block lim nil t))
			   (save-excursion
			     (c-backward-token-2 1 t nil)
			     (not (looking-at "=\\([^=]\\|$\\)")))
			   (or
			    (not c-opt-block-decls-with-vars-key)
			    (save-excursion
			      (c-backward-token-2 1 t nil)
			      (if (and (looking-at c-symbol-start)
				       (not (looking-at c-keywords-regexp)))
				  (c-backward-token-2 1 t nil))
			      (and
			       (not (looking-at
				     c-opt-block-decls-with-vars-key))
			       (or comma-delim
				   (not (eq (char-after) ?\,)))))))
			  (save-excursion
			    (c-forward-sexp) (point)))
			 ;; Just gone back over some paren block?
			 ((looking-at "\\s(")
			  (save-excursion
			    (goto-char (1+ (c-down-list-backward
					    before-sws-pos)))
			    (c-crosses-statement-barrier-p
			     (point) maybe-after-boundary-pos)))
			 ;; Just gone back over an ordinary symbol of some sort?
			 (t (c-crosses-statement-barrier-p
			     (point) maybe-after-boundary-pos))))

		  (when boundary-pos
		    (setq pptok ptok
			  ptok tok
			  tok boundary-pos
			  sym 'boundary)
		    ;; Like a C "continue".  Analyze the next sexp.
		    (throw 'loop t))))

	      ;; Have we gone past the limit?
	      (when (< (point) lim)
		(throw 'loop nil))	; 3. Gone back over the limit.

	      ;; ObjC method def?
	      (when (and c-opt-method-key
			 (setq saved (c-in-method-def-p)))
		(setq pos saved
		      pre-stmt-found t
		      ignore-labels t)	; Avoid the label check on exit.
		(throw 'loop nil))	; 4. ObjC method def.

	      ;; Might we have a bitfield declaration, "<type> <id> : <size>"?
	      (if c-has-bitfields
		  (cond
		   ;; The : <size> and <id> fields?
		   ((and (numberp c-maybe-labelp)
			 (not bitfield-size-pos)
			 (save-excursion
			   (goto-char (or tok start))
			   (not (looking-at c-keywords-regexp)))
			 (not (looking-at c-keywords-regexp))
			 (not (c-punctuation-in (point) c-maybe-labelp)))
		    (setq bitfield-size-pos (or tok start)
			  bitfield-id-pos (point)))
		   ;; The <type> field?
		   ((and bitfield-id-pos
			 (not bitfield-type-pos))
		    (if (and (looking-at c-symbol-key) ; Can only be an integer type.  :-)
			     (not (looking-at c-not-primitive-type-keywords-regexp))
			     (not (c-punctuation-in (point) tok)))
			(setq bitfield-type-pos (point))
		      (setq bitfield-size-pos nil
			    bitfield-id-pos nil)))))

	      ;; Handle labels.
	      (unless (eq ignore-labels t)
		(when (numberp c-maybe-labelp)
		  ;; `c-crosses-statement-barrier-p' has found a colon, so we
		  ;; might be in a label now.  Have we got a real label
		  ;; (including a case label) or something like C++'s "public:"?
		  ;; A case label might use an expression rather than a token.
		  (setq after-case:-pos (or tok start))
		  (if (or (looking-at c-nonlabel-token-key) ; e.g. "while" or "'a'"
			  ;; Catch C++'s inheritance construct "class foo : bar".
			  (save-excursion
			    (and
			     (c-safe (c-backward-sexp) t)
			     (looking-at c-nonlabel-token-2-key))))
		      (setq c-maybe-labelp nil)
		    (if after-labels-pos ; Have we already encountered a label?
			(if (not last-label-pos)
			    (setq last-label-pos (or tok start)))
		      (setq after-labels-pos (or tok start)))
		    (setq c-maybe-labelp t
			  label-good-pos nil))) ; bogus "label"

		(when (and (not label-good-pos)	; i.e. no invalid "label"'s yet
						; been found.
			   (looking-at c-nonlabel-token-key)) ; e.g. "while :"
		  ;; We're in a potential label and it's the first
		  ;; time we've found something that isn't allowed in
		  ;; one.
		  (setq label-good-pos (or tok start))))

	      ;; We've moved back by a sexp, so update the token positions.
	      (setq sym nil
		    pptok ptok
		    ptok tok
		    tok (point)
		    pos tok) ; always non-nil
	      )		     ; end of (catch 'loop ....)
	  )		     ; end of sexp-at-a-time (while ....)

	(when (and hit-lim
		   (or (not pre-stmt-found)
		       (< pos lim)
		       (>= pos start)))
	  (setq ret nil))

	;; If the stack isn't empty there might be errors to report.
	(while stack
	  (if (and (vectorp saved-pos) (eq (length saved-pos) 3))
	      (c-bos-report-error))
	  (setq saved-pos (cdr (car stack))
		stack (cdr stack)))

	(when (and (eq ret 'same)
		   (not (memq sym '(boundary ignore nil))))
	  ;; Need to investigate closer whether we've crossed
	  ;; between a substatement and its containing statement.
	  (if (setq saved
		    (cond ((and (looking-at c-block-stmt-1-2-key)
				(eq (char-after ptok) ?\())
			   pptok)
			  ((looking-at c-block-stmt-1-key)
			   ptok)
			  (t pptok)))
	      (cond ((> start saved) (setq pos saved))
		    ((= start saved) (setq ret 'up)))))

	(when (and (not ignore-labels)
		   (eq c-maybe-labelp t)
		   (not (eq ret 'beginning))
		   after-labels-pos
		   (not bitfield-type-pos) ; Bitfields take precedence over labels.
		   (or (not label-good-pos)
		       (<= label-good-pos pos)
		       (progn
			 (goto-char (if (and last-label-pos
					     (< last-label-pos start))
					last-label-pos
				      pos))
			 (looking-at c-label-kwds-regexp))))
	  ;; We're in a label.  Maybe we should step to the statement
	  ;; after it.
	  (if (< after-labels-pos start)
	      (setq pos after-labels-pos)
	    (setq ret 'label)
	    (if (and last-label-pos (< last-label-pos start))
		;; Might have jumped over several labels.  Go to the last one.
		(setq pos last-label-pos)))))

      ;; Have we got "case <expression>:"?
      (goto-char pos)
      (when (and after-case:-pos
		 (not (eq ret 'beginning))
		 (looking-at c-case-kwds-regexp))
	(if (< after-case:-pos start)
	    (setq pos after-case:-pos))
	(if (eq ret 'same)
	    (setq ret 'label)))

      ;; Skip over the unary operators that can start the statement.
      (while (progn
	       (c-backward-syntactic-ws)
	       ;; protect AWK post-inc/decrement operators, etc.
	       (and (not (c-at-vsemi-p (point)))
		    (/= (skip-chars-backward "-.+!*&~@`#") 0)))
	(setq pos (point)))
      (goto-char pos)
      ret)))
Richard M. Stallman's avatar
Richard M. Stallman committed
1323

1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336
(defun c-punctuation-in (from to)
  "Return the position of the first punctuation character between FROM and TO.
Starting at FROM, characters in `c-symbol-chars' and syntactic
whitespace (as skipped by `c-forward-syntactic-ws') are passed over,
never going beyond TO.  If the scan stops before TO, the position
where it stopped is returned; otherwise nil.  FROM must not be in a
string or comment."
  (save-excursion
    (goto-char from)
    (let (pos)
      ;; Alternate between skipping symbol characters and syntactic
      ;; whitespace until a whole round makes no progress.
      (while (progn
	       (setq pos (point))
	       (skip-chars-forward c-symbol-chars to)
	       (c-forward-syntactic-ws to)
	       (< pos (point)))))
    (if (< (point) to)
	(point))))

Richard M. Stallman's avatar
Richard M. Stallman committed
1337
(defun c-crosses-statement-barrier-p (from to)
  "Return non-nil if buffer positions FROM to TO cross one or more
statement or declaration boundaries.  The returned value is actually
the position of the earliest boundary char.  FROM must not be within
a string or comment.

The variable `c-maybe-labelp' is set to the position of the first `:' that
might start a label (i.e. not part of `::' and not preceded by `?').  If a
single `?' is found, then `c-maybe-labelp' is cleared.

For AWK, a statement which is terminated by an EOL (not a ; or a }) is
regarded as having a \"virtual semicolon\" immediately after the last token on
the line.  If this virtual semicolon is _at_ from, the function recognizes it.

Note that this function might do hidden buffer changes.  See the
comment at the start of cc-engine.el for more info."
  (let* ((skip-chars
	  ;; If the current language has CPP macros, insert # into skip-chars.
	  (if c-opt-cpp-symbol
	      (concat (substring c-stmt-delim-chars 0 1) ; "^"
		      c-opt-cpp-symbol			 ; usually "#"
		      (substring c-stmt-delim-chars 1))	 ; e.g. ";{}?:"
	    c-stmt-delim-chars))
	 (non-skip-list
	  (append (substring skip-chars 1) nil)) ; e.g. (?# ?\; ?{ ?} ?? ?:)
	 lit-range lit-start vsemi-pos)
    (save-restriction
      (widen)
      (save-excursion
	(catch 'done
	  (goto-char from)
	  ;; Scan at most one line at a time, so that a virtual semicolon
	  ;; at an end of line can be checked for at each line start.
	  (while (progn (skip-chars-forward
			 skip-chars
			 (min to (c-point 'bonl)))
			(< (point) to))
	    (cond
	     ;; Virtual semicolon?
	     ((and (bolp)
		   (save-excursion
		     (progn
		       (if (setq lit-start (c-literal-start from)) ; Have we landed in a string/comment?
			   (goto-char lit-start))
		       (c-backward-syntactic-ws) ; ? put a limit here, maybe?
		       (setq vsemi-pos (point))
		       (c-at-vsemi-p))))
	      (throw 'done vsemi-pos))
	     ;; In a string/comment?
	     ((setq lit-range (c-literal-limits from))
	      (goto-char (cdr lit-range)))
	     ((eq (char-after) ?:)
	      (forward-char)
	      (if (and (eq (char-after) ?:)
		       (< (point) to))
		  ;; Ignore scope operators.
		  (forward-char)
		;; Record the position of the colon just passed.
		(setq c-maybe-labelp (1- (point)))))
	     ((eq (char-after) ??)
	      ;; A question mark.  Can't be a label, so stop
	      ;; looking for more : and ?.
	      (setq c-maybe-labelp nil
		    skip-chars (substring c-stmt-delim-chars 0 -2)))
	     ;; At a CPP construct or a "#" or "##" operator?
	     ((and c-opt-cpp-symbol (looking-at c-opt-cpp-symbol))
	      (if (save-excursion
		    (skip-chars-backward " \t")
		    (and (bolp)
			 (or (bobp)
			     (not (eq (char-before (1- (point))) ?\\)))))
		  (c-end-of-macro)
		(skip-chars-forward c-opt-cpp-symbol)))
	     ;; Any other delimiter character is a real boundary.
	     ((memq (char-after) non-skip-list)
	      (throw 'done (point)))))
	  ;; In trailing space after an as yet undetected virtual semicolon?
	  (c-backward-syntactic-ws from)
	  (when (and (bolp) (not (bobp))) ; Can happen in AWK Mode with an
					  ; unterminated string/regexp.
	    (backward-char))
	  (if (and (< (point) to)
		   (c-at-vsemi-p))
	      (point)
	    nil))))))
1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467

(defun c-at-statement-start-p ()
  "Return non-nil if point is at the first token of a statement,
or in the syntactic whitespace leading up to that token.

\"Statement\" is used loosely here: declaration-like constructs
outside function bodies count as statements too.

Note that this function might do hidden buffer changes.  See the
comment at the start of cc-engine.el for more info."

  (save-excursion
    (let ((end (point))
	  c-maybe-labelp)
      ;; Go back to just after the closest preceding statement delimiter.
      (c-syntactic-skip-backward (substring c-stmt-delim-chars 1) nil t)
      (cond
       ((bobp) t)
       ((eq (char-before) ?}) t)
       ;; An opening brace starts a statement unless it opens a special
       ;; brace list.  Note that the check steps back over the brace.
       ((and (eq (char-before) ?{)
	     (not (and c-special-brace-lists
		       (progn (backward-char)
			      (c-looking-at-special-brace-list)))))
	t)
       (t
	(c-crosses-statement-barrier-p (point) end))))))

(defun c-at-expression-start-p ()
  "Return non-nil if point is at the first token of an expression or
statement, or in the syntactic whitespace leading up to that token.

\"Expression\" is used in a looser sense than in the language grammar:
it is any run of expression tokens not containing a comma, except for
commas enclosed in parentheses of some kind.  An expression never
extends past an enclosing parenthesis, but it may itself contain
parenthesis pairs of any sort other than braces.

As expressions never cross statement boundaries, statement beginnings
are recognized too, just as by `c-at-statement-start-p'.

Note that this function might do hidden buffer changes.  See the
comment at the start of cc-engine.el for more info."

  (save-excursion
    (let ((end (point))
	  (c-stmt-delim-chars c-stmt-delim-chars-with-comma)
	  c-maybe-labelp)
      (c-syntactic-skip-backward (substring c-stmt-delim-chars 1) nil t)
      ;; Each clause consists of a test only, so `cond' returns the
      ;; value of the first test that succeeds.
      (cond
       ((bobp))
       ((memq (char-before) '(?{ ?})))
       ;; Directly after an opening paren of some kind?
       ((save-excursion
	  (backward-char)
	  (looking-at "\\s(")))
       ((c-crosses-statement-barrier-p (point) end))))))
Richard M. Stallman's avatar
Richard M. Stallman committed
1468 1469


1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513
;; A set of functions that covers various idiosyncrasies in
;; implementations of `forward-comment'.

;; Note: Some emacsen incorrectly consider any line comment ending
;; with a backslash to continue on the next line.  I can't think of
;; any way to work around that reliably without changing the buffer,
;; though.  Suggestions welcome. ;) (No, temporarily changing the
;; syntax for backslash doesn't work, since we must treat escapes in
;; string literals correctly.)

(defun c-forward-single-comment ()
  "Skip forward over whitespace and at most one following comment.
Return t if a comment was skipped, nil otherwise.  Either way, point
ends up after the whitespace that follows.  Line continuations,
i.e. backslashes followed by line breaks, count as whitespace.  The
line break that terminates a line comment is regarded as the comment
ender, so after skipping a line comment point is at the beginning of
the next line.

This function does not do any hidden buffer changes."
  (let ((origin (point)))
    ;; Step over leading whitespace and line continuations first.
    (if (looking-at "\\([ \t\n\r\f\v]\\|\\\\[\n\r]\\)+")
        (goto-char (match-end 0)))
    (cond
     ;; Nothing that `forward-comment' accepts as a comment here.
     ((not (forward-comment 1)) nil)
     ;; Some emacsen (e.g. XEmacs 21) return t when moving forwards
     ;; at eob, so that result is not trusted.
     ((eobp) nil)
     (t
      ;; Emacs includes the ending newline in a b-style (c++) comment,
      ;; but XEmacs doesn't.  We depend on the Emacs behavior (which
      ;; also is symmetric), so step over the newline if we are left
      ;; at eol with the parse state still reporting a comment style.
      (when (and (eolp)
                 (elt (parse-partial-sexp origin (point)) 7))
        (forward-char 1))
      t))))

(defsubst c-forward-comments ()
  "Move forward past all following whitespace and comments.
Line continuations, i.e. backslashes followed by line breaks, are
treated as whitespace.

Note that this function might do hidden buffer changes.  See the
comment at the start of cc-engine.el for more info."

  ;; Each iteration either skips a batch of comments and whitespace or
  ;; steps over one line continuation; stop when neither applies.
  (while (or
          ;; If forward-comment in at least XEmacs 21 is given a large
          ;; positive value, it'll loop all the way through if it hits
          ;; eob.
          (and (forward-comment 5)
               ;; Some emacsen (e.g. XEmacs 21) return t when moving
               ;; forwards at eob.
               (not (eobp)))

          ;; A backslash directly followed by a line break is a line
          ;; continuation: skip both characters and keep going.
          (when (looking-at "\\\\[\n\r]")
            (forward-char 2)
            t))))

(defun c-backward-single-comment ()
  "Move backward past whitespace and the closest preceding comment, if any.
Return t if a comment was found, nil otherwise.  In either case, the
point is moved past the preceding whitespace.  Line continuations,
i.e. a backslashes followed by line breaks, are treated as whitespace.
The line breaks that end line comments are considered to be the
comment enders, so the point cannot be at the end of the same line to
move over a line comment.

This function does not do any hidden buffer changes."

  (let ((start (point)))
    ;; When we got newline terminated comments, forward-comment in all
    ;; supported emacsen so far will stop at eol of each line not
    ;; ending with a comment when moving backwards.  This corrects for
    ;; that, and at the same time handles line continuations.
    (while (progn
	     (skip-chars-backward " \t\n\r\f\v")
	     (and (looking-at "[\n\r]")