cc-engine.el 394 KB
Newer Older
Richard M. Stallman's avatar
Richard M. Stallman committed
1 2
;;; cc-engine.el --- core syntax guessing engine for CC mode

3
;; Copyright (C) 1985, 1987, 1992-2012  Free Software Foundation, Inc.
Richard M. Stallman's avatar
Richard M. Stallman committed
4

Alan Mackenzie's avatar
Alan Mackenzie committed
5 6
;; Authors:    2001- Alan Mackenzie
;;             1998- Martin Stjernholm
7
;;             1992-1999 Barry A. Warsaw
8 9
;;             1987 Dave Detlefs
;;             1987 Stewart Clamen
Richard M. Stallman's avatar
Richard M. Stallman committed
10
;;             1985 Richard M. Stallman
Barry A. Warsaw's avatar
Barry A. Warsaw committed
11
;; Maintainer: bug-cc-mode@gnu.org
Richard M. Stallman's avatar
Richard M. Stallman committed
12
;; Created:    22-Apr-1997 (split from cc-mode.el)
13 14
;; Keywords:   c languages
;; Package:    cc-mode
Richard M. Stallman's avatar
Richard M. Stallman committed
15 16 17

;; This file is part of GNU Emacs.

18
;; GNU Emacs is free software: you can redistribute it and/or modify
Richard M. Stallman's avatar
Richard M. Stallman committed
19
;; it under the terms of the GNU General Public License as published by
20 21
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.
Richard M. Stallman's avatar
Richard M. Stallman committed
22 23 24 25 26 27 28

;; GNU Emacs is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.

;; You should have received a copy of the GNU General Public License
29
;; along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.
Richard M. Stallman's avatar
Richard M. Stallman committed
30

31 32
;;; Commentary:

33 34 35 36 37 38 39 40 41 42
;; The functions which have docstring documentation can be considered
;; part of an API which other packages can use in CC Mode buffers.
;; Otoh, undocumented functions and functions with the documentation
;; in comments are considered purely internal and can change semantics
;; or even disappear in the future.
;;
;; (This policy applies to CC Mode as a whole, not just this file.  It
;; probably also applies to many other Emacs packages, but here it's
;; clearly spelled out.)

43 44 45 46
;; Hidden buffer changes
;;
;; Various functions in CC Mode use text properties for caching and
;; syntactic markup purposes, and those of them that might modify such
47 48 49 50
;; properties but still don't modify the buffer in a visible way are
;; said to do "hidden buffer changes".  They should be used within
;; `c-save-buffer-state' or a similar function that saves and restores
;; buffer modifiedness, disables buffer change hooks, etc.
51
;;
52 53
;; Interactive functions are assumed to not do hidden buffer changes,
;; except in the specific parts of them that do real changes.
54
;;
55 56
;; Lineup functions are assumed to do hidden buffer changes.  They
;; must not do real changes, though.
57
;;
58 59 60 61 62 63 64 65 66 67 68 69
;; All other functions that do hidden buffer changes have that noted
;; in their doc string or comment.
;;
;; The intention with this system is to avoid wrapping every leaf
;; function that does hidden buffer changes inside
;; `c-save-buffer-state'.  It should be used as near the top of the
;; interactive functions as possible.
;;
;; Functions called during font locking are allowed to do hidden
;; buffer changes since the font-lock package runs them in a context
;; similar to `c-save-buffer-state' (in fact, that function is heavily
;; inspired by `save-buffer-state' in the font-lock package).
70 71 72 73 74 75 76

;; Use of text properties
;;
;; CC Mode uses several text properties internally to mark up various
;; positions, e.g. to improve speed and to eliminate glitches in
;; interactive refontification.
;;
77 78 79
;; Note: This doc is for internal use only.  Other packages should not
;; assume that these text properties are used as described here.
;;
80 81 82 83
;; 'category
;;   Used for "indirection".  With its help, some other property can
;;   be cheaply and easily switched on or off everywhere it occurs.
;;
84
;; 'syntax-table
85 86 87
;;   Used to modify the syntax of some characters.  It is used to
;;   mark the "<" and ">" of angle bracket parens with paren syntax, and
;;   to "hide" obtrusive characters in preprocessor lines.
88 89 90 91 92 93 94 95 96 97 98 99 100 101
;;
;;   This property is used on single characters and is therefore
;;   always treated as front and rear nonsticky (or start and end open
;;   in XEmacs vocabulary).  It's therefore installed on
;;   `text-property-default-nonsticky' if that variable exists (Emacs
;;   >= 21).
;;
;; 'c-is-sws and 'c-in-sws
;;   Used by `c-forward-syntactic-ws' and `c-backward-syntactic-ws' to
;;   speed them up.  See the comment blurb before `c-put-is-sws'
;;   below for further details.
;;
;; 'c-type
;;   This property is used on single characters to mark positions with
102 103
;;   special syntactic relevance of various sorts.  Its primary use is
;;   to avoid glitches when multiline constructs are refontified
104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135
;;   interactively (on font lock decoration level 3).  It's cleared in
;;   a region before it's fontified and is then put on relevant chars
;;   in that region as they are encountered during the fontification.
;;   The value specifies the kind of position:
;;
;;     'c-decl-arg-start
;;  	 Put on the last char of the token preceding each declaration
;;  	 inside a declaration style arglist (typically in a function
;;  	 prototype).
;;
;;     'c-decl-end
;;  	 Put on the last char of the token preceding a declaration.
;;  	 This is used in cases where declaration boundaries can't be
;;  	 recognized simply by looking for a token like ";" or "}".
;;  	 `c-type-decl-end-used' must be set if this is used (see also
;;  	 `c-find-decl-spots').
;;
;;     'c-<>-arg-sep
;;  	 Put on the commas that separate arguments in angle bracket
;;  	 arglists like C++ template arglists.
;;
;;     'c-decl-id-start and 'c-decl-type-start
;;  	 Put on the last char of the token preceding each declarator
;;  	 in the declarator list of a declaration.  They are also used
;;  	 between the identifiers in cases like enum declarations.
;;  	 'c-decl-type-start is used when the declarators are types,
;;  	 'c-decl-id-start otherwise.
;;
;; 'c-awk-NL-prop
;;   Used in AWK mode to mark the various kinds of newlines.  See
;;   cc-awk.el.

136 137
;;; Code:

Barry A. Warsaw's avatar
Barry A. Warsaw committed
138
(eval-when-compile
Gerd Moellmann's avatar
Gerd Moellmann committed
139
  (let ((load-path
Gerd Moellmann's avatar
Gerd Moellmann committed
140 141 142
	 (if (and (boundp 'byte-compile-dest-file)
		  (stringp byte-compile-dest-file))
	     (cons (file-name-directory byte-compile-dest-file) load-path)
Gerd Moellmann's avatar
Gerd Moellmann committed
143
	   load-path)))
144
    (load "cc-bytecomp" nil t)))
Gerd Moellmann's avatar
Gerd Moellmann committed
145 146

(cc-require 'cc-defs)
147
(cc-require-when-compile 'cc-langs)
Gerd Moellmann's avatar
Gerd Moellmann committed
148
(cc-require 'cc-vars)
149

Gerd Moellmann's avatar
Gerd Moellmann committed
150 151
;; Silence the compiler.
(cc-bytecomp-defun buffer-syntactic-context) ; XEmacs
Barry A. Warsaw's avatar
Barry A. Warsaw committed
152

Gerd Moellmann's avatar
Gerd Moellmann committed
153

154 155 156 157
;; Make declarations for all the `c-lang-defvar' variables in cc-langs.

(defmacro c-declare-lang-variables ()
  ;; Expand into a `progn' that, for every entry in the cdr of
  ;; `c-lang-variable-inits', declares the variable named by the
  ;; entry's car with `defvar' (initial value nil) and makes it
  ;; automatically buffer local.  If the entry's third element is
  ;; non-nil it is handed to `defvar' as the doc string.
  `(progn
     ,@(apply 'nconc
	      (mapcar (lambda (init)
			(let ((var (car init))
			      (doc (elt init 2)))
			  ;; Each entry contributes two forms, which
			  ;; `nconc' then splices into one flat list.
			  (list (if doc
				    `(defvar ,var nil ,doc)
				  `(defvar ,var nil))
				`(make-variable-buffer-local ',var))))
		      (cdr c-lang-variable-inits)))))
165 166 167 168 169 170 171 172 173
(c-declare-lang-variables)


;;; Internal state variables.

;; Internal state of hungry delete key feature
(defvar c-hungry-delete-key nil)
(make-variable-buffer-local 'c-hungry-delete-key)

174 175 176 177 178 179 180
;; The electric flag (toggled by `c-toggle-electric-state').
;; If t, electric actions (like automatic reindentation, and (if
;; c-auto-newline is also set) auto newlining) will happen when an electric
;; key like `{' is pressed (or an electric keyword like `else').
(defvar c-electric-flag t)
(make-variable-buffer-local 'c-electric-flag)

181 182 183 184
;; Internal state of auto newline feature.
(defvar c-auto-newline nil)
(make-variable-buffer-local 'c-auto-newline)

185
;; Included in the mode line to indicate the active submodes.
186 187
;; (defvar c-submode-indicators nil)
;; (make-variable-buffer-local 'c-submode-indicators)
188

189 190 191 192 193 194 195 196 197
(defun c-calculate-state (arg prevstate)
  ;; Return the new on/off state (t or nil) of a toggle whose current
  ;; state is PREVSTATE.  ARG is a raw prefix argument: if it is nil
  ;; or its numeric value is zero, the state is flipped.  A negative
  ;; value turns the state off and a positive value turns it on.
  (let ((n (and arg (prefix-numeric-value arg))))
    (if (and n (/= n 0))
	(> n 0)
      (not prevstate))))

Martin Stjernholm's avatar
Martin Stjernholm committed
198 199 200 201 202 203 204 205 206 207 208

;; Basic handling of preprocessor directives.

;; This is a dynamically bound cache used together with
;; `c-query-macro-start' and `c-query-and-set-macro-start'.  It only
;; works as long as point doesn't cross a macro boundary.
(defvar c-macro-start 'unknown)

(defsubst c-query-and-set-macro-start ()
  ;; Return the start position of the preprocessor directive that
  ;; point is in, or nil if `c-beginning-of-macro' reports that point
  ;; isn't in one.  The result is stored in `c-macro-start' so that
  ;; later queries can reuse it.
  ;;
  ;; The cached value is used only when `c-macro-start' holds a
  ;; non-symbol (i.e. a position).  Note that nil is a symbol too, so a
  ;; stored nil is recomputed on the next call just like `unknown'.
  (when (symbolp c-macro-start)
    (setq c-macro-start (save-excursion
			  (c-save-buffer-state ()
			    (and (c-beginning-of-macro)
				 (point))))))
  c-macro-start)

(defsubst c-query-macro-start ()
  ;; Like `c-query-and-set-macro-start', but never modify
  ;; `c-macro-start': return its value if it holds a non-symbol,
  ;; otherwise compute the start of the preprocessor directive around
  ;; point with `c-beginning-of-macro' (nil if point isn't in one)
  ;; without moving point.
  (if (not (symbolp c-macro-start))
      c-macro-start
    (save-excursion
      (c-save-buffer-state ()
	(and (c-beginning-of-macro)
	     (point))))))

222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238
;; One element macro cache to cope with continual movement within very large
;; CPP macros.
(defvar c-macro-cache nil)
(make-variable-buffer-local 'c-macro-cache)
;; Nil or cons of the bounds of the most recent CPP form probed by
;; `c-beginning-of-macro', `c-end-of-macro' or `c-syntactic-end-of-macro'.
;; The cdr will be nil if we know only the start of the CPP form.
(defvar c-macro-cache-start-pos nil)
(make-variable-buffer-local 'c-macro-cache-start-pos)
;; The starting position from where we determined `c-macro-cache'.
(defvar c-macro-cache-syntactic nil)
(make-variable-buffer-local 'c-macro-cache-syntactic)
;; non-nil iff `c-macro-cache' has both elements set AND the cdr is at a
;; syntactic end of macro, not merely an apparent one.

(defun c-invalidate-macro-cache (beg end)
  ;; Called from a before-change function.  If the change region is before or
  ;; in the macro characterized by `c-macro-cache' etc., nullify it
  ;; appropriately.  BEG and END are the standard before-change-functions
  ;; parameters.  END isn't used.
  (let ((macro-start (car c-macro-cache))
	(macro-end (cdr c-macro-cache)))
    (cond
     ;; Nothing is cached, so there is nothing to invalidate.
     ((null c-macro-cache))
     ;; The change starts before the cached macro: drop the whole cache.
     ((< beg macro-start)
      (setq c-macro-cache nil
	    c-macro-cache-start-pos nil
	    c-macro-cache-syntactic nil))
     ;; The change starts inside the cached macro: its start is still
     ;; good, but the recorded end can no longer be trusted.
     ((and macro-end
	   (< beg macro-end))
      (setcdr c-macro-cache nil)
      (setq c-macro-cache-start-pos beg
	    c-macro-cache-syntactic nil)))))

Martin Stjernholm's avatar
Martin Stjernholm committed
254 255 256 257 258
(defun c-beginning-of-macro (&optional lim)
  "Go to the beginning of a preprocessor directive.
Leave point at the beginning of the directive and return t if in one,
otherwise return nil and leave point unchanged.

Note that this function might do hidden buffer changes.  See the
comment at the start of cc-engine.el for more info."
  ;; When LIM is non-nil the search is done with the buffer narrowed
  ;; to start at LIM, and a cached macro start that lies before LIM is
  ;; not accepted.  Nothing is done (and nil is returned) when
  ;; `c-opt-cpp-prefix' is nil.
  (let ((here (point)))
    (when c-opt-cpp-prefix
      (if (and (car c-macro-cache)
	       (>= here (car c-macro-cache))
	       (or (and (cdr c-macro-cache)
			(<= here (cdr c-macro-cache)))
		   (<= here c-macro-cache-start-pos)))
	  ;; Point is inside the macro described by the cache.
	  (unless (< (car c-macro-cache) (or lim (point-min)))
	    (goto-char (max (or lim (point-min)) (car c-macro-cache)))
	    ;; Remember the furthest position known to be in this macro.
	    (setq c-macro-cache-start-pos
		  (max c-macro-cache-start-pos here))
	    t)
	;; The cache doesn't cover point: discard it and search afresh.
	(setq c-macro-cache nil
	      c-macro-cache-start-pos nil
	      c-macro-cache-syntactic nil)

	(save-restriction
	  (if lim (narrow-to-region lim (point-max)))
	  (beginning-of-line)
	  ;; Step back over lines that are continued with a backslash,
	  ;; i.e. while the char just before the preceding newline is
	  ;; a "\".
	  (while (eq (char-before (1- (point))) ?\\)
	    (forward-line -1))
	  (back-to-indentation)
	  (if (and (<= (point) here)
		   (looking-at c-opt-cpp-start))
	      (progn
		;; Only the start of the macro is known at this stage.
		(setq c-macro-cache (cons (point) nil)
		      c-macro-cache-start-pos here)
		t)
	    (goto-char here)
	    nil))))))
Martin Stjernholm's avatar
Martin Stjernholm committed
291 292 293

(defun c-end-of-macro ()
  "Go to the end of a preprocessor directive.
More accurately, move the point to the end of the closest following
line that doesn't end with a line continuation backslash - no check is
done that the point is inside a cpp directive to begin with.

Note that this function might do hidden buffer changes.  See the
comment at the start of cc-engine.el for more info."
  (if (and (cdr c-macro-cache)
	   (<= (point) (cdr c-macro-cache))
	   (>= (point) (car c-macro-cache)))
      ;; The cache knows both ends of the macro around point.
      (goto-char (cdr c-macro-cache))
    ;; Keep the cache only if point is still within the part of the
    ;; macro it is known to cover; otherwise discard it.
    (unless (and (car c-macro-cache)
		 (<= (point) c-macro-cache-start-pos)
		 (>= (point) (car c-macro-cache)))
      (setq c-macro-cache nil
	    c-macro-cache-start-pos nil
	    c-macro-cache-syntactic nil))
    ;; Advance line by line for as long as the line ends with a
    ;; backslash (and we're not at the end of the buffer).
    (while (progn
	     (end-of-line)
	     (when (and (eq (char-before) ?\\)
			(not (eobp)))
	       (forward-char)
	       t)))
    ;; Record the end just found, if the start of the macro is cached.
    (when (car c-macro-cache)
      (setcdr c-macro-cache (point)))))
Martin Stjernholm's avatar
Martin Stjernholm committed
318

319 320 321 322 323 324 325 326 327 328 329 330 331
(defun c-syntactic-end-of-macro ()
  ;; Go to the end of a CPP directive, or a "safe" pos just before.
  ;;
  ;; This is normally the end of the next non-escaped line.  A "safe"
  ;; position is one not within a string or comment.  (The EOL on a line
  ;; comment is NOT "safe").
  ;;
  ;; This function must only be called from the beginning of a CPP construct.
  ;;
  ;; The position found is returned, and it is also remembered in
  ;; `c-macro-cache-syntactic' (when the start of the macro is in
  ;; `c-macro-cache'), so that a later call for the same macro can go
  ;; straight to it.  Earlier only a flag was remembered, and since
  ;; `c-end-of-macro' leaves point at the apparent end of the macro, a
  ;; repeated call returned that instead of the syntactic end.
  ;;
  ;; Note that this function might do hidden buffer changes.  See the comment
  ;; at the start of cc-engine.el for more info.
  (let* ((here (point))
	 (there (progn (c-end-of-macro) (point)))
	 s)
    (if c-macro-cache-syntactic
	;; `c-end-of-macro' didn't reset the cache, so the remembered
	;; syntactic end still belongs to this macro.
	(goto-char c-macro-cache-syntactic)
      (setq s (parse-partial-sexp here there))
      (while (and (or (nth 3 s)	 ; in a string
		      (nth 4 s)) ; in a comment (maybe at end of line comment)
		  (> there here))	; No infinite loops, please.
	;; Retry with the end just before the start of that string or
	;; comment.
	(setq there (1- (nth 8 s)))
	(setq s (parse-partial-sexp here there)))
      ;; `parse-partial-sexp' has left point at the safe position.
      (setq c-macro-cache-syntactic (and (car c-macro-cache)
					 (point))))
    (point)))

343 344 345 346 347 348 349 350 351 352 353 354
(defun c-forward-over-cpp-define-id ()
  ;; With point at the "#" that starts a preprocessor directive, move
  ;; to the end of the identifier being "#define"d (or whatever
  ;; `c-opt-cpp-macro-define' specifies) and return non-nil.  If the
  ;; text at point doesn't match `c-opt-cpp-macro-define-id', or that
  ;; variable is nil, return nil and leave point where it is.
  ;;
  ;; This function might do hidden buffer changes.
  (and c-opt-cpp-macro-define-id
       (looking-at c-opt-cpp-macro-define-id)
       (goto-char (match-end 0))))

Martin Stjernholm's avatar
Martin Stjernholm committed
355 356 357
(defun c-forward-to-cpp-define-body ()
  ;; Assuming point is at the "#" that introduces a preprocessor
  ;; directive, it's moved forward to the start of the definition body
  ;; if it's a "#define" (or whatever c-opt-cpp-macro-define
  ;; specifies).  Non-nil is returned in this case, in all other cases
  ;; nil is returned and point isn't moved.
  ;;
  ;; This function might do hidden buffer changes.
  (when (and c-opt-cpp-macro-define-start
	     (looking-at c-opt-cpp-macro-define-start)
	     ;; A match that reaches the end of the line means there is
	     ;; no body to move to.
	     (/= (match-end 0) (c-point 'eol)))
    (goto-char (match-end 0))))

368 369 370

;;; Basic utility functions.

371
(defun c-syntactic-content (from to paren-level)
  ;; Return the given region as a string where all syntactic
  ;; whitespace is removed or, where necessary, replaced with a single
  ;; space.  If PAREN-LEVEL is given then all parens in the region are
  ;; collapsed to "()", "[]" etc.
  ;;
  ;; This function might do hidden buffer changes.

  (save-excursion
    (save-restriction
      (narrow-to-region from to)
      (goto-char from)
      ;; PARTS starts with a dummy cell and TAIL always points at its
      ;; last cons, so new pieces are appended with `setcdr'.
      (let* ((parts (list nil)) (tail parts) pos in-paren)

	(while (re-search-forward c-syntactic-ws-start to t)
	  (goto-char (setq pos (match-beginning 0)))
	  (c-forward-syntactic-ws)
	  (if (= (point) pos)
	      ;; Nothing was skipped at the match, so step past this
	      ;; char and search on.
	      (forward-char)

	    (when paren-level
	      (save-excursion
		;; Scan FROM..POS, stopping if paren depth 1 is
		;; reached; POS is then set to where the scan ended.
		(setq in-paren (= (car (parse-partial-sexp from pos 1)) 1)
		      pos (point))))

	    ;; A space is kept only if the skipped whitespace has word
	    ;; or symbol constituents on both sides.
	    (if (and (> pos from)
		     (< (point) to)
		     (looking-at "\\w\\|\\s_")
		     (save-excursion
		       (goto-char (1- pos))
		       (looking-at "\\w\\|\\s_")))
		(progn
		  (setcdr tail (list (buffer-substring-no-properties from pos)
				     " "))
		  (setq tail (cddr tail)))
	      (setcdr tail (list (buffer-substring-no-properties from pos)))
	      (setq tail (cdr tail)))

	    (when in-paren
	      ;; Scan on from POS until the depth has dropped by one,
	      ;; and then add the single char before point.
	      (when (= (car (parse-partial-sexp pos to -1)) -1)
		(setcdr tail (list (buffer-substring-no-properties
				    (1- (point)) (point))))
		(setq tail (cdr tail))))

	    (setq from (point))))

	;; Add whatever remains after the last piece handled above.
	(setcdr tail (list (buffer-substring-no-properties from to)))
	(apply 'concat (cdr parts))))))

(defun c-shift-line-indentation (shift-amt)
  ;; Change the indentation of the current line by SHIFT-AMT columns
  ;; (a positive amount indents further).  The buffer is left
  ;; untouched when SHIFT-AMT is zero.
  (let ((dist-from-end (- (point-max) (point)))
	(c-macro-start c-macro-start)
	placeholder-p)
    (unless (zerop shift-amt)
      ;; On an empty line inside a macro, the whitespace in front of
      ;; the line continuation backslash must not be treated as
      ;; indentation.  So take point to be at the current indentation
      ;; by temporarily putting a placeholder char there.
      (when (and (c-query-and-set-macro-start)
		 (looking-at "[ \t]*\\\\$")
		 (save-excursion
		   (skip-chars-backward " \t")
		   (bolp)))
	(insert ?x)
	(backward-char)
	(setq placeholder-p t))
      (unwind-protect
	  (let ((target-col (+ (current-indentation) shift-amt)))
	    (delete-region (c-point 'bol) (c-point 'boi))
	    (beginning-of-line)
	    (indent-to target-col))
	;; Take the placeholder out again, even after an error.
	(when placeholder-p
	  (delete-char 1))))
    ;; If point started out within the line's indentation, and this
    ;; isn't the empty macro line case above, put it after the
    ;; indentation.  Otherwise keep it at the same place in the text.
    (cond ((and (< (point) (c-point 'boi))
		(not placeholder-p))
	   (back-to-indentation))
	  ((> (- (point-max) dist-from-end) (point))
	   (goto-char (- (point-max) dist-from-end))))))
456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474

(defsubst c-keyword-sym (keyword)
  ;; Return non-nil if the string KEYWORD is a known keyword.  More
  ;; precisely, the value is the symbol for the keyword in
  ;; `c-keywords-obarray'.  The lookup is done with `intern-soft', so
  ;; nil is returned, and no symbol is created, if KEYWORD isn't
  ;; found there.
  (intern-soft keyword c-keywords-obarray))

(defsubst c-keyword-member (keyword-sym lang-constant)
  ;; Return non-nil if the symbol KEYWORD-SYM, as returned by
  ;; `c-keyword-sym', is a member of LANG-CONSTANT, which is the name
  ;; of a language constant that ends with "-kwds".  If KEYWORD-SYM is
  ;; nil then the result is nil.
  ;;
  ;; The test is a property lookup: the value of the property
  ;; LANG-CONSTANT on KEYWORD-SYM is returned.
  (get keyword-sym lang-constant))

;; String syntax chars, suitable for skip-syntax-(forward|backward).
;; The "|" char is included only when `c-emacs-features' contains
;; `gen-string-delim'.
(defconst c-string-syntax (if (memq 'gen-string-delim c-emacs-features)
                              "\"|"
                            "\""))

475
;; Regexp matching string limit syntax.
476 477 478 479
;; Matches a char with string quote syntax (\s").  The alternative for
;; the \s| syntax class is included only when `c-emacs-features'
;; contains `gen-string-delim'.
(defconst c-string-limit-regexp (if (memq 'gen-string-delim c-emacs-features)
                                    "\\s\"\\|\\s|"
                                  "\\s\""))

480 481 482 483
;; Regexp matching WS followed by string limit syntax.  WS here is any
;; number of spaces and tabs (possibly none); the string limit part
;; from `c-string-limit-regexp' is wrapped in a \(...\) group.
(defconst c-ws*-string-limit-regexp
  (concat "[ \t]*\\(" c-string-limit-regexp "\\)"))

484 485
;; Holds formatted error strings for the few cases where parse errors
;; are reported.
486
(defvar c-parsing-error nil)
487 488 489 490 491 492 493 494 495 496 497
(make-variable-buffer-local 'c-parsing-error)

(defun c-echo-parsing-error (&optional quiet)
  ;; Report the error string in `c-parsing-error', if there is one,
  ;; through `c-benign-error'.  Nothing is reported when QUIET is
  ;; non-nil or when `c-report-syntactic-errors' is nil.  The value of
  ;; `c-parsing-error' is returned in all cases.
  (if (and c-parsing-error
	   c-report-syntactic-errors
	   (not quiet))
      (c-benign-error "%s" c-parsing-error))
  c-parsing-error)

;; Faces given to comments and string literals.  This is used in some
;; situations to speed up recognition; it isn't mandatory that font
;; locking is in use.  This variable is extended with the face in
;; `c-doc-face-name' when fontification is activated in cc-fonts.el.
498
(defvar c-literal-faces
  ;; The comment and string faces are always in the list; the comment
  ;; delimiter face is added only if it exists as a face.
  (append '(font-lock-comment-face font-lock-string-face)
	  (when (facep 'font-lock-comment-delimiter-face)
	    ;; New in Emacs 22.
	    '(font-lock-comment-delimiter-face))))

(defsubst c-put-c-type-property (pos value)
  ;; Put a c-type property with the given value at POS.  This is a
  ;; thin wrapper around `c-put-char-property' for the `c-type'
  ;; property.
  (c-put-char-property pos 'c-type value))

(defun c-clear-c-type-property (from to value)
  ;; Remove all occurrences of the c-type property that has the given
  ;; value in the region between FROM and TO.  VALUE is assumed to not
  ;; be nil.  Nothing is done if the region is empty.
  ;;
  ;; Note: This assumes that c-type is put on single chars only; it's
  ;; very inefficient if matching properties cover large regions.
  (save-excursion
    (goto-char from)
    ;; The bound is tested before each char is examined, so that the
    ;; char at TO, which is outside the region, is never touched.
    (while (< (point) to)
      (when (eq (get-text-property (point) 'c-type) value)
	(c-clear-char-property (point) 'c-type))
      ;; Jump to the next place where the c-type property changes, but
      ;; not beyond TO.
      (goto-char (next-single-property-change (point) 'c-type nil to)))))
Martin Stjernholm's avatar
Martin Stjernholm committed
522

523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562

;; Some debug tools to visualize various special positions.  This
;; debug code isn't as portable as the rest of CC Mode.

(cc-bytecomp-defun overlays-in)
(cc-bytecomp-defun overlay-get)
(cc-bytecomp-defun overlay-start)
(cc-bytecomp-defun overlay-end)
(cc-bytecomp-defun delete-overlay)
(cc-bytecomp-defun overlay-put)
(cc-bytecomp-defun make-overlay)

(defun c-debug-add-face (beg end face)
  ;; Debug aid: put an overlay with the face FACE over the region
  ;; between BEG and END.  Overlays already in that region that have
  ;; FACE as their face are deleted, and the new overlay is extended
  ;; to cover what they covered.
  (c-save-buffer-state ()
    (dolist (ov (overlays-in beg end))
      (when (eq (overlay-get ov 'face) face)
	(setq beg (min beg (overlay-start ov))
	      end (max end (overlay-end ov)))
	(delete-overlay ov)))
    (overlay-put (make-overlay beg end) 'face face)))

(defun c-debug-remove-face (beg end face)
  ;; Debug aid: remove the face FACE from the region between BEG and
  ;; END.  Overlays in that region that have FACE as their face are
  ;; deleted.  If they reached outside the region, new overlays with
  ;; FACE are made for the parts before BEG and after END.
  (c-save-buffer-state ((left beg) (right end))
    (dolist (ov (overlays-in beg end))
      (when (eq (overlay-get ov 'face) face)
	(setq left (min left (overlay-start ov))
	      right (max right (overlay-end ov)))
	(delete-overlay ov)))
    (if (< left beg)
	(overlay-put (make-overlay left beg) 'face face))
    (if (> right end)
	(overlay-put (make-overlay end right) 'face face))))


;; `c-beginning-of-statement-1' and accompanying stuff.
Gerd Moellmann's avatar
Gerd Moellmann committed
563

564 565 566 567
;; KLUDGE ALERT: c-maybe-labelp is used to pass information between
;; c-crosses-statement-barrier-p and c-beginning-of-statement-1.  A
;; better way should be implemented, but this will at least shut up
;; the byte compiler.
568
(defvar c-maybe-labelp)
569

570 571
;; New awk-compatible version of c-beginning-of-statement-1, ACM 2002/6/22

572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616
;; Macros used internally in c-beginning-of-statement-1 for the
;; automaton actions.
(defmacro c-bos-push-state ()
  ;; Push the pair (state . saved-pos) onto `stack'.  The variables
  ;; `state', `saved-pos' and `stack' aren't bound here; they are
  ;; taken from the code in which the macro is expanded.
  '(setq stack (cons (cons state saved-pos)
		     stack)))
(defmacro c-bos-pop-state (&optional do-if-done)
  ;; Pop the top entry of `stack' into `state' and `saved-pos'.  The
  ;; test of the `if' is the value of the last assignment, i.e. what
  ;; remains of the stack: if that is non-nil the result is t.
  ;; Otherwise the form DO-IF-DONE is evaluated and then nil is
  ;; thrown to the tag `loop'.
  `(if (setq state (car (car stack))
	     saved-pos (cdr (car stack))
	     stack (cdr stack))
       t
     ,do-if-done
     (throw 'loop nil)))
(defmacro c-bos-pop-state-and-retry ()
  ;; Pop the top entry of `stack' into `state' and `saved-pos', and
  ;; then always throw to the tag `loop'.  The thrown value is what
  ;; remains of the stack after the pop.
  '(throw 'loop (setq state (car (car stack))
		      saved-pos (cdr (car stack))
		      ;; Throw nil if stack is empty, else throw non-nil.
		      stack (cdr stack))))
(defmacro c-bos-save-pos ()
  ;; Store the current values of `pos', `tok', `ptok' and `pptok' as
  ;; a four element vector in `saved-pos'.  `c-bos-restore-pos' reads
  ;; them back.
  '(setq saved-pos (vector pos tok ptok pptok)))
(defmacro c-bos-restore-pos ()
  ;; Restore `pos', `tok', `ptok' and `pptok' from the vector in
  ;; `saved-pos', move point to the restored `pos' and set `sym' to
  ;; nil.  Nothing is done if the saved position is `eq' to `start'.
  '(unless (eq (elt saved-pos 0) start)
     (setq pos (elt saved-pos 0)
	   tok (elt saved-pos 1)
	   ptok (elt saved-pos 2)
	   pptok (elt saved-pos 3))
     (goto-char pos)
     (setq sym nil)))
(defmacro c-bos-save-error-info (missing got)
  ;; Store the vector [pos MISSING GOT] in `saved-pos'.  This is the
  ;; layout that `c-bos-report-error' reads when it formats its
  ;; message.  Note that it replaces whatever `c-bos-save-pos' stored
  ;; in the same variable.
  `(setq saved-pos (vector pos ,missing ,got)))
(defmacro c-bos-report-error ()
  ;; Unless `noerror' is non-nil, set `c-parsing-error' to a message
  ;; built from the vector in `saved-pos': element 1 is the token
  ;; that is missing, element 2 the token that was found, and element
  ;; 0 the position used to work out the line number.
  '(unless noerror
     (setq c-parsing-error
	   (format "No matching `%s' found for `%s' on line %d"
		   (elt saved-pos 1)
		   (elt saved-pos 2)
		   ;; Count the lines before the line holding the
		   ;; saved position, and add one for that line.
		   (1+ (count-lines (point-min)
				    (c-point 'bol (elt saved-pos 0))))))))

(defun c-beginning-of-statement-1 (&optional lim ignore-labels
                                             noerror comma-delim)
  "Move to the start of the current statement or declaration, or to
the previous one if already at the beginning of one.  Only
statements/declarations on the same level are considered, i.e. don't
move into or out of sexps (not even normal expression parentheses).

If point is already at the earliest statement within braces or parens,
this function doesn't move back into any whitespace preceding it; it
returns 'same in this case.

Stop at statement continuation tokens like \"else\", \"catch\",
\"finally\" and the \"while\" in \"do ... while\" if the start point
is within the continuation.  If starting at such a token, move to the
corresponding statement start.  If at the beginning of a statement,
move to the closest containing statement if there is any.  This might
also stop at a continuation clause.

Labels are treated as part of the following statements if
IGNORE-LABELS is non-nil.  (FIXME: Doesn't work if we stop at a known
statement start keyword.)  Otherwise, each label is treated as a
separate statement.

Macros are ignored \(i.e. skipped over) unless point is within one, in
which case the content of the macro is treated as normal code.  Aside
from any normal statement starts found in it, stop at the first token
of the content in the macro, i.e. the expression of an \"#if\" or the
start of the definition in a \"#define\".  Also stop at start of
macros before leaving them.

Return:
'label          if stopped at a label or \"case...:\" or \"default:\";
'same           if stopped at the beginning of the current statement;
'up             if stepped to a containing statement;
'previous       if stepped to a preceding statement;
'beginning      if stepped from a statement continuation clause to
                its start clause; or
'macro          if stepped to a macro start.
Note that 'same and not 'label is returned if stopped at the same
label without crossing the colon character.

LIM may be given to limit the search.  If the search hits the limit,
point will be left at the closest following token, or at the start
position if that is less ('same is returned in this case).

NOERROR turns off error logging to `c-parsing-error'.

Normally only ';' and virtual semicolons are considered to delimit
statements, but if COMMA-DELIM is non-nil then ',' is treated
as a delimiter too.

Note that this function might do hidden buffer changes.  See the
comment at the start of cc-engine.el for more info."

  ;; The bulk of this function is a pushdown automaton that looks at statement
  ;; boundaries and the tokens (such as "while") in c-opt-block-stmt-key.  Its
  ;; purpose is to keep track of nested statements, ensuring that such
  ;; statements are skipped over in their entirety (somewhat akin to what C-M-p
  ;; does with nested braces/brackets/parentheses).
  ;;
  ;; Note: The position of a boundary is the following token.
  ;;
  ;; Beginning with the current token (the one following point), move back one
  ;; sexp at a time (where a sexp is, more or less, either a token or the
  ;; entire contents of a brace/bracket/paren pair).  Each time a statement
  ;; boundary is crossed or a "while"-like token is found, update the state of
  ;; the PDA.  Stop at the beginning of a statement when the stack (holding
  ;; nested statement info) is empty and the position has been moved.
  ;;
  ;; The following variables constitute the PDA:
  ;;
  ;; sym:    This is either the "while"-like token (e.g. 'for) we've just
  ;;         scanned back over, 'boundary if we've just gone back over a
  ;;         statement boundary, or nil otherwise.
  ;; state:  takes one of the values (nil else else-boundary while
  ;;         while-boundary catch catch-boundary).
  ;;         nil means "no "while"-like token yet scanned".
  ;;         'else, for example, means "just gone back over an else".
  ;;         'else-boundary means "just gone back over a statement boundary
  ;;         immediately after having gone back over an else".
  ;; saved-pos: A vector of either saved positions (tok ptok pptok, etc.) or
  ;;         of error reporting information.
  ;; stack:  The stack onto which the PDA pushes its state.  Each entry
  ;;         consists of a saved value of state and saved-pos.  An entry is
  ;;         pushed when we move back over a "continuation" token (e.g. else)
  ;;         and popped when we encounter the corresponding opening token
  ;;         (e.g. if).
  ;;
  ;;
  ;; The following diagram briefly outlines the PDA.
  ;;
  ;; Common state:
  ;;   "else": Push state, goto state `else'.
  ;;   "while": Push state, goto state `while'.
  ;;   "catch" or "finally": Push state, goto state `catch'.
  ;;   boundary: Pop state.
  ;;   other: Do nothing special.
  ;;
  ;; State `else':
  ;;   boundary: Goto state `else-boundary'.
  ;;   other: Error, pop state, retry token.
  ;;
  ;; State `else-boundary':
  ;;   "if": Pop state.
  ;;   boundary: Error, pop state.
  ;;   other: See common state.
  ;;
  ;; State `while':
  ;;   boundary: Save position, goto state `while-boundary'.
  ;;   other: Pop state, retry token.
  ;;
  ;; State `while-boundary':
  ;;   "do": Pop state.
  ;;   boundary: Restore position if it's not at start, pop state. [*see below]
  ;;   other: See common state.
  ;;
  ;; State `catch':
  ;;   boundary: Goto state `catch-boundary'.
  ;;   other: Error, pop state, retry token.
  ;;
  ;; State `catch-boundary':
  ;;   "try": Pop state.
  ;;   "catch": Goto state `catch'.
  ;;   boundary: Error, pop state.
  ;;   other: See common state.
  ;;
  ;; [*] In the `while-boundary' state, we had pushed a 'while state, and were
  ;; searching for a "do" which would have opened a do-while.  If we didn't
  ;; find it, we discard the analysis done since the "while", go back to this
  ;; token in the buffer and restart the scanning there, this time WITHOUT
  ;; pushing the 'while state onto the stack.
  ;;
  ;; In addition to the above there is some special handling of labels
  ;; and macros.

  (let ((case-fold-search nil)
        (start (point))
        macro-start
        (delims (if comma-delim '(?\; ?,) '(?\;)))
        (c-stmt-delim-chars (if comma-delim
                                c-stmt-delim-chars-with-comma
                              c-stmt-delim-chars))
        c-in-literal-cache c-maybe-labelp after-case:-pos saved
        ;; Current position.
        pos
        ;; Position of last stmt boundary character (e.g. ;).
        boundary-pos
        ;; The position of the last sexp or bound that follows the
        ;; first found colon, i.e. the start of the nonlabel part of
        ;; the statement.  It's `start' if a colon is found just after
        ;; the start.
        after-labels-pos
        ;; Like `after-labels-pos', but the first such position inside
        ;; a label, i.e. the start of the last label before the start
        ;; of the nonlabel part of the statement.
        last-label-pos
        ;; The last position where a label is possible provided the
        ;; statement started there.  It's nil as long as no invalid
        ;; label content has been found (according to
        ;; `c-nonlabel-token-key').  It's `start' if no valid label
        ;; content was found in the label.  Note that we might still
        ;; regard it a label if it starts with `c-label-kwds'.
        label-good-pos
        ;; Putative positions of the components of a bitfield declaration,
        ;; e.g. "int foo : NUM_FOO_BITS ;"
        bitfield-type-pos bitfield-id-pos bitfield-size-pos
        ;; Symbol just scanned back over (e.g. 'while or 'boundary).
        ;; See above.
        sym
        ;; Current state in the automaton.  See above.
        state
        ;; Current saved positions.  See above.
        saved-pos
        ;; Stack of conses (state . saved-pos).
        stack
        ;; Regexp which matches "for", "if", etc.
        (cond-key (or c-opt-block-stmt-key
                      "\\<\\>"))        ; Matches nothing.
        ;; Return value.
        (ret 'same)
        ;; Positions of the last three sexps or bounds we've stopped at.
        tok ptok pptok)

    (save-restriction
      (if lim (narrow-to-region lim (point-max)))

      (if (save-excursion
            (and (c-beginning-of-macro)
                 (/= (point) start)))
          (setq macro-start (point)))

      ;; Try to skip back over unary operator characters, to register
      ;; that we've moved.
      (while (progn
               (setq pos (point))
               (c-backward-syntactic-ws)
               ;; Protect post-++/-- operators just before a virtual semicolon.
               (and (not (c-at-vsemi-p))
                    (/= (skip-chars-backward "-+!*&~@`#") 0))))

      ;; Skip back over any semicolon here.  If it was a bare semicolon, we're
      ;; done.  Later on we ignore the boundaries for statements that don't
      ;; contain any sexp.  The only thing that is affected is that the error
      ;; checking is a little less strict, and we really don't bother.
      (if (and (memq (char-before) delims)
               (progn (forward-char -1)
                      (setq saved (point))
                      (c-backward-syntactic-ws)
                      (or (memq (char-before) delims)
                          (memq (char-before) '(?: nil))
                          (eq (char-syntax (char-before)) ?\()
                          (c-at-vsemi-p))))
          (setq ret 'previous
                pos saved)

        ;; Begin at start and not pos to detect macros if we stand
        ;; directly after the #.
        (goto-char start)
        (if (looking-at "\\<\\|\\W")
            ;; Record this as the first token if not starting inside it.
            (setq tok start))

        ;; The following while loop goes back one sexp (balanced parens,
        ;; etc. with contents, or symbol or suchlike) each iteration.  This
        ;; movement is accomplished with a call to c-backward-sexp approx 170
        ;; lines below.
        ;;
        ;; The loop is exited only by throwing nil to the (catch 'loop ...):
        ;; 1. On reaching the start of a macro;
        ;; 2. On having passed a stmt boundary with the PDA stack empty;
        ;; 3. On reaching the start of an Objective C method def;
        ;; 4. From macro `c-bos-pop-state'; when the stack is empty;
        ;; 5. From macro `c-bos-pop-state-and-retry' when the stack is empty.
        (while
            (catch 'loop ;; Throw nil to break, non-nil to continue.
              (cond
               ;; Are we in a macro, just after the opening #?
               ((save-excursion
                  (and macro-start      ; Always NIL for AWK.
                       (progn (skip-chars-backward " \t")
                              (eq (char-before) ?#))
                       (progn (setq saved (1- (point)))
                              (beginning-of-line)
                              (not (eq (char-before (1- (point))) ?\\)))
                       (looking-at c-opt-cpp-start)
                       (progn (skip-chars-forward " \t")
                              (eq (point) saved))))
                (goto-char saved)
                (if (and (c-forward-to-cpp-define-body)
                         (progn (c-forward-syntactic-ws start)
                                (< (point) start)))
                    ;; Stop at the first token in the content of the macro.
                    (setq pos (point)
                          ignore-labels t) ; Avoid the label check on exit.
                  (setq pos saved
                        ret 'macro
                        ignore-labels t))
                (throw 'loop nil))      ; 1. Start of macro.

               ;; Do a round through the automaton if we've just passed a
               ;; statement boundary or passed a "while"-like token.
               ((or sym
                    (and (looking-at cond-key)
                         (setq sym (intern (match-string 1)))))

                (when (and (< pos start) (null stack))
                  (throw 'loop nil))    ; 2. Statement boundary.

                ;; The PDA state handling.
                ;;
                ;; Refer to the description of the PDA in the opening
                ;; comments.  In the following OR form, the first leaf
                ;; attempts to handle one of the specific actions detailed
                ;; (e.g., finding token "if" whilst in state `else-boundary').
                ;; We drop through to the second leaf (which handles common
                ;; state) if no specific handler is found in the first cond.
                ;; If a parsing error is detected (e.g. an "else" with no
                ;; preceding "if"), we throw to the enclosing catch.
                ;;
                ;; Note that the (eq state 'else) means
                ;; "we've just passed an else", NOT "we're looking for an
                ;; else".
                (or (cond
                     ((eq state 'else)
                      (if (eq sym 'boundary)
                          (setq state 'else-boundary)
                        (c-bos-report-error)
                        (c-bos-pop-state-and-retry)))

                     ((eq state 'else-boundary)
                      (cond ((eq sym 'if)
                             (c-bos-pop-state (setq ret 'beginning)))
                            ((eq sym 'boundary)
                             (c-bos-report-error)
                             (c-bos-pop-state))))

                     ((eq state 'while)
                      (if (and (eq sym 'boundary)
                               ;; Since this can cause backtracking we do a
                               ;; little more careful analysis to avoid it:
                               ;; If there's a label in front of the while
                               ;; it can't be part of a do-while.
                               (not after-labels-pos))
                          (progn (c-bos-save-pos)
                                 (setq state 'while-boundary))
                        (c-bos-pop-state-and-retry))) ; Can't be a do-while

                     ((eq state 'while-boundary)
                      (cond ((eq sym 'do)
                             (c-bos-pop-state (setq ret 'beginning)))
                            ((eq sym 'boundary) ; isn't a do-while
                             (c-bos-restore-pos) ; the position of the while
                             (c-bos-pop-state)))) ; no longer searching for do.

                     ((eq state 'catch)
                      (if (eq sym 'boundary)
                          (setq state 'catch-boundary)
                        (c-bos-report-error)
                        (c-bos-pop-state-and-retry)))

                     ((eq state 'catch-boundary)
                      (cond
                       ((eq sym 'try)
                        (c-bos-pop-state (setq ret 'beginning)))
                       ((eq sym 'catch)
                        (setq state 'catch))
                       ((eq sym 'boundary)
                        (c-bos-report-error)
                        (c-bos-pop-state)))))

                    ;; This is state common.  We get here when the previous
                    ;; cond statement found no particular state handler.
                    (cond ((eq sym 'boundary)
                           ;; If we have a boundary at the start
                           ;; position we push a frame to go to the
                           ;; previous statement.
                           (if (>= pos start)
                               (c-bos-push-state)
                             (c-bos-pop-state)))
                          ((eq sym 'else)
                           (c-bos-push-state)
                           (c-bos-save-error-info 'if 'else)
                           (setq state 'else))
                          ((eq sym 'while)
                           ;; Is this a real while, or a do-while?
                           ;; The next `when' triggers unless we are SURE that
                           ;; the `while' is not the tail end of a `do-while'.
                           (when (or (not pptok)
                                     (memq (char-after pptok) delims)
                                     ;; The following kludge is to prevent
                                     ;; infinite recursion when called from
                                     ;; c-awk-after-if-for-while-condition-p,
                                     ;; or the like.
                                     (and (eq (point) start)
                                          (c-vsemi-status-unknown-p))
                                     (c-at-vsemi-p pptok))
                             ;; Since this can cause backtracking we do a
                             ;; little more careful analysis to avoid it: If
                             ;; the while isn't followed by a (possibly
                             ;; virtual) semicolon it can't be a do-while.
                             (c-bos-push-state)
                             (setq state 'while)))
                          ((memq sym '(catch finally))
                           (c-bos-push-state)
                           (c-bos-save-error-info 'try sym)
                           (setq state 'catch))))

                (when c-maybe-labelp
                  ;; We're either past a statement boundary or at the
                  ;; start of a statement, so throw away any label data
                  ;; for the previous one.
                  (setq after-labels-pos nil
                        last-label-pos nil
                        c-maybe-labelp nil))))

              ;; Step to the previous sexp, but not if we crossed a
              ;; boundary, since that doesn't consume an sexp.
              (if (eq sym 'boundary)
                  (setq ret 'previous)

                ;; HERE IS THE SINGLE PLACE INSIDE THE PDA LOOP WHERE WE MOVE
                ;; BACKWARDS THROUGH THE SOURCE.

                (c-backward-syntactic-ws)
                (let ((before-sws-pos (point))
                      ;; The end position of the area to search for statement
                      ;; barriers in this round.
                      (maybe-after-boundary-pos pos))

                  ;; Go back over exactly one logical sexp, taking proper
                  ;; account of macros and escaped EOLs.
                  (while
                      (progn
                        (unless (c-safe (c-backward-sexp) t)
                          ;; Give up if we hit an unbalanced block.  Since the
                          ;; stack won't be empty the code below will report a
                          ;; suitable error.
                          (throw 'loop nil))
                        (cond
                         ;; Have we moved into a macro?
                         ((and (not macro-start)
                               (c-beginning-of-macro))
                          ;; Have we crossed a statement boundary?  If not,
                          ;; keep going back until we find one or a "real" sexp.
                          (and
                           (save-excursion
                             (c-end-of-macro)
                             (not (c-crosses-statement-barrier-p
                                   (point) maybe-after-boundary-pos)))
                           (setq maybe-after-boundary-pos (point))))
                         ;; Have we just gone back over an escaped NL?  This
                         ;; doesn't count as a sexp.
                         ((looking-at "\\\\$")))))

                  ;; Have we crossed a statement boundary?
                  (setq boundary-pos
                        (cond
                         ;; Are we at a macro beginning?
                         ((and (not macro-start)
                               c-opt-cpp-prefix
                               (looking-at c-opt-cpp-prefix))
                          (save-excursion
                            (c-end-of-macro)
                            (c-crosses-statement-barrier-p
                             (point) maybe-after-boundary-pos)))
                         ;; Just gone back over a brace block?
                         ((and
                           (eq (char-after) ?{)
                           (not (c-looking-at-inexpr-block lim nil t)))
                          (save-excursion
                            (c-forward-sexp) (point)))
                         ;; Just gone back over some paren block?
                         ((looking-at "\\s\(")
                          (save-excursion
                            (goto-char (1+ (c-down-list-backward
                                            before-sws-pos)))
                            (c-crosses-statement-barrier-p
                             (point) maybe-after-boundary-pos)))
                         ;; Just gone back over an ordinary symbol of some sort?
                         (t (c-crosses-statement-barrier-p
                             (point) maybe-after-boundary-pos))))

                  (when boundary-pos
                    (setq pptok ptok
                          ptok tok
                          tok boundary-pos
                          sym 'boundary)
                    ;; Like a C "continue".  Analyze the next sexp.
                    (throw 'loop t))))

              ;; ObjC method def?
              (when (and c-opt-method-key
                         (setq saved (c-in-method-def-p)))
                (setq pos saved
                      ignore-labels t)  ; Avoid the label check on exit.
                (throw 'loop nil))      ; 3. ObjC method def.

              ;; Might we have a bitfield declaration, "<type> <id> : <size>"?
              (if c-has-bitfields
                  (cond
                   ;; The : <size> and <id> fields?
                   ((and (numberp c-maybe-labelp)
                         (not bitfield-size-pos)
                         (save-excursion
                           (goto-char (or tok start))
                           (not (looking-at c-keywords-regexp)))
                         (not (looking-at c-keywords-regexp))
                         (not (c-punctuation-in (point) c-maybe-labelp)))
                    (setq bitfield-size-pos (or tok start)
                          bitfield-id-pos (point)))
                   ;; The <type> field?
                   ((and bitfield-id-pos
                         (not bitfield-type-pos))
                    (if (and (looking-at c-symbol-key) ; Can only be an integer type.  :-)
                             (not (looking-at c-not-primitive-type-keywords-regexp))
                             (not (c-punctuation-in (point) tok)))
                        (setq bitfield-type-pos (point))
                      (setq bitfield-size-pos nil
                            bitfield-id-pos nil)))))

              ;; Handle labels.
              (unless (eq ignore-labels t)
                (when (numberp c-maybe-labelp)
                  ;; `c-crosses-statement-barrier-p' has found a colon, so we
                  ;; might be in a label now.  Have we got a real label
                  ;; (including a case label) or something like C++'s "public:"?
                  ;; A case label might use an expression rather than a token.
                  (setq after-case:-pos (or tok start))
                  (if (or (looking-at c-nonlabel-token-key) ; e.g. "while" or "'a'"
                          ;; Catch C++'s inheritance construct "class foo : bar".
                          (save-excursion
                            (and
                             (c-safe (c-backward-sexp) t)
                             (looking-at c-nonlabel-token-2-key))))
                      (setq c-maybe-labelp nil)
                    (if after-labels-pos ; Have we already encountered a label?
                        (if (not last-label-pos)
                            (setq last-label-pos (or tok start)))
                      (setq after-labels-pos (or tok start)))
                    (setq c-maybe-labelp t
                          label-good-pos nil))) ; bogus "label"

                (when (and (not label-good-pos) ; i.e. no invalid "label"'s yet
                                                ; been found.
                           (looking-at c-nonlabel-token-key)) ; e.g. "while :"
                  ;; We're in a potential label and it's the first
                  ;; time we've found something that isn't allowed in
                  ;; one.
                  (setq label-good-pos (or tok start))))

              ;; We've moved back by a sexp, so update the token positions.
              (setq sym nil
                    pptok ptok
                    ptok tok
                    tok (point)
                    pos tok) ; always non-nil
              )              ; end of (catch loop ....)
          )                  ; end of sexp-at-a-time (while ....)

        ;; If the stack isn't empty there might be errors to report.
        (while stack
          (if (and (vectorp saved-pos) (eq (length saved-pos) 3))
              (c-bos-report-error))
          (setq saved-pos (cdr (car stack))
                stack (cdr stack)))

        (when (and (eq ret 'same)
                   (not (memq sym '(boundary ignore nil))))
          ;; Need to investigate closer whether we've crossed
          ;; between a substatement and its containing statement.
          (if (setq saved (if (looking-at c-block-stmt-1-key)
                              ptok
                            pptok))
              (cond ((> start saved) (setq pos saved))
                    ((= start saved) (setq ret 'up)))))

        (when (and (not ignore-labels)
                   (eq c-maybe-labelp t)
                   (not (eq ret 'beginning))
                   after-labels-pos
                   (not bitfield-type-pos) ; Bitfields take precedence over labels.
                   (or (not label-good-pos)
                       (<= label-good-pos pos)
                       (progn
                         (goto-char (if (and last-label-pos
                                             (< last-label-pos start))
                                        last-label-pos
                                      pos))
                         (looking-at c-label-kwds-regexp))))
          ;; We're in a label.  Maybe we should step to the statement
          ;; after it.
          (if (< after-labels-pos start)
              (setq pos after-labels-pos)
            (setq ret 'label)
            (if (and last-label-pos (< last-label-pos start))
                ;; Might have jumped over several labels.  Go to the last one.
                (setq pos last-label-pos)))))

      ;; Have we got "case <expression>:"?
      (goto-char pos)
      (when (and after-case:-pos
                 (not (eq ret 'beginning))
                 (looking-at c-case-kwds-regexp))
        (if (< after-case:-pos start)
            (setq pos after-case:-pos))
        (if (eq ret 'same)
            (setq ret 'label)))

      ;; Skip over the unary operators that can start the statement.
      (while (progn
               (c-backward-syntactic-ws)
               ;; protect AWK post-inc/decrement operators, etc.
               (and (not (c-at-vsemi-p (point)))
                    (/= (skip-chars-backward "-+!*&~@`#") 0)))
        (setq pos (point)))
      (goto-char pos)
      ret)))
(defun c-punctuation-in (from to)
  "Return non-nil if there is a non-comment non-macro punctuation character
between FROM and TO.  FROM must not be in a string or comment.  The returned
value is the position of the first such character."
  (save-excursion
    (goto-char from)
    ;; Alternately skip symbol characters and syntactic whitespace,
    ;; never going beyond TO, until a round makes no progress.
    (let ((reached (point))
          (progressing t))
      (while progressing
        (skip-chars-forward c-symbol-chars to)
        (c-forward-syntactic-ws to)
        (if (> (point) reached)
            (setq reached (point))
          (setq progressing nil))))
    ;; Stopping short of TO means something other than a symbol
    ;; character or syntactic whitespace is in the way.
    (if (< (point) to)
        (point)
      nil)))

(defun c-crosses-statement-barrier-p (from to)
  "Return non-nil if buffer positions FROM to TO cross one or more
statement or declaration boundaries.  The returned value is actually
the position of the earliest boundary char.  FROM must not be within
a string or comment.

The variable `c-maybe-labelp' is set to the position of the first `:' that
might start a label (i.e. not part of `::' and not preceded by `?').  If a
single `?' is found, then `c-maybe-labelp' is cleared.

For AWK, a statement which is terminated by an EOL (not a \; or a }) is
regarded as having a \"virtual semicolon\" immediately after the last token on
the line.  If this virtual semicolon is _at_ from, the function recognizes it.

Note that this function might do hidden buffer changes.  See the
comment at the start of cc-engine.el for more info."
  (let* ((skip-chars
          ;; If the current language has CPP macros, insert # into skip-chars.
          (if c-opt-cpp-symbol
              (concat (substring c-stmt-delim-chars 0 1) ; "^"
                      c-opt-cpp-symbol                   ; usually "#"
                      (substring c-stmt-delim-chars 1))  ; e.g. ";{}?:"
            c-stmt-delim-chars))
         (non-skip-list
          (append (substring skip-chars 1) nil)) ; e.g. (?# ?\; ?{ ?} ?? ?:)
         lit-range vsemi-pos)
    (save-restriction
      (widen)
      (save-excursion
        (catch 'done
          (goto-char from)
          ;; Scan forward one line at a time, stopping at each character
          ;; that might be a delimiter, until TO is reached.
          (while (progn (skip-chars-forward
                         skip-chars
                         (min to (c-point 'bonl)))
                        (< (point) to))
            (cond
             ;; Virtual semicolon?
             ((and (bolp)
                   (save-excursion
                     (progn
                       (if (setq lit-range (c-literal-limits from)) ; Have we landed in a string/comment?
                           (goto-char (car lit-range)))
                       (c-backward-syntactic-ws) ; ? put a limit here, maybe?
                       (setq vsemi-pos (point))
                       (c-at-vsemi-p))))
              (throw 'done vsemi-pos))
             ;; In a string/comment?
             ((setq lit-range (c-literal-limits from))
              (goto-char (cdr lit-range)))
             ((eq (char-after) ?:)
              (forward-char)
              (if (and (eq (char-after) ?:)
                       (< (point) to))
                  ;; Ignore scope operators.
                  (forward-char)
                (setq c-maybe-labelp (1- (point)))))
             ((eq (char-after) ??)
              ;; A question mark.  Can't be a label, so stop
              ;; looking for more : and ?.
              (setq c-maybe-labelp nil
                    skip-chars (substring c-stmt-delim-chars 0 -2)))
             ;; At a CPP construct?
             ((and c-opt-cpp-symbol (looking-at c-opt-cpp-symbol)
                   (save-excursion
                     (forward-line 0)
                     (looking-at c-opt-cpp-prefix)))
              (c-end-of-macro))
             ((memq (char-after) non-skip-list)
              (throw 'done (point)))))
          ;; In trailing space after an as yet undetected virtual semicolon?
          (c-backward-syntactic-ws from)
          (if (and (< (point) to)
                   (c-at-vsemi-p))
              (point)
            nil))))))

(defun c-at-statement-start-p ()
  "Return non-nil if the point is at the first token in a statement
or somewhere in the syntactic whitespace before it.

A \"statement\" here is not restricted to those inside code blocks.
Any kind of declaration-like construct that occur outside function
bodies is also considered a \"statement\".

Note that this function might do hidden buffer changes.  See the
comment at the start of cc-engine.el for more info."

  (save-excursion
    (let ((origin (point))
          ;; Bound here so the `c-crosses-statement-barrier-p' call
          ;; below can't change the caller's value.
          c-maybe-labelp)
      ;; Go back to just after the closest preceding delimiter char.
      (c-syntactic-skip-backward (substring c-stmt-delim-chars 1) nil t)
      (cond
       ((bobp))
       ((eq (char-before) ?}))
       ;; An open brace starts a statement unless it opens a special
       ;; brace list.  Note that point is left before the brace when
       ;; that check is made.
       ((and (eq (char-before) ?{)
             (not (and c-special-brace-lists
                       (progn (backward-char)
                              (c-looking-at-special-brace-list))))))
       (t (c-crosses-statement-barrier-p (point) origin))))))

(defun c-at-expression-start-p ()
  "Return non-nil if the point is at the first token in an expression or
statement, or somewhere in the syntactic whitespace before it.

An \"expression\" here is a bit different from the normal language
grammar sense: It's any sequence of expression tokens except commas,
unless they are enclosed inside parentheses of some kind.  Also, an
expression never continues past an enclosing parenthesis, but it might
contain parenthesis pairs of any sort except braces.

Since expressions never cross statement boundaries, this function also
recognizes statement beginnings, just like `c-at-statement-start-p'.

Note that this function might do hidden buffer changes.  See the
comment at the start of cc-engine.el for more info."

  (save-excursion
    (let ((origin (point))
          ;; Use the delimiter set that includes the comma, both for the
          ;; backward skip and for the barrier check below.
          (c-stmt-delim-chars c-stmt-delim-chars-with-comma)
          ;; Bound here so the barrier check can't change the caller's value.
          c-maybe-labelp)
      (c-syntactic-skip-backward (substring c-stmt-delim-chars 1) nil t)
      (cond
       ((bobp))
       ((memq (char-before) '(?{ ?})))
       ;; Directly after an opening paren of some kind?
       ((save-excursion (backward-char)
                        (looking-at "\\s(")))
       (t (c-crosses-statement-barrier-p (point) origin))))))


;; A set of functions that covers various idiosyncrasies in
;; implementations of `forward-comment'.

;; Note: Some emacsen incorrectly consider any line comment
;; ending with a backslash to continue onto the next line.  I can't
;; think of any way to work around that in a reliable way without
;; changing the buffer, though.  Suggestions welcome. ;) (No,
;; temporarily changing the syntax for backslash doesn't work since we
;; must treat escapes in string literals correctly.)

(defun c-forward-single-comment ()
  "Move forward past whitespace and the closest following comment, if any.
Return t if a comment was found, nil otherwise.  In either case, the
point is moved past the following whitespace.  Line continuations,
i.e. a backslashes followed by line breaks, are treated as whitespace.
The line breaks that end line comments are considered to be the
comment enders, so the point will be put on the beginning of the next
line if it moved past a line comment.

This function does not do any hidden buffer changes."

  (let ((origin (point)))
    ;; Skip leading whitespace and line continuations by hand.
    (if (looking-at "\\([ \t\n\r\f\v]\\|\\\\[\n\r]\\)+")
        (goto-char (match-end 0)))

    (cond
     ;; No comment here.
     ((not (forward-comment 1)) nil)

     ;; Some emacsen (e.g. XEmacs 21) return t when moving
     ;; forwards at eob.
     ((eobp) nil)

     (t
      ;; Emacs includes the ending newline in a b-style (c++)
      ;; comment, but XEmacs doesn't.  We depend on the Emacs
      ;; behavior (which also is symmetric).
      (when (and (eolp)
                 (nth 7 (parse-partial-sexp origin (point))))
        (forward-char 1))
      t))))

(defsubst c-forward-comments ()
  "Move forward past all following whitespace and comments.
Line continuations, i.e. a backslashes followed by line breaks, are
treated as whitespace.

Note that this function might do hidden buffer changes.  See the
comment at the start of cc-engine.el for more info."

  ;; Each round either consumes a batch of comments (with the
  ;; whitespace around them) or one line continuation.  The loop ends
  ;; when neither is possible.
  (while (or
          ;; If forward-comment in at least XEmacs 21 is given a large
          ;; positive value, it'll loop all the way through if it hits
          ;; eob.
          (and (forward-comment 5)
               ;; Some emacsen (e.g. XEmacs 21) return t when moving
               ;; forwards at eob.
               (not (eobp)))

          (when (looking-at "\\\\[\n\r]")
            (forward-char 2)
            t))))

(defun c-backward-single-comment ()
  "Move backward past whitespace and the closest preceding comment, if any.
Return t if a comment was found, nil otherwise.  In either case, the
point is moved past the preceding whitespace.  Line continuations,
i.e. a backslashes followed by line breaks, are treated as whitespace.
The line breaks that end line comments are considered to be the
comment enders, so the point cannot be at the end of the same line to
move over a line comment.

This function does not do any hidden buffer changes."

  (let ((start (point)))
    ;; When we got newline terminated comments, forward-comment in all
    ;; supported emacsen so far will stop at eol of each line not
    ;; ending with a comment when moving backwards.  This corrects for
    ;; that, and at the same time handles line continuations.
    (while (progn
             (skip-chars-backward " \t\n\r\f\v")
             (and (looking-at "[\n\r]")
                  (eq (char-before) ?\\)))
      (backward-char))

    (if (bobp)
        ;; Some emacsen (e.g. Emacs 19.34) return t when moving
        ;; backwards at bob.
        nil

      ;; Leave point after the closest following newline if we've
      ;; backed up over any above, since forward-comment won't move
      ;; backward over a line comment if point is at the end of the
      ;; same line.
      (re-search-forward "\\=\\s *[\n\r]" start t)

      ;; `open-paren-in-column-0-is-defun-start' is bound to nil around
      ;; each backward `forward-comment' call.
      (if (if (let (open-paren-in-column-0-is-defun-start) (forward-comment -1))
              (if (eolp)
                  ;; If forward-comment above succeeded and we're at eol
                  ;; then the newline we moved over above didn't end a
                  ;; line comment, so we give it another go.
                  (let (open-paren-in-column-0-is-defun-start)
                    (forward-comment -1))
                t))

          ;; Emacs <= 20 and XEmacs move back over the closer of a
          ;; block comment that lacks an opener.
          (if (looking-at "\\*/")
              (progn (forward-char 2) nil)
            t)))))

(defsubst c-backward-comments ()
  "Move backward past all preceding whitespace and comments.
Line continuations, i.e. backslashes followed by line breaks, are
treated as whitespace.  The line breaks that end line comments are
considered to be the comment enders, so the point cannot be at the end
of the same line to move over a line comment.  Unlike
c-backward-syntactic-ws, this function doesn't move back over
preprocessor directives.

Note that this function might do hidden buffer changes.  See the
comment at the start of cc-engine.el for more info."

  (let ((start (point)))
    (while (and
	    ;; `forward-comment' in some emacsen (e.g. XEmacs 21.4)
	    ;; return t when moving backwards at bob.
	    (not (bobp))

	    ;; The condition is non-nil when a comment was moved over;
	    ;; `open-paren-in-column-0-is-defun-start' is bound to nil
	    ;; around the `forward-comment' calls.
	    (if (let (open-paren-in-column-0-is-defun-start moved-comment)
		  (while
		      (and (not (setq moved-comment (forward-comment -1)))
		      ;; Cope specifically with ^M^J here -
		      ;; forward-comment sometimes gets stuck after ^Ms,
		      ;; sometimes after ^M^J.
			   (or
			    (when (eq (char-before) ?\r)
			      (backward-char)
			      t)
			    (when (and (eq (char-before) ?\n)
				       (eq (char-before (1- (point))) ?\r))
			      (backward-char 2)
			      t))))
		  moved-comment)
		(if (looking-at "\\*/")
		    ;; Emacs <= 20 and XEmacs move back over the
		    ;; closer of a block comment that lacks an opener.
		    (progn (forward-char 2) nil)
		  t)

	      ;; XEmacs treats line continuations as whitespace but
	      ;; only in the backward direction, which seems a bit
	      ;; odd.  Anyway, this is necessary for Emacs.
	      (when (and (looking-at "[\n\r]")
			 (eq (char-before) ?\\)
			 (< (point) start))
		(backward-char)
		t))))))


;; Tools for skipping over syntactic whitespace.

;; The following functions use text properties to cache searches over
;; large regions of syntactic whitespace.  It works as follows:
;;
;; o  If a syntactic whitespace region contains anything but simple
;;    whitespace (i.e. space, tab and line breaks), the text property
;;    `c-in-sws' is put over it.  At places where we have stopped
;;    within that region there's also a `c-is-sws' text property.
;;    That is because there is typically nested whitespace inside that
;;    must be handled separately, e.g. whitespace inside a comment or
;;    cpp directive.  Thus, from one point with `c-is-sws' it's safe
;;    to jump to another point with that property within the same
;;    `c-in-sws' region.  It can be likened to a ladder where
;;    `c-in-sws' marks the bars and `c-is-sws' the rungs.
;;
;; o  The `c-is-sws' property is put on the simple whitespace chars at
;;    a "rung position" and also maybe on the first following char.
;;    As many characters as can be conveniently found in this range
;;    are marked, but no assumption can be made that the whole range
;;    is marked (it could be clobbered by later changes, for
;;    instance).
;;
;;    Note that some part of the beginning of a sequence of simple
;;    whitespace might be part of the end of a preceding line comment
;;    or cpp directive and must not be considered part of the "rung".
;;    Such whitespace is some amount of horizontal whitespace followed
;;    by a newline.  In the case of cpp directives it could also be
;;    two newlines with horizontal whitespace between them.
;;
;;    The reason to include the first following char is to cope with
;;    "rung positions" that don't have any ordinary whitespace.  If
;;    `c-is-sws' is put on a token character it does not have
;;    `c-in-sws' set simultaneously.  That's the only case when that
;;    can occur, and the reason for not extending the `c-in-sws'
;;    region to cover it is that the `c-in-sws' region could then be
;;    accidentally merged with a following one if the token is only
;;    one character long.
;;
;; o  On buffer changes the `c-in-sws' and `c-is-sws' properties are
;;    removed in the changed region.  If the change was inside
;;    syntactic whitespace that means that the "ladder" is broken, but
;;    a later call to `c-forward-sws' or `c-backward-sws' will use the
;;    parts on either side and use an ordinary search only to "repair"
;;    the gap.
;;
;;    Special care needs to be taken if a region is removed: If there
;;    are `c-in-sws' on both sides of it which do not connect inside
;;    the region then they can't be joined.  If e.g. a marked macro is
;;    broken, syntactic whitespace inside the new text might be
;;    marked.  If those marks would become connected with the old
;;    `c-in-sws' range around the macro then we could get a ladder
;;    with one end outside the macro and the other at some whitespace
;;    within it.
;;
;; The main motivation for this system is to increase the speed in
;; skipping over the large whitespace regions that can occur at the
;; top level in e.g. header files that contain a lot of comments and
;; cpp directives.  For small comments inside code it's probably
;; slower than using `forward-comment' straightforwardly, but speed is
;; not a significant factor there anyway.

; (defface c-debug-is-sws-face
;   '((t (:background "GreenYellow")))
;   "Debug face to mark the `c-is-sws' property.")
; (defface c-debug-in-sws-face
;   '((t (:underline t)))
;   "Debug face to mark the `c-in-sws' property.")

; (defun c-debug-put-sws-faces ()
;   ;; Put the sws debug faces on all the `c-is-sws' and `c-in-sws'
;   ;; properties in the buffer.
;   (interactive)
;   (save-excursion
;     (c-save-buffer-state (in-face)
;       (goto-char (point-min))
;       (setq in-face (if (get-text-property (point) 'c-is-sws)
; 			(point)))
;       (while (progn
; 	       (goto-char (next-single-property-change
; 			   (point) 'c-is-sws nil (point-max)))
; 	       (if in-face
; 		   (progn
; 		     (c-debug-add-face in-face (point) 'c-debug-is-sws-face)
; 		     (setq in-face nil))
; 		 (setq in-face (point)))
; 	       (not (eobp))))
;       (goto-char (point-min))
;       (setq in-face (if (get-text-property (point) 'c-in-sws)
; 			(point)))
;       (while (progn
; 	       (goto-char (next-single-property-change
; 			   (point) 'c-in-sws nil (point-max)))
; 	       (if in-face
; 		   (progn
; 		     (c-debug-add-face in-face (point) 'c-debug-in-sws-face)
; 		     (setq in-face nil))
; 		 (setq in-face (point)))
; 	       (not (eobp)))))))

(defmacro c-debug-sws-msg (&rest args)
  "Debug message hook for the sws functions; currently a no-op.
ARGS are ignored and every call expands to nil.  Enable the
commented-out form in the body to pass ARGS on to `message'."
  ;;`(message ,@args)
  nil)

(defmacro c-put-is-sws (beg end)
Alan Mackenzie's avatar