/* Declarations having to do with GNU Emacs syntax tables.

Copyright (C) 1985, 1993-1994, 1997-1998, 2001-2013 Free Software
Foundation, Inc.

This file is part of GNU Emacs.

GNU Emacs is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

GNU Emacs is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
Jim Blandy's avatar
Jim Blandy committed
20 21


22
/* Called by the UPDATE_SYNTAX_TABLE... and SETUP_SYNTAX_TABLE... macros
   in this file as
     update_syntax_table (position, count-or-direction, init-flag, object).
   NOTE(review): the argument meanings are inferred from those call
   sites only; confirm against the definition.  */
extern void update_syntax_table (ptrdiff_t, EMACS_INT, int, Lisp_Object);
Jim Blandy's avatar
Jim Blandy committed
23 24 25

/* The standard syntax table is stored where it will automatically
   be used in all new buffers: it is the syntax_table slot of
   buffer_defaults, accessed through BVAR.  */
#define Vstandard_syntax_table BVAR (&buffer_defaults, syntax_table)
Jim Blandy's avatar
Jim Blandy committed
27

28 29 30
/* A syntax table is a chartable whose elements are cons cells
   (CODE+FLAGS . MATCHING-CHAR).  MATCHING-CHAR can be nil if the char
   is not a kind of parenthesis.

   The low 8 bits of CODE+FLAGS hold a code, as follows:  */
Jim Blandy's avatar
Jim Blandy committed
33 34 35 36 37 38 39 40 41 42 43

/* Syntax classes.  The numeric value of each enumerator is the code
   kept in the low 8 bits of a syntax entry's CODE+FLAGS (the SYNTAX
   macro casts those bits straight to this type), so enumerators must
   keep their positions.  */
enum syntaxcode
  {
    Swhitespace,    /* for a whitespace character */
    Spunct,         /* for random punctuation characters */
    Sword,          /* for a word constituent */
    Ssymbol,        /* symbol constituent but not word constituent */
    Sopen,          /* for a beginning delimiter */
    Sclose,         /* for an ending delimiter */
    Squote,         /* for a prefix character like Lisp ' */
    Sstring,        /* for a string-grouping character like Lisp " */
    Smath,          /* for delimiters like $ in TeX.  */
    Sescape,        /* for a character that begins a C-style escape */
    Scharquote,     /* for a character that quotes the following character */
    Scomment,       /* for a comment-starting character */
    Sendcomment,    /* for a comment-ending character */
    Sinherit,       /* use the standard syntax table for this character */
    Scomment_fence, /* Starts/ends comment which is delimited on the
                       other side by any char with the same syntaxcode.  */
    Sstring_fence,  /* Starts/ends string which is delimited on the
                       other side by any char with the same syntaxcode.  */
    Smax            /* Upper bound (exclusive) on codes that are
                       meaningful */
  };

57
/* Set the syntax entry VAL for char C in table TABLE.
   Expands to a single CHAR_TABLE_SET call; every argument is
   parenthesized before being passed on.  */

#define SET_RAW_SYNTAX_ENTRY(table, c, val)	\
  CHAR_TABLE_SET ((table), (c), (val))
61

62 63
/* Set the syntax entry VAL for char-range RANGE in table TABLE.
   RANGE is a cons (FROM . TO) specifying the range of characters.
   Expands to a call to Fset_char_table_range.  */

#define SET_RAW_SYNTAX_ENTRY_RANGE(table, range, val)	\
  Fset_char_table_range ((table), (range), (val))
Jim Blandy's avatar
Jim Blandy committed
67

68
/* SYNTAX_ENTRY fetches the information from the entry for character C
   in the current syntax table, or from globally kept data (gl_state).
   Does inheritance.

   CURRENT_SYNTAX_TABLE gives the syntax table valid for current
   position, it is either the buffer's syntax table, or syntax table
   found in text properties.

   When SYNTAX_ENTRY_VIA_PROPERTY is defined, SYNTAX_ENTRY yields
   gl_state.global_code whenever gl_state.use_global is nonzero, and
   the table consulted is gl_state.current_syntax_table.  Otherwise
   SYNTAX_ENTRY is plain SYNTAX_ENTRY_INT on the current buffer's
   syntax table.  */

#ifdef SYNTAX_ENTRY_VIA_PROPERTY
#  define SYNTAX_ENTRY(c)                                             \
    (gl_state.use_global ? gl_state.global_code : SYNTAX_ENTRY_INT (c))
#  define CURRENT_SYNTAX_TABLE gl_state.current_syntax_table
#else
#  define SYNTAX_ENTRY SYNTAX_ENTRY_INT
#  define CURRENT_SYNTAX_TABLE BVAR (current_buffer, syntax_table)
#endif
83

84
/* Look up character C in CURRENT_SYNTAX_TABLE with CHAR_TABLE_REF,
   without looking at gl_state.use_global / gl_state.global_code.  */
#define SYNTAX_ENTRY_INT(c) CHAR_TABLE_REF (CURRENT_SYNTAX_TABLE, (c))
85

86
/* Extract the information from the entry for character C
   in the current syntax table.

   SYNTAX (C) yields the low 8 bits of the entry's car as an
   enum syntaxcode; SYNTAX_WITH_FLAGS (C) yields the whole car as an
   integer; SYNTAX_MATCH (C) yields the entry's cdr.  When the entry is
   not a cons they yield Swhitespace, (int) Swhitespace and Qnil
   respectively.  */

#ifdef __GNUC__
/* GCC statement expressions: the fetched entry lives in a temporary
   that is local to each expansion.  */
#define SYNTAX(c)							\
  ({ Lisp_Object _syntax_temp;						\
     _syntax_temp = SYNTAX_ENTRY (c);					\
     (CONSP (_syntax_temp)						\
      ? (enum syntaxcode) (XINT (XCAR (_syntax_temp)) & 0xff)		\
      : Swhitespace); })

#define SYNTAX_WITH_FLAGS(c)						\
  ({ Lisp_Object _syntax_temp;						\
     _syntax_temp = SYNTAX_ENTRY (c);					\
     (CONSP (_syntax_temp)						\
      ? XINT (XCAR (_syntax_temp))					\
      : (int) Swhitespace); })

#define SYNTAX_MATCH(c)							\
  ({ Lisp_Object _syntax_temp;						\
     _syntax_temp = SYNTAX_ENTRY (c);					\
     (CONSP (_syntax_temp)						\
      ? XCDR (_syntax_temp)						\
      : Qnil); })
#else
/* Without statement expressions the fetched entry is stored in the
   shared variable syntax_temp, which every expansion overwrites.  */
extern Lisp_Object syntax_temp;
#define SYNTAX(c)							\
  (syntax_temp = SYNTAX_ENTRY ((c)),					\
   (CONSP (syntax_temp)							\
    ? (enum syntaxcode) (XINT (XCAR (syntax_temp)) & 0xff)	\
    : Swhitespace))

#define SYNTAX_WITH_FLAGS(c)						\
  (syntax_temp = SYNTAX_ENTRY ((c)),					\
   (CONSP (syntax_temp)							\
    ? XINT (XCAR (syntax_temp))					\
    : (int) Swhitespace))

#define SYNTAX_MATCH(c)							\
  (syntax_temp = SYNTAX_ENTRY ((c)),					\
   (CONSP (syntax_temp)							\
    ? XCDR (syntax_temp)						\
    : Qnil))
#endif
Jim Blandy's avatar
Jim Blandy committed
130

Richard M. Stallman's avatar
Richard M. Stallman committed
131

132 133
/* Whether the syntax of the character C has the prefix flag set.
   NOTE(review): presumably a nonzero result means the flag is set;
   the definition is not part of this header -- confirm there.  */
extern int syntax_prefix_flag_p (int c);
134

Jim Blandy's avatar
Jim Blandy committed
135 136
/* This array, indexed by a character, contains the syntax code which that
   character signifies (as a char).  For example,
   (enum syntaxcode) syntax_spec_code['w'] is Sword.
   The declared size, 0400, is octal for 256.  */

extern unsigned char syntax_spec_code[0400];

141 142 143 144
/* Indexed by syntax code, give the letter that describes it.
   The array has 16 slots, which equals Smax, i.e. one per
   enum syntaxcode value below Smax.  */

extern char syntax_code_spec[16];

145
/* Convert the byte offset BYTEPOS into a character position,
   for the object recorded in gl_state with SETUP_SYNTAX_TABLE_FOR_OBJECT.

   The value is meant for use in the UPDATE_SYNTAX_TABLE... macros.
   These macros do nothing when parse_sexp_lookup_properties is 0,
   so we return 0 in that case, for speed.

   Otherwise the conversion depends on gl_state.object:
   - a string: string_byte_to_char on that string;
   - a buffer: BYTEPOS is shifted by that buffer's BUF_BEGV_BYTE - 1,
     converted, and the result shifted back by BUF_BEGV - 1;
   - nil: the same computation with the current BEGV_BYTE / BEGV;
   - anything else: BYTEPOS is returned unchanged.
   BYTEPOS appears in several branches, but only one branch is
   evaluated per use.  */

#define SYNTAX_TABLE_BYTE_TO_CHAR(bytepos)				\
  (! parse_sexp_lookup_properties					\
   ? 0									\
   : STRINGP (gl_state.object)						\
   ? string_byte_to_char (gl_state.object, (bytepos))			\
   : BUFFERP (gl_state.object)						\
   ? (buf_bytepos_to_charpos						\
      (XBUFFER (gl_state.object),					\
       (bytepos) + BUF_BEGV_BYTE (XBUFFER (gl_state.object)) - 1)	\
      - BUF_BEGV (XBUFFER (gl_state.object)) + 1)			\
   : NILP (gl_state.object)						\
   ? BYTE_TO_CHAR ((bytepos) + BEGV_BYTE - 1) - BEGV + 1		\
   : (bytepos))

164 165
/* Make syntax table state (gl_state) good for CHARPOS, assuming it is
   currently good for a position before CHARPOS.

   Does nothing unless parse_sexp_lookup_properties is nonzero and
   CHARPOS has reached gl_state.e_property.  In that case it calls
   update_syntax_table at CHARPOS + gl_state.offset with count 1.
   The value is 1 if that call was made, 0 otherwise.
   CHARPOS may be evaluated more than once.  */

#define UPDATE_SYNTAX_TABLE_FORWARD(charpos)			\
  (parse_sexp_lookup_properties					\
   && (charpos) >= gl_state.e_property				\
   ? (update_syntax_table ((charpos) + gl_state.offset, 1, 0,	\
			   gl_state.object),			\
      1)							\
   : 0)
174

175 176
/* Make syntax table state (gl_state) good for CHARPOS, assuming it is
   currently good for a position after CHARPOS.

   Does nothing unless parse_sexp_lookup_properties is nonzero and
   CHARPOS lies before gl_state.b_property.  In that case it calls
   update_syntax_table at CHARPOS + gl_state.offset with count -1.
   The value is 1 if that call was made, 0 otherwise.
   CHARPOS may be evaluated more than once.  */

#define UPDATE_SYNTAX_TABLE_BACKWARD(charpos)			\
  (parse_sexp_lookup_properties					\
   && (charpos) < gl_state.b_property				\
   ? (update_syntax_table ((charpos) + gl_state.offset, -1, 0,	\
			   gl_state.object),			\
      1)							\
   : 0)
185

186
/* Make syntax table good for CHARPOS.

   When parse_sexp_lookup_properties is nonzero, first checks whether
   CHARPOS lies before gl_state.b_property (update with count -1),
   then whether it has reached gl_state.e_property (update with
   count 1).  The value is 1 if update_syntax_table was called,
   0 otherwise.  CHARPOS may be evaluated more than once.  */

#define UPDATE_SYNTAX_TABLE(charpos)				\
  (parse_sexp_lookup_properties					\
   && (charpos) < gl_state.b_property				\
   ? (update_syntax_table ((charpos) + gl_state.offset, -1, 0,	\
			   gl_state.object),			\
      1)							\
   : (parse_sexp_lookup_properties				\
      && (charpos) >= gl_state.e_property			\
      ? (update_syntax_table ((charpos) + gl_state.offset, 1, 0,\
			      gl_state.object),			\
	 1)							\
      : 0))
200

201 202 203 204 205
/* This macro sets up the buffer-global syntax table: it clears
   gl_state.use_global and points gl_state.current_syntax_table at the
   current buffer's syntax table.  Usable as a single statement.  */
#define SETUP_BUFFER_SYNTAX_TABLE()					\
do									\
  {									\
    gl_state.use_global = 0;						\
    gl_state.current_syntax_table = BVAR (current_buffer, syntax_table);\
  } while (0)

209 210 211 212
/* This macro should be called with FROM at the start of forward
   search, or after the last position of the backward search.  It
   makes sure that the first char is picked up with correct table, so
   one does not need to call UPDATE_SYNTAX_TABLE immediately after the
   call.
   Sign of COUNT gives the direction of the search.

   It resets gl_state for the current buffer (valid range BEGV up to
   but excluding ZV + 1, object nil, offset 0).  Then, if
   parse_sexp_lookup_properties is nonzero and there is a character to
   look at (searching forward, or FROM > BEGV), it calls
   update_syntax_table at FROM when COUNT > 0 and at FROM - 1
   otherwise.  FROM and COUNT may be evaluated more than once.  */

#define SETUP_SYNTAX_TABLE(FROM, COUNT)					\
do									\
  {									\
    SETUP_BUFFER_SYNTAX_TABLE ();					\
    gl_state.b_property = BEGV;						\
    gl_state.e_property = ZV + 1;					\
    gl_state.object = Qnil;						\
    gl_state.offset = 0;						\
    if (parse_sexp_lookup_properties					\
	&& ((COUNT) > 0 || (FROM) > BEGV))				\
      update_syntax_table ((COUNT) > 0 ? (FROM) : (FROM) - 1, (COUNT),	\
			   1, Qnil);					\
  }									\
while (0)
231 232

/* Same as SETUP_SYNTAX_TABLE, but in OBJECT.  If OBJECT is nil, use
   current buffer.  If it is t (which is only used in
   fast_c_string_match_ignore_case), ignore properties altogether.

   This is meant for regex.c to use.  For buffers, regex.c passes arguments
   to the UPDATE_SYNTAX_TABLE macros which are relative to BEGV.
   So if it is a buffer (or nil), we set the offset field to BEGV - 1
   and the valid range starts at 1.  For t the valid range is
   0 .. PTRDIFF_MAX; for any other object it is 0 .. SCHARS (OBJECT),
   both with offset 0.

   Finally, if parse_sexp_lookup_properties is nonzero,
   update_syntax_table is called at FROM + offset when COUNT > 0 and
   one position earlier otherwise.  COUNT is parenthesized at every
   use so that an expression argument such as `forward ? 1 : -1' is
   treated as a single value.  FROM and COUNT may be evaluated more
   than once.  */

#define SETUP_SYNTAX_TABLE_FOR_OBJECT(OBJECT, FROM, COUNT)		\
do									\
  {									\
    SETUP_BUFFER_SYNTAX_TABLE ();					\
    gl_state.object = (OBJECT);						\
    if (BUFFERP (gl_state.object))					\
      {									\
	struct buffer *buf = XBUFFER (gl_state.object);			\
	gl_state.b_property = 1;					\
	gl_state.e_property = BUF_ZV (buf) - BUF_BEGV (buf) + 1;	\
	gl_state.offset = BUF_BEGV (buf) - 1;				\
      }									\
    else if (NILP (gl_state.object))					\
      {									\
	gl_state.b_property = 1;					\
	gl_state.e_property = ZV - BEGV + 1;				\
	gl_state.offset = BEGV - 1;					\
      }									\
    else if (EQ (gl_state.object, Qt))					\
      {									\
	gl_state.b_property = 0;					\
	gl_state.e_property = PTRDIFF_MAX;				\
	gl_state.offset = 0;						\
      }									\
    else								\
      {									\
	gl_state.b_property = 0;					\
	gl_state.e_property = 1 + SCHARS (gl_state.object);		\
	gl_state.offset = 0;						\
      }									\
    if (parse_sexp_lookup_properties)					\
      update_syntax_table (((FROM) + gl_state.offset			\
			    + ((COUNT) > 0 ? 0 : -1)),			\
			   (COUNT), 1, gl_state.object);		\
  }									\
while (0)
276 277 278

struct gl_state_s
{
279
  Lisp_Object object;			/* The object we are scanning. */
280 281
  ptrdiff_t start;			/* Where to stop. */
  ptrdiff_t stop;			/* Where to stop. */
282 283 284 285 286
  int use_global;			/* Whether to use global_code
					   or c_s_t. */
  Lisp_Object global_code;		/* Syntax code of current char. */
  Lisp_Object current_syntax_table;	/* Syntax table for current pos. */
  Lisp_Object old_prop;			/* Syntax-table prop at prev pos. */
287 288
  ptrdiff_t b_property;			/* First index where c_s_t is valid. */
  ptrdiff_t e_property;			/* First index where c_s_t is
289 290 291 292 293 294 295 296
					   not valid. */
  INTERVAL forward_i;			/* Where to start lookup on forward */
  INTERVAL backward_i;			/* or backward movement.  The
					   data in c_s_t is valid
					   between these intervals,
					   and possibly at the
					   intervals too, depending
					   on: */
297
  /* Offset for positions specified to UPDATE_SYNTAX_TABLE.  */
298
  ptrdiff_t offset;
299 300 301
};

/* The scanner state object that the SYNTAX_ENTRY, UPDATE_SYNTAX_TABLE...
   and SETUP_...SYNTAX_TABLE... macros in this file read and write.  */
extern struct gl_state_s gl_state;
302
/* Defined outside this header.
   NOTE(review): presumably takes a starting position and a word count
   and returns the resulting position -- confirm against the
   definition.  */
extern ptrdiff_t scan_words (ptrdiff_t, EMACS_INT);