regex-emacs.c 148 KB
Newer Older
1
/* Emacs regular expression matching and search
Karl Berry's avatar
Karl Berry committed
2

Paul Eggert's avatar
Paul Eggert committed
3
   Copyright (C) 1993-2019 Free Software Foundation, Inc.
Karl Berry's avatar
Karl Berry committed
4

5 6
   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
Miles Bader's avatar
Miles Bader committed
7
   the Free Software Foundation; either version 3, or (at your option)
8 9 10 11
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
Juanma Barranquero's avatar
Juanma Barranquero committed
12
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 14 15
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
16
   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
17

18
/* TODO:
19
   - structure the opcode space into opcode+flag.
20
   - replace (succeed_n + jump_n + set_number_at) with something that doesn't
21
     need to modify the compiled regexp so that re_search can be reentrant.
22
   - get rid of on_failure_jump_smart by doing the optimization in re_comp
23
     rather than at run-time, so that re_search can be reentrant.
24
*/
25

26
#include <config.h>
27

28
#include "regex-emacs.h"
29

30
#include <stdlib.h>
31

32 33 34 35
#include "character.h"
#include "buffer.h"
#include "syntax.h"
#include "category.h"
36

37
/* Maximum number of duplicates an interval can allow.  Some systems
38 39 40
   define this in other header files, but we want our value, so remove
   any previous define.  Repeat counts are stored in opcodes as 2-byte
   unsigned integers.  */
41 42
#ifdef RE_DUP_MAX
# undef RE_DUP_MAX
43
#endif
44 45
#define RE_DUP_MAX (0xffff)

46
/* Make syntax table lookup grant data in gl_state.  */
47
#define SYNTAX(c) syntax_property (c, 1)
48

49
/* Convert the pointer to the char to BEG-based offset from the start.  */
50
#define PTR_TO_OFFSET(d) POS_AS_IN_BUFFER (POINTER_TO_OFFSET (d))
51
/* Strings are 0-indexed, buffers are 1-indexed; pun on the boolean
52
   result to get the right base index.  */
53
#define POS_AS_IN_BUFFER(p)                                    \
54
  ((p) + (NILP (gl_state.object) || BUFFERP (gl_state.object)))
55

56 57 58
#define RE_MULTIBYTE_P(bufp) ((bufp)->multibyte)
#define RE_TARGET_MULTIBYTE_P(bufp) ((bufp)->target_multibyte)
#define RE_STRING_CHAR(p, multibyte) \
59
  (multibyte ? STRING_CHAR (p) : *(p))
60
#define RE_STRING_CHAR_AND_LENGTH(p, len, multibyte) \
61
  (multibyte ? STRING_CHAR_AND_LENGTH (p, len) : ((len) = 1, *(p)))
62

63
#define RE_CHAR_TO_MULTIBYTE(c) UNIBYTE_TO_CHAR (c)
64

65
#define RE_CHAR_TO_UNIBYTE(c) CHAR_TO_BYTE_SAFE (c)
66

67 68 69
/* Set C a (possibly converted to multibyte) character before P.  P
   points into a string which is the virtual concatenation of STR1
   (which ends at END1) or STR2 (which ends at END2).  */
70
#define GET_CHAR_BEFORE_2(c, p, str1, end1, str2, end2)			     \
71
  do {									     \
72
    if (target_multibyte)						     \
73 74
      {									     \
	re_char *dtemp = (p) == (str2) ? (end1) : (p);			     \
75 76 77
	re_char *dlimit = (p) > (str2) && (p) <= (end2) ? (str2) : (str1);   \
	while (dtemp-- > dlimit && !CHAR_HEAD_P (*dtemp))		     \
	  continue;							     \
78
	c = STRING_CHAR (dtemp);					     \
79 80 81 82
      }									     \
    else								     \
      {									     \
	(c = ((p) == (str2) ? (end1) : (p))[-1]);			     \
83
	(c) = RE_CHAR_TO_MULTIBYTE (c);					     \
84
      }									     \
85
  } while (false)
86

87 88
/* Set C a (possibly converted to multibyte) character at P, and set
   LEN to the byte length of that character.  */
89
#define GET_CHAR_AFTER(c, p, len)		\
90
  do {						\
91
    if (target_multibyte)			\
92
      (c) = STRING_CHAR_AND_LENGTH (p, len);	\
93 94
    else					\
      {						\
95
	(c) = *p;				\
96
	len = 1;				\
97
	(c) = RE_CHAR_TO_MULTIBYTE (c);		\
98
      }						\
99
   } while (false)
100

101
/* 1 if C is an ASCII character.  */
102
#define IS_REAL_ASCII(c) ((c) < 0200)
103

104
/* 1 if C is a unibyte character.  */
105
#define ISUNIBYTE(c) (SINGLE_BYTE_CHAR_P ((c)))
106

107
/* The Emacs definitions should not be directly affected by locales.  */
108

109
/* In Emacs, these are only used for single-byte characters.  */
110 111 112
#define ISDIGIT(c) ((c) >= '0' && (c) <= '9')
#define ISCNTRL(c) ((c) < ' ')
#define ISXDIGIT(c) (0 <= char_hexdigit (c))
113 114 115

/* The rest must handle multibyte characters.  */

116
#define ISBLANK(c) (IS_REAL_ASCII (c)			\
117 118 119
                     ? ((c) == ' ' || (c) == '\t')      \
                     : blankp (c))

120
#define ISGRAPH(c) (SINGLE_BYTE_CHAR_P (c)				\
121
		     ? (c) > ' ' && !((c) >= 0177 && (c) <= 0240)	\
122
		     : graphicp (c))
123

124
#define ISPRINT(c) (SINGLE_BYTE_CHAR_P (c)				\
125
		    ? (c) >= ' ' && !((c) >= 0177 && (c) <= 0237)	\
126
		     : printablep (c))
127

128
#define ISALNUM(c) (IS_REAL_ASCII (c)			\
129 130 131
		    ? (((c) >= 'a' && (c) <= 'z')	\
		       || ((c) >= 'A' && (c) <= 'Z')	\
		       || ((c) >= '0' && (c) <= '9'))	\
132
		    : alphanumericp (c))
133

134
#define ISALPHA(c) (IS_REAL_ASCII (c)			\
135 136
		    ? (((c) >= 'a' && (c) <= 'z')	\
		       || ((c) >= 'A' && (c) <= 'Z'))	\
137
		    : alphabeticp (c))
138

139
#define ISLOWER(c) lowercasep (c)
140

141
#define ISPUNCT(c) (IS_REAL_ASCII (c)				\
142 143
		    ? ((c) > ' ' && (c) < 0177			\
		       && !(((c) >= 'a' && (c) <= 'z')		\
144 145
		            || ((c) >= 'A' && (c) <= 'Z')	\
		            || ((c) >= '0' && (c) <= '9')))	\
146 147
		    : SYNTAX (c) != Sword)

148
#define ISSPACE(c) (SYNTAX (c) == Swhitespace)
149

150
#define ISUPPER(c) uppercasep (c)
151

152
#define ISWORD(c) (SYNTAX (c) == Sword)
153

Paul Eggert's avatar
Paul Eggert committed
154
#define SIGN_EXTEND_CHAR(c) ((signed char) (c))
155

156
/* Use alloca instead of malloc.  This is because using malloc in
157 158
   re_search* or re_match* could cause memory leaks when C-g is used
   in Emacs (note that SAFE_ALLOCA could also call malloc, but does so
159
   via 'record_xmalloc' which uses 'unwind_protect' to ensure the
160 161 162
   memory is freed even in case of non-local exits); also, malloc is
   slower and causes storage fragmentation.  On the other hand, malloc
   is more portable, and easier to debug.
163

164
   Because we sometimes use alloca, some routines have to be macros,
165
   not functions -- 'alloca'-allocated space disappears at the end of the
166 167
   function it is called in.  */

168 169 170
/* This may be adjusted in main(), if the stack is successfully grown.  */
ptrdiff_t emacs_re_safe_alloca = MAX_ALLOCA;
/* Like USE_SAFE_ALLOCA, but use emacs_re_safe_alloca.  */
171 172
#define REGEX_USE_SAFE_ALLOCA					       \
  USE_SAFE_ALLOCA; sa_avail = emacs_re_safe_alloca
173

174
/* Assumes a 'char *destination' variable.  */
175 176
#define REGEX_REALLOCATE(source, osize, nsize)				\
  (destination = SAFE_ALLOCA (nsize),					\
177
   memcpy (destination, source, osize))
178

179 180
/* True if 'size1' is non-NULL and PTR is pointing anywhere inside
   'string1' or just past its end.  This works if PTR is NULL, which is
181
   a good thing.  */
Richard M. Stallman's avatar
Richard M. Stallman committed
182
#define FIRST_STRING_P(ptr)					\
183 184 185
  (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)

/* (Re)Allocate N items of type T using malloc, or fail.  */
186 187
#define TALLOC(n, t) ((t *) xmalloc ((n) * sizeof (t)))
#define RETALLOC(addr, n, t) ((addr) = (t *) xrealloc (addr, (n) * sizeof (t)))
188

189
#define BYTEWIDTH 8 /* In bits.  */
190

191 192 193
/* Type of source-pattern and string chars.  */
typedef const unsigned char re_char;

194 195
static void re_compile_fastmap (struct re_pattern_buffer *);
static ptrdiff_t re_match_2_internal (struct re_pattern_buffer *bufp,
Paul Eggert's avatar
Paul Eggert committed
196 197
				     re_char *string1, size_t size1,
				     re_char *string2, size_t size2,
198
				     ptrdiff_t pos,
Paul Eggert's avatar
Paul Eggert committed
199
				     struct re_registers *regs,
200
				     ptrdiff_t stop);
201 202

/* These are the command codes that appear in compiled regular
203
   expressions.  Some opcodes are followed by argument bytes.  A
204 205 206 207 208 209 210
   command code can specify any interpretation whatsoever for its
   arguments.  Zero bytes may appear in the compiled regular expression.  */

typedef enum
{
  no_op = 0,

211
  /* Succeed right away--no more backtracking.  */
212 213
  succeed,

Richard M. Stallman's avatar
Richard M. Stallman committed
214
	/* Followed by one byte giving n, then by n literal bytes.  */
215 216
  exactn,

Richard M. Stallman's avatar
Richard M. Stallman committed
217
	/* Matches any (more or less) character.  */
218 219
  anychar,

Richard M. Stallman's avatar
Richard M. Stallman committed
220 221 222 223 224
	/* Matches any one char belonging to specified set.  First
	   following byte is number of bitmap bytes.  Then come bytes
	   for a bitmap saying which chars are in.  Bits in each byte
	   are ordered low-bit-first.  A character is in the set if its
	   bit is 1.  A character too large to have a bit in the map is
225 226 227 228 229
	   automatically not in the set.

	   If the length byte has the 0x80 bit set, then that stuff
	   is followed by a range table:
	       2 bytes of flags for character sets (low 8 bits, high 8 bits)
230
		   See RANGE_TABLE_WORK_BITS below.
231
	       2 bytes, the number of pairs that follow (upto 32767)
232
	       pairs, each 2 multibyte characters,
233
		   each multibyte character represented as 3 bytes.  */
234 235
  charset,

Richard M. Stallman's avatar
Richard M. Stallman committed
236
	/* Same parameters as charset, but match any character that is
237
	   not one of those specified.  */
238 239
  charset_not,

Richard M. Stallman's avatar
Richard M. Stallman committed
240 241 242
	/* Start remembering the text that is matched, for storing in a
	   register.  Followed by one byte with the register number, in
	   the range 0 to one less than the pattern buffer's re_nsub
243
	   field.  */
244 245
  start_memory,

Richard M. Stallman's avatar
Richard M. Stallman committed
246 247
	/* Stop remembering the text that is matched and store it in a
	   memory register.  Followed by one byte with the register
248
	   number, in the range 0 to one less than 're_nsub' in the
249
	   pattern buffer.  */
250 251
  stop_memory,

Richard M. Stallman's avatar
Richard M. Stallman committed
252
	/* Match a duplicate of something remembered. Followed by one
253
	   byte containing the register number.  */
254 255
  duplicate,

Richard M. Stallman's avatar
Richard M. Stallman committed
256
	/* Fail unless at beginning of line.  */
257 258
  begline,

259
	/* Fail unless at end of line.  */
260 261
  endline,

262
	/* Succeeds if at beginning of buffer.  */
263 264
  begbuf,

Richard M. Stallman's avatar
Richard M. Stallman committed
265
	/* Analogously, for end of buffer/string.  */
266
  endbuf,
267

Richard M. Stallman's avatar
Richard M. Stallman committed
268
	/* Followed by two byte relative address to which to jump.  */
269
  jump,
270

Richard M. Stallman's avatar
Richard M. Stallman committed
271
	/* Followed by two-byte relative address of place to resume at
Juanma Barranquero's avatar
Juanma Barranquero committed
272
	   in case of failure.  */
273
  on_failure_jump,
274

Richard M. Stallman's avatar
Richard M. Stallman committed
275 276
	/* Like on_failure_jump, but pushes a placeholder instead of the
	   current string position when executed.  */
277
  on_failure_keep_string_jump,
278

279
	/* Just like 'on_failure_jump', except that it checks that we
280 281 282 283
	   don't get stuck in an infinite loop (matching an empty string
	   indefinitely).  */
  on_failure_jump_loop,

284
	/* Just like 'on_failure_jump_loop', except that it checks for
285 286
	   a different kind of loop (the kind that shows up with non-greedy
	   operators).  This operation has to be immediately preceded
287
	   by a 'no_op'.  */
288 289
  on_failure_jump_nastyloop,

290
	/* A smart 'on_failure_jump' used for greedy * and + operators.
Juanma Barranquero's avatar
Juanma Barranquero committed
291
	   It analyzes the loop before which it is put and if the
292
	   loop does not require backtracking, it changes itself to
293 294 295
	   'on_failure_keep_string_jump' and short-circuits the loop,
	   else it just defaults to changing itself into 'on_failure_jump'.
	   It assumes that it is pointing to just past a 'jump'.  */
296
  on_failure_jump_smart,
297

Richard M. Stallman's avatar
Richard M. Stallman committed
298
	/* Followed by two-byte relative address and two-byte number n.
299 300 301
	   After matching N times, jump to the address upon failure.
	   Does not work if N starts at 0: use on_failure_jump_loop
	   instead.  */
302 303
  succeed_n,

Richard M. Stallman's avatar
Richard M. Stallman committed
304 305
	/* Followed by two-byte relative address, and two-byte number n.
	   Jump to the address N times, then fail.  */
306 307
  jump_n,

Richard M. Stallman's avatar
Richard M. Stallman committed
308
	/* Set the following two-byte relative address to the
Juanma Barranquero's avatar
Juanma Barranquero committed
309
	   subsequent two-byte number.  The address *includes* the two
Richard M. Stallman's avatar
Richard M. Stallman committed
310
	   bytes of number.  */
311 312 313 314 315 316
  set_number_at,

  wordbeg,	/* Succeeds if at word beginning.  */
  wordend,	/* Succeeds if at word end.  */

  wordbound,	/* Succeeds if at a word boundary.  */
Juanma Barranquero's avatar
Juanma Barranquero committed
317
  notwordbound,	/* Succeeds if not at a word boundary.  */
318

319 320 321
  symbeg,       /* Succeeds if at symbol beginning.  */
  symend,       /* Succeeds if at symbol end.  */

322
	/* Matches any character whose syntax is specified.  Followed by
Richard M. Stallman's avatar
Richard M. Stallman committed
323
	   a byte which contains a syntax code, e.g., Sword.  */
324 325 326
  syntaxspec,

	/* Matches any character whose syntax is not that specified.  */
327
  notsyntaxspec,
328

329
  at_dot,	/* Succeeds if at point.  */
330 331

  /* Matches any character whose category-set contains the specified
Juanma Barranquero's avatar
Juanma Barranquero committed
332 333
     category.  The operator is followed by a byte which contains a
     category code (mnemonic ASCII character).  */
334 335 336 337 338 339
  categoryspec,

  /* Matches any character whose category-set does not contain the
     specified category.  The operator is followed by a byte which
     contains the category code (mnemonic ASCII character).  */
  notcategoryspec
340 341 342 343 344 345 346 347 348 349
} re_opcode_t;

/* Common operations on the compiled pattern.  */

/* Store NUMBER in two contiguous bytes starting at DESTINATION.  */

#define STORE_NUMBER(destination, number)				\
  do {									\
    (destination)[0] = (number) & 0377;					\
    (destination)[1] = (number) >> 8;					\
350
  } while (false)
351 352 353 354 355 356 357 358 359

/* Same as STORE_NUMBER, except increment DESTINATION to
   the byte after where the number is stored.  Therefore, DESTINATION
   must be an lvalue.  */

#define STORE_NUMBER_AND_INCR(destination, number)			\
  do {									\
    STORE_NUMBER (destination, number);					\
    (destination) += 2;							\
360
  } while (false)
361 362 363 364 365

/* Put into DESTINATION a number stored in two contiguous bytes starting
   at SOURCE.  */

#define EXTRACT_NUMBER(destination, source)				\
366
  ((destination) = extract_number (source))
367

368 369
static int
extract_number (re_char *source)
370
{
371 372
  unsigned leading_byte = SIGN_EXTEND_CHAR (source[1]);
  return (leading_byte << 8) + source[0];
373 374 375 376 377 378
}

/* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.
   SOURCE must be an lvalue.  */

#define EXTRACT_NUMBER_AND_INCR(destination, source)			\
379
  ((destination) = extract_number_and_incr (&source))
380

381 382
static int
extract_number_and_incr (re_char **source)
383
{
384
  int num = extract_number (*source);
385
  *source += 2;
386
  return num;
387 388
}

389 390
/* Store a multibyte character in three contiguous bytes starting
   DESTINATION, and increment DESTINATION to the byte after where the
Juanma Barranquero's avatar
Juanma Barranquero committed
391
   character is stored.  Therefore, DESTINATION must be an lvalue.  */
392 393 394 395 396 397 398

#define STORE_CHARACTER_AND_INCR(destination, character)	\
  do {								\
    (destination)[0] = (character) & 0377;			\
    (destination)[1] = ((character) >> 8) & 0377;		\
    (destination)[2] = (character) >> 16;			\
    (destination) += 3;						\
399
  } while (false)
400 401

/* Put into DESTINATION a character stored in three contiguous bytes
Juanma Barranquero's avatar
Juanma Barranquero committed
402
   starting at SOURCE.  */
403 404 405 406 407 408

#define EXTRACT_CHARACTER(destination, source)	\
  do {						\
    (destination) = ((source)[0]		\
		     | ((source)[1] << 8)	\
		     | ((source)[2] << 16));	\
409
  } while (false)
410 411 412 413 414 415 416 417 418


/* Macros for charset. */

/* Size of bitmap of charset P in bytes.  P is a start of charset,
   i.e. *P is (re_opcode_t) charset or (re_opcode_t) charset_not.  */
#define CHARSET_BITMAP_SIZE(p) ((p)[1] & 0x7F)

/* Nonzero if charset P has range table.  */
Richard M. Stallman's avatar
Richard M. Stallman committed
419
#define CHARSET_RANGE_TABLE_EXISTS_P(p)	 ((p)[1] & 0x80)
420 421 422

/* Return the address of range table of charset P.  But not the start
   of table itself, but the before where the number of ranges is
423
   stored.  '2 +' means to skip re_opcode_t and size of bitmap,
424 425 426 427 428 429 430
   and the 2 bytes of flags at the start of the range table.  */
#define CHARSET_RANGE_TABLE(p) (&(p)[4 + CHARSET_BITMAP_SIZE (p)])

/* Extract the bit flags that start a range table.  */
#define CHARSET_RANGE_TABLE_BITS(p)		\
  ((p)[2 + CHARSET_BITMAP_SIZE (p)]		\
   + (p)[3 + CHARSET_BITMAP_SIZE (p)] * 0x100)
431 432

/* Return the address of end of RANGE_TABLE.  COUNT is number of
433 434
   ranges (which is a pair of (start, end)) in the RANGE_TABLE.  '* 2'
   is start of range and end of range.  '* 3' is size of each start
435 436 437 438
   and end.  */
#define CHARSET_RANGE_TABLE_END(range_table, count)	\
  ((range_table) + (count) * 2 * 3)

439 440
/* If REGEX_EMACS_DEBUG is defined, print many voluminous messages
   (if the variable regex_emacs_debug is positive).  */
441

442
#ifdef REGEX_EMACS_DEBUG
443

444
/* Use standard I/O for debugging.  */
445
# include <stdio.h>
446

447
static int regex_emacs_debug = -100000;
448

449
# define DEBUG_STATEMENT(e) e
450
# define DEBUG_PRINT(...) if (regex_emacs_debug > 0) printf (__VA_ARGS__)
451
# define DEBUG_COMPILES_ARGUMENTS
452
# define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)				\
453
  if (regex_emacs_debug > 0) print_partial_compiled_pattern (s, e)
454
# define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)			\
455
  if (regex_emacs_debug > 0) print_double_string (w, s1, sz1, s2, sz2)
456 457 458 459


/* Print the fastmap in human-readable form.  */

460 461
static void
print_fastmap (char *fastmap)
462 463
{
  unsigned was_a_range = 0;
464 465
  unsigned i = 0;

466 467 468 469 470
  while (i < (1 << BYTEWIDTH))
    {
      if (fastmap[i++])
	{
	  was_a_range = 0;
Richard M. Stallman's avatar
Richard M. Stallman committed
471 472 473 474 475 476
	  putchar (i - 1);
	  while (i < (1 << BYTEWIDTH)  &&  fastmap[i])
	    {
	      was_a_range = 1;
	      i++;
	    }
477
	  if (was_a_range)
Richard M. Stallman's avatar
Richard M. Stallman committed
478 479 480 481 482
	    {
	      printf ("-");
	      putchar (i - 1);
	    }
	}
483
    }
484
  putchar ('\n');
485 486 487 488 489 490
}


/* Print a compiled pattern string in human-readable form, starting at
   the START pointer into it and ending just before the pointer END.  */

491 492
static void
print_partial_compiled_pattern (re_char *start, re_char *end)
493 494
{
  int mcnt, mcnt2;
495 496
  re_char *p = start;
  re_char *pend = end;
497 498 499

  if (start == NULL)
    {
500
      fprintf (stderr, "(null)\n");
501 502
      return;
    }
503

504 505 506
  /* Loop over pattern commands.  */
  while (p < pend)
    {
507
      fprintf (stderr, "%td:\t", p - start);
508 509 510

      switch ((re_opcode_t) *p++)
	{
Richard M. Stallman's avatar
Richard M. Stallman committed
511
	case no_op:
512
	  fprintf (stderr, "/no_op");
Richard M. Stallman's avatar
Richard M. Stallman committed
513
	  break;
514

515
	case succeed:
516
	  fprintf (stderr, "/succeed");
517 518
	  break;

519 520
	case exactn:
	  mcnt = *p++;
521
	  fprintf (stderr, "/exactn/%d", mcnt);
Richard M. Stallman's avatar
Richard M. Stallman committed
522
	  do
523
	    {
524
	      fprintf (stderr, "/%c", *p++);
Richard M. Stallman's avatar
Richard M. Stallman committed
525 526 527
	    }
	  while (--mcnt);
	  break;
528 529

	case start_memory:
530
	  fprintf (stderr, "/start_memory/%d", *p++);
Richard M. Stallman's avatar
Richard M. Stallman committed
531
	  break;
532 533

	case stop_memory:
534
	  fprintf (stderr, "/stop_memory/%d", *p++);
Richard M. Stallman's avatar
Richard M. Stallman committed
535
	  break;
536 537

	case duplicate:
538
	  fprintf (stderr, "/duplicate/%d", *p++);
539 540 541
	  break;

	case anychar:
542
	  fprintf (stderr, "/anychar");
543 544 545
	  break;

	case charset:
Richard M. Stallman's avatar
Richard M. Stallman committed
546 547 548
	case charset_not:
	  {
	    register int c, last = -100;
549
	    register int in_range = 0;
550 551
	    int length = CHARSET_BITMAP_SIZE (p - 1);
	    int has_range_table = CHARSET_RANGE_TABLE_EXISTS_P (p - 1);
552

553
	    fprintf (stderr, "/charset [%s",
Kenichi Handa's avatar
Kenichi Handa committed
554
		     (re_opcode_t) *(p - 1) == charset_not ? "^" : "");
555

Kenichi Handa's avatar
Kenichi Handa committed
556 557
	    if (p + *p >= pend)
	      fprintf (stderr, " !extends past end of pattern! ");
558

Richard M. Stallman's avatar
Richard M. Stallman committed
559
	    for (c = 0; c < 256; c++)
560
	      if (c / 8 < length
561 562 563 564 565
		  && (p[1 + (c/8)] & (1 << (c % 8))))
		{
		  /* Are we starting a range?  */
		  if (last + 1 == c && ! in_range)
		    {
566
		      fprintf (stderr, "-");
567 568 569 570
		      in_range = 1;
		    }
		  /* Have we broken a range?  */
		  else if (last + 1 != c && in_range)
571
		    {
572
		      fprintf (stderr, "%c", last);
573 574
		      in_range = 0;
		    }
575

576
		  if (! in_range)
577
		    fprintf (stderr, "%c", c);
578 579

		  last = c;
Richard M. Stallman's avatar
Richard M. Stallman committed
580
	      }
581 582

	    if (in_range)
583
	      fprintf (stderr, "%c", last);
584

585
	    fprintf (stderr, "]");
586

587
	    p += 1 + length;
588 589

	    if (has_range_table)
590 591
	      {
		int count;
592
		fprintf (stderr, "has-range-table");
593 594 595 596 597 598

		/* ??? Should print the range table; for now, just skip it.  */
		p += 2;		/* skip range table bits */
		EXTRACT_NUMBER_AND_INCR (count, p);
		p = CHARSET_RANGE_TABLE_END (p, count);
	      }
599 600 601 602
	  }
	  break;

	case begline:
603
	  fprintf (stderr, "/begline");
Richard M. Stallman's avatar
Richard M. Stallman committed
604
	  break;
605 606

	case endline:
607
	  fprintf (stderr, "/endline");
Richard M. Stallman's avatar
Richard M. Stallman committed
608
	  break;
609 610

	case on_failure_jump:
611 612
	  EXTRACT_NUMBER_AND_INCR (mcnt, p);
	  fprintf (stderr, "/on_failure_jump to %td", p + mcnt - start);
Richard M. Stallman's avatar
Richard M. Stallman committed
613
	  break;
614 615

	case on_failure_keep_string_jump:
616 617 618
	  EXTRACT_NUMBER_AND_INCR (mcnt, p);
	  fprintf (stderr, "/on_failure_keep_string_jump to %td",
		   p + mcnt - start);
Richard M. Stallman's avatar
Richard M. Stallman committed
619
	  break;
620

621
	case on_failure_jump_nastyloop:
622 623 624
	  EXTRACT_NUMBER_AND_INCR (mcnt, p);
	  fprintf (stderr, "/on_failure_jump_nastyloop to %td",
		   p + mcnt - start);
625 626
	  break;

627
	case on_failure_jump_loop:
628 629 630
	  EXTRACT_NUMBER_AND_INCR (mcnt, p);
	  fprintf (stderr, "/on_failure_jump_loop to %td",
		   p + mcnt - start);
631 632
	  break;

633
	case on_failure_jump_smart:
634 635 636
	  EXTRACT_NUMBER_AND_INCR (mcnt, p);
	  fprintf (stderr, "/on_failure_jump_smart to %td",
		   p + mcnt - start);
637 638
	  break;

Richard M. Stallman's avatar
Richard M. Stallman committed
639
	case jump:
640 641
	  EXTRACT_NUMBER_AND_INCR (mcnt, p);
	  fprintf (stderr, "/jump to %td", p + mcnt - start);
642 643
	  break;

Richard M. Stallman's avatar
Richard M. Stallman committed
644
	case succeed_n:
645 646 647 648
	  EXTRACT_NUMBER_AND_INCR (mcnt, p);
	  EXTRACT_NUMBER_AND_INCR (mcnt2, p);
	  fprintf (stderr, "/succeed_n to %td, %d times",
		   p - 2 + mcnt - start, mcnt2);
Richard M. Stallman's avatar
Richard M. Stallman committed
649
	  break;
650

Richard M. Stallman's avatar
Richard M. Stallman committed
651
	case jump_n:
652 653 654 655
	  EXTRACT_NUMBER_AND_INCR (mcnt, p);
	  EXTRACT_NUMBER_AND_INCR (mcnt2, p);
	  fprintf (stderr, "/jump_n to %td, %d times",
		   p - 2 + mcnt - start, mcnt2);
Richard M. Stallman's avatar
Richard M. Stallman committed
656
	  break;
657

Richard M. Stallman's avatar
Richard M. Stallman committed
658
	case set_number_at:
659 660 661 662
	  EXTRACT_NUMBER_AND_INCR (mcnt, p);
	  EXTRACT_NUMBER_AND_INCR (mcnt2, p);
	  fprintf (stderr, "/set_number_at location %td to %d",
		   p - 2 + mcnt - start, mcnt2);
Richard M. Stallman's avatar
Richard M. Stallman committed
663
	  break;
664

Richard M. Stallman's avatar
Richard M. Stallman committed
665
	case wordbound:
666
	  fprintf (stderr, "/wordbound");
667 668 669
	  break;

	case notwordbound:
670
	  fprintf (stderr, "/notwordbound");
Richard M. Stallman's avatar
Richard M. Stallman committed
671
	  break;
672 673

	case wordbeg:
674
	  fprintf (stderr, "/wordbeg");
675
	  break;
676

677
	case wordend:
678
	  fprintf (stderr, "/wordend");
679
	  break;
680

681
	case symbeg:
682
	  fprintf (stderr, "/symbeg");
683 684 685
	  break;

	case symend:
686
	  fprintf (stderr, "/symend");
687
	  break;
688

689
	case syntaxspec:
690
	  fprintf (stderr, "/syntaxspec");
691
	  mcnt = *p++;
692
	  fprintf (stderr, "/%d", mcnt);
693 694 695
	  break;

	case notsyntaxspec:
696
	  fprintf (stderr, "/notsyntaxspec");
697
	  mcnt = *p++;
698
	  fprintf (stderr, "/%d", mcnt);
699 700
	  break;

701
	case at_dot:
702
	  fprintf (stderr, "/at_dot");
Richard M. Stallman's avatar
Richard M. Stallman committed
703
	  break;
704

705
	case categoryspec:
706
	  fprintf (stderr, "/categoryspec");
707
	  mcnt = *p++;
708
	  fprintf (stderr, "/%d", mcnt);
Richard M. Stallman's avatar
Richard M. Stallman committed
709
	  break;
710

711
	case notcategoryspec:
712
	  fprintf (stderr, "/notcategoryspec");
713
	  mcnt = *p++;
714
	  fprintf (stderr, "/%d", mcnt);
715 716 717
	  break;

	case begbuf:
718
	  fprintf (stderr, "/begbuf");
Richard M. Stallman's avatar
Richard M. Stallman committed
719
	  break;
720 721

	case endbuf:
722
	  fprintf (stderr, "/endbuf");
Richard M. Stallman's avatar
Richard M. Stallman committed
723
	  break;
724

Richard M. Stallman's avatar
Richard M. Stallman committed
725
	default:
726
	  fprintf (stderr, "?%d", *(p-1));
727 728
	}

729
      fprintf (stderr, "\n");
730 731
    }

732
  fprintf (stderr, "%td:\tend of pattern.\n", p - start);
733 734 735
}


736 737
static void
print_compiled_pattern (struct re_pattern_buffer *bufp)
738
{
739
  re_char *buffer = bufp->buffer;
740 741

  print_partial_compiled_pattern (buffer, buffer + bufp->used);
742
  printf ("%zu bytes used/%zu bytes allocated.\n",
743
	  bufp->used, bufp->allocated);
744 745 746 747 748 749 750

  if (bufp->fastmap_accurate && bufp->fastmap)
    {
      printf ("fastmap: ");
      print_fastmap (bufp->fastmap);
    }

751
  printf ("re_nsub: %zu\t", bufp->re_nsub);
752 753
  printf ("regs_alloc: %d\t", bufp->regs_allocated);
  printf ("can_be_null: %d\t", bufp->can_be_null);
754
  fflush (stdout);
755 756 757 758
  /* Perhaps we should print the translate table?  */
}


759 760 761
static void
print_double_string (re_char *where, re_char *string1, ssize_t size1,
		     re_char *string2, ssize_t size2)
762
{
763
  ssize_t this_char;
764

765 766 767 768 769
  if (where == NULL)
    printf ("(null)");
  else
    {
      if (FIRST_STRING_P (where))
Richard M. Stallman's avatar
Richard M. Stallman committed
770 771 772
	{
	  for (this_char = where - string1; this_char < size1; this_char++)
	    putchar (string1[this_char]);
773

Richard M. Stallman's avatar
Richard M. Stallman committed
774 775
	  where = string2;
	}
776 777

      for (this_char = where - string2; this_char < size2; this_char++)
Richard M. Stallman's avatar
Richard M. Stallman committed
778
	putchar (string2[this_char]);
779 780 781
    }
}

782
#else /* not REGEX_EMACS_DEBUG */
783

784
# define DEBUG_STATEMENT(e)
Paul Eggert's avatar
Paul Eggert committed
785
# define DEBUG_PRINT(...)
786 787
# define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
# define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
788

789
#endif /* not REGEX_EMACS_DEBUG */
790

791
typedef enum
792
{
793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815