/* Emacs regular expression matching and search

   Copyright (C) 1993-2020 Free Software Foundation, Inc.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */

/* TODO:
   - structure the opcode space into opcode+flag.
   - replace (succeed_n + jump_n + set_number_at) with something that doesn't
     need to modify the compiled regexp so that re_search can be reentrant.
   - get rid of on_failure_jump_smart by doing the optimization in re_comp
     rather than at run-time, so that re_search can be reentrant.
*/
25

26
#include <config.h>
27

28
#include "regex-emacs.h"
29

30
#include <stdlib.h>
31

32 33 34 35
#ifdef HAVE_SANITIZER_LSAN_INTERFACE_H
#include <sanitizer/lsan_interface.h>
#endif

36 37 38 39
#include "character.h"
#include "buffer.h"
#include "syntax.h"
#include "category.h"
40

41
/* Maximum number of duplicates an interval can allow.  Some systems
   define this in other header files, but we want our value, so remove
   any previous define.  Repeat counts are stored in opcodes as 2-byte
   unsigned integers.  */
#ifdef RE_DUP_MAX
# undef RE_DUP_MAX
#endif
#define RE_DUP_MAX (0xffff)

50
/* Make syntax table lookup grant data in gl_state.  */
51
#define SYNTAX(c) syntax_property (c, 1)
52

53
/* Convert the pointer to the char to BEG-based offset from the start.  */
54
#define PTR_TO_OFFSET(d) POS_AS_IN_BUFFER (POINTER_TO_OFFSET (d))
55
/* Strings are 0-indexed, buffers are 1-indexed; pun on the boolean
   result to get the right base index: P is incremented by 1 when
   gl_state.object is nil or a buffer, and left alone otherwise.  */
#define POS_AS_IN_BUFFER(p)                                    \
  ((p) + (NILP (gl_state.object) || BUFFERP (gl_state.object)))
59

60 61 62
#define RE_MULTIBYTE_P(bufp) ((bufp)->multibyte)
#define RE_TARGET_MULTIBYTE_P(bufp) ((bufp)->target_multibyte)
/* Fetch the character at P: through STRING_CHAR when MULTIBYTE is
   nonzero, otherwise the single byte *P.  */
#define RE_STRING_CHAR(p, multibyte) \
  ((multibyte) ? STRING_CHAR (p) : *(p))
64
/* Like RE_STRING_CHAR, but also store the character's byte length in
   LEN (which must be an lvalue): string_char_and_length fills it in
   the multibyte case, and it is set to 1 otherwise.  */
#define RE_STRING_CHAR_AND_LENGTH(p, len, multibyte) \
  ((multibyte) ? string_char_and_length (p, &(len)) : ((len) = 1, *(p)))
66

67
#define RE_CHAR_TO_MULTIBYTE(c) UNIBYTE_TO_CHAR (c)
68

69
#define RE_CHAR_TO_UNIBYTE(c) CHAR_TO_BYTE_SAFE (c)
70

71 72 73
/* Set C to the (possibly converted to multibyte) character before P.
   P points into a string which is the virtual concatenation of STR1
   (which ends at END1) and STR2 (which ends at END2).  When P is at
   the very start of STR2, the character before it is the last one of
   STR1.  Relies on a 'target_multibyte' variable being in scope.  */
#define GET_CHAR_BEFORE_2(c, p, str1, end1, str2, end2)                      \
  do {                                                                       \
    if (target_multibyte)                                                    \
      {                                                                      \
        re_char *dtemp = (p) == (str2) ? (end1) : (p);                       \
        re_char *dlimit = (p) > (str2) && (p) <= (end2) ? (str2) : (str1);   \
        /* Step back to the head byte of the preceding character.  */        \
        while (dtemp-- > dlimit && !CHAR_HEAD_P (*dtemp))                    \
          continue;                                                          \
        (c) = STRING_CHAR (dtemp);                                           \
      }                                                                      \
    else                                                                     \
      {                                                                      \
        (c = ((p) == (str2) ? (end1) : (p))[-1]);                            \
        (c) = RE_CHAR_TO_MULTIBYTE (c);                                      \
      }                                                                      \
  } while (false)
90

91 92
/* Set C to the (possibly converted to multibyte) character at P, and
   set LEN to the byte length of that character.  C and LEN must be
   lvalues.  Relies on a 'target_multibyte' variable being in scope.  */
#define GET_CHAR_AFTER(c, p, len)                 \
  do {                                            \
    if (target_multibyte)                         \
      (c) = string_char_and_length (p, &(len));   \
    else                                          \
      {                                           \
        (c) = *(p);                               \
        (len) = 1;                                \
        (c) = RE_CHAR_TO_MULTIBYTE (c);           \
      }                                           \
  } while (false)
104

105
/* 1 if C is an ASCII character.  */
106
#define IS_REAL_ASCII(c) ((c) < 0200)
107

108
/* 1 if C is a unibyte character.  */
109
#define ISUNIBYTE(c) (SINGLE_BYTE_CHAR_P ((c)))
110

111
/* The Emacs definitions should not be directly affected by locales.  */
112

113
/* In Emacs, these are only used for single-byte characters.  */
114 115 116
#define ISDIGIT(c) ((c) >= '0' && (c) <= '9')
#define ISCNTRL(c) ((c) < ' ')
#define ISXDIGIT(c) (0 <= char_hexdigit (c))
117 118 119

/* The rest must handle multibyte characters.  */

/* Space or tab for ASCII; otherwise defer to blankp.  */
#define ISBLANK(c) (IS_REAL_ASCII (c)                   \
                    ? ((c) == ' ' || (c) == '\t')       \
                    : blankp (c))

/* Single-byte: above space and outside 0177..0240; otherwise defer
   to graphicp.  */
#define ISGRAPH(c) (SINGLE_BYTE_CHAR_P (c)                              \
                    ? (c) > ' ' && !((c) >= 0177 && (c) <= 0240)        \
                    : graphicp (c))

/* Single-byte: space or above and outside 0177..0237; otherwise
   defer to printablep.  */
#define ISPRINT(c) (SINGLE_BYTE_CHAR_P (c)                              \
                    ? (c) >= ' ' && !((c) >= 0177 && (c) <= 0237)       \
                    : printablep (c))

/* ASCII letters and digits; otherwise defer to alphanumericp.  */
#define ISALNUM(c) (IS_REAL_ASCII (c)                   \
                    ? (((c) >= 'a' && (c) <= 'z')       \
                       || ((c) >= 'A' && (c) <= 'Z')    \
                       || ((c) >= '0' && (c) <= '9'))   \
                    : alphanumericp (c))

/* ASCII letters; otherwise defer to alphabeticp.  */
#define ISALPHA(c) (IS_REAL_ASCII (c)                   \
                    ? (((c) >= 'a' && (c) <= 'z')       \
                       || ((c) >= 'A' && (c) <= 'Z'))   \
                    : alphabeticp (c))

#define ISLOWER(c) lowercasep (c)

/* ASCII: printable, not a space, and not alphanumeric.  Non-ASCII:
   anything whose syntax is not Sword.  */
#define ISPUNCT(c) (IS_REAL_ASCII (c)                           \
                    ? ((c) > ' ' && (c) < 0177                  \
                       && !(((c) >= 'a' && (c) <= 'z')          \
                            || ((c) >= 'A' && (c) <= 'Z')       \
                            || ((c) >= '0' && (c) <= '9')))     \
                    : SYNTAX (c) != Sword)

#define ISSPACE(c) (SYNTAX (c) == Swhitespace)

#define ISUPPER(c) uppercasep (c)

#define ISWORD(c) (SYNTAX (c) == Sword)
157

158
/* Use alloca instead of malloc.  This is because using malloc in
   re_search* or re_match* could cause memory leaks when C-g is used
   in Emacs (note that SAFE_ALLOCA could also call malloc, but does so
   via 'record_xmalloc' which uses 'unwind_protect' to ensure the
   memory is freed even in case of non-local exits); also, malloc is
   slower and causes storage fragmentation.  On the other hand, malloc
   is more portable, and easier to debug.

   Because we sometimes use alloca, some routines have to be macros,
   not functions -- 'alloca'-allocated space disappears at the end of the
   function it is called in.  */

170 171 172
/* This may be adjusted in main(), if the stack is successfully grown.  */
ptrdiff_t emacs_re_safe_alloca = MAX_ALLOCA;
/* Like USE_SAFE_ALLOCA, but use emacs_re_safe_alloca.  */
173 174
#define REGEX_USE_SAFE_ALLOCA					       \
  USE_SAFE_ALLOCA; sa_avail = emacs_re_safe_alloca
175

176
/* Assumes a 'char *destination' variable.  Point it at NSIZE bytes
   obtained from SAFE_ALLOCA and copy the first OSIZE bytes of SOURCE
   into them.  The value of the expression is that of the memcpy call.  */
#define REGEX_REALLOCATE(source, osize, nsize)                          \
  (destination = SAFE_ALLOCA (nsize),                                   \
   memcpy (destination, source, osize))
180

181 182
/* True if 'size1' is nonzero and PTR is pointing anywhere inside
   'string1' or just past its end.  This works if PTR is NULL, which is
   a good thing.  Relies on 'string1' and 'size1' being in scope.  */
#define FIRST_STRING_P(ptr)                                     \
  (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)

187
#define BYTEWIDTH 8 /* In bits.  */
188

189 190 191
/* Type of source-pattern and string chars.  */
typedef const unsigned char re_char;

192 193
/* Forward declarations of functions defined later in this file.  */
static void re_compile_fastmap (struct re_pattern_buffer *);
static ptrdiff_t re_match_2_internal (struct re_pattern_buffer *bufp,
				     re_char *string1, ptrdiff_t size1,
				     re_char *string2, ptrdiff_t size2,
				     ptrdiff_t pos,
				     struct re_registers *regs,
				     ptrdiff_t stop);
199 200

/* These are the command codes that appear in compiled regular
   expressions.  Some opcodes are followed by argument bytes.  A
   command code can specify any interpretation whatsoever for its
   arguments.  Zero bytes may appear in the compiled regular expression.  */

typedef enum
{
  no_op = 0,

  /* Succeed right away--no more backtracking.  */
  succeed,

  /* Followed by one byte giving n, then by n literal bytes.  */
  exactn,

  /* Matches any (more or less) character.  */
  anychar,

  /* Matches any one char belonging to specified set.  First
     following byte is number of bitmap bytes.  Then come bytes
     for a bitmap saying which chars are in.  Bits in each byte
     are ordered low-bit-first.  A character is in the set if its
     bit is 1.  A character too large to have a bit in the map is
     automatically not in the set.

     If the length byte has the 0x80 bit set, then that stuff
     is followed by a range table:
	 2 bytes of flags for character sets (low 8 bits, high 8 bits)
	     See RANGE_TABLE_WORK_BITS below.
	 2 bytes, the number of pairs that follow (up to 32767)
	 pairs, each 2 multibyte characters,
	     each multibyte character represented as 3 bytes.  */
  charset,

  /* Same parameters as charset, but match any character that is
     not one of those specified.  */
  charset_not,

  /* Start remembering the text that is matched, for storing in a
     register.  Followed by one byte with the register number, in
     the range 0 to one less than the pattern buffer's re_nsub
     field.  */
  start_memory,

  /* Stop remembering the text that is matched and store it in a
     memory register.  Followed by one byte with the register
     number, in the range 0 to one less than 're_nsub' in the
     pattern buffer.  */
  stop_memory,

  /* Match a duplicate of something remembered.  Followed by one
     byte containing the register number.  */
  duplicate,

  /* Fail unless at beginning of line.  */
  begline,

  /* Fail unless at end of line.  */
  endline,

  /* Succeeds if at beginning of buffer.  */
  begbuf,

  /* Analogously, for end of buffer/string.  */
  endbuf,

  /* Followed by two byte relative address to which to jump.  */
  jump,

  /* Followed by two-byte relative address of place to resume at
     in case of failure.  */
  on_failure_jump,

  /* Like on_failure_jump, but pushes a placeholder instead of the
     current string position when executed.  */
  on_failure_keep_string_jump,

  /* Just like 'on_failure_jump', except that it checks that we
     don't get stuck in an infinite loop (matching an empty string
     indefinitely).  */
  on_failure_jump_loop,

  /* Just like 'on_failure_jump_loop', except that it checks for
     a different kind of loop (the kind that shows up with non-greedy
     operators).  This operation has to be immediately preceded
     by a 'no_op'.  */
  on_failure_jump_nastyloop,

  /* A smart 'on_failure_jump' used for greedy * and + operators.
     It analyzes the loop before which it is put and if the
     loop does not require backtracking, it changes itself to
     'on_failure_keep_string_jump' and short-circuits the loop,
     else it just defaults to changing itself into 'on_failure_jump'.
     It assumes that it is pointing to just past a 'jump'.  */
  on_failure_jump_smart,

  /* Followed by two-byte relative address and two-byte number n.
     After matching N times, jump to the address upon failure.
     Does not work if N starts at 0: use on_failure_jump_loop
     instead.  */
  succeed_n,

  /* Followed by two-byte relative address, and two-byte number n.
     Jump to the address N times, then fail.  */
  jump_n,

  /* Set the following two-byte relative address to the
     subsequent two-byte number.  The address *includes* the two
     bytes of number.  */
  set_number_at,

  wordbeg,	/* Succeeds if at word beginning.  */
  wordend,	/* Succeeds if at word end.  */

  wordbound,	/* Succeeds if at a word boundary.  */
  notwordbound,	/* Succeeds if not at a word boundary.  */

  symbeg,       /* Succeeds if at symbol beginning.  */
  symend,       /* Succeeds if at symbol end.  */

  /* Matches any character whose syntax is specified.  Followed by
     a byte which contains a syntax code, e.g., Sword.  */
  syntaxspec,

  /* Matches any character whose syntax is not that specified.  */
  notsyntaxspec,

  at_dot,	/* Succeeds if at point.  */

  /* Matches any character whose category-set contains the specified
     category.  The operator is followed by a byte which contains a
     category code (mnemonic ASCII character).  */
  categoryspec,

  /* Matches any character whose category-set does not contain the
     specified category.  The operator is followed by a byte which
     contains the category code (mnemonic ASCII character).  */
  notcategoryspec
} re_opcode_t;

/* Common operations on the compiled pattern.  */

/* Store NUMBER in two contiguous bytes starting at DESTINATION, low
   byte first.  */

#define STORE_NUMBER(destination, number)				\
  do {									\
    (destination)[0] = (number) & 0377;					\
    (destination)[1] = (number) >> 8;					\
  } while (false)
349 350 351 352 353 354 355 356 357

/* Same as STORE_NUMBER, except increment DESTINATION to
   the byte after where the number is stored.  Therefore, DESTINATION
   must be an lvalue.  */

#define STORE_NUMBER_AND_INCR(destination, number)			\
  do {									\
    STORE_NUMBER (destination, number);					\
    (destination) += 2;							\
  } while (false)
359 360 361 362 363

/* Put into DESTINATION a number stored in two contiguous bytes starting
   at SOURCE.  */

#define EXTRACT_NUMBER(destination, source)				\
  ((destination) = extract_number (source))
365

366 367
/* Return the signed 16-bit number stored in the two bytes at SOURCE.
   SOURCE[0] is the low byte; SOURCE[1] is the high byte and carries
   the sign.  */
static int
extract_number (re_char *source)
{
  /* Reinterpret the high byte as signed so that negative (backward)
     offsets come out negative.  */
  signed char leading_byte = source[1];
  return leading_byte * 256 + source[0];
}

/* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.
   SOURCE must be an lvalue.  */

#define EXTRACT_NUMBER_AND_INCR(destination, source)			\
  ((destination) = extract_number_and_incr (&(source)))
378

379 380
/* Return the two-byte number at *SOURCE, as extract_number does, and
   advance *SOURCE past those two bytes.  */
static int
extract_number_and_incr (re_char **source)
{
  int num = extract_number (*source);
  *source += 2;
  return num;
}

387 388
/* Store a multibyte character in three contiguous bytes starting at
   DESTINATION, low byte first, and increment DESTINATION to the byte
   after where the character is stored.  Therefore, DESTINATION must
   be an lvalue.  */

#define STORE_CHARACTER_AND_INCR(destination, character)	\
  do {								\
    (destination)[0] = (character) & 0377;			\
    (destination)[1] = ((character) >> 8) & 0377;		\
    (destination)[2] = (character) >> 16;			\
    (destination) += 3;						\
  } while (false)
398 399

/* Put into DESTINATION a character stored in three contiguous bytes
   starting at SOURCE, low byte first.  */

#define EXTRACT_CHARACTER(destination, source)	\
  do {						\
    (destination) = ((source)[0]		\
		     | ((source)[1] << 8)	\
		     | ((source)[2] << 16));	\
  } while (false)
408 409 410 411 412 413 414 415 416


/* Macros for charset. */

/* Size of bitmap of charset P in bytes.  P is a start of charset,
   i.e. *P is (re_opcode_t) charset or (re_opcode_t) charset_not.  */
#define CHARSET_BITMAP_SIZE(p) ((p)[1] & 0x7F)

/* Nonzero if charset P has range table.  */
417
#define CHARSET_RANGE_TABLE_EXISTS_P(p)	 (((p)[1] & 0x80) != 0)
418 419 420

/* Return the address of the range table of charset P.  This is not
   the very start of the table, but the place just after its flag
   bytes, where the number of ranges is stored.  The '4 +' skips the
   re_opcode_t byte, the bitmap-size byte, and the 2 bytes of flags at
   the start of the range table.  */
#define CHARSET_RANGE_TABLE(p) (&(p)[4 + CHARSET_BITMAP_SIZE (p)])

/* Extract the bit flags that start a range table.  */
#define CHARSET_RANGE_TABLE_BITS(p)		\
  ((p)[2 + CHARSET_BITMAP_SIZE (p)]		\
   + (p)[3 + CHARSET_BITMAP_SIZE (p)] * 0x100)
429 430

/* Return the address of the end of RANGE_TABLE.  COUNT is the number
   of ranges in RANGE_TABLE, each of which is a (start, end) pair.
   The '* 2' accounts for the start and the end of each range, and the
   '* 3' for the size in bytes of each start and each end.  */
#define CHARSET_RANGE_TABLE_END(range_table, count)	\
  ((range_table) + (count) * 2 * 3)

437 438
/* If REGEX_EMACS_DEBUG is defined, print many voluminous messages
   (if the variable regex_emacs_debug is positive).  */
439

440
#ifdef REGEX_EMACS_DEBUG
441

442
/* Use standard I/O for debugging.  */
443
# include "sysstdio.h"
444

445
static int regex_emacs_debug = -100000;
446

447
# define DEBUG_STATEMENT(e) e
448 449
# define DEBUG_PRINT(...)                                       \
  if (regex_emacs_debug > 0) fprintf (stderr, __VA_ARGS__)
450
# define DEBUG_COMPILES_ARGUMENTS
451
# define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)				\
452
  if (regex_emacs_debug > 0) print_partial_compiled_pattern (s, e)
453
# define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)			\
454
  if (regex_emacs_debug > 0) print_double_string (w, s1, sz1, s2, sz2)
455

456 457 458 459
/* Write C to stderr.  Values in the range 32..126 are written as-is;
   anything else is written as a hexadecimal code in braces, e.g.
   "{0a}".  */
static void
debug_putchar (int c)
{
  if (c >= 32 && c <= 126)
    putc (c, stderr);
  else
    {
      /* Convert to unsigned so the argument matches the %x
         conversion.  */
      unsigned int uc = c;
      fprintf (stderr, "{%02x}", uc);
    }
}
467 468 469

/* Print the fastmap in human-readable form.  */

470 471
static void
print_fastmap (char *fastmap)
472
{
473 474
  bool was_a_range = false;
  int i = 0;
475

476 477 478 479
  while (i < (1 << BYTEWIDTH))
    {
      if (fastmap[i++])
	{
480
	  was_a_range = false;
481
	  debug_putchar (i - 1);
Richard M. Stallman's avatar
Richard M. Stallman committed
482 483
	  while (i < (1 << BYTEWIDTH)  &&  fastmap[i])
	    {
484
	      was_a_range = true;
Richard M. Stallman's avatar
Richard M. Stallman committed
485 486
	      i++;
	    }
487
	  if (was_a_range)
Richard M. Stallman's avatar
Richard M. Stallman committed
488
	    {
489
	      debug_putchar ('-');
490
	      debug_putchar (i - 1);
Richard M. Stallman's avatar
Richard M. Stallman committed
491 492
	    }
	}
493
    }
494
  putc ('\n', stderr);
495 496 497 498 499 500
}


/* Print a compiled pattern string in human-readable form, starting at
   the START pointer into it and ending just before the pointer END.  */

501 502
static void
print_partial_compiled_pattern (re_char *start, re_char *end)
503 504
{
  int mcnt, mcnt2;
505 506
  re_char *p = start;
  re_char *pend = end;
507 508 509

  if (start == NULL)
    {
510
      fputs ("(null)\n", stderr);
511 512
      return;
    }
513

514 515 516
  /* Loop over pattern commands.  */
  while (p < pend)
    {
517
      fprintf (stderr, "%td:\t", p - start);
518 519 520

      switch ((re_opcode_t) *p++)
	{
Richard M. Stallman's avatar
Richard M. Stallman committed
521
	case no_op:
522
	  fputs ("/no_op", stderr);
Richard M. Stallman's avatar
Richard M. Stallman committed
523
	  break;
524

525
	case succeed:
526
	  fputs ("/succeed", stderr);
527 528
	  break;

529 530
	case exactn:
	  mcnt = *p++;
531
	  fprintf (stderr, "/exactn/%d", mcnt);
Richard M. Stallman's avatar
Richard M. Stallman committed
532
	  do
533
	    {
534
	      debug_putchar ('/');
535
	      debug_putchar (*p++);
Richard M. Stallman's avatar
Richard M. Stallman committed
536 537 538
	    }
	  while (--mcnt);
	  break;
539 540

	case start_memory:
541
	  fprintf (stderr, "/start_memory/%d", *p++);
Richard M. Stallman's avatar
Richard M. Stallman committed
542
	  break;
543 544

	case stop_memory:
545
	  fprintf (stderr, "/stop_memory/%d", *p++);
Richard M. Stallman's avatar
Richard M. Stallman committed
546
	  break;
547 548

	case duplicate:
549
	  fprintf (stderr, "/duplicate/%d", *p++);
550 551 552
	  break;

	case anychar:
553
	  fputs ("/anychar", stderr);
554 555 556
	  break;

	case charset:
Richard M. Stallman's avatar
Richard M. Stallman committed
557 558
	case charset_not:
	  {
559 560
	    int c, last = -100;
	    bool in_range = false;
561
	    int length = CHARSET_BITMAP_SIZE (p - 1);
562
	    bool has_range_table = CHARSET_RANGE_TABLE_EXISTS_P (p - 1);
563

564
	    fprintf (stderr, "/charset [%s",
Kenichi Handa's avatar
Kenichi Handa committed
565
		     (re_opcode_t) *(p - 1) == charset_not ? "^" : "");
566

Kenichi Handa's avatar
Kenichi Handa committed
567
	    if (p + *p >= pend)
568
	      fputs (" !extends past end of pattern! ", stderr);
569

Richard M. Stallman's avatar
Richard M. Stallman committed
570
	    for (c = 0; c < 256; c++)
571
	      if (c / 8 < length
572 573 574 575 576
		  && (p[1 + (c/8)] & (1 << (c % 8))))
		{
		  /* Are we starting a range?  */
		  if (last + 1 == c && ! in_range)
		    {
577
		      debug_putchar ('-');
578
		      in_range = true;
579 580 581
		    }
		  /* Have we broken a range?  */
		  else if (last + 1 != c && in_range)
582
		    {
583
		      debug_putchar (last);
584
		      in_range = false;
585
		    }
586

587
		  if (! in_range)
588
		    debug_putchar (c);
589 590

		  last = c;
Richard M. Stallman's avatar
Richard M. Stallman committed
591
	      }
592 593

	    if (in_range)
594
	      debug_putchar (last);
595

596
	    debug_putchar (']');
597

598
	    p += 1 + length;
599 600

	    if (has_range_table)
601 602
	      {
		int count;
603
		fputs ("has-range-table", stderr);
604 605 606 607 608 609

		/* ??? Should print the range table; for now, just skip it.  */
		p += 2;		/* skip range table bits */
		EXTRACT_NUMBER_AND_INCR (count, p);
		p = CHARSET_RANGE_TABLE_END (p, count);
	      }
610 611 612 613
	  }
	  break;

	case begline:
614
	  fputs ("/begline", stderr);
Richard M. Stallman's avatar
Richard M. Stallman committed
615
	  break;
616 617

	case endline:
618
	  fputs ("/endline", stderr);
Richard M. Stallman's avatar
Richard M. Stallman committed
619
	  break;
620 621

	case on_failure_jump:
622 623
	  EXTRACT_NUMBER_AND_INCR (mcnt, p);
	  fprintf (stderr, "/on_failure_jump to %td", p + mcnt - start);
Richard M. Stallman's avatar
Richard M. Stallman committed
624
	  break;
625 626

	case on_failure_keep_string_jump:
627 628 629
	  EXTRACT_NUMBER_AND_INCR (mcnt, p);
	  fprintf (stderr, "/on_failure_keep_string_jump to %td",
		   p + mcnt - start);
Richard M. Stallman's avatar
Richard M. Stallman committed
630
	  break;
631

632
	case on_failure_jump_nastyloop:
633 634 635
	  EXTRACT_NUMBER_AND_INCR (mcnt, p);
	  fprintf (stderr, "/on_failure_jump_nastyloop to %td",
		   p + mcnt - start);
636 637
	  break;

638
	case on_failure_jump_loop:
639 640 641
	  EXTRACT_NUMBER_AND_INCR (mcnt, p);
	  fprintf (stderr, "/on_failure_jump_loop to %td",
		   p + mcnt - start);
642 643
	  break;

644
	case on_failure_jump_smart:
645 646 647
	  EXTRACT_NUMBER_AND_INCR (mcnt, p);
	  fprintf (stderr, "/on_failure_jump_smart to %td",
		   p + mcnt - start);
648 649
	  break;

Richard M. Stallman's avatar
Richard M. Stallman committed
650
	case jump:
651 652
	  EXTRACT_NUMBER_AND_INCR (mcnt, p);
	  fprintf (stderr, "/jump to %td", p + mcnt - start);
653 654
	  break;

Richard M. Stallman's avatar
Richard M. Stallman committed
655
	case succeed_n:
656 657 658 659
	  EXTRACT_NUMBER_AND_INCR (mcnt, p);
	  EXTRACT_NUMBER_AND_INCR (mcnt2, p);
	  fprintf (stderr, "/succeed_n to %td, %d times",
		   p - 2 + mcnt - start, mcnt2);
Richard M. Stallman's avatar
Richard M. Stallman committed
660
	  break;
661

Richard M. Stallman's avatar
Richard M. Stallman committed
662
	case jump_n:
663 664 665 666
	  EXTRACT_NUMBER_AND_INCR (mcnt, p);
	  EXTRACT_NUMBER_AND_INCR (mcnt2, p);
	  fprintf (stderr, "/jump_n to %td, %d times",
		   p - 2 + mcnt - start, mcnt2);
Richard M. Stallman's avatar
Richard M. Stallman committed
667
	  break;
668

Richard M. Stallman's avatar
Richard M. Stallman committed
669
	case set_number_at:
670 671 672 673
	  EXTRACT_NUMBER_AND_INCR (mcnt, p);
	  EXTRACT_NUMBER_AND_INCR (mcnt2, p);
	  fprintf (stderr, "/set_number_at location %td to %d",
		   p - 2 + mcnt - start, mcnt2);
Richard M. Stallman's avatar
Richard M. Stallman committed
674
	  break;
675

Richard M. Stallman's avatar
Richard M. Stallman committed
676
	case wordbound:
677
	  fputs ("/wordbound", stderr);
678 679 680
	  break;

	case notwordbound:
681
	  fputs ("/notwordbound", stderr);
Richard M. Stallman's avatar
Richard M. Stallman committed
682
	  break;
683 684

	case wordbeg:
685
	  fputs ("/wordbeg", stderr);
686
	  break;
687

688
	case wordend:
689
	  fputs ("/wordend", stderr);
690
	  break;
691

692
	case symbeg:
693
	  fputs ("/symbeg", stderr);
694 695 696
	  break;

	case symend:
697
	  fputs ("/symend", stderr);
698
	  break;
699

700
	case syntaxspec:
701
	  fputs ("/syntaxspec", stderr);
702
	  mcnt = *p++;
703
	  fprintf (stderr, "/%d", mcnt);
704 705 706
	  break;

	case notsyntaxspec:
707
	  fputs ("/notsyntaxspec", stderr);
708
	  mcnt = *p++;
709
	  fprintf (stderr, "/%d", mcnt);
710 711
	  break;

712
	case at_dot:
713
	  fputs ("/at_dot", stderr);
Richard M. Stallman's avatar
Richard M. Stallman committed
714
	  break;
715

716
	case categoryspec:
717
	  fputs ("/categoryspec", stderr);
718
	  mcnt = *p++;
719
	  fprintf (stderr, "/%d", mcnt);
Richard M. Stallman's avatar
Richard M. Stallman committed
720
	  break;
721

722
	case notcategoryspec:
723
	  fputs ("/notcategoryspec", stderr);
724
	  mcnt = *p++;
725
	  fprintf (stderr, "/%d", mcnt);
726 727 728
	  break;

	case begbuf:
729
	  fputs ("/begbuf", stderr);
Richard M. Stallman's avatar
Richard M. Stallman committed
730
	  break;
731 732

	case endbuf:
733
	  fputs ("/endbuf", stderr);
Richard M. Stallman's avatar
Richard M. Stallman committed
734
	  break;
735

Richard M. Stallman's avatar
Richard M. Stallman committed
736
	default:
737
	  fprintf (stderr, "?%d", *(p-1));
738 739
	}

740
      putc ('\n', stderr);
741 742
    }

743
  fprintf (stderr, "%td:\tend of pattern.\n", p - start);
744 745 746
}


747 748
static void
print_compiled_pattern (struct re_pattern_buffer *bufp)
749
{
750
  re_char *buffer = bufp->buffer;
751 752

  print_partial_compiled_pattern (buffer, buffer + bufp->used);
753
  fprintf (stderr, "%td bytes used/%td bytes allocated.\n",
754
           bufp->used, bufp->allocated);
755 756 757

  if (bufp->fastmap_accurate && bufp->fastmap)
    {
758
      fputs ("fastmap: ", stderr);
759 760 761
      print_fastmap (bufp->fastmap);
    }

762
  fprintf (stderr, "re_nsub: %td\t", bufp->re_nsub);
763
  fprintf (stderr, "regs_alloc: %d\t", bufp->regs_allocated);
764
  fprintf (stderr, "can_be_null: %d\n", bufp->can_be_null);
765 766 767 768
  /* Perhaps we should print the translate table?  */
}


769
static void
770 771
print_double_string (re_char *where, re_char *string1, ptrdiff_t size1,
		     re_char *string2, ptrdiff_t size2)
772 773
{
  if (where == NULL)
774
    fputs ("(null)", stderr);
775 776
  else
    {
777
      int i;
778
      if (FIRST_STRING_P (where))
Richard M. Stallman's avatar
Richard M. Stallman committed
779
	{
780 781
	  for (i = 0; i < string1 + size1 - where; i++)
	    debug_putchar (where[i]);
Richard M. Stallman's avatar
Richard M. Stallman committed
782 783
	  where = string2;
	}