regex.c 180 KB
Newer Older
Richard M. Stallman's avatar
Richard M. Stallman committed
1 2
/* Extended regular expression matching and search library, version
   0.12.  (Implements POSIX draft P10003.2/D11.2, except for
Karl Berry's avatar
Karl Berry committed
3 4
   internationalization features.)

5
   Copyright (C) 1993, 1994, 1995, 1996, 1997 Free Software Foundation, Inc.
Karl Berry's avatar
Karl Berry committed
6

7 8 9 10 11 12 13
   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
Richard M. Stallman's avatar
Richard M. Stallman committed
14
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the
15 16 17 18
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
Karl Heuer's avatar
Karl Heuer committed
19
   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
Richard M. Stallman's avatar
Richard M. Stallman committed
20
   USA.	 */
21 22 23 24 25 26

/* AIX requires this to be the first thing in the file. */
#if defined (_AIX) && !defined (REGEX_MALLOC)
  #pragma alloca
#endif

27
#undef	_GNU_SOURCE
28 29
#define _GNU_SOURCE

Richard M. Stallman's avatar
Richard M. Stallman committed
30 31
/* Converts the pointer to the char to BEG-based offset from the start.	 */
#define PTR_TO_OFFSET(d)						\
32 33 34 35
	POS_AS_IN_BUFFER (MATCHING_IN_FIRST_STRING			\
			  ? (d) - string1 : (d) - (string2 - size1))
#define POS_AS_IN_BUFFER(p) ((p) + 1)

36 37 38 39
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

Richard M. Stallman's avatar
Richard M. Stallman committed
40
/* We need this for `regex.h', and perhaps for the Emacs include files.	 */
41 42
#include <sys/types.h>

Richard M. Stallman's avatar
Richard M. Stallman committed
43
/* This is for other GNU distributions with internationalized messages.	 */
44 45 46 47 48 49
#if HAVE_LIBINTL_H || defined (_LIBC)
# include <libintl.h>
#else
# define gettext(msgid) (msgid)
#endif

50 51 52 53 54 55
#ifndef gettext_noop
/* This define is so xgettext can find the internationalizable
   strings.  */
#define gettext_noop(String) String
#endif

56 57 58 59 60 61
/* The `emacs' switch turns on certain matching commands
   that make sense only in Emacs. */
#ifdef emacs

#include "lisp.h"
#include "buffer.h"
62 63 64 65

/* Make syntax table lookup grant data in gl_state.  */
#define SYNTAX_ENTRY_VIA_PROPERTY

66
#include "syntax.h"
67 68
#include "charset.h"
#include "category.h"
69

70 71 72
#define malloc xmalloc
#define free xfree

73 74 75 76 77 78 79 80 81 82 83 84 85 86
#else  /* not emacs */

/* If we are not linking with Emacs proper,
   we can't use the relocating allocator
   even if config.h says that we can.  */
#undef REL_ALLOC

#if defined (STDC_HEADERS) || defined (_LIBC)
#include <stdlib.h>
#else
char *malloc ();
char *realloc ();
#endif

87
/* When used in Emacs's lib-src, we need to get bzero and bcopy somehow.
Richard M. Stallman's avatar
Richard M. Stallman committed
88
   If nothing else has been done, use the method below.	 */
89 90 91 92 93 94 95 96 97 98 99
#ifdef INHIBIT_STRING_HEADER
#if !(defined (HAVE_BZERO) && defined (HAVE_BCOPY))
#if !defined (bzero) && !defined (bcopy)
#undef INHIBIT_STRING_HEADER
#endif
#endif
#endif

/* This is the normal way of making sure we have a bcopy and a bzero.
   This is used in most programs--a few other programs avoid this
   by defining INHIBIT_STRING_HEADER.  */
100
#ifndef INHIBIT_STRING_HEADER
101
#if defined (HAVE_STRING_H) || defined (STDC_HEADERS) || defined (_LIBC)
102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120
#include <string.h>
#ifndef bcmp
#define bcmp(s1, s2, n)	memcmp ((s1), (s2), (n))
#endif
#ifndef bcopy
#define bcopy(s, d, n)	memcpy ((d), (s), (n))
#endif
#ifndef bzero
#define bzero(s, n)	memset ((s), 0, (n))
#endif
#else
#include <strings.h>
#endif
#endif

/* Define the syntax stuff for \<, \>, etc.  */

/* This must be nonzero for the wordchar and notwordchar pattern
   commands in re_match_2.  */
121
#ifndef Sword
122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170
#define Sword 1
#endif

#ifdef SWITCH_ENUM_BUG
#define SWITCH_ENUM_CAST(x) ((int)(x))
#else
#define SWITCH_ENUM_CAST(x) (x)
#endif

#ifdef SYNTAX_TABLE

extern char *re_syntax_table;

#else /* not SYNTAX_TABLE */

/* How many characters in the character set.  */
#define CHAR_SET_SIZE 256

static char re_syntax_table[CHAR_SET_SIZE];

static void
init_syntax_once ()
{
   register int c;
   static int done = 0;

   if (done)
     return;

   bzero (re_syntax_table, sizeof re_syntax_table);

   for (c = 'a'; c <= 'z'; c++)
     re_syntax_table[c] = Sword;

   for (c = 'A'; c <= 'Z'; c++)
     re_syntax_table[c] = Sword;

   for (c = '0'; c <= '9'; c++)
     re_syntax_table[c] = Sword;

   re_syntax_table['_'] = Sword;

   done = 1;
}

#endif /* not SYNTAX_TABLE */

#define SYNTAX(c) re_syntax_table[c]

171
/* Dummy macros for non-Emacs environments.  */
172 173 174 175 176 177 178 179 180 181 182 183
#define BASE_LEADING_CODE_P(c) (0)
#define WORD_BOUNDARY_P(c1, c2) (0)
#define CHAR_HEAD_P(p) (1)
#define SINGLE_BYTE_CHAR_P(c) (1)
#define SAME_CHARSET_P(c1, c2) (1)
#define MULTIBYTE_FORM_LENGTH(p, s) (1)
#define STRING_CHAR(p, s) (*(p))
#define STRING_CHAR_AND_LENGTH(p, s, actual_len) ((actual_len) = 1, *(p))
#define GET_CHAR_AFTER_2(c, p, str1, end1, str2, end2) \
  (c = ((p) == (end1) ? *(str2) : *(p)))
#define GET_CHAR_BEFORE_2(c, p, str1, end1, str2, end2) \
  (c = ((p) == (str2) ? *((end1) - 1) : *((p) - 1)))
184 185 186 187 188 189 190 191 192 193 194 195 196
#endif /* not emacs */

/* Get the interface, including the syntax bits.  */
#include "regex.h"

/* isalpha etc. are used for the character classes.  */
#include <ctype.h>

/* Jim Meyering writes:

   "... Some ctype macros are valid only for character codes that
   isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when
   using /bin/cc or gcc but without giving an ansi option).  So, all
Richard M. Stallman's avatar
Richard M. Stallman committed
197
   ctype uses should be through macros like ISPRINT...	If
198 199 200
   STDC_HEADERS is defined, then autoconf has verified that the ctype
   macros don't need to be guarded with references to isascii. ...
   Defining isascii to 1 should let any compiler worth its salt
Richard M. Stallman's avatar
Richard M. Stallman committed
201
   eliminate the && through constant folding."	*/
202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231

#if defined (STDC_HEADERS) || (!defined (isascii) && !defined (HAVE_ISASCII))
#define ISASCII(c) 1
#else
#define ISASCII(c) isascii(c)
#endif

#ifdef isblank
#define ISBLANK(c) (ISASCII (c) && isblank (c))
#else
#define ISBLANK(c) ((c) == ' ' || (c) == '\t')
#endif
#ifdef isgraph
#define ISGRAPH(c) (ISASCII (c) && isgraph (c))
#else
#define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c))
#endif

#define ISPRINT(c) (ISASCII (c) && isprint (c))
#define ISDIGIT(c) (ISASCII (c) && isdigit (c))
#define ISALNUM(c) (ISASCII (c) && isalnum (c))
#define ISALPHA(c) (ISASCII (c) && isalpha (c))
#define ISCNTRL(c) (ISASCII (c) && iscntrl (c))
#define ISLOWER(c) (ISASCII (c) && islower (c))
#define ISPUNCT(c) (ISASCII (c) && ispunct (c))
#define ISSPACE(c) (ISASCII (c) && isspace (c))
#define ISUPPER(c) (ISASCII (c) && isupper (c))
#define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))

#ifndef NULL
Karl Heuer's avatar
Karl Heuer committed
232
#define NULL (void *)0
233 234 235 236 237
#endif

/* We remove any previous definition of `SIGN_EXTEND_CHAR',
   since ours (we hope) works properly with all combinations of
   machines, compilers, `char' and `unsigned char' argument types.
Richard M. Stallman's avatar
Richard M. Stallman committed
238
   (Per Bothner suggested the basic approach.)	*/
239 240 241 242 243 244 245 246 247 248 249 250
#undef SIGN_EXTEND_CHAR
#if __STDC__
#define SIGN_EXTEND_CHAR(c) ((signed char) (c))
#else  /* not __STDC__ */
/* As in Harbison and Steele.  */
#define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
#endif

/* Should we use malloc or alloca?  If REGEX_MALLOC is not defined, we
   use `alloca' instead of `malloc'.  This is because using malloc in
   re_search* or re_match* could cause memory leaks when C-g is used in
   Emacs; also, malloc is slower and causes storage fragmentation.  On
251 252
   the other hand, malloc is more portable, and easier to debug.

253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274
   Because we sometimes use alloca, some routines have to be macros,
   not functions -- `alloca'-allocated space disappears at the end of the
   function it is called in.  */

#ifdef REGEX_MALLOC

#define REGEX_ALLOCATE malloc
#define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
#define REGEX_FREE free

#else /* not REGEX_MALLOC  */

/* Emacs already defines alloca, sometimes.  */
#ifndef alloca

/* Make alloca work the best possible way.  */
#ifdef __GNUC__
#define alloca __builtin_alloca
#else /* not __GNUC__ */
#if HAVE_ALLOCA_H
#include <alloca.h>
#else /* not __GNUC__ or HAVE_ALLOCA_H */
Richard M. Stallman's avatar
Richard M. Stallman committed
275
#if 0 /* It is a bad idea to declare alloca.  We always cast the result.  */
Richard M. Stallman's avatar
Richard M. Stallman committed
276
#ifndef _AIX /* Already did AIX, up at the top.	 */
277 278
char *alloca ();
#endif /* not _AIX */
Richard M. Stallman's avatar
Richard M. Stallman committed
279
#endif
280
#endif /* not HAVE_ALLOCA_H */
281 282 283 284 285 286 287 288 289 290 291 292 293
#endif /* not __GNUC__ */

#endif /* not alloca */

#define REGEX_ALLOCATE alloca

/* Assumes a `char *destination' variable.  */
#define REGEX_REALLOCATE(source, osize, nsize)				\
  (destination = (char *) alloca (nsize),				\
   bcopy (source, destination, osize),					\
   destination)

/* No need to do anything to free, after alloca.  */
294
#define REGEX_FREE(arg) ((void)0) /* Do nothing!  But inhibit gcc warning.  */
295 296 297 298 299

#endif /* not REGEX_MALLOC */

/* Define how to allocate the failure stack.  */

Karl Heuer's avatar
Karl Heuer committed
300
#if defined (REL_ALLOC) && defined (REGEX_MALLOC)
301

302 303 304 305 306 307 308
#define REGEX_ALLOCATE_STACK(size)				\
  r_alloc (&failure_stack_ptr, (size))
#define REGEX_REALLOCATE_STACK(source, osize, nsize)		\
  r_re_alloc (&failure_stack_ptr, (nsize))
#define REGEX_FREE_STACK(ptr)					\
  r_alloc_free (&failure_stack_ptr)

309
#else /* not using relocating allocator */
310 311 312 313 314 315 316 317 318 319 320 321 322

#ifdef REGEX_MALLOC

#define REGEX_ALLOCATE_STACK malloc
#define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize)
#define REGEX_FREE_STACK free

#else /* not REGEX_MALLOC */

#define REGEX_ALLOCATE_STACK alloca

#define REGEX_REALLOCATE_STACK(source, osize, nsize)			\
   REGEX_REALLOCATE (source, osize, nsize)
Richard M. Stallman's avatar
Richard M. Stallman committed
323
/* No need to explicitly free anything.	 */
324 325 326
#define REGEX_FREE_STACK(arg)

#endif /* not REGEX_MALLOC */
327
#endif /* not using relocating allocator */
328 329 330 331 332


/* True if `size1' is non-NULL and PTR is pointing anywhere inside
   `string1' or just past its end.  This works if PTR is NULL, which is
   a good thing.  */
Richard M. Stallman's avatar
Richard M. Stallman committed
333
#define FIRST_STRING_P(ptr)					\
334 335 336 337 338 339 340 341 342
  (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)

/* (Re)Allocate N items of type T using malloc, or fail.  */
#define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
#define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
#define RETALLOC_IF(addr, n, t) \
  if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t)
#define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))

Richard M. Stallman's avatar
Richard M. Stallman committed
343
#define BYTEWIDTH 8 /* In bits.	 */
344 345 346 347 348 349 350 351 352 353 354 355 356 357 358

#define STREQ(s1, s2) ((strcmp (s1, s2) == 0))

#undef MAX
#undef MIN
#define MAX(a, b) ((a) > (b) ? (a) : (b))
#define MIN(a, b) ((a) < (b) ? (a) : (b))

typedef char boolean;
#define false 0
#define true 1

static int re_match_2_internal ();

/* These are the command codes that appear in compiled regular
Richard M. Stallman's avatar
Richard M. Stallman committed
359
   expressions.	 Some opcodes are followed by argument bytes.  A
360 361 362 363 364 365 366
   command code can specify any interpretation whatsoever for its
   arguments.  Zero bytes may appear in the compiled regular expression.  */

typedef enum
{
  no_op = 0,

Richard M. Stallman's avatar
Richard M. Stallman committed
367
  /* Succeed right away--no more backtracking.	*/
368 369
  succeed,

Richard M. Stallman's avatar
Richard M. Stallman committed
370
	/* Followed by one byte giving n, then by n literal bytes.  */
371 372
  exactn,

Richard M. Stallman's avatar
Richard M. Stallman committed
373
	/* Matches any (more or less) character.  */
374 375
  anychar,

Richard M. Stallman's avatar
Richard M. Stallman committed
376 377 378 379 380 381
	/* Matches any one char belonging to specified set.  First
	   following byte is number of bitmap bytes.  Then come bytes
	   for a bitmap saying which chars are in.  Bits in each byte
	   are ordered low-bit-first.  A character is in the set if its
	   bit is 1.  A character too large to have a bit in the map is
	   automatically not in the set.  */
382 383
  charset,

Richard M. Stallman's avatar
Richard M. Stallman committed
384 385
	/* Same parameters as charset, but match any character that is
	   not one of those specified.	*/
386 387
  charset_not,

Richard M. Stallman's avatar
Richard M. Stallman committed
388 389 390 391 392 393 394
	/* Start remembering the text that is matched, for storing in a
	   register.  Followed by one byte with the register number, in
	   the range 0 to one less than the pattern buffer's re_nsub
	   field.  Then followed by one byte with the number of groups
	   inner to this one.  (This last has to be part of the
	   start_memory only because we need it in the on_failure_jump
	   of re_match_2.)  */
395 396
  start_memory,

Richard M. Stallman's avatar
Richard M. Stallman committed
397 398 399 400 401 402 403
	/* Stop remembering the text that is matched and store it in a
	   memory register.  Followed by one byte with the register
	   number, in the range 0 to one less than `re_nsub' in the
	   pattern buffer, and one byte with the number of inner groups,
	   just like `start_memory'.  (We need the number of inner
	   groups here because we don't have any easy way of finding the
	   corresponding start_memory when we're at a stop_memory.)  */
404 405
  stop_memory,

Richard M. Stallman's avatar
Richard M. Stallman committed
406 407
	/* Match a duplicate of something remembered. Followed by one
	   byte containing the register number.	 */
408 409
  duplicate,

Richard M. Stallman's avatar
Richard M. Stallman committed
410
	/* Fail unless at beginning of line.  */
411 412
  begline,

Richard M. Stallman's avatar
Richard M. Stallman committed
413
	/* Fail unless at end of line.	*/
414 415
  endline,

Richard M. Stallman's avatar
Richard M. Stallman committed
416 417
	/* Succeeds if at beginning of buffer (if emacs) or at beginning
	   of string to be matched (if not).  */
418 419
  begbuf,

Richard M. Stallman's avatar
Richard M. Stallman committed
420
	/* Analogously, for end of buffer/string.  */
421
  endbuf,
422

Richard M. Stallman's avatar
Richard M. Stallman committed
423
	/* Followed by two byte relative address to which to jump.  */
424
  jump,
425 426 427 428

	/* Same as jump, but marks the end of an alternative.  */
  jump_past_alt,

Richard M. Stallman's avatar
Richard M. Stallman committed
429 430
	/* Followed by two-byte relative address of place to resume at
	   in case of failure.	*/
431
  on_failure_jump,
432

Richard M. Stallman's avatar
Richard M. Stallman committed
433 434
	/* Like on_failure_jump, but pushes a placeholder instead of the
	   current string position when executed.  */
435
  on_failure_keep_string_jump,
436

Richard M. Stallman's avatar
Richard M. Stallman committed
437 438
	/* Throw away latest failure point and then jump to following
	   two-byte relative address.  */
439 440
  pop_failure_jump,

Richard M. Stallman's avatar
Richard M. Stallman committed
441 442 443 444 445 446 447
	/* Change to pop_failure_jump if know won't have to backtrack to
	   match; otherwise change to jump.  This is used to jump
	   back to the beginning of a repeat.  If what follows this jump
	   clearly won't match what the repeat does, such that we can be
	   sure that there is no use backtracking out of repetitions
	   already matched, then we change it to a pop_failure_jump.
	   Followed by two-byte address.  */
448 449
  maybe_pop_jump,

Richard M. Stallman's avatar
Richard M. Stallman committed
450 451 452 453 454
	/* Jump to following two-byte address, and push a dummy failure
	   point. This failure point will be thrown away if an attempt
	   is made to use it for a failure.  A `+' construct makes this
	   before the first repeat.  Also used as an intermediary kind
	   of jump when compiling an alternative.  */
455 456 457 458 459 460
  dummy_failure_jump,

	/* Push a dummy failure point and continue.  Used at the end of
	   alternatives.  */
  push_dummy_failure,

Richard M. Stallman's avatar
Richard M. Stallman committed
461 462
	/* Followed by two-byte relative address and two-byte number n.
	   After matching N times, jump to the address upon failure.  */
463 464
  succeed_n,

Richard M. Stallman's avatar
Richard M. Stallman committed
465 466
	/* Followed by two-byte relative address, and two-byte number n.
	   Jump to the address N times, then fail.  */
467 468
  jump_n,

Richard M. Stallman's avatar
Richard M. Stallman committed
469 470 471
	/* Set the following two-byte relative address to the
	   subsequent two-byte number.	The address *includes* the two
	   bytes of number.  */
472 473 474 475 476 477 478 479 480
  set_number_at,

  wordchar,	/* Matches any word-constituent character.  */
  notwordchar,	/* Matches any char that is not a word-constituent.  */

  wordbeg,	/* Succeeds if at word beginning.  */
  wordend,	/* Succeeds if at word end.  */

  wordbound,	/* Succeeds if at a word boundary.  */
Richard M. Stallman's avatar
Richard M. Stallman committed
481
  notwordbound	/* Succeeds if not at a word boundary.	*/
482 483 484 485 486 487 488

#ifdef emacs
  ,before_dot,	/* Succeeds if before point.  */
  at_dot,	/* Succeeds if at point.  */
  after_dot,	/* Succeeds if after point.  */

	/* Matches any character whose syntax is specified.  Followed by
Richard M. Stallman's avatar
Richard M. Stallman committed
489
	   a byte which contains a syntax code, e.g., Sword.  */
490 491 492
  syntaxspec,

	/* Matches any character whose syntax is not that specified.  */
493 494 495
  notsyntaxspec,

  /* Matches any character whose category-set contains the specified
Richard M. Stallman's avatar
Richard M. Stallman committed
496 497
     category.	The operator is followed by a byte which contains a
     category code (mnemonic ASCII character).	*/
498 499 500 501 502 503
  categoryspec,

  /* Matches any character whose category-set does not contain the
     specified category.  The operator is followed by a byte which
     contains the category code (mnemonic ASCII character).  */
  notcategoryspec
504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541
#endif /* emacs */
} re_opcode_t;

/* Common operations on the compiled pattern.  */

/* Store NUMBER in two contiguous bytes starting at DESTINATION.  */

#define STORE_NUMBER(destination, number)				\
  do {									\
    (destination)[0] = (number) & 0377;					\
    (destination)[1] = (number) >> 8;					\
  } while (0)

/* Same as STORE_NUMBER, except increment DESTINATION to
   the byte after where the number is stored.  Therefore, DESTINATION
   must be an lvalue.  */

#define STORE_NUMBER_AND_INCR(destination, number)			\
  do {									\
    STORE_NUMBER (destination, number);					\
    (destination) += 2;							\
  } while (0)

/* Put into DESTINATION a number stored in two contiguous bytes starting
   at SOURCE.  */

#define EXTRACT_NUMBER(destination, source)				\
  do {									\
    (destination) = *(source) & 0377;					\
    (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8;		\
  } while (0)

#ifdef DEBUG
static void
extract_number (dest, source)
    int *dest;
    unsigned char *source;
{
542
  int temp = SIGN_EXTEND_CHAR (*(source + 1));
543 544 545 546
  *dest = *source & 0377;
  *dest += temp << 8;
}

Richard M. Stallman's avatar
Richard M. Stallman committed
547
#ifndef EXTRACT_MACROS /* To debug the macros.	*/
548 549 550 551 552 553 554 555 556 557 558 559
#undef EXTRACT_NUMBER
#define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
#endif /* not EXTRACT_MACROS */

#endif /* DEBUG */

/* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.
   SOURCE must be an lvalue.  */

#define EXTRACT_NUMBER_AND_INCR(destination, source)			\
  do {									\
    EXTRACT_NUMBER (destination, source);				\
Richard M. Stallman's avatar
Richard M. Stallman committed
560
    (source) += 2;							\
561 562 563 564 565 566 567
  } while (0)

#ifdef DEBUG
static void
extract_number_and_incr (destination, source)
    int *destination;
    unsigned char **source;
568
{
569 570 571 572 573 574 575 576 577 578 579 580
  extract_number (destination, *source);
  *source += 2;
}

#ifndef EXTRACT_MACROS
#undef EXTRACT_NUMBER_AND_INCR
#define EXTRACT_NUMBER_AND_INCR(dest, src) \
  extract_number_and_incr (&dest, &src)
#endif /* not EXTRACT_MACROS */

#endif /* DEBUG */

581 582
/* Store a multibyte character in three contiguous bytes starting
   DESTINATION, and increment DESTINATION to the byte after where the
Richard M. Stallman's avatar
Richard M. Stallman committed
583
   character is stored.	 Therefore, DESTINATION must be an lvalue.  */
584 585 586 587 588 589 590 591 592 593

#define STORE_CHARACTER_AND_INCR(destination, character)	\
  do {								\
    (destination)[0] = (character) & 0377;			\
    (destination)[1] = ((character) >> 8) & 0377;		\
    (destination)[2] = (character) >> 16;			\
    (destination) += 3;						\
  } while (0)

/* Put into DESTINATION a character stored in three contiguous bytes
Richard M. Stallman's avatar
Richard M. Stallman committed
594
   starting at SOURCE.	*/
595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610

#define EXTRACT_CHARACTER(destination, source)	\
  do {						\
    (destination) = ((source)[0]		\
		     | ((source)[1] << 8)	\
		     | ((source)[2] << 16));	\
  } while (0)


/* Macros for charset. */

/* Size of bitmap of charset P in bytes.  P is a start of charset,
   i.e. *P is (re_opcode_t) charset or (re_opcode_t) charset_not.  */
#define CHARSET_BITMAP_SIZE(p) ((p)[1] & 0x7F)

/* Nonzero if charset P has range table.  */
Richard M. Stallman's avatar
Richard M. Stallman committed
611
#define CHARSET_RANGE_TABLE_EXISTS_P(p)	 ((p)[1] & 0x80)
612 613 614

/* Return the address of range table of charset P.  But not the start
   of table itself, but the before where the number of ranges is
Richard M. Stallman's avatar
Richard M. Stallman committed
615
   stored.  `2 +' means to skip re_opcode_t and size of bitmap.	 */
616 617 618 619 620 621 622 623
#define CHARSET_RANGE_TABLE(p) (&(p)[2 + CHARSET_BITMAP_SIZE (p)])

/* Test if C is listed in the bitmap of charset P.  */
#define CHARSET_LOOKUP_BITMAP(p, c)				\
  ((c) < CHARSET_BITMAP_SIZE (p) * BYTEWIDTH			\
   && (p)[2 + (c) / BYTEWIDTH] & (1 << ((c) % BYTEWIDTH)))

/* Return the address of end of RANGE_TABLE.  COUNT is number of
Richard M. Stallman's avatar
Richard M. Stallman committed
624 625
   ranges (which is a pair of (start, end)) in the RANGE_TABLE.	 `* 2'
   is start of range and end of range.	`* 3' is size of each start
626 627 628 629
   and end.  */
#define CHARSET_RANGE_TABLE_END(range_table, count)	\
  ((range_table) + (count) * 2 * 3)

Richard M. Stallman's avatar
Richard M. Stallman committed
630
/* Test if C is in RANGE_TABLE.	 A flag NOT is negated if C is in.
631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667
   COUNT is number of ranges in RANGE_TABLE.  */
#define CHARSET_LOOKUP_RANGE_TABLE_RAW(not, c, range_table, count)	\
  do									\
    {									\
      int range_start, range_end;					\
      unsigned char *p;							\
      unsigned char *range_table_end					\
	= CHARSET_RANGE_TABLE_END ((range_table), (count));		\
									\
      for (p = (range_table); p < range_table_end; p += 2 * 3)		\
	{								\
	  EXTRACT_CHARACTER (range_start, p);				\
	  EXTRACT_CHARACTER (range_end, p + 3);				\
									\
	  if (range_start <= (c) && (c) <= range_end)			\
	    {								\
	      (not) = !(not);						\
	      break;							\
	    }								\
	}								\
    }									\
  while (0)

/* Test if C is in range table of CHARSET.  The flag NOT is negated if
   C is listed in it.  */
#define CHARSET_LOOKUP_RANGE_TABLE(not, c, charset)			\
  do									\
    {									\
      /* Number of ranges in range table. */				\
      int count;							\
      unsigned char *range_table = CHARSET_RANGE_TABLE (charset);	\
									\
      EXTRACT_NUMBER_AND_INCR (count, range_table);			\
      CHARSET_LOOKUP_RANGE_TABLE_RAW ((not), (c), range_table, count);	\
    }									\
  while (0)

668 669 670 671
/* If DEBUG is defined, Regex prints many voluminous messages about what
   it is doing (if the variable `debug' is nonzero).  If linked with the
   main program in `iregex.c', you can enter patterns and strings
   interactively.  And if linked with the main program in `main.c' and
Richard M. Stallman's avatar
Richard M. Stallman committed
672
   the other test files, you can run the already-written tests.	 */
673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688

#ifdef DEBUG

/* We use standard I/O for debugging.  */
#include <stdio.h>

/* It is useful to test things that ``must'' be true when debugging.  */
#include <assert.h>

static int debug = 0;

#define DEBUG_STATEMENT(e) e
#define DEBUG_PRINT1(x) if (debug) printf (x)
#define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2)
#define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3)
#define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4)
Richard M. Stallman's avatar
Richard M. Stallman committed
689
#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)				\
690 691 692 693 694 695 696 697 698 699 700 701
  if (debug) print_partial_compiled_pattern (s, e)
#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)			\
  if (debug) print_double_string (w, s1, sz1, s2, sz2)


/* Print the fastmap in human-readable form.  */

void
print_fastmap (fastmap)
    char *fastmap;
{
  unsigned was_a_range = 0;
702 703
  unsigned i = 0;

704 705 706 707 708
  while (i < (1 << BYTEWIDTH))
    {
      if (fastmap[i++])
	{
	  was_a_range = 0;
Richard M. Stallman's avatar
Richard M. Stallman committed
709 710 711 712 713 714
	  putchar (i - 1);
	  while (i < (1 << BYTEWIDTH)  &&  fastmap[i])
	    {
	      was_a_range = 1;
	      i++;
	    }
715
	  if (was_a_range)
Richard M. Stallman's avatar
Richard M. Stallman committed
716 717 718 719 720
	    {
	      printf ("-");
	      putchar (i - 1);
	    }
	}
721
    }
722
  putchar ('\n');
723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742
}


/* Print a compiled pattern string in human-readable form, starting at
   the START pointer into it and ending just before the pointer END.  */

void
print_partial_compiled_pattern (start, end)
    unsigned char *start;
    unsigned char *end;
{
  int mcnt, mcnt2;
  unsigned char *p = start;
  unsigned char *pend = end;

  if (start == NULL)
    {
      printf ("(null)\n");
      return;
    }
743

744 745 746 747 748 749 750
  /* Loop over pattern commands.  */
  while (p < pend)
    {
      printf ("%d:\t", p - start);

      switch ((re_opcode_t) *p++)
	{
Richard M. Stallman's avatar
Richard M. Stallman committed
751 752 753
	case no_op:
	  printf ("/no_op");
	  break;
754 755 756

	case exactn:
	  mcnt = *p++;
Richard M. Stallman's avatar
Richard M. Stallman committed
757 758
	  printf ("/exactn/%d", mcnt);
	  do
759
	    {
Richard M. Stallman's avatar
Richard M. Stallman committed
760
	      putchar ('/');
761
	      putchar (*p++);
Richard M. Stallman's avatar
Richard M. Stallman committed
762 763 764
	    }
	  while (--mcnt);
	  break;
765 766

	case start_memory:
Richard M. Stallman's avatar
Richard M. Stallman committed
767 768 769
	  mcnt = *p++;
	  printf ("/start_memory/%d/%d", mcnt, *p++);
	  break;
770 771

	case stop_memory:
Richard M. Stallman's avatar
Richard M. Stallman committed
772
	  mcnt = *p++;
773
	  printf ("/stop_memory/%d/%d", mcnt, *p++);
Richard M. Stallman's avatar
Richard M. Stallman committed
774
	  break;
775 776 777 778 779 780 781 782 783 784

	case duplicate:
	  printf ("/duplicate/%d", *p++);
	  break;

	case anychar:
	  printf ("/anychar");
	  break;

	case charset:
Richard M. Stallman's avatar
Richard M. Stallman committed
785 786 787
	case charset_not:
	  {
	    register int c, last = -100;
788 789 790
	    register int in_range = 0;

	    printf ("/charset [%s",
Richard M. Stallman's avatar
Richard M. Stallman committed
791
		    (re_opcode_t) *(p - 1) == charset_not ? "^" : "");
792

Richard M. Stallman's avatar
Richard M. Stallman committed
793
	    assert (p + *p < pend);
794

Richard M. Stallman's avatar
Richard M. Stallman committed
795
	    for (c = 0; c < 256; c++)
796 797 798 799 800 801 802 803 804 805 806
	      if (c / 8 < *p
		  && (p[1 + (c/8)] & (1 << (c % 8))))
		{
		  /* Are we starting a range?  */
		  if (last + 1 == c && ! in_range)
		    {
		      putchar ('-');
		      in_range = 1;
		    }
		  /* Have we broken a range?  */
		  else if (last + 1 != c && in_range)
Richard M. Stallman's avatar
Richard M. Stallman committed
807
	      {
808 809 810
		      putchar (last);
		      in_range = 0;
		    }
811

812 813 814 815
		  if (! in_range)
		    putchar (c);

		  last = c;
Richard M. Stallman's avatar
Richard M. Stallman committed
816
	      }
817 818 819 820 821 822 823 824 825 826 827 828

	    if (in_range)
	      putchar (last);

	    putchar (']');

	    p += 1 + *p;
	  }
	  break;

	case begline:
	  printf ("/begline");
Richard M. Stallman's avatar
Richard M. Stallman committed
829
	  break;
830 831

	case endline:
Richard M. Stallman's avatar
Richard M. Stallman committed
832 833
	  printf ("/endline");
	  break;
834 835

	case on_failure_jump:
Richard M. Stallman's avatar
Richard M. Stallman committed
836 837 838
	  extract_number_and_incr (&mcnt, &p);
	  printf ("/on_failure_jump to %d", p + mcnt - start);
	  break;
839 840

	case on_failure_keep_string_jump:
Richard M. Stallman's avatar
Richard M. Stallman committed
841 842 843
	  extract_number_and_incr (&mcnt, &p);
	  printf ("/on_failure_keep_string_jump to %d", p + mcnt - start);
	  break;
844 845

	case dummy_failure_jump:
Richard M. Stallman's avatar
Richard M. Stallman committed
846 847 848
	  extract_number_and_incr (&mcnt, &p);
	  printf ("/dummy_failure_jump to %d", p + mcnt - start);
	  break;
849 850

	case push_dummy_failure:
Richard M. Stallman's avatar
Richard M. Stallman committed
851 852
	  printf ("/push_dummy_failure");
	  break;
853

Richard M. Stallman's avatar
Richard M. Stallman committed
854 855 856
	case maybe_pop_jump:
	  extract_number_and_incr (&mcnt, &p);
	  printf ("/maybe_pop_jump to %d", p + mcnt - start);
857 858
	  break;

Richard M. Stallman's avatar
Richard M. Stallman committed
859
	case pop_failure_jump:
860
	  extract_number_and_incr (&mcnt, &p);
Richard M. Stallman's avatar
Richard M. Stallman committed
861
	  printf ("/pop_failure_jump to %d", p + mcnt - start);
862 863
	  break;

Richard M. Stallman's avatar
Richard M. Stallman committed
864
	case jump_past_alt:
865
	  extract_number_and_incr (&mcnt, &p);
Richard M. Stallman's avatar
Richard M. Stallman committed
866
	  printf ("/jump_past_alt to %d", p + mcnt - start);
867 868
	  break;

Richard M. Stallman's avatar
Richard M. Stallman committed
869
	case jump:
870
	  extract_number_and_incr (&mcnt, &p);
Richard M. Stallman's avatar
Richard M. Stallman committed
871
	  printf ("/jump to %d", p + mcnt - start);
872 873
	  break;

Richard M. Stallman's avatar
Richard M. Stallman committed
874 875 876
	case succeed_n:
	  extract_number_and_incr (&mcnt, &p);
	  extract_number_and_incr (&mcnt2, &p);
877
	  printf ("/succeed_n to %d, %d times", p + mcnt - start, mcnt2);
Richard M. Stallman's avatar
Richard M. Stallman committed
878
	  break;
879

Richard M. Stallman's avatar
Richard M. Stallman committed
880 881 882
	case jump_n:
	  extract_number_and_incr (&mcnt, &p);
	  extract_number_and_incr (&mcnt2, &p);
883
	  printf ("/jump_n to %d, %d times", p + mcnt - start, mcnt2);
Richard M. Stallman's avatar
Richard M. Stallman committed
884
	  break;
885

Richard M. Stallman's avatar
Richard M. Stallman committed
886 887 888
	case set_number_at:
	  extract_number_and_incr (&mcnt, &p);
	  extract_number_and_incr (&mcnt2, &p);
889
	  printf ("/set_number_at location %d to %d", p + mcnt - start, mcnt2);
Richard M. Stallman's avatar
Richard M. Stallman committed
890
	  break;
891

Richard M. Stallman's avatar
Richard M. Stallman committed
892
	case wordbound:
893 894 895 896 897
	  printf ("/wordbound");
	  break;

	case notwordbound:
	  printf ("/notwordbound");
Richard M. Stallman's avatar
Richard M. Stallman committed
898
	  break;
899 900 901 902

	case wordbeg:
	  printf ("/wordbeg");
	  break;
903

904 905
	case wordend:
	  printf ("/wordend");
906

907 908 909
#ifdef emacs
	case before_dot:
	  printf ("/before_dot");
Richard M. Stallman's avatar
Richard M. Stallman committed
910
	  break;
911 912 913

	case at_dot:
	  printf ("/at_dot");
Richard M. Stallman's avatar
Richard M. Stallman committed
914
	  break;
915 916 917

	case after_dot:
	  printf ("/after_dot");
Richard M. Stallman's avatar
Richard M. Stallman committed
918
	  break;
919 920

	case syntaxspec:
Richard M. Stallman's avatar
Richard M. Stallman committed
921
	  printf ("/syntaxspec");
922 923
	  mcnt = *p++;
	  printf ("/%d", mcnt);
Richard M. Stallman's avatar
Richard M. Stallman committed
924
	  break;
925

926
	case notsyntaxspec:
Richard M. Stallman's avatar
Richard M. Stallman committed
927
	  printf ("/notsyntaxspec");
928 929 930 931 932 933 934
	  mcnt = *p++;
	  printf ("/%d", mcnt);
	  break;
#endif /* emacs */

	case wordchar:
	  printf ("/wordchar");
Richard M. Stallman's avatar
Richard M. Stallman committed
935
	  break;
936

937 938
	case notwordchar:
	  printf ("/notwordchar");
Richard M. Stallman's avatar
Richard M. Stallman committed
939
	  break;
940 941 942

	case begbuf:
	  printf ("/begbuf");
Richard M. Stallman's avatar
Richard M. Stallman committed
943
	  break;
944 945 946

	case endbuf:
	  printf ("/endbuf");
Richard M. Stallman's avatar
Richard M. Stallman committed
947
	  break;
948

Richard M. Stallman's avatar
Richard M. Stallman committed
949 950
	default:
	  printf ("?%d", *(p-1));
951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995
	}

      putchar ('\n');
    }

  printf ("%d:\tend of pattern.\n", p - start);
}


void
print_compiled_pattern (bufp)
    struct re_pattern_buffer *bufp;
{
  unsigned char *buffer = bufp->buffer;

  print_partial_compiled_pattern (buffer, buffer + bufp->used);
  printf ("%d bytes used/%d bytes allocated.\n", bufp->used, bufp->allocated);

  if (bufp->fastmap_accurate && bufp->fastmap)
    {
      printf ("fastmap: ");
      print_fastmap (bufp->fastmap);
    }

  printf ("re_nsub: %d\t", bufp->re_nsub);
  printf ("regs_alloc: %d\t", bufp->regs_allocated);
  printf ("can_be_null: %d\t", bufp->can_be_null);
  printf ("newline_anchor: %d\n", bufp->newline_anchor);
  printf ("no_sub: %d\t", bufp->no_sub);
  printf ("not_bol: %d\t", bufp->not_bol);
  printf ("not_eol: %d\t", bufp->not_eol);
  printf ("syntax: %d\n", bufp->syntax);
  /* Perhaps we should print the translate table?  */
}


void
print_double_string (where, string1, size1, string2, size2)
    const char *where;
    const char *string1;
    const char *string2;
    int size1;
    int size2;
{
  unsigned this_char;
996

997 998 999 1000 1001
  if (where == NULL)
    printf ("(null)");
  else
    {
      if (FIRST_STRING_P (where))
Richard M. Stallman's avatar
Richard M. Stallman committed
1002 1003 1004
	{
	  for (this_char = where - string1; this_char < size1; this_char++)
	    putchar (string1[this_char]);
1005

Richard M. Stallman's avatar
Richard M. Stallman committed
1006 1007
	  where = string2;
	}
1008 1009

      for (this_char = where - string2; this_char < size2; this_char++)
Richard M. Stallman's avatar
Richard M. Stallman committed
1010
	putchar (string2[this_char]);
1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041
    }
}

#else /* not DEBUG */

#undef assert
#define assert(e)

#define DEBUG_STATEMENT(e)
#define DEBUG_PRINT1(x)
#define DEBUG_PRINT2(x1, x2)
#define DEBUG_PRINT3(x1, x2, x3)
#define DEBUG_PRINT4(x1, x2, x3, x4)
#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)

#endif /* not DEBUG */

/* Set by `re_set_syntax' to the current regexp syntax to recognize.  Can
   also be assigned to arbitrarily: each pattern buffer stores its own
   syntax, so it can be changed between regex compilations.  */
/* This has no initializer because initialized variables in Emacs
   become read-only after dumping.  */
reg_syntax_t re_syntax_options;


/* Specify the precise syntax of regexps for compilation.  This provides
   for compatibility for various utilities which historically have
   different, incompatible syntaxes.

   The argument SYNTAX is a bit mask comprised of the various bits
Richard M. Stallman's avatar
Richard M. Stallman committed
1042
   defined in regex.h.	We return the old syntax.  */
1043 1044 1045 1046 1047 1048

reg_syntax_t
re_set_syntax (syntax)
    reg_syntax_t syntax;
{
  reg_syntax_t ret = re_syntax_options;
1049

1050 1051 1052 1053 1054
  re_syntax_options = syntax;
  return ret;
}

/* This table gives an error message for each of the error codes listed
Richard M. Stallman's avatar
Richard M. Stallman committed
1055
   in regex.h.	Obviously the order here has to be same as there.
1056
   POSIX doesn't require that we do anything for REG_NOERROR,
Richard M. Stallman's avatar
Richard M. Stallman committed
1057
   but why not be nice?	 */
1058 1059

static const char *re_error_msgid[] =
1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077
  {
    gettext_noop ("Success"),	/* REG_NOERROR */
    gettext_noop ("No match"),	/* REG_NOMATCH */
    gettext_noop ("Invalid regular expression"), /* REG_BADPAT */
    gettext_noop ("Invalid collation character"), /* REG_ECOLLATE */
    gettext_noop ("Invalid character class name"), /* REG_ECTYPE */
    gettext_noop ("Trailing backslash"), /* REG_EESCAPE */
    gettext_noop ("Invalid back reference"), /* REG_ESUBREG */
    gettext_noop ("Unmatched [ or [^"),	/* REG_EBRACK */
    gettext_noop ("Unmatched ( or \\("), /* REG_EPAREN */
    gettext_noop ("Unmatched \\{"), /* REG_EBRACE */
    gettext_noop ("Invalid content of \\{\\}"), /* REG_BADBR */
    gettext_noop ("Invalid range end"),	/* REG_ERANGE */
    gettext_noop ("Memory exhausted"), /* REG_ESPACE */
    gettext_noop ("Invalid preceding regular expression"), /* REG_BADRPT */
    gettext_noop ("Premature end of regular expression"), /* REG_EEND */
    gettext_noop ("Regular expression too big"), /* REG_ESIZE */
    gettext_noop ("Unmatched ) or \\)"), /* REG_ERPAREN */
1078 1079
  };

Richard M. Stallman's avatar
Richard M. Stallman committed
1080
/* Avoiding alloca during matching, to placate r_alloc.	 */
1081 1082 1083 1084 1085 1086 1087 1088 1089

/* Define MATCH_MAY_ALLOCATE unless we need to make sure that the
   searching and matching functions should not call alloca.  On some
   systems, alloca is implemented in terms of malloc, and if we're
   using the relocating allocator routines, then malloc could cause a
   relocation, which might (if the strings being searched are in the
   ralloc heap) shift the data out from underneath the regexp
   routines.

1090
   Here's another reason to avoid allocation: Emacs
1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111
   processes input from X in a signal handler; processing X input may
   call malloc; if input arrives while a matching routine is calling
   malloc, then we're scrod.  But Emacs can't just block input while
   calling matching routines; then we don't notice interrupts when
   they come in.  So, Emacs blocks input around all regexp calls
   except the matching calls, which it leaves unprotected, in the
   faith that they will not malloc.  */

/* Normally, this is fine.  */
#define MATCH_MAY_ALLOCATE

/* When using GNU C, we are not REALLY using the C alloca, no matter
   what config.h may say.  So don't take precautions for it.  */
#ifdef __GNUC__
#undef C_ALLOCA
#endif

/* The match routines may not allocate if (1) they would do it with malloc
   and (2) it's not safe for them to use malloc.
   Note that if REL_ALLOC is defined, matching would not use malloc for the
   failure stack, but we would still use it for the register vectors;
Richard M. Stallman's avatar
Richard M. Stallman committed
1112
   so REL_ALLOC should not affect this.	 */
1113 1114 1115 1116 1117 1118 1119 1120
#if (defined (C_ALLOCA) || defined (REGEX_MALLOC)) && defined (emacs)
#undef MATCH_MAY_ALLOCATE
#endif


/* Failure stack declarations and macros; both re_compile_fastmap and
   re_match_2 use a failure stack.  These have to be macros because of
   REGEX_ALLOCATE_STACK.  */
1121

1122

1123
/* Approximate number of failure points for which to initially allocate space
1124 1125 1126
   when matching.  If this number is exceeded, we allocate more
   space, so it is not a hard limit.  */
#ifndef INIT_FAILURE_ALLOC
1127
#define INIT_FAILURE_ALLOC 20
1128 1129 1130
#endif

/* Roughly the maximum number of failure points on the stack.  Would be
1131
   exactly that if always used TYPICAL_FAILURE_SIZE items each time we failed.
1132
   This is a variable only so users of regex can assign to it; we never
Richard M. Stallman's avatar
Richard M. Stallman committed
1133
   change it ourselves.	 */
1134
#if defined (MATCH_MAY_ALLOCATE)
1135 1136 1137 1138 1139
/* Note that 4400 is enough to cause a crash on Alpha OSF/1,
   whose default stack limit is 2mb.  In order for a larger
   value to work reliably, you have to try to make it accord
   with the process stack limit.  */
int re_max_failures = 40000;
1140
#else
1141
int re_max_failures = 4000;
1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170
#endif

union fail_stack_elt
{
  unsigned char *pointer;
  int integer;
};

typedef union fail_stack_elt fail_stack_elt_t;

typedef struct
{
  fail_stack_elt_t *stack;
  unsigned size;
  unsigned avail;			/* Offset of next open position.  */
} fail_stack_type;

#define FAIL_STACK_EMPTY()     (fail_stack.avail == 0)
#define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
#define FAIL_STACK_FULL()      (fail_stack.avail == fail_stack.size)


/* Define macros to initialize and free the failure stack.
   Do `return -2' if the alloc fails.  */

#ifdef MATCH_MAY_ALLOCATE
#define INIT_FAIL_STACK()						\
  do {									\
    fail_stack.stack = (fail_stack_elt_t *)				\
1171 1172
      REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * TYPICAL_FAILURE_SIZE	\
			    * sizeof (fail_stack_elt_t));		\
1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191
									\
    if (fail_stack.stack == NULL)					\
      return -2;							\
									\
    fail_stack.size = INIT_FAILURE_ALLOC;				\
    fail_stack.avail = 0;						\
  } while (0)

#define RESET_FAIL_STACK()  REGEX_FREE_STACK (fail_stack.stack)
#else
#define INIT_FAIL_STACK()						\
  do {									\
    fail_stack.avail = 0;						\
  } while (0)

#define RESET_FAIL_STACK()
#endif


1192 1193
/* Double the size of FAIL_STACK, up to a limit
   which allows approximately `re_max_failures' items.
1194 1195

   Return 1 if succeeds, and 0 if either ran out of memory
1196 1197
   allocating space for it or it was already too large.

Richard M. Stallman's avatar
Richard M. Stallman committed
1198
   REGEX_REALLOCATE_STACK requires `destination' be declared.	*/
1199

1200 1201 1202 1203 1204 1205 1206 1207
/* Factor to increase the failure stack size by
   when we increase it.
   This used to be 2, but 2 was too wasteful
   because the old discarded stacks added up to as much space
   were as ultimate, maximum-size stack.  */
#define FAIL_STACK_GROWTH_FACTOR 4

#define GROW_FAIL_STACK(fail_stack)					\
1208 1209
  (((fail_stack).size * sizeof (fail_stack_elt_t)			\
    >= re_max_failures * TYPICAL_FAILURE_SIZE)				\
1210
   ? 0									\
1211 1212
   : ((fail_stack).stack						\
      = (fail_stack_elt_t *)						\
Richard M. Stallman's avatar
Richard M. Stallman committed
1213 1214
	REGEX_REALLOCATE_STACK ((fail_stack).stack,			\
	  (fail_stack).size * sizeof (fail_stack_elt_t),		\
1215 1216 1217
	  MIN (re_max_failures * TYPICAL_FAILURE_SIZE,			\
	       ((fail_stack).size * sizeof (fail_stack_elt_t)		\
		* FAIL_STACK_GROWTH_FACTOR))),				\
1218 1219 1220
									\
      (fail_stack).stack == NULL					\
      ? 0								\
1221 1222 1223 1224 1225
      : ((fail_stack).size						\
	 = (MIN (re_max_failures * TYPICAL_FAILURE_SIZE,		\
		 ((fail_stack).size * sizeof (fail_stack_elt_t)		\
		  * FAIL_STACK_GROWTH_FACTOR))				\
	    / sizeof (fail_stack_elt_t)),				\
Richard M. Stallman's avatar
Richard M. Stallman committed
1226
	 1)))
1227 1228


1229
/* Push pointer POINTER on FAIL_STACK.
1230 1231 1232 1233
   Return 1 if was able to do so and 0 if ran out of memory allocating
   space to do so.  */
#define PUSH_PATTERN_OP(POINTER, FAIL_STACK)				\
  ((FAIL_STACK_FULL ()							\
1234
    && !GROW_FAIL_STACK (FAIL_STACK))					\
1235 1236 1237 1238 1239 1240
   ? 0									\
   : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = POINTER,	\
      1))

/* Push a pointer value onto the failure stack.
   Assumes the variable `fail_stack'.  Probably should only
Richard M. Stallman's avatar
Richard M. Stallman committed
1241
   be called from within `PUSH_FAILURE_POINT'.	*/
1242 1243 1244 1245 1246
#define PUSH_FAILURE_POINTER(item)					\
  fail_stack.stack[fail_stack.avail++].pointer = (unsigned char *) (item)

/* This pushes an integer-valued item onto the failure stack.
   Assumes the variable `fail_stack'.  Probably should only
Richard M. Stallman's avatar
Richard M. Stallman committed
1247
   be called from within `PUSH_FAILURE_POINT'.	*/
1248 1249 1250 1251 1252
#define PUSH_FAILURE_INT(item)					\
  fail_stack.stack[fail_stack.avail++].integer = (item)

/* Push a fail_stack_elt_t value onto the failure stack.
   Assumes the variable `fail_stack'.  Probably should only