regex-emacs.h 24 KB
Newer Older
Karl Berry's avatar
Karl Berry committed
1
/* Definitions for data structures and routines for the regular
Jim Blandy's avatar
Jim Blandy committed
2
   expression library, version 0.12.
Karl Berry's avatar
Karl Berry committed
3

Paul Eggert's avatar
Paul Eggert committed
4
   Copyright (C) 1985, 1989-1993, 1995, 2000-2018 Free Software
5
   Foundation, Inc.
Karl Berry's avatar
Karl Berry committed
6 7 8

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
Miles Bader's avatar
Miles Bader committed
9
   the Free Software Foundation; either version 3, or (at your option)
Karl Berry's avatar
Karl Berry committed
10 11 12 13 14 15 16 17
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
18
   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
Karl Berry's avatar
Karl Berry committed
19

20 21 22
#ifndef _REGEX_H
#define _REGEX_H 1

Michal Nazarewicz's avatar
Michal Nazarewicz committed
23
#if defined emacs && (defined _REGEX_RE_COMP || defined _LIBC)
24
/* We're not defining re_set_syntax and using a different prototype of
Michal Nazarewicz's avatar
Michal Nazarewicz committed
25 26 27 28 29
   re_compile_pattern when building Emacs so fail compilation early with
   a (somewhat helpful) error message when conflict is detected. */
# error "_REGEX_RE_COMP nor _LIBC can be defined if emacs is defined."
#endif

Paul Eggert's avatar
Paul Eggert committed
30 31
#include <sys/types.h>

32 33 34 35
/* Allow the use in C++ code.  */
#ifdef __cplusplus
extern "C" {
#endif
Karl Berry's avatar
Karl Berry committed
36

37
#if !defined _POSIX_C_SOURCE && !defined _POSIX_SOURCE && defined VMS
Karl Berry's avatar
Karl Berry committed
38 39
/* VMS doesn't have `size_t' in <sys/types.h>, even though POSIX says it
   should be there.  */
40
# include <stddef.h>
Jim Blandy's avatar
Jim Blandy committed
41 42
#endif

Karl Berry's avatar
Karl Berry committed
43
/* The following bits are used to determine the regexp syntax we
44 45 46
   recognize.  The set/not-set meanings were historically chosen so
   that Emacs syntax had the value 0.
   The bits are given in alphabetical order, and
Karl Berry's avatar
Karl Berry committed
47 48
   the definitions shifted by one from the previous bit; thus, when we
   add or remove a bit, only one other definition need change.  */
49
typedef unsigned long reg_syntax_t;
50

Karl Berry's avatar
Karl Berry committed
51 52
/* If this bit is not set, then \ inside a bracket expression is literal.
   If set, then such a \ quotes the following character.  */
53
#define RE_BACKSLASH_ESCAPE_IN_LISTS ((unsigned long int) 1)
Karl Berry's avatar
Karl Berry committed
54 55

/* If this bit is not set, then + and ? are operators, and \+ and \? are
56
     literals.
Karl Berry's avatar
Karl Berry committed
57 58 59 60 61 62 63 64 65 66 67 68 69 70 71
   If set, then \+ and \? are operators and + and ? are literals.  */
#define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1)

/* If this bit is set, then character classes are supported.  They are:
     [:alpha:], [:upper:], [:lower:],  [:digit:], [:alnum:], [:xdigit:],
     [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:].
   If not set, then character classes are not supported.  */
#define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1)

/* If this bit is set, then ^ and $ are always anchors (outside bracket
     expressions, of course).
   If this bit is not set, then it depends:
        ^  is an anchor if it is at the beginning of a regular
           expression or after an open-group or an alternation operator;
        $  is an anchor if it is at the end of a regular expression, or
72
           before a close-group or an alternation operator.
Karl Berry's avatar
Karl Berry committed
73 74 75 76 77 78 79 80 81 82

   This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because
   POSIX draft 11.2 says that * etc. in leading positions is undefined.
   We already implemented a previous draft which made those constructs
   invalid, though, so we haven't changed the code back.  */
#define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1)

/* If this bit is set, then special characters are always special
     regardless of where they are in the pattern.
   If this bit is not set, then special characters are special only in
83
     some contexts; otherwise they are ordinary.  Specifically,
Karl Berry's avatar
Karl Berry committed
84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104
     * + ? and intervals are only special when not after the beginning,
     open-group, or alternation operator.  */
#define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1)

/* If this bit is set, then *, +, ?, and { cannot be first in an re or
     immediately after an alternation or begin-group operator.  */
#define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1)

/* If this bit is set, then . matches newline.
   If not set, then it doesn't.  */
#define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1)

/* If this bit is set, then . doesn't match NUL.
   If not set, then it does.  */
#define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1)

/* If this bit is set, nonmatching lists [^...] do not match newline.
   If not set, they do.  */
#define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1)

/* If this bit is set, either \{...\} or {...} defines an
105
     interval, depending on RE_NO_BK_BRACES.
Karl Berry's avatar
Karl Berry committed
106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129
   If not set, \{, \}, {, and } are literals.  */
#define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1)

/* If this bit is set, +, ? and | aren't recognized as operators.
   If not set, they are.  */
#define RE_LIMITED_OPS (RE_INTERVALS << 1)

/* If this bit is set, newline is an alternation operator.
   If not set, newline is literal.  */
#define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1)

/* If this bit is set, then `{...}' defines an interval, and \{ and \}
     are literals.
  If not set, then `\{...\}' defines an interval.  */
#define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1)

/* If this bit is set, (...) defines a group, and \( and \) are literals.
   If not set, \(...\) defines a group, and ( and ) are literals.  */
#define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1)

/* If this bit is set, then \<digit> matches <digit>.
   If not set, then \<digit> is a back-reference.  */
#define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1)

130
/* If this bit is set, then | is an alternation operator, and \| is literal.
Karl Berry's avatar
Karl Berry committed
131 132 133 134 135 136 137 138 139
   If not set, then \| is an alternation operator, and | is literal.  */
#define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1)

/* If this bit is set, then an ending range point collating higher
     than the starting range point, as in [z-a], is invalid.
   If not set, then when ending range point collates higher than the
     starting range point, the range is ignored.  */
#define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1)

140 141 142 143
/* If this bit is set, then an unmatched ) is ordinary.
   If not set, then an unmatched ) is invalid.  */
#define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1)

144 145
/* If this bit is set, succeed as soon as we match the whole pattern,
   without further backtracking.  */
146
#define RE_NO_POSIX_BACKTRACKING (RE_UNMATCHED_RIGHT_PAREN_ORD << 1)
147

148 149 150
/* If this bit is set, do not process the GNU regex operators.
   If not set, then the GNU regex operators are recognized. */
#define RE_NO_GNU_OPS (RE_NO_POSIX_BACKTRACKING << 1)
Stefan Monnier's avatar
Stefan Monnier committed
151

152
/* If this bit is set, then *?, +? and ?? match non greedily. */
153
#define RE_FRUGAL (RE_NO_GNU_OPS << 1)
154 155 156

/* If this bit is set, then (?:...) is treated as a shy group.  */
#define RE_SHY_GROUPS (RE_FRUGAL << 1)
157

158 159 160 161 162
/* If this bit is set, ^ and $ only match at beg/end of buffer.  */
#define RE_NO_NEWLINE_ANCHOR (RE_SHY_GROUPS << 1)

/* If this bit is set, turn on internal regex debugging.
   If not set, and debugging was on, turn it off.
163
   This only works if regex-emacs.c is compiled -DDEBUG.
164
   We define this bit always, so that all that's needed to turn on
165
   debugging is to recompile regex-emacs.c; the calling code can always have
166 167 168
   this bit set, and it won't affect anything in the normal case. */
#define RE_DEBUG (RE_NO_NEWLINE_ANCHOR << 1)

Karl Berry's avatar
Karl Berry committed
169 170 171 172
/* This global variable defines the particular regexp syntax to use (for
   some interfaces).  When a regexp is compiled, the syntax used is
   stored in the pattern buffer, so changing this does not affect
   already-compiled regexps.  */
173
/* extern reg_syntax_t re_syntax_options; */
174 175

#ifdef emacs
176
# include "lisp.h"
177
/* In Emacs, this is the string or buffer in which we are matching.
178
   It is used for looking up syntax properties.
179 180 181

   If the value is a Lisp string object, we are matching text in that
   string; if it's nil, we are matching text in the current buffer; if
Eli Zaretskii's avatar
Eli Zaretskii committed
182 183
   it's t, we are matching text in a C string.

184 185 186 187 188 189 190 191 192
   This value is effectively another parameter to re_search_2 and
   re_match_2.  No calls into Lisp or thread switches are allowed
   before setting re_match_object and calling into the regex search
   and match functions.  These functions capture the current value of
   re_match_object into gl_state on entry.

   TODO: once we get rid of the !emacs case in this code, turn into an
   actual function parameter.  */
extern Lisp_Object re_match_object;
193 194
#endif

Paul Eggert's avatar
Paul Eggert committed
195
/* Roughly the maximum number of failure points on the stack.  */
196 197 198 199 200 201
extern size_t emacs_re_max_failures;

#ifdef emacs
/* Amount of memory that we can safely stack allocate.  */
extern ptrdiff_t emacs_re_safe_alloca;
#endif
Paul Eggert's avatar
Paul Eggert committed
202

Karl Berry's avatar
Karl Berry committed
203 204 205

/* Define combinations of the above bits for the standard possibilities.
   (The [[[ comments delimit what gets put into the Texinfo file, so
206
   don't delete them!)  */
Karl Berry's avatar
Karl Berry committed
207
/* [[[begin syntaxes]]] */
208 209
#define RE_SYNTAX_EMACS							\
  (RE_CHAR_CLASSES | RE_INTERVALS | RE_SHY_GROUPS | RE_FRUGAL)
Karl Berry's avatar
Karl Berry committed
210 211

#define RE_SYNTAX_AWK							\
212 213 214 215 216 217 218 219 220
  (RE_BACKSLASH_ESCAPE_IN_LISTS   | RE_DOT_NOT_NULL			\
   | RE_NO_BK_PARENS              | RE_NO_BK_REFS			\
   | RE_NO_BK_VBAR                | RE_NO_EMPTY_RANGES			\
   | RE_DOT_NEWLINE		  | RE_CONTEXT_INDEP_ANCHORS		\
   | RE_UNMATCHED_RIGHT_PAREN_ORD | RE_NO_GNU_OPS)

#define RE_SYNTAX_GNU_AWK						\
  ((RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DEBUG)	\
   & ~(RE_DOT_NOT_NULL | RE_INTERVALS | RE_CONTEXT_INDEP_OPS))
Karl Berry's avatar
Karl Berry committed
221

Paul Eggert's avatar
Paul Eggert committed
222
#define RE_SYNTAX_POSIX_AWK						\
223 224
  (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS		\
   | RE_INTERVALS	    | RE_NO_GNU_OPS)
Karl Berry's avatar
Karl Berry committed
225 226 227 228

#define RE_SYNTAX_GREP							\
  (RE_BK_PLUS_QM              | RE_CHAR_CLASSES				\
   | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS				\
229
   | RE_NEWLINE_ALT)
Karl Berry's avatar
Karl Berry committed
230 231 232 233 234

#define RE_SYNTAX_EGREP							\
  (RE_CHAR_CLASSES        | RE_CONTEXT_INDEP_ANCHORS			\
   | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE			\
   | RE_NEWLINE_ALT       | RE_NO_BK_PARENS				\
235
   | RE_NO_BK_VBAR)
Karl Berry's avatar
Karl Berry committed
236 237 238 239

#define RE_SYNTAX_POSIX_EGREP						\
  (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES)

Karl Berry's avatar
Karl Berry committed
240 241 242
/* P1003.2/D11.2, section 4.20.7.1, lines 5078ff.  */
#define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC

Karl Berry's avatar
Karl Berry committed
243 244 245 246 247
#define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC

/* Syntax bits common to both basic and extended POSIX regex syntax.  */
#define _RE_SYNTAX_POSIX_COMMON						\
  (RE_CHAR_CLASSES | RE_DOT_NEWLINE      | RE_DOT_NOT_NULL		\
248
   | RE_INTERVALS  | RE_NO_EMPTY_RANGES)
Karl Berry's avatar
Karl Berry committed
249 250 251 252 253 254 255 256 257 258 259

#define RE_SYNTAX_POSIX_BASIC						\
  (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM)

/* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes
   RE_LIMITED_OPS, i.e., \? \+ \| are not recognized.  Actually, this
   isn't minimal, since other operators, such as \`, aren't disabled.  */
#define RE_SYNTAX_POSIX_MINIMAL_BASIC					\
  (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS)

#define RE_SYNTAX_POSIX_EXTENDED					\
260 261 262 263
  (_RE_SYNTAX_POSIX_COMMON  | RE_CONTEXT_INDEP_ANCHORS			\
   | RE_CONTEXT_INDEP_OPS   | RE_NO_BK_BRACES				\
   | RE_NO_BK_PARENS        | RE_NO_BK_VBAR				\
   | RE_CONTEXT_INVALID_OPS | RE_UNMATCHED_RIGHT_PAREN_ORD)
Karl Berry's avatar
Karl Berry committed
264

265 266
/* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INDEP_OPS is
   removed and RE_NO_BK_REFS is added.  */
Karl Berry's avatar
Karl Berry committed
267 268 269 270 271 272 273 274 275 276 277
#define RE_SYNTAX_POSIX_MINIMAL_EXTENDED				\
  (_RE_SYNTAX_POSIX_COMMON  | RE_CONTEXT_INDEP_ANCHORS			\
   | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES				\
   | RE_NO_BK_PARENS        | RE_NO_BK_REFS				\
   | RE_NO_BK_VBAR	    | RE_UNMATCHED_RIGHT_PAREN_ORD)
/* [[[end syntaxes]]] */

/* Maximum number of duplicates an interval can allow.  Some systems
   (erroneously) define this in other header files, but we want our
   value, so remove any previous define.  */
#ifdef RE_DUP_MAX
278
# undef RE_DUP_MAX
Karl Berry's avatar
Karl Berry committed
279
#endif
280 281 282 283
/* Repeat counts are stored in opcodes as 2 byte integers.  This was
   previously limited to 7fff because the parsing code uses signed
   ints.  But Emacs only runs on 32 bit platforms anyway.  */
#define RE_DUP_MAX (0xffff)
Karl Berry's avatar
Karl Berry committed
284 285 286 287 288 289 290 291 292 293 294


/* POSIX `cflags' bits (i.e., information for `regcomp').  */

/* If this bit is set, then use extended regular expression syntax.
   If not set, then use basic regular expression syntax.  */
#define REG_EXTENDED 1

/* If this bit is set, then ignore case when matching.
   If not set, then case is significant.  */
#define REG_ICASE (REG_EXTENDED << 1)
295

Karl Berry's avatar
Karl Berry committed
296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319
/* If this bit is set, then anchors do not match at newline
     characters in the string.
   If not set, then anchors do match at newlines.  */
#define REG_NEWLINE (REG_ICASE << 1)

/* If this bit is set, then report only success or fail in regexec.
   If not set, then returns differ between not matching and errors.  */
#define REG_NOSUB (REG_NEWLINE << 1)


/* POSIX `eflags' bits (i.e., information for regexec).  */

/* If this bit is set, then the beginning-of-line operator doesn't match
     the beginning of the string (presumably because it's not the
     beginning of a line).
   If not set, then the beginning-of-line operator does match the
     beginning of the string.  */
#define REG_NOTBOL 1

/* Like REG_NOTBOL, except for the end-of-line.  */
#define REG_NOTEOL (1 << 1)


/* If any error codes are removed, changed, or added, update the
320
   `re_error_msg' table in regex-emacs.c.  */
Karl Berry's avatar
Karl Berry committed
321 322
typedef enum
{
323 324 325 326
#ifdef _XOPEN_SOURCE
  REG_ENOSYS = -1,	/* This will never happen for this implementation.  */
#endif

Karl Berry's avatar
Karl Berry committed
327 328 329 330 331 332 333 334 335 336 337
  REG_NOERROR = 0,	/* Success.  */
  REG_NOMATCH,		/* Didn't find a match (for regexec).  */

  /* POSIX regcomp return error codes.  (In the order listed in the
     standard.)  */
  REG_BADPAT,		/* Invalid pattern.  */
  REG_ECOLLATE,		/* Not implemented.  */
  REG_ECTYPE,		/* Invalid character class name.  */
  REG_EESCAPE,		/* Trailing backslash.  */
  REG_ESUBREG,		/* Invalid back reference.  */
  REG_EBRACK,		/* Unmatched left bracket.  */
338
  REG_EPAREN,		/* Parenthesis imbalance.  */
Karl Berry's avatar
Karl Berry committed
339 340 341 342 343 344 345 346 347
  REG_EBRACE,		/* Unmatched \{.  */
  REG_BADBR,		/* Invalid contents of \{\}.  */
  REG_ERANGE,		/* Invalid range end.  */
  REG_ESPACE,		/* Ran out of memory.  */
  REG_BADRPT,		/* No preceding re for repetition op.  */

  /* Error codes we've added.  */
  REG_EEND,		/* Premature end.  */
  REG_ESIZE,		/* Compiled pattern bigger than 2^16 bytes.  */
348
  REG_ERPAREN,		/* Unmatched ) or \); not returned from regcomp.  */
349 350
  REG_ERANGEX,		/* Range striding over charsets.  */
  REG_ESIZEBR           /* n or m too big in \{n,m\} */
Karl Berry's avatar
Karl Berry committed
351 352
} reg_errcode_t;

Paul Eggert's avatar
Paul Eggert committed
353 354 355 356 357
/* Use a type compatible with Emacs.  */
#define RE_TRANSLATE_TYPE Lisp_Object
#define RE_TRANSLATE(TBL, C) char_table_translate (TBL, C)
#define RE_TRANSLATE_P(TBL) (!EQ (TBL, make_number (0)))

Karl Berry's avatar
Karl Berry committed
358 359 360 361 362 363
/* This data structure represents a compiled pattern.  Before calling
   the pattern compiler, the fields `buffer', `allocated', `fastmap',
   `translate', and `no_sub' can be set.  After the pattern has been
   compiled, the `re_nsub' field is available.  All other fields are
   private to the regex routines.  */

364
#ifndef RE_TRANSLATE_TYPE
365
# define RE_TRANSLATE_TYPE char *
366 367
#endif

Karl Berry's avatar
Karl Berry committed
368 369 370 371 372 373 374 375 376
struct re_pattern_buffer
{
/* [[[begin pattern_buffer]]] */
	/* Space that holds the compiled pattern.  It is declared as
          `unsigned char *' because its elements are
           sometimes used as array indexes.  */
  unsigned char *buffer;

	/* Number of bytes to which `buffer' points.  */
377
  size_t allocated;
Karl Berry's avatar
Karl Berry committed
378 379

	/* Number of bytes actually used in `buffer'.  */
380
  size_t used;
Karl Berry's avatar
Karl Berry committed
381

382 383 384 385
#ifdef emacs
        /* Charset of unibyte characters at compiling time. */
  int charset_unibyte;
#else
Karl Berry's avatar
Karl Berry committed
386 387
        /* Syntax setting with which the pattern was compiled.  */
  reg_syntax_t syntax;
388
#endif
Karl Berry's avatar
Karl Berry committed
389 390 391 392 393 394 395 396 397
        /* Pointer to a fastmap, if any, otherwise zero.  re_search uses
           the fastmap, if there is one, to skip over impossible
           starting points for matches.  */
  char *fastmap;

        /* Either a translate table to apply to all characters before
           comparing them, or zero for no translation.  The translation
           is applied to a pattern when it is compiled and to a string
           when it is matched.  */
398
  RE_TRANSLATE_TYPE translate;
Karl Berry's avatar
Karl Berry committed
399 400 401 402 403 404 405

	/* Number of subexpressions found by the compiler.  */
  size_t re_nsub;

        /* Zero if this pattern cannot match the empty string, one else.
           Well, in truth it's used only in `re_search_2', to see
           whether or not we should use the fastmap, so we don't set
Stefan Monnier's avatar
Stefan Monnier committed
406
           this absolutely perfectly; see `re_compile_fastmap'.  */
Karl Berry's avatar
Karl Berry committed
407 408 409 410 411 412 413 414 415 416 417
  unsigned can_be_null : 1;

        /* If REGS_UNALLOCATED, allocate space in the `regs' structure
             for `max (RE_NREGS, re_nsub + 1)' groups.
           If REGS_REALLOCATE, reallocate space if necessary.
           If REGS_FIXED, use what's there.  */
#define REGS_UNALLOCATED 0
#define REGS_REALLOCATE 1
#define REGS_FIXED 2
  unsigned regs_allocated : 2;

Karl Berry's avatar
Karl Berry committed
418 419
        /* Set to zero when `regex_compile' compiles a pattern; set to one
           by `re_compile_fastmap' if it updates the fastmap.  */
Karl Berry's avatar
Karl Berry committed
420 421
  unsigned fastmap_accurate : 1;

Karl Berry's avatar
Karl Berry committed
422 423
        /* If set, `re_match_2' does not return information about
           subexpressions.  */
Karl Berry's avatar
Karl Berry committed
424 425 426
  unsigned no_sub : 1;

        /* If set, a beginning-of-line anchor doesn't match at the
427
           beginning of the string.  */
Karl Berry's avatar
Karl Berry committed
428 429 430 431 432
  unsigned not_bol : 1;

        /* Similarly for an end-of-line anchor.  */
  unsigned not_eol : 1;

Stefan Monnier's avatar
Stefan Monnier committed
433 434 435 436
  /* If true, the compilation of the pattern had to look up the syntax table,
     so the compiled pattern is only valid for the current syntax table.  */
  unsigned used_syntax : 1;

437
#ifdef emacs
438
  /* If true, multi-byte form in the regexp pattern should be
439
     recognized as a multibyte character.  */
440
  unsigned multibyte : 1;
441 442 443 444

  /* If true, multi-byte form in the target of match should be
     recognized as a multibyte character.  */
  unsigned target_multibyte : 1;
445
#endif
446

Karl Berry's avatar
Karl Berry committed
447 448 449 450 451
/* [[[end pattern_buffer]]] */
};

typedef struct re_pattern_buffer regex_t;

Paul Eggert's avatar
Paul Eggert committed
452 453 454 455
/* POSIX 1003.1-2008 requires that regoff_t be at least as wide as
   ptrdiff_t and ssize_t.  We don't know of any hosts where ptrdiff_t
   is wider than ssize_t, so ssize_t is safe.  ptrdiff_t is not
   necessarily visible here, so use ssize_t.  */
456
typedef ssize_t regoff_t;
Karl Berry's avatar
Karl Berry committed
457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472


/* This is the structure we store register match data in.  See
   regex.texinfo for a full description of what registers match.  */
struct re_registers
{
  /* Number of registers that START and END have room for.  */
  unsigned num_regs;
  /* Per-register match start offsets; at least NUM_REGS elements.  */
  regoff_t *start;
  /* Per-register match end offsets; at least NUM_REGS elements.  */
  regoff_t *end;
};


/* If `regs_allocated' is REGS_UNALLOCATED in the pattern buffer,
   `re_match_2' returns information about at least this many registers
   the first time a `regs' structure is passed.  */
#ifndef RE_NREGS
473
# define RE_NREGS 30
Karl Berry's avatar
Karl Berry committed
474 475 476 477 478 479 480 481 482 483 484 485 486 487
#endif


/* POSIX specification for registers.  Aside from the different names than
   `re_registers', POSIX uses an array of structures, instead of a
   structure of arrays.  */
typedef struct
{
  regoff_t rm_so;  /* Byte offset from string's start to substring's start.  */
  regoff_t rm_eo;  /* Byte offset from string's start to substring's end.  */
} regmatch_t;

/* Declarations for routines.  */

Michal Nazarewicz's avatar
Michal Nazarewicz committed
488 489
#ifndef emacs

Karl Berry's avatar
Karl Berry committed
490 491
/* Sets the current default syntax to SYNTAX, and return the old syntax.
   You can also simply assign to the `re_syntax_options' variable.  */
Paul Eggert's avatar
Paul Eggert committed
492
extern reg_syntax_t re_set_syntax (reg_syntax_t __syntax);
Karl Berry's avatar
Karl Berry committed
493

Michal Nazarewicz's avatar
Michal Nazarewicz committed
494 495
#endif

Karl Berry's avatar
Karl Berry committed
496 497 498
/* Compile the regular expression PATTERN, with length LENGTH
   and syntax given by the global `re_syntax_options', into the buffer
   BUFFER.  Return NULL if successful, and an error string if not.  */
Paul Eggert's avatar
Paul Eggert committed
499
extern const char *re_compile_pattern (const char *__pattern, size_t __length,
Michal Nazarewicz's avatar
Michal Nazarewicz committed
500
#ifdef emacs
501
				       bool posix_backtracking,
502
				       const char *whitespace_regexp,
Michal Nazarewicz's avatar
Michal Nazarewicz committed
503
#endif
Paul Eggert's avatar
Paul Eggert committed
504
				       struct re_pattern_buffer *__buffer);
Karl Berry's avatar
Karl Berry committed
505 506 507 508 509


/* Compile a fastmap for the compiled pattern in BUFFER; used to
   accelerate searches.  Return 0 if successful and -2 if there was
   an internal error.  */
Paul Eggert's avatar
Paul Eggert committed
510
extern int re_compile_fastmap (struct re_pattern_buffer *__buffer);
Karl Berry's avatar
Karl Berry committed
511 512 513 514 515 516 517


/* Search in the string STRING (with length LENGTH) for the pattern
   compiled into BUFFER.  Start searching at position START, for RANGE
   characters.  Return the starting position of the match, -1 for no
   match, or -2 for an internal error.  Also return register
   information in REGS (if REGS is nonnull and BUFFER->no_sub is zero).  */
Paul Eggert's avatar
Paul Eggert committed
518 519 520 521
extern regoff_t re_search (struct re_pattern_buffer *__buffer,
			   const char *__string, size_t __length,
			   ssize_t __start, ssize_t __range,
			   struct re_registers *__regs);
Karl Berry's avatar
Karl Berry committed
522 523 524 525


/* Like `re_search', but search in the concatenation of STRING1 and
   STRING2.  Also, stop searching at index START + STOP.  */
Paul Eggert's avatar
Paul Eggert committed
526 527 528 529 530 531
extern regoff_t re_search_2 (struct re_pattern_buffer *__buffer,
			     const char *__string1, size_t __length1,
			     const char *__string2, size_t __length2,
			     ssize_t __start, ssize_t __range,
			     struct re_registers *__regs,
			     ssize_t __stop);
Karl Berry's avatar
Karl Berry committed
532 533 534 535


/* Like `re_search', but return how many characters in STRING the regexp
   in BUFFER matched, starting at position START.  */
Paul Eggert's avatar
Paul Eggert committed
536 537 538
extern regoff_t re_match (struct re_pattern_buffer *__buffer,
			  const char *__string, size_t __length,
			  ssize_t __start, struct re_registers *__regs);
Karl Berry's avatar
Karl Berry committed
539 540 541


/* Relates to `re_match' as `re_search_2' relates to `re_search'.  */
Paul Eggert's avatar
Paul Eggert committed
542 543 544 545 546
extern regoff_t re_match_2 (struct re_pattern_buffer *__buffer,
			    const char *__string1, size_t __length1,
			    const char *__string2, size_t __length2,
			    ssize_t __start, struct re_registers *__regs,
			    ssize_t __stop);
Karl Berry's avatar
Karl Berry committed
547 548 549 550 551 552 553 554 555 556 557 558 559 560


/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
   ENDS.  Subsequent matches using BUFFER and REGS will use this memory
   for recording register information.  STARTS and ENDS must be
   allocated with malloc, and must each be at least `NUM_REGS * sizeof
   (regoff_t)' bytes long.

   If NUM_REGS == 0, then subsequent matches should allocate their own
   register data.

   Unless this function is called, the first search or match using
   BUFFER will allocate its own register data, without
   freeing the old data.  */
Paul Eggert's avatar
Paul Eggert committed
561 562 563 564
extern void re_set_registers (struct re_pattern_buffer *__buffer,
			      struct re_registers *__regs,
			      unsigned __num_regs,
			      regoff_t *__starts, regoff_t *__ends);
Karl Berry's avatar
Karl Berry committed
565

566 567
#if defined _REGEX_RE_COMP || defined _LIBC
# ifndef _CRAY
Karl Berry's avatar
Karl Berry committed
568
/* 4.2 bsd compatibility.  */
Paul Eggert's avatar
Paul Eggert committed
569 570
extern char *re_comp (const char *);
extern int re_exec (const char *);
571 572 573 574
# endif
#endif

/* GCC 2.95 and later have "__restrict"; C99 compilers have
Paul Eggert's avatar
Paul Eggert committed
575 576 577 578 579 580 581 582 583 584 585
   "restrict", and "configure" may have defined "restrict".
   Other compilers use __restrict, __restrict__, and _Restrict, and
   'configure' might #define 'restrict' to those words, so pick a
   different name.  */
#ifndef _Restrict_
# if 199901L <= __STDC_VERSION__
#  define _Restrict_ restrict
# elif 2 < __GNUC__ || (2 == __GNUC__ && 95 <= __GNUC_MINOR__)
#  define _Restrict_ __restrict
# else
#  define _Restrict_
586
# endif
587
#endif
Paul Eggert's avatar
Paul Eggert committed
588 589 590 591 592 593 594 595 596 597 598 599
/* gcc 3.1 and up support the [restrict] syntax.  Don't trust
   sys/cdefs.h's definition of __restrict_arr, though, as it
   mishandles gcc -ansi -pedantic.  */
#ifndef _Restrict_arr_
# if ((199901L <= __STDC_VERSION__					\
       || ((3 < __GNUC__ || (3 == __GNUC__ && 1 <= __GNUC_MINOR__))	\
	   && !defined __STRICT_ANSI__))					\
      && !defined __GNUG__)
#  define _Restrict_arr_ _Restrict_
# else
#  define _Restrict_arr_
# endif
600
#endif
Karl Berry's avatar
Karl Berry committed
601 602

/* POSIX compatibility.  */
Paul Eggert's avatar
Paul Eggert committed
603 604
extern reg_errcode_t regcomp (regex_t *_Restrict_ __preg,
			      const char *_Restrict_ __pattern,
Paul Eggert's avatar
Paul Eggert committed
605
			      int __cflags);
606

Paul Eggert's avatar
Paul Eggert committed
607 608 609
extern reg_errcode_t regexec (const regex_t *_Restrict_ __preg,
			      const char *_Restrict_ __string, size_t __nmatch,
			      regmatch_t __pmatch[_Restrict_arr_],
Paul Eggert's avatar
Paul Eggert committed
610
			      int __eflags);
611

Paul Eggert's avatar
Paul Eggert committed
612 613
extern size_t regerror (int __errcode, const regex_t * __preg,
			char *__errbuf, size_t __errbuf_size);
614

Paul Eggert's avatar
Paul Eggert committed
615
extern void regfree (regex_t *__preg);
616 617 618 619 620 621


#ifdef __cplusplus
}
#endif	/* C++ */

Paul Eggert's avatar
Paul Eggert committed
622
/* For platforms which support the ISO C amendment 1 functionality we
623 624 625 626 627 628 629 630 631 632 633 634
   support user defined character classes.  */
#if WIDE_CHAR_SUPPORT
/* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>.  */
# include <wchar.h>
# include <wctype.h>

typedef wctype_t re_wctype_t;
typedef wchar_t re_wchar_t;
# define re_wctype wctype
# define re_iswctype iswctype
# define re_wctype_to_bit(cc) 0
#else
635 636 637
# ifndef emacs
/* Outside Emacs, stand in for btowc with the identity.  The argument
   is parenthesized so that an expression argument keeps its grouping
   after expansion (e.g. `btowc (a + b) * 2').  */
#  define btowc(c) (c)
# endif
638 639 640 641 642 643 644 645 646 647 648 649 650

/* Character class identifiers.  RECC_ERROR is pinned to 0 and each
   later enumerator takes the next integer, so reordering the list
   changes the values.  */
typedef enum
{
  RECC_ERROR = 0,
  RECC_ALNUM,
  RECC_ALPHA,
  RECC_WORD,
  RECC_GRAPH,
  RECC_PRINT,
  RECC_LOWER,
  RECC_UPPER,
  RECC_PUNCT,
  RECC_CNTRL,
  RECC_DIGIT,
  RECC_XDIGIT,
  RECC_BLANK,
  RECC_SPACE,
  RECC_MULTIBYTE,
  RECC_NONASCII,
  RECC_ASCII,
  RECC_UNIBYTE
} re_wctype_t;

651
extern char re_iswctype (int ch,    re_wctype_t cc);
652
extern re_wctype_t re_wctype_parse (const unsigned char **strp, unsigned limit);
653

654 655 656 657
typedef int re_wchar_t;

#endif /* not WIDE_CHAR_SUPPORT */

658
#endif /* regex-emacs.h */
Karl Berry's avatar
Karl Berry committed
659