/* Emacs regular expression API

   Copyright (C) 1985, 1989-1993, 1995, 2000-2019 Free Software Foundation,
   Inc.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */

#ifndef EMACS_REGEX_H
#define EMACS_REGEX_H 1

#include <stddef.h>

/* This is the structure we store register match data in.
   Declare this before including lisp.h, since lisp.h (via thread.h)
   uses struct re_registers.  */
struct re_registers
{
  /* Number of register slots described by this structure.  */
  unsigned num_regs;
  /* Per-register start values.  */
  ptrdiff_t *start;
  /* Per-register end values.  */
  ptrdiff_t *end;
};

#include "lisp.h"

36
/* The string or buffer being matched.
37
   It is used for looking up syntax properties.
38

39 40 41
   If the value is a Lisp string object, match text in that string; if
   it's nil, match text in the current buffer; if it's t, match text
   in a C string.
Eli Zaretskii's avatar
Eli Zaretskii committed
42

43 44 45 46 47 48
   This value is effectively another parameter to re_search_2 and
   re_match_2.  No calls into Lisp or thread switches are allowed
   before setting re_match_object and calling into the regex search
   and match functions.  These functions capture the current value of
   re_match_object into gl_state on entry.

49
   TODO: turn into an actual function parameter.  */
50
extern Lisp_Object re_match_object;
51

/* Roughly the maximum number of failure points on the stack.  */
extern size_t emacs_re_max_failures;

/* Amount of memory that we can safely stack allocate.  */
extern ptrdiff_t emacs_re_safe_alloca;

/* This data structure represents a compiled pattern.  Before calling
59 60 61
   the pattern compiler, the fields 'buffer', 'allocated', 'fastmap',
   and 'translate' can be set.  After the pattern has been
   compiled, the 're_nsub' field is available.  All other fields are
Karl Berry's avatar
Karl Berry committed
62 63 64 65 66
   private to the regex routines.  */

struct re_pattern_buffer
{
	/* Space that holds the compiled pattern.  It is declared as
67
          'unsigned char *' because its elements are
Karl Berry's avatar
Karl Berry committed
68 69 70
           sometimes used as array indexes.  */
  unsigned char *buffer;

71
	/* Number of bytes to which 'buffer' points.  */
72
  size_t allocated;
Karl Berry's avatar
Karl Berry committed
73

74
	/* Number of bytes actually used in 'buffer'.  */
75
  size_t used;
Karl Berry's avatar
Karl Berry committed
76

77
        /* Charset of unibyte characters at compiling time.  */
78
  int charset_unibyte;
79

Karl Berry's avatar
Karl Berry committed
80 81 82 83 84 85 86
        /* Pointer to a fastmap, if any, otherwise zero.  re_search uses
           the fastmap, if there is one, to skip over impossible
           starting points for matches.  */
  char *fastmap;

        /* Either a translate table to apply to all characters before
           comparing them, or zero for no translation.  The translation
87
           applies to a pattern when it is compiled and to a string
Karl Berry's avatar
Karl Berry committed
88
           when it is matched.  */
89
  Lisp_Object translate;
Karl Berry's avatar
Karl Berry committed
90 91 92 93

	/* Number of subexpressions found by the compiler.  */
  size_t re_nsub;

94 95
        /* True if and only if this pattern can match the empty string.
           Well, in truth it's used only in 're_search_2', to see
Karl Berry's avatar
Karl Berry committed
96
           whether or not we should use the fastmap, so we don't set
97
           this absolutely perfectly; see 're_compile_fastmap'.  */
Karl Berry's avatar
Karl Berry committed
98 99
  unsigned can_be_null : 1;

100 101
        /* If REGS_UNALLOCATED, allocate space in the 'regs' structure
             for 'max (RE_NREGS, re_nsub + 1)' groups.
Karl Berry's avatar
Karl Berry committed
102 103 104 105
           If REGS_REALLOCATE, reallocate space if necessary.
           If REGS_FIXED, use what's there.  */
  unsigned regs_allocated : 2;

106 107
        /* Set to false when 'regex_compile' compiles a pattern; set to true
           by 're_compile_fastmap' if it updates the fastmap.  */
Karl Berry's avatar
Karl Berry committed
108 109
  unsigned fastmap_accurate : 1;

Stefan Monnier's avatar
Stefan Monnier committed
110
  /* If true, the compilation of the pattern had to look up the syntax table,
111
     so the compiled pattern is valid for the current syntax table only.  */
Stefan Monnier's avatar
Stefan Monnier committed
112 113
  unsigned used_syntax : 1;

114
  /* If true, multi-byte form in the regexp pattern should be
115
     recognized as a multibyte character.  */
116
  unsigned multibyte : 1;
117 118 119 120

  /* If true, multi-byte form in the target of match should be
     recognized as a multibyte character.  */
  unsigned target_multibyte : 1;
Karl Berry's avatar
Karl Berry committed
121 122 123 124 125
};

/* Declarations for routines.  */

/* Compile the regular expression PATTERN, with length LENGTH
   and syntax given by the global 're_syntax_options', into the buffer
   BUFFER.  Return NULL if successful, and an error string if not.

   NOTE(review): the text above does not describe POSIX_BACKTRACKING
   or WHITESPACE_REGEXP, and 're_syntax_options' is not declared in
   the part of this header visible here; confirm against the
   implementation before relying on it.  */
extern const char *re_compile_pattern (const char *pattern, size_t length,
                                       bool posix_backtracking,
                                       const char *whitespace_regexp,
                                       struct re_pattern_buffer *buffer);


/* Search in the string STRING (with length LENGTH) for the pattern
   compiled into BUFFER.  Start searching at position START, for RANGE
   characters.  Return the starting position of the match, -1 for no
   match, or -2 for an internal error.  Also return register
   information in REGS (if REGS is non-null).  */
extern ptrdiff_t re_search (struct re_pattern_buffer *buffer,
                            const char *string, size_t length,
                            ptrdiff_t start, ptrdiff_t range,
                            struct re_registers *regs);


/* Like 're_search', but search in the concatenation of STRING1 and
   STRING2.  Also, stop searching at index START + STOP.  */
extern ptrdiff_t re_search_2 (struct re_pattern_buffer *buffer,
                              const char *string1, size_t length1,
                              const char *string2, size_t length2,
                              ptrdiff_t start, ptrdiff_t range,
                              struct re_registers *regs,
                              ptrdiff_t stop);


/* Like 're_search_2', but return how many characters in STRING the regexp
   in BUFFER matched, starting at position START.  */
extern ptrdiff_t re_match_2 (struct re_pattern_buffer *buffer,
                             const char *string1, size_t length1,
                             const char *string2, size_t length2,
                             ptrdiff_t start, struct re_registers *regs,
                             ptrdiff_t stop);


/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
   ENDS.  Subsequent matches using BUFFER and REGS will use this memory
   for recording register information.  STARTS and ENDS must be
   allocated with malloc, and must each be at least 'NUM_REGS * sizeof
   (ptrdiff_t)' bytes long.

   If NUM_REGS == 0, then subsequent matches should allocate their own
   register data.

   Unless this function is called, the first search or match using
   BUFFER will allocate its own register data, without
   freeing the old data.  */
extern void re_set_registers (struct re_pattern_buffer *buffer,
                              struct re_registers *regs,
                              unsigned num_regs,
                              ptrdiff_t *starts, ptrdiff_t *ends);

/* Character classes.  RECC_ERROR is pinned to zero; every other
   class takes the next consecutive value in declaration order.  */
typedef enum
{
  RECC_ERROR = 0,
  RECC_ALNUM,
  RECC_ALPHA,
  RECC_WORD,
  RECC_GRAPH,
  RECC_PRINT,
  RECC_LOWER,
  RECC_UPPER,
  RECC_PUNCT,
  RECC_CNTRL,
  RECC_DIGIT,
  RECC_XDIGIT,
  RECC_BLANK,
  RECC_SPACE,
  RECC_MULTIBYTE,
  RECC_NONASCII,
  RECC_ASCII,
  RECC_UNIBYTE
} re_wctype_t;

193 194 195
extern bool re_iswctype (int ch, re_wctype_t cc);
/* Character-class parser taking a pointer to a string cursor STRP and
   a LIMIT.  NOTE(review): whether *STRP is advanced and what unit
   LIMIT is measured in are not visible here; confirm against the
   implementation.  */
extern re_wctype_t re_wctype_parse (const unsigned char **strp,
                                    unsigned limit);

#endif /* EMACS_REGEX_H */