Commit 9114e279 authored by Karl Berry's avatar Karl Berry
Browse files

*** empty log message ***

parent 9549c46d
......@@ -47,9 +47,15 @@
`BSTRING', as far as I know, and neither of them use this code. */
#if USG || STDC_HEADERS
#include <string.h>
#ifndef bcmp
#define bcmp(s1, s2, n) memcmp ((s1), (s2), (n))
#endif
#ifndef bcopy
#define bcopy(s, d, n) memcpy ((d), (s), (n))
#endif
#ifndef bzero
#define bzero(s, n) memset ((s), 0, (n))
#endif
#else
#include <strings.h>
#endif
......@@ -135,12 +141,8 @@ init_syntax_once ()
(Per Bothner suggested the basic approach.) */
#undef SIGN_EXTEND_CHAR
#if __STDC__
#ifndef VMS
#define SIGN_EXTEND_CHAR(c) ((signed char) (c))
#else /* On VMS, VAXC doesn't recognize `signed' before `char' */
#define SIGN_EXTEND_CHAR(c) ((char) (c))
#endif /* VMS */
#else
#else /* not __STDC__ */
/* As in Harbison and Steele. */
#define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
#endif
......@@ -447,6 +449,7 @@ static int debug = 0;
#define DEBUG_PRINT1(x) if (debug) printf (x)
#define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2)
#define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3)
#define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4)
#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) \
if (debug) print_partial_compiled_pattern (s, e)
#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) \
......@@ -760,6 +763,7 @@ print_double_string (where, string1, size1, string2, size2)
#define DEBUG_PRINT1(x)
#define DEBUG_PRINT2(x1, x2)
#define DEBUG_PRINT3(x1, x2, x3)
#define DEBUG_PRINT4(x1, x2, x3, x4)
#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
......@@ -1025,9 +1029,9 @@ typedef struct
`buffer' is the compiled pattern;
`syntax' is set to SYNTAX;
`used' is set to the length of the compiled pattern;
`fastmap_accurate' is set to zero;
`re_nsub' is set to the number of groups in PATTERN;
`not_bol' and `not_eol' are set to zero.
`fastmap_accurate' is zero;
`re_nsub' is the number of subexpressions in PATTERN;
`not_bol' and `not_eol' are zero;
The `fastmap' and `newline_anchor' fields are neither
examined nor set. */
......@@ -1676,10 +1680,10 @@ regex_compile (pattern, size, syntax, bufp)
| v | v
a | b | c
If we are at `b,' then fixup_alt_jump right now points to a
three-byte space after `a.' We'll put in the jump, set
fixup_alt_jump to right after `b,' and leave behind three
bytes which we'll fill in when we get to after `c.' */
If we are at `b', then fixup_alt_jump right now points to a
three-byte space after `a'. We'll put in the jump, set
fixup_alt_jump to right after `b', and leave behind three
bytes which we'll fill in when we get to after `c'. */
if (fixup_alt_jump)
STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
......@@ -2320,6 +2324,7 @@ typedef struct
int this_reg; \
\
DEBUG_STATEMENT (failure_id++); \
DEBUG_STATEMENT (nfailure_points_pushed++); \
DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id); \
DEBUG_PRINT2 (" Before push, next avail: %d\n", (fail_stack).avail);\
DEBUG_PRINT2 (" size: %d\n", (fail_stack).size);\
......@@ -2473,6 +2478,8 @@ typedef struct
regstart[this_reg] = (const char *) POP_FAILURE_ITEM (); \
DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \
} \
\
DEBUG_STATEMENT (nfailure_points_popped++); \
} /* POP_FAILURE_POINT */
/* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
......@@ -2860,15 +2867,9 @@ re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
else if (endpos > total_size)
range = total_size - startpos;
/* Update the fastmap now if not correct already. */
if (fastmap && !bufp->fastmap_accurate)
if (re_compile_fastmap (bufp) == -2)
return -2;
/* If the search isn't to be a backwards one, don't waste time in a
long search for a pattern that says it is anchored. */
if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf
&& range > 0)
search for a pattern that must be anchored. */
if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0)
{
if (startpos > 0)
return -1;
......@@ -2876,6 +2877,12 @@ re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
range = 1;
}
/* Update the fastmap now if not correct already. */
if (fastmap && !bufp->fastmap_accurate)
if (re_compile_fastmap (bufp) == -2)
return -2;
/* Loop through the string, looking for a place to start matching. */
for (;;)
{
/* If a fastmap is supplied, skip quickly over characters that
......@@ -2913,7 +2920,7 @@ re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
? string2[startpos - size1]
: string1[startpos]);
if (!fastmap[TRANSLATE (c)])
if (!fastmap[(unsigned char) TRANSLATE (c)])
goto advance;
}
}
......@@ -2987,12 +2994,9 @@ typedef union
#define EVER_MATCHED_SOMETHING(R) ((R).bits.ever_matched_something)
/* Call this when have matched something; it sets `matched' flags for the
registers corresponding to the group of which we currently are inside.
Also records whether this group ever matched something. We only care
about this information at `stop_memory', and then only about the
previous time through the loop (if the group is starred or whatever).
So it is ok to clear all the nonactive registers here. */
/* Call this when have matched a real character; it sets `matched' flags
for the subexpressions which we are currently inside. Also records
that those subexprs have matched. */
#define SET_REGS_MATCHED() \
do \
{ \
......@@ -3037,24 +3041,24 @@ typedef union
/* Test if at very beginning or at very end of the virtual concatenation
of `string1' and `string2'. If only one string, it's `string2'. */
#define AT_STRINGS_BEG() (d == (size1 ? string1 : string2) || !size2)
#define AT_STRINGS_END() (d == end2)
#define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
#define AT_STRINGS_END(d) ((d) == end2)
/* Test if D points to a character which is word-constituent. We have
two special cases to check for: if past the end of string1, look at
the first character in string2; and if before the beginning of
string2, look at the last character in string1.
Assumes `string1' exists, so use in conjunction with AT_STRINGS_BEG (). */
#define LETTER_P(d) \
string2, look at the last character in string1. */
#define WORDCHAR_P(d) \
(SYNTAX ((d) == end1 ? *string2 \
: (d) == string2 - 1 ? *(end1 - 1) : *(d)) == Sword)
: (d) == string2 - 1 ? *(end1 - 1) : *(d)) \
== Sword)
/* Test if the character before D and the one at D differ with respect
to being word-constituent. */
#define AT_WORD_BOUNDARY(d) \
(AT_STRINGS_BEG () || AT_STRINGS_END () || LETTER_P (d - 1) != LETTER_P (d))
(AT_STRINGS_BEG (d) || AT_STRINGS_END (d) \
|| WORDCHAR_P (d - 1) != WORDCHAR_P (d))
/* Free everything we malloc. */
......@@ -3161,6 +3165,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
fail_stack_type fail_stack;
#ifdef DEBUG
static unsigned failure_id = 0;
unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
#endif
/* We fill all the registers internally, independent of what we
......@@ -3254,8 +3259,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
else
{
/* We must initialize all our variables to NULL, so that
`FREE_VARIABLES' doesn't try to free them. Too bad this isn't
Lisp, so we could have a list of variables. As it is, */
`FREE_VARIABLES' doesn't try to free them. */
regstart = regend = old_regstart = old_regend = best_regstart
= best_regend = reg_dummy = NULL;
reg_info = reg_info_dummy = (register_info_type *) NULL;
......@@ -3339,8 +3343,10 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
if (p == pend)
{ /* End of pattern means we might have succeeded. */
DEBUG_PRINT1 ("End of pattern: ");
/* If not end of string, try backtracking. Otherwise done. */
DEBUG_PRINT1 ("end of pattern ... ");
/* If we haven't matched the entire string, and we want the
longest match, try backtracking. */
if (d != end_match_2)
{
DEBUG_PRINT1 ("backtracking.\n");
......@@ -3378,6 +3384,8 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
For example, the pattern `x.*y.*z' against the
strings `x-' and `y-z-', if the two strings are
not consecutive in memory. */
DEBUG_PRINT1 ("Restoring best registers.\n");
d = match_end;
dend = ((d >= string1 && d <= end1)
? end_match_1 : end_match_2);
......@@ -3390,7 +3398,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
}
} /* d != end_match_2 */
DEBUG_PRINT1 ("\nAccepting match.\n");
DEBUG_PRINT1 ("Accepting match.\n");
/* If caller wants register contents data back, do it. */
if (regs && !bufp->no_sub)
......@@ -3456,7 +3464,10 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
} /* regs && !bufp->no_sub */
FREE_VARIABLES ();
DEBUG_PRINT2 ("%d registers pushed.\n", num_regs_pushed);
DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
nfailure_points_pushed, nfailure_points_popped,
nfailure_points_pushed - nfailure_points_popped);
DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed);
mcnt = d - pos - (MATCHING_IN_FIRST_STRING
? string1
......@@ -3658,7 +3669,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
/* If just failed to match something this time around with a
group that's operated on by a repetition operator, try to
force exit from the ``loop,'' and restore the register
force exit from the ``loop'', and restore the register
information for this group that we had before trying this
last match. */
if ((!MATCHED_SOMETHING (reg_info[*p])
......@@ -3802,7 +3813,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
case begline:
DEBUG_PRINT1 ("EXECUTING begline.\n");
if (AT_STRINGS_BEG ())
if (AT_STRINGS_BEG (d))
{
if (!bufp->not_bol) break;
}
......@@ -3818,7 +3829,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
case endline:
DEBUG_PRINT1 ("EXECUTING endline.\n");
if (AT_STRINGS_END ())
if (AT_STRINGS_END (d))
{
if (!bufp->not_eol) break;
}
......@@ -3835,7 +3846,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
/* Match at the very beginning of the data. */
case begbuf:
DEBUG_PRINT1 ("EXECUTING begbuf.\n");
if (AT_STRINGS_BEG ())
if (AT_STRINGS_BEG (d))
break;
goto fail;
......@@ -3843,7 +3854,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
/* Match at the very end of the data. */
case endbuf:
DEBUG_PRINT1 ("EXECUTING endbuf.\n");
if (AT_STRINGS_END ())
if (AT_STRINGS_END (d))
break;
goto fail;
......@@ -3897,7 +3908,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
the original * applied to a group), save the information
for that group and all inner ones, so that if we fail back
to this point, the group's information will be correct.
For example, in \(a*\)*\1, we only need the preceding group,
For example, in \(a*\)*\1, we need the preceding group,
and in \(\(a*\)b*\)\2, we need the inner group. */
/* We can't use `p' to check ahead because we push
......@@ -3927,8 +3938,8 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
break;
/* A smart repeat ends with a maybe_pop_jump.
We change it either to a pop_failure_jump or a jump. */
/* A smart repeat ends with `maybe_pop_jump'.
We change it to either `pop_failure_jump' or `jump'. */
case maybe_pop_jump:
EXTRACT_NUMBER_AND_INCR (mcnt, p);
DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt);
......@@ -3956,10 +3967,21 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
/* If we're at the end of the pattern, we can change. */
if (p2 == pend)
{
{ /* But if we're also at the end of the string, we might
as well skip changing anything. For example, in `a+'
against `a', we'll have already matched the `a', and
I don't see the the point of changing the opcode,
popping the failure point, finding out it fails, and
then going into our endgame. */
if (d == dend)
{
p = pend;
DEBUG_PRINT1 (" End of pattern & string => done.\n");
continue;
}
p[-3] = (unsigned char) pop_failure_jump;
DEBUG_PRINT1
(" End of pattern: change to `pop_failure_jump'.\n");
DEBUG_PRINT1 (" End of pattern => pop_failure_jump.\n");
}
else if ((re_opcode_t) *p2 == exactn
......@@ -3973,7 +3995,12 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
to the `maybe_finalize_jump' of this case. Examine what
follows. */
if ((re_opcode_t) p1[3] == exactn && p1[5] != c)
p[-3] = (unsigned char) pop_failure_jump;
{
p[-3] = (unsigned char) pop_failure_jump;
DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n",
c, p1[5]);
}
else if ((re_opcode_t) p1[3] == charset
|| (re_opcode_t) p1[3] == charset_not)
{
......@@ -3988,9 +4015,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
if (!not)
{
p[-3] = (unsigned char) pop_failure_jump;
DEBUG_PRINT1
(" No match: change to `pop_failure_jump'.\n");
DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
}
}
}
......@@ -3999,6 +4024,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
if ((re_opcode_t) p[-1] != pop_failure_jump)
{
p[-1] = (unsigned char) jump;
DEBUG_PRINT1 (" Match => jump.\n");
goto unconditional_jump;
}
/* Note fall through. */
......@@ -4060,7 +4086,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
/* At the end of an alternative, we need to push a dummy failure
point in case we are followed by a pop_failure_jump', because
point in case we are followed by a `pop_failure_jump', because
we don't want the failure point for the alternative to be
popped. For example, matching `(a|ab)*' against `aab'
requires that we match the `ab' alternative. */
......@@ -4137,14 +4163,14 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
case wordbeg:
DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
if (LETTER_P (d) && (AT_STRINGS_BEG () || !LETTER_P (d - 1)))
if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1)))
break;
goto fail;
case wordend:
DEBUG_PRINT1 ("EXECUTING wordend.\n");
if (!AT_STRINGS_BEG () && LETTER_P (d - 1)
&& (!LETTER_P (d) || AT_STRINGS_END ()))
if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1)
&& (!WORDCHAR_P (d) || AT_STRINGS_END (d)))
break;
goto fail;
......@@ -4181,11 +4207,12 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
goto matchsyntax;
case wordchar:
DEBUG_PRINT1 ("EXECUTING wordchar.\n");
DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n");
mcnt = (int) Sword;
matchsyntax:
PREFETCH ();
if (SYNTAX (*d++) != (enum syntaxcode) mcnt) goto fail;
if (SYNTAX (*d++) != (enum syntaxcode) mcnt)
goto fail;
SET_REGS_MATCHED ();
break;
......@@ -4195,11 +4222,12 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
goto matchnotsyntax;
case notwordchar:
DEBUG_PRINT1 ("EXECUTING notwordchar.\n");
DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n");
mcnt = (int) Sword;
matchnotsyntax: /* We goto here from notsyntaxspec. */
matchnotsyntax:
PREFETCH ();
if (SYNTAX (*d++) == (enum syntaxcode) mcnt) goto fail;
if (SYNTAX (*d++) == (enum syntaxcode) mcnt)
goto fail;
SET_REGS_MATCHED ();
break;
......@@ -4207,17 +4235,19 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
case wordchar:
DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
PREFETCH ();
if (!LETTER_P (d))
if (!WORDCHAR_P (d))
goto fail;
SET_REGS_MATCHED ();
d++;
break;
case notwordchar:
DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
PREFETCH ();
if (LETTER_P (d))
if (WORDCHAR_P (d))
goto fail;
SET_REGS_MATCHED ();
d++;
break;
#endif /* not emacs */
......@@ -4812,7 +4842,7 @@ regexec (preg, string, nmatch, pmatch, eflags)
/* Returns a message corresponding to an error code, ERRCODE, returned
from either regcomp or regexec. */
from either regcomp or regexec. We don't use PREG here. */
size_t
regerror (errcode, preg, errbuf, errbuf_size)
......
......@@ -20,12 +20,15 @@
#ifndef __REGEXP_LIBRARY_H__
#define __REGEXP_LIBRARY_H__
/* POSIX says that <sys/types.h> must be included (by the caller) before
<regex.h>. */
#ifdef VMS
/* POSIX says that size_t should be in stddef.h. */
/* VMS doesn't have `size_t' in <sys/types.h>, even though POSIX says it
should be there. */
#include <stddef.h>
#endif
/* POSIX says that <sys/types.h> must be included before <regex.h>. */
/* The following bits are used to determine the regexp syntax we
recognize. The set/not-set meanings are chosen so that Emacs syntax
......@@ -162,6 +165,9 @@ extern reg_syntax_t re_syntax_options;
#define RE_SYNTAX_POSIX_EGREP \
(RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES)
/* P1003.2/D11.2, section 4.20.7.1, lines 5078ff. */
#define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC
#define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC
/* Syntax bits common to both basic and extended POSIX regex syntax. */
......@@ -316,12 +322,12 @@ struct re_pattern_buffer
#define REGS_FIXED 2
unsigned regs_allocated : 2;
/* Set to zero when regex_compile compiles a pattern; set to one
by re_compile_fastmap when it updates the fastmap, if any. */
/* Set to zero when `regex_compile' compiles a pattern; set to one
by `re_compile_fastmap' if it updates the fastmap. */
unsigned fastmap_accurate : 1;
/* If set, regexec reports only success or failure and does not
return anything in pmatch. */
/* If set, `re_match_2' does not return information about
subexpressions. */
unsigned no_sub : 1;
/* If set, a beginning-of-line anchor doesn't match at the
......@@ -383,17 +389,17 @@ typedef struct
unfortunately clutters up the declarations a bit, but I think it's
worth it.
We also have to undo `const' if we are not ANSI and if it hasn't
previously being taken care of. */
We may also have to undo `const' if we are not ANSI -- but if it has
already been defined, as by Autoconf's AC_CONST, don't do anything. */
#if __STDC__
#define _RE_ARGS(args) args
#else
#else /* not __STDC__ */
#define _RE_ARGS(args) ()
#ifndef const
#if !const && !HAVE_CONST
#define const
#endif
#endif
#endif /* not __STDC__ */
/* Sets the current default syntax to SYNTAX, and return the old syntax.
You can also simply assign to the `re_syntax_options' variable. */
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment