Commit 938d252d authored by Daniel Colascione

Make regex matching reentrant; update syntax during match

* src/lisp.h (compile_pattern): Remove prototype of
now-internal function.

* src/regex.c (POS_AS_IN_BUFFER): Consult gl_state instead of
re_match_object: the latter can change in Lisp.
(re_match_2_internal): Switch back to UPDATE_SYNTAX_TABLE and
UPDATE_SYNTAX_TABLE_FORWARD from their _FAST variants, allowing
calls into Lisp.

* src/regex.h (re_match_object): Uncomment declaration.

* src/search.c (struct regexp_cache): Add `busy' field.
(thaw_buffer_relocation): Delete; rely on unbind.
(compile_pattern_1): Assert pattern isn't busy.
(shrink_regexp_cache): Don't shrink busy patterns.
(clear_regexp_cache): Don't nuke busy patterns.
(unfreeze_pattern, freeze_pattern): New functions.
(compile_pattern): Return a regexp_cache pointer instead of the
re_pattern_buffer, allowing callers to use `freeze_pattern' if
needed.  Do not consider busy patterns as cache hit candidates;
error if we run out of non-busy cache entries.
(looking_at_1, fast_looking_at): Snapshot
Vinhibit_changing_match_data; mark pattern busy while we're
matching it; unbind.
(string_match_1, fast_string_match_internal)
(fast_c_string_match_ignore_case): Adjust for compile_pattern
return type.
(search_buffer_re): Regex code from old search_buffer moved here;
snapshot Vinhibit_changing_match_data; mark pattern busy while
we're matching it; unbind.
(search_buffer_non_re): Non-regex code from old search_buffer
moved here.
(search_buffer): Split into search_buffer_re,
search_buffer_non_re.
(syms_of_search): Staticpro re_match_object, even though we really
shouldn't have to.

* src/syntax.h (UPDATE_SYNTAX_TABLE_FORWARD_FAST):
(UPDATE_SYNTAX_TABLE_FAST): Remove.

* src/thread.h (struct thread_state): Remove m_re_match_object,
which is global again.  (It never needs to be preserved across
thread switch.)
parent 1502b377
......@@ -4029,10 +4029,6 @@ extern void restore_search_regs (void);
extern void update_search_regs (ptrdiff_t oldstart,
ptrdiff_t oldend, ptrdiff_t newend);
extern void record_unwind_save_match_data (void);
struct re_registers;
extern struct re_pattern_buffer *compile_pattern (Lisp_Object,
struct re_registers *,
Lisp_Object, bool, bool);
extern ptrdiff_t fast_string_match_internal (Lisp_Object, Lisp_Object,
Lisp_Object);
......
......@@ -155,7 +155,8 @@
# define PTR_TO_OFFSET(d) POS_AS_IN_BUFFER (POINTER_TO_OFFSET (d))
/* Strings are 0-indexed, buffers are 1-indexed; we pun on the boolean
result to get the right base index. */
# define POS_AS_IN_BUFFER(p) ((p) + (NILP (re_match_object) || BUFFERP (re_match_object)))
# define POS_AS_IN_BUFFER(p) \
((p) + (NILP (gl_state.object) || BUFFERP (gl_state.object)))
# define RE_MULTIBYTE_P(bufp) ((bufp)->multibyte)
# define RE_TARGET_MULTIBYTE_P(bufp) ((bufp)->target_multibyte)
......@@ -1233,6 +1234,15 @@ static const char *re_error_msgid[] =
# undef MATCH_MAY_ALLOCATE
#endif
/* While regex matching of a single compiled pattern isn't reentrant
(because we compile regexes to bytecode programs, and the bytecode
programs are self-modifying), the regex machinery must nevertheless
be reentrant with respect to _different_ patterns, and we do that
by avoiding global variables and using MATCH_MAY_ALLOCATE. */
#if !defined MATCH_MAY_ALLOCATE && defined emacs
# error "Emacs requires MATCH_MAY_ALLOCATE"
#endif
/* Failure stack declarations and macros; both re_compile_fastmap and
re_match_2 use a failure stack. These have to be macros because of
......@@ -5895,12 +5905,12 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
#ifdef emacs
ssize_t offset = PTR_TO_OFFSET (d - 1);
ssize_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
UPDATE_SYNTAX_TABLE_FAST (charpos);
UPDATE_SYNTAX_TABLE (charpos);
#endif
GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
s1 = SYNTAX (c1);
#ifdef emacs
UPDATE_SYNTAX_TABLE_FORWARD_FAST (charpos + 1);
UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1);
#endif
PREFETCH_NOLIMIT ();
GET_CHAR_AFTER (c2, d, dummy);
......@@ -5937,7 +5947,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
#ifdef emacs
ssize_t offset = PTR_TO_OFFSET (d);
ssize_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
UPDATE_SYNTAX_TABLE_FAST (charpos);
UPDATE_SYNTAX_TABLE (charpos);
#endif
PREFETCH ();
GET_CHAR_AFTER (c2, d, dummy);
......@@ -5982,7 +5992,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
#ifdef emacs
ssize_t offset = PTR_TO_OFFSET (d) - 1;
ssize_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
UPDATE_SYNTAX_TABLE_FAST (charpos);
UPDATE_SYNTAX_TABLE (charpos);
#endif
GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
s1 = SYNTAX (c1);
......@@ -5997,7 +6007,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
PREFETCH_NOLIMIT ();
GET_CHAR_AFTER (c2, d, dummy);
#ifdef emacs
UPDATE_SYNTAX_TABLE_FORWARD_FAST (charpos);
UPDATE_SYNTAX_TABLE_FORWARD (charpos);
#endif
s2 = SYNTAX (c2);
......@@ -6026,7 +6036,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
#ifdef emacs
ssize_t offset = PTR_TO_OFFSET (d);
ssize_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
UPDATE_SYNTAX_TABLE_FAST (charpos);
UPDATE_SYNTAX_TABLE (charpos);
#endif
PREFETCH ();
c2 = RE_STRING_CHAR (d, target_multibyte);
......@@ -6069,7 +6079,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
#ifdef emacs
ssize_t offset = PTR_TO_OFFSET (d) - 1;
ssize_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
UPDATE_SYNTAX_TABLE_FAST (charpos);
UPDATE_SYNTAX_TABLE (charpos);
#endif
GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
s1 = SYNTAX (c1);
......@@ -6084,7 +6094,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
PREFETCH_NOLIMIT ();
c2 = RE_STRING_CHAR (d, target_multibyte);
#ifdef emacs
UPDATE_SYNTAX_TABLE_FORWARD_FAST (charpos + 1);
UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1);
#endif
s2 = SYNTAX (c2);
......@@ -6107,7 +6117,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
{
ssize_t offset = PTR_TO_OFFSET (d);
ssize_t pos1 = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
UPDATE_SYNTAX_TABLE_FAST (pos1);
UPDATE_SYNTAX_TABLE (pos1);
}
#endif
{
......
......@@ -181,8 +181,15 @@ typedef unsigned long reg_syntax_t;
string; if it's nil, we are matching text in the current buffer; if
it's t, we are matching text in a C string.
This is defined as a macro in thread.h, which see. */
/* extern Lisp_Object re_match_object; */
This value is effectively another parameter to re_search_2 and
re_match_2. No calls into Lisp or thread switches are allowed
before setting re_match_object and calling into the regex search
and match functions. These functions capture the current value of
re_match_object into gl_state on entry.
TODO: once we get rid of the !emacs case in this code, turn into an
actual function parameter. */
extern Lisp_Object re_match_object;
#endif
/* Roughly the maximum number of failure points on the stack. */
......
This diff is collapsed.
......@@ -186,13 +186,6 @@ UPDATE_SYNTAX_TABLE_FORWARD (ptrdiff_t charpos)
false, gl_state.object);
}
/* "Fast" counterpart of UPDATE_SYNTAX_TABLE_FORWARD; this commit
   removes it.  If parse_sexp_lookup_properties is set and CHARPOS is
   at or beyond gl_state.e_property, call update_syntax_table for
   position CHARPOS + gl_state.offset, passing 1, false and
   gl_state.object.  Otherwise do nothing.
   NOTE(review): how this differs from UPDATE_SYNTAX_TABLE_FORWARD is
   not visible in this hunk; the commit message says only that
   switching back to the non-FAST variants allows calls into Lisp.  */
INLINE void
UPDATE_SYNTAX_TABLE_FORWARD_FAST (ptrdiff_t charpos)
{
if (parse_sexp_lookup_properties && charpos >= gl_state.e_property)
update_syntax_table (charpos + gl_state.offset, 1, false, gl_state.object);
}
/* Make syntax table state (gl_state) good for CHARPOS, assuming it is
currently good for a position after CHARPOS. */
......@@ -212,13 +205,6 @@ UPDATE_SYNTAX_TABLE (ptrdiff_t charpos)
UPDATE_SYNTAX_TABLE_FORWARD (charpos);
}
/* "Fast" counterpart of UPDATE_SYNTAX_TABLE; this commit removes it.
   Run UPDATE_SYNTAX_TABLE_BACKWARD on CHARPOS, then
   UPDATE_SYNTAX_TABLE_FORWARD_FAST on the same CHARPOS.  */
INLINE void
UPDATE_SYNTAX_TABLE_FAST (ptrdiff_t charpos)
{
UPDATE_SYNTAX_TABLE_BACKWARD (charpos);
UPDATE_SYNTAX_TABLE_FORWARD_FAST (charpos);
}
/* Set up the buffer-global syntax table. */
INLINE void
......
......@@ -137,15 +137,6 @@ struct thread_state
struct re_registers m_saved_search_regs;
#define saved_search_regs (current_thread->m_saved_search_regs)
/* This is the string or buffer in which we
are matching. It is used for looking up syntax properties.
If the value is a Lisp string object, we are matching text in that
string; if it's nil, we are matching text in the current buffer; if
it's t, we are matching text in a C string. */
Lisp_Object m_re_match_object;
#define re_match_object (current_thread->m_re_match_object)
/* This member is different from waiting_for_input.
It is used to communicate to a lisp process-filter/sentinel (via the
function Fwaiting_for_user_input_p) whether Emacs was waiting
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment