search.c 64.8 KB
Newer Older
Jim Blandy's avatar
Jim Blandy committed
1
/* String search routines for GNU Emacs.
Karl Heuer's avatar
Karl Heuer committed
2
   Copyright (C) 1985, 1986, 1987, 1993, 1994 Free Software Foundation, Inc.
Jim Blandy's avatar
Jim Blandy committed
3 4 5 6 7

This file is part of GNU Emacs.

GNU Emacs is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
Karl Heuer's avatar
Karl Heuer committed
8
the Free Software Foundation; either version 2, or (at your option)
Jim Blandy's avatar
Jim Blandy committed
9 10 11 12 13 14 15 16 17
any later version.

GNU Emacs is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU Emacs; see the file COPYING.  If not, write to
18 19
the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */
Jim Blandy's avatar
Jim Blandy committed
20 21


22
#include <config.h>
Jim Blandy's avatar
Jim Blandy committed
23 24
#include "lisp.h"
#include "syntax.h"
Karl Heuer's avatar
Karl Heuer committed
25
#include "category.h"
Jim Blandy's avatar
Jim Blandy committed
26
#include "buffer.h"
Karl Heuer's avatar
Karl Heuer committed
27
#include "charset.h"
28
#include "region-cache.h"
Jim Blandy's avatar
Jim Blandy committed
29
#include "commands.h"
30
#include "blockinput.h"
31
#include "intervals.h"
Jim Blandy's avatar
Jim Blandy committed
32

Jim Blandy's avatar
Jim Blandy committed
33 34 35
#include <sys/types.h>
#include "regex.h"

36
#define REGEXP_CACHE_SIZE 20
Jim Blandy's avatar
Jim Blandy committed
37

38 39
/* If the regexp is non-nil, then the buffer contains the compiled form
   of that regexp, suitable for searching.  */
40 41
struct regexp_cache
{
42 43 44 45
  struct regexp_cache *next;
  Lisp_Object regexp;
  struct re_pattern_buffer buf;
  char fastmap[0400];
46 47
  /* Nonzero means regexp was compiled to do full POSIX backtracking.  */
  char posix;
48
};
Jim Blandy's avatar
Jim Blandy committed
49

50 51
/* The instances of that struct.  */
struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
Jim Blandy's avatar
Jim Blandy committed
52

53 54
/* The head of the linked list; points to the most recently used buffer.  */
struct regexp_cache *searchbuf_head;
Jim Blandy's avatar
Jim Blandy committed
55 56


Jim Blandy's avatar
Jim Blandy committed
57 58 59 60 61 62 63
/* Every call to re_match, etc., must pass &search_regs as the regs
   argument unless you can show it is unnecessary (i.e., if re_match
   is certainly going to be called again before region-around-match
   can be called).

   Since the registers are now dynamically allocated, we need to make
   sure not to refer to the Nth register before checking that it has
Jim Blandy's avatar
Jim Blandy committed
64 65 66
   been allocated by checking search_regs.num_regs.

   The regex code keeps track of whether it has allocated the search
67 68
   buffer using bits in the re_pattern_buffer.  This means that whenever
   you compile a new pattern, it completely forgets whether it has
Jim Blandy's avatar
Jim Blandy committed
69 70 71 72 73
   allocated any registers, and will allocate new registers the next
   time you call a searching or matching function.  Therefore, we need
   to call re_set_registers after compiling a new pattern or after
   setting the match registers, so that the regex functions will be
   able to free or re-allocate it properly.  */
Jim Blandy's avatar
Jim Blandy committed
74 75
static struct re_registers search_regs;

Jim Blandy's avatar
Jim Blandy committed
76 77 78 79
/* The buffer in which the last search was performed, or
   Qt if the last search was done in a string;
   Qnil if no searching has been done yet.  */
static Lisp_Object last_thing_searched;
Jim Blandy's avatar
Jim Blandy committed
80

Karl Heuer's avatar
Karl Heuer committed
81
/* error condition signaled when regexp compile_pattern fails */
Jim Blandy's avatar
Jim Blandy committed
82 83 84

Lisp_Object Qinvalid_regexp;

85
static void set_search_regs ();
86
static void save_search_regs ();
87

88 89
static int search_buffer ();

Jim Blandy's avatar
Jim Blandy committed
90 91 92 93 94 95 96 97 98 99 100 101
static void
matcher_overflow ()
{
  error ("Stack overflow in regexp matcher");
}

#ifdef __STDC__
#define CONST const
#else
#define CONST
#endif

102 103 104 105 106 107 108 109 110
/* Compile a regexp and signal a Lisp error if anything goes wrong.
   PATTERN is the pattern to compile.
   CP is the place to put the result.
   TRANSLATE is a translation table for ignoring case, or NULL for none.
   REGP is the structure that says where to store the "register"
   values that will result from matching this pattern.
   If it is 0, we should compile the pattern not to record any
   subexpression bounds.
   POSIX is nonzero if we want full backtracking (POSIX style)
Karl Heuer's avatar
Karl Heuer committed
111 112 113 114
   for this pattern.  0 means backtrack only enough to get a valid match.
   MULTIBYTE is nonzero if we want to handle multibyte characters in
   PATTERN.  0 means all multibyte characters are recognized just as
   sequences of binary data.  */
Jim Blandy's avatar
Jim Blandy committed
115

116
static void
Karl Heuer's avatar
Karl Heuer committed
117
compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte)
118
     struct regexp_cache *cp;
Jim Blandy's avatar
Jim Blandy committed
119
     Lisp_Object pattern;
120
     Lisp_Object *translate;
121
     struct re_registers *regp;
122
     int posix;
Karl Heuer's avatar
Karl Heuer committed
123
     int multibyte;
Jim Blandy's avatar
Jim Blandy committed
124
{
125
  char *val;
126
  reg_syntax_t old;
Jim Blandy's avatar
Jim Blandy committed
127

128 129
  cp->regexp = Qnil;
  cp->buf.translate = translate;
130
  cp->posix = posix;
Karl Heuer's avatar
Karl Heuer committed
131
  cp->buf.multibyte = multibyte;
132
  BLOCK_INPUT;
133 134
  old = re_set_syntax (RE_SYNTAX_EMACS
		       | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
135 136
  val = (char *) re_compile_pattern ((char *) XSTRING (pattern)->data,
				     XSTRING (pattern)->size, &cp->buf);
137
  re_set_syntax (old);
138
  UNBLOCK_INPUT;
Jim Blandy's avatar
Jim Blandy committed
139
  if (val)
140
    Fsignal (Qinvalid_regexp, Fcons (build_string (val), Qnil));
Jim Blandy's avatar
Jim Blandy committed
141

142 143 144 145
  cp->regexp = Fcopy_sequence (pattern);
}

/* Compile a regexp if necessary, but first check to see if there's one in
146 147 148 149 150 151 152 153 154
   the cache.
   PATTERN is the pattern to compile.
   TRANSLATE is a translation table for ignoring case, or NULL for none.
   REGP is the structure that says where to store the "register"
   values that will result from matching this pattern.
   If it is 0, we should compile the pattern not to record any
   subexpression bounds.
   POSIX is nonzero if we want full backtracking (POSIX style)
   for this pattern.  0 means backtrack only enough to get a valid match.  */
155 156

struct re_pattern_buffer *
157
compile_pattern (pattern, regp, translate, posix)
158 159
     Lisp_Object pattern;
     struct re_registers *regp;
160
     Lisp_Object *translate;
161
     int posix;
162 163
{
  struct regexp_cache *cp, **cpp;
Karl Heuer's avatar
Karl Heuer committed
164 165 166
  /* Should we check it here, or add an argument `multibyte' to this
     function?  */
  int multibyte = !NILP (current_buffer->enable_multibyte_characters);
167 168 169 170

  for (cpp = &searchbuf_head; ; cpp = &cp->next)
    {
      cp = *cpp;
171 172
      if (XSTRING (cp->regexp)->size == XSTRING (pattern)->size
	  && !NILP (Fstring_equal (cp->regexp, pattern))
173
	  && cp->buf.translate == translate
Karl Heuer's avatar
Karl Heuer committed
174 175
	  && cp->posix == posix
	  && cp->buf.multibyte == multibyte)
176 177 178 179 180
	break;

      /* If we're at the end of the cache, compile into the last cell.  */
      if (cp->next == 0)
	{
Karl Heuer's avatar
Karl Heuer committed
181
	  compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte);
182 183 184 185 186 187 188 189 190 191
	  break;
	}
    }

  /* When we get here, cp (aka *cpp) contains the compiled pattern,
     either because we found it in the cache or because we just compiled it.
     Move it to the front of the queue to mark it as most recently used.  */
  *cpp = cp->next;
  cp->next = searchbuf_head;
  searchbuf_head = cp;
Jim Blandy's avatar
Jim Blandy committed
192

193 194 195 196 197
  /* Advise the searching functions about the space we have allocated
     for register data.  */
  if (regp)
    re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);

198
  return &cp->buf;
Jim Blandy's avatar
Jim Blandy committed
199 200 201 202 203 204 205 206 207 208 209 210 211
}

/* Error condition used for failing searches */
Lisp_Object Qsearch_failed;

Lisp_Object
signal_failure (arg)
     Lisp_Object arg;
{
  Fsignal (Qsearch_failed, Fcons (arg, Qnil));
  return Qnil;
}

212 213
static Lisp_Object
looking_at_1 (string, posix)
Jim Blandy's avatar
Jim Blandy committed
214
     Lisp_Object string;
215
     int posix;
Jim Blandy's avatar
Jim Blandy committed
216 217 218 219 220
{
  Lisp_Object val;
  unsigned char *p1, *p2;
  int s1, s2;
  register int i;
221
  struct re_pattern_buffer *bufp;
Jim Blandy's avatar
Jim Blandy committed
222

223 224 225
  if (running_asynch_code)
    save_search_regs ();

Jim Blandy's avatar
Jim Blandy committed
226
  CHECK_STRING (string, 0);
227 228
  bufp = compile_pattern (string, &search_regs,
			  (!NILP (current_buffer->case_fold_search)
229
			   ? XCHAR_TABLE (DOWNCASE_TABLE)->contents : 0),
230
			  posix);
Jim Blandy's avatar
Jim Blandy committed
231 232 233 234 235 236 237 238

  immediate_quit = 1;
  QUIT;			/* Do a pending quit right away, to avoid paradoxical behavior */

  /* Get pointers and sizes of the two strings
     that make up the visible portion of the buffer. */

  p1 = BEGV_ADDR;
239
  s1 = GPT_BYTE - BEGV_BYTE;
Jim Blandy's avatar
Jim Blandy committed
240
  p2 = GAP_END_ADDR;
241
  s2 = ZV_BYTE - GPT_BYTE;
Jim Blandy's avatar
Jim Blandy committed
242 243 244
  if (s1 < 0)
    {
      p2 = p1;
245
      s2 = ZV_BYTE - BEGV_BYTE;
Jim Blandy's avatar
Jim Blandy committed
246 247 248 249
      s1 = 0;
    }
  if (s2 < 0)
    {
250
      s1 = ZV_BYTE - BEGV_BYTE;
Jim Blandy's avatar
Jim Blandy committed
251 252
      s2 = 0;
    }
253 254

  re_match_object = Qnil;
Jim Blandy's avatar
Jim Blandy committed
255
  
256
  i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
257 258
		  PT_BYTE - BEGV_BYTE, &search_regs,
		  ZV_BYTE - BEGV_BYTE);
Jim Blandy's avatar
Jim Blandy committed
259 260 261 262
  if (i == -2)
    matcher_overflow ();

  val = (0 <= i ? Qt : Qnil);
263 264 265 266 267 268 269 270 271
  if (i >= 0)
    for (i = 0; i < search_regs.num_regs; i++)
      if (search_regs.start[i] >= 0)
	{
	  search_regs.start[i]
	    = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
	  search_regs.end[i]
	    = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
	}
272
  XSETBUFFER (last_thing_searched, current_buffer);
Jim Blandy's avatar
Jim Blandy committed
273 274 275 276
  immediate_quit = 0;
  return val;
}

277
DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
278
  "Return t if text after point matches regular expression REGEXP.\n\
279 280 281
This function modifies the match data that `match-beginning',\n\
`match-end' and `match-data' access; save and restore the match\n\
data if you want to preserve them.")
282 283
  (regexp)
     Lisp_Object regexp;
284
{
285
  return looking_at_1 (regexp, 0);
286 287 288
}

DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
289
  "Return t if text after point matches regular expression REGEXP.\n\
290 291 292 293
Find the longest match, in accord with Posix regular expression rules.\n\
This function modifies the match data that `match-beginning',\n\
`match-end' and `match-data' access; save and restore the match\n\
data if you want to preserve them.")
294 295
  (regexp)
     Lisp_Object regexp;
296
{
297
  return looking_at_1 (regexp, 1);
298 299 300 301
}

static Lisp_Object
string_match_1 (regexp, string, start, posix)
Jim Blandy's avatar
Jim Blandy committed
302
     Lisp_Object regexp, string, start;
303
     int posix;
Jim Blandy's avatar
Jim Blandy committed
304 305 306
{
  int val;
  int s;
307
  struct re_pattern_buffer *bufp;
Jim Blandy's avatar
Jim Blandy committed
308

309 310 311
  if (running_asynch_code)
    save_search_regs ();

Jim Blandy's avatar
Jim Blandy committed
312 313 314 315 316 317 318 319 320 321 322 323
  CHECK_STRING (regexp, 0);
  CHECK_STRING (string, 1);

  if (NILP (start))
    s = 0;
  else
    {
      int len = XSTRING (string)->size;

      CHECK_NUMBER (start, 2);
      s = XINT (start);
      if (s < 0 && -s <= len)
Karl Heuer's avatar
Karl Heuer committed
324
	s = len + s;
Jim Blandy's avatar
Jim Blandy committed
325 326 327 328
      else if (0 > s || s > len)
	args_out_of_range (string, start);
    }

329 330
  bufp = compile_pattern (regexp, &search_regs,
			  (!NILP (current_buffer->case_fold_search)
331
			   ? XCHAR_TABLE (DOWNCASE_TABLE)->contents : 0),
332
			  posix);
Jim Blandy's avatar
Jim Blandy committed
333
  immediate_quit = 1;
334 335
  re_match_object = string;
  
336
  val = re_search (bufp, (char *) XSTRING (string)->data,
Jim Blandy's avatar
Jim Blandy committed
337 338 339
		   XSTRING (string)->size, s, XSTRING (string)->size - s,
		   &search_regs);
  immediate_quit = 0;
Jim Blandy's avatar
Jim Blandy committed
340
  last_thing_searched = Qt;
Jim Blandy's avatar
Jim Blandy committed
341 342 343 344 345
  if (val == -2)
    matcher_overflow ();
  if (val < 0) return Qnil;
  return make_number (val);
}
Richard M. Stallman's avatar
Richard M. Stallman committed
346

347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371
DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
  "Return index of start of first match for REGEXP in STRING, or nil.\n\
If third arg START is non-nil, start search at that index in STRING.\n\
For index of first char beyond the match, do (match-end 0).\n\
`match-end' and `match-beginning' also give indices of substrings\n\
matched by parenthesis constructs in the pattern.")
  (regexp, string, start)
     Lisp_Object regexp, string, start;
{
  return string_match_1 (regexp, string, start, 0);
}

DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
  "Return index of start of first match for REGEXP in STRING, or nil.\n\
Find the longest match, in accord with Posix regular expression rules.\n\
If third arg START is non-nil, start search at that index in STRING.\n\
For index of first char beyond the match, do (match-end 0).\n\
`match-end' and `match-beginning' also give indices of substrings\n\
matched by parenthesis constructs in the pattern.")
  (regexp, string, start)
     Lisp_Object regexp, string, start;
{
  return string_match_1 (regexp, string, start, 1);
}

Richard M. Stallman's avatar
Richard M. Stallman committed
372 373 374 375 376 377 378 379 380
/* Match REGEXP against STRING, searching all of STRING,
   and return the index of the match, or negative on failure.
   This does not clobber the match data.  */

int
fast_string_match (regexp, string)
     Lisp_Object regexp, string;
{
  int val;
381
  struct re_pattern_buffer *bufp;
Richard M. Stallman's avatar
Richard M. Stallman committed
382

383
  bufp = compile_pattern (regexp, 0, 0, 0);
Richard M. Stallman's avatar
Richard M. Stallman committed
384
  immediate_quit = 1;
385 386
  re_match_object = string;
  
387
  val = re_search (bufp, (char *) XSTRING (string)->data,
Richard M. Stallman's avatar
Richard M. Stallman committed
388 389 390 391 392
		   XSTRING (string)->size, 0, XSTRING (string)->size,
		   0);
  immediate_quit = 0;
  return val;
}
Karl Heuer's avatar
Karl Heuer committed
393 394 395 396 397 398 399 400

/* Match REGEXP against STRING, searching all of STRING ignoring case,
   and return the index of the match, or negative on failure.
   This does not clobber the match data.  */

extern Lisp_Object Vascii_downcase_table;

int
401
fast_c_string_match_ignore_case (regexp, string)
Karl Heuer's avatar
Karl Heuer committed
402 403 404 405 406 407 408
     Lisp_Object regexp;
     char *string;
{
  int val;
  struct re_pattern_buffer *bufp;
  int len = strlen (string);

409
  re_match_object = Qt;
Karl Heuer's avatar
Karl Heuer committed
410 411 412 413 414 415 416
  bufp = compile_pattern (regexp, 0,
			  XCHAR_TABLE (Vascii_downcase_table)->contents, 0);
  immediate_quit = 1;
  val = re_search (bufp, string, len, 0, len, 0);
  immediate_quit = 0;
  return val;
}
Jim Blandy's avatar
Jim Blandy committed
417

418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471
/* max and min.  */

static int
max (a, b)
     int a, b;
{
  return ((a > b) ? a : b);
}

static int
min (a, b)
     int a, b;
{
  return ((a < b) ? a : b);
}


/* The newline cache: remembering which sections of text have no newlines.  */

/* If the user has requested newline caching, make sure it's on.
   Otherwise, make sure it's off.
   This is our cheezy way of associating an action with the change of
   state of a buffer-local variable.  */
static void
newline_cache_on_off (buf)
     struct buffer *buf;
{
  if (NILP (buf->cache_long_line_scans))
    {
      /* It should be off.  */
      if (buf->newline_cache)
        {
          free_region_cache (buf->newline_cache);
          buf->newline_cache = 0;
        }
    }
  else
    {
      /* It should be on.  */
      if (buf->newline_cache == 0)
        buf->newline_cache = new_region_cache ();
    }
}


/* Search for COUNT instances of the character TARGET between START and END.

   If COUNT is positive, search forwards; END must be >= START.
   If COUNT is negative, search backwards for the -COUNTth instance;
      END must be <= START.
   If COUNT is zero, do anything you please; run rogue, for all I care.

   If END is zero, use BEGV or ZV instead, as appropriate for the
   direction indicated by COUNT.
Jim Blandy's avatar
Jim Blandy committed
472 473

   If we find COUNT instances, set *SHORTAGE to zero, and return the
Richard M. Stallman's avatar
Richard M. Stallman committed
474 475
   position after the COUNTth match.  Note that for reverse motion
   this is not the same as the usual convention for Emacs motion commands.
Jim Blandy's avatar
Jim Blandy committed
476

477 478
   If we don't find COUNT instances before reaching END, set *SHORTAGE
   to the number of TARGETs left unfound, and return END.
Jim Blandy's avatar
Jim Blandy committed
479

480 481 482
   If ALLOW_QUIT is non-zero, set immediate_quit.  That's good to do
   except when inside redisplay.  */

483 484 485 486 487
scan_buffer (target, start, end, count, shortage, allow_quit)
     register int target;
     int start, end;
     int count;
     int *shortage;
488
     int allow_quit;
Jim Blandy's avatar
Jim Blandy committed
489
{
490 491
  struct region_cache *newline_cache;
  int direction; 
Jim Blandy's avatar
Jim Blandy committed
492

493 494 495 496 497 498 499 500 501 502
  if (count > 0)
    {
      direction = 1;
      if (! end) end = ZV;
    }
  else
    {
      direction = -1;
      if (! end) end = BEGV;
    }
Jim Blandy's avatar
Jim Blandy committed
503

504 505
  newline_cache_on_off (current_buffer);
  newline_cache = current_buffer->newline_cache;
Jim Blandy's avatar
Jim Blandy committed
506 507 508 509

  if (shortage != 0)
    *shortage = 0;

510
  immediate_quit = allow_quit;
Jim Blandy's avatar
Jim Blandy committed
511

Jim Blandy's avatar
Jim Blandy committed
512
  if (count > 0)
513
    while (start != end)
Jim Blandy's avatar
Jim Blandy committed
514
      {
515 516 517 518 519
        /* Our innermost scanning loop is very simple; it doesn't know
           about gaps, buffer ends, or the newline cache.  ceiling is
           the position of the last character before the next such
           obstacle --- the last character the dumb search loop should
           examine.  */
520 521
	int ceiling_byte = CHAR_TO_BYTE (end) - 1;
	int start_byte = CHAR_TO_BYTE (start);
522 523 524 525 526 527 528 529

        /* If we're looking for a newline, consult the newline cache
           to see where we can avoid some scanning.  */
        if (target == '\n' && newline_cache)
          {
            int next_change;
            immediate_quit = 0;
            while (region_cache_forward
530 531
                   (current_buffer, newline_cache, start_byte, &next_change))
              start_byte = next_change;
532
            immediate_quit = allow_quit;
533

534 535 536
            /* START should never be after END.  */
            if (start_byte > ceiling_byte)
              start_byte = ceiling_byte;
537 538 539

            /* Now the text after start is an unknown region, and
               next_change is the position of the next known region. */
540
            ceiling_byte = min (next_change - 1, ceiling_byte);
541 542 543 544 545 546
          }

        /* The dumb loop can only scan text stored in contiguous
           bytes. BUFFER_CEILING_OF returns the last character
           position that is contiguous, so the ceiling is the
           position after that.  */
547
        ceiling_byte = min (BUFFER_CEILING_OF (start_byte), ceiling_byte);
548 549 550

        {
          /* The termination address of the dumb loop.  */ 
551 552 553 554
          register unsigned char *ceiling_addr
	    = BYTE_POS_ADDR (ceiling_byte) + 1;
          register unsigned char *cursor
	    = BYTE_POS_ADDR (start_byte);
555 556 557 558 559 560 561 562 563 564 565 566 567 568
          unsigned char *base = cursor;

          while (cursor < ceiling_addr)
            {
              unsigned char *scan_start = cursor;

              /* The dumb loop.  */
              while (*cursor != target && ++cursor < ceiling_addr)
                ;

              /* If we're looking for newlines, cache the fact that
                 the region from start to cursor is free of them. */
              if (target == '\n' && newline_cache)
                know_region_cache (current_buffer, newline_cache,
569 570
                                   start_byte + scan_start - base,
                                   start_byte + cursor - base);
571 572 573 574 575 576 577

              /* Did we find the target character?  */
              if (cursor < ceiling_addr)
                {
                  if (--count == 0)
                    {
                      immediate_quit = 0;
578
                      return BYTE_TO_CHAR (start_byte + cursor - base + 1);
579 580 581 582 583
                    }
                  cursor++;
                }
            }

584
          start = BYTE_TO_CHAR (start_byte + cursor - base);
585
        }
Jim Blandy's avatar
Jim Blandy committed
586 587
      }
  else
588 589 590
    while (start > end)
      {
        /* The last character to check before the next obstacle.  */
591 592
	int ceiling_byte = CHAR_TO_BYTE (end);
	int start_byte = CHAR_TO_BYTE (start);
593 594 595 596 597 598 599

        /* Consult the newline cache, if appropriate.  */
        if (target == '\n' && newline_cache)
          {
            int next_change;
            immediate_quit = 0;
            while (region_cache_backward
600 601
                   (current_buffer, newline_cache, start_byte, &next_change))
              start_byte = next_change;
602
            immediate_quit = allow_quit;
603 604

            /* Start should never be at or before end.  */
605 606
            if (start_byte <= ceiling_byte)
              start_byte = ceiling_byte + 1;
607 608 609

            /* Now the text before start is an unknown region, and
               next_change is the position of the next known region. */
610
            ceiling_byte = max (next_change, ceiling_byte);
611 612 613
          }

        /* Stop scanning before the gap.  */
614
        ceiling_byte = max (BUFFER_FLOOR_OF (start_byte - 1), ceiling_byte);
615 616 617

        {
          /* The termination address of the dumb loop.  */
618 619
          register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
          register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
620 621 622 623 624 625 626 627 628 629 630 631 632
          unsigned char *base = cursor;

          while (cursor >= ceiling_addr)
            {
              unsigned char *scan_start = cursor;

              while (*cursor != target && --cursor >= ceiling_addr)
                ;

              /* If we're looking for newlines, cache the fact that
                 the region from after the cursor to start is free of them.  */
              if (target == '\n' && newline_cache)
                know_region_cache (current_buffer, newline_cache,
633 634
                                   start_byte + cursor - base,
                                   start_byte + scan_start - base);
635 636 637 638 639 640 641

              /* Did we find the target character?  */
              if (cursor >= ceiling_addr)
                {
                  if (++count >= 0)
                    {
                      immediate_quit = 0;
642
                      return BYTE_TO_CHAR (start_byte + cursor - base);
643 644 645 646 647
                    }
                  cursor--;
                }
            }

648
	  start = BYTE_TO_CHAR (start_byte + cursor - base);
649 650 651
        }
      }

Jim Blandy's avatar
Jim Blandy committed
652 653
  immediate_quit = 0;
  if (shortage != 0)
Jim Blandy's avatar
Jim Blandy committed
654
    *shortage = count * direction;
655
  return start;
Jim Blandy's avatar
Jim Blandy committed
656
}
657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672

/* Search for COUNT instances of a line boundary, which means either a
   newline or (if selective display enabled) a carriage return.
   Start at START.  If COUNT is negative, search backwards.

   We report the resulting position by calling TEMP_SET_PT_BOTH.

   If we find COUNT instances. we position after (always after,
   even if scanning backwards) the COUNTth match, and return 0.

   If we don't find COUNT instances before reaching the end of the
   buffer (or the beginning, if scanning backwards), we return
   the number of line boundaries left unfound, and position at
   the limit we bumped up against.

   If ALLOW_QUIT is non-zero, set immediate_quit.  That's good to do
673
   except in special cases.  */
Jim Blandy's avatar
Jim Blandy committed
674

675
int
676 677 678 679 680
scan_newline (start, start_byte, limit, limit_byte, count, allow_quit)
     int start, start_byte;
     int limit, limit_byte;
     register int count;
     int allow_quit;
681
{
682 683 684 685 686 687 688 689
  int direction = ((count > 0) ? 1 : -1);

  register unsigned char *cursor;
  unsigned char *base;

  register int ceiling;
  register unsigned char *ceiling_addr;

690 691
  int old_immediate_quit = immediate_quit;

692 693 694 695 696 697 698 699
  /* If we are not in selective display mode,
     check only for newlines.  */
  int selective_display = (!NILP (current_buffer->selective_display)
			   && !INTEGERP (current_buffer->selective_display));

  /* The code that follows is like scan_buffer
     but checks for either newline or carriage return.  */

700 701
  if (allow_quit)
    immediate_quit++;
702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721

  start_byte = CHAR_TO_BYTE (start);

  if (count > 0)
    {
      while (start_byte < limit_byte)
	{
	  ceiling =  BUFFER_CEILING_OF (start_byte);
	  ceiling = min (limit_byte - 1, ceiling);
	  ceiling_addr = BYTE_POS_ADDR (ceiling) + 1;
	  base = (cursor = BYTE_POS_ADDR (start_byte));
	  while (1)
	    {
	      while (*cursor != '\n' && ++cursor != ceiling_addr)
		;

	      if (cursor != ceiling_addr)
		{
		  if (--count == 0)
		    {
722
		      immediate_quit = old_immediate_quit;
723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755
		      start_byte = start_byte + cursor - base + 1;
		      start = BYTE_TO_CHAR (start_byte);
		      TEMP_SET_PT_BOTH (start, start_byte);
		      return 0;
		    }
		  else
		    if (++cursor == ceiling_addr)
		      break;
		}
	      else
		break;
	    }
	  start_byte += cursor - base;
	}
    }
  else
    {
      int start_byte = CHAR_TO_BYTE (start);
      while (start_byte > limit_byte)
	{
	  ceiling = BUFFER_FLOOR_OF (start_byte - 1);
	  ceiling = max (limit_byte, ceiling);
	  ceiling_addr = BYTE_POS_ADDR (ceiling) - 1;
	  base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1);
	  while (1)
	    {
	      while (--cursor != ceiling_addr && *cursor != '\n')
		;

	      if (cursor != ceiling_addr)
		{
		  if (++count == 0)
		    {
756
		      immediate_quit = old_immediate_quit;
757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773
		      /* Return the position AFTER the match we found.  */
		      start_byte = start_byte + cursor - base + 1;
		      start = BYTE_TO_CHAR (start_byte);
		      TEMP_SET_PT_BOTH (start, start_byte);
		      return 0;
		    }
		}
	      else
		break;
	    }
	  /* Here we add 1 to compensate for the last decrement
	     of CURSOR, which took it past the valid range.  */
	  start_byte += cursor - base + 1;
	}
    }

  TEMP_SET_PT_BOTH (limit, limit_byte);
774
  immediate_quit = old_immediate_quit;
775 776

  return count * direction;
777 778
}

Jim Blandy's avatar
Jim Blandy committed
779
int
780
find_next_newline_no_quit (from, cnt)
Jim Blandy's avatar
Jim Blandy committed
781 782
     register int from, cnt;
{
783
  return scan_buffer ('\n', from, 0, cnt, (int *) 0, 0);
784 785 786 787 788
}

/* Like find_next_newline, but returns position before the newline,
   not after, and only search up to TO.  This isn't just
   find_next_newline (...)-1, because you might hit TO.  */
789

790 791
int
find_before_next_newline (from, to, cnt)
792
     int from, to, cnt;
793 794 795 796 797 798 799 800
{
  int shortage;
  int pos = scan_buffer ('\n', from, to, cnt, &shortage, 1);

  if (shortage == 0)
    pos--;
  
  return pos;
Jim Blandy's avatar
Jim Blandy committed
801 802 803 804 805
}

/* Subroutines of Lisp buffer search functions. */

static Lisp_Object
806
search_command (string, bound, noerror, count, direction, RE, posix)
Jim Blandy's avatar
Jim Blandy committed
807 808 809
     Lisp_Object string, bound, noerror, count;
     int direction;
     int RE;
810
     int posix;
Jim Blandy's avatar
Jim Blandy committed
811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828
{
  register int np;
  int lim;
  int n = direction;

  if (!NILP (count))
    {
      CHECK_NUMBER (count, 3);
      n *= XINT (count);
    }

  CHECK_STRING (string, 0);
  if (NILP (bound))
    lim = n > 0 ? ZV : BEGV;
  else
    {
      CHECK_NUMBER_COERCE_MARKER (bound, 1);
      lim = XINT (bound);
829
      if (n > 0 ? lim < PT : lim > PT)
Jim Blandy's avatar
Jim Blandy committed
830 831 832 833 834 835 836
	error ("Invalid search bound (wrong side of point)");
      if (lim > ZV)
	lim = ZV;
      if (lim < BEGV)
	lim = BEGV;
    }

837
  np = search_buffer (string, PT, lim, n, RE,
Jim Blandy's avatar
Jim Blandy committed
838
		      (!NILP (current_buffer->case_fold_search)
839 840
		       ? XCHAR_TABLE (current_buffer->case_canon_table)->contents
		       : 0),
Jim Blandy's avatar
Jim Blandy committed
841
		      (!NILP (current_buffer->case_fold_search)
842 843
		       ? XCHAR_TABLE (current_buffer->case_eqv_table)->contents
		       : 0),
844
		      posix);
Jim Blandy's avatar
Jim Blandy committed
845 846 847 848 849 850 851 852
  if (np <= 0)
    {
      if (NILP (noerror))
	return signal_failure (string);
      if (!EQ (noerror, Qt))
	{
	  if (lim < BEGV || lim > ZV)
	    abort ();
853 854 855 856
	  SET_PT (lim);
	  return Qnil;
#if 0 /* This would be clean, but maybe programs depend on
	 a value of nil here.  */
857
	  np = lim;
858
#endif
Jim Blandy's avatar
Jim Blandy committed
859
	}
860 861
      else
	return Qnil;
Jim Blandy's avatar
Jim Blandy committed
862 863 864 865 866 867 868 869 870 871
    }

  if (np < BEGV || np > ZV)
    abort ();

  SET_PT (np);

  return make_number (np);
}

872 873
/* Return 1 if REGEXP it matches just one constant string.  */

Karl Heuer's avatar
Karl Heuer committed
874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893
static int
trivial_regexp_p (regexp)
     Lisp_Object regexp;
{
  int len = XSTRING (regexp)->size;
  unsigned char *s = XSTRING (regexp)->data;
  unsigned char c;
  while (--len >= 0)
    {
      switch (*s++)
	{
	case '.': case '*': case '+': case '?': case '[': case '^': case '$':
	  return 0;
	case '\\':
	  if (--len < 0)
	    return 0;
	  switch (*s++)
	    {
	    case '|': case '(': case ')': case '`': case '\'': case 'b':
	    case 'B': case '<': case '>': case 'w': case 'W': case 's':
894
	    case 'S': case '=':
Karl Heuer's avatar
Karl Heuer committed
895
	    case 'c': case 'C':	/* for categoryspec and notcategoryspec */
896
	    case '1': case '2': case '3': case '4': case '5':
Karl Heuer's avatar
Karl Heuer committed
897 898 899 900 901 902 903 904
	    case '6': case '7': case '8': case '9':
	      return 0;
	    }
	}
    }
  return 1;
}

905
/* Search for the n'th occurrence of STRING in the current buffer,
Jim Blandy's avatar
Jim Blandy committed
906
   starting at position POS and stopping at position LIM,
907
   treating STRING as a literal string if RE is false or as
Jim Blandy's avatar
Jim Blandy committed
908 909 910 911 912 913 914
   a regular expression if RE is true.

   If N is positive, searching is forward and LIM must be greater than POS.
   If N is negative, searching is backward and LIM must be less than POS.

   Returns -x if only N-x occurrences found (x > 0),
   or else the position at the beginning of the Nth occurrence
915 916 917 918
   (if searching backward) or the end (if searching forward).

   POSIX is nonzero if we want full backtracking (POSIX style)
   for this pattern.  0 means backtrack only enough to get a valid match.  */
Jim Blandy's avatar
Jim Blandy committed
919

920 921
static int
search_buffer (string, pos, lim, n, RE, trt, inverse_trt, posix)
Jim Blandy's avatar
Jim Blandy committed
922 923 924 925 926
     Lisp_Object string;
     int pos;
     int lim;
     int n;
     int RE;
927 928
     Lisp_Object *trt;
     Lisp_Object *inverse_trt;
929
     int posix;
Jim Blandy's avatar
Jim Blandy committed
930 931 932 933 934 935 936 937 938 939 940 941 942
{
  int len = XSTRING (string)->size;
  unsigned char *base_pat = XSTRING (string)->data;
  register int *BM_tab;
  int *BM_tab_base;
  register int direction = ((n > 0) ? 1 : -1);
  register int dirlen;
  int infinity, limit, k, stride_for_teases;
  register unsigned char *pat, *cursor, *p_limit;  
  register int i, j;
  unsigned char *p1, *p2;
  int s1, s2;

943 944 945
  if (running_asynch_code)
    save_search_regs ();

Jim Blandy's avatar
Jim Blandy committed
946
  /* Null string is found at starting position.  */
947
  if (len == 0)
948 949 950 951
    {
      set_search_regs (pos, 0);
      return pos;
    }
952 953 954

  /* Searching 0 times means don't move.  */
  if (n == 0)
Jim Blandy's avatar
Jim Blandy committed
955 956
    return pos;

Karl Heuer's avatar
Karl Heuer committed
957
  if (RE && !trivial_regexp_p (string))
Jim Blandy's avatar
Jim Blandy committed
958
    {
959 960
      struct re_pattern_buffer *bufp;

961
      bufp = compile_pattern (string, &search_regs, trt, posix);
Jim Blandy's avatar
Jim Blandy committed
962 963 964 965 966 967 968 969 970 971

      immediate_quit = 1;	/* Quit immediately if user types ^G,
				   because letting this function finish
				   can take too long. */
      QUIT;			/* Do a pending quit right away,
				   to avoid paradoxical behavior */
      /* Get pointers and sizes of the two strings
	 that make up the visible portion of the buffer. */

      p1 = BEGV_ADDR;
972
      s1 = GPT_BYTE - BEGV_BYTE;
Jim Blandy's avatar
Jim Blandy committed
973
      p2 = GAP_END_ADDR;
974
      s2 = ZV_BYTE - GPT_BYTE;
Jim Blandy's avatar
Jim Blandy committed
975 976 977
      if (s1 < 0)
	{
	  p2 = p1;
978
	  s2 = ZV_BYTE - BEGV_BYTE;
Jim Blandy's avatar
Jim Blandy committed
979 980 981 982
	  s1 = 0;
	}
      if (s2 < 0)
	{
983
	  s1 = ZV_BYTE - BEGV_BYTE;
Jim Blandy's avatar
Jim Blandy committed
984 985
	  s2 = 0;
	}
986 987
      re_match_object = Qnil;
  
Jim Blandy's avatar
Jim Blandy committed
988 989
      while (n < 0)
	{
990
	  int val;
991
	  val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
992 993 994
			     pos - BEGV, lim - pos, &search_regs,
			     /* Don't allow match past current point */
			     pos - BEGV);
Jim Blandy's avatar
Jim Blandy committed
995
	  if (val == -2)
Karl Heuer's avatar
Karl Heuer committed
996 997 998
	    {
	      matcher_overflow ();
	    }
Jim Blandy's avatar
Jim Blandy committed
999 1000
	  if (val >= 0)
	    {
Jim Blandy's avatar
Jim Blandy committed
1001
	      for (i = 0; i < search_regs.num_regs; i++)
Jim Blandy's avatar
Jim Blandy committed
1002 1003
		if (search_regs.start[i] >= 0)
		  {
1004 1005 1006 1007
		    search_regs.start[i]
		      = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
		    search_regs.end[i]
		      = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
Jim Blandy's avatar
Jim Blandy committed
1008
		  }
1009
	      XSETBUFFER (last_thing_searched, current_buffer);
Jim Blandy's avatar
Jim Blandy committed
1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021
	      /* Set pos to the new position. */
	      pos = search_regs.start[0];
	    }
	  else
	    {
	      immediate_quit = 0;
	      return (n);
	    }
	  n++;
	}
      while (n > 0)
	{
1022
	  int val;
1023
	  val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1024 1025
			     pos - BEGV, lim - pos, &search_regs,
			     lim - BEGV);
Jim Blandy's avatar
Jim Blandy committed
1026
	  if (val == -2)
Karl Heuer's avatar
Karl Heuer committed
1027 1028 1029
	    {
	      matcher_overflow ();
	    }
Jim Blandy's avatar
Jim Blandy committed
1030 1031
	  if (val >= 0)
	    {
Jim Blandy's avatar
Jim Blandy committed
1032
	      for (i = 0; i < search_regs.num_regs; i++)
Jim Blandy's avatar
Jim Blandy committed
1033 1034
		if (search_regs.start[i] >= 0)
		  {
1035 1036 1037 1038
		    search_regs.start[i]
		      = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
		    search_regs.end[i]
		      = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
Jim Blandy's avatar
Jim Blandy committed
1039
		  }
1040
	      XSETBUFFER (last_thing_searched, current_buffer);
Jim Blandy's avatar
Jim Blandy committed
1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054
	      pos = search_regs.end[0];
	    }
	  else
	    {
	      immediate_quit = 0;
	      return (0 - n);
	    }
	  n--;
	}
      immediate_quit = 0;
      return (pos);
    }
  else				/* non-RE case */
    {
1055 1056
      int pos_byte = CHAR_TO_BYTE (pos);
      int lim_byte = CHAR_TO_BYTE (lim);
Jim Blandy's avatar
Jim Blandy committed
1057 1058 1059 1060 1061 1062
#ifdef C_ALLOCA
      int BM_tab_space[0400];
      BM_tab = &BM_tab_space[0];
#else
      BM_tab = (int *) alloca (0400 * sizeof (int));
#endif
Karl Heuer's avatar
Karl Heuer committed
1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075
      {
	unsigned char *patbuf = (unsigned char *) alloca (len);
	pat = patbuf;
	while (--len >= 0)
	  {
	    /* If we got here and the RE flag is set, it's because we're
	       dealing with a regexp known to be trivial, so the backslash
	       just quotes the next character.  */
	    if (RE && *base_pat == '\\')
	      {
		len--;
		base_pat++;
	      }
1076
	    *pat++ = (trt ? XINT (trt[*base_pat++]) : *base_pat++);
Karl Heuer's avatar
Karl Heuer committed
1077 1078 1079 1080
	  }
	len = pat - patbuf;
	pat = base_pat = patbuf;
      }
Jim Blandy's avatar
Jim Blandy committed
1081 1082 1083 1084