lread.c 151 KB
Newer Older
Jim Blandy's avatar
Jim Blandy committed
1
/* Lisp parsing and input streams.
2

Paul Eggert's avatar
Paul Eggert committed
3
Copyright (C) 1985-1989, 1993-1995, 1997-2019 Free Software Foundation,
4
Inc.
Jim Blandy's avatar
Jim Blandy committed
5 6 7

This file is part of GNU Emacs.

8
GNU Emacs is free software: you can redistribute it and/or modify
Jim Blandy's avatar
Jim Blandy committed
9
it under the terms of the GNU General Public License as published by
10 11
the Free Software Foundation, either version 3 of the License, or (at
your option) any later version.
Jim Blandy's avatar
Jim Blandy committed
12 13 14 15 16 17 18

GNU Emacs is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
19
along with GNU Emacs.  If not, see <https://www.gnu.org/licenses/>.  */
Jim Blandy's avatar
Jim Blandy committed
20

21 22
/* Tell globals.h to define tables needed by init_obarray.  */
#define DEFINE_SYMBOLS
Jim Blandy's avatar
Jim Blandy committed
23

Richard M. Stallman's avatar
Richard M. Stallman committed
24
#include <config.h>
25
#include "sysstdio.h"
Paul Eggert's avatar
Paul Eggert committed
26
#include <stdlib.h>
Jim Blandy's avatar
Jim Blandy committed
27 28 29
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/file.h>
30
#include <errno.h>
31
#include <math.h>
32
#include <stat-time.h>
Jim Blandy's avatar
Jim Blandy committed
33
#include "lisp.h"
34
#include "dispextern.h"
35
#include "intervals.h"
Kenichi Handa's avatar
Kenichi Handa committed
36
#include "character.h"
37
#include "buffer.h"
Karl Heuer's avatar
Karl Heuer committed
38
#include "charset.h"
39
#include <epaths.h>
Jim Blandy's avatar
Jim Blandy committed
40
#include "commands.h"
41
#include "keyboard.h"
42
#include "systime.h"
43
#include "termhooks.h"
YAMAMOTO Mitsuharu's avatar
YAMAMOTO Mitsuharu committed
44
#include "blockinput.h"
45
#include <c-ctype.h>
Jim Blandy's avatar
Jim Blandy committed
46

47 48
#ifdef MSDOS
#include "msdos.h"
Eli Zaretskii's avatar
Eli Zaretskii committed
49 50 51 52
#if __DJGPP__ == 2 && __DJGPP_MINOR__ < 5
# define INFINITY  __builtin_inf()
# define NAN       __builtin_nan("")
#endif
53 54
#endif

55 56 57 58
#ifdef HAVE_NS
#include "nsterm.h"
#endif

Andreas Schwab's avatar
Andreas Schwab committed
59
#include <unistd.h>
Jim Blandy's avatar
Jim Blandy committed
60

Richard M. Stallman's avatar
Richard M. Stallman committed
61 62 63 64
#ifdef HAVE_SETLOCALE
#include <locale.h>
#endif /* HAVE_SETLOCALE */

Dave Love's avatar
Dave Love committed
65
#include <fcntl.h>
Richard M. Stallman's avatar
Richard M. Stallman committed
66

67
#ifdef HAVE_FSEEKO
68 69 70 71 72 73 74
#define file_offset off_t
#define file_tell ftello
#else
#define file_offset long
#define file_tell ftell
#endif

75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104
/* The objects or placeholders read with the #n=object form.

   A hash table maps a number to either a placeholder (while the
   object is still being parsed, in case it's referenced within its
   own definition) or to the completed object.  With small integers
   for keys, it's effectively little more than a vector, but it'll
   manage any needed resizing for us.

   The variable must be reset to an empty hash table before all
   top-level calls to read0.  In between calls, it may be an empty
   hash table left unused from the previous call (to reduce
   allocations), or nil.  */
static Lisp_Object read_objects_map;

/* The recursive objects read with the #n=object form.

   Objects that might have circular references are stored here, so
   that recursive substitution knows not to keep processing them
   multiple times.

   Only objects that are completely processed, including substituting
   references to themselves (but not necessarily replacing
   placeholders for other objects still being read), are stored.

   A hash table is used for efficient lookups of keys.  We don't care
   what the value slots hold.  The variable must be set to an empty
   hash table before all top-level calls to read0.  In between calls,
   it may be an empty hash table left unused from the previous call
   (to reduce allocations), or nil.  */
static Lisp_Object read_objects_completed;
105

106 107 108 109 110 111 112 113 114 115 116 117 118 119
/* File and lookahead for get-file-char and get-emacs-mule-file-char
   to read from.  Used by Fload.  */
static struct infile
{
  /* The input stream.  */
  FILE *stream;

  /* Lookahead byte count.  */
  signed char lookahead;

  /* Lookahead bytes, in reverse order.  Keep these here because it is
     not portable to ungetc more than one byte at a time.  */
  unsigned char buf[MAX_MULTIBYTE_LENGTH - 1];
} *infile;
Jim Blandy's avatar
Jim Blandy committed
120

121
/* For use within read-from-string (this reader is non-reentrant!!)  */
122 123 124
static ptrdiff_t read_from_string_index;
static ptrdiff_t read_from_string_index_byte;
static ptrdiff_t read_from_string_limit;
125

126
/* Number of characters read in the current call to Fread or
127
   Fread_from_string.  */
128
static EMACS_INT readchar_count;
129

130
/* This contains the last string skipped with #@.  */
131 132
static char *saved_doc_string;
/* Length of buffer allocated in saved_doc_string.  */
133
static ptrdiff_t saved_doc_string_size;
134
/* Length of actual data in saved_doc_string.  */
135
static ptrdiff_t saved_doc_string_length;
136
/* This is the file position that string came from.  */
137
static file_offset saved_doc_string_position;
138

139 140 141 142 143
/* This contains the previous string skipped with #@.
   We copy it from saved_doc_string when a new string
   is put in saved_doc_string.  */
static char *prev_saved_doc_string;
/* Length of buffer allocated in prev_saved_doc_string.  */
144
static ptrdiff_t prev_saved_doc_string_size;
145
/* Length of actual data in prev_saved_doc_string.  */
146
static ptrdiff_t prev_saved_doc_string_length;
147
/* This is the file position that string came from.  */
148
static file_offset prev_saved_doc_string_position;
149

150
/* True means inside a new-style backquote
151
   with no surrounding parentheses.
152
   Fread initializes this to false, so we need not specbind it
153
   or worry about what happens to it when there is an error.  */
154
static bool new_backquote_flag;
155 156 157 158

/* A list of file names for files being loaded in Fload.  Used to
   check for recursive loads.  */

Lute Kamstra's avatar
Lute Kamstra committed
159
static Lisp_Object Vloads_in_progress;
160

161 162
static int read_emacs_mule_char (int, int (*) (int, Lisp_Object),
                                 Lisp_Object);
Kenichi Handa's avatar
Kenichi Handa committed
163

164
static void readevalloop (Lisp_Object, struct infile *, Lisp_Object, bool,
165 166
                          Lisp_Object, Lisp_Object,
                          Lisp_Object, Lisp_Object);
Jim Blandy's avatar
Jim Blandy committed
167

Kenichi Handa's avatar
Kenichi Handa committed
168 169 170 171 172 173
/* Functions that read one byte from the current source READCHARFUN
   or unreads one byte.  If the integer argument C is -1, it returns
   one read byte, or -1 when there's no more byte in the source.  If C
   is 0 or positive, it unreads C, and the return value is not
   interesting.  */

174 175 176
static int readbyte_for_lambda (int, Lisp_Object);
static int readbyte_from_file (int, Lisp_Object);
static int readbyte_from_string (int, Lisp_Object);
Kenichi Handa's avatar
Kenichi Handa committed
177

Jim Blandy's avatar
Jim Blandy committed
178 179
/* Handle unreading and rereading of characters.
   Write READCHAR to read a character,
Karl Heuer's avatar
Karl Heuer committed
180 181
   UNREAD(c) to unread c to be read again.

Kenichi Handa's avatar
Kenichi Handa committed
182
   These macros correctly read/unread multibyte characters.  */
Jim Blandy's avatar
Jim Blandy committed
183

184
#define READCHAR readchar (readcharfun, NULL)
Jim Blandy's avatar
Jim Blandy committed
185 186
#define UNREAD(c) unreadchar (readcharfun, c)

187 188 189
/* Same as READCHAR but set *MULTIBYTE to the multibyteness of the source.  */
#define READCHAR_REPORT_MULTIBYTE(multibyte) readchar (readcharfun, multibyte)

Kenichi Handa's avatar
Kenichi Handa committed
190
/* When READCHARFUN is Qget_file_char, Qget_emacs_mule_file_char,
Kenichi Handa's avatar
Kenichi Handa committed
191 192
   Qlambda, or a cons, we use this to keep an unread character because
   a file stream can't handle multibyte-char unreading.  The value -1
193
   means that there's no unread character.  */
Kenichi Handa's avatar
Kenichi Handa committed
194 195
static int unread_char;

Jim Blandy's avatar
Jim Blandy committed
196
static int
197
readchar (Lisp_Object readcharfun, bool *multibyte)
Jim Blandy's avatar
Jim Blandy committed
198 199
{
  Lisp_Object tem;
200
  register int c;
201
  int (*readbyte) (int, Lisp_Object);
Kenichi Handa's avatar
Kenichi Handa committed
202 203
  unsigned char buf[MAX_MULTIBYTE_LENGTH];
  int i, len;
204
  bool emacs_mule_encoding = 0;
Jim Blandy's avatar
Jim Blandy committed
205

206 207 208
  if (multibyte)
    *multibyte = 0;

209
  readchar_count++;
210

211
  if (BUFFERP (readcharfun))
Jim Blandy's avatar
Jim Blandy committed
212
    {
213
      register struct buffer *inbuffer = XBUFFER (readcharfun);
Jim Blandy's avatar
Jim Blandy committed
214

215
      ptrdiff_t pt_byte = BUF_PT_BYTE (inbuffer);
216

217 218 219
      if (! BUFFER_LIVE_P (inbuffer))
	return -1;

220 221
      if (pt_byte >= BUF_ZV_BYTE (inbuffer))
	return -1;
Jim Blandy's avatar
Jim Blandy committed
222

Tom Tromey's avatar
Tom Tromey committed
223
      if (! NILP (BVAR (inbuffer, enable_multibyte_characters)))
224
	{
225
	  /* Fetch the character code from the buffer.  */
226 227
	  unsigned char *p = BUF_BYTE_ADDRESS (inbuffer, pt_byte);
	  BUF_INC_POS (inbuffer, pt_byte);
228
	  c = STRING_CHAR (p);
229 230
	  if (multibyte)
	    *multibyte = 1;
231 232 233 234
	}
      else
	{
	  c = BUF_FETCH_BYTE (inbuffer, pt_byte);
235
	  if (! ASCII_CHAR_P (c))
Kenichi Handa's avatar
Kenichi Handa committed
236
	    c = BYTE8_TO_CHAR (c);
237
	  pt_byte++;
238
	}
239
      SET_BUF_PT_BOTH (inbuffer, BUF_PT (inbuffer) + 1, pt_byte);
240

241
      return c;
Jim Blandy's avatar
Jim Blandy committed
242
    }
243
  if (MARKERP (readcharfun))
Jim Blandy's avatar
Jim Blandy committed
244
    {
245
      register struct buffer *inbuffer = XMARKER (readcharfun)->buffer;
Jim Blandy's avatar
Jim Blandy committed
246

247
      ptrdiff_t bytepos = marker_byte_position (readcharfun);
248

249 250
      if (bytepos >= BUF_ZV_BYTE (inbuffer))
	return -1;
251

Tom Tromey's avatar
Tom Tromey committed
252
      if (! NILP (BVAR (inbuffer, enable_multibyte_characters)))
253
	{
254
	  /* Fetch the character code from the buffer.  */
255 256
	  unsigned char *p = BUF_BYTE_ADDRESS (inbuffer, bytepos);
	  BUF_INC_POS (inbuffer, bytepos);
257
	  c = STRING_CHAR (p);
258 259
	  if (multibyte)
	    *multibyte = 1;
260 261 262 263
	}
      else
	{
	  c = BUF_FETCH_BYTE (inbuffer, bytepos);
264
	  if (! ASCII_CHAR_P (c))
Kenichi Handa's avatar
Kenichi Handa committed
265
	    c = BYTE8_TO_CHAR (c);
266
	  bytepos++;
267 268
	}

269 270 271 272
      XMARKER (readcharfun)->bytepos = bytepos;
      XMARKER (readcharfun)->charpos++;

      return c;
Jim Blandy's avatar
Jim Blandy committed
273
    }
274 275

  if (EQ (readcharfun, Qlambda))
Kenichi Handa's avatar
Kenichi Handa committed
276 277 278 279
    {
      readbyte = readbyte_for_lambda;
      goto read_multibyte;
    }
280

Jim Blandy's avatar
Jim Blandy committed
281
  if (EQ (readcharfun, Qget_file_char))
282
    {
Kenichi Handa's avatar
Kenichi Handa committed
283 284
      readbyte = readbyte_from_file;
      goto read_multibyte;
285
    }
Jim Blandy's avatar
Jim Blandy committed
286

287
  if (STRINGP (readcharfun))
Jim Blandy's avatar
Jim Blandy committed
288
    {
289
      if (read_from_string_index >= read_from_string_limit)
Jim Blandy's avatar
Jim Blandy committed
290
	c = -1;
291 292 293 294 295 296 297 298
      else if (STRING_MULTIBYTE (readcharfun))
	{
	  if (multibyte)
	    *multibyte = 1;
	  FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, readcharfun,
					      read_from_string_index,
					      read_from_string_index_byte);
	}
299
      else
300 301 302 303 304
	{
	  c = SREF (readcharfun, read_from_string_index_byte);
	  read_from_string_index++;
	  read_from_string_index_byte++;
	}
Jim Blandy's avatar
Jim Blandy committed
305 306 307
      return c;
    }

308
  if (CONSP (readcharfun) && STRINGP (XCAR (readcharfun)))
Kenichi Handa's avatar
Kenichi Handa committed
309 310 311 312 313 314 315 316 317 318 319
    {
      /* This is the case that read_vector is reading from a unibyte
	 string that contains a byte sequence previously skipped
	 because of #@NUMBER.  The car part of readcharfun is that
	 string, and the cdr part is a value of readcharfun given to
	 read_vector.  */
      readbyte = readbyte_from_string;
      if (EQ (XCDR (readcharfun), Qget_emacs_mule_file_char))
	emacs_mule_encoding = 1;
      goto read_multibyte;
    }
Kenichi Handa's avatar
Kenichi Handa committed
320

Kenichi Handa's avatar
Kenichi Handa committed
321 322 323 324 325 326
  if (EQ (readcharfun, Qget_emacs_mule_file_char))
    {
      readbyte = readbyte_from_file;
      emacs_mule_encoding = 1;
      goto read_multibyte;
    }
Jim Blandy's avatar
Jim Blandy committed
327 328 329

  tem = call0 (readcharfun);

Jim Blandy's avatar
Jim Blandy committed
330
  if (NILP (tem))
Jim Blandy's avatar
Jim Blandy committed
331 332
    return -1;
  return XINT (tem);
Kenichi Handa's avatar
Kenichi Handa committed
333 334 335 336 337 338 339 340 341

 read_multibyte:
  if (unread_char >= 0)
    {
      c = unread_char;
      unread_char = -1;
      return c;
    }
  c = (*readbyte) (-1, readcharfun);
342
  if (c < 0)
343 344 345
    return c;
  if (multibyte)
    *multibyte = 1;
346
  if (ASCII_CHAR_P (c))
Kenichi Handa's avatar
Kenichi Handa committed
347 348 349 350 351 352 353 354
    return c;
  if (emacs_mule_encoding)
    return read_emacs_mule_char (c, readbyte, readcharfun);
  i = 0;
  buf[i++] = c;
  len = BYTES_BY_CHAR_HEAD (c);
  while (i < len)
    {
355
      buf[i++] = c = (*readbyte) (-1, readcharfun);
Kenichi Handa's avatar
Kenichi Handa committed
356 357
      if (c < 0 || ! TRAILING_CODE_P (c))
	{
358
	  for (i -= c < 0; 0 < --i; )
Kenichi Handa's avatar
Kenichi Handa committed
359 360 361 362
	    (*readbyte) (buf[i], readcharfun);
	  return BYTE8_TO_CHAR (buf[0]);
	}
    }
363
  return STRING_CHAR (buf);
Jim Blandy's avatar
Jim Blandy committed
364 365
}

366 367 368 369
#define FROM_FILE_P(readcharfun)			\
  (EQ (readcharfun, Qget_file_char)			\
   || EQ (readcharfun, Qget_emacs_mule_file_char))

370 371 372
static void
skip_dyn_bytes (Lisp_Object readcharfun, ptrdiff_t n)
{
373
  if (FROM_FILE_P (readcharfun))
374 375
    {
      block_input ();		/* FIXME: Not sure if it's needed.  */
376
      fseek (infile->stream, n - infile->lookahead, SEEK_CUR);
377
      unblock_input ();
378
      infile->lookahead = 0;
379 380 381 382 383 384 385 386 387 388 389 390 391 392 393
    }
  else
    { /* We're not reading directly from a file.  In that case, it's difficult
	 to reliably count bytes, since these are usually meant for the file's
	 encoding, whereas we're now typically in the internal encoding.
	 But luckily, skip_dyn_bytes is used to skip over a single
	 dynamic-docstring (or dynamic byte-code) which is always quoted such
	 that \037 is the final char.  */
      int c;
      do {
	c = READCHAR;
      } while (c >= 0 && c != '\037');
    }
}

394 395 396 397 398 399
static void
skip_dyn_eof (Lisp_Object readcharfun)
{
  if (FROM_FILE_P (readcharfun))
    {
      block_input ();		/* FIXME: Not sure if it's needed.  */
400
      fseek (infile->stream, 0, SEEK_END);
401
      unblock_input ();
402
      infile->lookahead = 0;
403 404 405 406 407
    }
  else
    while (READCHAR >= 0);
}

Jim Blandy's avatar
Jim Blandy committed
408 409 410 411
/* Unread the character C in the way appropriate for the stream READCHARFUN.
   If the stream is a user function, call it with the char as argument.  */

static void
412
unreadchar (Lisp_Object readcharfun, int c)
Jim Blandy's avatar
Jim Blandy committed
413
{
414
  readchar_count--;
415 416 417 418
  if (c == -1)
    /* Don't back up the pointer if we're unreading the end-of-input mark,
       since readchar didn't advance it when we read it.  */
    ;
419
  else if (BUFFERP (readcharfun))
420
    {
421
      struct buffer *b = XBUFFER (readcharfun);
422 423
      ptrdiff_t charpos = BUF_PT (b);
      ptrdiff_t bytepos = BUF_PT_BYTE (b);
424

Tom Tromey's avatar
Tom Tromey committed
425
      if (! NILP (BVAR (b, enable_multibyte_characters)))
Kenichi Handa's avatar
Kenichi Handa committed
426
	BUF_DEC_POS (b, bytepos);
427
      else
Kenichi Handa's avatar
Kenichi Handa committed
428
	bytepos--;
429

430
      SET_BUF_PT_BOTH (b, charpos - 1, bytepos);
431
    }
432
  else if (MARKERP (readcharfun))
433
    {
434
      struct buffer *b = XMARKER (readcharfun)->buffer;
435
      ptrdiff_t bytepos = XMARKER (readcharfun)->bytepos;
436

Kenichi Handa's avatar
Kenichi Handa committed
437
      XMARKER (readcharfun)->charpos--;
Tom Tromey's avatar
Tom Tromey committed
438
      if (! NILP (BVAR (b, enable_multibyte_characters)))
Kenichi Handa's avatar
Kenichi Handa committed
439
	BUF_DEC_POS (b, bytepos);
440
      else
Kenichi Handa's avatar
Kenichi Handa committed
441
	bytepos--;
442

Kenichi Handa's avatar
Kenichi Handa committed
443
      XMARKER (readcharfun)->bytepos = bytepos;
444
    }
445
  else if (STRINGP (readcharfun))
446 447 448 449 450
    {
      read_from_string_index--;
      read_from_string_index_byte
	= string_char_to_byte (readcharfun, read_from_string_index);
    }
451
  else if (CONSP (readcharfun) && STRINGP (XCAR (readcharfun)))
Kenichi Handa's avatar
Kenichi Handa committed
452 453 454
    {
      unread_char = c;
    }
455
  else if (EQ (readcharfun, Qlambda))
Kenichi Handa's avatar
Kenichi Handa committed
456 457 458
    {
      unread_char = c;
    }
459
  else if (FROM_FILE_P (readcharfun))
Kenichi Handa's avatar
Kenichi Handa committed
460
    {
461
      unread_char = c;
Kenichi Handa's avatar
Kenichi Handa committed
462
    }
Jim Blandy's avatar
Jim Blandy committed
463 464 465 466
  else
    call1 (readcharfun, make_number (c));
}

Kenichi Handa's avatar
Kenichi Handa committed
467
static int
468
readbyte_for_lambda (int c, Lisp_Object readcharfun)
Kenichi Handa's avatar
Kenichi Handa committed
469 470 471 472 473 474
{
  return read_bytecode_char (c >= 0);
}


static int
475
readbyte_from_stdio (void)
Kenichi Handa's avatar
Kenichi Handa committed
476
{
477 478 479 480 481
  if (infile->lookahead)
    return infile->buf[--infile->lookahead];

  int c;
  FILE *instream = infile->stream;
Kenichi Handa's avatar
Kenichi Handa committed
482

483
  block_input ();
Miles Bader's avatar
Miles Bader committed
484

485
  /* Interrupted reads have been observed while reading over the network.  */
486 487
  while ((c = getc_unlocked (instream)) == EOF && errno == EINTR
	 && ferror_unlocked (instream))
Miles Bader's avatar
Miles Bader committed
488
    {
489
      unblock_input ();
Paul Eggert's avatar
Paul Eggert committed
490
      maybe_quit ();
491
      block_input ();
492
      clearerr_unlocked (instream);
YAMAMOTO Mitsuharu's avatar
YAMAMOTO Mitsuharu committed
493
    }
Miles Bader's avatar
Miles Bader committed
494

495
  unblock_input ();
Miles Bader's avatar
Miles Bader committed
496

Kenichi Handa's avatar
Kenichi Handa committed
497 498 499
  return (c == EOF ? -1 : c);
}

500 501 502 503 504 505 506 507 508 509 510 511 512
static int
readbyte_from_file (int c, Lisp_Object readcharfun)
{
  if (c >= 0)
    {
      eassert (infile->lookahead < sizeof infile->buf);
      infile->buf[infile->lookahead++] = c;
      return 0;
    }

  return readbyte_from_stdio ();
}

Kenichi Handa's avatar
Kenichi Handa committed
513
static int
514
readbyte_from_string (int c, Lisp_Object readcharfun)
Kenichi Handa's avatar
Kenichi Handa committed
515 516 517 518 519 520 521 522 523
{
  Lisp_Object string = XCAR (readcharfun);

  if (c >= 0)
    {
      read_from_string_index--;
      read_from_string_index_byte
	= string_char_to_byte (string, read_from_string_index);
    }
Kenichi Handa's avatar
Kenichi Handa committed
524

Kenichi Handa's avatar
Kenichi Handa committed
525 526 527 528 529 530 531 532 533 534
  if (read_from_string_index >= read_from_string_limit)
    c = -1;
  else
    FETCH_STRING_CHAR_ADVANCE (c, string,
			       read_from_string_index,
			       read_from_string_index_byte);
  return c;
}


535
/* Read one non-ASCII character from INFILE.  The character is
Kenichi Handa's avatar
Kenichi Handa committed
536 537 538 539
   encoded in `emacs-mule' and the first byte is already read in
   C.  */

static int
540
read_emacs_mule_char (int c, int (*readbyte) (int, Lisp_Object), Lisp_Object readcharfun)
Kenichi Handa's avatar
Kenichi Handa committed
541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556
{
  /* Emacs-mule coding uses at most 4-byte for one character.  */
  unsigned char buf[4];
  int len = emacs_mule_bytes[c];
  struct charset *charset;
  int i;
  unsigned code;

  if (len == 1)
    /* C is not a valid leading-code of `emacs-mule'.  */
    return BYTE8_TO_CHAR (c);

  i = 0;
  buf[i++] = c;
  while (i < len)
    {
557
      buf[i++] = c = (*readbyte) (-1, readcharfun);
Kenichi Handa's avatar
Kenichi Handa committed
558 559
      if (c < 0xA0)
	{
560
	  for (i -= c < 0; 0 < --i; )
Kenichi Handa's avatar
Kenichi Handa committed
561 562 563 564 565 566 567
	    (*readbyte) (buf[i], readcharfun);
	  return BYTE8_TO_CHAR (buf[0]);
	}
    }

  if (len == 2)
    {
568
      charset = CHARSET_FROM_ID (emacs_mule_charset[buf[0]]);
Kenichi Handa's avatar
Kenichi Handa committed
569 570 571 572 573 574 575
      code = buf[1] & 0x7F;
    }
  else if (len == 3)
    {
      if (buf[0] == EMACS_MULE_LEADING_CODE_PRIVATE_11
	  || buf[0] == EMACS_MULE_LEADING_CODE_PRIVATE_12)
	{
576
	  charset = CHARSET_FROM_ID (emacs_mule_charset[buf[1]]);
Kenichi Handa's avatar
Kenichi Handa committed
577 578 579 580
	  code = buf[2] & 0x7F;
	}
      else
	{
581
	  charset = CHARSET_FROM_ID (emacs_mule_charset[buf[0]]);
Kenichi Handa's avatar
Kenichi Handa committed
582 583 584 585 586
	  code = ((buf[1] << 8) | buf[2]) & 0x7F7F;
	}
    }
  else
    {
587
      charset = CHARSET_FROM_ID (emacs_mule_charset[buf[1]]);
Kenichi Handa's avatar
Kenichi Handa committed
588
      code = ((buf[2] << 8) | buf[3]) & 0x7F7F;
Kenichi Handa's avatar
Kenichi Handa committed
589 590 591 592
    }
  c = DECODE_CHAR (charset, code);
  if (c < 0)
    Fsignal (Qinvalid_read_syntax,
593
	     list1 (build_string ("invalid multibyte form")));
Kenichi Handa's avatar
Kenichi Handa committed
594 595 596 597
  return c;
}


598 599 600 601 602 603 604 605 606 607 608 609 610 611
/* An in-progress substitution of OBJECT for PLACEHOLDER.  */
struct subst
{
  Lisp_Object object;
  Lisp_Object placeholder;

  /* Hash table of subobjects of OBJECT that might be circular.  If
     Qt, all such objects might be circular.  */
  Lisp_Object completed;

  /* List of subobjects of OBJECT that have already been visited.  */
  Lisp_Object seen;
};

612 613 614
static Lisp_Object read_internal_start (Lisp_Object, Lisp_Object,
                                        Lisp_Object);
static Lisp_Object read0 (Lisp_Object);
615
static Lisp_Object read1 (Lisp_Object, int *, bool);
616

617 618
static Lisp_Object read_list (bool, Lisp_Object);
static Lisp_Object read_vector (Lisp_Object, bool);
619

620 621
static Lisp_Object substitute_object_recurse (struct subst *, Lisp_Object);
static void substitute_in_interval (INTERVAL, void *);
622

Jim Blandy's avatar
Jim Blandy committed
623

624
/* Get a character from the tty.  */
Jim Blandy's avatar
Jim Blandy committed
625

626 627
/* Read input events until we get one that's acceptable for our purposes.

628
   If NO_SWITCH_FRAME, switch-frame events are stashed
629 630 631
   until we get a character we like, and then stuffed into
   unread_switch_frame.

632
   If ASCII_REQUIRED, check function key events to see
633 634 635
   if the unmodified version of the symbol has a Qascii_character
   property, and use that character, if present.

636 637 638
   If ERROR_NONASCII, signal an error if the input we
   get isn't an ASCII character with modifiers.  If it's false but
   ASCII_REQUIRED is true, just re-read until we get an ASCII
639 640
   character.

641
   If INPUT_METHOD, invoke the current input method
642 643
   if the character warrants that.

644
   If SECONDS is a number, wait that many seconds for input, and
645
   return Qnil if no input arrives within that time.  */
646

Andreas Schwab's avatar
Andreas Schwab committed
647
static Lisp_Object
648 649
read_filtered_event (bool no_switch_frame, bool ascii_required,
		     bool error_nonascii, bool input_method, Lisp_Object seconds)
650
{
651
  Lisp_Object val, delayed_switch_frame;
652
  struct timespec end_time;
653

654
#ifdef HAVE_WINDOW_SYSTEM
655 656
  if (display_hourglass_p)
    cancel_hourglass ();
657
#endif
658

659
  delayed_switch_frame = Qnil;
660

661 662 663
  /* Compute timeout.  */
  if (NUMBERP (seconds))
    {
664
      double duration = XFLOATINT (seconds);
665 666
      struct timespec wait_time = dtotimespec (duration);
      end_time = timespec_add (current_timespec (), wait_time);
667 668
    }

669
  /* Read until we get an acceptable event.  */
670
 retry:
671
  do
672
    val = read_char (0, Qnil, (input_method ? Qnil : Qt), 0,
673
		     NUMBERP (seconds) ? &end_time : NULL);
674
  while (INTEGERP (val) && XINT (val) == -2); /* wrong_kboard_jmpbuf */
675

676
  if (BUFFERP (val))
677 678
    goto retry;

679
  /* `switch-frame' events are put off until after the next ASCII
Karl Heuer's avatar
Karl Heuer committed
680
     character.  This is better than signaling an error just because
681 682 683 684 685
     the last characters were typed to a separate minibuffer frame,
     for example.  Eventually, some code which can deal with
     switch-frame events will read it and process it.  */
  if (no_switch_frame
      && EVENT_HAS_PARAMETERS (val)
686
      && EQ (EVENT_HEAD_KIND (EVENT_HEAD (val)), Qswitch_frame))
687 688 689 690 691
    {
      delayed_switch_frame = val;
      goto retry;
    }

692
  if (ascii_required && !(NUMBERP (seconds) && NILP (val)))
693 694
    {
      /* Convert certain symbols to their ASCII equivalents.  */
695
      if (SYMBOLP (val))
696
	{
697
	  Lisp_Object tem, tem1;
698 699 700 701 702 703 704
	  tem = Fget (val, Qevent_symbol_element_mask);
	  if (!NILP (tem))
	    {
	      tem1 = Fget (Fcar (tem), Qascii_character);
	      /* Merge this symbol's modifier bits
		 with the ASCII equivalent of its basic code.  */
	      if (!NILP (tem1))
705
		XSETFASTINT (val, XINT (tem1) | XINT (Fcar (Fcdr (tem))));
706 707
	    }
	}
708

709
      /* If we don't have a character now, deal with it appropriately.  */
710
      if (!INTEGERP (val))
711 712 713
	{
	  if (error_nonascii)
	    {
714
	      Vunread_command_events = list1 (val);
715 716 717 718 719 720 721 722 723 724
	      error ("Non-character input-event");
	    }
	  else
	    goto retry;
	}
    }

  if (! NILP (delayed_switch_frame))
    unread_switch_frame = delayed_switch_frame;

725 726
#if 0

727
#ifdef HAVE_WINDOW_SYSTEM
728 729
  if (display_hourglass_p)
    start_hourglass ();
730
#endif
731 732 733

#endif

734 735 736
  return val;
}

Paul Eggert's avatar
Paul Eggert committed
737
DEFUN ("read-char", Fread_char, Sread_char, 0, 3, 0,
738
       doc: /* Read a character event from the command input (keyboard or macro).
739
It is returned as a number.
740 741 742 743 744 745
If the event has modifiers, they are resolved and reflected in the
returned character code if possible (e.g. C-SPC yields 0 and C-a yields 97).
If some of the modifiers cannot be reflected in the character code, the
returned value will include those modifiers, and will not be a valid
character code: it will fail the `characterp' test.  Use `event-basic-type'
to recover the character code with the modifiers removed.
746

747 748
If the user generates an event which is not a character (i.e. a mouse
click or function key event), `read-char' signals an error.  As an
749 750
exception, switch-frame events are put off until non-character events
can be read.
751 752 753 754 755 756
If you want to read non-character events, or ignore them, call
`read-event' or `read-char-exclusive' instead.

If the optional argument PROMPT is non-nil, display that as a prompt.
If the optional argument INHERIT-INPUT-METHOD is non-nil and some
input method is turned on in the current buffer, that input method
757 758 759 760 761
is used for reading a character.
If the optional argument SECONDS is non-nil, it should be a number
specifying the maximum number of seconds to wait for input.  If no
input arrives in that time, return nil.  SECONDS may be a
floating-point value.  */)
762
  (Lisp_Object prompt, Lisp_Object inherit_input_method, Lisp_Object seconds)
Jim Blandy's avatar
Jim Blandy committed
763
{
764 765
  Lisp_Object val;

766 767
  if (! NILP (prompt))
    message_with_string ("%s", prompt, 0);
768
  val = read_filtered_event (1, 1, 1, ! NILP (inherit_input_method), seconds);
769 770 771

  return (NILP (val) ? Qnil
	  : make_number (char_resolve_modifier_mask (XINT (val))));
Jim Blandy's avatar
Jim Blandy committed
772 773
}

Paul Eggert's avatar
Paul Eggert committed
774
DEFUN ("read-event", Fread_event, Sread_event, 0, 3, 0,
775 776 777 778
       doc: /* Read an event object from the input stream.
If the optional argument PROMPT is non-nil, display that as a prompt.
If the optional argument INHERIT-INPUT-METHOD is non-nil and some
input method is turned on in the current buffer, that input method
779 780 781 782 783
is used for reading a character.
If the optional argument SECONDS is non-nil, it should be a number
specifying the maximum number of seconds to wait for input.  If no
input arrives in that time, return nil.  SECONDS may be a
floating-point value.  */)
784
  (Lisp_Object prompt, Lisp_Object inherit_input_method, Lisp_Object seconds)
Jim Blandy's avatar
Jim Blandy committed
785
{
786 787
  if (! NILP (prompt))
    message_with_string ("%s", prompt, 0);
788
  return read_filtered_event (0, 0, 0, ! NILP (inherit_input_method), seconds);
Jim Blandy's avatar
Jim Blandy committed
789 790
}

791
DEFUN ("read-char-exclusive", Fread_char_exclusive, Sread_char_exclusive, 0, 3, 0,
792
       doc: /* Read a character event from the command input (keyboard or macro).
793
It is returned as a number.  Non-character events are ignored.
794 795 796 797 798 799
If the event has modifiers, they are resolved and reflected in the
returned character code if possible (e.g. C-SPC yields 0 and C-a yields 97).
If some of the modifiers cannot be reflected in the character code, the
returned value will include those modifiers, and will not be a valid
character code: it will fail the `characterp' test.  Use `event-basic-type'
to recover the character code with the modifiers removed.
800 801 802 803

If the optional argument PROMPT is non-nil, display that as a prompt.
If the optional argument INHERIT-INPUT-METHOD is non-nil and some
input method is turned on in the current buffer, that input method
804 805 806 807 808
is used for reading a character.
If the optional argument SECONDS is non-nil, it should be a number
specifying the maximum number of seconds to wait for input.  If no
input arrives in that time, return nil.  SECONDS may be a
floating-point value.  */)
809
  (Lisp_Object prompt, Lisp_Object inherit_input_method, Lisp_Object seconds)
Jim Blandy's avatar
Jim Blandy committed
810
{
811 812
  Lisp_Object val;

813 814
  if (! NILP (prompt))
    message_with_string ("%s", prompt, 0);
815

816
  val = read_filtered_event (1, 1, 0, ! NILP (inherit_input_method), seconds);
817 818 819

  return (NILP (val) ? Qnil
	  : make_number (char_resolve_modifier_mask (XINT (val))));
Jim Blandy's avatar
Jim Blandy committed
820 821 822
}

DEFUN ("get-file-char", Fget_file_char, Sget_file_char, 0, 0, 0,
823
       doc: /* Don't use this yourself.  */)
824
  (void)
Jim Blandy's avatar
Jim Blandy committed
825
{
826 827 828
  if (!infile)
    error ("get-file-char misused");
  return make_number (readbyte_from_stdio ());
Jim Blandy's avatar
Jim Blandy committed
829
}
830 831


832 833 834 835


/* Return true if the lisp code read using READCHARFUN defines a non-nil
   `lexical-binding' file variable.  After returning, the stream is
836 837
   positioned following the first line, if it is a comment or #! line,
   otherwise nothing is read.  */
838

839
static bool
Stefan Monnier's avatar
Stefan Monnier committed
840
lisp_file_lexically_bound_p (Lisp_Object readcharfun)
841 842
{
  int ch = READCHAR;
843 844 845 846 847 848 849 850 851 852 853 854 855

  if (ch == '#')
    {
      ch = READCHAR;
      if (ch != '!')
        {
          UNREAD (ch);
          UNREAD ('#');
          return 0;
        }
      while (ch != '\n' && ch != EOF)
        ch = READCHAR;
      if (ch == '\n') ch = READCHAR;
Glenn Morris's avatar
Glenn Morris committed
856 857
      /* It is OK to leave the position after a #! line, since
         that is what read1 does.  */
858 859
    }

860 861 862 863 864 865 866 867 868
  if (ch != ';')
    /* The first line isn't a comment, just give up.  */
    {
      UNREAD (ch);
      return 0;
    }
  else
    /* Look for an appropriate file-variable in the first line.  */
    {
869
      bool rv = 0;
870
      enum {
Paul Eggert's avatar
Paul Eggert committed
871
	NOMINAL, AFTER_FIRST_DASH, AFTER_ASTERIX
872
      } beg_end_state = NOMINAL;
873
      bool in_file_vars = 0;
874

875 876 877 878 879 880 881 882 883 884
#define UPDATE_BEG_END_STATE(ch)				\
  if (beg_end_state == NOMINAL)					\
    beg_end_state = (ch == '-' ? AFTER_FIRST_DASH : NOMINAL);	\
  else if (beg_end_state == AFTER_FIRST_DASH)			\
    beg_end_state = (ch == '*' ? AFTER_ASTERIX : NOMINAL);	\
  else if (beg_end_state == AFTER_ASTERIX)			\
    {								\
      if (ch == '-')						\
	in_file_vars = !in_file_vars;				\
      beg_end_state = NOMINAL;					\
885 886 887 888 889 890 891 892 893 894 895 896
    }

      /* Skip until we get to the file vars, if any.  */
      do
	{
	  ch = READCHAR;
	  UPDATE_BEG_END_STATE (ch);
	}
      while (!in_file_vars && ch != '\n' && ch != EOF);

      while (in_file_vars)
	{
897
	  char var[100], val[100];
898
	  unsigned i;
899 900 901 902 903 904 905

	  ch = READCHAR;

	  /* Read a variable name.  */
	  while (ch == ' ' || ch == '\t')
	    ch = READCHAR;

906
	  i = 0;
907
	  beg_end_state = NOMINAL;
908
	  while (ch != ':' && ch != '\n' && ch != EOF && in_file_vars)
909
	    {
910 911
	      if (i < sizeof var - 1)
		var[i++] = ch;
912 913 914
	      UPDATE_BEG_END_STATE (ch);
	      ch = READCHAR;
	    }
915

916
	  /* Stop scanning if no colon was found before end marker.  */
917
	  if (!in_file_vars || ch == '\n' || ch == EOF)
918 919
	    break;

920 921 922
	  while (i > 0 && (var[i - 1] == ' ' || var[i - 1] == '\t'))
	    i--;
	  var[i] = '\0';
923 924 925 926 927 928 929 930 931

	  if (ch == ':')
	    {
	      /* Read a variable value.  */
	      ch = READCHAR;

	      while (ch == ' ' || ch == '\t')
		ch = READCHAR;

932
	      i = 0;
933
	      beg_end_state = NOMINAL;
934 935
	      while (ch != ';' && ch != '\n' && ch != EOF && in_file_vars)
		{
936 937
		  if (i < sizeof val - 1)
		    val[i++] = ch;
938 939 940 941
		  UPDATE_BEG_END_STATE (ch);
		  ch = READCHAR;
		}
	      if (! in_file_vars)
942
		/* The value was terminated by an end-marker, which remove.  */
943 944 945 946
		i -= 3;
	      while (i > 0 && (val[i - 1] == ' ' || val[i - 1] == '\t'))
		i--;
	      val[i] = '\0';
947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962

	      if (strcmp (var, "lexical-binding") == 0)
		/* This is it...  */
		{
		  rv = (strcmp (val, "nil") != 0);
		  break;
		}
	    }
	}

      while (ch != '\n' && ch != EOF)
	ch = READCHAR;

      return rv;
    }
}
963

Kenichi Handa's avatar
Kenichi Handa committed
964
/* Value is a version number of byte compiled code if the file
Miles Bader's avatar
Miles Bader committed
965
   associated with file descriptor FD is a compiled Lisp file that's
Kenichi Handa's avatar
Kenichi Handa committed
966 967 968
   safe to load.  Only files compiled with Emacs are safe to load.
   Files compiled with XEmacs can lead to a crash in Fbyte_code
   because of an incompatible change in the byte compiler.  */
969 970

static int
971
safe_to_load_version (int fd)
972 973 974
{
  char buf[512];
  int nbytes, i;
Kenichi Handa's avatar
Kenichi Handa committed
975
  int version = 1;
976 977 978

  /* Read the first few bytes from the file, and look for a line
     specifying the byte compiler version used.  */
979
  nbytes = emacs_read_quit (fd, buf, sizeof buf);
980 981 982
  if (nbytes > 0)
    {
      /* Skip to the next newline, skipping over the initial `ELC'
Dave Love's avatar
Dave Love committed
983
	 with NUL bytes following it, but note the version.  */
984
      for (i = 0; i < nbytes && buf[i] != '\n'; ++i)
Dave Love's avatar
Dave Love committed
985
	if (i == 4)
Kenichi Handa's avatar
Kenichi Handa committed
986
	  version = buf[i];
987

988
      if (i >= nbytes
989
	  || fast_c_string_match_ignore_case (Vbytecomp_version_regexp,
990
					      buf + i, nbytes - i) < 0)
991
	version = 0;
992 993 994
    }

  lseek (fd, 0, SEEK_SET);
995
  return version;
996 997 998
}


999 1000 1001
/* Callback for record_unwind_protect.  Restore the old load list OLD,
   after loading a file successfully.  */

1002
static void
1003
record_load_unwind (Lisp_Object old)
1004
{
1005
  Vloads_in_progress = old;
1006 1007
}

1008 1009 1010
/* This handler function is used via internal_condition_case_1.  */

static Lisp_Object
1011
load_error_handler (Lisp_Object data)
1012 1013 1014
{
  return Qnil;
}
1015

1016
static void
1017
load_warn_old_style_backquotes (Lisp_Object file)
1018
{
1019
  if (!NILP (Vlread_old_style_backquotes))
1020
    {
1021
      AUTO_STRING (format, "Loading `%s': old-style backquotes detected!");
1022
      CALLN (Fmessage, format, file);
1023
    }
1024 1025
}

1026 1027 1028 1029 1030
static void
load_warn_unescaped_character_literals (Lisp_Object file)
{
  if (NILP (Vlread_unescaped_character_literals)) return;
  CHECK_CONS (Vlread_unescaped_character_literals);
1031 1032 1033 1034
  Lisp_Object format =
    build_string ("Loading `%s': unescaped character literals %s detected!");
  Lisp_Object separator = build_string (", ");
  Lisp_Object inner_format = build_string ("`?%c'");
1035 1036
  CALLN (Fmessage,
         format, file,
1037 1038
         Fmapconcat (list3 (Qlambda, list1 (Qchar),
                            list3 (Qformat, inner_format, Qchar)),
1039 1040 1041 1042
                     Fsort (Vlread_unescaped_character_literals, Qlss),
                     separator));
}

Paul Eggert's avatar
Paul Eggert committed
1043
DEFUN ("get-load-suffixes", Fget_load_suffixes, Sget_load_suffixes, 0, 0, 0,
1044 1045 1046
       doc: /* Return the suffixes that `load' should try if a suffix is \
required.
This uses the variables `load-suffixes' and `load-file-rep-suffixes'.  */)
1047
  (void)