Commit bed23cb2 authored by Richard M. Stallman's avatar Richard M. Stallman

(read_from_string_index_byte): New variable.

(read_from_string_index): Now counts characters.
(readchar, unreadchar, Fread_from_string): Changed accordingly.

(readchar): Read a multibyte char all at once
from a buffer, marker or string.
(unreadchar): Unread a multibyte char all at once.
(read1): Properly handle non-escaped multibyte chars.
They force a string to be multibyte.
When reading direct from a file, any multibyte sequence means
a multibyte string.
Insist on MAX_LENGTH_OF_MULTI_BYTE_FORM bytes when checking
for read_buffer full; this way need not check specially for multibyte.
parent d67e2df9
...@@ -131,6 +131,7 @@ static int read_pure; ...@@ -131,6 +131,7 @@ static int read_pure;
/* For use within read-from-string (this reader is non-reentrant!!) */ /* For use within read-from-string (this reader is non-reentrant!!) */
static int read_from_string_index; static int read_from_string_index;
static int read_from_string_index_byte;
static int read_from_string_limit; static int read_from_string_limit;
/* Number of bytes left to read in the buffer character /* Number of bytes left to read in the buffer character
...@@ -169,64 +170,59 @@ readchar (readcharfun) ...@@ -169,64 +170,59 @@ readchar (readcharfun)
Lisp_Object readcharfun; Lisp_Object readcharfun;
{ {
Lisp_Object tem; Lisp_Object tem;
register struct buffer *inbuffer;
register int c, mpos; register int c, mpos;
if (BUFFERP (readcharfun)) if (BUFFERP (readcharfun))
{ {
inbuffer = XBUFFER (readcharfun); register struct buffer *inbuffer = XBUFFER (readcharfun);
if (readchar_backlog == 0) int pt_byte = BUF_PT_BYTE (inbuffer);
{ int orig_pt_byte = pt_byte;
int pt_byte = BUF_PT_BYTE (inbuffer);
int orig_pt_byte = pt_byte;
if (pt_byte >= BUF_ZV_BYTE (inbuffer)) if (pt_byte >= BUF_ZV_BYTE (inbuffer))
return -1; return -1;
if (! NILP (inbuffer->enable_multibyte_characters)) if (! NILP (inbuffer->enable_multibyte_characters))
BUF_INC_POS (inbuffer, pt_byte); {
else unsigned char *p = BUF_BYTE_ADDRESS (inbuffer, pt_byte);
pt_byte++; BUF_INC_POS (inbuffer, pt_byte);
SET_BUF_PT_BOTH (inbuffer, BUF_PT (inbuffer) + 1, pt_byte); c = STRING_CHAR (p, pt_byte - orig_pt_byte);
readchar_backlog = pt_byte - orig_pt_byte; }
else
{
c = BUF_FETCH_BYTE (inbuffer, pt_byte);
pt_byte++;
} }
SET_BUF_PT_BOTH (inbuffer, BUF_PT (inbuffer) + 1, pt_byte);
/* We get the address of the byte just passed, return c;
which is the last byte of the character.
The other bytes in this character are consecutive with it,
because the gap can't be in the middle of a character. */
return *(BUF_BYTE_ADDRESS (inbuffer, BUF_PT_BYTE (inbuffer) - 1)
- --readchar_backlog);
} }
if (MARKERP (readcharfun)) if (MARKERP (readcharfun))
{ {
inbuffer = XMARKER (readcharfun)->buffer; register struct buffer *inbuffer = XMARKER (readcharfun)->buffer;
if (readchar_backlog == 0)
{
int bytepos = marker_byte_position (readcharfun);
int orig_bytepos = bytepos;
if (bytepos >= BUF_ZV_BYTE (inbuffer)) int bytepos = marker_byte_position (readcharfun);
return -1; int orig_bytepos = bytepos;
if (! NILP (inbuffer->enable_multibyte_characters)) if (bytepos >= BUF_ZV_BYTE (inbuffer))
INC_POS (bytepos); return -1;
else
bytepos++;
XMARKER (readcharfun)->bytepos = bytepos;
XMARKER (readcharfun)->charpos++;
readchar_backlog = bytepos - orig_bytepos; if (! NILP (inbuffer->enable_multibyte_characters))
{
unsigned char *p = BUF_BYTE_ADDRESS (inbuffer, bytepos);
BUF_INC_POS (inbuffer, bytepos);
c = STRING_CHAR (p, bytepos - orig_bytepos);
}
else
{
c = BUF_FETCH_BYTE (inbuffer, bytepos);
bytepos++;
} }
/* We get the address of the byte just passed, XMARKER (readcharfun)->bytepos = bytepos;
which is the last byte of the character. XMARKER (readcharfun)->charpos++;
The other bytes in this character are consecutive with it,
because the gap can't be in the middle of a character. */ return c;
return *(BUF_BYTE_ADDRESS (inbuffer, XMARKER (readcharfun)->bytepos - 1)
- --readchar_backlog);
} }
if (EQ (readcharfun, Qget_file_char)) if (EQ (readcharfun, Qget_file_char))
{ {
...@@ -244,13 +240,14 @@ readchar (readcharfun) ...@@ -244,13 +240,14 @@ readchar (readcharfun)
if (STRINGP (readcharfun)) if (STRINGP (readcharfun))
{ {
register int c; if (read_from_string_index >= read_from_string_limit)
/* This used to be return of a conditional expression,
but that truncated -1 to a char on VMS. */
if (read_from_string_index < read_from_string_limit)
c = XSTRING (readcharfun)->data[read_from_string_index++];
else
c = -1; c = -1;
else if (STRING_MULTIBYTE (readcharfun))
FETCH_STRING_CHAR_ADVANCE (c, readcharfun,
read_from_string_index,
read_from_string_index_byte);
else
c = XSTRING (readcharfun)->data[read_from_string_index++];
return c; return c;
} }
...@@ -276,42 +273,36 @@ unreadchar (readcharfun, c) ...@@ -276,42 +273,36 @@ unreadchar (readcharfun, c)
; ;
else if (BUFFERP (readcharfun)) else if (BUFFERP (readcharfun))
{ {
if (!SINGLE_BYTE_CHAR_P (c)) struct buffer *b = XBUFFER (readcharfun);
readchar_backlog++; int bytepos = BUF_PT_BYTE (b);
else
{
struct buffer *b = XBUFFER (readcharfun);
int bytepos = BUF_PT_BYTE (b);
BUF_PT (b)--; BUF_PT (b)--;
if (! NILP (b->enable_multibyte_characters)) if (! NILP (b->enable_multibyte_characters))
BUF_DEC_POS (b, bytepos); BUF_DEC_POS (b, bytepos);
else else
bytepos--; bytepos--;
BUF_PT_BYTE (b) = bytepos; BUF_PT_BYTE (b) = bytepos;
}
} }
else if (MARKERP (readcharfun)) else if (MARKERP (readcharfun))
{ {
if (!SINGLE_BYTE_CHAR_P (c)) struct buffer *b = XMARKER (readcharfun)->buffer;
readchar_backlog++; int bytepos = XMARKER (readcharfun)->bytepos;
else
{
struct buffer *b = XMARKER (readcharfun)->buffer;
int bytepos = XMARKER (readcharfun)->bytepos;
XMARKER (readcharfun)->charpos--; XMARKER (readcharfun)->charpos--;
if (! NILP (b->enable_multibyte_characters)) if (! NILP (b->enable_multibyte_characters))
BUF_DEC_POS (b, bytepos); BUF_DEC_POS (b, bytepos);
else else
bytepos--; bytepos--;
XMARKER (readcharfun)->bytepos = bytepos; XMARKER (readcharfun)->bytepos = bytepos;
}
} }
else if (STRINGP (readcharfun)) else if (STRINGP (readcharfun))
read_from_string_index--; {
read_from_string_index--;
read_from_string_index_byte
= string_char_to_byte (readcharfun, read_from_string_index);
}
else if (EQ (readcharfun, Qget_file_char)) else if (EQ (readcharfun, Qget_file_char))
ungetc (c, instream); ungetc (c, instream);
else else
...@@ -321,7 +312,7 @@ unreadchar (readcharfun, c) ...@@ -321,7 +312,7 @@ unreadchar (readcharfun, c)
static Lisp_Object read0 (), read1 (), read_list (), read_vector (); static Lisp_Object read0 (), read1 (), read_list (), read_vector ();
static int read_multibyte (); static int read_multibyte ();
/* get a character from the tty */ /* Get a character from the tty. */
extern Lisp_Object read_char (); extern Lisp_Object read_char ();
...@@ -1180,12 +1171,11 @@ START and END optionally delimit a substring of STRING from which to read;\n\ ...@@ -1180,12 +1171,11 @@ START and END optionally delimit a substring of STRING from which to read;\n\
CHECK_STRING (string,0); CHECK_STRING (string,0);
if (NILP (end)) if (NILP (end))
endval = STRING_BYTES (XSTRING (string)); endval = XSTRING (string)->size;
else else
{ {
CHECK_NUMBER (end, 2); CHECK_NUMBER (end, 2);
endval = string_char_to_byte (string, XINT (end)); if (endval < 0 || endval > XSTRING (string)->size)
if (endval < 0 || endval > STRING_BYTES (XSTRING (string)))
args_out_of_range (string, end); args_out_of_range (string, end);
} }
...@@ -1194,21 +1184,19 @@ START and END optionally delimit a substring of STRING from which to read;\n\ ...@@ -1194,21 +1184,19 @@ START and END optionally delimit a substring of STRING from which to read;\n\
else else
{ {
CHECK_NUMBER (start, 1); CHECK_NUMBER (start, 1);
startval = string_char_to_byte (string, XINT (start));
if (startval < 0 || startval > endval) if (startval < 0 || startval > endval)
args_out_of_range (string, start); args_out_of_range (string, start);
} }
read_from_string_index = startval; read_from_string_index = startval;
read_from_string_index_byte = string_char_to_byte (string, startval);
read_from_string_limit = endval; read_from_string_limit = endval;
new_backquote_flag = 0; new_backquote_flag = 0;
read_objects = Qnil; read_objects = Qnil;
tem = read0 (string); tem = read0 (string);
endval = string_byte_to_char (string, return Fcons (tem, make_number (read_from_string_index));
read_from_string_index);
return Fcons (tem, make_number (endval));
} }
/* Use this for recursive reads, in contexts where internal tokens /* Use this for recursive reads, in contexts where internal tokens
...@@ -1744,49 +1732,45 @@ read1 (readcharfun, pch, first_in_list) ...@@ -1744,49 +1732,45 @@ read1 (readcharfun, pch, first_in_list)
while ((c = READCHAR) >= 0 while ((c = READCHAR) >= 0
&& c != '\"') && c != '\"')
{ {
if (p == end) if (end - p < MAX_LENGTH_OF_MULTI_BYTE_FORM)
{ {
char *new = (char *) xrealloc (read_buffer, read_buffer_size *= 2); char *new = (char *) xrealloc (read_buffer, read_buffer_size *= 2);
p += new - read_buffer; p += new - read_buffer;
read_buffer += new - read_buffer; read_buffer += new - read_buffer;
end = read_buffer + read_buffer_size; end = read_buffer + read_buffer_size;
} }
if (c == '\\') if (c == '\\')
{ {
c = read_escape (readcharfun, 1); c = read_escape (readcharfun, 1);
if (! SINGLE_BYTE_CHAR_P ((c & ~CHAR_META)))
/* C is -1 if \ newline has just been seen */
if (c == -1)
{ {
unsigned char workbuf[4]; if (p == read_buffer)
unsigned char *str = workbuf; cancel = 1;
int length;
length = non_ascii_char_to_string (c, workbuf, &str);
if (length > 1)
force_multibyte = 1;
if (p + length > end)
{
char *new = (char *) xrealloc (read_buffer, read_buffer_size *= 2);
p += new - read_buffer;
read_buffer += new - read_buffer;
end = read_buffer + read_buffer_size;
}
bcopy (str, p, length);
p += length;
continue; continue;
} }
/* If an escape specifies a non-ASCII single-byte character, /* If an escape specifies a non-ASCII single-byte character,
this must be a unibyte string. */ this must be a unibyte string. */
else if (! ASCII_BYTE_P (c)) if (SINGLE_BYTE_CHAR_P ((c & ~CHAR_META))
&& ! ASCII_BYTE_P (c))
force_singlebyte = 1; force_singlebyte = 1;
} }
/* c is -1 if \ newline has just been seen */ if (! SINGLE_BYTE_CHAR_P ((c & ~CHAR_META)))
if (c == -1)
{ {
if (p == read_buffer) unsigned char workbuf[4];
cancel = 1; unsigned char *str = workbuf;
int length;
length = non_ascii_char_to_string (c, workbuf, &str);
if (length > 1)
force_multibyte = 1;
bcopy (str, p, length);
p += length;
} }
else else
{ {
...@@ -1814,7 +1798,7 @@ read1 (readcharfun, pch, first_in_list) ...@@ -1814,7 +1798,7 @@ read1 (readcharfun, pch, first_in_list)
return make_number (0); return make_number (0);
if (force_singlebyte && force_multibyte) if (force_singlebyte && force_multibyte)
error ("Multibyte and single-byte escapes in one string constant"); error ("Multibyte and unibyte characters in one string constant");
if (force_singlebyte) if (force_singlebyte)
nchars = p - read_buffer; nchars = p - read_buffer;
...@@ -1831,7 +1815,14 @@ read1 (readcharfun, pch, first_in_list) ...@@ -1831,7 +1815,14 @@ read1 (readcharfun, pch, first_in_list)
return Fstring_make_unibyte (string); return Fstring_make_unibyte (string);
} }
} }
else if (EQ (readcharfun, Qget_file_char))
/* Nowadays, reading directly from a file
is used only for compiled Emacs Lisp files,
and those always use the Emacs internal encoding. */
nchars = multibyte_chars_in_text (read_buffer, p - read_buffer);
else else
/* In all other cases, if we read these bytes as
separate characters, treat them as separate characters now. */
nchars = p - read_buffer; nchars = p - read_buffer;
if (read_pure) if (read_pure)
...@@ -1884,7 +1875,7 @@ read1 (readcharfun, pch, first_in_list) ...@@ -1884,7 +1875,7 @@ read1 (readcharfun, pch, first_in_list)
|| c == '[' || c == ']' || c == '#' || c == '[' || c == ']' || c == '#'
)) ))
{ {
if (p == end) if (end - p < MAX_LENGTH_OF_MULTI_BYTE_FORM)
{ {
register char *new = (char *) xrealloc (read_buffer, read_buffer_size *= 2); register char *new = (char *) xrealloc (read_buffer, read_buffer_size *= 2);
p += new - read_buffer; p += new - read_buffer;
...@@ -1897,7 +1888,19 @@ read1 (readcharfun, pch, first_in_list) ...@@ -1897,7 +1888,19 @@ read1 (readcharfun, pch, first_in_list)
quoted = 1; quoted = 1;
} }
*p++ = c; if (! SINGLE_BYTE_CHAR_P (c))
{
unsigned char workbuf[4];
unsigned char *str = workbuf;
int length;
length = non_ascii_char_to_string (c, workbuf, &str);
bcopy (str, p, length);
p += length;
}
else
*p++ = c;
c = READCHAR; c = READCHAR;
} }
...@@ -2553,7 +2556,7 @@ init_obarray () ...@@ -2553,7 +2556,7 @@ init_obarray ()
Qvariable_documentation = intern ("variable-documentation"); Qvariable_documentation = intern ("variable-documentation");
staticpro (&Qvariable_documentation); staticpro (&Qvariable_documentation);
read_buffer_size = 100; read_buffer_size = 100 + MAX_LENGTH_OF_MULTI_BYTE_FORM;
read_buffer = (char *) malloc (read_buffer_size); read_buffer = (char *) malloc (read_buffer_size);
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment