Commit b7dbcc19 authored by Kenichi Handa
Browse files

(char_quoted): Use FETCH_CHAR_AS_MULTIBYTE to convert
unibyte chars to multibyte.
(back_comment): Likewise.
(scan_words): Likewise.
(skip_chars): The arg syntaxp is deleted, and the code for
handling syntaxes is moved to skip_syntaxes.  Callers changed.
Fix the case where the multibyteness of STRING and that of the
current buffer don't match.
(skip_syntaxes): New function.
(SYNTAX_WITH_MULTIBYTE_CHECK): Check C by ASCII_CHAR_P, not by
SINGLE_BYTE_CHAR_P.
(Fforward_comment): Use FETCH_CHAR_AS_MULTIBYTE to convert unibyte
chars to multibyte.
(scan_lists): Likewise.
(Fbackward_prefix_chars): Likewise.
(scan_sexps_forward): Likewise.
parent 93daa011
......@@ -97,7 +97,8 @@ static int find_start_modiff;
static int find_defun_start P_ ((int, int));
static int back_comment P_ ((int, int, int, int, int, int *, int *));
static int char_quoted P_ ((int, int));
static Lisp_Object skip_chars P_ ((int, int, Lisp_Object, Lisp_Object));
static Lisp_Object skip_chars P_ ((int, Lisp_Object, Lisp_Object));
static Lisp_Object skip_syntaxes P_ ((int, Lisp_Object, Lisp_Object));
static Lisp_Object scan_lists P_ ((int, int, int, int));
static void scan_sexps_forward P_ ((struct lisp_parse_state *,
int, int, int, int,
......@@ -293,7 +294,7 @@ char_quoted (charpos, bytepos)
while (bytepos >= beg)
{
UPDATE_SYNTAX_TABLE_BACKWARD (charpos);
code = SYNTAX (FETCH_CHAR (bytepos));
code = SYNTAX (FETCH_CHAR_AS_MULTIBYTE (bytepos));
if (! (code == Scharquote || code == Sescape))
break;
......@@ -378,10 +379,10 @@ find_defun_start (pos, pos_byte)
{
/* Open-paren at start of line means we may have found our
defun-start. */
if (SYNTAX (FETCH_CHAR (PT_BYTE)) == Sopen)
if (SYNTAX (FETCH_CHAR_AS_MULTIBYTE (PT_BYTE)) == Sopen)
{
SETUP_SYNTAX_TABLE (PT + 1, -1); /* Try again... */
if (SYNTAX (FETCH_CHAR (PT_BYTE)) == Sopen)
if (SYNTAX (FETCH_CHAR_AS_MULTIBYTE (PT_BYTE)) == Sopen)
break;
/* Now fallback to the default value. */
gl_state.current_syntax_table = current_buffer->syntax_table;
......@@ -502,7 +503,7 @@ back_comment (from, from_byte, stop, comnested, comstyle, charpos_ptr, bytepos_p
UPDATE_SYNTAX_TABLE_BACKWARD (from);
prev_syntax = syntax;
c = FETCH_CHAR (from_byte);
c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
syntax = SYNTAX_WITH_FLAGS (c);
code = SYNTAX (c);
......@@ -531,7 +532,7 @@ back_comment (from, from_byte, stop, comnested, comstyle, charpos_ptr, bytepos_p
int next = from, next_byte = from_byte, next_c, next_syntax;
DEC_BOTH (next, next_byte);
UPDATE_SYNTAX_TABLE_BACKWARD (next);
next_c = FETCH_CHAR (next_byte);
next_c = FETCH_CHAR_AS_MULTIBYTE (next_byte);
next_syntax = SYNTAX_WITH_FLAGS (next_c);
if (((com2start || comnested)
&& SYNTAX_FLAGS_COMEND_SECOND (syntax)
......@@ -1195,7 +1196,7 @@ scan_words (from, count)
return 0;
}
UPDATE_SYNTAX_TABLE_FORWARD (from);
ch0 = FETCH_CHAR (from_byte);
ch0 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
code = SYNTAX (ch0);
INC_BOTH (from, from_byte);
if (words_include_escapes
......@@ -1224,7 +1225,7 @@ scan_words (from, count)
{
if (from == end) break;
UPDATE_SYNTAX_TABLE_FORWARD (from);
ch1 = FETCH_CHAR (from_byte);
ch1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
code = SYNTAX (ch1);
if ((code != Sword
&& (! words_include_escapes
......@@ -1251,7 +1252,7 @@ scan_words (from, count)
}
DEC_BOTH (from, from_byte);
UPDATE_SYNTAX_TABLE_BACKWARD (from);
ch1 = FETCH_CHAR (from_byte);
ch1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
code = SYNTAX (ch1);
if (words_include_escapes
&& (code == Sescape || code == Scharquote))
......@@ -1283,7 +1284,7 @@ scan_words (from, count)
break;
temp_byte = dec_bytepos (from_byte);
UPDATE_SYNTAX_TABLE_BACKWARD (from);
ch0 = FETCH_CHAR (temp_byte);
ch0 = FETCH_CHAR_AS_MULTIBYTE (temp_byte);
code = SYNTAX (ch0);
if ((code != Sword
&& (! words_include_escapes
......@@ -1339,7 +1340,7 @@ Returns the distance traveled, either zero or positive. */)
(string, lim)
Lisp_Object string, lim;
{
return skip_chars (1, 0, string, lim);
return skip_chars (1, string, lim);
}
DEFUN ("skip-chars-backward", Fskip_chars_backward, Sskip_chars_backward, 1, 2, 0,
......@@ -1349,7 +1350,7 @@ Returns the distance traveled, either zero or negative. */)
(string, lim)
Lisp_Object string, lim;
{
return skip_chars (0, 0, string, lim);
return skip_chars (0, string, lim);
}
DEFUN ("skip-syntax-forward", Fskip_syntax_forward, Sskip_syntax_forward, 1, 2, 0,
......@@ -1361,7 +1362,7 @@ This function returns the distance traveled, either zero or positive. */)
(syntax, lim)
Lisp_Object syntax, lim;
{
return skip_chars (1, 1, syntax, lim);
return skip_syntaxes (1, syntax, lim);
}
DEFUN ("skip-syntax-backward", Fskip_syntax_backward, Sskip_syntax_backward, 1, 2, 0,
......@@ -1373,53 +1374,32 @@ This function returns the distance traveled, either zero or negative. */)
(syntax, lim)
Lisp_Object syntax, lim;
{
return skip_chars (0, 1, syntax, lim);
return skip_syntaxes (0, syntax, lim);
}
static Lisp_Object
skip_chars (forwardp, syntaxp, string, lim)
int forwardp, syntaxp;
skip_chars (forwardp, string, lim)
int forwardp;
Lisp_Object string, lim;
{
register unsigned int c;
unsigned char fastmap[0400];
/* If SYNTAXP is 0, STRING may contain multi-byte form of characters
of which codes don't fit in FASTMAP. In that case, set the
ranges of characters in CHAR_RANGES. */
/* Store the ranges of non-ASCII characters. */
int *char_ranges;
int n_char_ranges = 0;
int negate = 0;
register int i, i_byte;
int multibyte = !NILP (current_buffer->enable_multibyte_characters);
/* Set to 1 if the current buffer is multibyte and the region
contains non-ASCII chars. */
int multibyte;
/* Set to 1 if STRING is multibyte and it contains non-ASCII
chars. */
int string_multibyte;
int size_byte;
unsigned char *str;
int len;
CHECK_STRING (string);
char_ranges = (int *) alloca (XSTRING (string)->size * (sizeof (int)) * 2);
string_multibyte = STRING_MULTIBYTE (string);
str = XSTRING (string)->data;
size_byte = STRING_BYTES (XSTRING (string));
/* Adjust the multibyteness of the string to that of the buffer. */
if (multibyte != string_multibyte)
{
int nbytes;
if (multibyte)
nbytes = count_size_as_multibyte (XSTRING (string)->data,
XSTRING (string)->size);
else
nbytes = XSTRING (string)->size;
if (nbytes != size_byte)
{
str = (unsigned char *) alloca (nbytes);
copy_text (XSTRING (string)->data, str, size_byte,
string_multibyte, multibyte);
size_byte = nbytes;
}
}
if (NILP (lim))
XSETINT (lim, forwardp ? ZV : BEGV);
......@@ -1432,10 +1412,15 @@ skip_chars (forwardp, syntaxp, string, lim)
if (XINT (lim) < BEGV)
XSETFASTINT (lim, BEGV);
multibyte = (!NILP (current_buffer->enable_multibyte_characters)
&& (lim - PT != CHAR_TO_BYTE (lim) - PT_BYTE));
string_multibyte = STRING_BYTES (XSTRING (string)) > XSTRING (string)->size;
bzero (fastmap, sizeof fastmap);
if (multibyte)
char_ranges = (int *) alloca (XSTRING (string)->size * (sizeof (int)) * 2);
i_byte = 0;
if (i_byte < size_byte
&& XSTRING (string)->data[0] == '^')
{
......@@ -1443,20 +1428,25 @@ skip_chars (forwardp, syntaxp, string, lim)
}
/* Find the characters specified and set their elements of fastmap.
If syntaxp, each character counts as itself.
Otherwise, handle backslashes and ranges specially. */
Handle backslashes and ranges specially.
if (size_byte == XSTRING (string)->size)
while (i_byte < size_byte)
{
c = str[i_byte++];
If STRING contains non-ASCII characters, setup char_ranges for
them and use fastmap only for their leading codes. */
if (syntaxp)
fastmap[syntax_spec_code[c]] = 1;
else
{
if (c == '\\')
{
str = XSTRING (string)->data;
size_byte = STRING_BYTES (XSTRING (string));
if (! string_multibyte)
{
int string_has_eight_bit = 0;
/* First, set up fastmap. */
while (i_byte < size_byte)
{
c = str[i_byte++];
if (c == '\\')
{
if (i_byte == size_byte)
break;
......@@ -1475,67 +1465,299 @@ skip_chars (forwardp, syntaxp, string, lim)
/* Get the end of the range. */
c2 = str[i_byte++];
if (c2 == '\\'
&& i_byte < size_byte)
c2 = str[i_byte++];
while (c <= c2)
fastmap[c++] = 1;
if (! ASCII_CHAR_P (c2))
string_has_eight_bit = 1;
}
else
fastmap[c] = 1;
{
fastmap[c] = 1;
if (! ASCII_CHAR_P (c))
string_has_eight_bit = 1;
}
}
/* If the current range is multibyte and STRING contains
eight-bit chars, arrange fastmap and setup char_ranges for
the corresponding multibyte chars. */
if (multibyte && string_has_eight_bit)
{
unsigned char fastmap2[0400];
int range_start_byte, range_start_char;
bcopy (fastmap2 + 0200, fastmap + 0200, 0200);
bzero (fastmap + 0200, 0200);
/* We are sure that this loop stops. */
for (i = 0200; ! fastmap2[i]; i++);
c = unibyte_char_to_multibyte (i);
fastmap[CHAR_LEADING_CODE (c)] = 1;
range_start_byte = i;
range_start_char = c;
for (i = 129; i < 0400; i++)
{
c = unibyte_char_to_multibyte (i);
fastmap[CHAR_LEADING_CODE (c)] = 1;
if (i - range_start_byte != c - range_start_char)
{
char_ranges[n_char_ranges++] = range_start_char;
char_ranges[n_char_ranges++] = ((i - 1 - range_start_byte)
+ range_start_char);
range_start_byte = i;
range_start_char = c;
}
}
char_ranges[n_char_ranges++] = range_start_char;
char_ranges[n_char_ranges++] = ((i - 1 - range_start_byte)
+ range_start_char);
}
}
else
while (i_byte < size_byte)
{
c = STRING_CHAR_AND_LENGTH (str + i_byte, size_byte-i_byte, len);
i_byte += len;
{
while (i_byte < size_byte)
{
unsigned char leading_code;
if (syntaxp)
fastmap[syntax_spec_code[c & 0377]] = 1;
else
{
if (c == '\\')
{
if (i_byte == size_byte)
break;
leading_code = str[i_byte];
c = STRING_CHAR_AND_LENGTH (str + i_byte, size_byte-i_byte, len);
i_byte += len;
c = STRING_CHAR_AND_LENGTH (str+i_byte, size_byte-i_byte, len);
i_byte += len;
}
if (i_byte < size_byte
&& str[i_byte] == '-')
{
unsigned int c2;
if (c == '\\')
{
if (i_byte == size_byte)
break;
leading_code = str[i_byte];
c = STRING_CHAR_AND_LENGTH (str+i_byte, size_byte-i_byte, len);
i_byte += len;
}
if (i_byte < size_byte
&& str[i_byte] == '-')
{
unsigned int c2;
unsigned char leading_code2;
/* Skip over the dash. */
i_byte++;
/* Skip over the dash. */
i_byte++;
if (i_byte == size_byte)
break;
if (i_byte == size_byte)
break;
/* Get the end of the range. */
c2 =STRING_CHAR_AND_LENGTH (str + i_byte, size_byte-i_byte, len);
i_byte += len;
/* Get the end of the range. */
leading_code2 = str[i_byte];
c2 =STRING_CHAR_AND_LENGTH (str + i_byte, size_byte-i_byte, len);
i_byte += len;
if (ASCII_CHAR_P (c))
if (c2 == '\\'
&& i_byte < size_byte)
{
leading_code2 = str[i_byte];
c2 =STRING_CHAR_AND_LENGTH (str + i_byte, size_byte-i_byte, len);
i_byte += len;
}
if (ASCII_CHAR_P (c))
{
while (c <= c2 && c < 0x80)
fastmap[c++] = 1;
if (c <= c2)
{
char_ranges[n_char_ranges++] = c;
char_ranges[n_char_ranges++] = c2;
}
}
else
{
if (ASCII_CHAR_P (c))
fastmap[c] = 1;
else
{
char_ranges[n_char_ranges++] = c;
char_ranges[n_char_ranges++] = c;
}
}
leading_code = CHAR_LEADING_CODE (c);
}
if (! ASCII_CHAR_P (c))
{
while (leading_code <= leading_code2)
fastmap[leading_code++] = 1;
if (c <= c2)
{
char_ranges[n_char_ranges++] = c;
char_ranges[n_char_ranges++] = c2;
}
}
}
else
{
if (ASCII_CHAR_P (c))
fastmap[c] = 1;
else
{
fastmap[leading_code] = 1;
char_ranges[n_char_ranges++] = c;
char_ranges[n_char_ranges++] = c;
}
}
}
/* If the current range is unibyte and STRING contains non-ASCII
chars, arrange fastmap for the corresponding unibyte
chars. */
if (! multibyte && n_char_ranges > 0)
{
bzero (fastmap + 0200, 0200);
for (i = 0; i < n_char_ranges; i += 2)
{
int c1 = char_ranges[i];
int c2 = char_ranges[i + 1];
for (; c1 <= c2; c1++)
fastmap[CHAR_TO_BYTE8 (c1)] = 1;
}
}
}
/* If ^ was the first character, complement the fastmap. */
if (negate)
{
if (! multibyte)
for (i = 0; i < sizeof fastmap; i++)
fastmap[i] ^= 1;
else
{
for (i = 0; i < 0200; i++)
fastmap[i] ^= 1;
/* All non-ASCII chars possibly match. */
for (; i < sizeof fastmap; i++)
fastmap[i] = 1;
}
}
{
int start_point = PT;
int pos = PT;
int pos_byte = PT_BYTE;
immediate_quit = 1;
if (forwardp)
{
if (multibyte)
while (pos < XINT (lim))
{
c = FETCH_BYTE (pos_byte);
if (! fastmap[c])
break;
if (! ASCII_CHAR_P (c))
{
c = FETCH_MULTIBYTE_CHAR (pos_byte);
/* As we are looking at a multibyte character, we
must look up the character in the table
CHAR_RANGES. If there's no data in the table,
that character is not what we want to skip. */
/* The following code does the right thing even if
n_char_ranges is zero (i.e. no data in
CHAR_RANGES). */
for (i = 0; i < n_char_ranges; i += 2)
if (c >= char_ranges[i] && c <= char_ranges[i + 1])
break;
if (!(negate ^ (i < n_char_ranges)))
break;
}
/* Since we already checked for multibyteness, avoid
using INC_BOTH which checks again. */
INC_POS (pos_byte);
pos++;
}
else
{
while (pos < XINT (lim) && fastmap[FETCH_BYTE (pos)])
pos++;
pos_byte = pos;
}
}
else
{
if (multibyte)
while (pos > XINT (lim))
{
int prev_pos_byte = pos_byte;
DEC_POS (prev_pos_byte);
c = FETCH_BYTE (prev_pos_byte);
if (! fastmap[c])
break;
if (! ASCII_CHAR_P (c))
{
c = FETCH_MULTIBYTE_CHAR (prev_pos_byte);
/* See the comment in the previous similar code. */
for (i = 0; i < n_char_ranges; i += 2)
if (c >= char_ranges[i] && c <= char_ranges[i + 1])
break;
if (!(negate ^ (i < n_char_ranges)))
break;
}
pos--;
pos_byte = prev_pos_byte;
}
else
{
while (pos > XINT (lim) && fastmap[FETCH_BYTE (pos - 1)])
pos--;
pos_byte = pos;
}
}
SET_PT_BOTH (pos, pos_byte);
immediate_quit = 0;
return make_number (PT - start_point);
}
}
static Lisp_Object
skip_syntaxes (forwardp, string, lim)
int forwardp;
Lisp_Object string, lim;
{
register unsigned int c;
unsigned char fastmap[0400];
int negate = 0;
register int i, i_byte;
int multibyte;
int size_byte;
unsigned char *str;
CHECK_STRING (string);
if (NILP (lim))
XSETINT (lim, forwardp ? ZV : BEGV);
else
CHECK_NUMBER_COERCE_MARKER (lim);
/* In any case, don't allow scan outside bounds of buffer. */
if (XINT (lim) > ZV)
XSETFASTINT (lim, ZV);
if (XINT (lim) < BEGV)
XSETFASTINT (lim, BEGV);
multibyte = (!NILP (current_buffer->enable_multibyte_characters)
&& (lim - PT != CHAR_TO_BYTE (lim) - PT_BYTE));
bzero (fastmap, sizeof fastmap);
i_byte = 0;
if (i_byte < size_byte
&& XSTRING (string)->data[0] == '^')
{
negate = 1; i_byte++;
}
if (STRING_BYTES (XSTRING (string)) > XSTRING (string)->size)
/* As this is a very rare case, don't consider efficiency. */
string = string_make_unibyte (string);
str = XSTRING (string)->data;
size_byte = STRING_BYTES (XSTRING (string));
/* Find the syntaxes specified and set their elements of fastmap. */
while (i_byte < size_byte)
{
c = str[i_byte++];
fastmap[syntax_spec_code[c]] = 1;
}
/* If ^ was the first character, complement the fastmap. */
if (negate)
......@@ -1548,139 +1770,75 @@ skip_chars (forwardp, syntaxp, string, lim)
int pos_byte = PT_BYTE;
immediate_quit = 1;
if (syntaxp)
SETUP_SYNTAX_TABLE (pos, forwardp ? 1 : -1);
if (forwardp)
{
SETUP_SYNTAX_TABLE (pos, forwardp ? 1 : -1);
if (forwardp)
if (multibyte)
{
if (multibyte)
{
if (pos < XINT (lim))
while (fastmap[(int) SYNTAX (FETCH_CHAR (pos_byte))])
{
/* Since we already checked for multibyteness,
avoid using INC_BOTH which checks again. */
INC_POS (pos_byte);
pos++;
if (pos >= XINT (lim))
break;
UPDATE_SYNTAX_TABLE_FORWARD (pos);
}
}
else
{
while (pos < XINT (lim)
&& fastmap[(int) SYNTAX (FETCH_BYTE (pos))])
{
pos++;
UPDATE_SYNTAX_TABLE_FORWARD (pos);
}
}
if (pos < XINT (lim))
while (fastmap[(int) SYNTAX (FETCH_CHAR (pos_byte))])
{
/* Since we already checked for multibyteness,
avoid using INC_BOTH which checks again. */
INC_POS (pos_byte);
pos++;
if (pos >= XINT (lim))
break;
UPDATE_SYNTAX_TABLE_FORWARD (pos);
}
}
else
{
if (multibyte)
{
while (pos > XINT (lim))
{
int savepos = pos_byte;
/* Since we already checked for multibyteness,
avoid using DEC_BOTH which checks again. */
pos--;
DEC_POS (pos_byte);
UPDATE_SYNTAX_TABLE_BACKWARD (pos);
if (!fastmap[(int) SYNTAX (FETCH_CHAR (pos_byte))])
{
pos++;