Commit 82ebc97b authored by Kenichi Handa

Fix bidi-composition interaction.

parent 1f8162fe
2010-05-14 Kenichi Handa <handa@m17n.org>
* dispextern.h (struct composition_it): New member reversed_p.
* composite.c (composition_compute_stop_pos): Search backward if
ENDPOS < CHARPOS.
(composition_reseat_it): Handle the case that ENDPOS < CHARPOS.
Set CMP_IT->reversed_p.
(composition_update_it): Pay attention to CMP_IT->reversed_p.
* xdisp.c (set_iterator_to_next): Call
composition_compute_stop_pos with negative ENDPOS if we are
scanning backward. Call composition_compute_stop_pos if scan
direction is changed.
(next_element_from_buffer): Call composition_compute_stop_pos with
negative ENDPOS if we are scanning backward.
(next_element_from_composition): Pay attention to
IT->cmp_it.reversed_p.
2010-05-10 Glenn Morris <rgm@gnu.org>
* Makefile.in (LIBS_SYSTEM): Set using configure, not cpp.
......
......@@ -1018,12 +1018,29 @@ autocmp_chars (cft_element, charpos, bytepos, limit, win, face, string)
return unbind_to (count, Qnil);
}
/* Scratch variables assigned by CHAR_COMPOSABLE_P while it is being
   evaluated; they hold the category-table entry for the character and
   the first byte of that entry's symbol name, respectively. */
static Lisp_Object _work_val;
static int _work_char;
/* 1 iff the character C is composable.

   C is treated as composable when it is 0x200C or 0x200D (ZERO WIDTH
   NON-JOINER and ZERO WIDTH JOINER in Unicode), or when its entry in
   Vunicode_category_table is a symbol whose name begins with neither
   'C' nor 'Z'.  NOTE(review): presumably those initials stand for the
   Unicode "Other" and "Separator" general categories -- confirm
   against the code that fills Vunicode_category_table.

   The expansion mentions C more than once and stores intermediate
   results in _work_val and _work_char, so the argument should be an
   expression without side effects. */
#define CHAR_COMPOSABLE_P(C) \
((C) == 0x200C || (C) == 0x200D \
|| (_work_val = CHAR_TABLE_REF (Vunicode_category_table, (C)), \
(SYMBOLP (_work_val) \
&& (_work_char = SDATA (SYMBOL_NAME (_work_val))[0]) != 'C' \
&& _work_char != 'Z')))
/* Update cmp_it->stop_pos to the next position after CHARPOS (and
BYTEPOS) where character composition may happen. If BYTEPOS is
negative, compute it. If it is a static composition, set
cmp_it->ch to -1. Otherwise, set cmp_it->ch to the character that
triggers an automatic composition. */
negative, compute it. ENDPOS is a limit of searching. If it is
less than CHARPOS, search backward to ENDPOS+1 assuming that
set_iterator_to_next works in reverse order. In this case, if a
composition closest to CHARPOS is found, set cmp_it->stop_pos to
the last character of the composition.
If no composition is found, set cmp_it->ch to -2. If a static
composition is found, set cmp_it->ch to -1. Otherwise, set
cmp_it->ch to the character that triggers the automatic
composition. */
void
composition_compute_stop_pos (cmp_it, charpos, bytepos, endpos, string)
......@@ -1036,60 +1053,200 @@ composition_compute_stop_pos (cmp_it, charpos, bytepos, endpos, string)
/* This is from forward_to_next_line_start in xdisp.c. */
const int MAX_NEWLINE_DISTANCE = 500;
if (endpos > charpos + MAX_NEWLINE_DISTANCE)
endpos = charpos + MAX_NEWLINE_DISTANCE;
cmp_it->stop_pos = endpos;
if (charpos < endpos)
{
if (endpos > charpos + MAX_NEWLINE_DISTANCE)
endpos = charpos + MAX_NEWLINE_DISTANCE;
}
else if (endpos < charpos)
{
/* We search backward for a position to check composition. */
if (endpos < 0)
{
/* But we don't know where to stop the searching. */
endpos = NILP (string) ? BEGV - 1 : -1;
/* Usually we don't reach ENDPOS because we stop searching
at an uncomposable character (NL, LRE, etc). */
}
}
cmp_it->id = -1;
cmp_it->ch = -2;
if (find_composition (charpos, endpos, &start, &end, &prop, string)
cmp_it->reversed_p = 0;
cmp_it->stop_pos = endpos;
if (charpos == endpos)
return;
/* FIXME: Bidi is not yet handled well in static composition. */
if (charpos < endpos
&& find_composition (charpos, endpos, &start, &end, &prop, string)
&& COMPOSITION_VALID_P (start, end, prop))
{
cmp_it->stop_pos = endpos = start;
cmp_it->ch = -1;
}
if (NILP (string) && PT > charpos && PT < endpos)
cmp_it->stop_pos = PT;
if (NILP (string))
{
/* A composition never strides over PT. */
if (PT > charpos)
{
if (PT < endpos)
cmp_it->stop_pos = endpos = PT;
}
else if (PT < charpos && PT > endpos)
{
cmp_it->stop_pos = endpos = PT - 1;
}
}
if (NILP (current_buffer->enable_multibyte_characters)
|| NILP (Vauto_composition_mode))
return;
if (bytepos < 0)
{
if (STRINGP (string))
bytepos = string_char_to_byte (string, charpos);
else
if (NILP (string))
bytepos = CHAR_TO_BYTE (charpos);
else
bytepos = string_char_to_byte (string, charpos);
}
start = charpos;
while (charpos < endpos)
if (charpos < endpos)
{
if (STRINGP (string))
FETCH_STRING_CHAR_ADVANCE (c, string, charpos, bytepos);
else
FETCH_CHAR_ADVANCE (c, charpos, bytepos);
if (c == '\n')
/* Forward search. */
while (charpos < endpos)
{
cmp_it->ch = -2;
break;
if (STRINGP (string))
FETCH_STRING_CHAR_ADVANCE (c, string, charpos, bytepos);
else
FETCH_CHAR_ADVANCE (c, charpos, bytepos);
if (c == '\n')
{
cmp_it->ch = -2;
break;
}
val = CHAR_TABLE_REF (Vcomposition_function_table, c);
if (! NILP (val))
{
Lisp_Object elt;
for (; CONSP (val); val = XCDR (val))
{
elt = XCAR (val);
if (VECTORP (elt) && ASIZE (elt) == 3
&& NATNUMP (AREF (elt, 1))
&& charpos - 1 - XFASTINT (AREF (elt, 1)) >= start)
break;
}
if (CONSP (val))
{
cmp_it->lookback = XFASTINT (AREF (elt, 1));
cmp_it->stop_pos = charpos - 1 - cmp_it->lookback;
cmp_it->ch = c;
return;
}
}
}
val = CHAR_TABLE_REF (Vcomposition_function_table, c);
if (! NILP (val))
}
else
{
/* Search backward for a pattern that may be composed and the
position of (possibly) the last character of the match is
closest to (but not after) START. The reason for the last
character is that set_iterator_to_next works in reverse order
and thus we must stop at the last character for composition
check. */
unsigned char *p;
int len;
/* limit byte position used in fast_looking_at. This is the
byte position of the next character of START. */
EMACS_INT limit;
if (NILP (string))
p = BYTE_POS_ADDR (bytepos);
else
p = SDATA (string) + bytepos;
c = STRING_CHAR_AND_LENGTH (p, len);
limit = bytepos + len;
while (CHAR_COMPOSABLE_P (c))
{
Lisp_Object elt;
for (val = CHAR_TABLE_REF (Vcomposition_function_table, c);
CONSP (val); val = XCDR (val))
{
Lisp_Object elt = XCAR (val);
int back, len;
for (; CONSP (val); val = XCDR (val))
if (VECTORP (elt) && ASIZE (elt) == 3
&& NATNUMP (AREF (elt, 1))
&& charpos - (back = XFASTINT (AREF (elt, 1))) > endpos)
{
EMACS_INT cpos = charpos - back, bpos;
if (back == 0)
bpos = bytepos;
else
bpos = (NILP (string) ? CHAR_TO_BYTE (cpos)
: string_char_to_byte (string, cpos));
if (STRINGP (AREF (elt, 0)))
len = fast_looking_at (AREF (elt, 0), cpos, bpos,
start + 1, limit, string);
else
len = 1;
if (len > 0)
{
/* Make CPOS point to the last character of the match.
Note that LEN is byte-length. */
bpos += len;
if (NILP (string))
cpos = BYTE_TO_CHAR (bpos) - 1;
else
cpos = string_byte_to_char (string, bpos) - 1;
back = cpos - (charpos - back);
if (cmp_it->stop_pos < cpos
|| (cmp_it->stop_pos == cpos
&& cmp_it->lookback < back))
{
cmp_it->stop_pos = cpos;
cmp_it->ch = c;
cmp_it->lookback = back;
}
}
}
}
if (charpos - 1 == endpos)
break;
if (STRINGP (string))
{
elt = XCAR (val);
if (VECTORP (elt) && ASIZE (elt) == 3 && NATNUMP (AREF (elt, 1))
&& charpos - 1 - XFASTINT (AREF (elt, 1)) >= start)
break;
p--, bytepos--;
while (! CHAR_HEAD_P (*p))
p--, bytepos--;
charpos--;
}
else
{
DEC_BOTH (charpos, bytepos);
p = BYTE_POS_ADDR (bytepos);
}
if (CONSP (val))
c = STRING_CHAR (p);
}
if (cmp_it->ch >= 0)
/* We found a position to check. */
return;
/* Skip all uncomposable characters. */
if (NILP (string))
{
while (charpos - 1 > endpos && ! CHAR_COMPOSABLE_P (c))
{
DEC_BOTH (charpos, bytepos);
c = FETCH_MULTIBYTE_CHAR (bytepos);
}
}
else
{
while (charpos - 1 > endpos && ! CHAR_COMPOSABLE_P (c))
{
cmp_it->lookback = XFASTINT (AREF (elt, 1));
cmp_it->stop_pos = charpos - 1 - cmp_it->lookback;
cmp_it->ch = c;
return;
p--;
while (! CHAR_HEAD_P (*p))
p--;
charpos--;
c = STRING_CHAR (p);
}
}
}
......@@ -1104,8 +1261,8 @@ composition_compute_stop_pos (cmp_it, charpos, bytepos, endpos, string)
string. In that case, FACE must not be NULL.
If the character is composed, setup members of CMP_IT (id, nglyphs,
and from), and return 1. Otherwise, update CMP_IT->stop_pos, and
return 0. */
from, to, reversed_p), and return 1. Otherwise, update
CMP_IT->stop_pos, and return 0. */
int
composition_reseat_it (cmp_it, charpos, bytepos, endpos, w, face, string)
......@@ -1115,13 +1272,29 @@ composition_reseat_it (cmp_it, charpos, bytepos, endpos, w, face, string)
struct face *face;
Lisp_Object string;
{
if (NILP (string) && charpos < PT && PT < endpos)
endpos = PT;
if (endpos <= charpos)
{
if (NILP (string))
{
if (endpos < 0)
endpos = BEGV;
if (endpos < PT && PT < charpos)
endpos = PT;
}
else if (endpos < 0)
endpos = 0;
}
else
{
if (NILP (string) && charpos < PT && PT < endpos)
endpos = PT;
}
if (cmp_it->ch == -2)
{
composition_compute_stop_pos (cmp_it, charpos, bytepos, endpos, string);
if (cmp_it->ch == -2)
if (cmp_it->stop_pos != charpos)
/* The current position is not composed. */
return 0;
}
......@@ -1145,18 +1318,46 @@ composition_reseat_it (cmp_it, charpos, bytepos, endpos, w, face, string)
int i;
val = CHAR_TABLE_REF (Vcomposition_function_table, cmp_it->ch);
for (; CONSP (val); val = XCDR (val))
if (charpos < endpos)
{
elt = XCAR (val);
if (cmp_it->lookback == XFASTINT (AREF (elt, 1)))
break;
for (; CONSP (val); val = XCDR (val))
{
elt = XCAR (val);
if (cmp_it->lookback == XFASTINT (AREF (elt, 1)))
break;
}
if (NILP (val))
goto no_composition;
val = autocmp_chars (val, charpos, bytepos, endpos, w, face, string);
if (! composition_gstring_p (val))
goto no_composition;
cmp_it->reversed_p = 0;
}
if (NILP (val))
goto no_composition;
else
{
EMACS_INT saved_charpos = charpos, saved_bytepos = bytepos;
val = autocmp_chars (val, charpos, bytepos, endpos, w, face, string);
if (! composition_gstring_p (val))
goto no_composition;
if (cmp_it->lookback > 0)
{
charpos -= cmp_it->lookback;
if (charpos < endpos)
goto no_composition;
if (STRINGP (string))
bytepos = string_char_to_byte (string, charpos);
else
bytepos = CHAR_TO_BYTE (charpos);
}
val = autocmp_chars (val, charpos, bytepos, saved_charpos + 1,
w, face, string);
if (! composition_gstring_p (val)
|| charpos + LGSTRING_CHAR_LEN (val) <= saved_charpos)
{
charpos = saved_charpos, bytepos = saved_bytepos;
goto no_composition;
}
cmp_it->reversed_p = 1;
}
if (NILP (LGSTRING_ID (val)))
val = composition_gstring_put_cache (val, -1);
cmp_it->id = XINT (LGSTRING_ID (val));
......@@ -1164,22 +1365,40 @@ composition_reseat_it (cmp_it, charpos, bytepos, endpos, w, face, string)
if (NILP (LGSTRING_GLYPH (val, i)))
break;
cmp_it->nglyphs = i;
cmp_it->from = 0;
cmp_it->to = i;
}
else
goto no_composition;
cmp_it->from = 0;
return 1;
no_composition:
charpos++;
if (STRINGP (string))
bytepos += MULTIBYTE_LENGTH_NO_CHECK (SDATA (string) + bytepos);
if (charpos == endpos)
return 0;
if (charpos < endpos)
{
charpos++;
if (STRINGP (string))
bytepos += MULTIBYTE_LENGTH_NO_CHECK (SDATA (string) + bytepos);
else
INC_POS (bytepos);
}
else
INC_POS (bytepos);
{
charpos--;
/* BYTEPOS is calculated in composition_compute_stop_pos */
bytepos = -1;
}
composition_compute_stop_pos (cmp_it, charpos, bytepos, endpos, string);
return 0;
}
/* Update nchars, nbytes, and width of the current grapheme cluster
which is identified by CMP_IT->from (if the composition is static
or automatic in l2r context) or CMP_IT->to (if the composition is
automatic in r2l context). In addition, in the former case, update
CMP_IT->to, and in the latter case, update CMP_IT->from. */
int
composition_update_it (cmp_it, charpos, bytepos, string)
struct composition_it *cmp_it;
......@@ -1215,7 +1434,7 @@ composition_update_it (cmp_it, charpos, bytepos, string)
cmp_it->nchars = LGSTRING_CHAR_LEN (gstring);
cmp_it->width = 0;
}
else
else if (! cmp_it->reversed_p)
{
Lisp_Object glyph = LGSTRING_GLYPH (gstring, cmp_it->from);
int from = LGLYPH_FROM (glyph);
......@@ -1234,6 +1453,33 @@ composition_update_it (cmp_it, charpos, bytepos, string)
cmp_it->width += CHAR_WIDTH (LGLYPH_CHAR (glyph));
}
}
else
{
int from_idx = cmp_it->to - 1;
Lisp_Object glyph = LGSTRING_GLYPH (gstring, from_idx);
int from = LGLYPH_FROM (glyph);
c = XINT (LGSTRING_CHAR (gstring, from));
cmp_it->nchars = LGLYPH_TO (glyph) - from + 1;
cmp_it->width = (LGLYPH_WIDTH (glyph) > 0
? CHAR_WIDTH (LGLYPH_CHAR (glyph)) : 0);
for (from_idx--; from_idx >= 0; from_idx--)
{
glyph = LGSTRING_GLYPH (gstring, from_idx);
if (LGLYPH_FROM (glyph) != from)
break;
if (LGLYPH_WIDTH (glyph) > 0)
cmp_it->width += CHAR_WIDTH (LGLYPH_CHAR (glyph));
}
cmp_it->from = from_idx + 1;
charpos -= cmp_it->nchars - 1;
bytepos += CHAR_BYTES (c);
if (STRINGP (string))
cmp_it->nbytes = bytepos - string_char_to_byte (string, charpos);
else
cmp_it->nbytes = bytepos - CHAR_TO_BYTE (charpos);
return c;
}
}
charpos += cmp_it->nchars;
......@@ -1279,17 +1525,6 @@ struct position_record
(POSITION).pos--; \
} while (0)
/* Scratch variables assigned by CHAR_COMPOSABLE_P while it is being
   evaluated. */
static Lisp_Object _work_val;
static int _work_char;
/* 1 iff the character C is composable.

   C counts as composable when it is 0x200C or 0x200D, or when its
   entry in Vunicode_category_table is a symbol whose name begins with
   neither 'C' nor 'Z'.  The expansion mentions C more than once and
   writes to _work_val and _work_char, so the argument should be an
   expression without side effects. */
#define CHAR_COMPOSABLE_P(C) \
((C) == 0x200C || (C) == 0x200D \
|| (_work_val = CHAR_TABLE_REF (Vunicode_category_table, (C)), \
(SYMBOLP (_work_val) \
&& (_work_char = SDATA (SYMBOL_NAME (_work_val))[0]) != 'C' \
&& _work_char != 'Z')))
/* This is like find_composition, but find an automatic composition
instead. If found, set *GSTRING to the glyph-string representing
the composition, and return 1. Otherwise, return 0. */
......
......@@ -1987,6 +1987,10 @@ struct composition_it
graphic display and in units of canonical characters on a
terminal display. */
int width;
/* Nonzero iff the composition is created while buffer is scanned in
reverse order, and thus the grapheme clusters must be rendered
from the last to the first. */
int reversed_p;
};
struct it
......
......@@ -6281,25 +6281,96 @@ set_iterator_to_next (it, reseat_p)
reseat_at_next_visible_line_start (it, 0);
else if (it->cmp_it.id >= 0)
{
IT_CHARPOS (*it) += it->cmp_it.nchars;
IT_BYTEPOS (*it) += it->cmp_it.nbytes;
if (it->bidi_p)
/* We are currently getting glyphs from a composition. */
int i;
if (! it->bidi_p)
{
if (it->bidi_it.new_paragraph)
bidi_paragraph_init (it->paragraph_embedding, &it->bidi_it);
/* Resync the bidi iterator with IT's new position.
FIXME: this doesn't support bidirectional text. */
while (it->bidi_it.charpos < IT_CHARPOS (*it))
IT_CHARPOS (*it) += it->cmp_it.nchars;
IT_BYTEPOS (*it) += it->cmp_it.nbytes;
if (it->cmp_it.to < it->cmp_it.nglyphs)
{
it->cmp_it.from = it->cmp_it.to;
}
else
{
it->cmp_it.id = -1;
composition_compute_stop_pos (&it->cmp_it, IT_CHARPOS (*it),
IT_BYTEPOS (*it),
it->stop_charpos, Qnil);
}
}
else if (! it->cmp_it.reversed_p)
{
/* Composition created while scanning forward. */
/* Update IT's char/byte positions to point to the first
character of the next grapheme cluster, or to the
character visually after the current composition. */
#if 0
/* Is it ok to do this directly? */
IT_CHARPOS (*it) += it->cmp_it.nchars;
IT_BYTEPOS (*it) += it->cmp_it.nbytes;
#else
/* Or do we have to call bidi_get_next_char_visually
repeatedly (perhaps not to confuse some internal
state of bidi_it)? At least we must do this if we
have consumed all grapheme clusters in the current
composition because the next character will be in the
different bidi level. */
for (i = 0; i < it->cmp_it.nchars; i++)
bidi_get_next_char_visually (&it->bidi_it);
/* BTW, it seems that the name
bidi_get_next_char_visually is confusing because
it sounds like not advancing character position.
How about bidi_set_iterator_to_next? */
IT_BYTEPOS (*it) = it->bidi_it.bytepos;
IT_CHARPOS (*it) = it->bidi_it.charpos;
#endif
if (it->cmp_it.to < it->cmp_it.nglyphs)
{
/* Proceed to the next grapheme cluster. */
it->cmp_it.from = it->cmp_it.to;
}
else
{
/* No more grapheme cluster in this composition.
Find the next stop position. */
EMACS_INT stop = it->stop_charpos;
if (it->bidi_it.scan_dir < 0)
/* Now we are scanning backward and don't know
where to stop. */
stop = -1;
composition_compute_stop_pos (&it->cmp_it, IT_CHARPOS (*it),
IT_BYTEPOS (*it), stop, Qnil);
}
}
if (it->cmp_it.to < it->cmp_it.nglyphs)
it->cmp_it.from = it->cmp_it.to;
else
{
it->cmp_it.id = -1;
composition_compute_stop_pos (&it->cmp_it, IT_CHARPOS (*it),
IT_BYTEPOS (*it), it->stop_charpos,
Qnil);
/* Composition created while scanning backward. */
/* Update IT's char/byte positions to point to the last
character of the previous grapheme cluster, or the
character visually after the current composition. */
bidi_get_next_char_visually (&it->bidi_it);
IT_BYTEPOS (*it) = it->bidi_it.bytepos;
IT_CHARPOS (*it) = it->bidi_it.charpos;
if (it->cmp_it.from > 0)
{
/* Proceed to the previous grapheme cluster. */
it->cmp_it.to = it->cmp_it.from;
}
else
{
/* No more grapheme cluster in this composition.
Find the next stop position. */
EMACS_INT stop = it->stop_charpos;
if (it->bidi_it.scan_dir < 0)
/* Now we are scanning backward and don't know
where to stop. */
stop = -1;
composition_compute_stop_pos (&it->cmp_it, IT_CHARPOS (*it),
IT_BYTEPOS (*it), stop, Qnil);
}
}
}
else
......@@ -6313,6 +6384,7 @@ set_iterator_to_next (it, reseat_p)
}
else
{
int prev_scan_dir = it->bidi_it.scan_dir;
/* If this is a new paragraph, determine its base
direction (a.k.a. its base embedding level). */
if (it->bidi_it.new_paragraph)
......@@ -6320,6 +6392,16 @@ set_iterator_to_next (it, reseat_p)
bidi_get_next_char_visually (&it->bidi_it);
IT_BYTEPOS (*it) = it->bidi_it.bytepos;
IT_CHARPOS (*it) = it->bidi_it.charpos;
if (prev_scan_dir != it->bidi_it.scan_dir)
{
/* As scan direction was changed, we must re-compute
the stop position for composition. */
EMACS_INT stop = it->stop_charpos;
if (it->bidi_it.scan_dir < 0)
stop = -1;
composition_compute_stop_pos (&it->cmp_it, IT_CHARPOS (*it),
IT_BYTEPOS (*it), stop, Qnil);
}
}
xassert (IT_BYTEPOS (*it) == CHAR_TO_BYTE (IT_CHARPOS (*it)));
}
......@@ -6816,6 +6898,13 @@ next_element_from_buffer (it)
IT_CHARPOS (*it) = it->bidi_it.charpos;
IT_BYTEPOS (*it) = it->bidi_it.bytepos;
SET_TEXT_POS (it->position, IT_CHARPOS (*it), IT_BYTEPOS (*it));
{
EMACS_INT stop = it->stop_charpos;
if (it->bidi_it.scan_dir < 0)
stop = -1;
composition_compute_stop_pos (&it->cmp_it, IT_CHARPOS (*it),
IT_BYTEPOS (*it), stop, Qnil);
}
}
if (IT_CHARPOS (*it) >= it->stop_charpos)
......@@ -6893,6 +6982,7 @@ next_element_from_buffer (it)
/* No face changes, overlays etc. in sight, so just return a
character from current_buffer. */
unsigned char *p;
EMACS_INT stop;
/* Maybe run the redisplay end trigger hook. Performance note:
This doesn't seem to cost measurable time. */
......@@ -6901,8 +6991,9 @@ next_element_from_buffer (it)