Commit d419e1d9 authored by Kenichi Handa
Browse files

Fix handling of 8-bit characters in a display table.

parent b295da47
2010-08-31 Kenichi Handa <handa@m17n.org>
* international/mule-cmds.el (standard-display-european-internal):
Setup standard-display-table for 8-bit characters by storing 8-bit
characters in the element vector.
* disp-table.el (standard-display-8bit): Setup
standard-display-table for 8-bit characters by storing 8-bit
characters in the element vector.
(standard-display-european): Likewise.
2010-08-26 Michael Albinus <michael.albinus@gmx.de>
Sync with Tramp 2.1.19.
......
......@@ -109,11 +109,27 @@ Valid symbols are `truncation', `wrap', `escape', `control',
;;;###autoload
(defun standard-display-8bit (l h)
"Display characters in the range L to H literally."
"Display characters representing raw bytes in the range L to H literally.
On a terminal display, each character in the range is displayed
by sending the corresponding byte directly to the terminal.
On a graphic display, each character in the range is displayed
using the default font by a glyph whose code is the corresponding
byte.
Note that ASCII printable characters (SPC to TILDE) are displayed
in the default way after this call."
(or standard-display-table
(setq standard-display-table (make-display-table)))
(if (> h 255)
(setq h 255))
(while (<= l h)
(aset standard-display-table l (if (or (< l ?\s) (>= l 127)) (vector l)))
(if (< l 128)
(aset standard-display-table l
(if (or (< l ?\s) (= l 127)) (vector l)))
(let ((c (unibyte-char-to-multibyte l)))
(aset standard-display-table c (vector c))))
(setq l (1+ l))))
;;;###autoload
......@@ -235,9 +251,12 @@ in `.emacs'."
(and (null arg)
(char-table-p standard-display-table)
;; Test 161, because 160 displays as a space.
(equal (aref standard-display-table 161) [161])))
(equal (aref standard-display-table
(unibyte-char-to-multibyte 161))
(vector (unibyte-char-to-multibyte 161)))))
(progn
(standard-display-default 160 255)
(standard-display-default
(unibyte-char-to-multibyte 160) (unibyte-char-to-multibyte 255))
(unless (or (memq window-system '(x w32 ns)))
(and (terminal-coding-system)
(set-terminal-coding-system nil))))
......
......@@ -1953,7 +1953,7 @@ See `set-language-info-alist' for use in programs."
(> (aref (number-to-string (nth 2 (x-server-version))) 0)
?3))
;; Make non-line-break space display as a plain space.
(aset standard-display-table 160 [32]))
(aset standard-display-table (unibyte-char-to-multibyte 160) [32]))
;; Most Windows programs send out apostrophes as \222. Most X fonts
;; don't contain a character at that position. Map it to the ASCII
;; apostrophe. [This is actually RIGHT SINGLE QUOTATION MARK,
......@@ -1961,7 +1961,7 @@ See `set-language-info-alist' for use in programs."
;; fonts probably have the appropriate glyph at this position,
;; so they could use standard-display-8bit. It's better to use a
;; proper windows-1252 coding system. --fx]
(aset standard-display-table 146 [39]))))
(aset standard-display-table (unibyte-char-to-multibyte 146) [39]))))
(defun set-language-environment-coding-systems (language-name)
"Do various coding system setups for language environment LANGUAGE-NAME."
......
2010-08-31 Kenichi Handa <handa@m17n.org>
* dispextern.h (FACE_FOR_CHAR): Use an ASCII face for 8-bit
characters.
* term.c (encode_terminal_code): Fix the previous change.
(produce_glyphs): Don't set it->char_to_display here. Don't
handle unibyte-display-via-language-environment here.
(produce_special_glyphs): Set temp_it.char_to_display before
calling produce_glyphs.
* xdisp.c (get_next_display_element): Set it->char_to_display
here. Convert all 8-bit bytes from unibyte buffer/string to 8-bit
characters.
(get_overlay_arrow_glyph_row): Set it.char_to_display too before
calling PRODUCE_GLYPHS.
(append_space_for_newline): Save and store it->char_to_display.
Set it->char_to_display before calling PRODUCE_GLYPHS.
(extend_face_to_end_of_line): Set it->char_to_display before
calling PRODUCE_GLYPHS.
(get_glyph_face_and_encoding): Set the glyph code of an 8-bit
character to its byte value.
(get_char_glyph_code): New function.
(produce_stretch_glyph): Set it2.char_to_display too before
calling x_produce_glyphs.
(x_produce_glyphs): Simplify by using the same code for ASCII and
non-ASCII characters. Don't set it->char_to_display here. Don't
handle unibyte-display-via-language-environment here. For a
character of no glyph, use font->space_width instead of FONT_WIDTH.
2010-08-29 Kenichi Handa <handa@m17n.org>
* term.c (encode_terminal_code): Encode byte chars to the
......
......@@ -1694,7 +1694,7 @@ struct face_cache
This macro is only meaningful for multibyte character CHAR. */
#define FACE_FOR_CHAR(F, FACE, CHAR, POS, OBJECT) \
(ASCII_CHAR_P (CHAR) \
((ASCII_CHAR_P (CHAR) || CHAR_BYTE8_P (CHAR)) \
? (FACE)->ascii_face->id \
: face_for_char ((F), (FACE), (CHAR), (POS), (OBJECT)))
......@@ -2131,9 +2131,11 @@ struct it
composition. */
struct composition_it cmp_it;
/* The character to display, possibly translated to multibyte
if unibyte_display_via_language_environment is set. This
is set after produce_glyphs has been called. */
/* The character to display, possibly translated to multibyte if
multibyte_p is zero or unibyte_display_via_language_environment
is set. This is set after get_next_display_element has been
called. If we are setting it->C directly before calling
PRODUCE_GLYPHS, this should be set beforehand too. */
int char_to_display;
/* If what == IT_IMAGE, the id of the image to display. */
......
......@@ -695,12 +695,8 @@ encode_terminal_code (src, src_len, coding)
encode_terminal_src_size);
buf = encode_terminal_src + nbytes;
}
if (CHAR_BYTE8_P (c))
{
*buf++ = CHAR_TO_BYTE8 (c);
nchars++;
}
else if (char_charset (c, charset_list, NULL))
if (CHAR_BYTE8_P (c)
|| char_charset (c, charset_list, NULL))
{
/* Store the multibyte form of C at BUF. */
buf += CHAR_STRING (c, buf);
......@@ -1610,18 +1606,15 @@ produce_glyphs (it)
goto done;
}
/* Maybe translate single-byte characters to multibyte. */
it->char_to_display = it->c;
if (it->c >= 040 && it->c < 0177)
if (it->char_to_display >= 040 && it->char_to_display < 0177)
{
it->pixel_width = it->nglyphs = 1;
if (it->glyph_row)
append_glyph (it);
}
else if (it->c == '\n')
else if (it->char_to_display == '\n')
it->pixel_width = it->nglyphs = 0;
else if (it->c == '\t')
else if (it->char_to_display == '\t')
{
int absolute_x = (it->current_x
+ it->continuation_lines_width);
......@@ -1652,32 +1645,19 @@ produce_glyphs (it)
it->pixel_width = nspaces;
it->nglyphs = nspaces;
}
else if (CHAR_BYTE8_P (it->c))
else if (CHAR_BYTE8_P (it->char_to_display))
{
if (unibyte_display_via_language_environment
&& (it->c >= 0240))
{
it->char_to_display = BYTE8_TO_CHAR (it->c);
it->pixel_width = CHAR_WIDTH (it->char_to_display);
it->nglyphs = it->pixel_width;
if (it->glyph_row)
append_glyph (it);
}
else
{
/* Coming here means that it->c is from display table, thus
we must send the raw 8-bit byte as is to the terminal.
Although there's no way to know how many columns it
occupies on a screen, it is a good assumption that a
single byte code has 1-column width. */
it->pixel_width = it->nglyphs = 1;
if (it->glyph_row)
append_glyph (it);
}
/* Coming here means that we must send the raw 8-bit byte as is
to the terminal. Although there's no way to know how many
columns it occupies on a screen, it is a good assumption that
a single byte code has 1-column width. */
it->pixel_width = it->nglyphs = 1;
if (it->glyph_row)
append_glyph (it);
}
else
{
it->pixel_width = CHAR_WIDTH (it->c);
it->pixel_width = CHAR_WIDTH (it->char_to_display);
it->nglyphs = it->pixel_width;
if (it->glyph_row)
......@@ -1892,7 +1872,7 @@ produce_special_glyphs (it, what)
else
abort ();
temp_it.c = GLYPH_CHAR (glyph);
temp_it.c = temp_it.char_to_display = GLYPH_CHAR (glyph);
temp_it.face_id = GLYPH_FACE (glyph);
temp_it.len = CHAR_BYTES (temp_it.c);
......
......@@ -5664,11 +5664,24 @@ get_next_display_element (it)
Lisp_Object dv;
struct charset *unibyte = CHARSET_FROM_ID (charset_unibyte);
enum { char_is_other = 0, char_is_nbsp, char_is_soft_hyphen }
nbsp_or_shy = char_is_other;
int decoded = it->c;
nbsp_or_shy = char_is_other;
int c = it->c; /* This is the character to display. */
if (! it->multibyte_p && ! ASCII_CHAR_P (c))
{
xassert (SINGLE_BYTE_CHAR_P (c));
if (unibyte_display_via_language_environment)
{
c = DECODE_CHAR (unibyte, c);
if (c < 0)
c = BYTE8_TO_CHAR (it->c);
}
else
c = BYTE8_TO_CHAR (it->c);
}
if (it->dp
&& (dv = DISP_CHAR_VECTOR (it->dp, it->c),
&& (dv = DISP_CHAR_VECTOR (it->dp, c),
VECTORP (dv)))
{
struct Lisp_Vector *v = XVECTOR (dv);
......@@ -5694,21 +5707,10 @@ get_next_display_element (it)
goto get_next;
}
if (unibyte_display_via_language_environment
&& !ASCII_CHAR_P (it->c))
decoded = DECODE_CHAR (unibyte, it->c);
if (it->c >= 0x80 && ! NILP (Vnobreak_char_display))
{
if (it->multibyte_p)
nbsp_or_shy = (it->c == 0xA0 ? char_is_nbsp
: it->c == 0xAD ? char_is_soft_hyphen
: char_is_other);
else if (unibyte_display_via_language_environment)
nbsp_or_shy = (decoded == 0xA0 ? char_is_nbsp
: decoded == 0xAD ? char_is_soft_hyphen
: char_is_other);
}
if (! ASCII_CHAR_P (c) && ! NILP (Vnobreak_char_display))
nbsp_or_shy = (c == 0xA0 ? char_is_nbsp
: c == 0xAD ? char_is_soft_hyphen
: char_is_other);
/* Translate control characters into `\003' or `^C' form.
Control characters coming from a display table entry are
......@@ -5716,27 +5718,23 @@ get_next_display_element (it)
the translation. This could easily be changed but I
don't believe that it is worth doing.
If it->multibyte_p is nonzero, non-printable non-ASCII
characters are also translated to octal form.
NBSP and SOFT-HYPHEN are properly translated too.
If it->multibyte_p is zero, eight-bit characters that
don't have corresponding multibyte char code are also
Non-printable characters and raw-byte characters are also
translated to octal form. */
if ((it->c < ' '
if (((c < ' ' || c == 127) /* ASCII control chars */
? (it->area != TEXT_AREA
/* In mode line, treat \n, \t like other crl chars. */
|| (it->c != '\t'
|| (c != '\t'
&& it->glyph_row
&& (it->glyph_row->mode_line_p || it->avoid_cursor_p))
|| (it->c != '\n' && it->c != '\t'))
|| (c != '\n' && c != '\t'))
: (nbsp_or_shy
|| (it->multibyte_p
? ! CHAR_PRINTABLE_P (it->c)
: (! unibyte_display_via_language_environment
? it->c >= 0x80
: (decoded >= 0x80 && decoded < 0xA0))))))
|| CHAR_BYTE8_P (c)
|| ! CHAR_PRINTABLE_P (c))))
{
/* IT->c is a control character which must be displayed
/* C is a control character, NBSP, SOFT-HYPHEN, raw-byte,
or a non-printable character which must be displayed
either as '\003' or as `^C' where the '\\' and '^'
can be defined in the display table. Fill
IT->ctl_chars with glyphs for what we have to
......@@ -5748,7 +5746,7 @@ get_next_display_element (it)
/* Handle control characters with ^. */
if (it->c < 128 && it->ctl_arrow_p)
if (ASCII_CHAR_P (c) && it->ctl_arrow_p)
{
int g;
......@@ -5781,7 +5779,7 @@ get_next_display_element (it)
}
XSETINT (it->ctl_chars[0], g);
XSETINT (it->ctl_chars[1], it->c ^ 0100);
XSETINT (it->ctl_chars[1], c ^ 0100);
ctl_len = 2;
goto display_control;
}
......@@ -5796,7 +5794,7 @@ get_next_display_element (it)
face_id = merge_faces (it->f, Qnobreak_space, 0,
it->face_id);
it->c = ' ';
c = ' ';
XSETINT (it->ctl_chars[0], ' ');
ctl_len = 1;
goto display_control;
......@@ -5842,7 +5840,6 @@ get_next_display_element (it)
if (EQ (Vnobreak_char_display, Qt)
&& nbsp_or_shy == char_is_soft_hyphen)
{
it->c = '-';
XSETINT (it->ctl_chars[0], '-');
ctl_len = 1;
goto display_control;
......@@ -5854,55 +5851,25 @@ get_next_display_element (it)
if (nbsp_or_shy)
{
XSETINT (it->ctl_chars[0], escape_glyph);
it->c = (nbsp_or_shy == char_is_nbsp ? ' ' : '-');
XSETINT (it->ctl_chars[1], it->c);
c = (nbsp_or_shy == char_is_nbsp ? ' ' : '-');
XSETINT (it->ctl_chars[1], c);
ctl_len = 2;
goto display_control;
}
{
unsigned char str[MAX_MULTIBYTE_LENGTH];
int len;
int i;
char str[10];
int len, i;
/* Set IT->ctl_chars[0] to the glyph for `\\'. */
if (CHAR_BYTE8_P (it->c))
{
str[0] = CHAR_TO_BYTE8 (it->c);
len = 1;
}
else if (it->c < 256)
{
str[0] = it->c;
len = 1;
}
else
{
/* It's an invalid character, which shouldn't
happen actually, but due to bugs it may
happen. Let's print the char as is, there's
not much meaningful we can do with it. */
str[0] = it->c;
str[1] = it->c >> 8;
str[2] = it->c >> 16;
str[3] = it->c >> 24;
len = 4;
}
if (CHAR_BYTE8_P (c))
/* Display \200 instead of \17777600. */
c = CHAR_TO_BYTE8 (c);
len = sprintf (str, "%03o", c);
XSETINT (it->ctl_chars[0], escape_glyph);
for (i = 0; i < len; i++)
{
int g;
XSETINT (it->ctl_chars[i * 4], escape_glyph);
/* Insert three more glyphs into IT->ctl_chars for
the octal display of the character. */
g = ((str[i] >> 6) & 7) + '0';
XSETINT (it->ctl_chars[i * 4 + 1], g);
g = ((str[i] >> 3) & 7) + '0';
XSETINT (it->ctl_chars[i * 4 + 2], g);
g = (str[i] & 7) + '0';
XSETINT (it->ctl_chars[i * 4 + 3], g);
}
ctl_len = len * 4;
XSETINT (it->ctl_chars[i + 1], str[i]);
ctl_len = len + 1;
}
display_control:
......@@ -5917,6 +5884,11 @@ get_next_display_element (it)
it->ellipsis_p = 0;
goto get_next;
}
it->char_to_display = c;
}
else if (success_p)
{
it->char_to_display = it->c;
}
}
......@@ -5943,7 +5915,8 @@ get_next_display_element (it)
: STRINGP (it->string) ? IT_STRING_CHARPOS (*it)
: IT_CHARPOS (*it));
it->face_id = FACE_FOR_CHAR (it->f, face, it->c, pos, it->string);
it->face_id = FACE_FOR_CHAR (it->f, face, it->char_to_display, pos,
it->string);
}
}
#endif
......@@ -15871,15 +15844,19 @@ get_overlay_arrow_glyph_row (w, overlay_arrow_string)
/* Get the next character. */
if (multibyte_p)
it.c = string_char_and_length (p, &it.len);
it.c = it.char_to_display = string_char_and_length (p, &it.len);
else
it.c = *p, it.len = 1;
{
it.c = it.char_to_display = *p, it.len = 1;
if (! ASCII_CHAR_P (it.c))
it.char_to_display = BYTE8_TO_CHAR (it.c);
}
p += it.len;
/* Get its face. */
ilisp = make_number (p - arrow_string);
face = Fget_text_property (ilisp, Qface, overlay_arrow_string);
it.face_id = compute_char_face (f, it.c, face);
it.face_id = compute_char_face (f, it.char_to_display, face);
/* Compute its width, get its glyphs. */
n_glyphs_before = it.glyph_row->used[TEXT_AREA];
......@@ -16078,6 +16055,7 @@ append_space_for_newline (it, default_face_p)
append_space_for_newline has been called. */
enum display_element_type saved_what = it->what;
int saved_c = it->c, saved_len = it->len;
int saved_char_to_display = it->char_to_display;
int saved_x = it->current_x;
int saved_face_id = it->face_id;
struct text_pos saved_pos;
......@@ -16090,7 +16068,7 @@ append_space_for_newline (it, default_face_p)
it->what = IT_CHARACTER;
bzero (&it->position, sizeof it->position);
it->object = make_number (0);
it->c = ' ';
it->c = it->char_to_display = ' ';
it->len = 1;
if (default_face_p)
......@@ -16111,6 +16089,7 @@ append_space_for_newline (it, default_face_p)
it->face_id = saved_face_id;
it->len = saved_len;
it->c = saved_c;
it->char_to_display = saved_char_to_display;
return 1;
}
}
......@@ -16190,7 +16169,7 @@ extend_face_to_end_of_line (it)
it->what = IT_CHARACTER;
bzero (&it->position, sizeof it->position);
it->object = make_number (0);
it->c = ' ';
it->c = it->char_to_display = ' ';
it->len = 1;
it->face_id = face->id;
......@@ -19519,7 +19498,12 @@ get_glyph_face_and_encoding (f, glyph, char2b, two_byte_p)
if (face->font)
{
unsigned code = face->font->driver->encode_char (face->font, glyph->u.ch);
unsigned code;
if (CHAR_BYTE8_P (glyph->u.ch))
code = CHAR_TO_BYTE8 (glyph->u.ch);
else
code = face->font->driver->encode_char (face->font, glyph->u.ch);
if (code != FONT_INVALID_CODE)
STORE_XCHAR2B (char2b, (code >> 8), (code & 0xFF));
......@@ -19534,6 +19518,26 @@ get_glyph_face_and_encoding (f, glyph, char2b, two_byte_p)
}
/* Look up the glyph code of character C in FONT and store it in the
   two-byte form CHAR2B.  When CHAR_BYTE8_P (C) holds, the font driver
   is not consulted; CHAR_TO_BYTE8 (C) is used as the glyph code.
   Return 1 if a glyph code was stored in CHAR2B.  Return 0, leaving
   CHAR2B untouched, if the code is FONT_INVALID_CODE.  */
static INLINE int
get_char_glyph_code (int c, struct font *font, XChar2b *char2b)
{
  unsigned glyph_code = (CHAR_BYTE8_P (c)
                         ? CHAR_TO_BYTE8 (c)
                         : font->driver->encode_char (font, c));

  if (glyph_code != FONT_INVALID_CODE)
    {
      /* Split the code into its high and low bytes.  */
      STORE_XCHAR2B (char2b, (glyph_code >> 8), (glyph_code & 0xFF));
      return 1;
    }
  return 0;
}
/* Fill glyph string S with composition components specified by S->cmp.
BASE_FACE is the base face of the composition.
......@@ -20906,10 +20910,14 @@ produce_stretch_glyph (it)
{
int maxlen = ((IT_BYTEPOS (*it) >= GPT ? ZV : GPT)
- IT_BYTEPOS (*it));
it2.c = STRING_CHAR_AND_LENGTH (p, it2.len);
it2.c = it2.char_to_display = STRING_CHAR_AND_LENGTH (p, it2.len);
}
else
it2.c = *p, it2.len = 1;
{
it2.c = it2.char_to_display = *p, it2.len = 1;
if (! ASCII_CHAR_P (it2.c))
it2.char_to_display = BYTE8_TO_CHAR (it2.c);
}
it2.glyph_row = NULL;
it2.what = IT_CHARACTER;
......@@ -21083,49 +21091,12 @@ x_produce_glyphs (it)
if (it->what == IT_CHARACTER)
{
XChar2b char2b;
struct font *font;
struct face *face = FACE_FROM_ID (it->f, it->face_id);
struct font_metrics *pcm;
int font_not_found_p;
struct font *font = face->font;
int font_not_found_p = font == NULL;
struct font_metrics *pcm = NULL;
int boff; /* baseline offset */
/* We may change it->multibyte_p upon unibyte<->multibyte
conversion. So, save the current value now and restore it
later.
Note: It seems that we don't have to record multibyte_p in
struct glyph because the character code itself tells whether
or not the character is multibyte. Thus, in the future, we
must consider eliminating the field `multibyte_p' in the
struct glyph. */
int saved_multibyte_p = it->multibyte_p;
/* Maybe translate single-byte characters to multibyte, or the
other way. */
it->char_to_display = it->c;
if (!ASCII_BYTE_P (it->c)
&& ! it->multibyte_p)
{
if (SINGLE_BYTE_CHAR_P (it->c)
&& unibyte_display_via_language_environment)
{
struct charset *unibyte = CHARSET_FROM_ID (charset_unibyte);
/* get_next_display_element assures that this decoding
never fails. */
it->char_to_display = DECODE_CHAR (unibyte, it->c);
it->multibyte_p = 1;
it->face_id = FACE_FOR_CHAR (it->f, face, it->char_to_display,
-1, Qnil);
face = FACE_FROM_ID (it->f, it->face_id);
}
}
/* Get font to use. Encode IT->char_to_display. */
get_char_face_and_encoding (it->f, it->char_to_display, it->face_id,
&char2b, it->multibyte_p, 0);
font = face->font;
font_not_found_p = font == NULL;
if (font_not_found_p)
{
/* When no suitable font found, display an empty box based
......@@ -21145,16 +21116,12 @@ x_produce_glyphs (it)
boff = VCENTER_BASELINE_OFFSET (font, it->f) - boff;
}
if (it->char_to_display >= ' '
&& (!it->multibyte_p || it->char_to_display < 128))
if (it->char_to_display != '\n' && it->char_to_display != '\t')
{
/* Either unibyte or ASCII. */
int stretched_p;
it->nglyphs = 1;
pcm = get_per_char_metric (it->f, font, &char2b);
if (it->override_ascent >= 0)
{
it->ascent = it->override_ascent;
......@@ -21167,6 +21134,15 @@ x_produce_glyphs (it)
it->descent = FONT_DESCENT (font) - boff;
}
if (! font_not_found_p
&& get_char_glyph_code (it->char_to_display, font, &char2b))
{
pcm = get_per_char_metric (it->f, font, &char2b);
if (pcm->width == 0
&& pcm->rbearing == 0 && pcm->lbearing == 0)
pcm = NULL;
}
if (pcm)
{
it->phys_ascent = pcm->ascent + boff;
......@@ -21178,7 +21154,7 @@ x_produce_glyphs (it)
it->glyph_not_available_p = 1;
it->phys_ascent = it->ascent;
it->phys_descent = it->descent;
it->pixel_width = FONT_WIDTH (font);
it->pixel_width = font->space_width;
}
if (it->constrain_row_ascent_descent_p)
......@@ -21352,7 +21328,7 @@ x_produce_glyphs (it)
}
}
}
else if (it->char_to_display == '\t')
else /* i.e. (it->char_to_display == '\t') */
{
if (font->space_width > 0)
{
......@@ -21383,85 +21359,6 @@ x_produce_glyphs (it)
it->nglyphs = 1;
}
}
else
{
/* A multi-byte character. Assume that the display width of the
character is the width of the character multiplied by the
width of the font. */
/* If we found a font, this font should give us the right
metrics. If we didn't find a font, use the frame's
default font and calculate the width of the character by
multiplying the width of font by the width of the
character. */
pcm = get_per_char_metric (it->f, font, &char2b);
if (font_not_found_p || !pcm)