Commit bdd9fb48 authored by Kenichi Handa's avatar Kenichi Handa

(Valternate_charset_table): The valiable deleted.

(Venable_character_unification,
Vstandard_character_unification_table_for_read,
Vstandard_character_unification_table_for_write,
Qcharacter_unification_table): New variables.
(syms_of_coding): Initialize and declare them.
(DECODE_ISO_CHARACTER): Modified to handle a character unification
table instead of Valternate_charset_table.
(DECODE_DESIGNATION): Delete handling of Valternate_charset_table.
(decode_coding_iso2022): Handle a character unification table.
(ENCODE_ISO_CHARACTER): New macro.
(encode_designation_at_bol): Handle a character unification table.
Do not return -1 even if end-of-line is not in the current run.
(encode_coding_iso2022): Handle a character unification table.
Call macro ENCODE_ISO_CHARACTER instead of calling
ENCODE_ISO_CHARACTER_DIMENSION1 and
ENCODE_ISO_CHARACTER_DIMENSION2 directly.  Check the size of
carryover before copying it to destination.
(setup_coding_system): Initialize the member
character_unification_table of the struct coding system to Qnil.
(Fset_keyboard_coding_system): Doc string augmented.
parent ceb58510
......@@ -303,8 +303,15 @@ char *coding_category_name[CODING_CATEGORY_IDX_MAX] = {
"coding-category-binary"
};
/* Alist of charsets vs the alternate charsets. */
Lisp_Object Valternate_charset_table;
/* Flag to tell if we look up unification table on character code
conversion. */
Lisp_Object Venable_character_unification;
/* Standard unification table to look up on reading (decoding). */
Lisp_Object Vstandard_character_unification_table_for_read;
/* Standard unification table to look up on writing (encoding). */
Lisp_Object Vstandard_character_unification_table_for_write;
Lisp_Object Qcharacter_unification_table;
/* Alist of charsets vs revision number. */
Lisp_Object Vcharset_revision_alist;
......@@ -650,44 +657,46 @@ detect_coding_iso2022 (src, src_end)
}
/* Decode a character of which charset is CHARSET and the 1st position
code is C1. If dimension of CHARSET 2, the 2nd position code is
code is C1. If dimension of CHARSET is 2, the 2nd position code is
fetched from SRC and set to C2. If CHARSET is negative, it means
that we are decoding ill formed text, and what we can do is just to
read C1 as is. */
#define DECODE_ISO_CHARACTER(charset, c1) \
do { \
if ((charset) >= 0 && CHARSET_DIMENSION (charset) == 2) \
ONE_MORE_BYTE (c2); \
if (COMPOSING_HEAD_P (coding->composing)) \
{ \
*dst++ = LEADING_CODE_COMPOSITION; \
if (COMPOSING_WITH_RULE_P (coding->composing)) \
/* To tell composition rules are embeded. */ \
*dst++ = 0xFF; \
coding->composing += 2; \
} \
if ((charset) < 0) \
*dst++ = c1; \
else if ((charset) == CHARSET_ASCII) \
DECODE_CHARACTER_ASCII (c1); \
else if (CHARSET_DIMENSION (charset) == 1) \
DECODE_CHARACTER_DIMENSION1 (charset, c1); \
else \
DECODE_CHARACTER_DIMENSION2 (charset, c1, c2); \
if (COMPOSING_WITH_RULE_P (coding->composing)) \
/* To tell a composition rule follows. */ \
coding->composing = COMPOSING_WITH_RULE_RULE; \
#define DECODE_ISO_CHARACTER(charset, c1) \
do { \
int c_alt, charset_alt = (charset); \
if (COMPOSING_HEAD_P (coding->composing)) \
{ \
*dst++ = LEADING_CODE_COMPOSITION; \
if (COMPOSING_WITH_RULE_P (coding->composing)) \
/* To tell composition rules are embeded. */ \
*dst++ = 0xFF; \
coding->composing += 2; \
} \
if ((charset) >= 0) \
{ \
if (CHARSET_DIMENSION (charset) == 2) \
ONE_MORE_BYTE (c2); \
if (!NILP (unification_table) \
&& ((c_alt = unify_char (unification_table, \
-1, (charset), c1, c2)) >= 0)) \
SPLIT_CHAR (c_alt, charset_alt, c1, c2); \
} \
if (charset_alt == CHARSET_ASCII || charset_alt < 0) \
DECODE_CHARACTER_ASCII (c1); \
else if (CHARSET_DIMENSION (charset_alt) == 1) \
DECODE_CHARACTER_DIMENSION1 (charset_alt, c1); \
else \
DECODE_CHARACTER_DIMENSION2 (charset_alt, c1, c2); \
if (COMPOSING_WITH_RULE_P (coding->composing)) \
/* To tell a composition rule follows. */ \
coding->composing = COMPOSING_WITH_RULE_RULE; \
} while (0)
/* Set designation state into CODING. */
#define DECODE_DESIGNATION(reg, dimension, chars, final_char) \
do { \
int charset = ISO_CHARSET_TABLE (dimension, chars, final_char); \
Lisp_Object temp \
= Fassq (CHARSET_SYMBOL (charset), Valternate_charset_table); \
if (! NILP (temp)) \
charset = get_charset_id (XCONS (temp)->cdr); \
if (charset >= 0) \
{ \
if (coding->direction == 1 \
......@@ -719,6 +728,10 @@ decode_coding_iso2022 (coding, source, destination,
/* Charsets invoked to graphic plane 0 and 1 respectively. */
int charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
int charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
Lisp_Object unification_table = coding->character_unification_table;
if (!NILP (Venable_character_unification) && NILP (unification_table))
unification_table = Vstandard_character_unification_table_for_read;
while (src < src_end && dst < adjusted_dst_end)
{
......@@ -728,7 +741,7 @@ decode_coding_iso2022 (coding, source, destination,
ONE_MORE_BYTE or TWO_MORE_BYTES). In that case, SRC is reset
to SRC_BASE before exiting. */
unsigned char *src_base = src;
unsigned char c1 = *src++, c2, cmprule;
int c1 = *src++, c2;
switch (iso_code_class [c1])
{
......@@ -1167,6 +1180,21 @@ decode_coding_iso2022 (coding, source, destination,
dst = encode_invocation_designation (charset, coding, dst); \
} while (1)
#define ENCODE_ISO_CHARACTER(charset, c1, c2) \
do { \
int c_alt, charset_alt; \
if (!NILP (unification_table) \
&& ((c_alt = unify_char (unification_table, -1, charset, c1, c2)) \
< 0)) \
SPLIT_CHAR (c_alt, charset_alt, c1, c2); \
else \
charset_alt = charset; \
if (CHARSET_DIMENSION (charset_alt) == 1) \
ENCODE_ISO_CHARACTER_DIMENSION1 (charset_alt, c1); \
else \
ENCODE_ISO_CHARACTER_DIMENSION2 (charset_alt, c1, c2); \
} while (0)
/* Produce designation and invocation codes at a place pointed by DST
to use CHARSET. The element `spec.iso2022' of *CODING is updated.
Return new DST. */
......@@ -1266,48 +1294,57 @@ encode_invocation_designation (charset, coding, dst)
(CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg), reg, coding); \
} while (0)
int
encode_designation_at_bol (coding, src, src_end, dstp)
/* Produce designation sequences of charsets in the line started from
*SRC to a place pointed by DSTP.
If the current block ends before any end-of-line, we may fail to
find all the necessary *designations. */
encode_designation_at_bol (coding, table, src, src_end, dstp)
struct coding_system *coding;
Lisp_Object table;
unsigned char *src, *src_end, **dstp;
{
int charset, reg, r[4];
unsigned char *dst = *dstp, c;
for (reg = 0; reg < 4; reg++) r[reg] = -1;
while (src < src_end && (c = *src++) != '\n')
int charset, c, found = 0, reg;
/* Table of charsets to be designated to each graphic register. */
int r[4];
unsigned char *dst = *dstp;
for (reg = 0; reg < 4; reg++)
r[reg] = -1;
while (src < src_end && *src != '\n' && found < 4)
{
switch (emacs_code_class[c])
int bytes = BYTES_BY_CHAR_HEAD (*src);
if (NILP (table))
charset = CHARSET_AT (src);
else
{
case EMACS_ascii_code:
charset = CHARSET_ASCII;
break;
case EMACS_leading_code_2:
if (++src >= src_end) continue;
charset = c;
break;
case EMACS_leading_code_3:
if ((src += 2) >= src_end) continue;
charset = (c < LEADING_CODE_PRIVATE_11 ? c : *(src - 2));
break;
case EMACS_leading_code_4:
if ((src += 3) >= src_end) continue;
charset = *(src - 3);
break;
default:
continue;
int c_alt, c1, c2;
SPLIT_STRING(src, bytes, charset, c1, c2);
if ((c_alt = unify_char (table, -1, charset, c1, c2)) >= 0)
charset = CHAR_CHARSET (c_alt);
}
reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);
if (r[reg] < 0
&& CODING_SPEC_ISO_DESIGNATION (coding, reg) != charset)
r[reg] = charset;
if (r[reg] < 0)
{
found++;
r[reg] = charset;
}
src += bytes;
}
if (found)
{
for (reg = 0; reg < 4; reg++)
if (r[reg] >= 0
&& CODING_SPEC_ISO_DESIGNATION (coding, reg) != r[reg])
ENCODE_DESIGNATION (r[reg], reg, coding);
*dstp = dst;
}
if (c != '\n' && !coding->last_block)
return -1;
for (reg = 0; reg < 4; reg++)
if (r[reg] >= 0)
ENCODE_DESIGNATION (r[reg], reg, coding);
*dstp = dst;
return 0;
}
/* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions". */
......@@ -1328,6 +1365,10 @@ encode_coding_iso2022 (coding, source, destination,
from DST_END to assure overflow checking is necessary only at the
head of loop. */
unsigned char *adjusted_dst_end = dst_end - 19;
Lisp_Object unification_table = coding->character_unification_table;
if (!NILP (Venable_character_unification) && NILP (unification_table))
unification_table = Vstandard_character_unification_table_for_write;
while (src < src_end && dst < adjusted_dst_end)
{
......@@ -1337,18 +1378,14 @@ encode_coding_iso2022 (coding, source, destination,
TWO_MORE_BYTES, and THREE_MORE_BYTES). In that case, SRC is
reset to SRC_BASE before exiting. */
unsigned char *src_base = src;
unsigned char c1, c2, c3, c4;
int charset;
int charset, c1, c2, c3, c4;
if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL
&& CODING_SPEC_ISO_BOL (coding))
{
/* We have to produce destination sequences now. */
if (encode_designation_at_bol (coding, src, src_end, &dst) < 0)
/* We can't find end of line in the current block. Let's
repeat encoding starting from the current position
pointed by SRC. */
break;
/* We have to produce designation sequences if any now. */
encode_designation_at_bol (coding, unification_table,
src, src_end, &dst);
CODING_SPEC_ISO_BOL (coding) = 0;
}
......@@ -1393,7 +1430,7 @@ encode_coding_iso2022 (coding, source, destination,
switch (emacs_code_class[c1])
{
case EMACS_ascii_code:
ENCODE_ISO_CHARACTER_DIMENSION1 (CHARSET_ASCII, c1);
ENCODE_ISO_CHARACTER (CHARSET_ASCII, c1, /* dummy */ c2);
break;
case EMACS_control_code:
......@@ -1431,20 +1468,20 @@ encode_coding_iso2022 (coding, source, destination,
case EMACS_leading_code_2:
ONE_MORE_BYTE (c2);
ENCODE_ISO_CHARACTER_DIMENSION1 (c1, c2);
ENCODE_ISO_CHARACTER (c1, c2, /* dummy */ c3);
break;
case EMACS_leading_code_3:
TWO_MORE_BYTES (c2, c3);
if (c1 < LEADING_CODE_PRIVATE_11)
ENCODE_ISO_CHARACTER_DIMENSION2 (c1, c2, c3);
ENCODE_ISO_CHARACTER (c1, c2, c3);
else
ENCODE_ISO_CHARACTER_DIMENSION1 (c2, c3);
ENCODE_ISO_CHARACTER (c2, c3, /* dummy */ c4);
break;
case EMACS_leading_code_4:
THREE_MORE_BYTES (c2, c3, c4);
ENCODE_ISO_CHARACTER_DIMENSION2 (c2, c3, c4);
ENCODE_ISO_CHARACTER (c2, c3, c4);
break;
case EMACS_leading_code_composition:
......@@ -1472,20 +1509,21 @@ encode_coding_iso2022 (coding, source, destination,
label_end_of_loop:
coding->carryover_size = src - src_base;
bcopy (src_base, coding->carryover, coding->carryover_size);
src = src_base;
break;
}
/* If this is the last block of the text to be encoded, we must
reset the state of graphic planes and registers to initial one.
In addition, we had better just flush out all remaining codes in
the text although they are not valid characters. */
if (coding->last_block)
reset graphic planes and registers to the initial state. */
if (src >= src_end && coding->last_block)
{
ENCODE_RESET_PLANE_AND_REGISTER;
bcopy(src, dst, src_end - src);
dst += (src_end - src);
src = src_end;
if (coding->carryover_size > 0
&& coding->carryover_size < (dst_end - dst))
{
bcopy (coding->carryover, dst, coding->carryover_size);
dst += coding->carryover_size;
coding->carryover_size = 0;
}
}
*consumed = src - source;
return dst - destination;
......@@ -2063,6 +2101,9 @@ setup_coding_system (coding_system, coding)
coding->direction = 0;
coding->carryover_size = 0;
coding->post_read_conversion = coding->pre_write_conversion = Qnil;
/* We have not yet implemented a way to specify unification table in
a coding system. */
coding->character_unification_table = Qnil;
Vlast_coding_system_used = coding->symbol = coding_system;
eol_type = Qnil;
......@@ -3316,10 +3357,13 @@ DEFUN ("terminal-coding-system",
}
DEFUN ("set-keyboard-coding-system",
Fset_keyboard_coding_system, Sset_keyboard_coding_system, 1, 1,
"zCoding-system for keyboard input: ",
"Set coding-system of what is sent from terminal keyboard to CODING-SYSTEM.\n\
All inputs from terminal are decoded from this coding-system.")
Fset_keyboard_coding_system, Sset_keyboard_coding_system, 1, 1, 0,
"Set coding-system of codes sent from terminal keyboard to CODING-SYSTEM.\n\
In Encoded-kbd minor mode, user inputs are decoded\n\
accoding to CODING-SYSTEM.\n\
Do not call this function directly, but use the command\n\
encoded-kbd-set-coding-system to activate Encoded-kbd mode\n\
with a specific coding system.")
(coding_system)
Lisp_Object coding_system;
{
......@@ -3529,6 +3573,11 @@ syms_of_coding ()
}
}
Qcharacter_unification_table = intern ("character-unification-table");
staticpro (&Qcharacter_unification_table);
Fput (Qcharacter_unification_table, Qchar_table_extra_slots,
make_number (0));
defsubr (&Scoding_system_vector);
defsubr (&Scoding_system_p);
defsubr (&Sread_coding_system);
......@@ -3613,11 +3662,19 @@ See the documentation of `find-coding-system' for more detail.");
"Mnemonic character indicating end-of-line format is not yet decided.");
eol_mnemonic_undecided = '-';
DEFVAR_LISP ("alternate-charset-table", &Valternate_charset_table,
"Alist of charsets vs the alternate charsets.\n\
While decoding, if a charset (car part of an element) is found,\n\
decode it as the alternate charset (cdr part of the element).");
Valternate_charset_table = Qnil;
DEFVAR_LISP ("enable-character-unification", &Venable_character_unification,
"Non-nil means ISO 2022 encoder/decoder do character unification.");
Venable_character_unification = Qt;
DEFVAR_LISP ("standard-character-unification-table-for-read",
&Vstandard_character_unification_table_for_read,
"Table for unifying characters when reading.");
Vstandard_character_unification_table_for_read = Qnil;
DEFVAR_LISP ("standard-character-unification-table-for-write",
&Vstandard_character_unification_table_for_write,
"Table for unifying characters when writing.");
Vstandard_character_unification_table_for_write = Qnil;
DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_alist,
"Alist of charsets vs revision numbers.\n\
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment