Commit ff0dacd7 authored by Kenichi Handa's avatar Kenichi Handa

(CATEGORY_MASK_RAW_TEXT): New macro.

(detect_coding_utf_8, detect_coding_utf_16)
(detect_coding_emacs_mule, detect_coding_iso_2022)
(detect_coding_sjis, detect_coding_big5)
(detect_coding_ccl, detect_coding_charset): Change argument MASK
to DETECT_INFO.  Update DETECT_INFO and return 1 if the byte
sequence is valid in this coding system.  Callers changed.
(MAX_ANNOTATION_LENGTH): New macro.
(ADD_ANNOTATION_DATA): New macro.
(ADD_COMPOSITION_DATA): Argument changed.  Callers changed.  Call
ADD_ANNOTATION_DATA.  The format of annotation data changed.
(ADD_CHARSET_DATA): New macro.
(emacs_mule_char): New argument ID.  Callers changed.
(decode_coding_emacs_mule, decode_coding_iso_2022)
(decode_coding_sjis, decode_coding_big5, decode_coding_charset):
Produce charset annotation data in coding->charbuf.
(encode_coding_emacs_mule, encode_coding_iso_2022): Pay attention
to charset annotation data in coding->charbuf.
(setup_coding_system): Add CODING_ANNOTATE_CHARSET_MASK
to coding->common_flags if the coding system is iso-2022 based and
uses designation.
(produce_composition): Adjusted for the new annotation data
format.
(produce_charset): New function.
(produce_annotation): Handle charset annotation.
(handle_composition_annotation, handle_charset_annotation): New
functions.
(consume_chars): Handle charset annotation.  Utilize the above two
functions.
(encode_coding_object): If SRC_OBJECT and DST_OBJECT are the same
buffer, get the deleted text as a string and set
coding->src_object to that string.
(detect_coding, detect_coding_system): Use the new struct
coding_detection_info.
parent 4fecac5c
......@@ -144,26 +144,23 @@ STRUCT CODING_SYSTEM
/*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
These functions check if a byte sequence specified as a source in
CODING conforms to the format of XXX. Return 1 if the data contains
a byte sequence which can be decoded into non-ASCII characters by
the coding system. Otherwise (i.e. the data contains only ASCII
characters or invalid sequence) return 0.
CODING conforms to the format of XXX, and update the members of
DETECT_INFO.
It also resets some bits of an integer pointed to by MASK. The macros
CATEGORY_MASK_XXX specify each bit of this integer.
Return 1 if the byte sequence conforms to XXX, otherwise return 0.
Below is the template of these functions. */
#if 0
static int
detect_coding_XXX (coding, mask)
detect_coding_XXX (coding, detect_info)
struct coding_system *coding;
int *mask;
struct coding_detection_info *detect_info;
{
unsigned char *src = coding->source;
unsigned char *src_end = coding->source + coding->src_bytes;
int multibytep = coding->src_multibyte;
int c;
int consumed_chars = 0;
int found = 0;
...;
......@@ -172,18 +169,19 @@ detect_coding_XXX (coding, mask)
/* Get one byte from the source. If the source is exhausted, jump
to no_more_source:. */
ONE_MORE_BYTE (c);
/* Check if it conforms to XXX. If not, break the loop. */
if (! __C_conforms_to_XXX___ (c))
break;
if (! __C_strongly_suggests_XXX__ (c))
found = CATEGORY_MASK_XXX;
}
/* As the data is invalid for XXX, reset the proper bits. */
*mask &= ~CODING_CATEGORY_XXX;
/* The byte sequence is invalid for XXX. */
detect_info->rejected |= CATEGORY_MASK_XXX;
return 0;
no_more_source:
/* The source is exhausted. */
if (!found)
/* ASCII characters only. */
return 0;
/* Some data should be decoded into non-ASCII characters. */
*mask &= CODING_CATEGORY_XXX;
/* The source was exhausted successfully. */
detect_info->found |= found;
return 1;
}
#endif
......@@ -408,31 +406,38 @@ Lisp_Object Vsjis_coding_system;
Lisp_Object Vbig5_coding_system;
static int detect_coding_utf_8 P_ ((struct coding_system *, int *));
static int detect_coding_utf_8 P_ ((struct coding_system *,
struct coding_detection_info *info));
static void decode_coding_utf_8 P_ ((struct coding_system *));
static int encode_coding_utf_8 P_ ((struct coding_system *));
static int detect_coding_utf_16 P_ ((struct coding_system *, int *));
static int detect_coding_utf_16 P_ ((struct coding_system *,
struct coding_detection_info *info));
static void decode_coding_utf_16 P_ ((struct coding_system *));
static int encode_coding_utf_16 P_ ((struct coding_system *));
static int detect_coding_iso_2022 P_ ((struct coding_system *, int *));
static int detect_coding_iso_2022 P_ ((struct coding_system *,
struct coding_detection_info *info));
static void decode_coding_iso_2022 P_ ((struct coding_system *));
static int encode_coding_iso_2022 P_ ((struct coding_system *));
static int detect_coding_emacs_mule P_ ((struct coding_system *, int *));
static int detect_coding_emacs_mule P_ ((struct coding_system *,
struct coding_detection_info *info));
static void decode_coding_emacs_mule P_ ((struct coding_system *));
static int encode_coding_emacs_mule P_ ((struct coding_system *));
static int detect_coding_sjis P_ ((struct coding_system *, int *));
static int detect_coding_sjis P_ ((struct coding_system *,
struct coding_detection_info *info));
static void decode_coding_sjis P_ ((struct coding_system *));
static int encode_coding_sjis P_ ((struct coding_system *));
static int detect_coding_big5 P_ ((struct coding_system *, int *));
static int detect_coding_big5 P_ ((struct coding_system *,
struct coding_detection_info *info));
static void decode_coding_big5 P_ ((struct coding_system *));
static int encode_coding_big5 P_ ((struct coding_system *));
static int detect_coding_ccl P_ ((struct coding_system *, int *));
static int detect_coding_ccl P_ ((struct coding_system *,
struct coding_detection_info *info));
static void decode_coding_ccl P_ ((struct coding_system *));
static int encode_coding_ccl P_ ((struct coding_system *));
......@@ -631,6 +636,7 @@ enum coding_category
#define CATEGORY_MASK_BIG5 (1 << coding_category_big5)
#define CATEGORY_MASK_CCL (1 << coding_category_ccl)
#define CATEGORY_MASK_EMACS_MULE (1 << coding_category_emacs_mule)
#define CATEGORY_MASK_RAW_TEXT (1 << coding_category_raw_text)
/* This value is returned if detect_coding_mask () finds nothing other
than ASCII characters. */
......@@ -1002,6 +1008,54 @@ alloc_destination (coding, nbytes, dst)
return dst;
}
/** Macros for annotations. */
/* Maximum length of annotation data (sum of annotations for
composition and charset). */
#define MAX_ANNOTATION_LENGTH (5 + (MAX_COMPOSITION_COMPONENTS * 2) - 1 + 5)
/* Annotation data is stored in the array coding->charbuf in this
format:
[ -LENGTH ANNOTATION_MASK FROM TO ... ]
LENGTH is the number of elements in the annotation.
ANNOTATION_MASK is one of CODING_ANNOTATE_XXX_MASK.
FROM and TO specify the range of text annotated. They are relative
to coding->src_pos (on encoding) or coding->dst_pos (on decoding).
The format of the following elements depends on ANNOTATION_MASK.
In the case of CODING_ANNOTATE_COMPOSITION_MASK, these elements
follow:
... METHOD [ COMPOSITION-COMPONENTS ... ]
METHOD is one of enum composition_method.
Optional COMPOSITION-COMPONENTS are characters and composition
rules.
In the case of CODING_ANNOTATE_CHARSET_MASK, one element CHARSET-ID
follows. */
/* Store an annotation header [ -LEN MASK FROM TO ] at BUF, advance BUF
   past those four elements, and set coding->annotated to 1.  A
   variable named `coding' must be in scope where the macro is used.
   BUF is evaluated several times, so it must be a modifiable lvalue
   without side effects.  The expansion deliberately has no trailing
   semicolon, so the macro behaves as a single statement even as the
   body of an unbraced if/else.  */
#define ADD_ANNOTATION_DATA(buf, len, mask, from, to) \
  do { \
    *(buf)++ = -(len); \
    *(buf)++ = (mask); \
    *(buf)++ = (from); \
    *(buf)++ = (to); \
    coding->annotated = 1; \
  } while (0)

/* Store a composition annotation at BUF: the common header with a
   recorded length of 5 and CODING_ANNOTATE_COMPOSITION_MASK, followed
   by METHOD.  BUF is left pointing just after METHOD; any composition
   components are stored there by the code using this macro.  */
#define ADD_COMPOSITION_DATA(buf, from, to, method) \
  do { \
    ADD_ANNOTATION_DATA (buf, 5, CODING_ANNOTATE_COMPOSITION_MASK, from, to); \
    *(buf)++ = (method); \
  } while (0)

/* Store a charset annotation at BUF: the common header with a
   recorded length of 5 and CODING_ANNOTATE_CHARSET_MASK, followed by
   the charset ID.  BUF is left pointing just after ID.  */
#define ADD_CHARSET_DATA(buf, from, to, id) \
  do { \
    ADD_ANNOTATION_DATA (buf, 5, CODING_ANNOTATE_CHARSET_MASK, from, to); \
    *(buf)++ = (id); \
  } while (0)
/*** 2. Emacs' internal format (emacs-utf-8) ***/
......@@ -1011,8 +1065,8 @@ alloc_destination (coding, nbytes, dst)
/*** 3. UTF-8 ***/
/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
Check if a text is encoded in UTF-8. If it is, return
CATEGORY_MASK_UTF_8, else return 0. */
Check if a text is encoded in UTF-8. If it is, return 1, else
return 0. */
#define UTF_8_1_OCTET_P(c) ((c) < 0x80)
#define UTF_8_EXTRA_OCTET_P(c) (((c) & 0xC0) == 0x80)
......@@ -1022,9 +1076,9 @@ alloc_destination (coding, nbytes, dst)
#define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8)
static int
detect_coding_utf_8 (coding, mask)
detect_coding_utf_8 (coding, detect_info)
struct coding_system *coding;
int *mask;
struct coding_detection_info *detect_info;
{
unsigned char *src = coding->source, *src_base = src;
unsigned char *src_end = coding->source + coding->src_bytes;
......@@ -1033,6 +1087,7 @@ detect_coding_utf_8 (coding, mask)
int found = 0;
int incomplete;
detect_info->checked |= CATEGORY_MASK_UTF_8;
/* A coding system of this category is always ASCII compatible. */
src += coding->head_ascii;
......@@ -1050,7 +1105,7 @@ detect_coding_utf_8 (coding, mask)
break;
if (UTF_8_2_OCTET_LEADING_P (c))
{
found++;
found = CATEGORY_MASK_UTF_8;
continue;
}
ONE_MORE_BYTE (c2);
......@@ -1058,7 +1113,7 @@ detect_coding_utf_8 (coding, mask)
break;
if (UTF_8_3_OCTET_LEADING_P (c))
{
found++;
found = CATEGORY_MASK_UTF_8;
continue;
}
ONE_MORE_BYTE (c3);
......@@ -1066,7 +1121,7 @@ detect_coding_utf_8 (coding, mask)
break;
if (UTF_8_4_OCTET_LEADING_P (c))
{
found++;
found = CATEGORY_MASK_UTF_8;
continue;
}
ONE_MORE_BYTE (c4);
......@@ -1074,21 +1129,22 @@ detect_coding_utf_8 (coding, mask)
break;
if (UTF_8_5_OCTET_LEADING_P (c))
{
found++;
found = CATEGORY_MASK_UTF_8;
continue;
}
break;
}
*mask &= ~CATEGORY_MASK_UTF_8;
detect_info->rejected |= CATEGORY_MASK_UTF_8;
return 0;
no_more_source:
if (incomplete && coding->mode & CODING_MODE_LAST_BLOCK)
{
*mask &= ~CATEGORY_MASK_UTF_8;
detect_info->rejected |= CATEGORY_MASK_UTF_8;
return 0;
}
return found;
detect_info->found |= found;
return 1;
}
......@@ -1269,10 +1325,8 @@ encode_coding_utf_8 (coding)
/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
Check if a text is encoded in UTF-16 Big Endian (endian == 1) or
Little Endian (otherwise). If it is, return
CATEGORY_MASK_UTF_16_BE or CATEGORY_MASK_UTF_16_LE,
else return 0. */
Check if a text is encoded in one of UTF-16 based coding systems.
If it is, return 1, else return 0. */
#define UTF_16_HIGH_SURROGATE_P(val) \
(((val) & 0xFC00) == 0xD800)
......@@ -1287,9 +1341,9 @@ encode_coding_utf_8 (coding)
static int
detect_coding_utf_16 (coding, mask)
detect_coding_utf_16 (coding, detect_info)
struct coding_system *coding;
int *mask;
struct coding_detection_info *detect_info;
{
unsigned char *src = coding->source, *src_base = src;
unsigned char *src_end = coding->source + coding->src_bytes;
......@@ -1297,21 +1351,29 @@ detect_coding_utf_16 (coding, mask)
int consumed_chars = 0;
int c1, c2;
*mask &= ~CATEGORY_MASK_UTF_16;
detect_info->checked |= CATEGORY_MASK_UTF_16;
if (coding->mode & CODING_MODE_LAST_BLOCK
&& (coding->src_bytes & 1))
{
detect_info->rejected |= CATEGORY_MASK_UTF_16;
return 0;
}
ONE_MORE_BYTE (c1);
ONE_MORE_BYTE (c2);
if ((c1 == 0xFF) && (c2 == 0xFE))
*mask |= CATEGORY_MASK_UTF_16_LE;
{
detect_info->found |= CATEGORY_MASK_UTF_16_LE;
detect_info->rejected |= CATEGORY_MASK_UTF_16_BE;
}
else if ((c1 == 0xFE) && (c2 == 0xFF))
*mask |= CATEGORY_MASK_UTF_16_BE;
else
*mask |= CATEGORY_MASK_UTF_16_BE_NOSIG | CATEGORY_MASK_UTF_16_LE_NOSIG;
return 1;
{
detect_info->found |= CATEGORY_MASK_UTF_16_BE;
detect_info->rejected |= CATEGORY_MASK_UTF_16_LE;
}
no_more_source:
return 0;
return 1;
}
static void
......@@ -1559,10 +1621,10 @@ encode_coding_utf_16 (coding)
char emacs_mule_bytes[256];
int
emacs_mule_char (coding, src, nbytes, nchars)
emacs_mule_char (coding, src, nbytes, nchars, id)
struct coding_system *coding;
unsigned char *src;
int *nbytes, *nchars;
int *nbytes, *nchars, *id;
{
unsigned char *src_end = coding->source + coding->src_bytes;
int multibytep = coding->src_multibyte;
......@@ -1627,6 +1689,8 @@ emacs_mule_char (coding, src, nbytes, nchars)
goto invalid_code;
*nbytes = src - src_base;
*nchars = consumed_chars;
if (id)
*id = charset->id;
return c;
no_more_source:
......@@ -1638,12 +1702,13 @@ emacs_mule_char (coding, src, nbytes, nchars)
/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
Check if a text is encoded in `emacs-mule'. */
Check if a text is encoded in `emacs-mule'. If it is, return 1,
else return 0. */
static int
detect_coding_emacs_mule (coding, mask)
detect_coding_emacs_mule (coding, detect_info)
struct coding_system *coding;
int *mask;
struct coding_detection_info *detect_info;
{
unsigned char *src = coding->source, *src_base = src;
unsigned char *src_end = coding->source + coding->src_bytes;
......@@ -1653,6 +1718,7 @@ detect_coding_emacs_mule (coding, mask)
int found = 0;
int incomplete;
detect_info->checked |= CATEGORY_MASK_EMACS_MULE;
/* A coding system of this category is always ASCII compatible. */
src += coding->head_ascii;
......@@ -1680,7 +1746,7 @@ detect_coding_emacs_mule (coding, mask)
if (src - src_base <= 4)
break;
found = 1;
found = CATEGORY_MASK_EMACS_MULE;
if (c == 0x80)
goto repeat;
}
......@@ -1702,19 +1768,20 @@ detect_coding_emacs_mule (coding, mask)
while (c >= 0xA0);
if (src - src_base != emacs_mule_bytes[*src_base])
break;
found = 1;
found = CATEGORY_MASK_EMACS_MULE;
}
}
*mask &= ~CATEGORY_MASK_EMACS_MULE;
detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
return 0;
no_more_source:
if (incomplete && coding->mode & CODING_MODE_LAST_BLOCK)
{
*mask &= ~CATEGORY_MASK_EMACS_MULE;
detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
return 0;
}
return found;
detect_info->found |= found;
return 1;
}
......@@ -1735,7 +1802,7 @@ detect_coding_emacs_mule (coding, mask)
\
if (src == src_end) \
break; \
c = emacs_mule_char (coding, src, &nbytes, &nchars); \
c = emacs_mule_char (coding, src, &nbytes, &nchars, NULL);\
if (c < 0) \
{ \
if (c == -2) \
......@@ -1792,16 +1859,6 @@ detect_coding_emacs_mule (coding, mask)
} while (0)
#define ADD_COMPOSITION_DATA(buf, method, nchars) \
do { \
*buf++ = -5; \
*buf++ = coding->produced_char + char_offset; \
*buf++ = CODING_ANNOTATE_COMPOSITION_MASK; \
*buf++ = method; \
*buf++ = nchars; \
} while (0)
#define DECODE_EMACS_MULE_21_COMPOSITION(c) \
do { \
/* Emacs 21 style format. The first three bytes at SRC are \
......@@ -1810,6 +1867,7 @@ detect_coding_emacs_mule (coding, mask)
number of characters composed by this composition. */ \
enum composition_method method = c - 0xF2; \
int *charbuf_base = charbuf; \
int from, to; \
int consumed_chars_limit; \
int nbytes, nchars; \
\
......@@ -1819,7 +1877,9 @@ detect_coding_emacs_mule (coding, mask)
goto invalid_code; \
ONE_MORE_BYTE (c); \
nchars = c - 0xA0; \
ADD_COMPOSITION_DATA (charbuf, method, nchars); \
from = coding->produced + char_offset; \
to = from + nchars; \
ADD_COMPOSITION_DATA (charbuf, from, to, method); \
consumed_chars_limit = consumed_chars_base + nbytes; \
if (method != COMPOSITION_RELATIVE) \
{ \
......@@ -1843,9 +1903,11 @@ detect_coding_emacs_mule (coding, mask)
do { \
/* Emacs 20 style format for relative composition. */ \
/* Store multibyte form of characters to be composed. */ \
enum composition_method method = COMPOSITION_RELATIVE; \
int components[MAX_COMPOSITION_COMPONENTS * 2 - 1]; \
int *buf = components; \
int i, j; \
int from, to; \
\
src = src_base; \
ONE_MORE_BYTE (c); /* skip 0x80 */ \
......@@ -1853,7 +1915,9 @@ detect_coding_emacs_mule (coding, mask)
DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
if (i < 2) \
goto invalid_code; \
ADD_COMPOSITION_DATA (charbuf, COMPOSITION_RELATIVE, i); \
from = coding->produced_char + char_offset; \
to = from + i; \
ADD_COMPOSITION_DATA (charbuf, from, to, method); \
for (j = 0; j < i; j++) \
*charbuf++ = components[j]; \
} while (0)
......@@ -1863,9 +1927,11 @@ detect_coding_emacs_mule (coding, mask)
do { \
/* Emacs 20 style format for rule-base composition. */ \
/* Store multibyte form of characters to be composed. */ \
enum composition_method method = COMPOSITION_WITH_RULE; \
int components[MAX_COMPOSITION_COMPONENTS * 2 - 1]; \
int *buf = components; \
int i, j; \
int from, to; \
\
DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++) \
......@@ -1877,7 +1943,9 @@ detect_coding_emacs_mule (coding, mask)
goto invalid_code; \
if (charbuf + i + (i / 2) + 1 < charbuf_end) \
goto no_more_source; \
ADD_COMPOSITION_DATA (buf, COMPOSITION_WITH_RULE, i); \
from = coding->produced_char + char_offset; \
to = from + i; \
ADD_COMPOSITION_DATA (buf, from, to, method); \
for (j = 0; j < i; j++) \
*charbuf++ = components[j]; \
for (j = 0; j < i; j += 2) \
......@@ -1893,11 +1961,13 @@ decode_coding_emacs_mule (coding)
unsigned char *src_end = coding->source + coding->src_bytes;
unsigned char *src_base;
int *charbuf = coding->charbuf;
int *charbuf_end = charbuf + coding->charbuf_size;
int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
int consumed_chars = 0, consumed_chars_base;
int char_offset = 0;
int multibytep = coding->src_multibyte;
Lisp_Object attrs, eol_type, charset_list;
int char_offset = coding->produced_char;
int last_offset = char_offset;
int last_id = charset_ascii;
CODING_GET_INFO (coding, attrs, eol_type, charset_list);
......@@ -1935,8 +2005,6 @@ decode_coding_emacs_mule (coding)
}
else if (c == 0x80)
{
if (charbuf + 5 + (MAX_COMPOSITION_COMPONENTS * 2) - 1 > charbuf_end)
break;
ONE_MORE_BYTE (c);
if (c - 0xF2 >= COMPOSITION_RELATIVE
&& c - 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS)
......@@ -1947,20 +2015,28 @@ decode_coding_emacs_mule (coding)
DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION (c);
else
goto invalid_code;
coding->annotated = 1;
}
else if (c < 0xA0 && emacs_mule_bytes[c] > 1)
{
int nbytes, nchars;
int id;
src = src_base;
consumed_chars = consumed_chars_base;
c = emacs_mule_char (coding, src, &nbytes, &nchars);
c = emacs_mule_char (coding, src, &nbytes, &nchars, &id);
if (c < 0)
{
if (c == -2)
break;
goto invalid_code;
}
if (last_id != id)
{
if (last_id != charset_ascii)
ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
last_id = id;
last_offset = char_offset;
}
*charbuf++ = c;
src += nbytes;
consumed_chars += nchars;
......@@ -1973,10 +2049,13 @@ decode_coding_emacs_mule (coding)
consumed_chars = consumed_chars_base;
ONE_MORE_BYTE (c);
*charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
char_offset++;
coding->errors++;
}
no_more_source:
if (last_id != charset_ascii)
ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
coding->consumed_char += consumed_chars_base;
coding->consumed = src_base - coding->source;
coding->charbuf_used = charbuf - coding->charbuf;
......@@ -2011,6 +2090,7 @@ encode_coding_emacs_mule (coding)
int produced_chars = 0;
Lisp_Object attrs, eol_type, charset_list;
int c;
int preferred_charset_id = -1;
CODING_GET_INFO (coding, attrs, eol_type, charset_list);
......@@ -2018,6 +2098,29 @@ encode_coding_emacs_mule (coding)
{
ASSURE_DESTINATION (safe_room);
c = *charbuf++;
if (c < 0)
{
/* Handle an annotation. */
switch (*charbuf)
{
case CODING_ANNOTATE_COMPOSITION_MASK:
/* Not yet implemented. */
break;
case CODING_ANNOTATE_CHARSET_MASK:
preferred_charset_id = charbuf[3];
if (preferred_charset_id >= 0
&& NILP (Fmemq (make_number (preferred_charset_id),
charset_list)))
preferred_charset_id = -1;
break;
default:
abort ();
}
charbuf += -c - 1;
continue;
}
if (ASCII_CHAR_P (c))
EMIT_ONE_ASCII_BYTE (c);
else if (CHAR_BYTE8_P (c))
......@@ -2033,7 +2136,14 @@ encode_coding_emacs_mule (coding)
int emacs_mule_id;
unsigned char leading_codes[2];
charset = char_charset (c, charset_list, &code);
if (preferred_charset_id >= 0)
{
charset = CHARSET_FROM_ID (preferred_charset_id);
if (! CHAR_CHARSET_P (c, charset))
charset = char_charset (c, charset_list, NULL);
}
else
charset = char_charset (c, charset_list, &code);
if (! charset)
{
c = coding->default_char;
......@@ -2319,32 +2429,26 @@ setup_iso_safe_charsets (attrs)
/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
Check if a text is encoded in ISO2022. If it is, returns an
integer in which appropriate flag bits any of:
CATEGORY_MASK_ISO_7
CATEGORY_MASK_ISO_7_TIGHT
CATEGORY_MASK_ISO_8_1
CATEGORY_MASK_ISO_8_2
CATEGORY_MASK_ISO_7_ELSE
CATEGORY_MASK_ISO_8_ELSE
are set. If a code which should never appear in ISO2022 is found,
returns 0. */
Check if a text is encoded in one of ISO-2022 based coding systems.
If it is, return 1, else return 0. */
static int
detect_coding_iso_2022 (coding, mask)
detect_coding_iso_2022 (coding, detect_info)
struct coding_system *coding;
int *mask;
struct coding_detection_info *detect_info;
{
unsigned char *src = coding->source, *src_base = src;
unsigned char *src_end = coding->source + coding->src_bytes;
int multibytep = coding->src_multibyte;
int mask_iso = CATEGORY_MASK_ISO;
int mask_found = 0, mask_8bit_found = 0;
int reg[4], shift_out = 0, single_shifting = 0;
int single_shifting = 0;
int id;
int c, c1;
int consumed_chars = 0;
int i;
int rejected = 0;
int found = 0;
detect_info->checked |= CATEGORY_MASK_ISO;
for (i = coding_category_iso_7; i <= coding_category_iso_8_else; i++)
{
......@@ -2363,8 +2467,7 @@ detect_coding_iso_2022 (coding, mask)
/* A coding system of this category is always ASCII compatible. */
src += coding->head_ascii;
reg[0] = charset_ascii, reg[1] = reg[2] = reg[3] = -1;
while (mask_iso && src < src_end)
while (rejected != CATEGORY_MASK_ISO)
{
ONE_MORE_BYTE (c);
switch (c)
......@@ -2382,7 +2485,6 @@ detect_coding_iso_2022 (coding, mask)
|| (id = iso_charset_table[0][c >= ','][c1]) < 0)
/* Invalid designation sequence. Just ignore. */
break;
reg[(c - '(') % 4] = id;
}
else if (c == '$')
{
......@@ -2390,7 +2492,7 @@ detect_coding_iso_2022 (coding, mask)
ONE_MORE_BYTE (c);
if (c >= '@' && c <= 'B')
/* Designation for JISX0208.1978, GB2312, or JISX0208. */
reg[0] = id = iso_charset_table[1][0][c];
id = iso_charset_table[1][0][c];
else if (c >= '(' && c <= '/')
{
ONE_MORE_BYTE (c1);
......@@ -2398,116 +2500,86 @@ detect_coding_iso_2022 (coding, mask)
|| (id = iso_charset_table[1][c >= ','][c1]) < 0)
/* Invalid designation sequence. Just ignore. */
break;
reg[(c - '(') % 4] = id;
}
else
/* Invalid designation sequence. Just ignore. */
/* Invalid designation sequence. Just ignore it. */
break;
}
else if (c == 'N' || c == 'O')
{
/* ESC <Fe> for SS2 or SS3. */
mask_iso &= CATEGORY_MASK_ISO_7_ELSE;
single_shifting = 1;
rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
break;
}
else if (c >= '0' && c <= '4')
{
/* ESC <Fp> for start/end composition. */
mask_found |= CATEGORY_MASK_ISO;
found |= CATEGORY_MASK_ISO;
break;
}
else
{
/* Invalid escape sequence. */
mask_iso &= ~CATEGORY_MASK_ISO_ESCAPE;
/* Invalid escape sequence. Just ignore it. */
break;
}
/* We found a valid designation sequence for CHARSET. */
mask_iso &= ~CATEGORY_MASK_ISO_8BIT;
rejected |= CATEGORY_MASK_ISO_8BIT;
if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7],
id))
mask_found |= CATEGORY_MASK_ISO_7;
found |= CATEGORY_MASK_ISO_7;
else
mask_iso &= ~CATEGORY_MASK_ISO_7;
rejected |= CATEGORY_MASK_ISO_7;