Commit c10842ea authored by Kenichi Handa's avatar Kenichi Handa
Browse files

Include "character.h".

(Qccl, Qcclp): New variables.
(CCL_WRITE_CHAR): Alway treat the arg CH as a character even if
it's less than 256.
(CCL_WRITE_MULTIBYTE_CHAR): Deleted.
(CCL_WRITE_STRING, CCL_READ_CHAR): Adjusted for the change of SRC
and DST type.
(ccl_driver): Types of arguments changed.  Code adjusted for that.
(Fccl_execute, Fccl_execute_on_string): Adjusted for the change of
ccl_driver.
(syms_of_ccl): Intern and staticpro Qccl and Qcclp.
parent 2eca16e6
......@@ -2,6 +2,9 @@
Copyright (C) 1995, 1997 Electrotechnical Laboratory, JAPAN.
Copyright (C) 2001 Free Software Foundation, Inc.
Licensed to the Free Software Foundation.
Copyright (C) 2001, 2002
National Institute of Advanced Industrial Science and Technology (AIST)
Registration Number H13PRO009
This file is part of GNU Emacs.
......@@ -29,6 +32,7 @@ Boston, MA 02111-1307, USA. */
#ifdef emacs
#include "lisp.h"
#include "character.h"
#include "charset.h"
#include "ccl.h"
#include "coding.h"
......@@ -39,6 +43,8 @@ Boston, MA 02111-1307, USA. */
#endif /* not emacs */
Lisp_Object Qccl, Qcclp;
/* This contains all code conversion map available to CCL. */
Lisp_Object Vcode_conversion_map_vector;
......@@ -711,56 +717,24 @@ while(0)
/* Encode one character CH to multibyte form and write to the current
output buffer. If CH is less than 256, CH is written as is. */
#define CCL_WRITE_CHAR(ch) \
do { \
int bytes = SINGLE_BYTE_CHAR_P (ch) ? 1: CHAR_BYTES (ch); \
if (!dst) \
CCL_INVALID_CMD; \
else if (dst + bytes + extra_bytes < (dst_bytes ? dst_end : src)) \
{ \
if (bytes == 1) \
{ \
*dst++ = (ch); \
if ((ch) >= 0x80 && (ch) < 0xA0) \
/* We may have to convert this eight-bit char to \
multibyte form later. */ \
extra_bytes++; \
} \
else if (CHAR_VALID_P (ch, 0)) \
dst += CHAR_STRING (ch, dst); \
else \
CCL_INVALID_CMD; \
} \
else \
CCL_SUSPEND (CCL_STAT_SUSPEND_BY_DST); \
} while (0)
/* Encode one character CH to multibyte form and write to the current
output buffer. The output bytes always forms a valid multibyte
sequence. */
#define CCL_WRITE_MULTIBYTE_CHAR(ch) \
do { \
int bytes = CHAR_BYTES (ch); \
if (!dst) \
CCL_INVALID_CMD; \
else if (dst + bytes + extra_bytes < (dst_bytes ? dst_end : src)) \
{ \
if (CHAR_VALID_P ((ch), 0)) \
dst += CHAR_STRING ((ch), dst); \
else \
CCL_INVALID_CMD; \
} \
else \
CCL_SUSPEND (CCL_STAT_SUSPEND_BY_DST); \
#define CCL_WRITE_CHAR(ch) \
do { \
if (! dst) \
CCL_INVALID_CMD; \
else if (dst < dst_end) \
*dst++ = (ch); \
else \
CCL_SUSPEND (CCL_STAT_SUSPEND_BY_DST); \
} while (0)
/* Write a string at ccl_prog[IC] of length LEN to the current output
buffer. */
#define CCL_WRITE_STRING(len) \
do { \
int i; \
if (!dst) \
CCL_INVALID_CMD; \
else if (dst + len <= (dst_bytes ? dst_end : src)) \
else if (dst + len <= dst_end) \
for (i = 0; i < len; i++) \
*dst++ = ((XFASTINT (ccl_prog[ic + (i / 3)])) \
>> ((2 - (i % 3)) * 8)) & 0xFF; \
......@@ -768,78 +742,30 @@ while(0)
CCL_SUSPEND (CCL_STAT_SUSPEND_BY_DST); \
} while (0)
/* Read one byte from the current input buffer into REGth register. */
#define CCL_READ_CHAR(REG) \
do { \
if (!src) \
CCL_INVALID_CMD; \
else if (src < src_end) \
{ \
REG = *src++; \
if (REG == '\n' \
&& ccl->eol_type != CODING_EOL_LF) \
{ \
/* We are encoding. */ \
if (ccl->eol_type == CODING_EOL_CRLF) \
{ \
if (ccl->cr_consumed) \
ccl->cr_consumed = 0; \
else \
{ \
ccl->cr_consumed = 1; \
REG = '\r'; \
src--; \
} \
} \
else \
REG = '\r'; \
} \
if (REG == LEADING_CODE_8_BIT_CONTROL \
&& ccl->multibyte) \
REG = *src++ - 0x20; \
} \
else if (ccl->last_block) \
{ \
ic = ccl->eof_ic; \
goto ccl_repeat; \
} \
else \
CCL_SUSPEND (CCL_STAT_SUSPEND_BY_SRC); \
} while (0)
/* Set C to the character code made from CHARSET and CODE. This is
like MAKE_CHAR but check the validity of CHARSET and CODE. If they
are not valid, set C to (CODE & 0xFF) because that is usually the
case that CCL_ReadMultibyteChar2 read an invalid code and it set
CODE to that invalid byte. */
#define CCL_MAKE_CHAR(charset, code, c) \
do { \
if (charset == CHARSET_ASCII) \
c = code & 0xFF; \
else if (CHARSET_DEFINED_P (charset) \
&& (code & 0x7F) >= 32 \
&& (code < 256 || ((code >> 7) & 0x7F) >= 32)) \
{ \
int c1 = code & 0x7F, c2 = 0; \
\
if (code >= 256) \
c2 = c1, c1 = (code >> 7) & 0x7F; \
c = MAKE_CHAR (charset, c1, c2); \
} \
else \
c = code & 0xFF; \
} while (0)
/* Execute CCL code on SRC_BYTES length text at SOURCE. The resulting
text goes to a place pointed by DESTINATION, the length of which
should not exceed DST_BYTES. The bytes actually processed is
returned as *CONSUMED. The return value is the length of the
resulting text. As a side effect, the contents of CCL registers
are updated. If SOURCE or DESTINATION is NULL, only operations on
registers are permitted. */
/* Read one byte from the current input buffer into Rth register. */
#define CCL_READ_CHAR(r) \
do { \
if (! src) \
CCL_INVALID_CMD; \
else if (src < src_end) \
r = *src++; \
else if (ccl->last_block) \
{ \
ic = ccl->eof_ic; \
goto ccl_repeat; \
} \
else \
CCL_SUSPEND (CCL_STAT_SUSPEND_BY_SRC); \
} while (0)
/* Execute CCL code on characters at SOURCE (length SRC_SIZE). The
resulting text goes to a place pointed by DESTINATION, the length
of which should not exceed DST_SIZE. As a side effect, how many
characters are consumed and produced are recorded in CCL->consumed
and CCL->produced, and the contents of CCL registers are updated.
If SOURCE or DESTINATION is NULL, only operations on registers are
permitted. */
#ifdef CCL_DEBUG
#define CCL_DEBUG_BACKTRACE_LEN 256
......@@ -856,34 +782,29 @@ struct ccl_prog_stack
/* For the moment, we only support depth 256 of stack. */
static struct ccl_prog_stack ccl_prog_stack_struct[256];
int
ccl_driver (ccl, source, destination, src_bytes, dst_bytes, consumed)
void
ccl_driver (ccl, source, destination, src_size, dst_size)
struct ccl_program *ccl;
unsigned char *source, *destination;
int src_bytes, dst_bytes;
int *consumed;
int *source, *destination;
int src_size, dst_size;
{
register int *reg = ccl->reg;
register int ic = ccl->ic;
register int code = 0, field1, field2;
register Lisp_Object *ccl_prog = ccl->prog;
unsigned char *src = source, *src_end = src + src_bytes;
unsigned char *dst = destination, *dst_end = dst + dst_bytes;
int *src = source, *src_end = src + src_size;
int *dst = destination, *dst_end = dst + dst_size;
int jump_address;
int i = 0, j, op;
int stack_idx = ccl->stack_idx;
/* Instruction counter of the current CCL code. */
int this_ic = 0;
/* CCL_WRITE_CHAR will produce 8-bit code of range 0x80..0x9F. But,
each of them will be converted to multibyte form of 2-byte
sequence. For that conversion, we remember how many more bytes
we must keep in DESTINATION in this variable. */
int extra_bytes = 0;
struct charset *charset;
if (ic >= ccl->eof_ic)
ic = CCL_HEADER_MAIN;
if (ccl->buf_magnification == 0) /* We can't produce any bytes. */
if (ccl->buf_magnification == 0) /* We can't read/produce any bytes. */
dst = NULL;
/* Set mapping stack pointer. */
......@@ -908,8 +829,8 @@ ccl_driver (ccl, source, destination, src_bytes, dst_bytes, consumed)
/* We can't just signal Qquit, instead break the loop as if
the whole data is processed. Don't reset Vquit_flag, it
must be handled later at a safer place. */
if (consumed)
src = source + src_bytes;
if (src)
src = source + src_size;
ccl->status = CCL_STAT_QUIT;
break;
}
......@@ -1224,8 +1145,22 @@ ccl_driver (ccl, source, destination, src_bytes, dst_bytes, consumed)
case CCL_LE: reg[rrr] = i <= j; break;
case CCL_GE: reg[rrr] = i >= j; break;
case CCL_NE: reg[rrr] = i != j; break;
case CCL_DECODE_SJIS: DECODE_SJIS (i, j, reg[rrr], reg[7]); break;
case CCL_ENCODE_SJIS: ENCODE_SJIS (i, j, reg[rrr], reg[7]); break;
case CCL_DECODE_SJIS:
{
i = (i << 8) | j;
SJIS_TO_JIS (i);
reg[rrr] = i >> 8;
reg[7] = i & 0xFF;
break;
}
case CCL_ENCODE_SJIS:
{
i = (i << 8) | j;
JIS_TO_SJIS (i);
reg[rrr] = i >> 8;
reg[7] = i & 0xFF;
break;
}
default: CCL_INVALID_CMD;
}
code &= 0x1F;
......@@ -1245,165 +1180,38 @@ ccl_driver (ccl, source, destination, src_bytes, dst_bytes, consumed)
case CCL_ReadMultibyteChar2:
if (!src)
CCL_INVALID_CMD;
if (src >= src_end)
{
src++;
goto ccl_read_multibyte_character_suspend;
}
if (!ccl->multibyte)
{
int bytes;
if (!UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes))
{
reg[RRR] = CHARSET_8_BIT_CONTROL;
reg[rrr] = *src++;
break;
}
}
i = *src++;
if (i == '\n' && ccl->eol_type != CODING_EOL_LF)
{
/* We are encoding. */
if (ccl->eol_type == CODING_EOL_CRLF)
{
if (ccl->cr_consumed)
ccl->cr_consumed = 0;
else
{
ccl->cr_consumed = 1;
i = '\r';
src--;
}
}
else
i = '\r';
reg[rrr] = i;
reg[RRR] = CHARSET_ASCII;
}
else if (i < 0x80)
{
/* ASCII */
reg[rrr] = i;
reg[RRR] = CHARSET_ASCII;
}
else if (i <= MAX_CHARSET_OFFICIAL_DIMENSION2)
{
int dimension = BYTES_BY_CHAR_HEAD (i) - 1;
if (dimension == 0)
{
/* `i' is a leading code for an undefined charset. */
reg[RRR] = CHARSET_8_BIT_GRAPHIC;
reg[rrr] = i;
}
else if (src + dimension > src_end)
goto ccl_read_multibyte_character_suspend;
else
{
reg[RRR] = i;
i = (*src++ & 0x7F);
if (dimension == 1)
reg[rrr] = i;
else
reg[rrr] = ((i << 7) | (*src++ & 0x7F));
}
}
else if ((i == LEADING_CODE_PRIVATE_11)
|| (i == LEADING_CODE_PRIVATE_12))
{
if ((src + 1) >= src_end)
goto ccl_read_multibyte_character_suspend;
reg[RRR] = *src++;
reg[rrr] = (*src++ & 0x7F);
}
else if ((i == LEADING_CODE_PRIVATE_21)
|| (i == LEADING_CODE_PRIVATE_22))
{
if ((src + 2) >= src_end)
goto ccl_read_multibyte_character_suspend;
reg[RRR] = *src++;
i = (*src++ & 0x7F);
reg[rrr] = ((i << 7) | (*src & 0x7F));
src++;
}
else if (i == LEADING_CODE_8_BIT_CONTROL)
{
if (src >= src_end)
goto ccl_read_multibyte_character_suspend;
reg[RRR] = CHARSET_8_BIT_CONTROL;
reg[rrr] = (*src++ - 0x20);
}
else if (i >= 0xA0)
{
reg[RRR] = CHARSET_8_BIT_GRAPHIC;
reg[rrr] = i;
}
else
{
/* INVALID CODE. Return a single byte character. */
reg[RRR] = CHARSET_ASCII;
reg[rrr] = i;
}
break;
ccl_read_multibyte_character_suspend:
if (src <= src_end && !ccl->multibyte && ccl->last_block)
{
reg[RRR] = CHARSET_8_BIT_CONTROL;
reg[rrr] = i;
break;
}
src--;
if (ccl->last_block)
{
ic = ccl->eof_ic;
goto ccl_repeat;
}
else
CCL_SUSPEND (CCL_STAT_SUSPEND_BY_SRC);
CCL_READ_CHAR (i);
charset = CHAR_CHARSET (i);
reg[rrr] = CHARSET_ID (charset);
reg[RRR] = ENCODE_CHAR (charset, i);
break;
case CCL_WriteMultibyteChar2:
i = reg[RRR]; /* charset */
if (i == CHARSET_ASCII
|| i == CHARSET_8_BIT_CONTROL
|| i == CHARSET_8_BIT_GRAPHIC)
i = reg[rrr] & 0xFF;
else if (CHARSET_DIMENSION (i) == 1)
i = ((i - 0x70) << 7) | (reg[rrr] & 0x7F);
else if (i < MIN_CHARSET_PRIVATE_DIMENSION2)
i = ((i - 0x8F) << 14) | reg[rrr];
else
i = ((i - 0xE0) << 14) | reg[rrr];
CCL_WRITE_MULTIBYTE_CHAR (i);
if (! dst)
CCL_INVALID_CMD;
charset = CHARSET_FROM_ID (reg[RRR]);
i = DECODE_CHAR (charset, reg[rrr]);
CCL_WRITE_CHAR (i);
break;
case CCL_TranslateCharacter:
CCL_MAKE_CHAR (reg[RRR], reg[rrr], i);
op = translate_char (GET_TRANSLATION_TABLE (reg[Rrr]),
i, -1, 0, 0);
SPLIT_CHAR (op, reg[RRR], i, j);
if (j != -1)
i = (i << 7) | j;
reg[rrr] = i;
charset = CHARSET_FROM_ID (reg[RRR]);
i = DECODE_CHAR (charset, reg[rrr]);
op = translate_char (GET_TRANSLATION_TABLE (reg[Rrr]), i);
charset = CHAR_CHARSET (op);
reg[RRR] = CHARSET_ID (charset);
reg[rrr] = ENCODE_CHAR (charset, op);
break;
case CCL_TranslateCharacterConstTbl:
op = XINT (ccl_prog[ic]); /* table */
ic++;
CCL_MAKE_CHAR (reg[RRR], reg[rrr], i);
op = translate_char (GET_TRANSLATION_TABLE (op), i, -1, 0, 0);
SPLIT_CHAR (op, reg[RRR], i, j);
if (j != -1)
i = (i << 7) | j;
reg[rrr] = i;
charset = CHARSET_FROM_ID (reg[RRR]);
i = DECODE_CHAR (charset, reg[rrr]);
op = translate_char (GET_TRANSLATION_TABLE (op), i);
charset = CHAR_CHARSET (op);
reg[RRR] = CHARSET_ID (charset);
reg[rrr] = ENCODE_CHAR (charset, op);
break;
case CCL_IterateMultipleMap:
......@@ -1821,28 +1629,10 @@ ccl_driver (ccl, source, destination, src_bytes, dst_bytes, consumed)
}
msglen = strlen (msg);
if (dst + msglen <= (dst_bytes ? dst_end : src))
{
bcopy (msg, dst, msglen);
dst += msglen;
}
if (ccl->status == CCL_STAT_INVALID_CMD)
if (dst + msglen <= dst_end)
{
#if 0 /* If the remaining bytes contain 0x80..0x9F, copying them
results in an invalid multibyte sequence. */
/* Copy the remaining source data. */
int i = src_end - src;
if (dst_bytes && (dst_end - dst) < i)
i = dst_end - dst;
bcopy (src, dst, i);
src += i;
dst += i;
#else
/* Signal that we've consumed everything. */
src = src_end;
#endif
for (i = 0; i < msglen; i++)
*dst++ = msg[i];
}
}
......@@ -1850,10 +1640,8 @@ ccl_driver (ccl, source, destination, src_bytes, dst_bytes, consumed)
ccl->ic = ic;
ccl->stack_idx = stack_idx;
ccl->prog = ccl_prog;
ccl->eight_bit_control = (extra_bytes > 0);
if (consumed)
*consumed = src - source;
return (dst ? dst - destination : 0);
ccl->consumed = src - source;
ccl->produced = dst - destination;
}
/* Resolve symbols in the specified CCL code (Lisp vector). This
......@@ -2003,7 +1791,6 @@ setup_ccl_program (ccl, ccl_prog)
ccl->private_state = 0;
ccl->status = 0;
ccl->stack_idx = 0;
ccl->eol_type = CODING_EOL_LF;
ccl->suppress_error = 0;
return 0;
}
......@@ -2066,7 +1853,7 @@ programs. */)
? XINT (AREF (reg, i))
: 0);
ccl_driver (&ccl, (unsigned char *)0, (unsigned char *)0, 0, 0, (int *)0);
ccl_driver (&ccl, NULL, NULL, 0, 0);
QUIT;
if (ccl.status != CCL_STAT_SUCCESS)
error ("Error in CCL program at %dth code", ccl.ic);
......@@ -2107,10 +1894,13 @@ See the documentation of `define-ccl-program' for the detail of CCL program. */
{
Lisp_Object val;
struct ccl_program ccl;
int i, produced;
int i;
int outbufsize;
char *outbuf;
struct gcpro gcpro1, gcpro2;
unsigned char *outbuf, *outp;
int str_chars, str_bytes;
#define CCL_EXECUTE_BUF_SIZE 1024
int source[CCL_EXECUTE_BUF_SIZE], destination[CCL_EXECUTE_BUF_SIZE];
int consumed_chars, consumed_bytes, produced_chars;
if (setup_ccl_program (&ccl, ccl_prog) < 0)
error ("Invalid CCL program");
......@@ -2119,8 +1909,8 @@ See the documentation of `define-ccl-program' for the detail of CCL program. */
if (ASIZE (status) != 9)
error ("Length of vector STATUS is not 9");
CHECK_STRING (str);
GCPRO2 (status, str);
str_chars = XSTRING (str)->size;
str_bytes = STRING_BYTES (XSTRING (str));
for (i = 0; i < 8; i++)
{
......@@ -2135,33 +1925,87 @@ See the documentation of `define-ccl-program' for the detail of CCL program. */
if (ccl.ic < i && i < ccl.size)
ccl.ic = i;
}
outbufsize = STRING_BYTES (XSTRING (str)) * ccl.buf_magnification + 256;
outbuf = (char *) xmalloc (outbufsize);
ccl.last_block = NILP (contin);
ccl.multibyte = STRING_MULTIBYTE (str);
produced = ccl_driver (&ccl, XSTRING (str)->data, outbuf,
STRING_BYTES (XSTRING (str)), outbufsize, (int *) 0);
for (i = 0; i < 8; i++)
XSET (AREF (status, i), Lisp_Int, ccl.reg[i]);
XSETINT (AREF (status, 8), ccl.ic);
UNGCPRO;
if (NILP (unibyte_p))
outbufsize = (ccl.buf_magnification
? str_bytes * ccl.buf_magnification + 256
: str_bytes + 256);
outp = outbuf = (unsigned char *) xmalloc (outbufsize);
consumed_chars = consumed_bytes = 0;
produced_chars = 0;
while (consumed_bytes < str_bytes)
{
int nchars;
unsigned char *p = XSTRING (str)->data + consumed_bytes;
unsigned char *endp = XSTRING (str)->data + str_bytes;
int i = 0;
int *src, src_size;
if (endp - p == str_chars - consumed_chars)
while (i < CCL_EXECUTE_BUF_SIZE && p < endp)
source[i++] = *p++;
else
while (i < CCL_EXECUTE_BUF_SIZE && p < endp)
source[i++] = STRING_CHAR_ADVANCE (p);
consumed_chars += i;
consumed_bytes = p - XSTRING (str)->data;
if (consumed_bytes == str_bytes)
ccl.last_block = NILP (contin);
src = source;
src_size = i;
while (1)
{
ccl_driver (&ccl, src, destination, src_size, CCL_EXECUTE_BUF_SIZE);
if (ccl.status != CCL_STAT_SUSPEND_BY_DST)
break;
produced_chars += ccl.produced;
if (NILP (unibyte_p))
{
if (outp - outbuf + MAX_MULTIBYTE_LENGTH * ccl.produced
> outbufsize)
{
int offset = outp - outbuf;
outbufsize += MAX_MULTIBYTE_LENGTH * ccl.produced;
outbuf = (unsigned char *) xrealloc (outbuf, outbufsize);
outp = outbuf + offset;
}
for (i = 0; i < ccl.produced; i++)
CHAR_STRING_ADVANCE (destination[i], outp);
}
else
{
if (outp - outbuf + ccl.produced > outbufsize)
{
int offset = outp - outbuf;
outbufsize += ccl.produced;
outbuf = (unsigned char *) xrealloc (outbuf, outbufsize);
outp = outbuf + offset;
}
for (i = 0; i < ccl.produced; i++)
*outp++ = destination[i];
}
src += ccl.consumed;
src_size -= ccl.consumed;
}
if (ccl.status != CCL_STAT_SUSPEND_BY_SRC)
break;
}
produced = str_as_multibyte (outbuf, outbufsize, produced, &nchars);
val = make_multibyte_string (outbuf, nchars, produced);
}
else
val = make_unibyte_string (outbuf, produced);
xfree (outbuf);
QUIT;
if (ccl.status == CCL_STAT_SUSPEND_BY_DST)