coding.h 24.8 KB
Newer Older
Karl Heuer's avatar
Karl Heuer committed
1
/* Header for coding system handler.
2
   Copyright (C) 2001-2011  Free Software Foundation, Inc.
Kenichi Handa's avatar
Kenichi Handa committed
3
   Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
4
     2005, 2006, 2007, 2008, 2009, 2010, 2011
Kenichi Handa's avatar
Kenichi Handa committed
5 6
     National Institute of Advanced Industrial Science and Technology (AIST)
     Registration Number H14PRO021
Kenichi Handa's avatar
Kenichi Handa committed
7
   Copyright (C) 2003
Kenichi Handa's avatar
Kenichi Handa committed
8 9
     National Institute of Advanced Industrial Science and Technology (AIST)
     Registration Number H13PRO009
Karl Heuer's avatar
Karl Heuer committed
10

Karl Heuer's avatar
Karl Heuer committed
11 12
This file is part of GNU Emacs.

13
GNU Emacs is free software: you can redistribute it and/or modify
Karl Heuer's avatar
Karl Heuer committed
14
it under the terms of the GNU General Public License as published by
15 16
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Karl Heuer's avatar
Karl Heuer committed
17

Karl Heuer's avatar
Karl Heuer committed
18 19 20 21
GNU Emacs is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.
Karl Heuer's avatar
Karl Heuer committed
22

Karl Heuer's avatar
Karl Heuer committed
23
You should have received a copy of the GNU General Public License
24
along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
Karl Heuer's avatar
Karl Heuer committed
25

26 27
#ifndef EMACS_CODING_H
#define EMACS_CODING_H
Karl Heuer's avatar
Karl Heuer committed
28

Kenichi Handa's avatar
Kenichi Handa committed
29
/* Index to arguments of Fdefine_coding_system_internal.  */
Karl Heuer's avatar
Karl Heuer committed
30

Kenichi Handa's avatar
Kenichi Handa committed
31 32 33 34 35 36 37 38 39 40 41 42
enum define_coding_system_arg_index
  {
    coding_arg_name,
    coding_arg_mnemonic,
    coding_arg_coding_type,
    coding_arg_charset_list,
    coding_arg_ascii_compatible_p,
    coding_arg_decode_translation_table,
    coding_arg_encode_translation_table,
    coding_arg_post_read_conversion,
    coding_arg_pre_write_conversion,
    coding_arg_default_char,
    coding_arg_for_unibyte,
    coding_arg_plist,
    coding_arg_eol_type,
    /* Number of indices above.  The type-specific index enums in this
       file start numbering from this value.  */
    coding_arg_max
  };
Karl Heuer's avatar
Karl Heuer committed
48

Kenichi Handa's avatar
Kenichi Handa committed
49
enum define_coding_iso2022_arg_index
Karl Heuer's avatar
Karl Heuer committed
50
  {
Kenichi Handa's avatar
Kenichi Handa committed
51 52 53 54 55
    coding_arg_iso2022_initial = coding_arg_max,
    coding_arg_iso2022_reg_usage,
    coding_arg_iso2022_request,
    coding_arg_iso2022_flags,
    coding_arg_iso2022_max
Karl Heuer's avatar
Karl Heuer committed
56 57
  };

58 59 60 61 62 63
/* Additional argument indices for a UTF-8 coding system; the
   numbering continues from coding_arg_max.  */
enum define_coding_utf8_arg_index
  {
    /* First (and only) type-specific index.  */
    coding_arg_utf8_bom = coding_arg_max,

    /* One past the last UTF-8 index.  */
    coding_arg_utf8_max
  };

Kenichi Handa's avatar
Kenichi Handa committed
64
enum define_coding_utf16_arg_index
Karl Heuer's avatar
Karl Heuer committed
65
  {
Kenichi Handa's avatar
Kenichi Handa committed
66 67 68
    coding_arg_utf16_bom = coding_arg_max,
    coding_arg_utf16_endian,
    coding_arg_utf16_max
Karl Heuer's avatar
Karl Heuer committed
69 70
  };

Kenichi Handa's avatar
Kenichi Handa committed
71 72
enum define_coding_ccl_arg_index
  {
73
    coding_arg_ccl_decoder = coding_arg_max,
Kenichi Handa's avatar
Kenichi Handa committed
74 75 76 77
    coding_arg_ccl_encoder,
    coding_arg_ccl_valids,
    coding_arg_ccl_max
  };
Karl Heuer's avatar
Karl Heuer committed
78

Kenichi Handa's avatar
Kenichi Handa committed
79 80 81 82 83 84 85
/* Hash table for all coding systems.  Keys are coding system symbols
   and values are spec vectors of the corresponding coding system.  A
   spec vector has the form [ ATTRS ALIASES EOL-TYPE ].  ATTRS is a
   vector of attribute of the coding system.  ALIASES is a list of
   aliases (symbols) of the coding system.  EOL-TYPE is `unix', `dos',
   `mac' or a vector of coding systems (symbols).  */

Kenichi Handa's avatar
Kenichi Handa committed
86
extern Lisp_Object Vcoding_system_hash_table;
Karl Heuer's avatar
Karl Heuer committed
87

Kenichi Handa's avatar
Kenichi Handa committed
88

Kenichi Handa's avatar
Kenichi Handa committed
89
/* Enumeration of coding system type.  */
Karl Heuer's avatar
Karl Heuer committed
90

Kenichi Handa's avatar
Kenichi Handa committed
91 92 93 94 95 96 97 98 99 100 101 102 103
enum coding_system_type
  {
    coding_type_charset = 0,
    coding_type_utf_8,
    coding_type_utf_16,
    coding_type_iso_2022,
    coding_type_emacs_mule,
    coding_type_sjis,
    coding_type_ccl,
    coding_type_raw_text,
    coding_type_undecided,

    /* Count of the types above; not itself a coding system type.  */
    coding_type_max
  };
Karl Heuer's avatar
Karl Heuer committed
104 105


Kenichi Handa's avatar
Kenichi Handa committed
106
/* Enumeration of end-of-line format type.  */
Karl Heuer's avatar
Karl Heuer committed
107

Kenichi Handa's avatar
Kenichi Handa committed
108 109 110 111 112 113 114 115 116 117 118 119 120
enum end_of_line_type
  {
    /* Line-feed only, same as Emacs' internal format.  */
    eol_lf,

    /* Sequence of carriage-return and line-feed.  */
    eol_crlf,

    /* Carriage-return only.  */
    eol_cr,

    /* Accept any of the above.  Produce line-feed only.  */
    eol_any,

    /* The eol-type has not been decided yet.  */
    eol_undecided,

    /* Count of the values above.  */
    eol_type_max
  };
Karl Heuer's avatar
Karl Heuer committed
121

Kenichi Handa's avatar
Kenichi Handa committed
122
/* Enumeration of index to an attribute vector of a coding system.  */
Karl Heuer's avatar
Karl Heuer committed
123

Kenichi Handa's avatar
Kenichi Handa committed
124 125 126 127 128 129 130 131 132 133
enum coding_attr_index
  {
    /* Attributes read through the CODING_ATTR_XXX accessor macros.  */
    coding_attr_base_name,
    coding_attr_docstring,
    coding_attr_mnemonic,
    coding_attr_type,
    coding_attr_charset_list,
    coding_attr_ascii_compat,
    coding_attr_decode_tbl,
    coding_attr_encode_tbl,
    coding_attr_trans_tbl,
    coding_attr_post_read,
    coding_attr_pre_write,
    coding_attr_default_char,
    coding_attr_for_unibyte,
    coding_attr_plist,

    coding_attr_category,
    coding_attr_safe_charsets,

    /* The following are extra attributes for each type.  */
    coding_attr_charset_valids,

    coding_attr_ccl_decoder,
    coding_attr_ccl_encoder,
    coding_attr_ccl_valids,

    coding_attr_iso_initial,
    coding_attr_iso_usage,
    coding_attr_iso_request,
    coding_attr_iso_flags,

    coding_attr_utf_bom,
    coding_attr_utf_16_endian,

    coding_attr_emacs_mule_full,

    /* Count of the indices above.  */
    coding_attr_last_index
  };
Karl Heuer's avatar
Karl Heuer committed
163 164


Kenichi Handa's avatar
Kenichi Handa committed
165 166
/* Macros to access an element of an attribute vector.  */

Juanma Barranquero's avatar
Juanma Barranquero committed
167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183
/* Each macro expands to AREF (ATTRS, INDEX), where INDEX is the
   corresponding coding_attr_XXX value and ATTRS is an attribute
   vector of a coding system.  */
#define CODING_ATTR_BASE_NAME(attrs)	AREF (attrs, coding_attr_base_name)
#define CODING_ATTR_TYPE(attrs)		AREF (attrs, coding_attr_type)
#define CODING_ATTR_CHARSET_LIST(attrs)	AREF (attrs, coding_attr_charset_list)
#define CODING_ATTR_MNEMONIC(attrs)	AREF (attrs, coding_attr_mnemonic)
#define CODING_ATTR_DOCSTRING(attrs)	AREF (attrs, coding_attr_docstring)
#define CODING_ATTR_ASCII_COMPAT(attrs)	AREF (attrs, coding_attr_ascii_compat)
#define CODING_ATTR_DECODE_TBL(attrs)	AREF (attrs, coding_attr_decode_tbl)
#define CODING_ATTR_ENCODE_TBL(attrs)	AREF (attrs, coding_attr_encode_tbl)
#define CODING_ATTR_TRANS_TBL(attrs)	AREF (attrs, coding_attr_trans_tbl)
#define CODING_ATTR_POST_READ(attrs)	AREF (attrs, coding_attr_post_read)
#define CODING_ATTR_PRE_WRITE(attrs)	AREF (attrs, coding_attr_pre_write)
#define CODING_ATTR_DEFAULT_CHAR(attrs)	AREF (attrs, coding_attr_default_char)
#define CODING_ATTR_FOR_UNIBYTE(attrs)	AREF (attrs, coding_attr_for_unibyte)
/* NOTE(review): coding_attr_flushing is not an enumerator of enum
   coding_attr_index as declared in this file, so expanding this macro
   will not compile unless that name is defined elsewhere -- confirm
   and remove if it is dead.  */
#define CODING_ATTR_FLUSHING(attrs)	AREF (attrs, coding_attr_flushing)
#define CODING_ATTR_PLIST(attrs)	AREF (attrs, coding_attr_plist)
#define CODING_ATTR_CATEGORY(attrs)	AREF (attrs, coding_attr_category)
#define CODING_ATTR_SAFE_CHARSETS(attrs)AREF (attrs, coding_attr_safe_charsets)
184 185


Kenichi Handa's avatar
Kenichi Handa committed
186 187 188 189 190 191
/* The macros below take ID, an index into Vcoding_system_hash_table.
   The value stored for each key is a spec vector of the form
   [ ATTRS ALIASES EOL-TYPE ], hence the element indices 0, 1 and 2.  */

/* Return the name (the hash key) of a coding system specified by ID.  */
#define CODING_ID_NAME(id) \
  (HASH_KEY (XHASH_TABLE (Vcoding_system_hash_table), id))

/* Return the attribute vector of a coding system specified by ID.  */
#define CODING_ID_ATTRS(id) \
  (AREF (HASH_VALUE (XHASH_TABLE (Vcoding_system_hash_table), id), 0))

/* Return the list of aliases of a coding system specified by ID.  */
#define CODING_ID_ALIASES(id) \
  (AREF (HASH_VALUE (XHASH_TABLE (Vcoding_system_hash_table), id), 1))

/* Return the eol-type of a coding system specified by ID.  */
#define CODING_ID_EOL_TYPE(id) \
  (AREF (HASH_VALUE (XHASH_TABLE (Vcoding_system_hash_table), id), 2))
204

Kenichi Handa's avatar
Kenichi Handa committed
205 206

/* Return the spec vector of CODING_SYSTEM_SYMBOL, or Qnil if the
   symbol is not a key of Vcoding_system_hash_table.  */
#define CODING_SYSTEM_SPEC(coding_system_symbol) \
  (Fgethash (coding_system_symbol, Vcoding_system_hash_table, Qnil))

/* Return the ID of CODING_SYSTEM_SYMBOL, i.e. the result of
   hash_lookup on Vcoding_system_hash_table.  The macros in this file
   treat a negative result as "not a known coding system".  */
#define CODING_SYSTEM_ID(coding_system_symbol) \
  hash_lookup (XHASH_TABLE (Vcoding_system_hash_table), \
               coding_system_symbol, NULL)

/* Return 1 if CODING_SYSTEM_SYMBOL is a coding system, else 0.  The
   hash table is consulted first; when that fails and the symbol is
   non-nil, Fcoding_system_p decides.  The argument may be evaluated
   up to three times, so it must have no side effects.  */
#define CODING_SYSTEM_P(coding_system_symbol) \
  (CODING_SYSTEM_ID (coding_system_symbol) >= 0 \
   || (! NILP (coding_system_symbol) \
       && ! NILP (Fcoding_system_p (coding_system_symbol))))
Karl Heuer's avatar
Karl Heuer committed
224

Kenichi Handa's avatar
Kenichi Handa committed
225 226
/* Check if X is a coding system or not.  X passes when it has a
   non-negative ID in Vcoding_system_hash_table, or when
   Fcheck_coding_system returns non-nil for it; otherwise
   wrong_type_argument is called with Qcoding_system_p.  X may be
   evaluated more than once.  */
#define CHECK_CODING_SYSTEM(x) \
  do { \
    if (CODING_SYSTEM_ID (x) < 0 \
        && NILP (Fcheck_coding_system (x))) \
      wrong_type_argument (Qcoding_system_p, (x)); \
  } while (0)
233

Karl Heuer's avatar
Karl Heuer committed
234

Kenichi Handa's avatar
Kenichi Handa committed
235 236 237
/* Check if X is a coding system or not.  If it is, set SPEC to the
   spec vector of the coding system.  When the first lookup yields
   nil, Fcheck_coding_system is called on X and the lookup is retried;
   if SPEC is still nil, wrong_type_argument is called with
   Qcoding_system_p.  SPEC must be an lvalue; X may be evaluated more
   than once.  */
#define CHECK_CODING_SYSTEM_GET_SPEC(x, spec) \
  do { \
    spec = CODING_SYSTEM_SPEC (x); \
    if (NILP (spec)) \
      { \
        Fcheck_coding_system (x); \
        spec = CODING_SYSTEM_SPEC (x); \
      } \
    if (NILP (spec)) \
      wrong_type_argument (Qcoding_system_p, (x)); \
  } while (0)
249

250

Kenichi Handa's avatar
Kenichi Handa committed
251 252 253
/* Check if X is a coding system or not.  If it is, set ID to the
   ID of the coding system.  When the first lookup yields a negative
   ID, Fcheck_coding_system is called on X and the lookup is retried;
   if ID is still negative, wrong_type_argument is called with
   Qcoding_system_p.  ID must be an lvalue; X may be evaluated more
   than once.  */
#define CHECK_CODING_SYSTEM_GET_ID(x, id) \
  do \
    { \
      id = CODING_SYSTEM_ID (x); \
      if (id < 0) \
        { \
          Fcheck_coding_system (x); \
          id = CODING_SYSTEM_ID (x); \
        } \
      if (id < 0) \
        wrong_type_argument (Qcoding_system_p, (x)); \
    } while (0)
Karl Heuer's avatar
Karl Heuer committed
266 267 268 269


/*** GENERAL section ***/

Kenichi Handa's avatar
Kenichi Handa committed
270 271
/* Enumeration of result codes of code conversion.  A value of this
   type is stored in the `result' member of struct coding_system.  */
enum coding_result_code
  {
    CODING_RESULT_SUCCESS,
    CODING_RESULT_INSUFFICIENT_SRC,
    CODING_RESULT_INSUFFICIENT_DST,
    CODING_RESULT_INCONSISTENT_EOL,
    CODING_RESULT_INVALID_SRC,
    CODING_RESULT_INTERRUPT,
    CODING_RESULT_INSUFFICIENT_MEM
  };

282

Kenichi Handa's avatar
Kenichi Handa committed
283
/* Macros used for the member `mode' of the struct coding_system.
   Each value is a distinct single bit, so they can be OR-ed.  */

/* If set, recover the original CR or LF of the already decoded text
   when the decoding routine encounters an inconsistent eol format.  */
#define CODING_MODE_INHIBIT_INCONSISTENT_EOL	0x01

/* If set, the decoding/encoding routines treat the current data as
   the last block of the whole text to be converted, and do the
   appropriate finishing job.  */
#define CODING_MODE_LAST_BLOCK			0x02

/* If set, it means that the current source text is in a buffer which
   enables selective display.  */
#define CODING_MODE_SELECTIVE_DISPLAY		0x04

/* This flag is used by the decoding/encoding routines on the fly.  If
   set, it means that right-to-left text is being processed.  */
#define CODING_MODE_DIRECTION			0x08

/* NOTE(review): the meaning of this bit is not documented in this
   header -- confirm against its users in coding.c.  */
#define CODING_MODE_FIXED_DESTINATION		0x10

/* If set, it means that the encoding routines produce some safe
   ASCII characters (usually '?') for unsupported characters.  */
#define CODING_MODE_SAFE_ENCODING		0x20

Kenichi Handa's avatar
Kenichi Handa committed
308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338
  /* For handling composition sequence.  */
#include "composite.h"

/* Values for the `state' member of struct composition_status.  */
enum composition_state
  {
    COMPOSING_NO = 0,
    COMPOSING_CHAR,
    COMPOSING_RULE,
    COMPOSING_COMPONENT_CHAR,
    COMPOSING_COMPONENT_RULE
  };

/* Structure for the current composition status.  It is embedded as
   the `cmp_status' member of struct iso_2022_spec and struct
   emacs_mule_spec.  */
struct composition_status
{
  /* One of the COMPOSING_XXX values.  */
  enum composition_state state;
  /* NOTE(review): enum composition_method is not declared in this
     file; presumably it comes from composite.h -- confirm.  */
  enum composition_method method;
  int old_form;		  /* 0:pre-21 form, 1:post-21 form */
  int length;		  /* number of elements produced in charbuf */
  int nchars;		  /* number of characters composed */
  int ncomps;		  /* number of composition components */
  /* Maximum carryover is for the case of COMPOSITION_WITH_RULE_ALTCHARS.
     See the comment in coding.c.  The array size is the sum of the
     four parts annotated below.  */
  int carryover[4 		/* annotation header */
		+ MAX_COMPOSITION_COMPONENTS * 3 - 2 /* ALTs and RULEs */
		+ 2				     /* intermediate -1 -1 */
		+ MAX_COMPOSITION_COMPONENTS	     /* CHARs */
		];
};


Kenichi Handa's avatar
Kenichi Handa committed
339 340 341
/* Structure of the field `spec.iso_2022' in the structure
   `coding_system'.  */
struct iso_2022_spec
Karl Heuer's avatar
Karl Heuer committed
342
{
Kenichi Handa's avatar
Kenichi Handa committed
343
  /* Bit-wise-or of CODING_ISO_FLAG_XXX.  */
Kenichi Handa's avatar
Kenichi Handa committed
344
  unsigned flags;
Karl Heuer's avatar
Karl Heuer committed
345

Kenichi Handa's avatar
Kenichi Handa committed
346 347
  /* The current graphic register invoked to each graphic plane.  */
  int current_invocation[2];
348

Kenichi Handa's avatar
Kenichi Handa committed
349 350 351 352
  /* The current charset designated to each graphic register.  The
     value -1 means that not charset is designated, -2 means that
     there was an invalid designation previously.  */
  int current_designation[4];
Karl Heuer's avatar
Karl Heuer committed
353

Kenichi Handa's avatar
Kenichi Handa committed
354 355 356
  /* Set to 1 temporarily only when graphic register 2 or 3 is invoked
     by single-shift while encoding.  */
  int single_shifting;
Karl Heuer's avatar
Karl Heuer committed
357

Kenichi Handa's avatar
Kenichi Handa committed
358 359
  /* Set to 1 temporarily only when processing at beginning of line.  */
  int bol;
Kenichi Handa's avatar
Kenichi Handa committed
360 361 362 363 364 365 366 367 368 369 370 371 372 373 374

  /* If positive, we are now scanning CTEXT extended segment.  */
  int ctext_extended_segment_len;

  /* If nonzero, we are now scanning embedded UTF-8 sequence.  */
  int embedded_utf_8;

  /* The current composition.  */
  struct composition_status cmp_status;
};

/* Structure of the field `spec.emacs_mule' in the structure
   `coding_system'.  */
struct emacs_mule_spec
{
  /* NOTE(review): presumably mirrors the coding_attr_emacs_mule_full
     attribute -- confirm against coding.c.  */
  int full_support;

  /* The current composition.  */
  struct composition_status cmp_status;
};
Karl Heuer's avatar
Karl Heuer committed
376

Kenichi Handa's avatar
Kenichi Handa committed
377
struct ccl_spec;
Karl Heuer's avatar
Karl Heuer committed
378

379
/* BOM handling for the UTF coding systems; used by the `bom' member
   of struct utf_16_spec and by `spec.utf_8_bom' in struct
   coding_system.  */
enum utf_bom_type
  {
    utf_detect_bom,
    utf_without_bom,
    utf_with_bom
  };
385

Kenichi Handa's avatar
Kenichi Handa committed
386 387 388 389 390
/* Byte order for UTF-16; used by the `endian' member of struct
   utf_16_spec.  */
enum utf_16_endian_type
  {
    utf_16_big_endian = 0,
    utf_16_little_endian
  };
391

Kenichi Handa's avatar
Kenichi Handa committed
392 393
struct utf_16_spec
{
394
  enum utf_bom_type bom;
Kenichi Handa's avatar
Kenichi Handa committed
395 396 397
  enum utf_16_endian_type endian;
  int surrogate;
};
398

399 400 401 402 403 404 405 406 407 408
/* Detection state handed to the `detector' function of a struct
   coding_system.  */
struct coding_detection_info
{
  /* Values of these members are bitwise-OR of CATEGORY_MASK_XXXs.  */
  /* Which categories are already checked.  */
  int checked;
  /* Which categories are strongly found.  */
  int found;
  /* Which categories are rejected.  */
  int rejected;
};
409 410


Kenichi Handa's avatar
Kenichi Handa committed
411 412 413 414 415 416 417
struct coding_system
{
  /* ID number of the coding system.  This is an index to
     Vcoding_system_hash_table.  This value is set by
     setup_coding_system.  At the early stage of building time, this
     value is -1 in the array coding_categories to indicate that no
     coding-system of that category is yet defined.  */
418
  ptrdiff_t id;
Kenichi Handa's avatar
Kenichi Handa committed
419 420 421 422 423 424 425 426

  /* Flag bits of the coding system.  The meaning of each bit is common
     to all types of coding systems.  */
  int common_flags;

  /* Mode bits of the coding system.  See the comments of the macros
     CODING_MODE_XXX.  */
  unsigned int mode;
427

Karl Heuer's avatar
Karl Heuer committed
428
  /* Detailed information specific to each type of coding system.  */
Kenichi Handa's avatar
Kenichi Handa committed
429
  union
Karl Heuer's avatar
Karl Heuer committed
430
    {
Kenichi Handa's avatar
Kenichi Handa committed
431 432 433
      struct iso_2022_spec iso_2022;
      struct ccl_spec *ccl;	/* Defined in ccl.h.  */
      struct utf_16_spec utf_16;
434
      enum utf_bom_type utf_8_bom;
Kenichi Handa's avatar
Kenichi Handa committed
435
      struct emacs_mule_spec emacs_mule;
Karl Heuer's avatar
Karl Heuer committed
436 437
    } spec;

Kenichi Handa's avatar
Kenichi Handa committed
438
  int max_charset_id;
439
  unsigned char *safe_charsets;
440

Kenichi Handa's avatar
Kenichi Handa committed
441 442 443 444
  /* The following two members specify how binary 8-bit code 128..255
     are represented in source and destination text respectively.  1
     means they are represented by 2-byte sequence, 0 means they are
     represented by 1-byte as is (see the comment in character.h).  */
445 446 447
  unsigned src_multibyte : 1;
  unsigned dst_multibyte : 1;

448 449 450 451
  /* How may heading bytes we can skip for decoding.  This is set to
     -1 in setup_coding_system, and updated by detect_coding.  So,
     when this is equal to the byte length of the text being
     converted, we can skip the actual conversion process.  */
452
  EMACS_INT head_ascii;
453 454

  /* The following members are set by encoding/decoding routine.  */
Kenichi Handa's avatar
Kenichi Handa committed
455
  EMACS_INT produced, produced_char, consumed, consumed_char;
456

457 458 459
  /* Number of error source data found in a decoding routine.  */
  int errors;

Kenichi Handa's avatar
Kenichi Handa committed
460 461
  /* Store the positions of error source data. */
  EMACS_INT *error_positions;
462

Kenichi Handa's avatar
Kenichi Handa committed
463 464
  /* Finish status of code conversion.  */
  enum coding_result_code result;
465

Kenichi Handa's avatar
Kenichi Handa committed
466 467
  EMACS_INT src_pos, src_pos_byte, src_chars, src_bytes;
  Lisp_Object src_object;
Kenichi Handa's avatar
Kenichi Handa committed
468
  const unsigned char *source;
Karl Heuer's avatar
Karl Heuer committed
469

Kenichi Handa's avatar
Kenichi Handa committed
470 471 472
  EMACS_INT dst_pos, dst_pos_byte, dst_bytes;
  Lisp_Object dst_object;
  unsigned char *destination;
Karl Heuer's avatar
Karl Heuer committed
473

Miles Bader's avatar
Miles Bader committed
474
  /* Set to 1 if the source of conversion is not in the member
Kenichi Handa's avatar
Kenichi Handa committed
475
     `charbuf', but at `src_object'.  */
Kenichi Handa's avatar
Kenichi Handa committed
476
  int chars_at_source;
Karl Heuer's avatar
Karl Heuer committed
477

Kenichi Handa's avatar
Kenichi Handa committed
478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494
  /* If an element is non-negative, it is a character code.

     If it is in the range -128..-1, it is a 8-bit character code
     minus 256.

     If it is less than -128, it specifies the start of an annotation
     chunk.  The length of the chunk is -128 minus the value of the
     element.  The following elements are OFFSET, ANNOTATION-TYPE, and
     a sequence of actual data for the annotation.  OFFSET is a
     character position offset from dst_pos or src_pos,
     ANNOTATION-TYPE specfies the meaning of the annotation and how to
     handle the following data..  */
  int *charbuf;
  int charbuf_size, charbuf_used;

  /* Set to 1 if charbuf contains an annotation.  */
  int annotated;
Karl Heuer's avatar
Karl Heuer committed
495

Kenichi Handa's avatar
Kenichi Handa committed
496 497
  unsigned char carryover[64];
  int carryover_bytes;
498

Kenichi Handa's avatar
Kenichi Handa committed
499 500
  int default_char;

Jan D's avatar
Jan D committed
501 502 503 504
  int (*detector) (struct coding_system *,
                   struct coding_detection_info *);
  void (*decoder) (struct coding_system *);
  int (*encoder) (struct coding_system *);
Kenichi Handa's avatar
Kenichi Handa committed
505 506 507 508 509 510 511 512
};

/* Meanings of bits in the member `common_flags' of the structure
   coding_system.  The lowest 8 bits are reserved for various kinds of
   annotations.  */
#define CODING_ANNOTATION_MASK			0x00FF
#define CODING_ANNOTATE_COMPOSITION_MASK	0x0001
#define CODING_ANNOTATE_DIRECTION_MASK		0x0002
/* NOTE(review): 0x0003 is not a separate bit; it equals
   CODING_ANNOTATE_COMPOSITION_MASK | CODING_ANNOTATE_DIRECTION_MASK,
   so a test against this mask is also true when either of those two
   flags is set.  Confirm whether 0x0004 was intended before changing
   the value.  */
#define CODING_ANNOTATE_CHARSET_MASK		0x0003
#define CODING_FOR_UNIBYTE_MASK			0x0100
#define CODING_REQUIRE_FLUSHING_MASK		0x0200
#define CODING_REQUIRE_DECODING_MASK		0x0400
#define CODING_REQUIRE_ENCODING_MASK		0x0800
#define CODING_REQUIRE_DETECTION_MASK		0x1000
#define CODING_RESET_AT_BOL_MASK		0x2000

/* In the macros below CODING is a pointer to a struct coding_system.
   Those that only mask `common_flags' yield the masked bits (nonzero
   when set), not necessarily 1; those built with || yield 0 or 1.  */

/* Return nonzero if the coding context CODING requires annotation
   handling.  */
#define CODING_REQUIRE_ANNOTATION(coding) \
  ((coding)->common_flags & CODING_ANNOTATION_MASK)

/* Return nonzero if the coding context CODING prefers decoding into
   unibyte.  */
#define CODING_FOR_UNIBYTE(coding) \
  ((coding)->common_flags & CODING_FOR_UNIBYTE_MASK)

/* Return nonzero if the coding context CODING requires specific code
   to be attached at the tail of converted text.  */
#define CODING_REQUIRE_FLUSHING(coding) \
  ((coding)->common_flags & CODING_REQUIRE_FLUSHING_MASK)

/* Return 1 if the coding context CODING requires code conversion on
   decoding.  */
#define CODING_REQUIRE_DECODING(coding) \
  ((coding)->dst_multibyte \
   || ((coding)->common_flags & CODING_REQUIRE_DECODING_MASK))


/* Return 1 if the coding context CODING requires code conversion on
   encoding.
   The non-multibyte part of the condition is to support encoding of
   unibyte strings/buffers generated by string-as-unibyte or
   (set-buffer-multibyte nil) from multibyte strings/buffers.  */
#define CODING_REQUIRE_ENCODING(coding) \
  ((coding)->src_multibyte \
   || ((coding)->common_flags & CODING_REQUIRE_ENCODING_MASK) \
   || ((coding)->mode & CODING_MODE_SELECTIVE_DISPLAY))


/* Return nonzero if the coding context CODING requires some kind of
   code detection.  */
#define CODING_REQUIRE_DETECTION(coding) \
  ((coding)->common_flags & CODING_REQUIRE_DETECTION_MASK)

/* Return 1 if the coding context CODING requires code conversion on
   decoding or some kind of code detection.  */
#define CODING_MAY_REQUIRE_DECODING(coding) \
  (CODING_REQUIRE_DECODING (coding) \
   || CODING_REQUIRE_DETECTION (coding))
Karl Heuer's avatar
Karl Heuer committed
563 564 565 566 567 568

/* Macros to decode or encode a character of JISX0208 in SJIS.  Each
   macro takes CODE, a modifiable integer holding the first
   position-code in bits 8 and up and the second position-code in the
   low 8 bits, and overwrites it with the converted code in the same
   layout.  */

Kenichi Handa's avatar
Kenichi Handa committed
569 570 571 572 573 574 575 576 577 578 579 580 581
/* Convert CODE from SJIS to JIS in place.  CODE must be a modifiable
   integer lvalue holding the first byte in bits 8 and up and the
   second byte in the low 8 bits; it is read twice and written once.
   The temporaries use names ending in `_' so that an argument called
   s1, s2, j1 or j2 is not captured by the macro's own locals.  */
#define SJIS_TO_JIS(code)                                               \
  do {                                                                  \
    int sjis_s1_, sjis_s2_, jis_j1_, jis_j2_;                           \
                                                                        \
    sjis_s1_ = (code) >> 8, sjis_s2_ = (code) & 0xFF;                   \
                                                                        \
    if (sjis_s2_ >= 0x9F)                                               \
      {                                                                 \
        jis_j1_ = sjis_s1_ * 2 - (sjis_s1_ >= 0xE0 ? 0x160 : 0xE0);     \
        jis_j2_ = sjis_s2_ - 0x7E;                                      \
      }                                                                 \
    else                                                                \
      {                                                                 \
        jis_j1_ = sjis_s1_ * 2 - (sjis_s1_ >= 0xE0 ? 0x161 : 0xE1);     \
        jis_j2_ = sjis_s2_ - (sjis_s2_ >= 0x7F ? 0x20 : 0x1F);          \
      }                                                                 \
    (code) = (jis_j1_ << 8) | jis_j2_;                                  \
  } while (0)

584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608
/* Variant of SJIS_TO_JIS for first bytes 0xF0 and above, which are
   mapped to rows through the tables below.  CODE must be a modifiable
   integer lvalue holding the first byte in bits 8 and up and the
   second byte in the low 8 bits; it is converted in place.  The
   temporaries use names ending in `_' so that an argument called s1,
   s2, j1 or j2 is not captured by the macro's own locals.  */
#define SJIS_TO_JIS2(code)                                              \
  do {                                                                  \
    int sjis_s1_, sjis_s2_, jis_j1_, jis_j2_;                           \
                                                                        \
    sjis_s1_ = (code) >> 8, sjis_s2_ = (code) & 0xFF;                   \
                                                                        \
    if (sjis_s2_ >= 0x9F)                                               \
      {                                                                 \
        jis_j1_ = (sjis_s1_ == 0xF0 ? 0x28                              \
                   : sjis_s1_ == 0xF1 ? 0x24                            \
                   : sjis_s1_ == 0xF2 ? 0x2C                            \
                   : sjis_s1_ == 0xF3 ? 0x2E                            \
                   : 0x6E + (sjis_s1_ - 0xF4) * 2);                     \
        jis_j2_ = sjis_s2_ - 0x7E;                                      \
      }                                                                 \
    else                                                                \
      {                                                                 \
        jis_j1_ = (sjis_s1_ <= 0xF2 ? 0x21 + (sjis_s1_ - 0xF0) * 2      \
                   : sjis_s1_ <= 0xF4 ? 0x2D + (sjis_s1_ - 0xF3) * 2    \
                   : 0x6F + (sjis_s1_ - 0xF5) * 2);                     \
        jis_j2_ = sjis_s2_ - (sjis_s2_ >= 0x7F ? 0x20 : 0x1F);          \
      }                                                                 \
    (code) = (jis_j1_ << 8) | jis_j2_;                                  \
  } while (0)

Kenichi Handa's avatar
Kenichi Handa committed
609 610

/* Convert CODE from JIS to SJIS in place.  CODE must be a modifiable
   integer lvalue holding the first byte in bits 8 and up and the
   second byte in the low 8 bits; it is read twice and written once.
   The temporaries use names ending in `_' so that an argument called
   s1, s2, j1 or j2 is not captured by the macro's own locals.  */
#define JIS_TO_SJIS(code)                                               \
  do {                                                                  \
    int sjis_s1_, sjis_s2_, jis_j1_, jis_j2_;                           \
                                                                        \
    jis_j1_ = (code) >> 8, jis_j2_ = (code) & 0xFF;                     \
    if (jis_j1_ & 1)                                                    \
      {                                                                 \
        sjis_s1_ = jis_j1_ / 2 + (jis_j1_ < 0x5F ? 0x71 : 0xB1);        \
        sjis_s2_ = jis_j2_ + (jis_j2_ >= 0x60 ? 0x20 : 0x1F);           \
      }                                                                 \
    else                                                                \
      {                                                                 \
        sjis_s1_ = jis_j1_ / 2 + (jis_j1_ < 0x5F ? 0x70 : 0xB0);        \
        sjis_s2_ = jis_j2_ + 0x7E;                                      \
      }                                                                 \
    (code) = (sjis_s1_ << 8) | sjis_s2_;                                \
  } while (0)

624 625 626 627 628 629 630 631
/* Variant of JIS_TO_SJIS that maps rows to first bytes 0xF0 and above
   through the tables below.  CODE must be a modifiable integer lvalue
   holding the first byte in bits 8 and up and the second byte in the
   low 8 bits; it is converted in place.  The temporaries use names
   ending in `_' so that an argument called s1, s2, j1 or j2 is not
   captured by the macro's own locals.  */
#define JIS_TO_SJIS2(code)                                              \
  do {                                                                  \
    int sjis_s1_, sjis_s2_, jis_j1_, jis_j2_;                           \
                                                                        \
    jis_j1_ = (code) >> 8, jis_j2_ = (code) & 0xFF;                     \
    if (jis_j1_ & 1)                                                    \
      {                                                                 \
        sjis_s1_ = (jis_j1_ <= 0x25 ? 0xF0 + (jis_j1_ - 0x21) / 2       \
                    : jis_j1_ <= 0x2F ? 0xF3 + (jis_j1_ - 0x2D) / 2     \
                    : 0xF5 + (jis_j1_ - 0x6F) / 2);                     \
        sjis_s2_ = jis_j2_ + (jis_j2_ >= 0x60 ? 0x20 : 0x1F);           \
      }                                                                 \
    else                                                                \
      {                                                                 \
        sjis_s1_ = (jis_j1_ == 0x28 ? 0xF0                              \
                    : jis_j1_ == 0x24 ? 0xF1                            \
                    : jis_j1_ == 0x2C ? 0xF2                            \
                    : jis_j1_ == 0x2E ? 0xF3                            \
                    : 0xF4 + (jis_j1_ - 0x6E) / 2);                     \
        sjis_s2_ = jis_j2_ + 0x7E;                                      \
      }                                                                 \
    (code) = (sjis_s1_ << 8) | sjis_s2_;                                \
  } while (0)
Kenichi Handa's avatar
Kenichi Handa committed
647

648 649 650 651
/* Encode the file name NAME using the specified coding system
   for file names, if any.  Vfile_name_coding_system is used when it
   is neither nil nor the integer 0; otherwise
   Vdefault_file_name_coding_system is tried under the same test; if
   neither qualifies the value is NAME itself, unconverted.  NAME is
   evaluated exactly once.  */
#define ENCODE_FILE(name) \
  (! NILP (Vfile_name_coding_system) \
   && !EQ (Vfile_name_coding_system, make_number (0)) \
   ? code_convert_string_norecord (name, Vfile_name_coding_system, 1) \
   : (! NILP (Vdefault_file_name_coding_system) \
      && !EQ (Vdefault_file_name_coding_system, make_number (0)) \
      ? code_convert_string_norecord (name, Vdefault_file_name_coding_system, 1) \
      : (name)))

Kenichi Handa's avatar
Kenichi Handa committed
659

660 661 662 663
/* Decode the file name NAME using the specified coding system
   for file names, if any.  Vfile_name_coding_system is used when it
   is neither nil nor the integer 0; otherwise
   Vdefault_file_name_coding_system is tried under the same test; if
   neither qualifies the value is NAME itself, unconverted.  NAME is
   evaluated exactly once.  */
#define DECODE_FILE(name) \
  (! NILP (Vfile_name_coding_system) \
   && !EQ (Vfile_name_coding_system, make_number (0)) \
   ? code_convert_string_norecord (name, Vfile_name_coding_system, 0) \
   : (! NILP (Vdefault_file_name_coding_system) \
      && !EQ (Vdefault_file_name_coding_system, make_number (0)) \
      ? code_convert_string_norecord (name, Vdefault_file_name_coding_system, 0) \
      : (name)))

Kenichi Handa's avatar
Kenichi Handa committed
671

672
/* Encode the string STR using the specified coding system
   for system functions, if any.  Vlocale_coding_system is used when
   it is neither nil nor the integer 0; otherwise the value is STR
   itself, unconverted.  STR is evaluated exactly once.  */
#define ENCODE_SYSTEM(str) \
  (! NILP (Vlocale_coding_system) \
   && !EQ (Vlocale_coding_system, make_number (0)) \
   ? code_convert_string_norecord (str, Vlocale_coding_system, 1) \
   : (str))

/* Decode the string STR using the specified coding system
   for system functions, if any.  Vlocale_coding_system is used when
   it is neither nil nor the integer 0; otherwise the value is STR
   itself, unconverted.  STR is evaluated exactly once.  */
#define DECODE_SYSTEM(str) \
  (! NILP (Vlocale_coding_system) \
   && !EQ (Vlocale_coding_system, make_number (0)) \
   ? code_convert_string_norecord (str, Vlocale_coding_system, 0) \
   : (str))
687

688
/* Encode the string STR with the coding system Qutf_8.
   Note that this encodes utf-8, not utf-8-emacs, so it's not a no-op.  */
#define ENCODE_UTF_8(str) code_convert_string_norecord (str, Qutf_8, 1)

Karl Heuer's avatar
Karl Heuer committed
691
/* Extern declarations.  */
extern Lisp_Object code_conversion_save (int, int);
extern int decoding_buffer_size (struct coding_system *, int);
extern int encoding_buffer_size (struct coding_system *, int);
extern void setup_coding_system (Lisp_Object, struct coding_system *);
extern Lisp_Object coding_charset_list (struct coding_system *);
extern Lisp_Object coding_system_charset_list (Lisp_Object);
extern Lisp_Object code_convert_string (Lisp_Object, Lisp_Object,
                                        Lisp_Object, int, int, int);
/* Called by the ENCODE_FILE, DECODE_FILE, ENCODE_SYSTEM, DECODE_SYSTEM
   and ENCODE_UTF_8 macros in this header; they pass 1 as the last
   argument to encode and 0 to decode.  */
extern Lisp_Object code_convert_string_norecord (Lisp_Object, Lisp_Object,
                                                 int);
extern Lisp_Object raw_text_coding_system (Lisp_Object);
extern Lisp_Object coding_inherit_eol_type (Lisp_Object, Lisp_Object);
extern Lisp_Object complement_process_encoding_system (Lisp_Object);

extern int decode_coding_gap (struct coding_system *,
                              EMACS_INT, EMACS_INT);
/* decode_coding_object and encode_coding_object are the functions the
   backward-compatibility macros in this header expand to.  */
extern void decode_coding_object (struct coding_system *,
                                  Lisp_Object, EMACS_INT, EMACS_INT,
                                  EMACS_INT, EMACS_INT, Lisp_Object);
extern void encode_coding_object (struct coding_system *,
                                  Lisp_Object, EMACS_INT, EMACS_INT,
                                  EMACS_INT, EMACS_INT, Lisp_Object);
Kenichi Handa's avatar
Kenichi Handa committed
714

Kenichi Handa's avatar
Kenichi Handa committed
715 716
/* Macros for backward compatibility.  */

Kenichi Handa's avatar
Kenichi Handa committed
717 718 719 720 721 722 723 724 725 726 727 728 729
/* Expand to a call of decode_coding_object on the current buffer:
   Fcurrent_buffer () is passed as both the second and the last
   argument, and FROM and TO are each passed together with their
   CHAR_TO_BYTE conversion.  FROM and TO are therefore evaluated
   twice each.  */
#define decode_coding_region(coding, from, to)                  \
  decode_coding_object ((coding), Fcurrent_buffer (),           \
                        (from), CHAR_TO_BYTE (from),            \
                        (to), CHAR_TO_BYTE (to),                \
                        Fcurrent_buffer ())


/* Expand to a call of encode_coding_object on the current buffer:
   Fcurrent_buffer () is passed as both the second and the last
   argument, and FROM and TO are each passed together with their
   CHAR_TO_BYTE conversion.  FROM and TO are therefore evaluated
   twice each.  */
#define encode_coding_region(coding, from, to)                  \
  encode_coding_object ((coding), Fcurrent_buffer (),           \
                        (from), CHAR_TO_BYTE (from),            \
                        (to), CHAR_TO_BYTE (to),                \
                        Fcurrent_buffer ())


/* Expand to a call of decode_coding_object with STRING as the second
   argument, covering characters 0 .. SCHARS (STRING) and bytes
   0 .. SBYTES (STRING), and with Qt as the last argument.  The
   expansion has type void; presumably the result is left in
   CODING->dst_object, as encode_coding_string reads it from there --
   confirm in coding.c.

   NOCOPY is accepted for source compatibility only and does not
   appear in the expansion.  STRING is evaluated three times.  */
#define decode_coding_string(coding, string, nocopy)                    \
  decode_coding_object ((coding), (string), 0, 0, SCHARS (string),      \
                        SBYTES (string), Qt)
Kenichi Handa's avatar
Kenichi Handa committed
732 733

/* Encode STRING using CODING and yield the result as an expression.

   When STRING_MULTIBYTE (STRING) is true, encode_coding_object is
   called over characters 0 .. SCHARS (STRING) and bytes
   0 .. SBYTES (STRING) with Qt as the last argument, and the value
   is CODING->dst_object.  Otherwise the value is STRING itself and
   no conversion is attempted.

   NOCOPY is accepted for source compatibility only and does not
   appear in the expansion.  STRING and CODING are evaluated more
   than once.  */
#define encode_coding_string(coding, string, nocopy)                    \
  (STRING_MULTIBYTE (string)                                            \
   ? (encode_coding_object ((coding), (string), 0, 0, SCHARS (string),  \
                            SBYTES (string), Qt),                       \
      (coding)->dst_object)                                             \
   : (string))
Kenichi Handa's avatar
Kenichi Handa committed
738 739 740 741 742 743 744 745 746 747 748


/* Decode BYTES bytes starting at SRC using CODING.

   Stores SRC in CODING->source, sets both CODING->src_chars and
   CODING->src_bytes to BYTES, and then calls decode_coding_object
   with Qnil as the second argument, 0 .. BYTES as both the character
   and the byte range, and DST_OBJECT as the last argument.

   The expansion is a `do { ... } while (0)' statement, so the macro
   can only be used where a statement is allowed and yields no value.
   CODING and BYTES are evaluated several times; avoid arguments with
   side effects.  */
#define decode_coding_c_string(coding, src, bytes, dst_object)		\
  do {									\
    (coding)->source = (src);						\
    (coding)->src_chars = (coding)->src_bytes = (bytes);		\
    decode_coding_object ((coding), Qnil, 0, 0, (bytes), (bytes),	\
			  (dst_object));				\
  } while (0)


749
extern Lisp_Object preferred_coding_system (void);

/* Lisp_Object values defined elsewhere.  NOTE(review): by the Emacs
   naming convention the Q prefix marks interned Lisp symbols; confirm
   at their definitions.  Qutf_8 is the coding system ENCODE_UTF_8
   passes to code_convert_string_norecord.  */
extern Lisp_Object Qutf_8, Qutf_8_emacs;

extern Lisp_Object Qcoding_category_index;
extern Lisp_Object Qcoding_system_p;
extern Lisp_Object Qraw_text, Qemacs_mule, Qno_conversion, Qundecided;
extern Lisp_Object Qbuffer_file_coding_system;

extern Lisp_Object Qunix, Qdos, Qmac;

extern Lisp_Object Qtranslation_table;
extern Lisp_Object Qtranslation_table_id;
763

Karl Heuer's avatar
Karl Heuer committed
764 765
/* Declarations visible only when the `emacs' macro is defined.  */
#ifdef emacs
extern Lisp_Object Qfile_coding_system;
extern Lisp_Object Qcall_process, Qcall_process_region;
extern Lisp_Object Qstart_process, Qopen_network_stream;
extern Lisp_Object Qwrite_region;

extern char *emacs_strerror (int);

/* Coding system to be used to encode text for terminal display when
   terminal coding system is nil.  */
extern struct coding_system safe_terminal_coding;

#endif /* emacs */

778 779 780
/* Error signaled when there's a problem with detecting coding system.  */
extern Lisp_Object Qcoding_system_error;

/* Table with one entry per possible byte value (256 entries).
   NOTE(review): presumably indexed by the leading byte of an
   emacs-mule sequence -- confirm at the definition.  */
extern char emacs_mule_bytes[256];
extern int emacs_mule_string_char (unsigned char *);
Kenichi Handa's avatar
Kenichi Handa committed
783

784
#endif /* EMACS_CODING_H */