coding.h 25.6 KB
Newer Older
Karl Heuer's avatar
Karl Heuer committed
1
/* Header for coding system handler.
2 3
   Copyright (C) 2001, 2002, 2003, 2004, 2005,
                 2006, 2007  Free Software Foundation, Inc.
Kenichi Handa's avatar
Kenichi Handa committed
4
   Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
Glenn Morris's avatar
Glenn Morris committed
5
     2005, 2006, 2007
Kenichi Handa's avatar
Kenichi Handa committed
6 7
     National Institute of Advanced Industrial Science and Technology (AIST)
     Registration Number H14PRO021
Kenichi Handa's avatar
Kenichi Handa committed
8
   Copyright (C) 2003
Kenichi Handa's avatar
Kenichi Handa committed
9 10
     National Institute of Advanced Industrial Science and Technology (AIST)
     Registration Number H13PRO009
Karl Heuer's avatar
Karl Heuer committed
11

Karl Heuer's avatar
Karl Heuer committed
12 13 14 15
This file is part of GNU Emacs.

GNU Emacs is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
16
the Free Software Foundation; either version 3, or (at your option)
Karl Heuer's avatar
Karl Heuer committed
17
any later version.
Karl Heuer's avatar
Karl Heuer committed
18

Karl Heuer's avatar
Karl Heuer committed
19 20 21 22
GNU Emacs is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.
Karl Heuer's avatar
Karl Heuer committed
23

Karl Heuer's avatar
Karl Heuer committed
24 25
You should have received a copy of the GNU General Public License
along with GNU Emacs; see the file COPYING.  If not, write to
Lute Kamstra's avatar
Lute Kamstra committed
26 27
the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
Boston, MA 02110-1301, USA.  */
Karl Heuer's avatar
Karl Heuer committed
28

29 30
#ifndef EMACS_CODING_H
#define EMACS_CODING_H
Karl Heuer's avatar
Karl Heuer committed
31

Kenichi Handa's avatar
Kenichi Handa committed
32
/* Index to arguments of Fdefine_coding_system_internal.

   The enumerators number the arguments shared by every coding system
   type, in order, starting from 0.  coding_arg_max is not an argument
   itself: its value is the count of the shared arguments, and the
   type-specific index enums in this file start their numbering from
   it.  */

enum define_coding_system_arg_index
  {
    coding_arg_name,
    coding_arg_mnemonic,
    coding_arg_coding_type,
    coding_arg_charset_list,
    coding_arg_ascii_compatible_p,
    coding_arg_decode_translation_table,
    coding_arg_encode_translation_table,
    coding_arg_post_read_conversion,
    coding_arg_pre_write_conversion,
    coding_arg_default_char,
    coding_arg_for_unibyte,
    coding_arg_plist,
    coding_arg_eol_type,
    coding_arg_max
  };
Karl Heuer's avatar
Karl Heuer committed
51

Kenichi Handa's avatar
Kenichi Handa committed
52
enum define_coding_iso2022_arg_index
Karl Heuer's avatar
Karl Heuer committed
53
  {
Kenichi Handa's avatar
Kenichi Handa committed
54 55 56 57 58
    coding_arg_iso2022_initial = coding_arg_max,
    coding_arg_iso2022_reg_usage,
    coding_arg_iso2022_request,
    coding_arg_iso2022_flags,
    coding_arg_iso2022_max
Karl Heuer's avatar
Karl Heuer committed
59 60
  };

Kenichi Handa's avatar
Kenichi Handa committed
61
enum define_coding_utf16_arg_index
Karl Heuer's avatar
Karl Heuer committed
62
  {
Kenichi Handa's avatar
Kenichi Handa committed
63 64 65
    coding_arg_utf16_bom = coding_arg_max,
    coding_arg_utf16_endian,
    coding_arg_utf16_max
Karl Heuer's avatar
Karl Heuer committed
66 67
  };

Kenichi Handa's avatar
Kenichi Handa committed
68 69
enum define_coding_ccl_arg_index
  {
70
    coding_arg_ccl_decoder = coding_arg_max,
Kenichi Handa's avatar
Kenichi Handa committed
71 72 73 74
    coding_arg_ccl_encoder,
    coding_arg_ccl_valids,
    coding_arg_ccl_max
  };
Karl Heuer's avatar
Karl Heuer committed
75

Kenichi Handa's avatar
Kenichi Handa committed
76 77 78 79 80 81 82
/* Hash table for all coding systems.  Keys are coding system symbols
   and values are spec vectors of the corresponding coding system.  A
   spec vector has the form [ ATTRS ALIASES EOL-TYPE ].  ATTRS is a
   vector of attribute of the coding system.  ALIASES is a list of
   aliases (symbols) of the coding system.  EOL-TYPE is `unix', `dos',
   `mac' or a vector of coding systems (symbols).  */

Kenichi Handa's avatar
Kenichi Handa committed
83
extern Lisp_Object Vcoding_system_hash_table;
Karl Heuer's avatar
Karl Heuer committed
84

Kenichi Handa's avatar
Kenichi Handa committed
85

Kenichi Handa's avatar
Kenichi Handa committed
86
/* Enumeration of coding system type.  */
Karl Heuer's avatar
Karl Heuer committed
87

Kenichi Handa's avatar
Kenichi Handa committed
88 89 90 91 92 93 94 95 96 97 98 99 100
/* Kinds of coding system.  The enumerators are numbered consecutively
   from 0, so coding_type_max (the last one) equals the number of
   kinds listed before it and is not a kind itself.  */
enum coding_system_type
  {
    coding_type_charset,
    coding_type_utf_8,
    coding_type_utf_16,
    coding_type_iso_2022,
    coding_type_emacs_mule,
    coding_type_sjis,
    coding_type_ccl,
    coding_type_raw_text,
    coding_type_undecided,
    coding_type_max
  };
Karl Heuer's avatar
Karl Heuer committed
101 102


Kenichi Handa's avatar
Kenichi Handa committed
103
/* Enumeration of end-of-line format type.  */
Karl Heuer's avatar
Karl Heuer committed
104

Kenichi Handa's avatar
Kenichi Handa committed
105 106 107 108 109 110 111 112 113 114 115 116 117
/* End-of-line conventions.  eol_type_max is the last enumerator, so
   its value equals the number of conventions listed before it.  */
enum end_of_line_type
  {
    eol_lf,		/* Line-feed only, same as Emacs' internal
			   format.  */
    eol_crlf,		/* Sequence of carriage-return and
			   line-feed.  */
    eol_cr,		/* Carriage-return only.  */
    eol_any,		/* Accept any of above.  Produce line-feed
			   only.  */
    eol_undecided,	/* This value is used to denote that the
			   eol-type is not yet decided.  */
    eol_type_max
  };
Karl Heuer's avatar
Karl Heuer committed
118

Kenichi Handa's avatar
Kenichi Handa committed
119
/* Enumeration of index to an attribute vector of a coding system.

   The attribute vector is the ATTRS element of a coding system's spec
   vector.  coding_attr_last_index is the last enumerator, so its
   value equals the number of attribute slots listed before it.  */

enum coding_attr_index
  {
    coding_attr_base_name,
    coding_attr_docstring,
    coding_attr_mnemonic,
    coding_attr_type,
    coding_attr_charset_list,
    coding_attr_ascii_compat,
    coding_attr_decode_tbl,
    coding_attr_encode_tbl,
    coding_attr_trans_tbl,
    coding_attr_post_read,
    coding_attr_pre_write,
    coding_attr_default_char,
    coding_attr_for_unibyte,
    coding_attr_plist,

    coding_attr_category,
    coding_attr_safe_charsets,

    /* The following are extra attributes for each type.  */
    coding_attr_charset_valids,

    coding_attr_ccl_decoder,
    coding_attr_ccl_encoder,
    coding_attr_ccl_valids,

    coding_attr_iso_initial,
    coding_attr_iso_usage,
    coding_attr_iso_request,
    coding_attr_iso_flags,

    coding_attr_utf_16_bom,
    coding_attr_utf_16_endian,

    coding_attr_emacs_mule_full,

    coding_attr_last_index
  };
Karl Heuer's avatar
Karl Heuer committed
160 161


Kenichi Handa's avatar
Kenichi Handa committed
162 163
/* Macros to access an element of an attribute vector.  ATTRS is the
   attribute vector of a coding system; each macro expands to an AREF
   on the slot named by the matching enum coding_attr_index value.  */

#define CODING_ATTR_BASE_NAME(attrs)	AREF (attrs, coding_attr_base_name)
#define CODING_ATTR_TYPE(attrs)		AREF (attrs, coding_attr_type)
#define CODING_ATTR_CHARSET_LIST(attrs)	AREF (attrs, coding_attr_charset_list)
#define CODING_ATTR_MNEMONIC(attrs)	AREF (attrs, coding_attr_mnemonic)
#define CODING_ATTR_DOCSTRING(attrs)	AREF (attrs, coding_attr_docstring)
#define CODING_ATTR_ASCII_COMPAT(attrs)	AREF (attrs, coding_attr_ascii_compat)
#define CODING_ATTR_DECODE_TBL(attrs)	AREF (attrs, coding_attr_decode_tbl)
#define CODING_ATTR_ENCODE_TBL(attrs)	AREF (attrs, coding_attr_encode_tbl)
#define CODING_ATTR_TRANS_TBL(attrs)	AREF (attrs, coding_attr_trans_tbl)
#define CODING_ATTR_POST_READ(attrs)	AREF (attrs, coding_attr_post_read)
#define CODING_ATTR_PRE_WRITE(attrs)	AREF (attrs, coding_attr_pre_write)
#define CODING_ATTR_DEFAULT_CHAR(attrs)	AREF (attrs, coding_attr_default_char)
#define CODING_ATTR_FOR_UNIBYTE(attrs)	AREF (attrs, coding_attr_for_unibyte)
/* NOTE(review): coding_attr_flushing is not an enumerator of
   enum coding_attr_index in this header, so any use of the following
   macro fails to compile unless that name is defined elsewhere.  It
   is kept only so that the set of macro names stays unchanged.  */
#define CODING_ATTR_FLUSHING(attrs)	AREF (attrs, coding_attr_flushing)
#define CODING_ATTR_PLIST(attrs)	AREF (attrs, coding_attr_plist)
#define CODING_ATTR_CATEGORY(attrs)	AREF (attrs, coding_attr_category)
#define CODING_ATTR_SAFE_CHARSETS(attrs)AREF (attrs, coding_attr_safe_charsets)
181 182


Kenichi Handa's avatar
Kenichi Handa committed
183 184 185 186 187 188
/* The following macros take ID, the index of a coding system's entry
   in Vcoding_system_hash_table.  They do not check that ID is valid;
   in particular a negative ID (lookup failure, see CODING_SYSTEM_ID)
   must not be passed.  */

/* Return the name of a coding system specified by ID.  */

#define CODING_ID_NAME(id) \
  (HASH_KEY (XHASH_TABLE (Vcoding_system_hash_table), (id)))

/* Return the attribute vector of a coding system specified by ID
   (element 0 of its spec vector).  */

#define CODING_ID_ATTRS(id)	\
  (AREF (HASH_VALUE (XHASH_TABLE (Vcoding_system_hash_table), (id)), 0))

/* Return the list of aliases of a coding system specified by ID
   (element 1 of its spec vector).  */

#define CODING_ID_ALIASES(id)	\
  (AREF (HASH_VALUE (XHASH_TABLE (Vcoding_system_hash_table), (id)), 1))

/* Return the eol-type of a coding system specified by ID
   (element 2 of its spec vector).  */

#define CODING_ID_EOL_TYPE(id)	\
  (AREF (HASH_VALUE (XHASH_TABLE (Vcoding_system_hash_table), (id)), 2))
201

Kenichi Handa's avatar
Kenichi Handa committed
202 203

/* Return the spec vector of CODING_SYSTEM_SYMBOL.  The lookup is an
   Fgethash on Vcoding_system_hash_table with Qnil as the default, so
   the result is Qnil when the symbol is not a key of the table.  */

#define CODING_SYSTEM_SPEC(coding_system_symbol)	\
  (Fgethash (coding_system_symbol, Vcoding_system_hash_table, Qnil))


/* Return the ID of CODING_SYSTEM_SYMBOL, i.e. the result of
   hash_lookup on Vcoding_system_hash_table.  The other macros in this
   file treat a negative result as "not a known coding system".  */

#define CODING_SYSTEM_ID(coding_system_symbol)			\
  (hash_lookup (XHASH_TABLE (Vcoding_system_hash_table),	\
		coding_system_symbol, NULL))

/* Return 1 iff CODING_SYSTEM_SYMBOL is a coding system.  The hash
   table is consulted first; only when that fails, and the symbol is
   not nil, is Fcoding_system_p called.  CODING_SYSTEM_SYMBOL may be
   evaluated up to three times, so it must have no side effects.  */

#define CODING_SYSTEM_P(coding_system_symbol)		\
  (CODING_SYSTEM_ID (coding_system_symbol) >= 0		\
   || (! NILP (coding_system_symbol)			\
       && ! NILP (Fcoding_system_p (coding_system_symbol))))
Karl Heuer's avatar
Karl Heuer committed
221

Kenichi Handa's avatar
Kenichi Handa committed
222 223
/* Check if X is a coding system or not.

   X is accepted if it is already a key of Vcoding_system_hash_table.
   Otherwise Fcheck_coding_system (X) is called, and if that returns
   nil, wrong_type_argument is called with Qcoding_system_p.  X may be
   evaluated up to three times, so it must have no side effects.  */

#define CHECK_CODING_SYSTEM(x)				\
  do {							\
    if (CODING_SYSTEM_ID (x) < 0			\
	&& NILP (Fcheck_coding_system (x)))		\
      wrong_type_argument (Qcoding_system_p, (x));	\
  } while (0)
230

Karl Heuer's avatar
Karl Heuer committed
231

Kenichi Handa's avatar
Kenichi Handa committed
232 233 234
/* Check if X is a coding system or not.  If it is, set SPEC to the
   spec vector of the coding system.

   When the first lookup yields nil, Fcheck_coding_system (X) is
   called and the lookup is retried once.  If SPEC is still nil,
   wrong_type_argument is called with Qcoding_system_p and its result
   is stored back into X.  Both X and SPEC must therefore be lvalues,
   and both are evaluated several times.  */

#define CHECK_CODING_SYSTEM_GET_SPEC(x, spec)		\
  do {							\
    spec = CODING_SYSTEM_SPEC (x);			\
    if (NILP (spec))					\
      {							\
	Fcheck_coding_system (x);			\
	spec = CODING_SYSTEM_SPEC (x);			\
      }							\
    if (NILP (spec))					\
      x = wrong_type_argument (Qcoding_system_p, (x));	\
  } while (0)
246

247

Kenichi Handa's avatar
Kenichi Handa committed
248 249 250
/* Check if X is a coding system or not.  If it is, set ID to the
   ID of the coding system.

   When the first lookup yields a negative ID,
   Fcheck_coding_system (X) is called and the lookup is retried once.
   If ID is still negative, wrong_type_argument is called with
   Qcoding_system_p and its result is stored back into X.  Both X and
   ID must therefore be lvalues, and both are evaluated several
   times.  */

#define CHECK_CODING_SYSTEM_GET_ID(x, id)			\
  do								\
    {								\
      id = CODING_SYSTEM_ID (x);				\
      if (id < 0)						\
	{							\
	  Fcheck_coding_system (x);				\
	  id = CODING_SYSTEM_ID (x);				\
	}							\
      if (id < 0)						\
	x = wrong_type_argument (Qcoding_system_p, (x));	\
    } while (0)
Karl Heuer's avatar
Karl Heuer committed
263 264 265 266


/*** GENERAL section ***/

Kenichi Handa's avatar
Kenichi Handa committed
267 268
/* Enumeration of result code of code conversion.  This is the type of
   the member `result' of struct coding_system.  The enumerators are
   numbered consecutively from 0 in the order listed.  */
enum coding_result_code
  {
    CODING_RESULT_SUCCESS,
    CODING_RESULT_INSUFFICIENT_SRC,
    CODING_RESULT_INSUFFICIENT_DST,
    CODING_RESULT_INCONSISTENT_EOL,
    CODING_RESULT_INVALID_SRC,
    CODING_RESULT_INTERRUPT,
    CODING_RESULT_INSUFFICIENT_MEM
  };

279

Kenichi Handa's avatar
Kenichi Handa committed
280
/* Macros used for the member `mode' of the struct coding_system.
   Each macro is a distinct single bit, so they can be OR-ed
   together.  */

/* If set, recover the original CR or LF of the already decoded text
   when the decoding routine encounters an inconsistent eol format.  */
#define CODING_MODE_INHIBIT_INCONSISTENT_EOL	0x01

/* If set, the decoding/encoding routines treat the current data as
   the last block of the whole text to be converted, and do the
   appropriate finishing job.  */
#define CODING_MODE_LAST_BLOCK			0x02

/* If set, it means that the current source text is in a buffer which
   enables selective display.  */
#define CODING_MODE_SELECTIVE_DISPLAY		0x04

/* This flag is used by the decoding/encoding routines on the fly.  If
   set, it means that right-to-left text is being processed.  */
#define CODING_MODE_DIRECTION			0x08

/* NOTE(review): undocumented in this header; the name suggests that
   the destination must not be relocated or reallocated -- confirm
   against the users of this flag.  */
#define CODING_MODE_FIXED_DESTINATION		0x10

/* If set, it means that the encoding routines produce some safe
   ASCII characters (usually '?') for unsupported characters.  */
#define CODING_MODE_SAFE_ENCODING		0x20

/* Structure of the field `spec.iso_2022' in the structure
   `coding_system'.  */
struct iso_2022_spec
{
  /* Bit-wise-or of CODING_ISO_FLAG_XXX.  */
  unsigned flags;

  /* The current graphic register invoked to each graphic plane.  */
  int current_invocation[2];

  /* The current charset designated to each graphic register.  The
     value -1 means that no charset is designated, -2 means that
     there was an invalid designation previously.  */
  int current_designation[4];

  /* Set to 1 temporarily only when graphic register 2 or 3 is invoked
     by single-shift while encoding.  */
  int single_shifting;

  /* Set to 1 temporarily only when processing at beginning of line.  */
  int bol;
};
Karl Heuer's avatar
Karl Heuer committed
327

Kenichi Handa's avatar
Kenichi Handa committed
328
/* Forward declaration only: struct coding_system stores just a pointer
   to this type.  The full definition is in ccl.h.  */
struct ccl_spec;
Karl Heuer's avatar
Karl Heuer committed
329

Kenichi Handa's avatar
Kenichi Handa committed
330 331 332 333 334 335
/* Type of the member `bom' of struct utf_16_spec.
   NOTE(review): the names suggest "detect a byte order mark", "no byte
   order mark" and "byte order mark present"; the exact handling is in
   the UTF-16 conversion code, which is not visible here -- confirm.  */
enum utf_16_bom_type
  {
    utf_16_detect_bom,
    utf_16_without_bom,
    utf_16_with_bom
  };
336

Kenichi Handa's avatar
Kenichi Handa committed
337 338 339 340 341
/* Type of the member `endian' of struct utf_16_spec: the byte order
   of the UTF-16 text.  */
enum utf_16_endian_type
  {
    utf_16_big_endian,
    utf_16_little_endian
  };
342

Kenichi Handa's avatar
Kenichi Handa committed
343 344 345 346 347 348
/* Structure of the field `spec.utf_16' in the structure
   `coding_system'.  */
struct utf_16_spec
{
  enum utf_16_bom_type bom;
  enum utf_16_endian_type endian;
  /* NOTE(review): undocumented here; presumably state kept while a
     surrogate pair is being processed -- confirm in the UTF-16
     conversion code.  */
  int surrogate;
};
349

350 351 352 353 354 355 356 357 358 359
/* Bookkeeping for coding system detection.  A pointer to this
   structure is the second argument of the `detector' function stored
   in struct coding_system.  */
struct coding_detection_info
{
  /* Values of these members are bitwise-OR of CATEGORY_MASK_XXXs.  */
  /* Which categories are already checked.  */
  int checked;
  /* Which categories are strongly found.  */
  int found;
  /* Which categories are rejected.  */
  int rejected;
};
360 361


Kenichi Handa's avatar
Kenichi Handa committed
362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377
struct coding_system
{
  /* ID number of the coding system.  This is an index to
     Vcoding_system_hash_table.  This value is set by
     setup_coding_system.  At the early stage of building time, this
     value is -1 in the array coding_categories to indicate that no
     coding-system of that category is yet defined.  */
  int id;

  /* Flag bits of the coding system.  The meaning of each bit is common
     to all types of coding systems.  */
  int common_flags;

  /* Mode bits of the coding system.  See the comments of the macros
     CODING_MODE_XXX.  */
  unsigned int mode;
378

Karl Heuer's avatar
Karl Heuer committed
379
  /* Detailed information specific to each type of coding system.  */
Kenichi Handa's avatar
Kenichi Handa committed
380
  union
Karl Heuer's avatar
Karl Heuer committed
381
    {
Kenichi Handa's avatar
Kenichi Handa committed
382 383 384 385
      struct iso_2022_spec iso_2022;
      struct ccl_spec *ccl;	/* Defined in ccl.h.  */
      struct utf_16_spec utf_16;
      int emacs_mule_full_support;
Karl Heuer's avatar
Karl Heuer committed
386 387
    } spec;

Kenichi Handa's avatar
Kenichi Handa committed
388 389
  int max_charset_id;
  char *safe_charsets;
390

Kenichi Handa's avatar
Kenichi Handa committed
391 392 393 394
  /* The following two members specify how binary 8-bit code 128..255
     are represented in source and destination text respectively.  1
     means they are represented by 2-byte sequence, 0 means they are
     represented by 1-byte as is (see the comment in character.h).  */
395 396 397
  unsigned src_multibyte : 1;
  unsigned dst_multibyte : 1;

398 399 400 401
  /* How may heading bytes we can skip for decoding.  This is set to
     -1 in setup_coding_system, and updated by detect_coding.  So,
     when this is equal to the byte length of the text being
     converted, we can skip the actual conversion process.  */
Kenichi Handa's avatar
Kenichi Handa committed
402
  int head_ascii;
403 404

  /* The following members are set by encoding/decoding routine.  */
Kenichi Handa's avatar
Kenichi Handa committed
405
  EMACS_INT produced, produced_char, consumed, consumed_char;
406

407 408 409
  /* Number of error source data found in a decoding routine.  */
  int errors;

Kenichi Handa's avatar
Kenichi Handa committed
410 411
  /* Store the positions of error source data. */
  EMACS_INT *error_positions;
412

Kenichi Handa's avatar
Kenichi Handa committed
413 414
  /* Finish status of code conversion.  */
  enum coding_result_code result;
415

Kenichi Handa's avatar
Kenichi Handa committed
416 417
  EMACS_INT src_pos, src_pos_byte, src_chars, src_bytes;
  Lisp_Object src_object;
Kenichi Handa's avatar
Kenichi Handa committed
418
  const unsigned char *source;
Karl Heuer's avatar
Karl Heuer committed
419

Kenichi Handa's avatar
Kenichi Handa committed
420 421 422
  EMACS_INT dst_pos, dst_pos_byte, dst_bytes;
  Lisp_Object dst_object;
  unsigned char *destination;
Karl Heuer's avatar
Karl Heuer committed
423

Kenichi Handa's avatar
Kenichi Handa committed
424 425
  /* Set to 1 iff the source of conversion is not in the member
     `charbuf', but at `src_object'.  */
Kenichi Handa's avatar
Kenichi Handa committed
426
  int chars_at_source;
Karl Heuer's avatar
Karl Heuer committed
427

Kenichi Handa's avatar
Kenichi Handa committed
428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444
  /* If an element is non-negative, it is a character code.

     If it is in the range -128..-1, it is a 8-bit character code
     minus 256.

     If it is less than -128, it specifies the start of an annotation
     chunk.  The length of the chunk is -128 minus the value of the
     element.  The following elements are OFFSET, ANNOTATION-TYPE, and
     a sequence of actual data for the annotation.  OFFSET is a
     character position offset from dst_pos or src_pos,
     ANNOTATION-TYPE specfies the meaning of the annotation and how to
     handle the following data..  */
  int *charbuf;
  int charbuf_size, charbuf_used;

  /* Set to 1 if charbuf contains an annotation.  */
  int annotated;
Karl Heuer's avatar
Karl Heuer committed
445

Kenichi Handa's avatar
Kenichi Handa committed
446 447
  unsigned char carryover[64];
  int carryover_bytes;
448

Kenichi Handa's avatar
Kenichi Handa committed
449 450
  int default_char;

451 452
  int (*detector) P_ ((struct coding_system *,
		       struct coding_detection_info *));
Kenichi Handa's avatar
Kenichi Handa committed
453 454 455 456 457 458 459 460 461 462
  void (*decoder) P_ ((struct coding_system *));
  int (*encoder) P_ ((struct coding_system *));
};

/* Meanings of bits in the member `common_flags' of the structure
   coding_system.  The lowest 8 bits are reserved for various kind of
   annotations; CODING_ANNOTATION_MASK selects all of them.

   NOTE(review): CODING_ANNOTATE_CHARSET_MASK is 0x0003, i.e. the
   union of the COMPOSITION and DIRECTION bits rather than a bit of
   its own, so setting it also sets those two and testing it succeeds
   whenever either of them is set.  The value is left unchanged
   because existing callers may rely on it; confirm whether 0x0004
   was intended.  */
#define CODING_ANNOTATION_MASK			0x00FF
#define CODING_ANNOTATE_COMPOSITION_MASK	0x0001
#define CODING_ANNOTATE_DIRECTION_MASK		0x0002
#define CODING_ANNOTATE_CHARSET_MASK		0x0003
#define CODING_FOR_UNIBYTE_MASK			0x0100
#define CODING_REQUIRE_FLUSHING_MASK		0x0200
#define CODING_REQUIRE_DECODING_MASK		0x0400
#define CODING_REQUIRE_ENCODING_MASK		0x0800
#define CODING_REQUIRE_DETECTION_MASK		0x1000
#define CODING_RESET_AT_BOL_MASK		0x2000

/* Predicates on a coding context CODING (a pointer to struct
   coding_system).  The macros that merely mask `common_flags' yield
   the masked bits, i.e. zero or some nonzero value that is not
   necessarily 1; use them only in a boolean context.  The macros
   built with `||' yield exactly 0 or 1.  */

/* Return nonzero if the coding context CODING requires annotation
   handling.  */
#define CODING_REQUIRE_ANNOTATION(coding) \
  ((coding)->common_flags & CODING_ANNOTATION_MASK)

/* Return nonzero if the coding context CODING prefers decoding into
   unibyte.  */
#define CODING_FOR_UNIBYTE(coding) \
  ((coding)->common_flags & CODING_FOR_UNIBYTE_MASK)

/* Return nonzero if the coding context CODING requires specific code
   to be attached at the tail of converted text.  */
#define CODING_REQUIRE_FLUSHING(coding) \
  ((coding)->common_flags & CODING_REQUIRE_FLUSHING_MASK)

/* Return 1 if the coding context CODING requires code conversion on
   decoding, else 0.  */
#define CODING_REQUIRE_DECODING(coding)				\
  ((coding)->dst_multibyte					\
   || ((coding)->common_flags & CODING_REQUIRE_DECODING_MASK))


/* Return 1 if the coding context CODING requires code conversion on
   encoding, else 0.
   The non-multibyte part of the condition is to support encoding of
   unibyte strings/buffers generated by string-as-unibyte or
   (set-buffer-multibyte nil) from multibyte strings/buffers.  */
#define CODING_REQUIRE_ENCODING(coding)				\
  ((coding)->src_multibyte					\
   || ((coding)->common_flags & CODING_REQUIRE_ENCODING_MASK)	\
   || ((coding)->mode & CODING_MODE_SELECTIVE_DISPLAY))


/* Return nonzero if the coding context CODING requires some kind of
   code detection.  */
#define CODING_REQUIRE_DETECTION(coding) \
  ((coding)->common_flags & CODING_REQUIRE_DETECTION_MASK)

/* Return 1 if the coding context CODING requires code conversion on
   decoding or some kind of code detection, else 0.  */
#define CODING_MAY_REQUIRE_DECODING(coding)	\
  (CODING_REQUIRE_DECODING (coding)		\
   || CODING_REQUIRE_DETECTION (coding))
Karl Heuer's avatar
Karl Heuer committed
513 514 515 516 517 518

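/* Macros to convert a two-byte character code between SJIS and JIS.
   Each macro takes CODE, an integer lvalue holding the first byte in
   bits 8..15 and the second byte in bits 0..7, and overwrites it with
   the converted code packed the same way.  SJIS_TO_JIS and
   JIS_TO_SJIS handle JISX0208.  NOTE(review): the `2' variants appear
   to handle the rows of the second plane of JISX0213 (SJIS lead
   bytes 0xF0 and up) -- confirm against their callers.  */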

Kenichi Handa's avatar
Kenichi Handa committed
519 520 521 522 523 524 525 526 527 528 529 530 531
/* Convert CODE in place from a two-byte SJIS code to the
   corresponding JIS code.  CODE must be an integer lvalue holding the
   first SJIS byte in bits 8..15 and the second in bits 0..7; on exit
   it holds the two JIS bytes packed the same way.  The input is not
   validated.  CODE is evaluated three times.  The temporaries carry
   a trailing underscore to make a clash with the caller's variable
   names unlikely.  */
#define SJIS_TO_JIS(code)						\
  do {									\
    int sjis_hi_ = (code) >> 8;						\
    int sjis_lo_ = (code) & 0xFF;					\
    int jis_hi_, jis_lo_;						\
									\
    if (sjis_lo_ >= 0x9F)						\
      {									\
	jis_hi_ = sjis_hi_ * 2 - (sjis_hi_ >= 0xE0 ? 0x160 : 0xE0);	\
	jis_lo_ = sjis_lo_ - 0x7E;					\
      }									\
    else								\
      {									\
	jis_hi_ = sjis_hi_ * 2 - (sjis_hi_ >= 0xE0 ? 0x161 : 0xE1);	\
	jis_lo_ = sjis_lo_ - (sjis_lo_ >= 0x7F ? 0x20 : 0x1F);		\
      }									\
    (code) = (jis_hi_ << 8) | jis_lo_;					\
  } while (0)

534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558
/* Convert CODE in place from a two-byte SJIS code whose first byte is
   0xF0 or above to the corresponding two-byte JIS style code.  CODE
   must be an integer lvalue holding the first byte in bits 8..15 and
   the second in bits 0..7; the result is packed the same way.  The
   input is not validated.  CODE is evaluated three times.  */
#define SJIS_TO_JIS2(code)						\
  do {									\
    int sjis_hi_, sjis_lo_, jis_hi_, jis_lo_;				\
									\
    sjis_hi_ = (code) >> 8;						\
    sjis_lo_ = (code) & 0xFF;						\
									\
    if (sjis_lo_ < 0x9F)						\
      {									\
	if (sjis_hi_ <= 0xF2)						\
	  jis_hi_ = 0x21 + (sjis_hi_ - 0xF0) * 2;			\
	else if (sjis_hi_ <= 0xF4)					\
	  jis_hi_ = 0x2D + (sjis_hi_ - 0xF3) * 2;			\
	else								\
	  jis_hi_ = 0x6F + (sjis_hi_ - 0xF5) * 2;			\
	jis_lo_ = sjis_lo_ - (sjis_lo_ >= 0x7F ? 0x20 : 0x1F);		\
      }									\
    else								\
      {									\
	switch (sjis_hi_)						\
	  {								\
	  case 0xF0: jis_hi_ = 0x28; break;				\
	  case 0xF1: jis_hi_ = 0x24; break;				\
	  case 0xF2: jis_hi_ = 0x2C; break;				\
	  case 0xF3: jis_hi_ = 0x2E; break;				\
	  default:   jis_hi_ = 0x6E + (sjis_hi_ - 0xF4) * 2; break;	\
	  }								\
	jis_lo_ = sjis_lo_ - 0x7E;					\
      }									\
    (code) = (jis_hi_ << 8) | jis_lo_;					\
  } while (0)

Kenichi Handa's avatar
Kenichi Handa committed
559 560

/* Convert CODE in place from a two-byte JIS code to the corresponding
   SJIS code.  CODE must be an integer lvalue holding the first JIS
   byte in bits 8..15 and the second in bits 0..7; on exit it holds
   the two SJIS bytes packed the same way.  The input is not
   validated.  CODE is evaluated three times.  The temporaries carry
   a trailing underscore to make a clash with the caller's variable
   names unlikely.  */
#define JIS_TO_SJIS(code)						\
  do {									\
    int jis_hi_ = (code) >> 8;						\
    int jis_lo_ = (code) & 0xFF;					\
    int sjis_hi_, sjis_lo_;						\
									\
    if (jis_hi_ & 1)							\
      {									\
	sjis_hi_ = jis_hi_ / 2 + (jis_hi_ < 0x5F ? 0x71 : 0xB1);	\
	sjis_lo_ = jis_lo_ + (jis_lo_ >= 0x60 ? 0x20 : 0x1F);		\
      }									\
    else								\
      {									\
	sjis_hi_ = jis_hi_ / 2 + (jis_hi_ < 0x5F ? 0x70 : 0xB0);	\
	sjis_lo_ = jis_lo_ + 0x7E;					\
      }									\
    (code) = (sjis_hi_ << 8) | sjis_lo_;				\
  } while (0)

574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596
/* Convert CODE in place from a two-byte JIS style code to the
   corresponding SJIS code whose first byte is 0xF0 or above; this is
   the inverse of SJIS_TO_JIS2.  CODE must be an integer lvalue
   holding the first byte in bits 8..15 and the second in bits 0..7;
   the result is packed the same way.  The input is not validated.
   CODE is evaluated three times.

   For an odd first byte the mapping is 0x21, 0x23, 0x25 -> 0xF0..0xF2,
   0x2D, 0x2F -> 0xF3, 0xF4, and 0x6F and up -> 0xF5 and up.  The
   second test must therefore cover 0x2F; it used to read `<= 0x27',
   which sent 0x2D and 0x2F to the last arm and produced a first byte
   below 0xF0.  */
#define JIS_TO_SJIS2(code)						\
  do {									\
    int jis_hi_ = (code) >> 8;						\
    int jis_lo_ = (code) & 0xFF;					\
    int sjis_hi_, sjis_lo_;						\
									\
    if (jis_hi_ & 1)							\
      {									\
	sjis_hi_ = (jis_hi_ <= 0x25 ? 0xF0 + (jis_hi_ - 0x21) / 2	\
		    : jis_hi_ <= 0x2F ? 0xF3 + (jis_hi_ - 0x2D) / 2	\
		    : 0xF5 + (jis_hi_ - 0x6F) / 2);			\
	sjis_lo_ = jis_lo_ + (jis_lo_ >= 0x60 ? 0x20 : 0x1F);		\
      }									\
    else								\
      {									\
	sjis_hi_ = (jis_hi_ == 0x28 ? 0xF0				\
		    : jis_hi_ == 0x24 ? 0xF1				\
		    : jis_hi_ == 0x2C ? 0xF2				\
		    : jis_hi_ == 0x2E ? 0xF3				\
		    : 0xF4 + (jis_hi_ - 0x6E) / 2);			\
	sjis_lo_ = jis_lo_ + 0x7E;					\
      }									\
    (code) = (sjis_hi_ << 8) | sjis_lo_;				\
  } while (0)
Kenichi Handa's avatar
Kenichi Handa committed
597

598 599 600 601
/* Encode the file name NAME using the specified coding system
   for file names, if any.

   Vfile_name_coding_system is used when it is neither nil nor the
   integer 0; otherwise Vdefault_file_name_coding_system is used under
   the same condition; otherwise NAME itself is the result.  The last
   argument 1 of code_convert_string_norecord selects encoding.  */
#define ENCODE_FILE(name)						   \
  (! NILP (Vfile_name_coding_system)					   \
   && !EQ (Vfile_name_coding_system, make_number (0))			   \
   ? code_convert_string_norecord (name, Vfile_name_coding_system, 1)	   \
   : (! NILP (Vdefault_file_name_coding_system)				   \
      && !EQ (Vdefault_file_name_coding_system, make_number (0))	   \
      ? code_convert_string_norecord (name, Vdefault_file_name_coding_system, 1) \
      : (name)))

Kenichi Handa's avatar
Kenichi Handa committed
609

610 611 612 613
/* Decode the file name NAME using the specified coding system
   for file names, if any.

   Vfile_name_coding_system is used when it is neither nil nor the
   integer 0; otherwise Vdefault_file_name_coding_system is used under
   the same condition; otherwise NAME itself is the result.  The last
   argument 0 of code_convert_string_norecord selects decoding.  */
#define DECODE_FILE(name)						   \
  (! NILP (Vfile_name_coding_system)					   \
   && !EQ (Vfile_name_coding_system, make_number (0))			   \
   ? code_convert_string_norecord (name, Vfile_name_coding_system, 0)	   \
   : (! NILP (Vdefault_file_name_coding_system)				   \
      && !EQ (Vdefault_file_name_coding_system, make_number (0))	   \
      ? code_convert_string_norecord (name, Vdefault_file_name_coding_system, 0) \
      : (name)))

Kenichi Handa's avatar
Kenichi Handa committed
621

622
/* Encode the string STR using the specified coding system
   for system functions, if any.

   Vlocale_coding_system is used when it is neither nil nor the
   integer 0; otherwise STR itself is the result.  The last argument 1
   of code_convert_string_norecord selects encoding.  */
#define ENCODE_SYSTEM(str)						   \
  (! NILP (Vlocale_coding_system)					   \
   && !EQ (Vlocale_coding_system, make_number (0))			   \
   ? code_convert_string_norecord (str, Vlocale_coding_system, 1)	   \
   : (str))

/* Decode the string STR using the specified coding system
   for system functions, if any.

   Vlocale_coding_system is used when it is neither nil nor the
   integer 0; otherwise STR itself is the result.  The last argument 0
   of code_convert_string_norecord selects decoding.  */
#define DECODE_SYSTEM(str)						   \
  (! NILP (Vlocale_coding_system)					   \
   && !EQ (Vlocale_coding_system, make_number (0))			   \
   ? code_convert_string_norecord (str, Vlocale_coding_system, 0)	   \
   : (str))
637

Dave Love's avatar
Dave Love committed
638 639 640 641
/* Used by the gtk menu code.  Note that this encodes utf-8, not
   utf-8-emacs, so it's not a no-op.  Unlike the ENCODE_FILE and
   ENCODE_SYSTEM macros, the conversion is unconditional.  The last
   argument 1 selects encoding, as in those macros.  */
#define ENCODE_UTF_8(str) code_convert_string_norecord (str, Qutf_8, 1)

Karl Heuer's avatar
Karl Heuer committed
642
/* Extern declarations.  The functions are defined outside this
   header; P_ wraps the parameter lists as elsewhere in the
   project.  */
extern Lisp_Object code_conversion_save P_ ((int, int));
extern int decoding_buffer_size P_ ((struct coding_system *, int));
extern int encoding_buffer_size P_ ((struct coding_system *, int));
extern void setup_coding_system P_ ((Lisp_Object, struct coding_system *));
extern Lisp_Object coding_charset_list P_ ((struct coding_system *));
extern void detect_coding P_ ((struct coding_system *));
extern Lisp_Object code_convert_region P_ ((Lisp_Object, Lisp_Object,
					    Lisp_Object, Lisp_Object,
					    int, int));
extern Lisp_Object code_convert_string P_ ((Lisp_Object, Lisp_Object,
					    Lisp_Object, int, int, int));
extern Lisp_Object code_convert_string_norecord P_ ((Lisp_Object, Lisp_Object,
						     int));
extern Lisp_Object raw_text_coding_system P_ ((Lisp_Object));
extern Lisp_Object coding_inherit_eol_type P_ ((Lisp_Object, Lisp_Object));

extern int decode_coding_gap P_ ((struct coding_system *,
				  EMACS_INT, EMACS_INT));
extern int encode_coding_gap P_ ((struct coding_system *,
				  EMACS_INT, EMACS_INT));
extern void decode_coding_object P_ ((struct coding_system *,
				      Lisp_Object, EMACS_INT, EMACS_INT,
				      EMACS_INT, EMACS_INT, Lisp_Object));
extern void encode_coding_object P_ ((struct coding_system *,
				      Lisp_Object, EMACS_INT, EMACS_INT,
				      EMACS_INT, EMACS_INT, Lisp_Object));

Kenichi Handa's avatar
Kenichi Handa committed
670 671
/* Macros for backward compatibility.  */

Kenichi Handa's avatar
Kenichi Handa committed
672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705
/* Backward-compatible wrapper around decode_coding_object for the
   region between character positions FROM and TO.  The current buffer
   is passed both as the object to convert and as the destination.
   Byte positions are obtained with CHAR_TO_BYTE, so FROM and TO are
   each evaluated twice; do not pass arguments with side effects.  */
#define decode_coding_region(coding, from, to)			\
  decode_coding_object ((coding),				\
			Fcurrent_buffer (),			\
			(from), CHAR_TO_BYTE (from),		\
			(to), CHAR_TO_BYTE (to),		\
			Fcurrent_buffer ())


/* Backward-compatible wrapper around encode_coding_object for the
   region between character positions FROM and TO.  The current buffer
   is passed both as the object to convert and as the destination.
   Byte positions are obtained with CHAR_TO_BYTE, so FROM and TO are
   each evaluated twice; do not pass arguments with side effects.  */
#define encode_coding_region(coding, from, to)			\
  encode_coding_object ((coding),				\
			Fcurrent_buffer (),			\
			(from), CHAR_TO_BYTE (from),		\
			(to), CHAR_TO_BYTE (to),		\
			Fcurrent_buffer ())


/* Backward-compatible wrapper: hand the whole of STRING to
   decode_coding_object, starting at position 0 and ending at
   XSTRING (string)->size (with STRING_BYTES giving the matching byte
   position), and with Qt as the destination argument.  NOCOPY is
   accepted for source compatibility only and is ignored.  STRING is
   evaluated more than once.  */
#define decode_coding_string(coding, string, nocopy)		\
  decode_coding_object ((coding), (string),			\
			0, 0,					\
			XSTRING (string)->size,			\
			STRING_BYTES (XSTRING (string)),	\
			Qt)

/* Backward-compatible wrapper: hand the whole of STRING to
   encode_coding_object, starting at position 0 and ending at
   XSTRING (string)->size (with STRING_BYTES giving the matching byte
   position), with Qt as the destination argument; the value of the
   expression is then (coding)->dst_object.  NOCOPY is ignored.
   CODING and STRING are evaluated more than once.  */
#define encode_coding_string(coding, string, nocopy)		\
  (encode_coding_object ((coding), (string),			\
			 0, 0,					\
			 XSTRING (string)->size,		\
			 STRING_BYTES (XSTRING (string)),	\
			 Qt),					\
   (coding)->dst_object)


/* Decode BYTES bytes of data at SRC, with DST_OBJECT as destination.
   Stores SRC in (coding)->source, sets both (coding)->src_chars and
   (coding)->src_bytes to BYTES, then calls decode_coding_object with
   Qnil as the object to convert, 0 as both start positions and BYTES
   as both end positions.
   Expands to a single `do ... while (0)' statement, so it yields no
   value.  CODING and BYTES are evaluated several times; do not pass
   arguments with side effects.
   NOTE(review): SRC is presumably a pointer to raw C bytes (hence the
   macro's name) -- confirm against the type of the `source' member.  */
#define decode_coding_c_string(coding, src, bytes, dst_object)		\
  do {									\
    (coding)->source = (src);						\
    (coding)->src_chars = (coding)->src_bytes = (bytes);		\
    decode_coding_object ((coding), Qnil, 0, 0, (bytes), (bytes),	\
			  (dst_object));				\
  } while (0)


/* Takes no arguments and returns a Lisp_Object.  Declared with an
   explicit `void' parameter list so that, wherever P_ yields real
   prototypes, a call that passes arguments is diagnosed instead of
   being silently accepted by an old-style `()' declarator.
   NOTE(review): presumably returns the coding system of highest
   priority -- confirm in coding.c.  */
extern Lisp_Object preferred_coding_system P_ ((void));


Kenichi Handa's avatar
Kenichi Handa committed
706 707
extern Lisp_Object Qutf_8, Qutf_8_emacs;

Karl Heuer's avatar
Karl Heuer committed
708
extern Lisp_Object Qcoding_system, Qeol_type, Qcoding_category_index;
Kenichi Handa's avatar
Kenichi Handa committed
709 710 711
extern Lisp_Object Qcoding_system_p;
extern Lisp_Object Qraw_text, Qemacs_mule, Qno_conversion, Qundecided;
extern Lisp_Object Qiso_2022;
Karl Heuer's avatar
Karl Heuer committed
712
extern Lisp_Object Qbuffer_file_coding_system;
Kenichi Handa's avatar
Kenichi Handa committed
713 714

/* NOTE(review): presumably the Lisp symbols naming the three
   end-of-line conventions that eol_mnemonic_unix, eol_mnemonic_dos
   and eol_mnemonic_mac (declared in this file) describe -- confirm
   in coding.c.  */
extern Lisp_Object Qunix, Qdos, Qmac;
Karl Heuer's avatar
Karl Heuer committed
715

716 717
extern Lisp_Object Qtranslation_table;
extern Lisp_Object Qtranslation_table_id;
718

719 720 721 722
/* Mnemonic strings to indicate each type of end-of-line.  */
extern Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
/* Mnemonic string to indicate type of end-of-line is not yet decided.  */
extern Lisp_Object eol_mnemonic_undecided;
Karl Heuer's avatar
Karl Heuer committed
723 724 725

#ifdef emacs
extern Lisp_Object Qfile_coding_system;
Kim F. Storm's avatar
Kim F. Storm committed
726
extern Lisp_Object Qcall_process, Qcall_process_region;
Karl Heuer's avatar
Karl Heuer committed
727
extern Lisp_Object Qstart_process, Qopen_network_stream;
728
extern Lisp_Object Qwrite_region;
Karl Heuer's avatar
Karl Heuer committed
729

730 731
extern char *emacs_strerror P_ ((int));

Karl Heuer's avatar
Karl Heuer committed
732 733 734 735 736 737
/* Coding-system for reading files and receiving data from process.  */
extern Lisp_Object Vcoding_system_for_read;
/* Coding-system for writing files and sending data to process.  */
extern Lisp_Object Vcoding_system_for_write;
/* Coding-system actually used in the latest I/O.  */
extern Lisp_Object Vlast_coding_system_used;
738 739
/* Coding-system to use with system messages (e.g. strerror).  */
extern Lisp_Object Vlocale_coding_system;
Karl Heuer's avatar
Karl Heuer committed
740

741 742 743 744
/* If non-zero, process buffer inherits the coding system used to decode
   the subprocess output.  */
extern int inherit_process_coding_system;

Karl Heuer's avatar
Karl Heuer committed
745 746 747 748 749
/* Coding-system to be used for encoding terminal output.  This
   structure contains information of a coding-system specified by the
   function `set-terminal-coding-system'.  */
extern struct coding_system terminal_coding;

750 751 752 753
/* Coding system to be used to encode text for terminal display when
   terminal coding system is nil.  */
extern struct coding_system safe_terminal_coding;

Karl Heuer's avatar
Karl Heuer committed
754 755 756 757 758
/* Coding-system of what is sent from terminal keyboard.  This
   structure contains information of a coding-system specified by the
   function `set-keyboard-coding-system'.  */
extern struct coding_system keyboard_coding;

759 760
/* Default coding systems used for process I/O.  */
extern Lisp_Object Vdefault_process_coding_system;
Karl Heuer's avatar
Karl Heuer committed
761

Kenichi Handa's avatar
Kenichi Handa committed
762
/* Function to call to force a user to select a proper coding
763 764 765
   system.  */
extern Lisp_Object Vselect_safe_coding_system_function;

766 767 768 769
/* If nonzero, on writing a file, Vselect_safe_coding_system_function
   is called even if Vcoding_system_for_write is non-nil.  */
extern int coding_system_require_warning;

770 771 772 773 774 775
/* Coding system for file names, or nil if none.  */
extern Lisp_Object Vfile_name_coding_system;

/* Coding system for file names used only when
   Vfile_name_coding_system is nil.  */
extern Lisp_Object Vdefault_file_name_coding_system;
776

Karl Heuer's avatar
Karl Heuer committed
777 778
#endif

779 780 781
/* Error signaled when there's a problem with detecting a coding system.  */
extern Lisp_Object Qcoding_system_error;

Kenichi Handa's avatar
Kenichi Handa committed
782 783 784
/* emacs-mule support: a 256-entry table of chars, and a function that
   takes an unsigned char pointer and returns an int.
   NOTE(review): presumably the table is indexed by a leading byte and
   gives the length of the byte sequence, and the function returns
   the character found at the pointer -- confirm in coding.c.  */
extern char emacs_mule_bytes[256];
extern int emacs_mule_string_char P_ ((unsigned char *));

785
#endif /* EMACS_CODING_H */
Kenichi Handa's avatar
Kenichi Handa committed
786 787 788

/* arch-tag: 2bc3b4fa-6870-4f64-8135-b962b2d290e4
   (do not change this comment) */