/* Header for charset handler.
   Copyright (C) 2001-2011 Free Software Foundation, Inc.
   Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
     2005, 2006, 2007, 2008, 2009, 2010, 2011
     National Institute of Advanced Industrial Science and Technology (AIST)
     Registration Number H14PRO021

   Copyright (C) 2003
     National Institute of Advanced Industrial Science and Technology (AIST)
     Registration Number H13PRO009

This file is part of GNU Emacs.

GNU Emacs is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

GNU Emacs is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */

#ifndef EMACS_CHARSET_H
#define EMACS_CHARSET_H
Karl Heuer's avatar
Karl Heuer committed
29

30 31 32 33 34 35 36
/* Index to arguments of Fdefine_charset_internal.  Each enumerator
   names the position of one argument; charset_arg_max, being the
   last enumerator, equals the number of argument slots.  */

enum define_charset_arg_index
  {
    charset_arg_name,
    charset_arg_dimension,
    charset_arg_code_space,
    charset_arg_min_code,
    charset_arg_max_code,
    charset_arg_iso_final,
    charset_arg_iso_revision,
    charset_arg_emacs_mule_id,
    charset_arg_ascii_compatible_p,
    charset_arg_supplementary_p,
    charset_arg_invalid_code,
    charset_arg_code_offset,
    charset_arg_map,
    charset_arg_subset,
    charset_arg_superset,
    charset_arg_unify_map,
    charset_arg_plist,
    charset_arg_max
  };


/* Indices to charset attributes vector.  The CHARSET_ATTR_* macros
   use these enumerators as the index argument of AREF.  */

enum charset_attr_index
  {
    /* ID number of the charset.  */
    charset_id,

    /* Name of the charset (symbol).  */
    charset_name,

    /* Property list of the charset.  */
    charset_plist,

    /* If the method of the charset is `MAP', the value is a mapping
       vector or a file name that contains mapping vector.  Otherwise,
       nil.  */
    charset_map,

    /* If the method of the charset is `MAP', the value is a vector
       that maps code points of the charset to characters.  The vector
       is indexed by a character index.  A character index is
       calculated from a code point and the code-space table of the
       charset.  */
    charset_decoder,

    /* If the method of the charset is `MAP', the value is a
       char-table that maps characters of the charset to code
       points.  */
    charset_encoder,

    /* If the method of the charset is `SUBSET', the value is a vector
       that has this form:

	[ CHARSET-ID MIN-CODE MAX-CODE OFFSET ]

       CHARSET-ID is an ID number of a parent charset.  MIN-CODE and
       MAX-CODE specify the range of characters inherited from the
       parent.  OFFSET is an integer value to add to a code point of
       the parent charset to get the corresponding code point of this
       charset.  */
    charset_subset,

    /* If the method of the charset is `SUPERSET', the value is a list
       whose elements have this form:

	(CHARSET-ID . OFFSET)

       CHARSET-IDs are ID numbers of parent charsets.  OFFSET is an
       integer value to add to a code point of the parent charset to
       get the corresponding code point of this charset.  */
    charset_superset,

    /* The value is a mapping vector or a file name that contains the
       mapping.  This defines how characters in the charset should be
       unified with Unicode.  The value of the member
       `charset_deunifier' is created from this information.  */
    charset_unify_map,

    /* If characters in the charset must be unified with Unicode, the
       value is a char table that maps a unified Unicode character
       code to the non-unified character code in the charset.  */
    charset_deunifier,

    /* The length of the charset attribute vector.  */
    charset_attr_max
  };

/* Methods for converting code points and characters of charsets.
   The value is stored in the `method' member of struct charset.  */

enum charset_method
  {
    /* For a charset of this method, a character code is calculated
       from a character index (which is calculated from a code point)
       simply by adding an offset value.  */
    CHARSET_METHOD_OFFSET,

    /* For a charset of this method, a decoder vector and an encoder
       char-table is used for code point <-> character code
       conversion.  */
    CHARSET_METHOD_MAP,

    /* A charset of this method is a subset of another charset.  */
    CHARSET_METHOD_SUBSET,

    /* A charset of this method is a superset of other charsets.  */
    CHARSET_METHOD_SUPERSET
  };

struct charset
{
Kenichi Handa's avatar
Kenichi Handa committed
145
  /* Index to charset_table.  */
146
  int id;
Karl Heuer's avatar
Karl Heuer committed
147

Kenichi Handa's avatar
Kenichi Handa committed
148
  /* Index to Vcharset_hash_table.  */
149 150 151 152 153
  int hash_index;

  /* Dimension of the charset: 1, 2, 3, or 4.  */
  int dimension;

154 155 156 157
  /* Byte code range of each dimension.  <code_space>[4N] is a mininum
     byte code of the (N+1)th dimension, <code_space>[4N+1] is a
     maximum byte code of the (N+1)th dimension, <code_space>[4N+2] is
     (<code_space>[4N+1] - <code_space>[4N] + 1), <code_space>[4N+3]
158 159 160
     is the number of characters contained in the first through (N+1)th
     dimensions, except that there is no <code_space>[15].
     We get `char-index' of a `code-point' from this
161
     information.  */
162
  int code_space[15];
163

164 165 166 167
  /* If B is a byte of Nth dimension of a code-point, the (N-1)th bit
     of code_space_mask[B] is set.  This array is used to quickly
     check if a code-point is in a valid range.  */
  unsigned char *code_space_mask;
168

169 170
  /* 1 if there's no gap in code-points.  */
  int code_linear_p;
171

172 173 174
  /* If the charset is treated as 94-chars in ISO-2022, the value is 0.
     If the charset is treated as 96-chars in ISO-2022, the value is 1.  */
  int iso_chars_96;
175

Kenichi Handa's avatar
Kenichi Handa committed
176 177
  /* ISO final byte of the charset: 48..127.  It may be -1 if the
     charset doesn't conform to ISO-2022.  */
178
  int iso_final;
179

Kenichi Handa's avatar
Kenichi Handa committed
180
  /* ISO revision number of the charset.  */
181
  int iso_revision;
182

183 184 185 186 187
  /* If the charset is identical to what supported by Emacs 21 and the
     priors, the identification number of the charset used in those
     version.  Otherwise, -1.  */
  int emacs_mule_id;

Miles Bader's avatar
Miles Bader committed
188
  /* Nonzero if the charset is compatible with ASCII.  */
189
  int ascii_compatible_p;
Karl Heuer's avatar
Karl Heuer committed
190

Miles Bader's avatar
Miles Bader committed
191
  /* Nonzero if the charset is supplementary.  */
192 193
  int supplementary_p;

Miles Bader's avatar
Miles Bader committed
194
  /* Nonzero if all the code points are representable by Lisp_Int.  */
195 196 197 198 199 200 201 202
  int compact_codes_p;

  /* The method for encoding/decoding characters of the charset.  */
  enum charset_method method;

  /* Mininum and Maximum code points of the charset.  */
  unsigned min_code, max_code;

203 204 205 206
  /* Offset value used by macros CODE_POINT_TO_INDEX and
      INDEX_TO_CODE_POINT. .  */
  unsigned char_index_offset;

207 208
  /* Mininum and Maximum character codes of the charset.  If the
     charset is compatible with ASCII, min_char is a minimum non-ASCII
209 210 211
     character of the charset.  If the method of charset is
     CHARSET_METHOD_OFFSET, even if the charset is unified, min_char
     and max_char doesn't change.  */
212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227
  int min_char, max_char;

  /* The code returned by ENCODE_CHAR if a character is not encodable
     by the charset.  */
  unsigned invalid_code;

  /* If the method of the charset is CHARSET_METHOD_MAP, this is a
     table of bits used to quickly and roughly guess if a character
     belongs to the charset.

     The first 64 elements are 512 bits for characters less than
     0x10000.  Each bit corresponds to 128-character block.  The last
     126 elements are 1008 bits for the greater characters
     (0x10000..0x3FFFFF).  Each bit corresponds to 4096-character
     block.

Dave Love's avatar
Dave Love committed
228
     If a bit is 1, at least one character in the corresponding block is
229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247
     in this charset.  */
  unsigned char fast_map[190];

  /* Offset value to calculate a character code from code-point, and
     visa versa.  */
  int code_offset;

  int unified_p;
};

/* Hash table of charset symbols vs. the corresponding attribute
   vectors.  */
extern Lisp_Object Vcharset_hash_table;

/* Table of struct charset.  */
extern struct charset *charset_table;

/* Return a pointer to the struct charset stored at index ID of
   charset_table.  No range check is performed on ID.  */
#define CHARSET_FROM_ID(id) (charset_table + (id))

248
/* List of charsets in priority order.  CHAR_CHARSET is documented as
   returning the first charset in this list that contains a given
   character.  */
extern Lisp_Object Vcharset_ordered_list;

/* NOTE(review): presumably the point in Vcharset_ordered_list where
   the non-preferred charsets start -- confirm against charset.c.  */
extern Lisp_Object Vcharset_non_preferred_head;

/* Incremented every time we change the priority of charsets.  */
extern unsigned short charset_ordered_list_tick;

/* NOTE(review): presumably the lists of charsets usable in the
   ISO-2022 and emacs-mule encodings respectively -- confirm against
   charset.c.  */
extern Lisp_Object Viso_2022_charset_list;
extern Lisp_Object Vemacs_mule_charset_list;

/* NOTE(review): presumably maps an emacs-mule charset ID (compare the
   `emacs_mule_id' member of struct charset) to a charset ID --
   confirm against charset.c.  */
extern int emacs_mule_charset[256];
258 259 260 261 262 263 264 265 266 267 268 269 270

/* Macros to access information about charset.  */

/* Return the attribute vector of charset whose symbol is SYMBOL.
   The value is nil (the default passed to Fgethash) if SYMBOL is not
   a key of Vcharset_hash_table.  */
#define CHARSET_SYMBOL_ATTRIBUTES(symbol)	\
  Fgethash ((symbol), Vcharset_hash_table, Qnil)

/* Return one element of the attribute vector ATTRS.  Each macro
   indexes ATTRS with the enum charset_attr_index member of the
   matching name.  */
#define CHARSET_ATTR_ID(attrs)		AREF ((attrs), charset_id)
#define CHARSET_ATTR_NAME(attrs)	AREF ((attrs), charset_name)
#define CHARSET_ATTR_PLIST(attrs)	AREF ((attrs), charset_plist)
#define CHARSET_ATTR_MAP(attrs)		AREF ((attrs), charset_map)
#define CHARSET_ATTR_DECODER(attrs)	AREF ((attrs), charset_decoder)
#define CHARSET_ATTR_ENCODER(attrs)	AREF ((attrs), charset_encoder)
#define CHARSET_ATTR_SUBSET(attrs)	AREF ((attrs), charset_subset)
#define CHARSET_ATTR_SUPERSET(attrs)	AREF ((attrs), charset_superset)
#define CHARSET_ATTR_UNIFY_MAP(attrs)	AREF ((attrs), charset_unify_map)
#define CHARSET_ATTR_DEUNIFIER(attrs)	AREF ((attrs), charset_deunifier)

/* Return the ID attribute of the charset whose symbol is SYMBOL.  */
#define CHARSET_SYMBOL_ID(symbol)	\
  CHARSET_ATTR_ID (CHARSET_SYMBOL_ATTRIBUTES (symbol))

/* Return an index to Vcharset_hash_table of the charset whose symbol
   is SYMBOL.  CHARSETP and CHECK_CHARSET treat a negative value as
   "SYMBOL is not a charset".  */
#define CHARSET_SYMBOL_HASH_INDEX(symbol)	\
  hash_lookup (XHASH_TABLE (Vcharset_hash_table), (symbol), NULL)

/* Return the attribute vector of CHARSET (a pointer to struct
   charset), fetched from Vcharset_hash_table at CHARSET's
   hash_index.  */
#define CHARSET_ATTRIBUTES(charset)	\
  (HASH_VALUE (XHASH_TABLE (Vcharset_hash_table), (charset)->hash_index))

/* Accessors for the members of struct charset.  CHARSET is a pointer
   to struct charset; each macro expands to the member of the
   corresponding name and so can also be used as an lvalue.  */
#define CHARSET_ID(charset)		((charset)->id)
#define CHARSET_HASH_INDEX(charset)	((charset)->hash_index)
#define CHARSET_DIMENSION(charset)	((charset)->dimension)
#define CHARSET_CODE_SPACE(charset)	((charset)->code_space)
#define CHARSET_CODE_LINEAR_P(charset)	((charset)->code_linear_p)
#define CHARSET_ISO_CHARS_96(charset)	((charset)->iso_chars_96)
#define CHARSET_ISO_FINAL(charset)	((charset)->iso_final)
/* NOTE(review): struct charset as defined in this header has no
   `iso_plane' member, so any use of CHARSET_ISO_PLANE would fail to
   compile.  It looks like a stale leftover -- confirm it is unused
   and remove it.  */
#define CHARSET_ISO_PLANE(charset)	((charset)->iso_plane)
#define CHARSET_ISO_REVISION(charset)	((charset)->iso_revision)
#define CHARSET_EMACS_MULE_ID(charset)	((charset)->emacs_mule_id)
#define CHARSET_ASCII_COMPATIBLE_P(charset) ((charset)->ascii_compatible_p)
#define CHARSET_COMPACT_CODES_P(charset) ((charset)->compact_codes_p)
#define CHARSET_METHOD(charset)		((charset)->method)
#define CHARSET_MIN_CODE(charset)	((charset)->min_code)
#define CHARSET_MAX_CODE(charset)	((charset)->max_code)
#define CHARSET_INVALID_CODE(charset)	((charset)->invalid_code)
#define CHARSET_MIN_CHAR(charset)	((charset)->min_char)
#define CHARSET_MAX_CHAR(charset)	((charset)->max_char)
#define CHARSET_CODE_OFFSET(charset)	((charset)->code_offset)
#define CHARSET_UNIFIED_P(charset)	((charset)->unified_p)

/* Accessors for the Lisp-level attributes of CHARSET (a pointer to
   struct charset).  Each macro looks up CHARSET's attribute vector
   with CHARSET_ATTRIBUTES and returns the element selected by the
   CHARSET_ATTR_* macro of the matching name.  */
#define CHARSET_NAME(charset)		\
  (CHARSET_ATTR_NAME (CHARSET_ATTRIBUTES (charset)))
#define CHARSET_MAP(charset)	\
  (CHARSET_ATTR_MAP (CHARSET_ATTRIBUTES (charset)))
#define CHARSET_DECODER(charset)	\
  (CHARSET_ATTR_DECODER (CHARSET_ATTRIBUTES (charset)))
#define CHARSET_ENCODER(charset)	\
  (CHARSET_ATTR_ENCODER (CHARSET_ATTRIBUTES (charset)))
#define CHARSET_SUBSET(charset)	\
  (CHARSET_ATTR_SUBSET (CHARSET_ATTRIBUTES (charset)))
#define CHARSET_SUPERSET(charset)	\
  (CHARSET_ATTR_SUPERSET (CHARSET_ATTRIBUTES (charset)))
#define CHARSET_UNIFY_MAP(charset)	\
  (CHARSET_ATTR_UNIFY_MAP (CHARSET_ATTRIBUTES (charset)))
#define CHARSET_DEUNIFIER(charset)	\
  (CHARSET_ATTR_DEUNIFIER (CHARSET_ATTRIBUTES (charset)))


Miles Bader's avatar
Miles Bader committed
327
/* Nonzero if OBJ is a valid charset symbol, i.e. if its index in
   Vcharset_hash_table is non-negative.  */
#define CHARSETP(obj) (CHARSET_SYMBOL_HASH_INDEX (obj) >= 0)

/* Check if X is a valid charset symbol.  If not, signal an error by
   calling wrong_type_argument with Qcharsetp.  X may be evaluated
   more than once.  */
#define CHECK_CHARSET(x)					\
  do {								\
    if (! SYMBOLP (x) || CHARSET_SYMBOL_HASH_INDEX (x) < 0)	\
      wrong_type_argument (Qcharsetp, (x));			\
  } while (0)
336 337


338 339 340 341 342 343 344
/* Check if X is a valid charset symbol.  If valid, set ID to the id
   number of the charset.  Otherwise, signal an error.

   X may be evaluated more than once.  The expansion declares a local
   variable named `idx', so neither X nor ID may be an expression that
   refers to a caller variable of that name.  */
#define CHECK_CHARSET_GET_ID(x, id)					\
  do {									\
    int idx;								\
									\
    if (! SYMBOLP (x) || (idx = CHARSET_SYMBOL_HASH_INDEX (x)) < 0)	\
      wrong_type_argument (Qcharsetp, (x));				\
    (id) = XINT (AREF (HASH_VALUE (XHASH_TABLE (Vcharset_hash_table), idx), \
		       charset_id));					\
  } while (0)

350

351 352 353 354 355
/* Check if X is a valid charset symbol.  If valid, set ATTR to the
   attr vector of the charset.  Otherwise, signal an error.

   ATTR is assigned whenever X is a symbol, even when the lookup
   yields nil and the error is signaled.  X may be evaluated more
   than once.  */
#define CHECK_CHARSET_GET_ATTR(x, attr)				\
  do {									\
    if (!SYMBOLP (x) || NILP ((attr) = CHARSET_SYMBOL_ATTRIBUTES (x)))	\
      wrong_type_argument (Qcharsetp, (x));				\
  } while (0)


360 361
/* Check if X is a valid charset symbol.  If valid, set CHARSET to the
   pointer to struct charset obtained from the charset's id via
   CHARSET_FROM_ID.  Otherwise CHECK_CHARSET_GET_ID signals an error.

   The expansion declares a local variable named `csid', so X must
   not refer to a caller variable of that name.  */
#define CHECK_CHARSET_GET_CHARSET(x, charset)	\
  do {						\
    int csid;					\
    CHECK_CHARSET_GET_ID (x, csid);		\
    (charset) = CHARSET_FROM_ID (csid);		\
  } while (0)


Juanma Barranquero's avatar
Juanma Barranquero committed
368
/* Lookup Vcharset_ordered_list and return the first charset that
   contains the character C.  Characters below 0x80 are answered
   directly with the charset whose ID is charset_ascii; all others go
   through char_charset.  C may be evaluated twice.  */
#define CHAR_CHARSET(c)				\
  ((c) < 0x80 ? CHARSET_FROM_ID (charset_ascii)	\
   : char_charset ((c), Qnil, NULL))
373

374 375 376 377
#if 0
/* Everything up to the matching #endif is compiled out.  */

/* Char-table of charset-sets.  Each element is a bool vector indexed
   by a charset ID.  */
extern Lisp_Object Vchar_charset_set;

/* Charset-bag of character C.  */
#define CHAR_CHARSET_SET(c) \
  CHAR_TABLE_REF (Vchar_charset_set, c)

/* Check if two characters C1 and C2 belong to the same charset.  */
#define SAME_CHARSET_P(c1, c2)	\
  intersection_p (CHAR_CHARSET_SET (c1), CHAR_CHARSET_SET (c2))

#endif


/* Return a character corresponding to the code-point CODE of CHARSET.
   Try some optimization before calling decode_char.

   The cases are tried in this order:
     - an ASCII byte in an ASCII-compatible charset is returned as is;
     - a CODE outside min_code..max_code yields -1;
     - a unified charset always goes through decode_char;
     - an OFFSET charset with linear codes is decoded arithmetically;
     - a MAP charset with linear codes and a decoder vector is decoded
       by indexing that vector;
     - anything else goes through decode_char.

   CHARSET and CODE are evaluated more than once.  */

#define DECODE_CHAR(charset, code)					\
  ((ASCII_BYTE_P (code) && (charset)->ascii_compatible_p)		\
   ? (code)								\
   : ((code) < (charset)->min_code || (code) > (charset)->max_code)	\
   ? -1									\
   : (charset)->unified_p						\
   ? decode_char ((charset), (code))					\
   : (charset)->method == CHARSET_METHOD_OFFSET				\
   ? ((charset)->code_linear_p						\
      ? (code) - (charset)->min_code + (charset)->code_offset		\
      : decode_char ((charset), (code)))				\
   : (charset)->method == CHARSET_METHOD_MAP				\
   ? (((charset)->code_linear_p						\
       && VECTORP (CHARSET_DECODER (charset)))				\
      ? XINT (AREF (CHARSET_DECODER (charset),				\
		    (code) - (charset)->min_code))			\
      : decode_char ((charset), (code)))				\
   : decode_char ((charset), (code)))


Kenichi Handa's avatar
Kenichi Handa committed
413 414 415 416 417 418 419 420 421
/* Return the offset of CHARSET, i.e. its code_offset minus its
   min_code, when CHARSET is a simple offset-based charset: method
   CHARSET_METHOD_OFFSET, linear code points, and not unified.
   Return -1 for every other charset.  */
#define CHARSET_OFFSET(charset)				\
  (((charset)->unified_p				\
    || ! (charset)->code_linear_p			\
    || (charset)->method != CHARSET_METHOD_OFFSET)	\
   ? -1							\
   : (charset)->code_offset - (charset)->min_code)

422 423
/* Scratch variable assigned by the ENCODE_CHAR macro to hold the
   result of its encoder char-table lookup.  */
extern Lisp_Object charset_work;

424 425 426
/* Return a code point of character C in CHARSET.
   Try some optimization before calling encode_char.

   The cases are tried in this order:
     - an ASCII character in an ASCII-compatible charset is returned
       as is;
     - unified, SUBSET and SUPERSET charsets always go through
       encode_char;
     - a C outside min_char..max_char yields CHARSET's invalid_code;
     - an OFFSET charset with linear codes is encoded arithmetically;
     - a MAP charset with compact codes and an encoder char-table is
       encoded by a char-table lookup, where nil means invalid_code;
     - anything else goes through encode_char.

   The char-table lookup stores its result in the global charset_work.
   CHARSET and C are evaluated more than once.  */

#define ENCODE_CHAR(charset, c)						 \
  ((ASCII_CHAR_P (c) && (charset)->ascii_compatible_p)			 \
   ? (c)								 \
   : ((charset)->unified_p						 \
      || (charset)->method == CHARSET_METHOD_SUBSET			 \
      || (charset)->method == CHARSET_METHOD_SUPERSET)			 \
   ? encode_char ((charset), (c))					 \
   : ((c) < (charset)->min_char || (c) > (charset)->max_char)		 \
   ? (charset)->invalid_code						 \
   : (charset)->method == CHARSET_METHOD_OFFSET				 \
   ? ((charset)->code_linear_p						 \
      ? (c) - (charset)->code_offset + (charset)->min_code		 \
      : encode_char ((charset), (c)))					 \
   : (charset)->method == CHARSET_METHOD_MAP				 \
   ? (((charset)->compact_codes_p					 \
       && CHAR_TABLE_P (CHARSET_ENCODER (charset)))			 \
      ? (charset_work = CHAR_TABLE_REF (CHARSET_ENCODER (charset), (c)), \
	 (NILP (charset_work)						 \
	  ? (charset)->invalid_code					 \
	  : XFASTINT (charset_work)))					 \
      : encode_char ((charset), (c)))					 \
   : encode_char ((charset), (c)))


/* Set to 1 when a charset map is loaded to warn that a buffer text
   and a string data may be relocated.  */
extern int charset_map_loaded;


/* Set CHARSET to the highest-priority charset that contains C, and
   CODE to the code-point of C in CHARSET.  Expands to a call to
   char_charset with Qnil as its second argument; CODE must be an
   lvalue because its address is passed as the third argument.  */
#define SPLIT_CHAR(c, charset, code)	\
  ((charset) = char_charset ((c), Qnil, &(code)))


/* Extents of iso_charset_table.  */
#define ISO_MAX_DIMENSION 3
#define ISO_MAX_CHARS 2
#define ISO_MAX_FINAL 0x80	/* only 0x30..0x7F are used */

/* Mapping table from ISO2022's charset (specified by DIMENSION,
   CHARS, and FINAL_CHAR) to Emacs' charset ID.  Should be accessed by
   macro ISO_CHARSET_TABLE (DIMENSION, CHARS, FINAL_CHAR).  */
extern int iso_charset_table[ISO_MAX_DIMENSION][ISO_MAX_CHARS][ISO_MAX_FINAL];

/* The iso_charset_table entry for the ISO-2022 charset that has
   DIMENSION (counted from 1), CHARS_96 (used directly as the second
   index) and FINAL (final character, used directly as the third
   index).  The expansion is an lvalue.  No bounds check is done.  */
#define ISO_CHARSET_TABLE(dimension, chars_96, final)	\
  iso_charset_table[(dimension) - 1][(chars_96)][(final)]
475

Miles Bader's avatar
Miles Bader committed
476
/* Nonzero if the charset whose fast map is FAST_MAP may contain C.

   For C below 0x10000 the bit tested is bit ((C >> 7) & 7) of element
   C >> 10, i.e. one bit per 128-character block.  For larger C it is
   bit ((C >> 12) & 7) of element (C >> 15) + 62, i.e. one bit per
   4096-character block.  C and FAST_MAP are evaluated more than
   once.  */
#define CHARSET_FAST_MAP_REF(c, fast_map)		\
  ((c) < 0x10000					\
   ? (fast_map)[(c) >> 10] & (1 << (((c) >> 7) & 7))	\
   : (fast_map)[((c) >> 15) + 62] & (1 << (((c) >> 12) & 7)))

/* Set the bit of FAST_MAP that covers the block containing C, using
   the same element and bit layout as CHARSET_FAST_MAP_REF.  */
#define CHARSET_FAST_MAP_SET(c, fast_map)			\
  do {								\
    if ((c) < 0x10000)						\
      (fast_map)[(c) >> 10] |= 1 << (((c) >> 7) & 7);		\
    else							\
      (fast_map)[((c) >> 15) + 62] |= 1 << (((c) >> 12) & 7);	\
  } while (0)

490 491


Miles Bader's avatar
Miles Bader committed
492
/* Nonzero if CHARSET may contain the character C.

   An ASCII character in an ASCII-compatible charset is accepted at
   once.  Unified, SUBSET and SUPERSET charsets are tested by calling
   encode_char and comparing the result with invalid_code.  Every
   other charset must first pass the CHARSET_FAST_MAP_REF test; then
   an OFFSET charset is tested against min_char..max_char, a MAP
   charset with compact codes and an encoder char-table is tested by
   a char-table lookup, and anything else falls back to encode_char.

   CHARSET and C are evaluated more than once.  */
#define CHAR_CHARSET_P(c, charset)					 \
  ((ASCII_CHAR_P (c) && (charset)->ascii_compatible_p)			 \
   || ((CHARSET_UNIFIED_P (charset)					 \
	|| (charset)->method == CHARSET_METHOD_SUBSET			 \
	|| (charset)->method == CHARSET_METHOD_SUPERSET)		 \
       ? encode_char ((charset), (c)) != (charset)->invalid_code	 \
       : (CHARSET_FAST_MAP_REF ((c), (charset)->fast_map)		 \
	  && ((charset)->method == CHARSET_METHOD_OFFSET		 \
	      ? (c) >= (charset)->min_char && (c) <= (charset)->max_char \
	      : ((charset)->method == CHARSET_METHOD_MAP		 \
		 && (charset)->compact_codes_p				 \
		 && CHAR_TABLE_P (CHARSET_ENCODER (charset)))		 \
	      ? ! NILP (CHAR_TABLE_REF (CHARSET_ENCODER (charset), (c))) \
	      : encode_char ((charset), (c)) != (charset)->invalid_code))))
507

508 509

/* Special macros for emacs-mule encoding.  */

/* Leading-code followed by extended leading-code.    DIMENSION/COLUMN */
#define EMACS_MULE_LEADING_CODE_PRIVATE_11	0x9A /* 1/1 */
#define EMACS_MULE_LEADING_CODE_PRIVATE_12	0x9B /* 1/2 */
#define EMACS_MULE_LEADING_CODE_PRIVATE_21	0x9C /* 2/1 */
#define EMACS_MULE_LEADING_CODE_PRIVATE_22	0x9D /* 2/2 */


Karl Heuer's avatar
Karl Heuer committed
518

519 520
/* Symbol passed to wrong_type_argument by the CHECK_CHARSET* macros
   when an object is not a valid charset symbol.  */
extern Lisp_Object Qcharsetp;

/* NOTE(review): presumably the symbol `ascii' -- confirm against
   charset.c.  */
extern Lisp_Object Qascii;

/* IDs of some well-known charsets.  charset_ascii is used by
   CHAR_CHARSET as an argument of CHARSET_FROM_ID; the others are
   presumably used the same way -- confirm against charset.c.  */
extern int charset_ascii, charset_eight_bit;
extern int charset_unicode;
extern int charset_jisx0201_roman;
extern int charset_jisx0208_1978;
extern int charset_jisx0208;
extern int charset_ksc5601;

extern int charset_unibyte;

Jan D's avatar
Jan D committed
531 532
extern struct charset *char_charset (int, Lisp_Object, unsigned *);
extern Lisp_Object charset_attributes (int);
533

Jan D's avatar
Jan D committed
534 535 536 537
extern int maybe_unify_char (int, Lisp_Object);
extern int decode_char (struct charset *, unsigned);
extern unsigned encode_char (struct charset *, int);
extern int string_xstring_p (Lisp_Object);
538

Jan D's avatar
Jan D committed
539 540 541
extern void map_charset_chars (void (*) (Lisp_Object, Lisp_Object),
                               Lisp_Object, Lisp_Object,
                               struct charset *, unsigned, unsigned);
542

543
#endif /* EMACS_CHARSET_H */