/* Header for charset handler.
   Copyright (C) 2001-2019 Free Software Foundation, Inc.
   Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
     2005, 2006, 2007, 2008, 2009, 2010, 2011
     National Institute of Advanced Industrial Science and Technology (AIST)
     Registration Number H14PRO021

   Copyright (C) 2003
     National Institute of Advanced Industrial Science and Technology (AIST)
     Registration Number H13PRO009

This file is part of GNU Emacs.

GNU Emacs is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or (at
your option) any later version.

GNU Emacs is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU Emacs.  If not, see <https://www.gnu.org/licenses/>.  */

#ifndef EMACS_CHARSET_H
#define EMACS_CHARSET_H

#include <verify.h>
#include "lisp.h"

INLINE_HEADER_BEGIN

/* Index to arguments of Fdefine_charset_internal.  The enumerators
   are numbered consecutively from zero; charset_arg_max, being the
   last one, equals the number of indices defined before it.  */

enum define_charset_arg_index
  {
    charset_arg_name,
    charset_arg_dimension,
    charset_arg_code_space,
    charset_arg_min_code,
    charset_arg_max_code,
    charset_arg_iso_final,
    charset_arg_iso_revision,
    charset_arg_emacs_mule_id,
    charset_arg_ascii_compatible_p,
    charset_arg_supplementary_p,
    charset_arg_invalid_code,
    charset_arg_code_offset,
    charset_arg_map,
    charset_arg_subset,
    charset_arg_superset,
    charset_arg_unify_map,
    charset_arg_plist,
    charset_arg_max
  };


/* Indices to charset attributes vector.  The enumerators are
   numbered consecutively from zero, so charset_attr_max is the
   length of the vector.  */

enum charset_attr_index
  {
    /* ID number of the charset.  */
    charset_id,

    /* Name of the charset (symbol).  */
    charset_name,

    /* Property list of the charset.  */
    charset_plist,

    /* If the method of the charset is `MAP', the value is a mapping
       vector or a file name that contains mapping vector.  Otherwise,
       nil.  */
    charset_map,

    /* If the method of the charset is `MAP', the value is a vector
       that maps code points of the charset to characters.  The vector
       is indexed by a character index.  A character index is
       calculated from a code point and the code-space table of the
       charset.  */
    charset_decoder,

    /* If the method of the charset is `MAP', the value is a
       char-table that maps characters of the charset to code
       points.  */
    charset_encoder,

    /* If the method of the charset is `SUBSET', the value is a vector
       that has this form:

	[ CHARSET-ID MIN-CODE MAX-CODE OFFSET ]

       CHARSET-ID is an ID number of a parent charset.  MIN-CODE and
       MAX-CODE specify the range of characters inherited from the
       parent.  OFFSET is an integer value to add to a code point of
       the parent charset to get the corresponding code point of this
       charset.  */
    charset_subset,

    /* If the method of the charset is `SUPERSET', the value is a list
       whose elements have this form:

	(CHARSET-ID . OFFSET)

       CHARSET-IDs are ID numbers of parent charsets.  OFFSET is an
       integer value to add to a code point of the parent charset to
       get the corresponding code point of this charset.  */
    charset_superset,

    /* The value is a mapping vector or a file name that contains the
       mapping.  This defines how characters in the charset should be
       unified with Unicode.  The value of the member
       `charset_deunifier' is created from this information.  */
    charset_unify_map,

    /* If characters in the charset must be unified with Unicode, the
       value is a char table that maps a unified Unicode character
       code to the non-unified character code in the charset.  */
    charset_deunifier,

    /* The length of the charset attribute vector.  */
    charset_attr_max
  };

/* Methods for converting code points and characters of charsets.  */

enum charset_method
  {
    /* For a charset of this method, a character code is calculated
       from a character index (which is calculated from a code point)
       simply by adding an offset value.  */
    CHARSET_METHOD_OFFSET,

    /* For a charset of this method, a decoder vector and an encoder
       char-table are used for code point <-> character code
       conversion.  */
    CHARSET_METHOD_MAP,

    /* A charset of this method is a subset of another charset.  */
    CHARSET_METHOD_SUBSET,

    /* A charset of this method is a superset of other charsets.  */
    CHARSET_METHOD_SUPERSET
  };

struct charset
{
Kenichi Handa's avatar
Kenichi Handa committed
150
  /* Index to charset_table.  */
151
  int id;
Karl Heuer's avatar
Karl Heuer committed
152

Kenichi Handa's avatar
Kenichi Handa committed
153
  /* Index to Vcharset_hash_table.  */
154
  ptrdiff_t hash_index;
155 156 157 158

  /* Dimension of the charset: 1, 2, 3, or 4.  */
  int dimension;

Juanma Barranquero's avatar
Juanma Barranquero committed
159
  /* Byte code range of each dimension.  <code_space>[4N] is a minimum
160 161 162
     byte code of the (N+1)th dimension, <code_space>[4N+1] is a
     maximum byte code of the (N+1)th dimension, <code_space>[4N+2] is
     (<code_space>[4N+1] - <code_space>[4N] + 1), <code_space>[4N+3]
163 164 165
     is the number of characters contained in the first through (N+1)th
     dimensions, except that there is no <code_space>[15].
     We get `char-index' of a `code-point' from this
166
     information.  */
167
  int code_space[15];
168

169 170 171 172
  /* If B is a byte of Nth dimension of a code-point, the (N-1)th bit
     of code_space_mask[B] is set.  This array is used to quickly
     check if a code-point is in a valid range.  */
  unsigned char *code_space_mask;
173

174
  /* True if there's no gap in code-points.  */
175
  bool_bf code_linear_p : 1;
176

177 178
  /* True if the charset is treated as 96 chars in ISO-2022
     as opposed to 94 chars.  */
179
  bool_bf iso_chars_96 : 1;
180 181

  /* True if the charset is compatible with ASCII.  */
182
  bool_bf ascii_compatible_p : 1;
183 184

  /* True if the charset is supplementary.  */
185
  bool_bf supplementary_p : 1;
186 187

  /* True if all the code points are representable by Lisp_Int.  */
188
  bool_bf compact_codes_p : 1;
189 190

  /* True if the charset is unified with Unicode.  */
191
  bool_bf unified_p : 1;
192

Kenichi Handa's avatar
Kenichi Handa committed
193 194
  /* ISO final byte of the charset: 48..127.  It may be -1 if the
     charset doesn't conform to ISO-2022.  */
195
  int iso_final;
196

Kenichi Handa's avatar
Kenichi Handa committed
197
  /* ISO revision number of the charset.  */
198
  int iso_revision;
199

200 201 202 203 204 205 206 207
  /* If the charset is identical to what supported by Emacs 21 and the
     priors, the identification number of the charset used in those
     version.  Otherwise, -1.  */
  int emacs_mule_id;

  /* The method for encoding/decoding characters of the charset.  */
  enum charset_method method;

Juanma Barranquero's avatar
Juanma Barranquero committed
208
  /* Minimum and Maximum code points of the charset.  */
209 210
  unsigned min_code, max_code;

211
  /* Offset value used by macros CODE_POINT_TO_INDEX and
Paul Eggert's avatar
Paul Eggert committed
212
      INDEX_TO_CODE_POINT.  */
213 214
  unsigned char_index_offset;

Juanma Barranquero's avatar
Juanma Barranquero committed
215
  /* Minimum and Maximum character codes of the charset.  If the
216
     charset is compatible with ASCII, min_char is a minimum non-ASCII
217 218 219
     character of the charset.  If the method of charset is
     CHARSET_METHOD_OFFSET, even if the charset is unified, min_char
     and max_char doesn't change.  */
220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235
  int min_char, max_char;

  /* The code returned by ENCODE_CHAR if a character is not encodable
     by the charset.  */
  unsigned invalid_code;

  /* If the method of the charset is CHARSET_METHOD_MAP, this is a
     table of bits used to quickly and roughly guess if a character
     belongs to the charset.

     The first 64 elements are 512 bits for characters less than
     0x10000.  Each bit corresponds to 128-character block.  The last
     126 elements are 1008 bits for the greater characters
     (0x10000..0x3FFFFF).  Each bit corresponds to 4096-character
     block.

Dave Love's avatar
Dave Love committed
236
     If a bit is 1, at least one character in the corresponding block is
237 238 239 240 241 242 243 244
     in this charset.  */
  unsigned char fast_map[190];

  /* Offset value to calculate a character code from code-point, and
     visa versa.  */
  int code_offset;
};

Paul Eggert's avatar
Paul Eggert committed
245
/* Hash table of charset symbols vs. the corresponding attribute
246 247 248 249 250 251 252 253
   vectors.  */
extern Lisp_Object Vcharset_hash_table;

/* Table of struct charset.  */
extern struct charset *charset_table;

#define CHARSET_FROM_ID(id) (charset_table + (id))

254
extern Lisp_Object Vcharset_ordered_list;
Kenichi Handa's avatar
Kenichi Handa committed
255
extern Lisp_Object Vcharset_non_preferred_head;
256

257
extern EMACS_UINT charset_ordered_list_tick;
258

259 260 261
extern Lisp_Object Viso_2022_charset_list;
extern Lisp_Object Vemacs_mule_charset_list;

262
extern int emacs_mule_charset[256];
263 264 265 266 267 268 269 270 271 272 273 274 275

/* Macros to access information about charset.  */

/* Look SYMBOL up in Vcharset_hash_table with Fgethash and return
   what is stored there (the charset's attribute vector), or Qnil if
   SYMBOL has no entry.  */
#define CHARSET_SYMBOL_ATTRIBUTES(symbol) \
  Fgethash ((symbol), Vcharset_hash_table, Qnil)

/* Accessors for the individual slots of an attribute vector ATTRS.
   Each one is AREF on the slot named by the matching enumerator of
   enum charset_attr_index.  */
#define CHARSET_ATTR_ID(attrs)		AREF ((attrs), charset_id)
#define CHARSET_ATTR_NAME(attrs)	AREF ((attrs), charset_name)
#define CHARSET_ATTR_PLIST(attrs)	AREF ((attrs), charset_plist)
#define CHARSET_ATTR_MAP(attrs)		AREF ((attrs), charset_map)
#define CHARSET_ATTR_DECODER(attrs)	AREF ((attrs), charset_decoder)
#define CHARSET_ATTR_ENCODER(attrs)	AREF ((attrs), charset_encoder)
#define CHARSET_ATTR_SUBSET(attrs)	AREF ((attrs), charset_subset)
#define CHARSET_ATTR_SUPERSET(attrs)	AREF ((attrs), charset_superset)
#define CHARSET_ATTR_UNIFY_MAP(attrs)	AREF ((attrs), charset_unify_map)
#define CHARSET_ATTR_DEUNIFIER(attrs)	AREF ((attrs), charset_deunifier)

/* Return the ID slot of the attribute vector registered for the
   charset symbol SYMBOL.  */
#define CHARSET_SYMBOL_ID(symbol) \
  CHARSET_ATTR_ID (CHARSET_SYMBOL_ATTRIBUTES (symbol))

/* Return the index in Vcharset_hash_table, as computed by
   hash_lookup, of the charset whose symbol is SYMBOL.  Other macros
   in this file treat a negative result as "not a charset".  */
#define CHARSET_SYMBOL_HASH_INDEX(symbol) \
  hash_lookup (XHASH_TABLE (Vcharset_hash_table), (symbol), NULL)

/* Return the attribute vector of CHARSET (a struct charset *): the
   value stored in Vcharset_hash_table at CHARSET's hash_index.  */
#define CHARSET_ATTRIBUTES(charset) \
  (HASH_VALUE (XHASH_TABLE (Vcharset_hash_table), (charset)->hash_index))

/* Accessors for members of struct charset.  CHARSET is a pointer to
   a struct charset; each macro expands to the parenthesized member
   access, so it can be read or assigned to.  Only members that
   struct charset actually declares have an accessor here.  */
#define CHARSET_ID(charset)		((charset)->id)
#define CHARSET_HASH_INDEX(charset)	((charset)->hash_index)
#define CHARSET_DIMENSION(charset)	((charset)->dimension)
#define CHARSET_CODE_SPACE(charset)	((charset)->code_space)
#define CHARSET_CODE_LINEAR_P(charset)	((charset)->code_linear_p)
#define CHARSET_ISO_CHARS_96(charset)	((charset)->iso_chars_96)
#define CHARSET_ISO_FINAL(charset)	((charset)->iso_final)
#define CHARSET_ISO_REVISION(charset)	((charset)->iso_revision)
#define CHARSET_EMACS_MULE_ID(charset)	((charset)->emacs_mule_id)
#define CHARSET_ASCII_COMPATIBLE_P(charset) ((charset)->ascii_compatible_p)
#define CHARSET_COMPACT_CODES_P(charset) ((charset)->compact_codes_p)
#define CHARSET_METHOD(charset)		((charset)->method)
#define CHARSET_MIN_CODE(charset)	((charset)->min_code)
#define CHARSET_MAX_CODE(charset)	((charset)->max_code)
#define CHARSET_INVALID_CODE(charset)	((charset)->invalid_code)
#define CHARSET_MIN_CHAR(charset)	((charset)->min_char)
#define CHARSET_MAX_CHAR(charset)	((charset)->max_char)
#define CHARSET_CODE_OFFSET(charset)	((charset)->code_offset)
#define CHARSET_UNIFIED_P(charset)	((charset)->unified_p)

/* Attribute-vector accessors that start from CHARSET (a struct
   charset *) instead of from the vector: each one fetches the vector
   with CHARSET_ATTRIBUTES and applies the matching CHARSET_ATTR_*
   macro.  */
#define CHARSET_NAME(charset)		\
  (CHARSET_ATTR_NAME (CHARSET_ATTRIBUTES (charset)))
#define CHARSET_MAP(charset)	\
  (CHARSET_ATTR_MAP (CHARSET_ATTRIBUTES (charset)))
#define CHARSET_DECODER(charset)	\
  (CHARSET_ATTR_DECODER (CHARSET_ATTRIBUTES (charset)))
#define CHARSET_ENCODER(charset)	\
  (CHARSET_ATTR_ENCODER (CHARSET_ATTRIBUTES (charset)))
#define CHARSET_SUBSET(charset)	\
  (CHARSET_ATTR_SUBSET (CHARSET_ATTRIBUTES (charset)))
#define CHARSET_SUPERSET(charset)	\
  (CHARSET_ATTR_SUPERSET (CHARSET_ATTRIBUTES (charset)))
#define CHARSET_UNIFY_MAP(charset)	\
  (CHARSET_ATTR_UNIFY_MAP (CHARSET_ATTRIBUTES (charset)))
#define CHARSET_DEUNIFIER(charset)	\
  (CHARSET_ATTR_DEUNIFIER (CHARSET_ATTRIBUTES (charset)))

Paul Eggert's avatar
Paul Eggert committed
331
INLINE void
332 333 334 335 336 337
set_charset_attr (struct charset *charset, enum charset_attr_index idx,
		  Lisp_Object val)
{
  ASET (CHARSET_ATTRIBUTES (charset), idx, val);
}

338

/* Nonzero if OBJ is a valid charset symbol, i.e. if
   CHARSET_SYMBOL_HASH_INDEX (OBJ) is not negative.  */
#define CHARSETP(obj) (CHARSET_SYMBOL_HASH_INDEX (obj) >= 0)

/* Check if X is a valid charset symbol.  If not, signal an error by
   calling wrong_type_argument with Qcharsetp.  X is "valid" when it
   is a symbol and CHARSET_SYMBOL_HASH_INDEX (X) is not negative.
   X may be evaluated more than once.  */
#define CHECK_CHARSET(x)					\
  do {								\
    if (! SYMBOLP (x) || CHARSET_SYMBOL_HASH_INDEX (x) < 0)	\
      wrong_type_argument (Qcharsetp, (x));			\
  } while (false)


/* Check if X is a valid charset symbol.  If valid, set ID to the id
   number of the charset.  Otherwise, signal an error.

   The id is read from the charset_id slot of the attribute vector
   found at X's index in Vcharset_hash_table.  ID must be an
   assignable expression.  The expansion declares a local named
   `idx', and X may be evaluated more than once.  */
#define CHECK_CHARSET_GET_ID(x, id)					\
  do {									\
    ptrdiff_t idx;							\
									\
    if (! SYMBOLP (x) || (idx = CHARSET_SYMBOL_HASH_INDEX (x)) < 0)	\
      wrong_type_argument (Qcharsetp, (x));				\
    id = XINT (AREF (HASH_VALUE (XHASH_TABLE (Vcharset_hash_table), idx), \
		     charset_id));					\
  } while (false)

/* Check if X is a valid charset symbol.  If valid, set ATTR to the
   attr vector of the charset.  Otherwise, signal an error.

   ATTR must be an assignable expression.  When X is a symbol, ATTR
   is assigned the lookup result before it is tested, so it holds nil
   at the point where the error is signaled.  */
#define CHECK_CHARSET_GET_ATTR(x, attr)				\
  do {									\
    if (!SYMBOLP (x) || NILP (attr = CHARSET_SYMBOL_ATTRIBUTES (x)))	\
      wrong_type_argument (Qcharsetp, (x));				\
  } while (false)


/* Check if X is a valid charset symbol.  If valid, set CHARSET to
   the result of CHARSET_FROM_ID on the charset's id.  Otherwise, an
   error is signaled by CHECK_CHARSET_GET_ID.  CHARSET must be an
   assignable expression; the expansion declares a local named
   `csid'.  */
#define CHECK_CHARSET_GET_CHARSET(x, charset)	\
  do {						\
    int csid;					\
    CHECK_CHARSET_GET_ID (x, csid);		\
    charset = CHARSET_FROM_ID (csid);		\
  } while (false)


/* Lookup Vcharset_ordered_list and return the first charset that
   contains the character C.  Characters below 0x80 skip the lookup
   and yield CHARSET_FROM_ID (charset_ascii) directly; everything
   else is delegated to char_charset.  C may be evaluated twice.  */
#define CHAR_CHARSET(c)				\
  ((c) < 0x80 ? CHARSET_FROM_ID (charset_ascii)	\
   : char_charset ((c), Qnil, NULL))

/* The declarations below are compiled out: the `#if false' guard
   never selects them.  They are kept for reference only.  */
#if false
/* Char-table of charset-sets.  Each element is a bool vector indexed
   by a charset ID.  */
extern Lisp_Object Vchar_charset_set;

/* Charset-bag of character C.  */
#define CHAR_CHARSET_SET(c) \
  CHAR_TABLE_REF (Vchar_charset_set, c)

/* Check if two characters C1 and C2 belong to the same charset.  */
#define SAME_CHARSET_P(c1, c2)	\
  intersection_p (CHAR_CHARSET_SET (c1), CHAR_CHARSET_SET (c2))

#endif


/* Return a character corresponding to the code-point CODE of CHARSET.
   Try some optimization before calling decode_char.

   The cases are tried in this order:
     - CODE is ASCII and CHARSET is ASCII compatible: CODE itself;
     - CODE is outside [min_code, max_code]: -1;
     - CHARSET is unified: decode_char;
     - method OFFSET with linear code space:
       (CODE - min_code) + code_offset;
     - method MAP with linear code space and a vector decoder: the
       decoder element at index CODE - min_code;
     - anything else: decode_char.

   Both arguments are evaluated several times, so they must be free
   of side effects.  */

#define DECODE_CHAR(charset, code)					\
  ((ASCII_CHAR_P (code) && (charset)->ascii_compatible_p)		\
   ? (code)								\
   : ((code) < (charset)->min_code || (code) > (charset)->max_code)	\
   ? -1									\
   : (charset)->unified_p						\
   ? decode_char ((charset), (code))					\
   : (charset)->method == CHARSET_METHOD_OFFSET				\
   ? ((charset)->code_linear_p						\
      ? (int) ((code) - (charset)->min_code) + (charset)->code_offset	\
      : decode_char ((charset), (code)))				\
   : (charset)->method == CHARSET_METHOD_MAP				\
   ? (((charset)->code_linear_p						\
       && VECTORP (CHARSET_DECODER (charset)))				\
      ? XINT (AREF (CHARSET_DECODER (charset),				\
		    (code) - (charset)->min_code))			\
      : decode_char ((charset), (code)))				\
   : decode_char ((charset), (code)))

424 425
extern Lisp_Object charset_work;

426 427 428
/* Return a code point of CHAR in CHARSET.
   Try some optimization before calling encode_char.  */

429 430 431 432
#define ENCODE_CHAR(charset, c)						\
  (verify_expr								\
   (sizeof (c) <= sizeof (int),						\
    (ASCII_CHAR_P (c) && (charset)->ascii_compatible_p			\
433
     ? (unsigned) (c)							\
434 435 436 437 438 439 440 441
     : ((charset)->unified_p						\
	|| (charset)->method == CHARSET_METHOD_SUBSET			\
	|| (charset)->method == CHARSET_METHOD_SUPERSET)		\
     ? encode_char (charset, c)						\
     : (c) < (charset)->min_char || (c) > (charset)->max_char		\
     ? (charset)->invalid_code						\
     : (charset)->method == CHARSET_METHOD_OFFSET			\
     ? ((charset)->code_linear_p					\
442
	? (unsigned) ((c) - (charset)->code_offset) + (charset)->min_code \
443 444 445 446 447 448 449
	: encode_char (charset, c))					\
     : (charset)->method == CHARSET_METHOD_MAP				\
     ? (((charset)->compact_codes_p					\
	 && CHAR_TABLE_P (CHARSET_ENCODER (charset)))			\
	? (charset_work = CHAR_TABLE_REF (CHARSET_ENCODER (charset), c), \
	   (NILP (charset_work)						\
	    ? (charset)->invalid_code					\
450
	    : (unsigned) XFASTINT (charset_work)))			\
451 452
	: encode_char (charset, c))					\
     : encode_char (charset, c))))
453 454


/* Set to true when a charset map is loaded to warn that a buffer text
   and a string data may be relocated.  */
extern bool charset_map_loaded;


/* Set CHARSET to what char_charset returns for the character C, and
   let char_charset store C's code-point in that charset into CODE.
   The value of the expression is the assigned charset.  */
#define SPLIT_CHAR(c, charset, code) \
  ((charset) = char_charset ((c), Qnil, &(code)))


/* Extents of iso_charset_table.  */
#define ISO_MAX_DIMENSION 3
#define ISO_MAX_CHARS 2
#define ISO_MAX_FINAL 0x80	/* only 0x30..0x7F are used */

/* Mapping table from ISO2022's charset (specified by DIMENSION,
   CHARS, and FINAL_CHAR) to Emacs' charset ID.  Should be accessed by
   macro ISO_CHARSET_TABLE (DIMENSION, CHARS, FINAL_CHAR).  */
extern int iso_charset_table[ISO_MAX_DIMENSION][ISO_MAX_CHARS][ISO_MAX_FINAL];

/* The iso_charset_table entry for the ISO 2022 charset that has
   DIMENSION, CHARS_96, and FINAL (final character).  DIMENSION is
   1-based and is converted to a 0-based index here; CHARS_96 and
   FINAL index the table directly.  The expansion is an lvalue.  */
#define ISO_CHARSET_TABLE(dimension, chars_96, final)	\
  iso_charset_table[(dimension) - 1][(chars_96)][(final)]

/* Nonzero if the charset that has FAST_MAP may contain C.

   For C below 0x10000 the bit tested is bit ((C >> 7) & 7) of byte
   FAST_MAP[C >> 10]; for larger C it is bit ((C >> 12) & 7) of byte
   FAST_MAP[(C >> 15) + 62].  The result is the masked bit itself,
   not a normalized 0/1.  FAST_MAP is parenthesized in the expansion
   so that any pointer-valued expression may be passed.  C is
   evaluated more than once.  */
#define CHARSET_FAST_MAP_REF(c, fast_map)			\
  ((c) < 0x10000						\
   ? (fast_map)[(c) >> 10] & (1 << (((c) >> 7) & 7))		\
   : (fast_map)[((c) >> 15) + 62] & (1 << (((c) >> 12) & 7)))

/* Record in FAST_MAP that its charset may contain C, by setting the
   bit for C's block: bit ((C >> 7) & 7) of byte FAST_MAP[C >> 10]
   when C is below 0x10000, otherwise bit ((C >> 12) & 7) of byte
   FAST_MAP[(C >> 15) + 62].  C is evaluated more than once.  */
#define CHARSET_FAST_MAP_SET(c, fast_map)			\
  do {								\
    if ((c) < 0x10000)						\
      (fast_map)[(c) >> 10] |= 1 << (((c) >> 7) & 7);		\
    else							\
      (fast_map)[((c) >> 15) + 62] |= 1 << (((c) >> 12) & 7);	\
  } while (false)


/* True if CHARSET may contain the character C.

   An ASCII C in an ASCII-compatible charset is accepted at once.
   For a unified charset, or one whose method is SUBSET or SUPERSET,
   the answer is whether encode_char yields something other than
   invalid_code.  Otherwise C must first pass the fast_map filter,
   and then: method OFFSET checks C against [min_char, max_char];
   method MAP with compact codes and a char-table encoder checks that
   the encoder entry for C is non-nil; any other case falls back to
   encode_char.  Both arguments are evaluated several times.  */
#define CHAR_CHARSET_P(c, charset)					 \
  ((ASCII_CHAR_P (c) && (charset)->ascii_compatible_p)			 \
   || ((CHARSET_UNIFIED_P (charset)					 \
	|| (charset)->method == CHARSET_METHOD_SUBSET			 \
	|| (charset)->method == CHARSET_METHOD_SUPERSET)		 \
       ? encode_char ((charset), (c)) != (charset)->invalid_code	 \
       : (CHARSET_FAST_MAP_REF ((c), (charset)->fast_map)		 \
	  && ((charset)->method == CHARSET_METHOD_OFFSET		 \
	      ? (c) >= (charset)->min_char && (c) <= (charset)->max_char \
	      : ((charset)->method == CHARSET_METHOD_MAP		 \
		 && (charset)->compact_codes_p				 \
		 && CHAR_TABLE_P (CHARSET_ENCODER (charset)))		 \
	      ? ! NILP (CHAR_TABLE_REF (CHARSET_ENCODER (charset), (c))) \
	      : encode_char ((charset), (c)) != (charset)->invalid_code))))

/* Special macros for emacs-mule encoding.  */

/* Leading-code followed by extended leading-code.    DIMENSION/COLUMN */
#define EMACS_MULE_LEADING_CODE_PRIVATE_11	0x9A /* 1/1 */
#define EMACS_MULE_LEADING_CODE_PRIVATE_12	0x9B /* 1/2 */
#define EMACS_MULE_LEADING_CODE_PRIVATE_21	0x9C /* 2/1 */
#define EMACS_MULE_LEADING_CODE_PRIVATE_22	0x9D /* 2/2 */


/* Integer variables, one per well-known charset; charset_ascii is
   used as a charset id by CHAR_CHARSET.
   NOTE(review): the others presumably hold charset ids as well --
   confirm against their definitions.  */
extern int charset_ascii, charset_eight_bit;
extern int charset_unicode;
extern int charset_jisx0201_roman;
extern int charset_jisx0208_1978;
extern int charset_jisx0208;
extern int charset_ksc5601;

extern int charset_unibyte;

Jan D's avatar
Jan D committed
532 533
extern struct charset *char_charset (int, Lisp_Object, unsigned *);
extern Lisp_Object charset_attributes (int);
534

Jan D's avatar
Jan D committed
535 536 537
extern int decode_char (struct charset *, unsigned);
extern unsigned encode_char (struct charset *, int);
extern int string_xstring_p (Lisp_Object);
538

Jan D's avatar
Jan D committed
539 540 541
extern void map_charset_chars (void (*) (Lisp_Object, Lisp_Object),
                               Lisp_Object, Lisp_Object,
                               struct charset *, unsigned, unsigned);
542

543 544
INLINE_HEADER_END

545
#endif /* EMACS_CHARSET_H */