charset.h 19.4 KB
Newer Older
1
/* Header for charset handler.
2
   Copyright (C) 2001, 2002, 2003, 2004, 2005,
Glenn Morris's avatar
Glenn Morris committed
3
                 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
Kenichi Handa's avatar
Kenichi Handa committed
4
   Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
Glenn Morris's avatar
Glenn Morris committed
5
     2005, 2006, 2007, 2008, 2009, 2010
Kenichi Handa's avatar
Kenichi Handa committed
6 7
     National Institute of Advanced Industrial Science and Technology (AIST)
     Registration Number H14PRO021
Karl Heuer's avatar
Karl Heuer committed
8

Kenichi Handa's avatar
Kenichi Handa committed
9
   Copyright (C) 2003
10 11
     National Institute of Advanced Industrial Science and Technology (AIST)
     Registration Number H13PRO009
Karl Heuer's avatar
Karl Heuer committed
12

Karl Heuer's avatar
Karl Heuer committed
13 14
This file is part of GNU Emacs.

15
GNU Emacs is free software: you can redistribute it and/or modify
Karl Heuer's avatar
Karl Heuer committed
16
it under the terms of the GNU General Public License as published by
17 18
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Karl Heuer's avatar
Karl Heuer committed
19

Karl Heuer's avatar
Karl Heuer committed
20 21 22 23
GNU Emacs is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.
Karl Heuer's avatar
Karl Heuer committed
24

Karl Heuer's avatar
Karl Heuer committed
25
You should have received a copy of the GNU General Public License
26
along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
Karl Heuer's avatar
Karl Heuer committed
27

28 29
#ifndef EMACS_CHARSET_H
#define EMACS_CHARSET_H
Karl Heuer's avatar
Karl Heuer committed
30

31 32 33 34 35 36 37
/* Index to arguments of Fdefine_charset_internal.  The enumerators
   are listed in argument order; charset_arg_max, being last, equals
   the number of entries above it.  */

enum define_charset_arg_index
  {
    charset_arg_name,
    charset_arg_dimension,
    charset_arg_code_space,
    charset_arg_min_code,
    charset_arg_max_code,
    charset_arg_iso_final,
    charset_arg_iso_revision,
    charset_arg_emacs_mule_id,
    charset_arg_ascii_compatible_p,
    charset_arg_supplementary_p,
    charset_arg_invalid_code,
    charset_arg_code_offset,
    charset_arg_map,
    charset_arg_subset,
    charset_arg_superset,
    charset_arg_unify_map,
    charset_arg_plist,
    charset_arg_max
  };


/* Indices to charset attributes vector.  */

enum charset_attr_index
  {
    /* ID number of the charset.  */
    charset_id,

    /* Name of the charset (symbol).  */
    charset_name,

    /* Property list of the charset.  */
    charset_plist,

    /* If the method of the charset is `MAP', the value is a mapping
       vector or a file name that contains mapping vector.  Otherwise,
       nil.  */
    charset_map,

    /* If the method of the charset is `MAP', the value is a vector
       that maps code points of the charset to characters.  The vector
       is indexed by a character index.  A character index is
       calculated from a code point and the code-space table of the
       charset.  */
    charset_decoder,

    /* If the method of the charset is `MAP', the value is a
       char-table that maps characters of the charset to code
       points.  */
    charset_encoder,

    /* If the method of the charset is `SUBSET', the value is a vector
       that has this form:

	[ CHARSET-ID MIN-CODE MAX-CODE OFFSET ]

       CHARSET-ID is an ID number of a parent charset.  MIN-CODE and
       MAX-CODE specify the range of characters inherited from the
       parent.  OFFSET is an integer value to add to a code point of
       the parent charset to get the corresponding code point of this
       charset.  */
    charset_subset,

    /* If the method of the charset is `SUPERSET', the value is a list
       whose elements have this form:

	(CHARSET-ID . OFFSET)

       CHARSET-IDs are ID numbers of parent charsets.  OFFSET is an
       integer value to add to a code point of the parent charset to
       get the corresponding code point of this charset.  */
    charset_superset,

    /* The value is a mapping vector or a file name that contains the
       mapping.  This defines how characters in the charset should be
       unified with Unicode.  The value of the member
       `charset_deunifier' is created from this information.  */
    charset_unify_map,

    /* If characters in the charset must be unified with Unicode, the
       value is a char table that maps a unified Unicode character
       code to the non-unified character code in the charset.  */
    charset_deunifier,

    /* The length of the charset attribute vector.  */
    charset_attr_max
  };

/* Methods for converting code points and characters of charsets.  */

enum charset_method
  {
    /* For a charset of this method, a character code is calculated
       from a character index (which is calculated from a code point)
       simply by adding an offset value.  */
    CHARSET_METHOD_OFFSET,

    /* For a charset of this method, a decoder vector and an encoder
       char-table are used for code point <-> character code
       conversion.  */
    CHARSET_METHOD_MAP,

    /* A charset of this method is a subset of another charset.  */
    CHARSET_METHOD_SUBSET,

    /* A charset of this method is a superset of other charsets.  */
    CHARSET_METHOD_SUPERSET
  };

struct charset
{
Kenichi Handa's avatar
Kenichi Handa committed
146
  /* Index to charset_table.  */
147
  int id;
Karl Heuer's avatar
Karl Heuer committed
148

Kenichi Handa's avatar
Kenichi Handa committed
149
  /* Index to Vcharset_hash_table.  */
150 151 152 153 154
  int hash_index;

  /* Dimension of the charset: 1, 2, 3, or 4.  */
  int dimension;

155 156 157 158 159 160 161
  /* Byte code range of each dimension.  <code_space>[4N] is a mininum
     byte code of the (N+1)th dimension, <code_space>[4N+1] is a
     maximum byte code of the (N+1)th dimension, <code_space>[4N+2] is
     (<code_space>[4N+1] - <code_space>[4N] + 1), <code_space>[4N+3]
     is a number of characters containd in the first to (N+1)th
     dismesions.  We get `char-index' of a `code-point' from this
     information.  */
162
  int code_space[16];
163

164 165 166 167
  /* If B is a byte of Nth dimension of a code-point, the (N-1)th bit
     of code_space_mask[B] is set.  This array is used to quickly
     check if a code-point is in a valid range.  */
  unsigned char *code_space_mask;
168

169 170
  /* 1 if there's no gap in code-points.  */
  int code_linear_p;
171

172 173 174
  /* If the charset is treated as 94-chars in ISO-2022, the value is 0.
     If the charset is treated as 96-chars in ISO-2022, the value is 1.  */
  int iso_chars_96;
175

Kenichi Handa's avatar
Kenichi Handa committed
176 177
  /* ISO final byte of the charset: 48..127.  It may be -1 if the
     charset doesn't conform to ISO-2022.  */
178
  int iso_final;
179

Kenichi Handa's avatar
Kenichi Handa committed
180
  /* ISO revision number of the charset.  */
181
  int iso_revision;
182

183 184 185 186 187
  /* If the charset is identical to what supported by Emacs 21 and the
     priors, the identification number of the charset used in those
     version.  Otherwise, -1.  */
  int emacs_mule_id;

Miles Bader's avatar
Miles Bader committed
188
  /* Nonzero if the charset is compatible with ASCII.  */
189
  int ascii_compatible_p;
Karl Heuer's avatar
Karl Heuer committed
190

Miles Bader's avatar
Miles Bader committed
191
  /* Nonzero if the charset is supplementary.  */
192 193
  int supplementary_p;

Miles Bader's avatar
Miles Bader committed
194
  /* Nonzero if all the code points are representable by Lisp_Int.  */
195 196 197 198 199 200 201 202
  int compact_codes_p;

  /* The method for encoding/decoding characters of the charset.  */
  enum charset_method method;

  /* Mininum and Maximum code points of the charset.  */
  unsigned min_code, max_code;

203 204 205 206
  /* Offset value used by macros CODE_POINT_TO_INDEX and
      INDEX_TO_CODE_POINT. .  */
  unsigned char_index_offset;

207 208
  /* Mininum and Maximum character codes of the charset.  If the
     charset is compatible with ASCII, min_char is a minimum non-ASCII
209 210 211
     character of the charset.  If the method of charset is
     CHARSET_METHOD_OFFSET, even if the charset is unified, min_char
     and max_char doesn't change.  */
212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227
  int min_char, max_char;

  /* The code returned by ENCODE_CHAR if a character is not encodable
     by the charset.  */
  unsigned invalid_code;

  /* If the method of the charset is CHARSET_METHOD_MAP, this is a
     table of bits used to quickly and roughly guess if a character
     belongs to the charset.

     The first 64 elements are 512 bits for characters less than
     0x10000.  Each bit corresponds to 128-character block.  The last
     126 elements are 1008 bits for the greater characters
     (0x10000..0x3FFFFF).  Each bit corresponds to 4096-character
     block.

Dave Love's avatar
Dave Love committed
228
     If a bit is 1, at least one character in the corresponding block is
229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247
     in this charset.  */
  unsigned char fast_map[190];

  /* Offset value to calculate a character code from code-point, and
     visa versa.  */
  int code_offset;

  int unified_p;
};

/* Hash table of charset symbols vs. the corresponding attribute
   vectors.  */
extern Lisp_Object Vcharset_hash_table;

/* Table of struct charset.  */
extern struct charset *charset_table;

/* Return a pointer to the ID'th element of charset_table.  This is
   plain pointer arithmetic; ID is not range-checked here.  */
#define CHARSET_FROM_ID(id) (charset_table + (id))

248
extern Lisp_Object Vcharset_ordered_list;
Kenichi Handa's avatar
Kenichi Handa committed
249
extern Lisp_Object Vcharset_non_preferred_head;
250 251

/* Incremented everytime we change the priority of charsets.  */
252
extern unsigned short charset_ordered_list_tick;
253

254 255 256 257 258 259
extern Lisp_Object Vcharset_list;
extern Lisp_Object Viso_2022_charset_list;
extern Lisp_Object Vemacs_mule_charset_list;

extern struct charset *emacs_mule_charset[256];

Kenichi Handa's avatar
Kenichi Handa committed
260
extern Lisp_Object Vcurrent_iso639_language;
261 262 263 264 265 266 267 268 269 270 271 272 273

/* Macros to access information about charset.  */

/* Return the attribute vector of charset whose symbol is SYMBOL.
   The lookup goes through Vcharset_hash_table with Qnil passed as
   the last argument of Fgethash.  */
#define CHARSET_SYMBOL_ATTRIBUTES(symbol)	\
  Fgethash ((symbol), Vcharset_hash_table, Qnil)

/* Accessors for the elements of an attribute vector ATTRS; the
   element positions are the members of enum charset_attr_index.  */
#define CHARSET_ATTR_ID(attrs)		AREF ((attrs), charset_id)
#define CHARSET_ATTR_NAME(attrs)	AREF ((attrs), charset_name)
#define CHARSET_ATTR_PLIST(attrs)	AREF ((attrs), charset_plist)
#define CHARSET_ATTR_MAP(attrs)		AREF ((attrs), charset_map)
#define CHARSET_ATTR_DECODER(attrs)	AREF ((attrs), charset_decoder)
#define CHARSET_ATTR_ENCODER(attrs)	AREF ((attrs), charset_encoder)
274 275
/* Elements of attribute vector ATTRS at indices charset_subset and
   charset_superset.  */
#define CHARSET_ATTR_SUBSET(attrs)	AREF ((attrs), charset_subset)
#define CHARSET_ATTR_SUPERSET(attrs)	AREF ((attrs), charset_superset)
276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319
/* Elements of attribute vector ATTRS at indices charset_unify_map
   and charset_deunifier.  */
#define CHARSET_ATTR_UNIFY_MAP(attrs)	AREF ((attrs), charset_unify_map)
#define CHARSET_ATTR_DEUNIFIER(attrs)	AREF ((attrs), charset_deunifier)

/* Return the charset_id element of the attribute vector of the
   charset whose symbol is SYMBOL.  */
#define CHARSET_SYMBOL_ID(symbol)	\
  CHARSET_ATTR_ID (CHARSET_SYMBOL_ATTRIBUTES (symbol))

/* Return an index to Vcharset_hash_table of the charset whose symbol
   is SYMBOL.  */
#define CHARSET_SYMBOL_HASH_INDEX(symbol)	\
  hash_lookup (XHASH_TABLE (Vcharset_hash_table), symbol, NULL)

/* Return the attribute vector of CHARSET.  */
#define CHARSET_ATTRIBUTES(charset)	\
  (HASH_VALUE (XHASH_TABLE (Vcharset_hash_table), (charset)->hash_index))

/* Accessors for the members of struct charset.  CHARSET is a pointer
   to a struct charset.  */
#define CHARSET_ID(charset)		((charset)->id)
#define CHARSET_HASH_INDEX(charset)	((charset)->hash_index)
#define CHARSET_DIMENSION(charset)	((charset)->dimension)
#define CHARSET_CODE_SPACE(charset)	((charset)->code_space)
#define CHARSET_CODE_LINEAR_P(charset)	((charset)->code_linear_p)
#define CHARSET_ISO_CHARS_96(charset)	((charset)->iso_chars_96)
#define CHARSET_ISO_FINAL(charset)	((charset)->iso_final)
/* NOTE(review): struct charset as declared in this header has no
   `iso_plane' member, so any expansion of the next macro cannot
   compile.  Confirm it is unused before relying on or removing it.  */
#define CHARSET_ISO_PLANE(charset)	((charset)->iso_plane)
#define CHARSET_ISO_REVISION(charset)	((charset)->iso_revision)
#define CHARSET_EMACS_MULE_ID(charset)	((charset)->emacs_mule_id)
#define CHARSET_ASCII_COMPATIBLE_P(charset) ((charset)->ascii_compatible_p)
#define CHARSET_COMPACT_CODES_P(charset) ((charset)->compact_codes_p)
#define CHARSET_METHOD(charset)		((charset)->method)
#define CHARSET_MIN_CODE(charset)	((charset)->min_code)
#define CHARSET_MAX_CODE(charset)	((charset)->max_code)
#define CHARSET_INVALID_CODE(charset)	((charset)->invalid_code)
#define CHARSET_MIN_CHAR(charset)	((charset)->min_char)
#define CHARSET_MAX_CHAR(charset)	((charset)->max_char)
#define CHARSET_CODE_OFFSET(charset)	((charset)->code_offset)
#define CHARSET_UNIFIED_P(charset)	((charset)->unified_p)

/* Accessors that first fetch the attribute vector of CHARSET (via
   CHARSET_ATTRIBUTES) and then return one of its elements.  */
#define CHARSET_NAME(charset)		\
  (CHARSET_ATTR_NAME (CHARSET_ATTRIBUTES (charset)))
#define CHARSET_MAP(charset)	\
  (CHARSET_ATTR_MAP (CHARSET_ATTRIBUTES (charset)))
#define CHARSET_DECODER(charset)	\
  (CHARSET_ATTR_DECODER (CHARSET_ATTRIBUTES (charset)))
#define CHARSET_ENCODER(charset)	\
  (CHARSET_ATTR_ENCODER (CHARSET_ATTRIBUTES (charset)))
320 321 322 323
/* Return the charset_subset / charset_superset element of the
   attribute vector of CHARSET.  */
#define CHARSET_SUBSET(charset)	\
  (CHARSET_ATTR_SUBSET (CHARSET_ATTRIBUTES (charset)))
#define CHARSET_SUPERSET(charset)	\
  (CHARSET_ATTR_SUPERSET (CHARSET_ATTRIBUTES (charset)))
324 325 326 327 328 329
/* Return the charset_unify_map / charset_deunifier element of the
   attribute vector of CHARSET.  */
#define CHARSET_UNIFY_MAP(charset)	\
  (CHARSET_ATTR_UNIFY_MAP (CHARSET_ATTRIBUTES (charset)))
#define CHARSET_DEUNIFIER(charset)	\
  (CHARSET_ATTR_DEUNIFIER (CHARSET_ATTRIBUTES (charset)))


Miles Bader's avatar
Miles Bader committed
330
/* Nonzero if OBJ is a valid charset symbol, i.e. if looking OBJ up
   in Vcharset_hash_table yields a non-negative index.  */
#define CHARSETP(obj) (CHARSET_SYMBOL_HASH_INDEX (obj) >= 0)

/* Check if X is a valid charset symbol.  If not, signal an error
   through wrong_type_argument with Qcharsetp.  X is evaluated more
   than once, so it must not have side effects.  */
#define CHECK_CHARSET(x)					\
  do {								\
    if (! SYMBOLP (x) || CHARSET_SYMBOL_HASH_INDEX (x) < 0)	\
      wrong_type_argument (Qcharsetp, (x));			\
  } while (0)
339 340


341 342 343 344 345 346 347
/* Check if X is a valid charset symbol.  If valid, set ID to the id
   number of the charset.  Otherwise, signal an error.

   ID must be an lvalue.  X is evaluated more than once.  The hash
   index found during validation is reused to fetch the attribute
   vector, so the table is searched only once.  */
#define CHECK_CHARSET_GET_ID(x, id)					\
  do {									\
    int idx;								\
									\
    if (! SYMBOLP (x) || (idx = CHARSET_SYMBOL_HASH_INDEX (x)) < 0)	\
      wrong_type_argument (Qcharsetp, (x));				\
    (id) = XINT (AREF (HASH_VALUE (XHASH_TABLE (Vcharset_hash_table), idx), \
		       charset_id));					\
  } while (0)

353

354 355 356 357 358
/* Check if X is a valid charset symbol.  If valid, set ATTR to the
   attr vector of the charset.  Otherwise, signal an error.

   ATTR must be an lvalue; it is assigned before the validity test,
   so it holds nil when the error is signaled for a symbol that is
   not a charset.  X is evaluated more than once.  */
#define CHECK_CHARSET_GET_ATTR(x, attr)					\
  do {									\
    if (!SYMBOLP (x) || NILP ((attr) = CHARSET_SYMBOL_ATTRIBUTES (x)))	\
      wrong_type_argument (Qcharsetp, (x));				\
  } while (0)


363 364 365 366 367
/* Check if X is a valid charset symbol.  If valid, set CHARSET to
   the corresponding `struct charset *' (via CHECK_CHARSET_GET_ID and
   CHARSET_FROM_ID).  Otherwise, signal an error.

   The temporary is named `csid' rather than `id' so that an argument
   expression mentioning a caller variable called `id' is not
   silently captured by the macro's own local.  */
#define CHECK_CHARSET_GET_CHARSET(x, charset)	\
  do {						\
    int csid;					\
    CHECK_CHARSET_GET_ID (x, csid);		\
    (charset) = CHARSET_FROM_ID (csid);		\
  } while (0)


Juanma Barranquero's avatar
Juanma Barranquero committed
371
/* Lookup Vcharset_ordered_list and return the first charset that
   contains the character C.  Characters below 0x80 are answered
   directly with the charset whose ID is charset_ascii, without
   calling char_charset.  C is evaluated more than once.  */
#define CHAR_CHARSET(c)				\
  ((c) < 0x80 ? CHARSET_FROM_ID (charset_ascii)	\
   : char_charset ((c), Qnil, NULL))
376

377 378 379 380
#if 0
/* Char-table of charset-sets.  Each element is a bool vector indexed
   by a charset ID.  */
extern Lisp_Object Vchar_charset_set;

/* Charset-bag of character C.  */
#define CHAR_CHARSET_SET(c) \
  CHAR_TABLE_REF (Vchar_charset_set, c)

/* Check if two characters C1 and C2 belong to the same charset.  */
#define SAME_CHARSET_P(c1, c2)	\
  intersection_p (CHAR_CHARSET_SET (c1), CHAR_CHARSET_SET (c2))

#endif


/* Return a character corresponding to the code-point CODE of CHARSET.
   Try some optimization before calling decode_char.

   The cases are tried in this order:
     - an ASCII byte in an ASCII-compatible charset is returned as is;
     - a CODE outside [min_code, max_code] yields -1;
     - a unified charset always goes through decode_char;
     - CHARSET_METHOD_OFFSET with linear code space is computed inline
       as CODE - min_code + code_offset;
     - CHARSET_METHOD_MAP with linear code space and a vector decoder
       is looked up inline in that vector;
     - everything else falls back to decode_char.

   CHARSET and CODE are evaluated more than once.  */

#define DECODE_CHAR(charset, code)					\
  ((ASCII_BYTE_P (code) && (charset)->ascii_compatible_p)		\
   ? (code)								\
   : ((code) < (charset)->min_code || (code) > (charset)->max_code)	\
   ? -1									\
   : (charset)->unified_p						\
   ? decode_char ((charset), (code))					\
   : (charset)->method == CHARSET_METHOD_OFFSET				\
   ? ((charset)->code_linear_p						\
      ? (code) - (charset)->min_code + (charset)->code_offset		\
      : decode_char ((charset), (code)))				\
   : (charset)->method == CHARSET_METHOD_MAP				\
   ? (((charset)->code_linear_p						\
       && VECTORP (CHARSET_DECODER (charset)))				\
      ? XINT (AREF (CHARSET_DECODER (charset),				\
		    (code) - (charset)->min_code))			\
      : decode_char ((charset), (code)))				\
   : decode_char ((charset), (code)))


Kenichi Handa's avatar
Kenichi Handa committed
416 417 418 419 420 421 422 423 424
/* If CHARSET is a simple offset base charset (method
   CHARSET_METHOD_OFFSET, linear code space, not unified), return its
   offset, i.e. code_offset - min_code; otherwise return -1.

   The difference mixes an int with an unsigned, so it is cast back
   to int.  Without the cast the whole conditional would have
   unsigned type, the -1 sentinel would become UINT_MAX, and a test
   such as `CHARSET_OFFSET (cs) < 0' could never succeed.  */
#define CHARSET_OFFSET(charset)					\
  (((charset)->method == CHARSET_METHOD_OFFSET			\
    && (charset)->code_linear_p					\
    && ! (charset)->unified_p)					\
   ? (int) ((charset)->code_offset - (charset)->min_code)	\
   : -1)

425 426
extern Lisp_Object charset_work;

427 428 429
/* Return a code point of C in CHARSET.
   Try some optimization before calling encode_char.

   The cases are tried in this order:
     - an ASCII character in an ASCII-compatible charset is returned
       as is;
     - unified, SUBSET and SUPERSET charsets always go through
       encode_char;
     - a C outside [min_char, max_char] yields invalid_code;
     - CHARSET_METHOD_OFFSET with linear code space is computed inline
       as C - code_offset + min_code;
     - CHARSET_METHOD_MAP with compact codes and a char-table encoder
       is looked up inline, a nil entry yielding invalid_code;
     - everything else falls back to encode_char.

   The inline char-table lookup stores its intermediate result in the
   global `charset_work'.  CHARSET and C are evaluated more than
   once.  */

#define ENCODE_CHAR(charset, c)						 \
  ((ASCII_CHAR_P (c) && (charset)->ascii_compatible_p)			 \
   ? (c)								 \
   : ((charset)->unified_p						 \
      || (charset)->method == CHARSET_METHOD_SUBSET			 \
      || (charset)->method == CHARSET_METHOD_SUPERSET)			 \
   ? encode_char ((charset), (c))					 \
   : ((c) < (charset)->min_char || (c) > (charset)->max_char)		 \
   ? (charset)->invalid_code						 \
   : (charset)->method == CHARSET_METHOD_OFFSET				 \
   ? ((charset)->code_linear_p						 \
      ? (c) - (charset)->code_offset + (charset)->min_code		 \
      : encode_char ((charset), (c)))					 \
   : (charset)->method == CHARSET_METHOD_MAP				 \
   ? (((charset)->compact_codes_p					 \
       && CHAR_TABLE_P (CHARSET_ENCODER (charset)))			 \
      ? (charset_work = CHAR_TABLE_REF (CHARSET_ENCODER (charset), (c)), \
	 (NILP (charset_work)						 \
	  ? (charset)->invalid_code					 \
	  : XFASTINT (charset_work)))					 \
      : encode_char ((charset), (c)))					 \
   : encode_char ((charset), (c)))


/* Set to 1 when a charset map is loaded to warn that a buffer text
   and a string data may be relocated.  */
extern int charset_map_loaded;


/* Set CHARSET to the highest-priority charset of C, and CODE to the
   code-point of C in CHARSET.  CODE must be an lvalue: its address
   is passed to char_charset.  */
#define SPLIT_CHAR(c, charset, code)	\
  ((charset) = char_charset ((c), Qnil, &(code)))


/* Sizes of the three dimensions of iso_charset_table.  */
#define ISO_MAX_DIMENSION 3
#define ISO_MAX_CHARS 2
#define ISO_MAX_FINAL 0x80	/* only 0x30..0x7F are used */

/* Mapping table from ISO2022's charset (specified by DIMENSION,
   CHARS, and FINAL_CHAR) to Emacs' charset ID.  Should be accessed by
   macro ISO_CHARSET_TABLE (DIMENSION, CHARS, FINAL_CHAR).  */
extern int iso_charset_table[ISO_MAX_DIMENSION][ISO_MAX_CHARS][ISO_MAX_FINAL];
473

474 475 476 477
/* The element of iso_charset_table for the iso2022 charset that has
   DIMENSION, CHARS_96, and FINAL (final character).  DIMENSION is
   1-based here and is converted to a 0-based array index.  */
#define ISO_CHARSET_TABLE(dimension, chars_96, final)	\
  iso_charset_table[(dimension) - 1][(chars_96)][(final)]
478

Miles Bader's avatar
Miles Bader committed
479
/* Nonzero if the charset that has FAST_MAP may contain C.

   For C below 0x10000 the bit for C's 128-character block is tested;
   for larger C the bit for its 4096-character block is tested, those
   bits starting at element 64 of FAST_MAP.  FAST_MAP is
   parenthesized so that an expression argument such as `map + 0' is
   subscripted as a whole.  C is evaluated more than once.  */
#define CHARSET_FAST_MAP_REF(c, fast_map)			\
  ((c) < 0x10000						\
   ? (fast_map)[(c) >> 10] & (1 << (((c) >> 7) & 7))		\
   : (fast_map)[((c) >> 15) + 62] & (1 << (((c) >> 12) & 7)))
484

485
/* Set in FAST_MAP the bit for the block that contains C: a
   128-character block for C below 0x10000, a 4096-character block
   (stored from element 64 on) otherwise.  C is evaluated more than
   once.  */
#define CHARSET_FAST_MAP_SET(c, fast_map)			\
  do {								\
    if ((c) < 0x10000)						\
      (fast_map)[(c) >> 10] |= 1 << (((c) >> 7) & 7);		\
    else							\
      (fast_map)[((c) >> 15) + 62] |= 1 << (((c) >> 12) & 7);	\
  } while (0)

493 494


Miles Bader's avatar
Miles Bader committed
495
/* 1 if CHARSET may contain the character C.

   An ASCII character in an ASCII-compatible charset is accepted at
   once.  Unified, SUBSET and SUPERSET charsets are decided by
   comparing encode_char's result with invalid_code.  Otherwise the
   fast_map bit for C must be set, and then:
     - for CHARSET_METHOD_OFFSET, C must lie in [min_char, max_char];
     - for CHARSET_METHOD_MAP with compact codes and a char-table
       encoder, the encoder entry for C must be non-nil;
     - in the remaining cases encode_char decides.

   CHARSET and C are evaluated more than once.  */
#define CHAR_CHARSET_P(c, charset)					 \
  ((ASCII_CHAR_P (c) && (charset)->ascii_compatible_p)			 \
   || ((CHARSET_UNIFIED_P (charset)					 \
	|| (charset)->method == CHARSET_METHOD_SUBSET			 \
	|| (charset)->method == CHARSET_METHOD_SUPERSET)		 \
       ? encode_char ((charset), (c)) != (charset)->invalid_code	 \
       : (CHARSET_FAST_MAP_REF ((c), (charset)->fast_map)		 \
	  && ((charset)->method == CHARSET_METHOD_OFFSET		 \
	      ? (c) >= (charset)->min_char && (c) <= (charset)->max_char \
	      : ((charset)->method == CHARSET_METHOD_MAP		 \
		 && (charset)->compact_codes_p				 \
		 && CHAR_TABLE_P (CHARSET_ENCODER (charset)))		 \
	      ? ! NILP (CHAR_TABLE_REF (CHARSET_ENCODER (charset), (c))) \
	      : encode_char ((charset), (c)) != (charset)->invalid_code))))
510

511 512

/* Special macros for emacs-mule encoding.  */
Karl Heuer's avatar
Karl Heuer committed
513

514 515 516 517 518 519 520 521 522
/* Leading-code followed by extended leading-code.    DIMENSION/COLUMN */
#define EMACS_MULE_LEADING_CODE_PRIVATE_11	0x9A /* 1/1 */
#define EMACS_MULE_LEADING_CODE_PRIVATE_12	0x9B /* 1/2 */
#define EMACS_MULE_LEADING_CODE_PRIVATE_21	0x9C /* 2/1 */
#define EMACS_MULE_LEADING_CODE_PRIVATE_22	0x9D /* 2/2 */

extern struct charset *emacs_mule_charset[256];


Karl Heuer's avatar
Karl Heuer committed
523

524 525 526
extern Lisp_Object Qcharsetp;

extern Lisp_Object Qascii, Qunicode;
Kenichi Handa's avatar
Kenichi Handa committed
527
extern int charset_ascii, charset_eight_bit;
528
extern int charset_iso_8859_1;
Kenichi Handa's avatar
Kenichi Handa committed
529
extern int charset_unicode;
530 531 532
extern int charset_jisx0201_roman;
extern int charset_jisx0208_1978;
extern int charset_jisx0208;
Kenichi Handa's avatar
Kenichi Handa committed
533
extern int charset_ksc5601;
534

Kenichi Handa's avatar
Kenichi Handa committed
535 536
extern int charset_unibyte;

Jan D's avatar
Jan D committed
537 538
extern struct charset *char_charset (int, Lisp_Object, unsigned *);
extern Lisp_Object charset_attributes (int);
539

Jan D's avatar
Jan D committed
540 541 542 543
extern int maybe_unify_char (int, Lisp_Object);
extern int decode_char (struct charset *, unsigned);
extern unsigned encode_char (struct charset *, int);
extern int string_xstring_p (Lisp_Object);
544

Jan D's avatar
Jan D committed
545 546 547
extern void map_charset_chars (void (*) (Lisp_Object, Lisp_Object),
                               Lisp_Object, Lisp_Object,
                               struct charset *, unsigned, unsigned);
548

549
EXFUN (Funify_charset, 3);
Kenichi Handa's avatar
Kenichi Handa committed
550

551
#endif /* EMACS_CHARSET_H */
Kenichi Handa's avatar
Kenichi Handa committed
552 553 554

/* arch-tag: 3b96db55-4961-481d-ac3e-219f46a2b3aa
   (do not change this comment) */