charset.c 58.8 KB
Newer Older
1
/* Basic character set support.
2
   Copyright (C) 2001, 2002, 2003, 2004, 2005,
Glenn Morris's avatar
Glenn Morris committed
3
                 2006, 2007, 2008 Free Software Foundation, Inc.
Kenichi Handa's avatar
Kenichi Handa committed
4
   Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
Glenn Morris's avatar
Glenn Morris committed
5
     2005, 2006, 2007, 2008
Kenichi Handa's avatar
Kenichi Handa committed
6 7
     National Institute of Advanced Industrial Science and Technology (AIST)
     Registration Number H14PRO021
Karl Heuer's avatar
Karl Heuer committed
8

9
   Copyright (C) 2003, 2004
10 11
     National Institute of Advanced Industrial Science and Technology (AIST)
     Registration Number H13PRO009
Karl Heuer's avatar
Karl Heuer committed
12

Karl Heuer's avatar
Karl Heuer committed
13 14 15 16
This file is part of GNU Emacs.

GNU Emacs is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
17
the Free Software Foundation; either version 3, or (at your option)
Karl Heuer's avatar
Karl Heuer committed
18
any later version.
Karl Heuer's avatar
Karl Heuer committed
19

Karl Heuer's avatar
Karl Heuer committed
20 21 22 23
GNU Emacs is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.
Karl Heuer's avatar
Karl Heuer committed
24

Karl Heuer's avatar
Karl Heuer committed
25 26
You should have received a copy of the GNU General Public License
along with GNU Emacs; see the file COPYING.  If not, write to
Lute Kamstra's avatar
Lute Kamstra committed
27 28
the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
Boston, MA 02110-1301, USA.  */
Karl Heuer's avatar
Karl Heuer committed
29

30 31
#include <config.h>

Karl Heuer's avatar
Karl Heuer committed
32
#include <stdio.h>
33 34
#include <unistd.h>
#include <ctype.h>
Karl Heuer's avatar
Karl Heuer committed
35 36
#include <sys/types.h>
#include "lisp.h"
37
#include "character.h"
Karl Heuer's avatar
Karl Heuer committed
38 39
#include "charset.h"
#include "coding.h"
Kenichi Handa's avatar
Kenichi Handa committed
40
#include "disptab.h"
41
#include "buffer.h"
Karl Heuer's avatar
Karl Heuer committed
42

43
/*** GENERAL NOTES on CODED CHARACTER SETS (CHARSETS) ***

  A coded character set ("charset" hereafter) is a meaningful
  collection (e.g. language, culture, functionality, etc.) of
  characters.  Emacs handles multiple charsets at once.  In Emacs Lisp
  code, a charset is represented by a symbol.  In C code, a charset is
  represented by its ID number or by a pointer to a struct charset.

  The actual information about each charset is stored in two places.
  Lispy information is stored in the hash table Vcharset_hash_table as
  a vector (charset attributes).  The other information is stored in
  charset_table as a struct charset.

*/
Karl Heuer's avatar
Karl Heuer committed
57

58 59
/* List of all charsets.  This variable is used only from Emacs
   Lisp.  */
Karl Heuer's avatar
Karl Heuer committed
60 61
Lisp_Object Vcharset_list;

62 63 64
/* Hash table that contains attributes of each charset.  Keys are
   charset symbols, and values are vectors of charset attributes.  */
Lisp_Object Vcharset_hash_table;
Karl Heuer's avatar
Karl Heuer committed
65

66 67
/* Table of struct charset.  */
struct charset *charset_table;
Karl Heuer's avatar
Karl Heuer committed
68

69
static int charset_table_size;
70
static int charset_table_used;
Karl Heuer's avatar
Karl Heuer committed
71

72
Lisp_Object Qcharsetp;
Karl Heuer's avatar
Karl Heuer committed
73

74 75
/* Special charset symbols.  */
Lisp_Object Qascii;
76
Lisp_Object Qeight_bit;
77 78
Lisp_Object Qiso_8859_1;
Lisp_Object Qunicode;
Karl Heuer's avatar
Karl Heuer committed
79

80 81
/* The corresponding charsets.  */
int charset_ascii;
82
int charset_eight_bit;
83 84
int charset_iso_8859_1;
int charset_unicode;
85

86 87 88 89
/* The other special charsets.  */
int charset_jisx0201_roman;
int charset_jisx0208_1978;
int charset_jisx0208;
90

91 92
/* Value of charset attribute `charset-iso-plane'.  */
Lisp_Object Qgl, Qgr;
93

94 95
/* Charset of unibyte characters.  */
int charset_unibyte;
Karl Heuer's avatar
Karl Heuer committed
96

97 98
/* List of charsets ordered by the priority.  */
Lisp_Object Vcharset_ordered_list;
Karl Heuer's avatar
Karl Heuer committed
99

100
/* Incremented everytime we change Vcharset_ordered_list.  This is
101
   unsigned short so that it fits in Lisp_Int and never matches
102 103
   -1.  */
unsigned short charset_ordered_list_tick;
Karl Heuer's avatar
Karl Heuer committed
104

105 106
/* List of iso-2022 charsets.  */
Lisp_Object Viso_2022_charset_list;
107

108 109 110 111
/* List of emacs-mule charsets.  */
Lisp_Object Vemacs_mule_charset_list;

struct charset *emacs_mule_charset[256];
Karl Heuer's avatar
Karl Heuer committed
112 113 114

/* Mapping table from ISO2022's charset (specified by DIMENSION,
   CHARS, and FINAL-CHAR) to Emacs' charset.  */
115 116
int iso_charset_table[ISO_MAX_DIMENSION][ISO_MAX_CHARS][ISO_MAX_FINAL];

117
Lisp_Object Vcharset_map_path;
118 119 120

Lisp_Object Vchar_unified_charset_table;

121 122 123 124 125 126 127
/* Defined in chartab.c */
extern void
map_char_table_for_charset P_ ((void (*c_function) (Lisp_Object, Lisp_Object),
				Lisp_Object function, Lisp_Object table,
				Lisp_Object arg, struct charset *charset,
				unsigned from, unsigned to));

128 129 130 131 132 133 134 135 136 137 138 139 140
/* Convert the code-point CODE of CHARSET to a character index.

   For a charset whose code space is linear the index is simply the
   distance of CODE from CHARSET's min_code.  Otherwise each of the
   four bytes of CODE is first looked up in CHARSET's code_space_mask
   (bit 0x1 for the lowest byte up to bit 0x8 for the highest one); if
   any byte is not flagged the value is -1, else the index is computed
   from CHARSET's code_space table and char_index_offset.

   Both arguments are evaluated more than once.  */

#define CODE_POINT_TO_INDEX(charset, code)				\
  ((charset)->code_linear_p						\
   ? (code) - (charset)->min_code					\
   : (! ((charset)->code_space_mask[(code) >> 24] & 0x8)		\
      || ! ((charset)->code_space_mask[((code) >> 16) & 0xFF] & 0x4)	\
      || ! ((charset)->code_space_mask[((code) >> 8) & 0xFF] & 0x2)	\
      || ! ((charset)->code_space_mask[(code) & 0xFF] & 0x1))		\
   ? -1									\
   : (((((code) >> 24) - (charset)->code_space[12])			\
       * (charset)->code_space[11])					\
      + (((((code) >> 16) & 0xFF) - (charset)->code_space[8])		\
	 * (charset)->code_space[7])					\
      + (((((code) >> 8) & 0xFF) - (charset)->code_space[4])		\
	 * (charset)->code_space[3])					\
      + (((code) & 0xFF) - (charset)->code_space[0])			\
      - ((charset)->char_index_offset)))


/* Convert the character index IDX to code-point CODE for CHARSET.
   It is assumed that IDX is in a valid range.

   For a charset whose code space is linear the code-point is IDX plus
   CHARSET's min_code.  Otherwise CHARSET's char_index_offset is added
   to IDX and the sum is split into up to four bytes according to
   CHARSET's code_space table.

   IDX is not modified (it need not even be an lvalue), so a loop
   counter can be passed safely.  Both arguments are evaluated more
   than once.  */

#define INDEX_TO_CODE_POINT(charset, idx)				     \
  ((charset)->code_linear_p						     \
   ? (idx) + (charset)->min_code					     \
   : (((charset)->code_space[0]						     \
       + (((idx) + (charset)->char_index_offset)			     \
	  % (charset)->code_space[2]))					     \
      | (((charset)->code_space[4]					     \
	  + (((idx) + (charset)->char_index_offset)			     \
	     / (charset)->code_space[3] % (charset)->code_space[6]))	     \
	 << 8)								     \
      | (((charset)->code_space[8]					     \
	  + (((idx) + (charset)->char_index_offset)			     \
	     / (charset)->code_space[7] % (charset)->code_space[10]))	     \
	 << 16)								     \
      | (((charset)->code_space[12]					     \
	  + (((idx) + (charset)->char_index_offset)			     \
	     / (charset)->code_space[11]))				     \
	 << 24)))
162

163

Richard M. Stallman's avatar
Richard M. Stallman committed
164

165

166 167
/* Set to 1 to warn that a charset map has been loaded and thus that
   buffer text and string data may have been relocated.  */
int charset_map_loaded;

/* One block of charset map entries.  A block holds 0x10000 entries;
   NEXT links to the block holding the following entries.  */
struct charset_map_entries
{
  struct {
    /* First and last code-point of the mapped range.  */
    unsigned from;
    unsigned to;
    /* Character code corresponding to FROM.  */
    int c;
  } entry[0x10000];
  struct charset_map_entries *next;
};

/* Load the mapping information for CHARSET from ENTRIES.
180

181
   If CONTROL_FLAG is 0, setup CHARSET->min_char and CHARSET->max_char.
182

183 184
   If CONTROL_FLAG is 1, setup CHARSET->min_char, CHARSET->max_char,
   CHARSET->decoder, and CHARSET->encoder.
185

186 187 188
   If CONTROL_FLAG is 2, setup CHARSET->deunifier and
   Vchar_unify_table.  If Vchar_unified_charset_table is non-nil,
   setup it too.  */
Karl Heuer's avatar
Karl Heuer committed
189

190
static void
191
load_charset_map (charset, entries, n_entries, control_flag)
192
  struct charset *charset;
193 194
  struct charset_map_entries *entries;
  int n_entries;
195
  int control_flag;
Karl Heuer's avatar
Karl Heuer committed
196
{
197 198 199 200 201 202
  Lisp_Object vec, table;
  unsigned max_code = CHARSET_MAX_CODE (charset);
  int ascii_compatible_p = charset->ascii_compatible_p;
  int min_char, max_char, nonascii_min_char;
  int i;
  unsigned char *fast_map = charset->fast_map;
203

204 205 206 207
  if (n_entries <= 0)
    return;

  if (control_flag > 0)
208
    {
209
      int n = CODE_POINT_TO_INDEX (charset, max_code) + 1;
210

211
      table = Fmake_char_table (Qnil, Qnil);
212 213 214
      if (control_flag == 1)
	vec = Fmake_vector (make_number (n), make_number (-1));
      else if (! CHAR_TABLE_P (Vchar_unify_table))
215
	Vchar_unify_table = Fmake_char_table (Qnil, Qnil);
216

217
      charset_map_loaded = 1;
218
    }
219

220
  min_char = max_char = entries->entry[0].c;
221
  nonascii_min_char = MAX_CHAR;
222
  for (i = 0; i < n_entries; i++)
223
    {
224
      unsigned from, to;
225 226
      int from_index, to_index;
      int from_c, to_c;
227
      int idx = i % 0x10000;
228

229 230 231 232
      if (i > 0 && idx == 0)
	entries = entries->next;
      from = entries->entry[idx].from;
      to = entries->entry[idx].to;
233 234 235
      from_c = entries->entry[idx].c;
      from_index = CODE_POINT_TO_INDEX (charset, from);
      if (from == to)
236
	{
237 238
	  to_index = from_index;
	  to_c = from_c;
239
	}
240
      else
241
	{
242 243
	  to_index = CODE_POINT_TO_INDEX (charset, to);
	  to_c = from_c + (to_index - from_index);
244
	}
245 246
      if (from_index < 0 || to_index < 0)
	continue;
247 248

      if (control_flag < 2)
249
	{
250
	  int c;
251

252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267
	  if (to_c > max_char)
	    max_char = to_c;
	  else if (from_c < min_char)
	    min_char = from_c;
	  if (ascii_compatible_p)
	    {
	      if (! ASCII_BYTE_P (from_c))
		{
		  if (from_c < nonascii_min_char)
		    nonascii_min_char = from_c;
		}
	      else if (! ASCII_BYTE_P (to_c))
		{
		  nonascii_min_char = 0x80;
		}
	    }
268

269 270 271
	  for (c = from_c; c <= to_c; c++)
	    CHARSET_FAST_MAP_SET (c, fast_map);

272
	  if (control_flag == 1)
273
	    {
274 275 276 277 278
	      unsigned code = from;

	      if (CHARSET_COMPACT_CODES_P (charset))
		while (1)
		  {
279
		    ASET (vec, from_index, make_number (from_c));
Kenichi Handa's avatar
Kenichi Handa committed
280 281
		    if (NILP (CHAR_TABLE_REF (table, from_c)))
		      CHAR_TABLE_SET (table, from_c, make_number (code));
282 283
		    if (from_index == to_index)
		      break;
284
		    from_index++, from_c++;
285 286 287
		    code = INDEX_TO_CODE_POINT (charset, from_index);
		  }
	      else
288
		for (; from_index <= to_index; from_index++, from_c++)
289
		  {
290
		    ASET (vec, from_index, make_number (from_c));
Kenichi Handa's avatar
Kenichi Handa committed
291 292
		    if (NILP (CHAR_TABLE_REF (table, from_c)))
		      CHAR_TABLE_SET (table, from_c, make_number (from_index));
293
		  }
294 295
	    }
	}
296 297
      else
	{
298
	  unsigned code = from;
299

300 301 302
	  while (1)
	    {
	      int c1 = DECODE_CHAR (charset, code);
Kenichi Handa's avatar
Kenichi Handa committed
303

304 305
	      if (c1 >= 0)
		{
306
		  CHAR_TABLE_SET (table, from_c, make_number (c1));
307
		  CHAR_TABLE_SET (Vchar_unify_table, c1, make_number (from_c));
308 309 310 311
		  if (CHAR_TABLE_P (Vchar_unified_charset_table))
		    CHAR_TABLE_SET (Vchar_unified_charset_table, c1,
				    CHARSET_NAME (charset));
		}
312 313
	      if (from_index == to_index)
		break;
314
	      from_index++, from_c++;
315
	      code = INDEX_TO_CODE_POINT (charset, from_index);
316
	    }
317
	}
318
    }
319 320

  if (control_flag < 2)
Karl Heuer's avatar
Karl Heuer committed
321
    {
322 323 324
      CHARSET_MIN_CHAR (charset) = (ascii_compatible_p
				    ? nonascii_min_char : min_char);
      CHARSET_MAX_CHAR (charset) = max_char;
325
      if (control_flag == 1)
Karl Heuer's avatar
Karl Heuer committed
326
	{
327 328
	  CHARSET_DECODER (charset) = vec;
	  CHARSET_ENCODER (charset) = table;
Karl Heuer's avatar
Karl Heuer committed
329 330
	}
    }
331
  else
Kenichi Handa's avatar
Kenichi Handa committed
332
    CHARSET_DEUNIFIER (charset) = table;
Karl Heuer's avatar
Karl Heuer committed
333 334
}

335

336 337
/* Read a hexadecimal number (preceded by "0x") from the file FP while
   paying attention to comment charcter '#'.  */
338

339 340 341 342
static INLINE unsigned
read_hex (fp, eof)
     FILE *fp;
     int *eof;
343
{
344 345
  int c;
  unsigned n;
346

347 348
  while ((c = getc (fp)) != EOF)
    {
349
      if (c == '#')
350 351 352 353 354 355 356 357
	{
	  while ((c = getc (fp)) != EOF && c != '\n');
	}
      else if (c == '0')
	{
	  if ((c = getc (fp)) == EOF || c == 'x')
	    break;
	}
Kenichi Handa's avatar
Kenichi Handa committed
358
    }
359 360 361 362 363 364 365 366 367 368 369 370 371 372
  if (c == EOF)
    {
      *eof = 1;
      return 0;
    }
  *eof = 0;
  n = 0;
  if (c == 'x')
    while ((c = getc (fp)) != EOF && isxdigit (c))
      n = ((n << 4)
	   | (c <= '9' ? c - '0' : c <= 'F' ? c - 'A' + 10 : c - 'a' + 10));
  else
    while ((c = getc (fp)) != EOF && isdigit (c))
      n = (n * 10) + c - '0';
373 374
  if (c != EOF)
    ungetc (c, fp);
375 376
  return n;
}
377

378

379
/* Return a mapping vector for CHARSET loaded from MAPFILE.
380 381 382 383 384 385 386
   Each line of MAPFILE has this form
	0xAAAA 0xCCCC
   where 0xAAAA is a code-point and 0xCCCC is the corresponding
   character code, or this form
	0xAAAA-0xBBBB 0xCCCC
   where 0xAAAA and 0xBBBB are code-points specifying a range, and
   0xCCCC is the first character code of the range.
Karl Heuer's avatar
Karl Heuer committed
387

388 389
   The returned vector has this form:
	[ CODE1 CHAR1 CODE2 CHAR2 .... ]
390 391
   where CODE1 is a code-point or a cons of code-points specifying a
   range.  */
Karl Heuer's avatar
Karl Heuer committed
392

393
extern void add_to_log P_ ((char *, Lisp_Object, Lisp_Object));
Karl Heuer's avatar
Karl Heuer committed
394

395 396
static void
load_charset_map_from_file (charset, mapfile, control_flag)
397 398
     struct charset *charset;
     Lisp_Object mapfile;
399
     int control_flag;
Karl Heuer's avatar
Karl Heuer committed
400
{
401 402
  unsigned min_code = CHARSET_MIN_CODE (charset);
  unsigned max_code = CHARSET_MAX_CODE (charset);
403 404 405 406
  int fd;
  FILE *fp;
  int eof;
  Lisp_Object suffixes;
407 408
  struct charset_map_entries *head, *entries;
  int n_entries;
Karl Heuer's avatar
Karl Heuer committed
409

410 411
  suffixes = Fcons (build_string (".map"),
		    Fcons (build_string (".TXT"), Qnil));
Karl Heuer's avatar
Karl Heuer committed
412

413
  fd = openp (Vcharset_map_path, mapfile, suffixes, NULL, Qnil);
414 415 416 417
  if (fd < 0
      || ! (fp = fdopen (fd, "r")))
    {
      add_to_log ("Failure in loading charset map: %S", mapfile, Qnil);
418
      return;
419
    }
Karl Heuer's avatar
Karl Heuer committed
420

421 422 423
  head = entries = ((struct charset_map_entries *)
		    alloca (sizeof (struct charset_map_entries)));
  n_entries = 0;
424 425 426
  eof = 0;
  while (1)
    {
427 428 429
      unsigned from, to;
      int c;
      int idx;
Karl Heuer's avatar
Karl Heuer committed
430

431
      from = read_hex (fp, &eof);
432 433
      if (eof)
	break;
434 435 436 437 438
      if (getc (fp) == '-')
	to = read_hex (fp, &eof);
      else
	to = from;
      c = (int) read_hex (fp, &eof);
439

440 441
      if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
	continue;
442

443
      if (n_entries > 0 && (n_entries % 0x10000) == 0)
444
	{
445 446 447
	  entries->next = ((struct charset_map_entries *)
			   alloca (sizeof (struct charset_map_entries)));
	  entries = entries->next;
448
	}
449 450 451 452 453
      idx = n_entries % 0x10000;
      entries->entry[idx].from = from;
      entries->entry[idx].to = to;
      entries->entry[idx].c = c;
      n_entries++;
454 455 456
    }
  fclose (fp);
  close (fd);
457

458
  load_charset_map (charset, head, n_entries, control_flag);
Karl Heuer's avatar
Karl Heuer committed
459 460
}

461 462 463 464 465
static void
load_charset_map_from_vector (charset, vec, control_flag)
     struct charset *charset;
     Lisp_Object vec;
     int control_flag;
Kenichi Handa's avatar
Kenichi Handa committed
466
{
467 468 469 470 471 472
  unsigned min_code = CHARSET_MIN_CODE (charset);
  unsigned max_code = CHARSET_MAX_CODE (charset);
  struct charset_map_entries *head, *entries;
  int n_entries;
  int len = ASIZE (vec);
  int i;
Kenichi Handa's avatar
Kenichi Handa committed
473

474
  if (len % 2 == 1)
475
    {
476 477
      add_to_log ("Failure in loading charset map: %V", vec, Qnil);
      return;
478
    }
479

480 481 482 483
  head = entries = ((struct charset_map_entries *)
		    alloca (sizeof (struct charset_map_entries)));
  n_entries = 0;
  for (i = 0; i < len; i += 2)
484
    {
485 486 487 488
      Lisp_Object val, val2;
      unsigned from, to;
      int c;
      int idx;
489

490 491
      val = AREF (vec, i);
      if (CONSP (val))
Kenichi Handa's avatar
Kenichi Handa committed
492
	{
493 494 495 496 497 498
	  val2 = XCDR (val);
	  val = XCAR (val);
	  CHECK_NATNUM (val);
	  CHECK_NATNUM (val2);
	  from = XFASTINT (val);
	  to = XFASTINT (val2);
Kenichi Handa's avatar
Kenichi Handa committed
499
	}
500
      else
Kenichi Handa's avatar
Kenichi Handa committed
501
	{
502 503
	  CHECK_NATNUM (val);
	  from = to = XFASTINT (val);
Kenichi Handa's avatar
Kenichi Handa committed
504
	}
505 506 507
      val = AREF (vec, i + 1);
      CHECK_NATNUM (val);
      c = XFASTINT (val);
508

509 510
      if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
	continue;
511

512
      if (n_entries > 0 && (n_entries % 0x10000) == 0)
513 514 515 516 517 518 519 520 521 522 523
	{
	  entries->next = ((struct charset_map_entries *)
			   alloca (sizeof (struct charset_map_entries)));
	  entries = entries->next;
	}
      idx = n_entries % 0x10000;
      entries->entry[idx].from = from;
      entries->entry[idx].to = to;
      entries->entry[idx].c = c;
      n_entries++;
    }
524

525
  load_charset_map (charset, head, n_entries, control_flag);
526 527
}

528 529 530
static void
load_charset (charset)
     struct charset *charset;
531
{
532
  if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP_DEFERRED)
533
    {
534
      Lisp_Object map;
535

536 537
      map = CHARSET_MAP (charset);
      if (STRINGP (map))
538
	load_charset_map_from_file (charset, map, 1);
Kenichi Handa's avatar
Kenichi Handa committed
539
      else
540
	load_charset_map_from_vector (charset, map, 1);
541
      CHARSET_METHOD (charset) = CHARSET_METHOD_MAP;
542
    }
Karl Heuer's avatar
Karl Heuer committed
543
}
544

545 546 547 548 549

DEFUN ("charsetp", Fcharsetp, Scharsetp, 1, 1, 0,
       doc: /* Return non-nil if and only if OBJECT is a charset.*/)
     (object)
     Lisp_Object object;
Kenichi Handa's avatar
Kenichi Handa committed
550
{
551
  return (CHARSETP (object) ? Qt : Qnil);
552 553
}

Karl Heuer's avatar
Karl Heuer committed
554 555

void
556 557 558 559 560 561
map_charset_chars (c_function, function, arg,
		   charset, from, to)
     void (*c_function) P_ ((Lisp_Object, Lisp_Object));
     Lisp_Object function, arg;
     struct charset *charset;
     unsigned from, to;
Karl Heuer's avatar
Karl Heuer committed
562
{
563
  Lisp_Object range;
564
  int partial;
565

Kenichi Handa's avatar
Kenichi Handa committed
566
  if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP_DEFERRED)
567 568
    load_charset (charset);

569 570 571 572 573
  partial = (from > CHARSET_MIN_CODE (charset)
	     || to < CHARSET_MAX_CODE (charset));

  if (CHARSET_UNIFIED_P (charset)
      && CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
Karl Heuer's avatar
Karl Heuer committed
574
    {
575 576 577
      map_char_table_for_charset (c_function, function,
				  CHARSET_DEUNIFIER (charset), arg,
				  partial ? charset : NULL, from, to);
Karl Heuer's avatar
Karl Heuer committed
578
    }
579

580
  if (CHARSET_METHOD (charset) == CHARSET_METHOD_OFFSET)
Karl Heuer's avatar
Karl Heuer committed
581
    {
582 583 584 585 586 587
      int from_idx = CODE_POINT_TO_INDEX (charset, from);
      int to_idx = CODE_POINT_TO_INDEX (charset, to);
      int from_c = from_idx + CHARSET_CODE_OFFSET (charset);
      int to_c = to_idx + CHARSET_CODE_OFFSET (charset);

      range = Fcons (make_number (from_c), make_number (to_c));
588
      if (NILP (function))
589
	(*c_function) (arg, range);
590 591
      else
	call2 (function, range, arg);
592
    }
593 594 595 596
  else if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
    {
      if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
	return;
597 598 599
      map_char_table_for_charset (c_function, function,
				  CHARSET_ENCODER (charset), arg,
				  partial ? charset : NULL, from, to);
600
    }
601
  else if (CHARSET_METHOD (charset) == CHARSET_METHOD_SUBSET)
Karl Heuer's avatar
Karl Heuer committed
602
    {
603 604 605 606 607 608 609 610 611 612 613 614 615
      Lisp_Object subset_info;
      int offset;

      subset_info = CHARSET_SUBSET (charset);
      charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
      offset = XINT (AREF (subset_info, 3));
      from -= offset;
      if (from < XFASTINT (AREF (subset_info, 1)))
	from = XFASTINT (AREF (subset_info, 1));
      to -= offset;
      if (to > XFASTINT (AREF (subset_info, 2)))
	to = XFASTINT (AREF (subset_info, 2));
      map_charset_chars (c_function, function, arg, charset, from, to);
Karl Heuer's avatar
Karl Heuer committed
616
    }
617 618 619
  else				/* i.e. CHARSET_METHOD_SUPERSET */
    {
      Lisp_Object parents;
Karl Heuer's avatar
Karl Heuer committed
620

621 622
      for (parents = CHARSET_SUPERSET (charset); CONSP (parents);
	   parents = XCDR (parents))
Kenichi Handa's avatar
Kenichi Handa committed
623
	{
624 625 626 627 628 629 630 631 632 633 634
	  int offset;
	  unsigned this_from, this_to;

	  charset = CHARSET_FROM_ID (XFASTINT (XCAR (XCAR (parents))));
	  offset = XINT (XCDR (XCAR (parents)));
	  this_from = from - offset;
	  this_to = to - offset;
	  if (this_from < CHARSET_MIN_CODE (charset))
	    this_from = CHARSET_MIN_CODE (charset);
	  if (this_to > CHARSET_MAX_CODE (charset))
	    this_to = CHARSET_MAX_CODE (charset);
635 636
	  map_charset_chars (c_function, function, arg, charset,
			     this_from, this_to);
Kenichi Handa's avatar
Kenichi Handa committed
637
	}
638
    }
Karl Heuer's avatar
Karl Heuer committed
639 640
}

641
DEFUN ("map-charset-chars", Fmap_charset_chars, Smap_charset_chars, 2, 5, 0,
642
       doc: /* Call FUNCTION for all characters in CHARSET.
643
FUNCTION is called with an argument RANGE and the optional 3rd
644
argument ARG.
Karl Heuer's avatar
Karl Heuer committed
645

646 647
RANGE is a cons (FROM .  TO), where FROM and TO indicate a range of
characters contained in CHARSET.
Karl Heuer's avatar
Karl Heuer committed
648

649
The optional 4th and 5th arguments FROM-CODE and TO-CODE specify the
Kenichi Handa's avatar
Kenichi Handa committed
650
range of code points of target characters.  */)
651 652
     (function, charset, arg, from_code, to_code)
       Lisp_Object function, charset, arg, from_code, to_code;
Karl Heuer's avatar
Karl Heuer committed
653
{
654
  struct charset *cs;
655
  unsigned from, to;
Karl Heuer's avatar
Karl Heuer committed
656

657 658
  CHECK_CHARSET_GET_CHARSET (charset, cs);
  if (NILP (from_code))
659
    from = CHARSET_MIN_CODE (cs);
660
  else
Karl Heuer's avatar
Karl Heuer committed
661
    {
662 663 664 665
      CHECK_NATNUM (from_code);
      from = XINT (from_code);
      if (from < CHARSET_MIN_CODE (cs))
	from = CHARSET_MIN_CODE (cs);
Karl Heuer's avatar
Karl Heuer committed
666
    }
667
  if (NILP (to_code))
668
    to = CHARSET_MAX_CODE (cs);
Karl Heuer's avatar
Karl Heuer committed
669 670
  else
    {
671 672 673 674
      CHECK_NATNUM (to_code);
      to = XINT (to_code);
      if (to > CHARSET_MAX_CODE (cs))
	to = CHARSET_MAX_CODE (cs);
Karl Heuer's avatar
Karl Heuer committed
675
    }
676
  map_charset_chars (NULL, function, arg, cs, from, to);
677
  return Qnil;
678
}
Karl Heuer's avatar
Karl Heuer committed
679 680


681 682 683 684
/* Define a charset according to the arguments.  The Nth argument is
   the Nth attribute of the charset (the last attribute `charset-id'
   is not included).  See the docstring of `define-charset' for the
   detail.  */
Karl Heuer's avatar
Karl Heuer committed
685

686 687
DEFUN ("define-charset-internal", Fdefine_charset_internal,
       Sdefine_charset_internal, charset_arg_max, MANY, 0,
688 689
       doc: /* For internal use only.
usage: (define-charset-internal ...)  */)
690 691 692
     (nargs, args)
     int nargs;
     Lisp_Object *args;
Karl Heuer's avatar
Karl Heuer committed
693
{
694 695 696 697 698
  /* Charset attr vector.  */
  Lisp_Object attrs;
  Lisp_Object val;
  unsigned hash_code;
  struct Lisp_Hash_Table *hash_table = XHASH_TABLE (Vcharset_hash_table);
699
  int i, j;
700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717
  struct charset charset;
  int id;
  int dimension;
  int new_definition_p;
  int nchars;

  if (nargs != charset_arg_max)
    return Fsignal (Qwrong_number_of_arguments,
		    Fcons (intern ("define-charset-internal"),
			   make_number (nargs)));

  attrs = Fmake_vector (make_number (charset_attr_max), Qnil);

  CHECK_SYMBOL (args[charset_arg_name]);
  ASET (attrs, charset_name, args[charset_arg_name]);

  val = args[charset_arg_code_space];
  for (i = 0, dimension = 0, nchars = 1; i < 4; i++)
718
    {
719 720 721 722 723 724 725 726 727 728 729 730 731 732
      int min_byte, max_byte;

      min_byte = XINT (Faref (val, make_number (i * 2)));
      max_byte = XINT (Faref (val, make_number (i * 2 + 1)));
      if (min_byte < 0 || min_byte > max_byte || max_byte >= 256)
	error ("Invalid :code-space value");
      charset.code_space[i * 4] = min_byte;
      charset.code_space[i * 4 + 1] = max_byte;
      charset.code_space[i * 4 + 2] = max_byte - min_byte + 1;
      nchars *= charset.code_space[i * 4 + 2];
      charset.code_space[i * 4 + 3] = nchars;
      if (max_byte > 0)
	dimension = i + 1;
    }
Karl Heuer's avatar
Karl Heuer committed
733

734 735 736 737
  val = args[charset_arg_dimension];
  if (NILP (val))
    charset.dimension = dimension;
  else
Karl Heuer's avatar
Karl Heuer committed
738
    {
739 740 741 742
      CHECK_NATNUM (val);
      charset.dimension = XINT (val);
      if (charset.dimension < 1 || charset.dimension > 4)
	args_out_of_range_3 (val, make_number (1), make_number (4));
Karl Heuer's avatar
Karl Heuer committed
743 744
    }

745 746 747 748 749 750 751 752
  charset.code_linear_p
    = (charset.dimension == 1
       || (charset.code_space[2] == 256
	   && (charset.dimension == 2
	       || (charset.code_space[6] == 256
		   && (charset.dimension == 3
		       || charset.code_space[10] == 256)))));

753
  if (! charset.code_linear_p)
Karl Heuer's avatar
Karl Heuer committed
754
    {
755
      charset.code_space_mask = (unsigned char *) xmalloc (256);
756
      bzero (charset.code_space_mask, 256);
757 758 759 760
      for (i = 0; i < 4; i++)
	for (j = charset.code_space[i * 4]; j <= charset.code_space[i * 4 + 1];
	     j++)
	  charset.code_space_mask[j] |= (1 << i);
Karl Heuer's avatar
Karl Heuer committed
761 762
    }

763
  charset.iso_chars_96 = charset.code_space[2] == 96;
Karl Heuer's avatar
Karl Heuer committed
764

765 766 767 768 769 770 771 772
  charset.min_code = (charset.code_space[0]
		      | (charset.code_space[4] << 8)
		      | (charset.code_space[8] << 16)
		      | (charset.code_space[12] << 24));
  charset.max_code = (charset.code_space[1]
		      | (charset.code_space[5] << 8)
		      | (charset.code_space[9] << 16)
		      | (charset.code_space[13] << 24));
773
  charset.char_index_offset = 0;
774

775 776 777 778
  val = args[charset_arg_min_code];
  if (! NILP (val))
    {
      unsigned code;
779

780 781 782 783 784
      if (INTEGERP (val))
	code = XINT (val);
      else
	{
	  CHECK_CONS (val);
Kenichi Handa's avatar
Kenichi Handa committed
785 786
	  CHECK_NUMBER_CAR (val);
	  CHECK_NUMBER_CDR (val);
787 788 789 790 791 792 793 794 795
	  code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val)));
	}
      if (code < charset.min_code
	  || code > charset.max_code)
	args_out_of_range_3 (make_number (charset.min_code),
			     make_number (charset.max_code), val);
      charset.char_index_offset = CODE_POINT_TO_INDEX (&charset, code);
      charset.min_code = code;
    }
796

797 798
  val = args[charset_arg_max_code];
  if (! NILP (val))
799
    {
800 801 802 803 804 805 806
      unsigned code;

      if (INTEGERP (val))
	code = XINT (val);
      else
	{
	  CHECK_CONS (val);
Kenichi Handa's avatar
Kenichi Handa committed
807 808
	  CHECK_NUMBER_CAR (val);
	  CHECK_NUMBER_CDR (val);
809 810 811 812 813 814 815
	  code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val)));
	}
      if (code < charset.min_code
	  || code > charset.max_code)
	args_out_of_range_3 (make_number (charset.min_code),
			     make_number (charset.max_code), val);
      charset.max_code = code;
816 817
    }

818
  charset.compact_codes_p = charset.max_code < 0x1000000;
Karl Heuer's avatar
Karl Heuer committed
819

820 821 822 823 824
  val = args[charset_arg_invalid_code];
  if (NILP (val))
    {
      if (charset.min_code > 0)
	charset.invalid_code = 0;
Kenichi Handa's avatar
Kenichi Handa committed
825 826
      else
	{
827 828 829 830 831
	  XSETINT (val, charset.max_code + 1);
	  if (XINT (val) == charset.max_code + 1)
	    charset.invalid_code = charset.max_code + 1;
	  else
	    error ("Attribute :invalid-code must be specified");
832 833
	}
    }
834 835 836 837 838
  else
    {
      CHECK_NATNUM (val);
      charset.invalid_code = XFASTINT (val);
    }
Karl Heuer's avatar
Karl Heuer committed
839

840 841 842 843 844 845 846 847 848 849
  val = args[charset_arg_iso_final];
  if (NILP (val))
    charset.iso_final = -1;
  else
    {
      CHECK_NUMBER (val);
      if (XINT (val) < '0' || XINT (val) > 127)
	error ("Invalid iso-final-char: %d", XINT (val));
      charset.iso_final = XINT (val);
    }
Karl Heuer's avatar
Karl Heuer committed
850

851 852 853 854
  val = args[charset_arg_iso_revision];
  if (NILP (val))
    charset.iso_revision = -1;
  else
Karl Heuer's avatar
Karl Heuer committed
855
    {
856