characters.el 50.4 KB
Newer Older
Karl Heuer's avatar
Karl Heuer committed
1 2
;;; characters.el --- set syntax and category for multibyte characters

3
;; Copyright (C) 1997, 2000-2014 Free Software Foundation, Inc.
Kenichi Handa's avatar
Kenichi Handa committed
4
;; Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
5
;;   2005, 2006, 2007, 2008, 2009, 2010, 2011
Kenichi Handa's avatar
Kenichi Handa committed
6 7
;;   National Institute of Advanced Industrial Science and Technology (AIST)
;;   Registration Number H14PRO021
Kenichi Handa's avatar
Kenichi Handa committed
8
;; Copyright (C) 2003
9 10
;;   National Institute of Advanced Industrial Science and Technology (AIST)
;;   Registration Number H13PRO009
Karl Heuer's avatar
Karl Heuer committed
11 12 13 14 15

;; Keywords: multibyte character, character set, syntax, category

;; This file is part of GNU Emacs.

16
;; GNU Emacs is free software: you can redistribute it and/or modify
Karl Heuer's avatar
Karl Heuer committed
17
;; it under the terms of the GNU General Public License as published by
18 19
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.
Karl Heuer's avatar
Karl Heuer committed
20 21 22 23 24 25 26

;; GNU Emacs is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.

;; You should have received a copy of the GNU General Public License
27
;; along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.
Karl Heuer's avatar
Karl Heuer committed
28 29 30

;;; Commentary:

31 32
;;; Code:

Karl Heuer's avatar
Karl Heuer committed
33 34 35 36
;;; Predefined categories.

;; For each character set.

37 38
(define-category ?a "ASCII
ASCII graphic characters 32-126 (ISO646 IRV:1983[4/0])")
Karl Heuer's avatar
Karl Heuer committed
39 40 41 42 43 44
(define-category ?l "Latin")
(define-category ?t "Thai")
(define-category ?g "Greek")
(define-category ?b "Arabic")
(define-category ?w "Hebrew")
(define-category ?y "Cyrillic")
45 46 47 48
(define-category ?k "Katakana
Japanese katakana")
(define-category ?r "Roman
Japanese roman")
Karl Heuer's avatar
Karl Heuer committed
49 50 51
(define-category ?c "Chinese")
(define-category ?j "Japanese")
(define-category ?h "Korean")
52 53 54 55
(define-category ?e "Ethiopic
Ethiopic (Ge'ez)")
(define-category ?v "Viet
Vietnamese")
Karl Heuer's avatar
Karl Heuer committed
56
(define-category ?i "Indian")
57
(define-category ?o "Lao")
58
(define-category ?q "Tibetan")
Karl Heuer's avatar
Karl Heuer committed
59 60 61

;; For each group (row) of 2-byte character sets.

62 63 64 65 66 67 68 69 70 71 72 73
(define-category ?A "2-byte alnum
Alpha-numeric characters of 2-byte character sets")
(define-category ?C "2-byte han
Chinese (Han) characters of 2-byte character sets")
(define-category ?G "2-byte Greek
Greek characters of 2-byte character sets")
(define-category ?H "2-byte Hiragana
Japanese Hiragana characters of 2-byte character sets")
(define-category ?K "2-byte Katakana
Japanese Katakana characters of 2-byte character sets")
(define-category ?N "2-byte Korean
Korean Hangul characters of 2-byte character sets")
74
(define-category ?Y "2-byte Cyrillic
75
Cyrillic characters of 2-byte character sets")
Karl Heuer's avatar
Karl Heuer committed
76 77 78 79 80
(define-category ?I "Indian Glyphs")

;; For phonetic classifications.

(define-category ?0 "consonant")
81
(define-category ?1 "base vowel
82
Base (independent) vowel")
83
(define-category ?2 "upper diacritic
84
Upper diacritical mark (including upper vowel)")
85
(define-category ?3 "lower diacritic
86
Lower diacritical mark (including lower vowel)")
87
(define-category ?4 "combining tone
88
Combining tone mark")
89
(define-category ?5 "symbol")
Karl Heuer's avatar
Karl Heuer committed
90
(define-category ?6 "digit")
91
(define-category ?7 "vowel diacritic
92
Vowel-modifying diacritical mark")
93 94
(define-category ?8 "vowel-signs")
(define-category ?9 "semivowel lower")
Karl Heuer's avatar
Karl Heuer committed
95 96

;; For filling.
97 98
(define-category ?| "line breakable
While filling, we can break a line at this character.")
Karl Heuer's avatar
Karl Heuer committed
99

Karl Heuer's avatar
Karl Heuer committed
100
;; For indentation calculation.
101
(define-category ?\s
102 103
  "space for indent
This character counts as a space for indentation purposes.")
Karl Heuer's avatar
Karl Heuer committed
104

Karl Heuer's avatar
Karl Heuer committed
105
;; Keep the following for `kinsoku' processing.  See comments in
Karl Heuer's avatar
Karl Heuer committed
106
;; kinsoku.el.
107 108 109 110
(define-category ?> "Not at bol
A character which can't be placed at beginning of line.")
(define-category ?< "Not at eol
A character which can't be placed at end of line.")
Karl Heuer's avatar
Karl Heuer committed
111

112 113 114
;; Base and Combining
(define-category ?. "Base
Base characters (Unicode General Category L,N,P,S,Zs)")
115
(define-category ?^ "Combining
116
Combining diacritic or mark (Unicode General Category M)")
117 118 119 120 121 122 123 124 125 126

;; bidi types
(define-category ?R "Right-to-left (strong)
Characters with \"strong\" right-to-left directionality, i.e.
with R, AL, RLE, or RLO Unicode bidi character type.")

(define-category ?L "Left-to-right (strong)
Characters with \"strong\" left-to-right directionality, i.e.
with L, LRE, or LRO Unicode bidi character type.")

Karl Heuer's avatar
Karl Heuer committed
127 128 129 130 131

;;; Setting syntax and category.

;; ASCII

132 133 134
;; All ASCII characters have the category `a' (ASCII) and `l' (Latin).
(modify-category-entry '(32 . 127) ?a)
(modify-category-entry '(32 . 127) ?l)
Karl Heuer's avatar
Karl Heuer committed
135

Dave Love's avatar
Dave Love committed
136 137 138 139
;; Deal with the CJK charsets first.  Since the syntax of blocks is
;; defined per charset, and the charsets may contain e.g. Latin
;; characters, we end up with the wrong syntax definitions if we're
;; not careful.
Karl Heuer's avatar
Karl Heuer committed
140

Kenichi Handa's avatar
Kenichi Handa committed
141
;; Chinese characters (Unicode)
142 143
(modify-category-entry '(#x2E80 . #x312F) ?|)
(modify-category-entry '(#x3190 . #x33FF) ?|)
Kenichi Handa's avatar
Kenichi Handa committed
144 145
(modify-category-entry '(#x3400 . #x4DBF) ?C)
(modify-category-entry '(#x4E00 . #x9FAF) ?C)
Kenichi Handa's avatar
Kenichi Handa committed
146 147 148 149 150
(modify-category-entry '(#x3400 . #x9FAF) ?c)
(modify-category-entry '(#x3400 . #x9FAF) ?|)
(modify-category-entry '(#xF900 . #xFAFF) ?C)
(modify-category-entry '(#xF900 . #xFAFF) ?c)
(modify-category-entry '(#xF900 . #xFAFF) ?|)
151 152 153
(modify-category-entry '(#x20000 . #x2FFFF) ?|)
(modify-category-entry '(#x20000 . #x2FFFF) ?C)
(modify-category-entry '(#x20000 . #x2FFFF) ?c)
Kenichi Handa's avatar
Kenichi Handa committed
154

Karl Heuer's avatar
Karl Heuer committed
155 156 157

;; Chinese character set (GB2312)

Kenichi Handa's avatar
Kenichi Handa committed
158 159 160
(map-charset-chars #'modify-syntax-entry 'chinese-gb2312 "_" #x2121 #x217E)
(map-charset-chars #'modify-syntax-entry 'chinese-gb2312 "_" #x2221 #x227E)
(map-charset-chars #'modify-syntax-entry 'chinese-gb2312 "_" #x2921 #x297E)
Karl Heuer's avatar
Karl Heuer committed
161

Dave Love's avatar
Dave Love committed
162
(map-charset-chars #'modify-category-entry 'chinese-gb2312 ?c)
Kenichi Handa's avatar
Kenichi Handa committed
163 164 165
(map-charset-chars #'modify-category-entry 'chinese-gb2312 ?A #x2330 #x2339)
(map-charset-chars #'modify-category-entry 'chinese-gb2312 ?A #x2341 #x235A)
(map-charset-chars #'modify-category-entry 'chinese-gb2312 ?A #x2361 #x237A)
Kenichi Handa's avatar
Kenichi Handa committed
166 167 168 169 170
(map-charset-chars #'modify-category-entry 'chinese-gb2312 ?H #x2421 #x247E)
(map-charset-chars #'modify-category-entry 'chinese-gb2312 ?K #x2521 #x257E)
(map-charset-chars #'modify-category-entry 'chinese-gb2312 ?G #x2621 #x267E)
(map-charset-chars #'modify-category-entry 'chinese-gb2312 ?Y #x2721 #x277E)
(map-charset-chars #'modify-category-entry 'chinese-gb2312 ?C #x3021 #x7E7E)
Karl Heuer's avatar
Karl Heuer committed
171 172 173

;; Chinese character set (BIG5)

Kenichi Handa's avatar
Kenichi Handa committed
174
(map-charset-chars #'modify-category-entry 'big5 ?c)
Kenichi Handa's avatar
Kenichi Handa committed
175
(map-charset-chars #'modify-category-entry 'big5 ?C #xA259 #xA261)
Kenichi Handa's avatar
Kenichi Handa committed
176
(map-charset-chars #'modify-category-entry 'big5 ?C #xA440 #xC67E)
Kenichi Handa's avatar
Kenichi Handa committed
177
(map-charset-chars #'modify-category-entry 'big5 ?C #xC940 #xF9DC)
Karl Heuer's avatar
Karl Heuer committed
178 179 180

;; Chinese character set (CNS11643)

Dave Love's avatar
Dave Love committed
181 182 183 184
;; Planes 1..7 of CNS 11643 all get category `c'.  Category `C' covers
;; a whole plane, except in plane 1 where only #x4421..#x7E7E get it.
(dolist (plane '(1 2 3 4 5 6 7))
  (let ((charset (intern (format "chinese-cns11643-%d" plane))))
    (map-charset-chars #'modify-category-entry charset ?c)
    (if (= plane 1)
        (map-charset-chars #'modify-category-entry charset ?C #x4421 #x7E7E)
      (map-charset-chars #'modify-category-entry charset ?C))))
Karl Heuer's avatar
Karl Heuer committed
188

Kenichi Handa's avatar
Kenichi Handa committed
189
;; Japanese character set (JISX0201, JISX0208, JISX0212, JISX0213)
Karl Heuer's avatar
Karl Heuer committed
190

Kenichi Handa's avatar
Kenichi Handa committed
191
(map-charset-chars #'modify-category-entry 'katakana-jisx0201 ?k)
Karl Heuer's avatar
Karl Heuer committed
192

Kenichi Handa's avatar
Kenichi Handa committed
193
(map-charset-chars #'modify-category-entry 'latin-jisx0201 ?r)
Karl Heuer's avatar
Karl Heuer committed
194

Kenichi Handa's avatar
Kenichi Handa committed
195
(dolist (l '(katakana-jisx0201 japanese-jisx0208 japanese-jisx0212
196 197
			       japanese-jisx0213-1 japanese-jisx0213-2
			       cp932-2-byte))
Kenichi Handa's avatar
Kenichi Handa committed
198
  (map-charset-chars #'modify-category-entry l ?j))
Karl Heuer's avatar
Karl Heuer committed
199

200 201 202
;; Fullwidth characters
(modify-category-entry '(#xff01 . #xff60) ?\|)

203
;; Unicode equivalents of JISX0201-kana
Kenichi Handa's avatar
Kenichi Handa committed
204 205 206 207
;; U+FF61..U+FF9F get three categories, applied in this order:
;; `k', `j', and `|' (a line may be broken at the character).
(dolist (category '(?k ?j ?\|))
  (modify-category-entry '(#xff61 . #xff9f) category))
208 209

;; Katakana block
210 211
(modify-category-entry '(#x3099 . #x309C) ?K)
(modify-category-entry '(#x30A0 . #x30FF) ?K)
Kenichi Handa's avatar
Kenichi Handa committed
212
(modify-category-entry '(#x31F0 . #x31FF) ?K)
Kenichi Handa's avatar
Kenichi Handa committed
213
(modify-category-entry '(#x30A0 . #x30FA) ?\|)
214
(modify-category-entry #x30FF ?\|)
215 216

;; Hiragana block
217 218 219 220 221 222
(modify-category-entry '(#x3040 . #x309F) ?H)
(modify-category-entry '(#x3040 . #x3096) ?\|)
(modify-category-entry #x309F ?\|)
(modify-category-entry #x30A0 ?H)
(modify-category-entry #x30FC ?H)

223

Karl Heuer's avatar
Karl Heuer committed
224
;; JISX0208
Kenichi Handa's avatar
Kenichi Handa committed
225 226 227
(map-charset-chars #'modify-syntax-entry 'japanese-jisx0208 "_" #x2121 #x227E)
(map-charset-chars #'modify-syntax-entry 'japanese-jisx0208 "_" #x2821 #x287E)
;; The two ranges above give symbol syntax to JIS X 0208 rows 1-2 and 8.
;; The marks listed here are put back to word syntax.
;; NOTE(review): the twelve character literals were lost from this copy
;; (only bare `?' remained, which reads as SPACE and unbalances the
;; list).  The code points below are reconstructed from JIS X 0208
;; row 1 -- confirm against the upstream file.
(let ((chars '(#x30FC                   ; KATAKANA-HIRAGANA PROLONGED SOUND MARK
               #x309B                   ; KATAKANA-HIRAGANA VOICED SOUND MARK
               #x309C                   ; KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
               #x30FD                   ; KATAKANA ITERATION MARK
               #x30FE                   ; KATAKANA VOICED ITERATION MARK
               #x309D                   ; HIRAGANA ITERATION MARK
               #x309E                   ; HIRAGANA VOICED ITERATION MARK
               #x3003                   ; DITTO MARK
               #x4EDD                   ; CJK ideograph (JIS 0x2138)
               #x3005                   ; IDEOGRAPHIC ITERATION MARK
               #x3006                   ; IDEOGRAPHIC CLOSING MARK
               #x3007)))                ; IDEOGRAPHIC NUMBER ZERO
  (dolist (elt chars)
    (modify-syntax-entry elt "w")))
Kenichi Handa's avatar
Kenichi Handa committed
230 231 232 233 234 235 236

(map-charset-chars #'modify-category-entry 'japanese-jisx0208 ?A #x2321 #x237E)
(map-charset-chars #'modify-category-entry 'japanese-jisx0208 ?H #x2421 #x247E)
(map-charset-chars #'modify-category-entry 'japanese-jisx0208 ?K #x2521 #x257E)
(map-charset-chars #'modify-category-entry 'japanese-jisx0208 ?G #x2621 #x267E)
(map-charset-chars #'modify-category-entry 'japanese-jisx0208 ?Y #x2721 #x277E)
(map-charset-chars #'modify-category-entry 'japanese-jisx0208 ?C #x3021 #x7E7E)
Kenichi Handa's avatar
Kenichi Handa committed
237
;; Four row-1 characters also get category `C', in addition to the
;; #x3021..#x7E7E range handled above.
;; NOTE(review): the four character literals were lost from this copy
;; (bare `?' reads as SPACE).  The code points below are reconstructed
;; -- confirm against the upstream file.
(dolist (char '(#x4EDD                  ; CJK ideograph (JIS 0x2138)
                #x3005                  ; IDEOGRAPHIC ITERATION MARK
                #x3006                  ; IDEOGRAPHIC CLOSING MARK
                #x3007))                ; IDEOGRAPHIC NUMBER ZERO
  (modify-category-entry char ?C))

;; JISX0212

Kenichi Handa's avatar
Kenichi Handa committed
244
(map-charset-chars #'modify-syntax-entry 'japanese-jisx0212 "_" #x2121 #x237E)
Karl Heuer's avatar
Karl Heuer committed
245 246

;; JISX0201-Kana
Dave Love's avatar
Dave Love committed
247

Dave Love's avatar
Dave Love committed
248
;; Halfwidth punctuation of JISX0201-Kana gets punctuation syntax.
;; NOTE(review): the three character literals were lost from this copy
;; (bare `?' reads as SPACE, which would turn SPACE into punctuation).
;; The halfwidth code points below are reconstructed from the section
;; heading -- confirm against the upstream file.
(dolist (char '(#xFF61                  ; HALFWIDTH IDEOGRAPHIC FULL STOP
                #xFF64                  ; HALFWIDTH IDEOGRAPHIC COMMA
                #xFF65))                ; HALFWIDTH KATAKANA MIDDLE DOT
  (modify-syntax-entry char "."))

253 254
;; Corner brackets are a delimiter pair: the left one opens (class `(')
;; and the right one closes (class `)'), each naming the other as its
;; matching character.  The closing bracket used to be given the open
;; class as well, so it could never terminate the pair.
;; NOTE(review): the section heading suggests the halfwidth forms
;; U+FF62/U+FF63 were intended here -- confirm against upstream.
(modify-syntax-entry ?\「 "(」")
(modify-syntax-entry ?\」 ")「")
255

Karl Heuer's avatar
Karl Heuer committed
256 257
;; Korean character set (KSC5601)

Dave Love's avatar
Dave Love committed
258
(map-charset-chars #'modify-category-entry 'korean-ksc5601 ?h)
Kenichi Handa's avatar
Kenichi Handa committed
259 260

(map-charset-chars #'modify-syntax-entry 'korean-ksc5601 "_" #x2121 #x227E)
Dave Love's avatar
Dave Love committed
261 262 263
(map-charset-chars #'modify-syntax-entry 'korean-ksc5601 "_" #x2621 #x277E)
(map-charset-chars #'modify-syntax-entry 'korean-ksc5601 "_" #x2830 #x287E)
(map-charset-chars #'modify-syntax-entry 'korean-ksc5601 "_" #x2930 #x297E)
Kenichi Handa's avatar
Kenichi Handa committed
264 265 266
(map-charset-chars #'modify-category-entry 'korean-ksc5601 ?A #x2330 #x2339)
(map-charset-chars #'modify-category-entry 'korean-ksc5601 ?A #x2341 #x235A)
(map-charset-chars #'modify-category-entry 'korean-ksc5601 ?A #x2361 #x237A)
Kenichi Handa's avatar
Kenichi Handa committed
267 268 269 270
(map-charset-chars #'modify-category-entry 'korean-ksc5601 ?G #x2521 #x257E)
(map-charset-chars #'modify-category-entry 'korean-ksc5601 ?H #x2A21 #x2A7E)
(map-charset-chars #'modify-category-entry 'korean-ksc5601 ?K #x2B21 #x2B7E)
(map-charset-chars #'modify-category-entry 'korean-ksc5601 ?Y #x2C21 #x2C7E)
Karl Heuer's avatar
Karl Heuer committed
271

Dave Love's avatar
Dave Love committed
272
;; These are in more than one charset.
Kenichi Handa's avatar
Kenichi Handa committed
273 274 275 276 277 278 279 280 281
;; `parens' is a flat string of OPEN CLOSE pairs.  Each opener gets
;; open-paren syntax matching its closer, and each closer gets
;; close-paren syntax matching its opener.
(let ((parens (concat "〈〉《》「」『』【】〔〕〖〗〘〙〚〛"
		      "︵︶︷︸︹︺︻︼︽︾︿﹀﹁﹂﹃﹄"
		      "()[]{}"
		      ;; Fullwidth forms of the ASCII brackets, written as
		      ;; escapes so width folding cannot collapse them.
		      ;; NOTE(review): this copy had only the ASCII group
		      ;; above; the fullwidth group was presumably what
		      ;; upstream listed -- confirm.
		      "\uFF08\uFF09\uFF3B\uFF3D\uFF5B\uFF5D"))
      open close)
  (dotimes (i (/ (length parens) 2))
    (setq open (aref parens (* i 2))
	  close (aref parens (1+ (* i 2))))
    (modify-syntax-entry open (format "(%c" close))
    (modify-syntax-entry close (format ")%c" open))))
282

Dave Love's avatar
Dave Love committed
283
;; Arabic character set
284

Dave Love's avatar
Dave Love committed
285 286 287 288 289 290 291 292 293 294
;; Category `b' (Arabic): first every character of the Arabic charsets,
;; then three Unicode ranges.
(dolist (charset '(arabic-iso8859-6
                   arabic-digit
                   arabic-1-column
                   arabic-2-column))
  (map-charset-chars #'modify-category-entry charset ?b))
(dolist (range '((#x600 . #x6ff)
                 (#xfb50 . #xfdff)
                 (#xfe70 . #xfefe)))
  (modify-category-entry range ?b))
295

Dave Love's avatar
Dave Love committed
296 297 298 299 300 301
;; Cyrillic character set (ISO-8859-5)

;; NOTE(review): the character literal was lost from this copy, so the
;; form read as `? ' (SPACE) and made SPACE punctuation.  U+2116 NUMERO
;; SIGN is the ISO-8859-5 symbol presumably meant -- confirm upstream.
(modify-syntax-entry #x2116 ".")

;; Ethiopic character set

Kenichi Handa's avatar
Kenichi Handa committed
302 303
(modify-category-entry '(#x1200 . #x1399) ?e)
(modify-category-entry '(#x2d80 . #x2dde) ?e)
304
;; Ethiopic punctuation marks get punctuation syntax.
;; NOTE(review): the eight character literals were lost from this copy
;; (bare `?' reads as SPACE).  U+1361..U+1368 is the run of eight
;; Ethiopic punctuation marks presumably listed -- confirm upstream.
(dolist (char '(#x1361                  ; ETHIOPIC WORDSPACE
                #x1362                  ; ETHIOPIC FULL STOP
                #x1363                  ; ETHIOPIC COMMA
                #x1364                  ; ETHIOPIC SEMICOLON
                #x1365                  ; ETHIOPIC COLON
                #x1366                  ; ETHIOPIC PREFACE COLON
                #x1367                  ; ETHIOPIC QUESTION MARK
                #x1368))                ; ETHIOPIC PARAGRAPH SEPARATOR
  (modify-syntax-entry char "."))
(map-charset-chars #'modify-category-entry 'ethiopic ?e)

;; Hebrew character set (ISO-8859-8)

(modify-syntax-entry #x5be ".") ; MAQAF
(modify-syntax-entry #x5c0 ".") ; PASEQ
(modify-syntax-entry #x5c3 ".") ; SOF PASUQ
(modify-syntax-entry #x5f3 ".") ; GERESH
(modify-syntax-entry #x5f4 ".") ; GERSHAYIM

;; Indian character set (IS 13194 and other Emacs original Indian charsets)

(modify-category-entry '(#x901 . #x970) ?i)
(map-charset-chars #'modify-category-entry 'indian-is13194 ?i)
(map-charset-chars #'modify-category-entry 'indian-2-column ?i)
323

324 325
;; Lao character set

Dave Love's avatar
Dave Love committed
326 327
(modify-category-entry '(#xe80 . #xeff) ?o)
(map-charset-chars #'modify-category-entry 'lao ?o)
328

Dave Love's avatar
Dave Love committed
329
(let ((deflist	'(("ກ-ຮ"	"w"	?0) ; consonant
330 331 332
		  ("ະາຳຽເ-ໄ"	"w"	?1) ; vowel base
		  ("ັິ-ືົໍ"	"w"	?2) ; vowel upper
		  ("ຸູ"	"w"	?3) ; vowel lower
Kenichi Handa's avatar
Kenichi Handa committed
333
		  ("່-໋"	"w"	?4) ; tone mark
334 335 336
		  ("ຼຽ"	"w"	?9) ; semivowel lower
		  ("໐-໙"	"w"	?6) ; digit
		  ("ຯໆ"	"_"	?5) ; symbol
337 338 339 340 341 342 343 344 345 346 347 348
		  ))
      elm chars len syntax category to ch i)
  ;; Apply every (CHARS SYNTAX CATEGORY) entry of `deflist'.  CHARS is
  ;; scanned left to right.  A plain character is handled on its own.
  ;; A hyphen extends the run from the character after the previous one
  ;; up to the character that follows the hyphen.  Syntax "w" is never
  ;; set explicitly; the category is always set.
  (dolist (entry deflist)
    (let ((spec (car entry))
          (class (nth 1 entry))
          (cat (nth 2 entry))
          (pos 0))
      (while (< pos (length spec))
        (cond ((= (aref spec pos) ?-)
               (setq pos (1+ pos))
               (setq to (aref spec pos)))
              (t
               (setq ch (aref spec pos))
               (setq to ch)))
        (while (<= ch to)
          (or (string-equal class "w")
              (modify-syntax-entry ch class))
          (modify-category-entry ch cat)
          (setq ch (1+ ch)))
        (setq pos (1+ pos))))))

Karl Heuer's avatar
Karl Heuer committed
360 361
;; Thai character set (TIS620)

Dave Love's avatar
Dave Love committed
362 363
(modify-category-entry '(#xe00 . #xe7f) ?t)
(map-charset-chars #'modify-category-entry 'thai-tis620 ?t)
Karl Heuer's avatar
Karl Heuer committed
364 365

(let ((deflist	'(;; chars	syntax	category
366 367 368 369
		  ("ก-รลว-ฮ"	"w"	?0) ; consonant
		  ("ฤฦะาำเ-ๅ"	"w"	?1) ; vowel base
		  ("ัิ-ื็๎"	"w"	?2) ; vowel upper
		  ("ุ-ฺ"	"w"	?3) ; vowel lower
Kenichi Handa's avatar
Kenichi Handa committed
370
		  ("่-ํ"	"w"	?4) ; tone mark
371 372
		  ("๐-๙"	"w"	?6) ; digit
		  ("ฯๆ฿๏๚๛"	"_"	?5) ; symbol
Karl Heuer's avatar
Karl Heuer committed
373 374
		  ))
      elm chars len syntax category to ch i)
375 376 377 378 379 380 381 382 383 384
  ;; Apply every (CHARS SYNTAX CATEGORY) entry of `deflist'.  CHARS is
  ;; scanned left to right.  A plain character is handled on its own.
  ;; A hyphen extends the run from the character after the previous one
  ;; up to the character that follows the hyphen.  Syntax "w" is never
  ;; set explicitly; the category is always set.
  (dolist (entry deflist)
    (let ((spec (car entry))
          (class (nth 1 entry))
          (cat (nth 2 entry))
          (pos 0))
      (while (< pos (length spec))
        (cond ((= (aref spec pos) ?-)
               (setq pos (1+ pos))
               (setq to (aref spec pos)))
              (t
               (setq ch (aref spec pos))
               (setq to ch)))
        (while (<= ch to)
          (or (string-equal class "w")
              (modify-syntax-entry ch class))
          (modify-category-entry ch cat)
          (setq ch (1+ ch)))
        (setq pos (1+ pos))))))

;; Tibetan character set

Dave Love's avatar
Dave Love committed
398 399 400
(modify-category-entry '(#xf00 . #xfff) ?q)
(map-charset-chars #'modify-category-entry 'tibetan ?q)
(map-charset-chars #'modify-category-entry 'tibetan-1-column ?q)
401 402

(let ((deflist	'(;; chars             syntax category
Dave Love's avatar
Dave Love committed
403
		  ("ཀ-ཀྵཪ"        	"w"	?0) ; consonant
404
		  ("ྐ-ྐྵྺྻྼ"       "w"     ?0) ;
Dave Love's avatar
Dave Love committed
405 406
		  ("ིེཻོཽྀ"       "w"	?2) ; upper vowel
		  ("ཾྂྃ྆྇ྈྉྊྋ" "w"	?2) ; upper modifier
Paul Eggert's avatar
Paul Eggert committed
407
		  ("྄ཱུ༙༵༷"       "w"	?3) ; lower vowel/modifier
Kenichi Handa's avatar
Kenichi Handa committed
408
		  ("཰"		"w" ?3)		    ; invisible vowel a
Dave Love's avatar
Dave Love committed
409 410 411 412 413 414 415
		  ("༠-༩༪-༳"	        "w"	?6) ; digit
		  ("་།-༒༔ཿ"        "."     ?|) ; line-break char
		  ("་།༏༐༑༔ཿ"            "."     ?|) ;
		  ("༈་།-༒༔ཿ༽༴"  "."     ?>) ; prohibition
		  ("་།༏༐༑༔ཿ"            "."     ?>) ;
		  ("ༀ-༊༼࿁࿂྅"      "."     ?<) ; prohibition
		  ("༓༕-༘༚-༟༶༸-༻༾༿྾྿-࿏" "." ?q) ; others
416 417
		  ))
      elm chars len syntax category to ch i)
Karl Heuer's avatar
Karl Heuer committed
418 419 420 421 422 423 424 425 426 427
  ;; Apply every (CHARS SYNTAX CATEGORY) entry of `deflist'.  CHARS is
  ;; scanned left to right.  A plain character is handled on its own.
  ;; A hyphen extends the run from the character after the previous one
  ;; up to the character that follows the hyphen.  Syntax "w" is never
  ;; set explicitly; the category is always set.
  (dolist (entry deflist)
    (let ((spec (car entry))
          (class (nth 1 entry))
          (cat (nth 2 entry))
          (pos 0))
      (while (< pos (length spec))
        (cond ((= (aref spec pos) ?-)
               (setq pos (1+ pos))
               (setq to (aref spec pos)))
              (t
               (setq ch (aref spec pos))
               (setq to ch)))
        (while (<= ch to)
          (or (string-equal class "w")
              (modify-syntax-entry ch class))
          (modify-category-entry ch cat)
          (setq ch (1+ ch)))
        (setq pos (1+ pos))))))

;; Vietnamese character set

Dave Love's avatar
Dave Love committed
441 442 443 444 445 446
;; To make a word with Latin characters
(map-charset-chars #'modify-category-entry 'vietnamese-viscii-lower ?l)
(map-charset-chars #'modify-category-entry 'vietnamese-viscii-lower ?v)

(map-charset-chars #'modify-category-entry 'vietnamese-viscii-upper ?l)
(map-charset-chars #'modify-category-entry 'vietnamese-viscii-upper ?v)
Karl Heuer's avatar
Karl Heuer committed
447

448 449 450
;; For every code 32..127, pair the character of
;; `vietnamese-viscii-upper' with the one of `vietnamese-viscii-lower'
;; in the standard case table, and give the Unicode equivalents of
;; both the category `v'.
(let ((tbl (standard-case-table))
      (i 32))
  (while (< i 128)
    (let* ((char (decode-char 'vietnamese-viscii-upper i))
           (charl (decode-char 'vietnamese-viscii-lower i))
           ;; `decode-char' yields nil for a code with no character;
           ;; do not pass that on to `encode-char'.
           (uc (and char (encode-char char 'ucs)))
           (lc (and charl (encode-char charl 'ucs))))
      ;; Reuse CHARL instead of decoding the lowercase code a second time.
      (when (and char charl)
        (set-case-syntax-pair char charl tbl))
      (if uc (modify-category-entry uc ?v))
      (if lc (modify-category-entry lc ?v)))
    (setq i (1+ i))))

461 462
;; Tai Viet
;; Each entry is (CHARS SYNTAX CATEGORY).  CHARS is either a string,
;; whose characters are handled one by one, or a cons (FROM . TO)
;; handed to the modify functions as a range.
;; NOTE(review): the three range entries had lost their character
;; literals in this copy (`(?.  ?)' does not read as a range).  They
;; are restored as code points from the Tai Viet block layout --
;; confirm against the upstream file.
(let ((deflist '(;; chars	syntax	category
		 ((#xAA80 . #xAAAF)	"w"	?0) ; consonant
		 ("ꪱꪵꪶ"		"w"	?1) ; vowel base
		 ((#xAAB9 . #xAABD)	"w"	?1) ; vowel base
		 ("ꪰꪲꪳꪷꪸꪾ"	"w"	?2) ; vowel upper
		 ("ꪴ"		"w"	?3) ; vowel lower
		 ("ꫀꫂ"		"w"	?1) ; non-combining tone-mark
		 ("꪿꫁"		"w"	?4) ; combining tone-mark
		 ((#xAADB . #xAADF)	"_"	?5) ; symbol
		 )))
  (dolist (elm deflist)
    (let ((chars (car elm))
	  (syntax (nth 1 elm))
	  (category (nth 2 elm)))
      (if (consp chars)
	  (progn
	    (modify-syntax-entry chars syntax)
	    (modify-category-entry chars category))
	(mapc #'(lambda (x)
		  (modify-syntax-entry x syntax)
		  (modify-category-entry x category))
	      chars)))))
Dave Love's avatar
Dave Love committed
484

485 486
;; Bidi categories

487 488 489 490 491 492 493 494 495 496
;; If bootstrapping without generated uni-*.el files, table not defined.
;; Map each bidi class to a category: R, AL, RLO and RLE become `R';
;; L, LRE and LRO become `L'.  Other classes are left alone.  Nothing
;; happens when the property table is unavailable.
(let ((bidi-classes (unicode-property-table-internal 'bidi-class)))
  (if bidi-classes
      (map-char-table
       (lambda (chars class)
         (let ((category (cond ((memq class '(R AL RLO RLE)) ?R)
                               ((memq class '(L LRE LRO)) ?L))))
           (if category
               (modify-category-entry chars category))))
       bidi-classes)))
497

498 499 500 501 502 503 504 505
;; Load this if available, so that it gets dumped into Emacs.  This
;; allows starting Emacs with force-load-messages in ~/.emacs, and
;; avoid infinite recursion in bidi_initialize, which needs to load
;; uni-mirrored.el in order to display "Loading" messages.  We use
;; 'no-error to avoid error messages when bootstrapping without
;; generated uni-*.el files.
(load "international/uni-mirrored" 'no-error)

Dave Love's avatar
Dave Love committed
506 507 508
;; Latin

(modify-category-entry '(#x80 . #x024F) ?l)
509

510 511
(let ((tbl (standard-case-table)) c)

Dave Love's avatar
Dave Love committed
512 513 514 515 516 517
  ;; Latin-1

  ;; Fixme: Some of the non-word syntaxes here perhaps should be
  ;; reviewed.  (Note that the following all implicitly have word
  ;; syntax: ¢£¤¥¨ª¯²³´¶¸¹º.)  There should be a well-defined way of
  ;; relating Unicode categories to Emacs syntax codes.
518 519

  ;; NBSP isn't semantically interchangeable with other whitespace chars,
Paul Eggert's avatar
Paul Eggert committed
520
  ;; so it's more like punctuation.
521
  ;; Written as a code point: a literal NO-BREAK SPACE after `?' is
  ;; easily turned into a plain SPACE, which would make SPACE itself
  ;; punctuation.
  (set-case-syntax #xA0 "." tbl)
Dave Love's avatar
Dave Love committed
522 523 524 525
  (set-case-syntax ?¡ "." tbl)
  (set-case-syntax ?¦ "_" tbl)
  (set-case-syntax ?§ "." tbl)
  (set-case-syntax ?© "_" tbl)
526 527 528 529 530 531 532
  ;; French wants
  ;;   (set-case-syntax-delims ?« ?» tbl)
  ;; And German wants
  ;;   (set-case-syntax-delims ?» ?« tbl)
  ;; So let's stay neutral and let users set these up if/when they want to.
  (set-case-syntax ?« "." tbl)
  (set-case-syntax ?» "." tbl)
Dave Love's avatar
Dave Love committed
533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551
  (set-case-syntax ?¬ "_" tbl)
  (set-case-syntax ?­ "_" tbl)
  (set-case-syntax ?® "_" tbl)
  (set-case-syntax ?° "_" tbl)
  (set-case-syntax ?± "_" tbl)
  (set-case-syntax ?µ "_" tbl)
  (set-case-syntax ?· "_" tbl)
  (set-case-syntax ?¼ "_" tbl)
  (set-case-syntax ?½ "_" tbl)
  (set-case-syntax ?¾ "_" tbl)
  (set-case-syntax ?¿ "." tbl)
  ;; Pair each of the 31 code points 192..222 with the one 32 above it
  ;; (224..254).
  (dotimes (offset 31)
    (set-case-syntax-pair (+ 192 offset) (+ 224 offset) tbl))
  (set-case-syntax ?× "_" tbl)
  (set-case-syntax ?ß "w" tbl)
  (set-case-syntax ?÷ "_" tbl)
  ;; See below for ÿ.
552 553 554

  ;; Latin Extended-A, Latin Extended-B
  ;; One range call covers U+0100..U+02B8 (both ends inclusive), the
  ;; same form used for the Latin range above, instead of one call per
  ;; character.
  (modify-category-entry '(#x0100 . #x02B8) ?l)
Kenichi Handa's avatar
Kenichi Handa committed
558

559 560 561 562 563 564
  ;; Within each (FROM . TO) range the letters alternate: FROM, FROM+2,
  ;; ... are paired with the code point that directly follows each.
  (dolist (range '((#x0100 . #x012F)
                   (#x0132 . #x0137)
                   (#x0139 . #x0148)
                   (#x014a . #x0177)
                   (#x0179 . #x017E)
                   (#x0182 . #x0185)
                   (#x0187 . #x0188)
                   (#x018B . #x018C)
                   (#x0191 . #x0192)
                   (#x0198 . #x0199)
                   (#x01A0 . #x01A5)
                   (#x01A7 . #x01A8)
                   (#x01AC . #x01AD)
                   (#x01AF . #x01B0)
                   (#x01B3 . #x01B6)
                   (#x01B8 . #x01B9)
                   (#x01BC . #x01BD)
                   (#x01CD . #x01DC)
                   (#x01DE . #x01EF)
                   (#x01F4 . #x01F5)
                   (#x01F8 . #x021F)
                   (#x0222 . #x0233)
                   (#x023B . #x023C)
                   (#x0241 . #x0242)
                   (#x0246 . #x024F)))
    (dolist (upper (number-sequence (car range) (1- (cdr range)) 2))
      (set-case-syntax-pair upper (1+ upper) tbl)))
Kenichi Handa's avatar
Kenichi Handa committed
589

590
  (set-case-syntax-pair ?Ÿ ?ÿ tbl)
591

Kenichi Handa's avatar
Kenichi Handa committed
592 593 594 595 596 597 598 599 600 601 602 603
  ;; In some languages, such as Turkish, U+0049 LATIN CAPITAL LETTER I
  ;; and U+0131 LATIN SMALL LETTER DOTLESS I make a case pair, and so
  ;; do U+0130 LATIN CAPITAL LETTER I WITH DOT ABOVE and U+0069 LATIN
  ;; SMALL LETTER I.

  ;; We used to set up half of those correspondences unconditionally,
  ;; but that makes searches slow.  So now we don't set up either half
  ;; of these correspondences by default.

  ;; (set-downcase-syntax  ?İ ?i tbl)
  ;; (set-upcase-syntax    ?I ?ı tbl)

604 605
  (set-case-syntax-pair ?Ɓ ?ɓ tbl)
  (set-case-syntax-pair ?Ɔ ?ɔ tbl)
606 607
  (set-case-syntax-pair ?Ɖ ?ɖ tbl)
  (set-case-syntax-pair ?Ɗ ?ɗ tbl)
608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623
  (set-case-syntax-pair ?Ǝ ?ǝ tbl)
  (set-case-syntax-pair ?Ə ?ə tbl)
  (set-case-syntax-pair ?Ɛ ?ɛ tbl)
  (set-case-syntax-pair ?Ɠ ?ɠ tbl)
  (set-case-syntax-pair ?Ɣ ?ɣ tbl)
  (set-case-syntax-pair ?Ɩ ?ɩ tbl)
  (set-case-syntax-pair ?Ɨ ?ɨ tbl)
  (set-case-syntax-pair ?Ɯ ?ɯ tbl)
  (set-case-syntax-pair ?Ɲ ?ɲ tbl)
  (set-case-syntax-pair ?Ɵ ?ɵ tbl)
  (set-case-syntax-pair ?Ʀ ?ʀ tbl)
  (set-case-syntax-pair ?Ʃ ?ʃ tbl)
  (set-case-syntax-pair ?Ʈ ?ʈ tbl)
  (set-case-syntax-pair ?Ʊ ?ʊ tbl)
  (set-case-syntax-pair ?Ʋ ?ʋ tbl)
  (set-case-syntax-pair ?Ʒ ?ʒ tbl)
624 625 626 627 628 629
  (set-case-syntax-pair ?DŽ ?dž tbl)
  (set-case-syntax-pair ?Dž ?dž tbl)
  (set-case-syntax-pair ?LJ ?lj tbl)
  (set-case-syntax-pair ?Lj ?lj tbl)
  (set-case-syntax-pair ?NJ ?nj tbl)
  (set-case-syntax-pair ?Nj ?nj tbl)
630

631
  ;; 01F0; F; 006A 030C; # LATIN SMALL LETTER J WITH CARON
632 633 634 635
  (set-case-syntax-pair ?DZ ?dz tbl)
  (set-case-syntax-pair ?Dz ?dz tbl)
  (set-case-syntax-pair ?Ƕ ?ƕ tbl)
  (set-case-syntax-pair ?Ƿ ?ƿ tbl)
Eli Zaretskii's avatar
Eli Zaretskii committed
636 637 638 639 640 641
  ;; Written as code points (UPPER LOWER).  Two of the lowercase
  ;; partners (U+2C65, U+2C66) had lost their literals in this copy,
  ;; leaving `? ', which paired the capitals with SPACE.
  (dolist (pair '((#x023A . #x2C65)     ; A WITH STROKE
                  (#x023D . #x019A)     ; L WITH BAR
                  (#x023E . #x2C66)     ; T WITH DIAGONAL STROKE
                  (#x0243 . #x0180)     ; B WITH STROKE
                  (#x0244 . #x0289)     ; U BAR
                  (#x0245 . #x028C)))   ; TURNED V
    (set-case-syntax-pair (car pair) (cdr pair) tbl))
642

643
  ;; Latin Extended Additional
Dave Love's avatar
Dave Love committed
644
  (modify-category-entry '(#x1e00 . #x1ef9) ?l)
645
  ;; Visit only the even code points of U+1E00..U+1EF9 and pair each
  ;; with its successor, skipping those strictly between U+1E94 and
  ;; U+1EA0.
  (setq c #x1e00)
  (while (<= c #x1ef9)
    (unless (and (> c #x1e94) (< c #x1ea0))
      (set-case-syntax-pair c (1+ c) tbl))
    (setq c (+ c 2)))

652
  ;; Greek
Dave Love's avatar
Dave Love committed
653
  (modify-category-entry '(#x0370 . #x03ff) ?g)
654
  (setq c #x0370)
  (while (<= c #x03ff)
    (cond
     ;; U+0391..U+03AB except U+03A2: the partner is 32 above.
     ((and (>= c #x0391) (<= c #x03ab) (/= c #x03a2))
      (set-case-syntax-pair c (+ c 32) tbl))
     ;; Even code points of U+03DA..U+03EE: the partner is the
     ;; following code point.
     ((and (>= c #x03da) (<= c #x03ee) (= (logand c 1) 0))
      (set-case-syntax-pair c (1+ c) tbl)))
    (setq c (1+ c)))
664 665 666 667 668 669 670
  (set-case-syntax-pair ?Ά ?ά tbl)
  (set-case-syntax-pair ?Έ ?έ tbl)
  (set-case-syntax-pair ?Ή ?ή tbl)
  (set-case-syntax-pair ?Ί ?ί tbl)
  (set-case-syntax-pair ?Ό ?ό tbl)
  (set-case-syntax-pair ?Ύ ?ύ tbl)
  (set-case-syntax-pair ?Ώ ?ώ tbl)
671

672 673 674
  ;; Armenian
  ;; Pair each of U+0531..U+0556 with the code point #x30 above it.
  (dotimes (offset (- #x557 #x531))
    (set-case-syntax-pair (+ #x531 offset) (+ #x561 offset) tbl))
  ;; Leave `c' one past the range, as the stepping loop did.
  (setq c #x557)

678
  ;; Greek Extended
Dave Love's avatar
Dave Love committed
679
  (modify-category-entry '(#x1f00 . #x1fff) ?g)
680
  (setq c #x1f00)
681 682 683
  (while (<= c #x1fff)
    (and (<= (logand c #x000f) 7)
	 (<= c #x1fa7)
684 685 686
	 (not (memq c '(#x1f16 #x1f17 #x1f56 #x1f57
			       #x1f50 #x1f52 #x1f54 #x1f56)))
	 (/= (logand c #x00f0) #x70)
Dave Love's avatar
Dave Love committed
687
	 (set-case-syntax-pair (+ c 8) c tbl))
688
    (setq c (1+ c)))
689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712
  ;; Greek Extended pairs that do not follow the "+8" pattern of the
  ;; loop above, as (UPPER . LOWER) code points.
  ;; NOTE(review): all 24 forms had lost their character literals in
  ;; this copy (`? ? tbl' pairs SPACE with SPACE).  The pairs below are
  ;; the Unicode simple case mappings for U+1FB8..U+1FFC -- confirm
  ;; against the upstream file.
  (dolist (pair '((#x1FB8 . #x1FB0) (#x1FB9 . #x1FB1)
                  (#x1FBA . #x1F70) (#x1FBB . #x1F71)
                  (#x1FBC . #x1FB3)
                  (#x1FC8 . #x1F72) (#x1FC9 . #x1F73)
                  (#x1FCA . #x1F74) (#x1FCB . #x1F75)
                  (#x1FCC . #x1FC3)
                  (#x1FD8 . #x1FD0) (#x1FD9 . #x1FD1)
                  (#x1FDA . #x1F76) (#x1FDB . #x1F77)
                  (#x1FE8 . #x1FE0) (#x1FE9 . #x1FE1)
                  (#x1FEA . #x1F7A) (#x1FEB . #x1F7B)
                  (#x1FEC . #x1FE5)
                  (#x1FF8 . #x1F78) (#x1FF9 . #x1F79)
                  (#x1FFA . #x1F7C) (#x1FFB . #x1F7D)
                  (#x1FFC . #x1FF3)))
    (set-case-syntax-pair (car pair) (cdr pair) tbl))
713

714
  ;; cyrillic
Dave Love's avatar
Dave Love committed
715
  (modify-category-entry '(#x0400 . #x04FF) ?y)
716
  (setq c #x0400)
  (while (<= c #x04ff)
    (cond
     ;; U+0400..U+040F: the partner is 80 above.
     ((<= c #x040f)
      (set-case-syntax-pair c (+ c 80) tbl))
     ;; U+0410..U+042F: the partner is 32 above.
     ((<= c #x042f)
      (set-case-syntax-pair c (+ c 32) tbl))
     ;; Even code points in three later runs: the partner is the
     ;; following code point.
     ((and (zerop (% c 2))
           (or (and (>= c #x0460) (<= c #x0480))
               (and (>= c #x048c) (<= c #x04be))
               (and (>= c #x04d0) (<= c #x04f4))))
      (set-case-syntax-pair c (1+ c) tbl)))
    (setq c (1+ c)))
730 731 732 733 734
  (set-case-syntax-pair ?Ӂ ?ӂ tbl)
  (set-case-syntax-pair ?Ӄ ?ӄ tbl)
  (set-case-syntax-pair ?Ӈ ?ӈ tbl)
  (set-case-syntax-pair ?Ӌ ?ӌ tbl)
  (set-case-syntax-pair ?Ӹ ?ӹ tbl)
735

736 737
  ;; General Punctuation
  (setq c #x2000)
738 739 740
  (while (<= c #x200b)
    (set-case-syntax c " " tbl)
    (setq c (1+ c)))
Dave Love's avatar
Dave Love committed
741 742 743 744
  (while (<= c #x200F)
    (set-case-syntax c "." tbl)
    (setq c (1+ c)))
  ;; Fixme: These aren't all right:
Kenichi Handa's avatar
Kenichi Handa committed
745 746 747 748 749 750 751 752 753
  (setq c #x2010)
  (while (<= c #x2016)
    (set-case-syntax c "_" tbl)
    (setq c (1+ c)))
  ;; Punctuation syntax for quotation marks (like `)
  (while (<= c #x201f)
    (set-case-syntax  c "." tbl)
    (setq c (1+ c)))
  ;; Fixme: These aren't all right:
754 755 756
  (while (<= c #x2027)
    (set-case-syntax c "_" tbl)
    (setq c (1+ c)))
Dave Love's avatar
Dave Love committed
757 758 759
  (while (<= c #x206F)
    (set-case-syntax c "." tbl)
    (setq c (1+ c)))
760

761 762 763
  ;; Roman numerals
  (setq c #x2160)
  (while (<= c #x216f)
Dave Love's avatar
Dave Love committed
764
    (set-case-syntax-pair c (+ c #x10) tbl)
765 766
    (setq c (1+ c)))

Dave Love's avatar
Dave Love committed
767 768
  ;; Fixme: The following blocks might be better as symbol rather than
  ;; punctuation.
Dave Love's avatar
Dave Love committed
769 770
  ;; Arrows
  (setq c #x2190)
Dave Love's avatar
Dave Love committed
771 772
  (while (<= c #x21FF)
    (set-case-syntax c "." tbl)
Dave Love's avatar
Dave Love committed
773 774 775
    (setq c (1+ c)))
  ;; Mathematical Operators
  (while (<= c #x22FF)
Dave Love's avatar
Dave Love committed
776
    (set-case-syntax c "." tbl)
Dave Love's avatar
Dave Love committed
777 778 779
    (setq c (1+ c)))
  ;; Miscellaneous Technical
  (while (<= c #x23FF)
Dave Love's avatar
Dave Love committed
780
    (set-case-syntax c "." tbl)
Dave Love's avatar
Dave Love committed
781 782 783
    (setq c (1+ c)))
  ;; Control Pictures
  (while (<= c #x243F)
Dave Love's avatar
Dave Love committed
784
    (set-case-syntax c "_" tbl)
785 786 787 788 789
    (setq c (1+ c)))

  ;; Circled Latin
  (setq c #x24b6)
  (while (<= c #x24cf)
Dave Love's avatar
Dave Love committed
790 791 792
    (set-case-syntax-pair c (+ c 26) tbl)
    (modify-category-entry c ?l)
    (modify-category-entry (+ c 26) ?l)
793 794 795 796 797
    (setq c (1+ c)))

  ;; Fullwidth Latin
  (setq c #xff21)
  (while (<= c #xff3a)
Dave Love's avatar
Dave Love committed
798 799 800
    (set-case-syntax-pair c (+ c #x20) tbl)
    (modify-category-entry c ?l)
    (modify-category-entry (+ c #x20) ?l)
801 802 803
    (setq c (1+ c)))

  ;; Combining diacritics
Dave Love's avatar
Dave Love committed
804
  (modify-category-entry '(#x300 . #x362) ?^)
805
  ;; Combining marks
806
  (modify-category-entry '(#x20d0 . #x20ff) ?^)
807 808 809

  ;; Fixme: syntax for symbols &c
  )
Kenichi Handa's avatar
Kenichi Handa committed
810 811

;; Give paired bracket characters open/close parenthesis syntax, each
;; member of a pair naming the other as its matching character.
(let ((pairs
       ;; Each element is a two-character string: opener, then closer.
       ;; Entries whose glyphs are liable to be rewritten by Unicode
       ;; normalization or width folding (U+2329/U+232A and the
       ;; fullwidth/halfwidth forms) are spelled with \u escapes so the
       ;; string always holds the code points named in its comment.
       '("⁅⁆"                           ; U+2045 U+2046
         "⁽⁾"                           ; U+207D U+207E
         "₍₎"                           ; U+208D U+208E
         "\u2329\u232A"                 ; U+2329 U+232A
         "⎴⎵"                           ; U+23B4 U+23B5
         "❨❩"                           ; U+2768 U+2769
         "❪❫"                           ; U+276A U+276B
         "❬❭"                           ; U+276C U+276D
         "❰❱"                           ; U+2770 U+2771
         "❲❳"                           ; U+2772 U+2773
         "❴❵"                           ; U+2774 U+2775
         "⟦⟧"                           ; U+27E6 U+27E7
         "⟨⟩"                           ; U+27E8 U+27E9
         "⟪⟫"                           ; U+27EA U+27EB
         "⦃⦄"                           ; U+2983 U+2984
         "⦅⦆"                           ; U+2985 U+2986
         "⦇⦈"                           ; U+2987 U+2988
         "⦉⦊"                           ; U+2989 U+298A
         "⦋⦌"                           ; U+298B U+298C
         "⦍⦎"                           ; U+298D U+298E
         "⦏⦐"                           ; U+298F U+2990
         "⦑⦒"                           ; U+2991 U+2992
         "⦓⦔"                           ; U+2993 U+2994
         "⦕⦖"                           ; U+2995 U+2996
         "⦗⦘"                           ; U+2997 U+2998
         "⧼⧽"                           ; U+29FC U+29FD
         "〈〉"                           ; U+3008 U+3009
         "《》"                           ; U+300A U+300B
         "「」"                           ; U+300C U+300D
         "『』"                           ; U+300E U+300F
         "【】"                           ; U+3010 U+3011
         "〔〕"                           ; U+3014 U+3015
         "〖〗"                           ; U+3016 U+3017
         "〘〙"                           ; U+3018 U+3019
         "〚〛"                           ; U+301A U+301B
         "﴾﴿"                           ; U+FD3E U+FD3F
         "︵︶"                           ; U+FE35 U+FE36
         "︷︸"                           ; U+FE37 U+FE38
         "︹︺"                           ; U+FE39 U+FE3A
         "︻︼"                           ; U+FE3B U+FE3C
         "︽︾"                           ; U+FE3D U+FE3E
         "︿﹀"                           ; U+FE3F U+FE40
         "﹁﹂"                           ; U+FE41 U+FE42
         "﹃﹄"                           ; U+FE43 U+FE44
         "﹙﹚"                           ; U+FE59 U+FE5A
         "﹛﹜"                           ; U+FE5B U+FE5C
         "﹝﹞"                           ; U+FE5D U+FE5E
         "\uFF08\uFF09"                 ; U+FF08 U+FF09
         "\uFF3B\uFF3D"                 ; U+FF3B U+FF3D
         "\uFF5B\uFF5D"                 ; U+FF5B U+FF5D
         "\uFF5F\uFF60"                 ; U+FF5F U+FF60
         "\uFF62\uFF63"                 ; U+FF62 U+FF63
         )))
  (dolist (elt pairs)
    ;; The syntax descriptor is the class character followed by the
    ;; matching character: "(" + closer for the opener, ")" + opener
    ;; for the closer.
    (modify-syntax-entry (aref elt 0) (string ?\( (aref elt 1)))
    (modify-syntax-entry (aref elt 1) (string ?\) (aref elt 0)))))

Karl Heuer's avatar
Karl Heuer committed
869

Kenichi Handa's avatar
Kenichi Handa committed
870
;; For each character set, record the coding system best suited to
;; encoding it in the charset's `preferred-coding-system' property.

;; Fixme: should this be junked?
(let ((l '((latin-iso8859-1         . iso-latin-1)
           (latin-iso8859-2         . iso-latin-2)
           (latin-iso8859-3         . iso-latin-3)
           (latin-iso8859-4         . iso-latin-4)
           (thai-tis620             . thai-tis620)
           (greek-iso8859-7         . greek-iso-8bit)
           (arabic-iso8859-6        . iso-2022-7bit)
           (hebrew-iso8859-8        . hebrew-iso-8bit)
           (katakana-jisx0201       . japanese-shift-jis)
           (latin-jisx0201          . japanese-shift-jis)
           (cyrillic-iso8859-5      . cyrillic-iso-8bit)
           (latin-iso8859-9         . iso-latin-5)
           (japanese-jisx0208-1978  . iso-2022-jp)
           (chinese-gb2312          . chinese-iso-8bit)
           (chinese-gbk             . chinese-gbk)
           (gb18030-2-byte          . chinese-gb18030)
           (gb18030-4-byte-bmp      . chinese-gb18030)
           (gb18030-4-byte-smp      . chinese-gb18030)
           (gb18030-4-byte-ext-1    . chinese-gb18030)
           (gb18030-4-byte-ext-2    . chinese-gb18030)
           (japanese-jisx0208       . iso-2022-jp)
           (korean-ksc5601          . iso-2022-kr)
           (japanese-jisx0212       . iso-2022-jp)
           (chinese-big5-1          . chinese-big5)
           (chinese-big5-2          . chinese-big5)
           (chinese-sisheng         . iso-2022-7bit)
           (ipa                     . iso-2022-7bit)
           (vietnamese-viscii-lower . vietnamese-viscii)
           (vietnamese-viscii-upper . vietnamese-viscii)
           (arabic-digit            . iso-2022-7bit)
           (arabic-1-column         . iso-2022-7bit)
           (lao                     . lao)
           (arabic-2-column         . iso-2022-7bit)
           (indian-is13194          . devanagari)
           (indian-glyph            . devanagari)
           (tibetan-1-column        . tibetan)
           (ethiopic                . iso-2022-7bit)
           (chinese-cns11643-1      . iso-2022-cn)
           (chinese-cns11643-2      . iso-2022-cn)
           (chinese-cns11643-3      . iso-2022-cn)
           (chinese-cns11643-4      . iso-2022-cn)
           (chinese-cns11643-5      . iso-2022-cn)
           (chinese-cns11643-6      . iso-2022-cn)
           (chinese-cns11643-7      . iso-2022-cn)
           (indian-2-column         . devanagari)
           (tibetan                 . tibetan)
           (latin-iso8859-14        . iso-latin-8)
           (latin-iso8859-15        . iso-latin-9))))
  ;; Each element is (CHARSET . CODING-SYSTEM).
  (dolist (elt l)
    (put-charset-property (car elt) 'preferred-coding-system (cdr elt))))
Kenichi Handa's avatar
Kenichi Handa committed
925 926


927
;; Set up `auto-fill-chars' for charsets that should invoke auto-filling.
;; SPACE and NEWLINE are already set.

;; Each element is an inclusive (FROM . TO) range of character codes
;; whose `auto-fill-chars' entries are set to t.
(dolist (range '((#x3041 . #x30FF)
                 (#x3400 . #x4DB5)
                 (#x4e00 . #x9fbb)
                 (#xF900 . #xFAFF)
                 (#xFF00 . #xFF9F)
                 (#x20000 . #x2FFFF)))
  (set-char-table-range auto-fill-chars range t))

937

938 939 940 941
;;; Setting char-width-table.  The default is 1.

;; 0: non-spacing, enclosing combining, formatting, Hangul Jamo medial
;;    and final characters.
942
(let ((l '((#x0300 . #x036F)
943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074
	   (#x0483 . #x0489)
	   (#x0591 . #x05BD)
	   (#x05BF . #x05BF)
	   (#x05C1 . #x05C2)
	   (#x05C4 . #x05C5)
	   (#x05C7 . #x05C7)
	   (#x0600 . #x0603)
	   (#x0610 . #x0615)
	   (#x064B . #x065E)
	   (#x0670 . #x0670)
	   (#x06D6 . #x06E4)
	   (#x06E7 . #x06E8)
	   (#x06EA . #x06ED)
	   (#x070F . #x070F)
	   (#x0711 . #x0711)
	   (#x0730 . #x074A)
	   (#x07A6 . #x07B0)
	   (#x07EB . #x07F3)
	   (#x0901 . #x0902)
	   (#x093C . #x093C)
	   (#x0941 . #x0948)
	   (#x094D . #x094D)
	   (#x0951 . #x0954)
	   (#x0962 . #x0963)
	   (#x0981 . #x0981)
	   (#x09BC . #x09BC)
	   (#x09C1 . #x09C4)
	   (#x09CD . #x09CD)
	   (#x09E2 . #x09E3)
	   (#x0A01 . #x0A02)
	   (#x0A3C . #x0A3C)
	   (#x0A41 . #x0A4D)
	   (#x0A70 . #x0A71)
	   (#x0A81 . #x0A82)
	   (#x0ABC . #x0ABC)
	   (#x0AC1 . #x0AC8)
	   (#x0ACD . #x0ACD)
	   (#x0AE2 . #x0AE3)
	   (#x0B01 . #x0B01)
	   (#x0B3C . #x0B3C)
	   (#x0B3F . #x0B3F)
	   (#x0B41 . #x0B43)
	   (#x0B4D . #x0B56)
	   (#x0B82 . #x0B82)
	   (#x0BC0 . #x0BC0)
	   (#x0BCD . #x0BCD)
	   (#x0C3E . #x0C40)
	   (#x0C46 . #x0C56)
	   (#x0CBC . #x0CBC)
	   (#x0CBF . #x0CBF)
	   (#x0CC6 . #x0CC6)
	   (#x0CCC . #x0CCD)
	   (#x0CE2 . #x0CE3)
	   (#x0D41 . #x0D43)
	   (#x0D4D . #x0D4D)
	   (#x0DCA . #x0DCA)
	   (#x0DD2 . #x0DD6)
	   (#x0E31 . #x0E31)
	   (#x0E34 . #x0E3A)