;;; titdic-cnv.el --- convert cxterm dictionary (TIT format) to Quail package -*- coding:iso-2022-7bit; -*-
Karl Heuer's avatar
Karl Heuer committed
2

3
;; Copyright (C) 1997-1998, 2000-2013 Free Software Foundation, Inc.
Kenichi Handa's avatar
Kenichi Handa committed
4
;; Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
5
;;   2005, 2006, 2007, 2008, 2009, 2010, 2011
Kenichi Handa's avatar
Kenichi Handa committed
6 7
;;   National Institute of Advanced Industrial Science and Technology (AIST)
;;   Registration Number H14PRO021
Kenichi Handa's avatar
Kenichi Handa committed
8 9 10
;; Copyright (C) 2003
;;   National Institute of Advanced Industrial Science and Technology (AIST)
;;   Registration Number H13PRO009
Karl Heuer's avatar
Karl Heuer committed
11 12 13 14 15

;; Keywords: Quail, TIT, cxterm

;; This file is part of GNU Emacs.

16
;; GNU Emacs is free software: you can redistribute it and/or modify
Karl Heuer's avatar
Karl Heuer committed
17
;; it under the terms of the GNU General Public License as published by
18 19
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.
Karl Heuer's avatar
Karl Heuer committed
20 21 22 23 24 25 26

;; GNU Emacs is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.

;; You should have received a copy of the GNU General Public License
27
;; along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.
Karl Heuer's avatar
Karl Heuer committed
28

29
;;; Commentary:
Karl Heuer's avatar
Karl Heuer committed
30

Kenichi Handa's avatar
Kenichi Handa committed
31
;; Convert cxterm dictionary (of TIT format) to quail-package.
Karl Heuer's avatar
Karl Heuer committed
32 33
;;
;; Usage (within Emacs):
Kenichi Handa's avatar
Kenichi Handa committed
34
;;	M-x titdic-convert<CR>CXTERM-DICTIONARY-NAME<CR>
Karl Heuer's avatar
Karl Heuer committed
35
;; Usage (from shell):
Kenichi Handa's avatar
Kenichi Handa committed
36
;;	% emacs -batch -l titdic-cnv -f batch-titdic-convert\
Karl Heuer's avatar
Karl Heuer committed
37 38 39 40 41 42 43
;;		[-dir DIR] [DIR | FILE] ...
;;
;; When you run titdic-convert within Emacs, you have a chance to
;; modify arguments of `quail-define-package' before saving the
;; converted file.  For instance, you are likely to modify TITLE,
;; DOCSTRING, and KEY-BINDINGS.

Kenichi Handa's avatar
Kenichi Handa committed
44
;; Cxterm dictionary file (*.tit) is a line-oriented text (English,
;; Chinese, Japanese, and Korean) file.  The whole file consists of
;; two parts, the definition part (`header' hereafter) followed by
;; the dictionary part (`body' hereafter).  All lines beginning with
;; '#' are ignored.
;;
;; Each line in the header part has two fields, KEY and VALUE.  These
;; fields are separated by one or more whitespace characters.
;;
;; Each line in the body part has two fields, KEYSEQ and TRANSLATIONS.
;; These fields are separated by one or more whitespace characters.
;;
;; See the manual page of `tit2cit' of cxterm distribution for more
;; detail.
;;
;; Near the end of this file, we also have a few other tools to convert
;; miscellaneous dictionaries.
Karl Heuer's avatar
Karl Heuer committed
61 62 63 64 65

;;; Code:

(require 'quail)

Kenichi Handa's avatar
Kenichi Handa committed
66
(defvar tit-encode-list
  '(("GB" euc-china "Chinese-GB")
    ("BIG5" cn-big5 "Chinese-BIG5")
    ("JIS" euc-japan "Japanese")
    ("KS" euc-kr "Korean"))
  "Alist of values of the TIT header key \"ENCODE:\" and Emacs settings.
Each element has the form (ENCODE CODING-SYSTEM LANGUAGE-ENVIRONMENT):
ENCODE is the string that follows \"ENCODE:\" in the dictionary,
CODING-SYSTEM is the coding system used to decode the dictionary, and
LANGUAGE-ENVIRONMENT is the language environment name written as the
LANGUAGE argument of the generated `quail-define-package' call.")

74 75 76 77 78 79
;; Alist of input method names and the corresponding title and extra
;; docstring.  For each of input method generated from TIT dictionary,
;; a docstring is automatically generated from the comments in the
;; dictionary.  The extra docstring in this alist is to add more
;; information.
;; The command describe-input-method shows the automatically generated
;; docstring, then an extra docstring while replacing the form \<VAR>
;; by the value of variable VAR.  For instance, the form
;; \<quail-translation-docstring> is replaced by a description about
;; how to select a translation from a list of candidates.

(defvar quail-cxterm-package-ext-info
  '(("chinese-4corner" "$(0(?-F(B")
    ("chinese-array30" "$(0#R#O(B")
    ("chinese-ccdospy" "$AKuF4(B"
     "Pinyin base input method for Chinese charset GB2312 \(`chinese-gb2312').

Pinyin is the standard Roman transliteration method for Chinese.
For the detail of Pinyin system, see the documentation of the input
method `chinese-py'.

This input method works almost the same way as `chinese-py'.  The
difference is that you type a single key for these Pinyin spelling.
    Pinyin:  zh  en  eng ang ch  an  ao  ai  ong sh  ing  yu($A(9(B)
    keyseq:   a   f   g   h   i   j   k   l   s   u   y   v
For example:
    Chinese:  $A0!(B    $A9{(B    $AVP(B    $AND(B    $A9b(B    $ASq(B    $AH+(B
    Pinyin:   a    guo   zhong  wen  guang  yu   quan
    Keyseq:   a1   guo4   as1   wf4  guh1  yu..6 qvj6

\\<quail-translation-docstring>

For double-width GB2312 characters corresponding to ASCII, use the
input method `chinese-qj'.")

    ("chinese-ecdict" "$(05CKH(B"
"In this input method, you enter a Chinese (Big5) character or word
by typing the corresponding English word.  For example, if you type
\"computer\", \"$(0IZH+(B\" is input.

\\<quail-translation-docstring>")

    ("chinese-etzy" "$(06/0D(B"
"Zhuyin base input method for Chinese Big5 characters (`chinese-big5-1',
`chinese-big5-2').

Zhuyin is a kind of phonetic symbol.  One to three Zhuyin symbols
compose one Chinese character.

In this input method, you enter a Chinese character by first typing
keys corresponding to Zhuyin symbols (see the above table) followed by
SPC, 1, 2, 3, or 4 specifying a tone (SPC:$(0?v(N(B, 1:$(0M=Vy(B, 2:$(0Dm(N(B, 3: $(0&9Vy(B,
4:$(0(+Vy(B).

\\<quail-translation-docstring>")

    ("chinese-punct-b5" "$(0O:(BB"
     "Input method for Chinese punctuation and symbols of Big5
\(`chinese-big5-1' and `chinese-big5-2').")

    ("chinese-punct" "$A1j(BG"
     "Input method for Chinese punctuation and symbols of GB2312
\(`chinese-gb2312').")

    ("chinese-py-b5" "$(03<(BB"
     "Pinyin base input method for Chinese Big5 characters
\(`chinese-big5-1', `chinese-big5-2').

This input method works almost the same way as `chinese-py' (which
see).

This input method supports only Han characters.  The more convenient
method is `chinese-py-punct-b5', which is the combination of this
method and `chinese-punct-b5' and which supports both Han characters
and punctuation/symbols.

For double-width Big5 characters corresponding to ASCII, use the input
method `chinese-qj-b5'.

The input method `chinese-py' and `chinese-tonepy' are also Pinyin
based, but for the character set GB2312 (`chinese-gb2312').")

    ("chinese-qj-b5" "$(0)A(BB")

    ("chinese-qj" "$AH+(BG")

    ("chinese-sw" "$AJWN2(B"
"Radical base input method for Chinese charset GB2312 (`chinese-gb2312').

In this input method, you enter a Chinese character by typing two
keys.  The first key corresponds to the first ($AJW(B) radical, the second
key corresponds to the last ($AN2(B) radical.  The correspondence of keys
and radicals is as below:

 first radical:
 a  b  c  d  e  f  g  h  i  j  k  l  m  n  o  p  q  r  s  t  u  v  w  x  y  z
 $APD(B $AZ"(B $AJ,(B $AX<(B $A;p(B $A?Z(B $A^P(B $Ac_(B $AZ%(B $A\3(B $AXi(B $AD>(B $Alj(B $Ab;(B $ATB(B $Afy(B $AJ/(B $AMu(B $A0K(B $AX/(B $AHU(B $AeA(B $Aak(B $AVq(B $AR;(B $AHK(B
 last radical:
 a  b  c  d  e  f  g  h  i  j  k  l  m  n  o  p  q  r  s  t  u  v  w  x  y  z
 $ASV(B $AI=(B $AMA(B $A56(B $AZb(B $A?Z(B $ARB(B $Aqb(B $A4s(B $A6!(B $A[L(B $Ala(B $AJ.(B $A4u(B $AXg(B $ACE(B $A=q(B $AX-(B $AE.(B $ARR(B $A`m(B $AP!(B $A3'(B $A3f(B $A_.(B $A27(B

\\<quail-translation-docstring>")

    ("chinese-tonepy" "$A5wF4(B"
     "Pinyin base input method for Chinese charset GB2312 (`chinese-gb2312').

Pinyin is the standard roman transliteration method for Chinese.
For the details of Pinyin system, see the documentation of the input
method `chinese-py'.

This input method works almost the same way as `chinese-py'.  The
difference is that you must type 1..5 after each Pinyin spelling to
specify a tone (1:$ARuF=(B, 2:$AQtF=(B, 3:$AIOIy(B, 4$AOBIy(B, 5:$AGaIy(B).

\\<quail-translation-docstring>

For instance, to input $ADc(B, you type \"n i 3 3\", the first \"n i\" is
a Pinyin, the next \"3\" specifies tone, and the last \"3\" selects
the third character from the candidate list.

For double-width GB2312 characters corresponding to ASCII, use the
input method `chinese-qj'.")

    ("chinese-zozy" "$(0I\0D(B"
"Zhuyin base input method for Chinese Big5 characters (`chinese-big5-1',
`chinese-big5-2').

Zhuyin is a kind of a phonetic symbol.  One to three Zhuyin symbols
compose a Chinese character.

In this input method, you enter a Chinese character by first typing
keys corresponding to Zhuyin symbols (see the above table) followed by
SPC, 6, 3, 4, or 7 specifying a tone (SPC:$(0?v(N(B, 6:$(0Dm(N(B, 3:$(0&9Vy(B, 4:$(0(+Vy(B,
7:$(0M=Vy(B).

\\<quail-translation-docstring>"))
  "Alist of TIT-derived input method names, titles and extra docstrings.
Each element has the form (NAME TITLE [EXTRA-DOCSTRING]).  NAME is the
input method name, TITLE is the title string used for the generated
Quail package, and the optional EXTRA-DOCSTRING is appended to the
docstring that is generated from the dictionary itself.")
Karl Heuer's avatar
Karl Heuer committed
210 211 212

(defsubst tit-read-key-value ()
  "Return the value of the TIT header key on the current line.
Point must be at the first character of the value.  The value is the
run of characters up to the next space, tab, carriage return or
newline.  It is read as the contents of a Lisp string literal, so
backslash escapes in the dictionary (e.g. \\010) are decoded.
Return nil if no such run of characters follows point."
  (if (looking-at "[^ \t\r\n]+")
      ;; Wrapping the text in double quotes lets the Lisp reader decode
      ;; the backslash escapes for us.
      (car (read-from-string (concat "\"" (match-string 0) "\"")))))

(defun tit-make-quail-package-file-name (filename &optional dirname)
  "Return the Quail package file name for the TIT dictionary FILENAME.
The result is the base name of FILENAME with its extension replaced by
\".el\", expanded relative to DIRNAME (or to `default-directory' when
DIRNAME is nil).  For instance, \".../ZOZY.tit\" -> \"ZOZY.el\".

The extension is removed with `file-name-sans-extension' rather than
by dropping the last four characters, so a name that does not end in
a four-character extension neither signals an error nor loses part of
its base name."
  (expand-file-name
   (concat (file-name-sans-extension (file-name-nondirectory filename))
           ".el")
   dirname))

223
(defvar tit-dictionary t
  "Non-nil if a character dictionary is being processed.
This value is nil if we are processing a phrase dictionary.
`tit-process-header' sets it to t on a BEGINDICTIONARY line and to
nil on a BEGINPHRASE line.")

(defvar tit-encode nil
  "Value of the key \"ENCODE:\" of the TIT dictionary being converted.
Set by `titdic-convert'; looked up in `tit-encode-list'.")

(defvar tit-default-encode "GB"
  "Value used for `tit-encode' when a dictionary has no \"ENCODE:\" line.")

(defun tit-generate-key-bindings (keys function-symbol)
  "Print KEY-BINDINGS elements that bind characters of KEYS to FUNCTION-SYMBOL.
The elements are meant for the KEY-BINDINGS argument of
`quail-define-package' and are printed to `standard-output', one
\(\"KEY\" . FUNCTION-SYMBOL) pair per character.

KEYS is a string (or nil, in which case nothing is printed).  A
control character is emitted only if `quail-translation-keymap'
binds it to `quail-execute-non-quail-command'; characters above 127
are skipped.  Printable characters are written with
`prin1-to-string', so that `\"' and `\\' yield readable Lisp strings."
  (let ((first t))
    (dotimes (i (length keys))
      (let ((key (aref keys i)))
        ;; Once one binding has been printed, every further character
        ;; of KEYS starts a new output line.
        (or first (princ "\n   "))
        (when (if (< key ?\s)
                  (eq (lookup-key quail-translation-keymap
                                  (char-to-string key))
                      'quail-execute-non-quail-command)
                (<= key 127))
          (princ (cons (cond ((< key ?\s)
                              ;; Control character: print it as "\C-X".
                              (format "\"\\C-%c\"" (+ key ?@)))
                             ((< key 127)
                              (prin1-to-string (char-to-string key)))
                             ;; Only DEL (127) is left.
                             (t "\"\\C-?\""))
                       function-symbol))
          (setq first nil))))))

(defun tit-process-header (filename)
  "Convert the header of a TIT dictionary into a `quail-define-package' call.
FILENAME is the name of the TIT dictionary file.  Its base name,
downcased, without extension and prefixed with \"chinese-\", becomes
the name of the input method.

Every line of the accessible portion of the current buffer is treated
as a header line: the values of recognized keywords are recorded, and
the line itself is copied to `standard-output' as a Lisp comment.
After that the `quail-define-package' form is printed to
`standard-output'.

As a side effect, `tit-dictionary' is set to t when a BEGINDICTIONARY
line is seen and to nil when a BEGINPHRASE line is seen."
  (message "Processing header part...")
  (goto-char (point-min))

  ;; At first, generate header part of the Quail package while
  ;; collecting information from the original header.
  (let ((package (concat
                  "chinese-"
                  (file-name-sans-extension
                   (downcase (file-name-nondirectory filename)))))
        ;; TIT keywords and the corresponding default values.
        (tit-multichoice t)
        (tit-prompt "")
        (tit-comments nil)
        (tit-backspace "\010\177")
        (tit-deleteall "\015\025")
        (tit-moveright ".>")
        (tit-moveleft ",<")
        (tit-keyprompt nil))

    (princ ";; Quail package `")
    (princ package)
    (princ "'\n")
    (princ ";;   Generated by the command `titdic-convert'\n")
    (princ ";;\tOriginal TIT dictionary file: ")
    (princ (file-name-nondirectory filename))
    (princ "\n\n")

    (while (not (eobp))
      (let ((ch (following-char))
            (pos (point)))
        (cond ((= ch ?C)		; COMMENT
               (cond ((looking-at "COMMENT")
                      (let ((pos (match-end 0)))
                        (end-of-line)
                        (setq tit-comments
                              (cons (buffer-substring-no-properties pos (point))
                                    tit-comments))))))
              ((= ch ?M)		; MULTICHOICE, MOVERIGHT, MOVELEFT
               (cond ((looking-at "MULTICHOICE:[ \t]*")
                      (goto-char (match-end 0))
                      (setq tit-multichoice (looking-at "YES")))
                     ((looking-at "MOVERIGHT:[ \t]*")
                      (goto-char (match-end 0))
                      (setq tit-moveright (tit-read-key-value)))
                     ((looking-at "MOVELEFT:[ \t]*")
                      (goto-char (match-end 0))
                      (setq tit-moveleft (tit-read-key-value)))))
              ((= ch ?P)		; PROMPT
               (cond ((looking-at "PROMPT:[ \t]*")
                      (goto-char (match-end 0))
                      ;; A PROMPT: line without a value yields nil; keep
                      ;; `tit-prompt' a string so the code below can use
                      ;; it without further checks.
                      (setq tit-prompt (or (tit-read-key-value) ""))
                      ;; Some TIT dictionaries that are encoded by
                      ;; euc-china contains invalid character at the tail.
                      (if (> (length tit-prompt) 0)
                          (let* ((last (aref tit-prompt
                                             (1- (length tit-prompt))))
                                 (split (split-char last)))
                            (if (or (eq (nth 1 split) 32)
                                    (eq (nth 2 split) 32))
                                (setq tit-prompt
                                      (substring tit-prompt 0 -1))))))))
              ((= ch ?B)		; BACKSPACE, BEGINDICTIONARY,
                                        ; BEGINPHRASE
               (cond ((looking-at "BACKSPACE:[ \t]*")
                      (goto-char (match-end 0))
                      (setq tit-backspace (tit-read-key-value)))
                     ((looking-at "BEGINDICTIONARY")
                      (setq tit-dictionary t))
                     ((looking-at "BEGINPHRASE")
                      (setq tit-dictionary nil))))
              ((= ch ?K)		; KEYPROMPT
               (cond ((looking-at "KEYPROMPT(\\(.*\\)):[ \t]*")
                      (let ((key-char (match-string 1)))
                        (goto-char (match-end 0))
                        ;; The key may be given as a backslash escape.
                        (if (string-match "\\\\[0-9]+" key-char)
                            (setq key-char
                                  (car (read-from-string (format "\"%s\""
                                                                 key-char)))))
                        (setq tit-keyprompt
                              (cons (cons key-char (tit-read-key-value))
                                    tit-keyprompt)))))))
        ;; Copy the whole header line to the output as a comment.
        (end-of-line)
        (princ ";; ")
        (princ (buffer-substring-no-properties pos (point)))
        (princ "\n")
        (forward-line 1)))

    (princ "\n;;; End of the header of original TIT dictionary.\n\n")
    (princ ";;; Code:\n\n(require 'quail)\n\n")

    (princ "(quail-define-package ")
    ;; Args NAME, LANGUAGE, TITLE
    (let ((title (nth 1 (assoc package quail-cxterm-package-ext-info))))
      (princ "\"")
      (princ package)
      (princ "\" \"")
      (princ (nth 2 (assoc tit-encode tit-encode-list)))
      (princ "\" \"")
      (princ (or title
                 (if (string-match "[:$A!K$(0!(!J(B]+\\([^:$A!K$(0!(!K(B]+\\)" tit-prompt)
                     (substring tit-prompt (match-beginning 1) (match-end 1))
                   tit-prompt)))
      (princ "\"\n"))

    ;; Arg GUIDANCE
    ;; Walk the list without modifying `tit-keyprompt': its value is
    ;; needed again below to decide KBD-TRANSLATE and SHOW-LAYOUT.
    (if tit-keyprompt
        (progn
          (princ " '(")
          (dolist (elt tit-keyprompt)
            (princ "   ")
            (princ (format "(%d . \"%s\")\n"
                           (string-to-char (car elt))
                           (cdr elt))))
          (princ ")"))
      (princ " t\n"))

    ;; Arg DOCSTRING
    (let ((doc (concat tit-prompt "\n"))
          (comments (if tit-comments
                        (mapconcat 'identity (nreverse tit-comments) "\n")))
          (doc-ext (nth 2 (assoc package quail-cxterm-package-ext-info))))
      (if comments
          (setq doc (concat doc "\n" comments "\n")))
      (if doc-ext
          (setq doc (concat doc "\n" doc-ext "\n")))
      (prin1 doc)
      (terpri))

    ;; Arg KEY-BINDINGS
    (princ " '(")
    (tit-generate-key-bindings tit-backspace 'quail-delete-last-char)
    (princ "\n   ")
    (tit-generate-key-bindings tit-deleteall 'quail-abort-translation)
    (princ "\n   ")
    (tit-generate-key-bindings tit-moveright 'quail-next-translation)
    (princ "\n   ")
    (tit-generate-key-bindings tit-moveleft 'quail-prev-translation)
    (princ ")\n")

    ;; Args FORGET-TRANSLATION, DETERMINISTIC, KBD-TRANSLATE, SHOW-LAYOUT.
    ;; The remaining args are all nil.
    (princ " nil")
    (princ (if tit-multichoice " nil" " t"))
    (princ (if tit-keyprompt " t t)\n\n" " nil nil)\n\n"))))

(defsubst tit-flush-translations (key translations)
  "Print the Quail rule for KEY and TRANSLATIONS, followed by a newline.
Each backslash followed by three digits in KEY is first replaced by
the text the Lisp reader produces for that escape inside a string.
If `tit-dictionary' is non-nil, TRANSLATIONS is printed as is.
Otherwise TRANSLATIONS is a list collected in reverse order; it is
reversed destructively and printed as a vector."
  (let ((escape "\\\\[0-9][0-9][0-9]")
        (start 0)
        (pieces nil))
    ;; Collect, in reverse order, the literal text before each escape
    ;; and the decoded escape itself.
    (while (string-match escape key start)
      (setq pieces (cons (substring key start (match-beginning 0)) pieces))
      (setq pieces (cons (car (read-from-string
                               (concat "\"" (match-string 0 key) "\"")))
                         pieces))
      (setq start (match-end 0)))
    ;; KEY is rebuilt only when at least one escape was found.
    (if pieces
        (setq key (apply 'concat
                         (nreverse (cons (substring key start) pieces))))))
  (let ((value (if tit-dictionary
                   translations
                 (vconcat (nreverse translations)))))
    (prin1 (list key value)))
  (princ "\n"))
Karl Heuer's avatar
Karl Heuer committed
414 415 416 417 418

(defun tit-process-body ()
  "Convert the body of a TIT dictionary into a `quail-define-rules' call.
Lines are processed from point to the end of the accessible portion
of the current buffer, and the resulting form is printed to
`standard-output'.

Empty lines, lines starting with `#' and lines that have a key but no
translation are ignored.  Translations of consecutive lines with the
same key are merged into one rule: if `tit-dictionary' is non-nil
they are concatenated into one string, otherwise they are collected
into a list of phrases.  Each rule is printed by
`tit-flush-translations'."
  (message "Formatting translation rules...")
  (let ((prev-key "")
        ch key translations pos)
    (princ "(quail-define-rules\n")
    (while (null (eobp))
      (setq ch (following-char))
      (if (or (= ch ?#) (= ch ?\n))
          (forward-line 1)
        (setq pos (point))
        (skip-chars-forward "^ \t\n")
        (setq key (buffer-substring-no-properties pos (point)))
        (skip-chars-forward " \t")
        (setq ch (following-char))
        ;; `eolp' is also true at the end of the buffer, so a key-only
        ;; last line without a final newline is ignored as well.
        (if (or (= ch ?#) (eolp))
            ;; This entry contains no translations.  Let's ignore it.
            (forward-line 1)
          (or (string= key prev-key)
              (progn
                ;; A new key starts; print what was collected for the
                ;; previous one.
                (if translations
                    (tit-flush-translations prev-key translations))
                (setq translations nil
                      prev-key key)))
          (if tit-dictionary
              ;; Character dictionary: the translations of a key form
              ;; one string.
              (progn
                (setq pos (point))
                (skip-chars-forward "^ \t#\n")
                (setq translations
                      (if translations
                          (concat translations
                                  (buffer-substring-no-properties pos (point)))
                        (buffer-substring-no-properties pos (point)))))
            ;; Phrase dictionary: collect the phrases of this line in
            ;; reverse order; a `#' starts a trailing comment.
            (while (not (eolp))
              (setq pos (point))
              (skip-chars-forward "^ \t\n")
              (setq translations (cons (buffer-substring-no-properties
                                        pos (point))
                                       translations))
              (skip-chars-forward " \t")
              (setq ch (following-char))
              (if (= ch ?#) (end-of-line))))
          (forward-line 1))))

    (if translations
        (tit-flush-translations prev-key translations))
    (princ ")\n")))
Karl Heuer's avatar
Karl Heuer committed
465 466 467 468 469 470 471

;;;###autoload
(defun titdic-convert (filename &optional dirname)
  "Convert a TIT dictionary of FILENAME into a Quail package.
Optional argument DIRNAME if specified is the directory name under which
the generated Quail package is saved.

The dictionary is decoded with the coding system that
`tit-encode-list' associates with the value of its \"ENCODE:\" line
\(`tit-default-encode' if there is no such line), and the Quail
package is written with the same coding system and Unix end-of-line
format.  Signal an error if the dictionary has no line starting with
\"BEGIN\" or if the \"ENCODE:\" value is unknown."
  (interactive "FTIT dictionary file: ")
  ;; `coding-system-for-write' is bound here and set below, once the
  ;; encoding of the dictionary is known; the binding is still in
  ;; effect when `with-temp-file' writes the generated package.
  (let ((coding-system-for-write nil))
    (with-temp-file  (tit-make-quail-package-file-name filename dirname)
      ;; Everything printed below goes into the buffer that becomes
      ;; the Quail package file.
      (let ((standard-output (current-buffer)))
        (with-temp-buffer
          (set-buffer-multibyte nil)
          ;; Here we must use `raw-text' instead of `no-conversion' to
          ;; enable auto-decoding of eol format (CRLF->LF).
          (let ((coding-system-for-read 'raw-text))
            (insert-file-contents (expand-file-name filename)))

          ;; Decode the buffer contents from the encoding specified by a
          ;; value of the key "ENCODE:".
          (if (not (search-forward "\nBEGIN" nil t))
              (error "TIT dictionary doesn't have body part"))
          (let ((limit (point))
                coding-system slot)
            (goto-char (point-min))
            (if (re-search-forward "^ENCODE:[ \t]*" limit t)
                (progn
                  (goto-char (match-end 0))
                  (setq tit-encode (tit-read-key-value)))
              (setq tit-encode tit-default-encode))
            (setq slot (assoc tit-encode tit-encode-list))
            (if (not slot)
                (error "Invalid ENCODE: value in TIT dictionary"))
            (setq coding-system (nth 1 slot))
            (message "Decoding with coding system %s..." coding-system)
            (goto-char (point-min))
            (decode-coding-region (point-min) (point-max) coding-system)
            ;; Explicitly set eol format to `unix'.
            (setq coding-system-for-write
                  (coding-system-change-eol-conversion coding-system 'unix))
            (remove-text-properties (point-min) (point-max) '(charset nil)))

          (set-buffer-multibyte t)
          ;; Set point the starting position of the body part.
          (goto-char (point-min))
          (if (not (search-forward "\nBEGIN" nil t))
              (error "TIT dictionary can't be decoded correctly"))

          ;; Process the header part.
          (forward-line 1)
          (narrow-to-region (point-min) (point))
          (tit-process-header filename)
          (widen)

          ;; Process the body part
          (tit-process-body)

          (princ ";; Local Variables:\n")
          (princ ";; version-control: never\n")
          (princ ";; no-update-autoloads: t\n")
          (princ (format ";; coding: %s\n" coding-system-for-write))
          (princ ";; End:\n"))))))
Karl Heuer's avatar
Karl Heuer committed
526 527

;;;###autoload
(defun batch-titdic-convert (&optional force)
  "Run `titdic-convert' on the files remaining on the command line.
Use this from the command line, with `-batch';
it won't work in an interactive Emacs.
For example, invoke \"emacs -batch -f batch-titdic-convert XXX.tit\" to
 generate Quail package file \"XXX.el\" from TIT dictionary file \"XXX.tit\".
To get complete usage, invoke \"emacs -batch -f batch-titdic-convert -h\".

A command line argument that names a directory stands for all files
in it whose names end in \".tit\".  If the first argument is \"-dir\",
the argument after it is the directory under which the Quail packages
are saved.  A dictionary is converted only if it is newer than its
Quail package, unless FORCE is non-nil.  Emacs is killed afterwards."
  (defvar command-line-args-left)	; Avoid compiler warning.
  (if (not noninteractive)
      (error "`batch-titdic-convert' should be used only with -batch"))
  (if (string= (car command-line-args-left) "-h")
      (progn
        (message "To convert XXX.tit and YYY.tit into XXX.el and YYY.el:")
        (message "  %% emacs -batch -l titdic-cnv -f batch-titdic-convert XXX.tit YYY.tit")
        (message "To convert XXX.tit into DIR/XXX.el:")
        (message "  %% emacs -batch -l titdic-cnv -f batch-titdic-convert -dir DIR XXX.tit"))
    (let (targetdir filename files)
      (if (string= (car command-line-args-left) "-dir")
          (progn
            (setq command-line-args-left (cdr command-line-args-left))
            (setq targetdir (car command-line-args-left))
            (setq command-line-args-left (cdr command-line-args-left))))
      (while command-line-args-left
        (setq filename (expand-file-name (car command-line-args-left)))
        (if (file-directory-p filename)
            (progn
              (message "Converting all tit files in the directory %s" filename)
              ;; "\\'" anchors at the end of the file name, whereas
              ;; "$" would also match before a newline inside it.
              (setq files (directory-files filename t "\\.tit\\'")))
          (setq files (list filename)))
        (dolist (entry files)
          (let ((file (expand-file-name entry)))
            (when (or force
                      (file-newer-than-file-p
                       file (tit-make-quail-package-file-name file targetdir)))
              (message "Converting %s to quail-package..." file)
              (titdic-convert file targetdir))))
        (setq command-line-args-left (cdr command-line-args-left)))
      (message "Byte-compile the created files by:")
      (message "  %% emacs -batch -f batch-byte-compile XXX.el")))
  (kill-emacs 0))

570 571 572 573 574 575

;;; Converter of miscellaneous dictionaries other than TIT format.

;; Alist of input method names and the corresponding information.
;; Each element has this form:
;;   (INPUT-METHOD-NAME		;; Name of the input method.
;;    INPUT-METHOD-TITLE	;; Title string of the input method
;;    DICFILE			;; Name of the source dictionary file.
;;    CODING			;; Coding system of the dictionary file.
;;    QUAILFILE			;; Name of the Quail package file.
;;    CONVERTER			;; Function to generate the Quail package.
;;    COPYRIGHT-NOTICE		;; Copyright notice of the source dictionary.
;;    )

(defvar quail-misc-package-ext-info
  '(("chinese-b5-tsangchi" "$(06A(BB"
     "cangjie-table.b5" big5 "tsang-b5.el"
     tsang-b5-converter
     "\
;; # Copyright 2001 Christian Wittern <wittern@iis.sinica.edu.tw>
;; #
;; # Permission to copy and distribute both modified and
;; # unmodified versions is granted without royalty provided
;; # this notice is preserved.")

    ("chinese-b5-quick" "$(0X|(BB"
     "cangjie-table.b5" big5 "quick-b5.el"
     quick-b5-converter
     "\
;; # Copyright 2001 Christian Wittern <wittern@iis.sinica.edu.tw>
;; #
;; # Permission to copy and distribute both modified and
;; # unmodified versions is granted without royalty provided
;; # this notice is preserved.")

    ("chinese-cns-tsangchi" "$(GT?(BC"
     "cangjie-table.cns" iso-2022-cn-ext "tsang-cns.el"
     tsang-cns-converter
     "\
;; # Copyright 2001 Christian Wittern <wittern@iis.sinica.edu.tw>
;; #
;; # Permission to copy and distribute both modified and
;; # unmodified versions is granted without royalty provided
;; # this notice is preserved.")

    ("chinese-cns-quick" "$(Gv|(BC"
     "cangjie-table.cns" iso-2022-cn-ext "quick-cns.el"
     quick-cns-converter
     "\
;; # Copyright 2001 Christian Wittern <wittern@iis.sinica.edu.tw>
;; #
;; # Permission to copy and distribute both modified and
;; # unmodified versions is granted without royalty provided
;; # this notice is preserved.")

    ("chinese-py" "$AF4(BG"
     "pinyin.map" cn-gb-2312 "PY.el"
     py-converter
     "\
;; \"pinyin.map\" is included in a free package called CCE.  It is
;; available at:
;;	http://ftp.debian.org/debian/dists/potato/main
;;		/source/utils/cce_0.36.orig.tar.gz
;; This package contains the following copyright notice.
;;
;;
;;             Copyright (C) 1999, Rui He, herui@cs.duke.edu
;;
;;
;;                  CCE(Console Chinese Environment) 0.32
;;
;; CCE is free software; you can redistribute it and/or modify it under the
;; terms of the GNU General Public License as published by the Free Software
;; Foundation; either version 1, or (at your option) any later version.
;;
;; CCE is distributed in the hope that it will be useful, but WITHOUT ANY
;; WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
;; FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
;; details.
;;
;; You should have received a copy of the GNU General Public License along with
;; CCE.  If not, see <http://www.gnu.org/licenses/>.")

    ("chinese-ziranma" "$AWTH;(B"
     "ziranma.cin" cn-gb-2312 "ZIRANMA.el"
     ziranma-converter
     "\
;; \"ziranma.cin\" is included in a free package called CCE.  It is
;; available at:
;;	http://ftp.debian.org/debian/dists/potato/main
;;		/source/utils/cce_0.36.orig.tar.gz
;; This package contains the following copyright notice.
;;
;;
;;             Copyright (C) 1999, Rui He, herui@cs.duke.edu
;;
;;
;;                  CCE(Console Chinese Environment) 0.32
;;
;; CCE is free software; you can redistribute it and/or modify it under the
;; terms of the GNU General Public License as published by the Free Software
;; Foundation; either version 1, or (at your option) any later version.
;;
;; CCE is distributed in the hope that it will be useful, but WITHOUT ANY
;; WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
;; FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
;; details.
;;
;; You should have received a copy of the GNU General Public License along with
;; CCE.  If not, see <http://www.gnu.org/licenses/>.")

    ("chinese-ctlau" "$AAuTA(B"
     "CTLau.html" cn-gb-2312 "CTLau.el"
     ctlau-gb-converter
     "\
;; \"CTLau.html\" is available at:
;;
;;   http://umunhum.stanford.edu/~lee/chicomp/CTLau.html
;;
;; It contains the following copyright notice:
;;
;; # Copyright (C) 1988-2001  Fung Fung Lee (lee@umunhum.stanford.edu)
;; #
;; # This program is free software; you can redistribute it and/or
;; # modify it under the terms of the GNU General Public License
;; # as published by the Free Software Foundation; either version 2
;; # of the License, or any later version.
;; #
;; # This program is distributed in the hope that it will be useful,
;; # but WITHOUT ANY WARRANTY; without even the implied warranty of
;; # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; # GNU General Public License for more details.
;; #
;; # You should have received a copy of the GNU General Public License
;; # along with this program.  If not, see <http://www.gnu.org/licenses/>.")

    ("chinese-ctlaub" "$(0N,Gn(B"
     "CTLau-b5.html" big5 "CTLau-b5.el"
     ctlau-b5-converter
     "\
;; \"CTLau-b5.html\" is available at:
;;
;;   http://umunhum.stanford.edu/~lee/chicomp/CTLau-b5.html
;;
;; It contains the following copyright notice:
;;
;; # Copyright (C) 1988-2001  Fung Fung Lee (lee@umunhum.stanford.edu)
;; #
;; # This program is free software; you can redistribute it and/or
;; # modify it under the terms of the GNU General Public License
;; # as published by the Free Software Foundation; either version 2
;; # of the License, or any later version.
;; #
;; # This program is distributed in the hope that it will be useful,
;; # but WITHOUT ANY WARRANTY; without even the implied warranty of
;; # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; # GNU General Public License for more details.
;; #
;; # You should have received a copy of the GNU General Public License
;; # along with this program.  If not, see <http://www.gnu.org/licenses/>.")
    )
  "Alist of input methods generated from dictionaries other than TIT format.
Each element has the form
  (INPUT-METHOD-NAME INPUT-METHOD-TITLE DICFILE CODING QUAILFILE
   CONVERTER COPYRIGHT-NOTICE)
where DICFILE is the name of the source dictionary file, CODING is its
coding system, QUAILFILE is the name of the Quail package file,
CONVERTER is the function that generates the Quail package, and
COPYRIGHT-NOTICE is the copyright notice of the source dictionary.")

;; Generate the code for a Quail package in the current buffer from the
;; Tsang dictionary in the buffer DICBUF.  The input method name of the
;; Quail package is NAME, and the title string is TITLE.

;; If TSANG-P is non-nil, generate the $(06AQo(B input method.  Otherwise
;; generate $(0X|/y(B (simple version of $(06AQo(B).  If BIG5-P is non-nil, the
;; input method is for inputting Big5 characters.  Otherwise the input
;; method is for inputting CNS characters.

(defun tsang-quick-converter (dicbuf name title tsang-p big5-p)
  "Append Quail package code built from the Tsang dictionary in DICBUF.
The text is inserted at the end of the current buffer: a docstring,
the translation-block key bindings, and a `quail-define-rules' form
holding one rule per key (sorted by key) followed by punctuation
rules whose keys are \"z\" plus an ASCII punctuation character.

If TSANG-P is non-nil, the whole key of each dictionary entry is
used; otherwise keys longer than one letter are reduced to their
first and last letters.  If BIG5-P is non-nil, the Big5 variants of
the docstring and punctuation are used, otherwise the CNS variants.

NAME and TITLE are accepted for the common converter calling
convention but are not used by this function."
  (let ((fulltitle (if tsang-p (if big5-p "$(06AQo(B" "$(GT?on(B")
		     (if big5-p "$(0X|/y(B" "$(Gv|Mx(B")))
	dic)
    (goto-char (point-max))
    (if big5-p
	(insert (format "\"$(0&d'GTT&,!J(B%s$(0!K(BBIG5

	$(0KHM$(B%s$(0TT&,WoOu(B

   [Q $(0'D(B] [W $(0(q(B] [E $(0'V(B] [R $(0&H(B] [T $(0'>(B] [Y $(0&4(B] [U $(0&U(B] [I $(0'B(B] [O $(0&*(B] [P $(0'A(B]

    [A $(0'K(B] [S $(0&T(B] [D $(0'N(B] [F $(0'W(B] [G $(0&I(B] [H $(0*M(B] [J $(0&3(B] [L $(0&d(B]

      [Z  ] [X $(0[E(B] [C $(01[(B] [V $(0&M(B] [B $(0'M(B] [N $(0&_(B] [M $(0&"(B]

\\\\<quail-translation-docstring>\"\n"
			fulltitle fulltitle))
      (insert (format "\"$(GDcEFrSD+!J(B%s$(G!K(BCNS

	$(GiGk#(B%s$(GrSD+uomu(B

   [Q $(GEC(B] [W $(GFp(B] [E $(GEU(B] [R $(GDG(B] [T $(GE=(B] [Y $(GD3(B] [U $(GDT(B] [I $(GEA(B] [O $(GD)(B] [P $(GE@(B]

    [A $(GEJ(B] [S $(GDS(B] [D $(GEM(B] [F $(GEV(B] [G $(GDH(B] [H $(GHL(B] [J $(GD2(B] [L $(GDc(B]

      [Z  ] [X $(GyE(B] [C $(GOZ(B] [V $(GDL(B] [B $(GEL(B] [N $(GD^(B] [M $(GD!(B]

\\\\<quail-translation-docstring>\"\n"
		      fulltitle fulltitle)))
    (insert "  '((\".\" . quail-next-translation-block)
   (\",\" . quail-prev-translation-block))
  nil nil)\n\n")
    (insert "(quail-define-rules\n")
    (with-current-buffer dicbuf
      (goto-char (point-min))
      ;; The entries start at the line holding the code "A440".
      (search-forward "A440")
      (beginning-of-line)
      (let ((table (make-hash-table :test 'equal)))
	(while (not (eobp))
	  ;; Skip the 4-character code and the separator after it; the
	  ;; character to be produced follows.
	  (forward-char 5)
	  (let ((trans (char-to-string (following-char)))
		key val)
	    ;; The trailing "\r*" handles double CR line ends, which
	    ;; result when checking out of CVS on MS-Windows.  Only
	    ;; record the entry when a key was really found; otherwise
	    ;; the match data would still describe an earlier entry and
	    ;; the character would be filed under the wrong key.
	    (when (re-search-forward "\\([A-Z]+\\)\r*$" nil t)
	      (setq key (downcase
			 (if (or tsang-p
				 (<= (- (match-end 1) (match-beginning 1)) 1))
			     (match-string 1)
			   ;; Quick variant: first and last letter only.
			   (string (char-after (match-beginning 1))
				   (char-after (1- (match-end 1)))))))
	      ;; Characters sharing a key are accumulated in file order.
	      (setq val (gethash key table))
	      (puthash key (if val (concat val trans) trans) table))
	    (forward-line 1)))
	(maphash (lambda (key val) (push (cons key val) dic))
		 table)))
    (setq dic (sort dic (lambda (x y) (string< (car x) (car y)))))
    (dolist (elt dic)
      (insert (format "(%S\t%S)\n" (car elt) (cdr elt))))
    ;; Each element is (ASCII-KEY BIG5-CANDIDATES CNS-CANDIDATES).
    (let ((punctuation '((";" "$(0!'!2!"!#!.!/(B" "$(G!'!2!"!#!.!/(B")
			 (":" "$(0!(!+!3!%!$!&!0!1(B" "$(G!(!+!3!%!$!&!0!1(B")
			 ("'" "$(0!e!d(B" "$(G!e!d(B")
			 ("\"" "$(0!g!f!h!i!q(B" "$(G!g!f!h!i!q(B")
			 ("\\" "$(0"`"b#M(B" "$(G"`"b#M(B")
			 ("|" "$(0!6!8!:"^(B" "$(G!6!8!:"^(B")
			 ("/" "$(0"_"a#L(B" "$(G"_"a#L(B")
			 ("?" "$(0!)!4(B" "$(G!)!4(B")
			 ("<" "$(0!R"6"A!T"H(B" "$(G!R"6"A!T"H(B")
			 (">" "$(0!S"7"B!U(B" "$(G!S"7"B!U(B")
			 ("[" "$(0!F!J!b!H!L!V!Z!X!\(B" "$(G!F!J!b!H!L!V!Z!X!\(B")
			 ("]" "$(0!G!K!c!I!M!W![!Y!](B" "$(G!G!K!c!I!M!W![!Y!](B")
			 ("{" "$(0!B!`!D(B " "$(G!B!`!D(B ")
			 ("}" "$(0!C!a!E(B" "$(G!C!a!E(B")
			 ("`" "$(0!j!k(B" "$(G!j!k(B")
			 ("~" "$(0"D"+",!<!=(B" "$(G"D"+",!<!=(B")
			 ("!" "$(0!*!5(B" "$(G!*!5(B")
			 ("@" "$(0"i"n(B" "$(G"i"n(B")
			 ("#" "$(0!l"-(B" "$(G!l"-(B")
			 ("$" "$(0"c"l(B" "$(G"c"l(B")
			 ("%" "$(0"h"m(B" "$(G"h"m(B")
			 ("&" "$(0!m".(B" "$(G!m".(B")
			 ("*" "$(0!n"/!o!w!x(B" "$(G!n"/!o!w!x(B")
			 ("(" "$(0!>!^!@(B" "$(G!>!^!@(B")
			 (")" "$(0!?!_!A(B" "$(G!?!_!A(B")
			 ("-" "$(0!7!9"#"$"1"@(B" "$(G!7!9"#"$"1"@(B")
			 ("_" "$(0"%"&(B" "$(G"%"&(B")
			 ("=" "$(0"8"C(B" "$(G"8"C(B")
			 ("+" "$(0"0"?(B" "$(G"0"?(B"))))
      (dolist (elt punctuation)
	(insert (format "(%S %S)\n" (concat "z" (car elt))
			(if big5-p (nth 1 elt) (nth 2 elt))))))
    (insert ")\n")))

(defun tsang-b5-converter (dicbuf name title)
  "Run `tsang-quick-converter' on DICBUF with TSANG-P and BIG5-P non-nil.
DICBUF, NAME and TITLE are passed through unchanged."
  (let ((full-keys t)
	(for-big5 t))
    (tsang-quick-converter dicbuf name title full-keys for-big5)))

(defun quick-b5-converter (dicbuf name title)
  "Run `tsang-quick-converter' on DICBUF with TSANG-P nil and BIG5-P non-nil.
DICBUF, NAME and TITLE are passed through unchanged."
  (let ((full-keys nil)
	(for-big5 t))
    (tsang-quick-converter dicbuf name title full-keys for-big5)))

(defun tsang-cns-converter (dicbuf name title)
  "Run `tsang-quick-converter' on DICBUF with TSANG-P non-nil and BIG5-P nil.
DICBUF, NAME and TITLE are passed through unchanged."
  (let ((full-keys t)
	(for-big5 nil))
    (tsang-quick-converter dicbuf name title full-keys for-big5)))

(defun quick-cns-converter (dicbuf name title)
  "Run `tsang-quick-converter' on DICBUF with TSANG-P and BIG5-P both nil.
DICBUF, NAME and TITLE are passed through unchanged."
  (let ((full-keys nil)
	(for-big5 nil))
    (tsang-quick-converter dicbuf name title full-keys for-big5)))

;; Generate the code for a Quail package in the current buffer from the
;; Pinyin dictionary in the buffer DICBUF.  The input method name of
;; the Quail package is NAME, and the title string is TITLE.

(defun py-converter (dicbuf name title)
  "Append Quail package code built from the Pinyin dictionary in DICBUF.
The text is inserted at the end of the current buffer: the input
method docstring, the key bindings, and a `quail-define-rules' form.
The contents of DICBUF are copied in, everything before the first
line starting with a letter is dropped, and each remaining line is
rewritten in place into a rule (\"KEY\" \"REST-OF-LINE\"), where KEY is
the leading run of lower-case letters and the single character
following it is removed.

NAME and TITLE are accepted for the common converter calling
convention but are not used by this function."
  (goto-char (point-max))
  (insert (format "%S\n" "$A::WVJdHk!KF4Rt!K(B

	$AF4Rt7=08(B

 $AP!P4S"NDWVD84z1m!8F4Rt!97{:E#,(B \"u(yu) $ATrSC(B u: $A1mJ>!C(B

Pinyin base input method for Chinese charset GB2312 (`chinese-gb2312').

Pinyin is the standard roman transliteration method for Chinese.
Pinyin uses a sequence of Latin alphabetic characters for each Chinese
character.  The sequence is made by the combination of the initials
\(the beginning sounds) and finals (the ending sounds).

  initials: b p m f d t n l z c s zh ch sh r j q x g k h
  finals: a o e i er ai ei oa ou an en ang eng ong i ia iao ie iu ian in
          iang ing iong u ua uo uai ui uan un uan ueng yu yue yuan yun

  (Note: In the correct Pinyin writing, the sequence \"yu\" in the last
   four finals should be written by the character u-umlaut `$A(9(B'.)

With this input method, you enter a Chinese character by first
entering its pinyin spelling.

\\<quail-translation-docstring>

For instance, to input $ADc(B, you type \"n i C-n 3\".  The first \"n i\"
is a Pinyin, \"C-n\" selects the next group of candidates (each group
contains at most 10 characters), \"3\" select the third character in
that group.

This input method supports only Han characters.  The related input
method `chinese-py-punct' is the combination of this method and
`chinese-punct'; it supports both Han characters and punctuation
characters.

For double-width GB2312 characters corresponding to ASCII, use the
input method `chinese-qj'.

The correct Pinyin system specifies tones by diacritical marks, but
this input method doesn't use them, which results in easy (you don't
have to know the exact tones), but verbose (many characters are assigned
to the same key sequence) input.  You may also want to try the input
method `chinese-tonepy' with which you must specify tones by digits
\(1..5)."))
  (insert "  '((\"\C-?\" . quail-delete-last-char)
   (\".\" . quail-next-translation)
   (\">\" . quail-next-translation)
   (\",\" . quail-prev-translation)
   (\"<\" . quail-prev-translation))
  nil nil nil nil)\n\n")
  (insert "(quail-define-rules\n")
  (let ((pos (point)))
    (insert-buffer-substring-no-properties dicbuf)
    ;; Drop everything that precedes the first dictionary entry.
    (goto-char pos)
    (re-search-forward "^[a-z]")
    (beginning-of-line)
    (delete-region pos (point))
    ;; Rewrite every remaining line in place into a rule.
    (while (not (eobp))
      (insert "(\"")
      (skip-chars-forward "a-z")
      (insert "\" \"")
      ;; Remove the single separator character after the key.
      (delete-char 1)
      (end-of-line)
      ;; Strip any trailing CRs left over from DOS-style line ends.
      (while (= (preceding-char) ?\r)
	(delete-char -1))
      (insert "\")")
      (forward-line 1)))
  (insert ")\n"))

;; Generate the code for a Quail package in the current buffer from the
;; Ziranma dictionary in the buffer DICBUF.  The input method name of
;; the Quail package is NAME, and the title string is TITLE.

(defun ziranma-converter (dicbuf name title)
  "Append Quail package code built from the Ziranma dictionary in DICBUF.
Entries are read from DICBUF starting at the line after the one
beginning with \"%keyname end\".  Each line supplies a key (the text
up to the first space or tab) and a translation (the rest of the
line).  Translations sharing a key are collected in file order; when
all of them are one character long they are joined into a single
string, otherwise they are kept as a vector of strings.

The input method docstring, the key bindings and a
`quail-define-rules' form with the rules sorted by key are inserted
at the end of the current buffer.

NAME and TITLE are accepted for the common converter calling
convention but are not used by this function."
  (let (dic)
    (with-current-buffer dicbuf
      (goto-char (point-min))
      (search-forward "\n%keyname end")
      (forward-line 1)
      (let ((table (make-hash-table :test 'equal))
	    pos key trans val)
	(while (not (eobp))
	  (setq pos (point))
	  (skip-chars-forward "^ \t")
	  (setq key (buffer-substring-no-properties pos (point)))
	  (skip-chars-forward " \t")
	  (setq pos (point))
	  ;; Stop before CR as well as LF so DOS line ends are ignored.
	  (skip-chars-forward "^\r\n")
	  (setq trans (vector (buffer-substring-no-properties pos (point))))
	  (setq val (gethash key table))
	  (if val (setq trans (vconcat val trans)))
	  (puthash key trans table)
	  (forward-line 1))
	(maphash (lambda (key trans)
		   (let ((len (length trans))
			 (i 0))
		     ;; Count the leading single-character translations.
		     (while (and (< i len)
				 (= (length (aref trans i)) 1))
		       (setq i (1+ i)))
		     ;; All of them are single characters: store one
		     ;; string instead of a vector.
		     (if (= i len)
			 (setq trans (mapconcat 'identity trans "")))
		     (push (cons key trans) dic)))
		 table)))
    (setq dic (sort dic (lambda (x y) (string< (car x) (car y)))))
    (goto-char (point-max))
    (insert (format "%S\n" "$A::WVJdHk!K!>WTH;!?!K(B

                            $A<|EL6TUU1m(B:
 $A)3)%)%)W)%)%)W)%)%)W)%)%)W)%)%)W)%)%)W)%)%)W)%)%)W)%)%)W)%)%)7(B
 $A)'#Q(B  $A)'#W(B  $A)'#E(B  $A)'#R(B  $A)'#T(B  $A)'#Y(B  $A)'#U(Bsh$A)'#I(Bch$A)'#O(B  $A)'#P(B  $A)'(B
 $A)'(B  iu$A)'(B  ua$A)'(B   e$A)'(B uan$A)'(B  ue$A)'(B uai$A)'(B   u$A)'(B   i$A)'(B   o$A)'(B  un$A)'(B
 $A)'(B    $A)'(B  ia$A)'(B    $A)'(B van$A)'(B  ve$A)'(B ing$A)'(B    $A)'(B    $A)'(B  uo$A)'(B  vn$A)'(B
 $A);)W)%)_)W)%)_)W)%)_)W)%)_)W)%)_)W)%)_)W)%)_)W)%)_)W)%)_)W)%)?(B
   $A)'#A(B  $A)'#S(B  $A)'#D(B  $A)'#F(B  $A)'#G(B  $A)'#H(B  $A)'#J(B  $A)'#K(B  $A)'#L(B  $A)'(B
   $A)'(B   a$A)'(Biong$A)'(Buang$A)'(B  en$A)'(B eng$A)'(B ang$A)'(B  an$A)'(B  ao$A)'(B  ai$A)'(B
   $A)'(B    $A)'(B ong$A)'(Biang$A)'(B    $A)'(B  ng$A)'(B    $A)'(B    $A)'(B    $A)'(B    $A)'(B
   $A);)W)%)_)W)%)_)W)%)_)W)%)_)W)%)_)W)%)_)W)%)_)W)%)_)W)%)_)W)%)%)7(B
     $A)'#Z(B  $A)'#X(B  $A)'#C(B  $A)'#V(Bzh$A)'#B(B  $A)'#N(B  $A)'#M(B  $A)'#,(B  $A)'#.(B  $A)'(B $A#/(B $A)'(B
     $A)'(B  ei$A)'(B  ie$A)'(B iao$A)'(B  ui$A)'(B  ou$A)'(B  in$A)'(B ian$A)'G0R3)':sR3)'7{:E)'(B
     $A)'(B    $A)'(B    $A)'(B    $A)'(B   v$A)'(B    $A)'(B    $A)'(B    $A)'(B    $A)'(B    $A)'(B    $A)'(B
     $A);)%)%)_)%)%)_)%)%)_)%)%)_)%)%)_)%)%)_)%)%)_)%)%)_)%)%)_)%)%)?(B


Pinyin base input method for Chinese GB2312 characters (`chinese-gb2312').

Pinyin is the standard roman transliteration method for Chinese.
For the details of Pinyin system, see the documentation of the input
method `chinese-py'.

Unlike the standard spelling of Pinyin, in this input method all
initials and finals are assigned to single keys (see the above table).
For instance, the initial \"ch\" is assigned to the key `i', the final
\"iu\" is assigned to the key `q', and tones 1, 2, 3, 4, and $AGaIy(B are
assigned to the keys `q', `w', `e', `r', `t' respectively.

\\<quail-translation-docstring>

To input one-letter words, you type 4 keys, the first two for the
Pinyin of the letter, next one for tone, and the last one is always a
quote (').  For instance, \"vsq'\" input $AVP(B.  Exceptions are these
letters.  You can input them just by typing a single key.

	Character: $A04(B $A2;(B $A4N(B $A5D(B $A6~(B $A7"(B $A8v(B $A:M(B $A3v(B $A<0(B $A?I(B $AAK(B $AC;(B
	Key:	   a  b  c  d  e  f  g  h  i  j  k  l  m
	Character: $ADc(B $AE7(B $AF,(B $AF_(B $AHK(B $AH}(B $AK{(B $AJG(B $AWE(B $ANR(B $AP!(B $AR;(B $ATZ(B
	Key:	   n  o  p  q  r  s  t  u  v  w  x  y  z

To input two-letter words, you have two ways.  One way is to type 4
keys, two for the first Pinyin, two for the second Pinyin.  For
instance, \"vsgo\" inputs $AVP9z(B.  Another way is to type 3 keys: 2
initials of two letters, and quote (').  For instance, \"vg'\" also
inputs $AVP9z(B.

To input three-letter words, you type 4 keys: initials of three
letters, and the last is quote (').  For instance, \"bjy'2\" inputs $A11(B
$A>)Q<(B (the last `2' is to select one of the candidates).

To input words of more than three letters, you type 4 keys, initials
of the first three letters and the last letter.  For instance,
\"bjdt\" inputs $A11>)5gJSL((B.

To input symbols and punctuation, type `/' followed by one of `a' to
`z', then select one of the candidates."))
    (insert "  '((\"\C-?\" . quail-delete-last-char)
   (\".\" . quail-next-translation)
   (\"[\" . quail-next-translation)
   (\",\" . quail-prev-translation)
   (\"]\" . quail-prev-translation))
  nil nil nil nil)\n\n")
    (insert "(quail-define-rules\n")
    (dolist (elt dic)
      (insert (format "(%S %S)\n" (car elt) (cdr elt))))
    (insert ")\n")))

1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049
;; Generate the code for a Quail package in the current buffer from a
;; CTLau or CTLau-b5 dictionary in the buffer DICBUF.  The input
;; method name of the Quail package is NAME, and the title string is
;; TITLE.  DESCRIPTION is the string shown by describe-input-method.

(defun ctlau-converter (dicbuf name title description)
  "Append Quail package code built from the CTLau dictionary in DICBUF.
DESCRIPTION is inserted (printed with `%S') as the input method
docstring, followed by the key bindings and a `quail-define-rules'
form, all at the end of the current buffer.

The dictionary is the part of DICBUF between the line after the one
starting with \"#<hr>\" and the last line starting with \"<hr>\".
Each of its lines is rewritten in place into a rule
\(\"KEY\" \"REST-OF-LINE\"): the key is the leading run of upper-case
letters and backslashes, downcased, and a leading backslash is
doubled so that it survives being read back as a Lisp string.

NAME and TITLE are accepted for the common converter calling
convention but are not used by this function."
  (goto-char (point-max))
  (insert (format "%S\n" description))
  (insert "  '((\"\C-?\" . quail-delete-last-char)
   (\".\" . quail-next-translation)
   (\">\" . quail-next-translation)
   (\",\" . quail-prev-translation)
   (\"<\" . quail-prev-translation))
  nil nil nil nil)\n\n")
  (insert "(quail-define-rules\n")
  (let (dicbuf-start dicbuf-end key-start (pos (point)))
    ;; Find the dictionary, which starts below a horizontal rule and
    ;; ends at the second to last line in the HTML file.
    (with-current-buffer dicbuf
      (goto-char (point-min))
      (re-search-forward "^#<hr>")
      (forward-line 1)
      (setq dicbuf-start (point))
      (goto-char (point-max))
      (re-search-backward "^<hr>")
      (setq dicbuf-end (point)))
    (insert-buffer-substring-no-properties dicbuf dicbuf-start dicbuf-end)
    ;; CTLau-b5.html contains characters (0xa1 0xbc) which show up as
    ;; hollow boxes when the original characters in CTLau.html from
    ;; which the file is converted have no Big5 equivalent.  Go
    ;; through and delete them.
    (goto-char pos)
    (while (search-forward "$(0!{(B" nil t)
      (delete-char -1))
    ;; Uppercase keys in dictionary need to be downcased.  Backslashes
    ;; at the beginning of keys need to be turned into double
    ;; backslashes.
    (goto-char pos)
    (while (not (eobp))
      (insert "(\"")
      (if (char-equal (following-char) ?\\)
	  (insert "\\"))
      (setq key-start (point))
      (skip-chars-forward "\\\\A-Z")
      (downcase-region key-start (point))
      (insert "\" \"")
      ;; Remove the single separator character after the key.
      (delete-char 1)
      (end-of-line)
      ;; Strip any trailing CRs left over from DOS-style line ends.
      (while (= (preceding-char) ?\r)
	(delete-char -1))
      (insert "\")")
      (forward-line 1)))
  (insert ")\n"))

(defun ctlau-gb-converter (dicbuf name title)
  "Run `ctlau-converter' on DICBUF with the GB description text.
DICBUF, NAME and TITLE are passed through unchanged; the string
below is passed as the DESCRIPTION argument."
  (ctlau-converter dicbuf name title
"$A::WVJdHk!KAuN}OiJ=TARt!K(B

 $AAuN}OiJ=TASoW"Rt7=08(B
 Sidney Lau's Cantonese transcription scheme as described in his book
 \"Elementary Cantonese\", The Government Printer, Hong Kong, 1972.
 This file was prepared by Fung Fung Lee ($A@n7c7e(B).
 Originally converted from CTCPS3.tit
 Last modified: June 2, 1993.

 Some infrequent GB characters are accessed by typing \\, followed by
 the Cantonese romanization of the respective radical ($A2?JW(B)."))

(defun ctlau-b5-converter (dicbuf name title)
  "Run `ctlau-converter' on DICBUF with the Big5 description text.
DICBUF, NAME and TITLE are passed through unchanged; the string
below is passed as the DESCRIPTION argument."
  (ctlau-converter dicbuf name title
"$(0KH)tTT&,!(N,Tg>A*#Gn5x!((B

 $(0N,Tg>A*#GnM$0D5x'J7{(B
 Sidney Lau's Cantonese transcription scheme as described in his book
 \"Elementary Cantonese\", The Government Printer, Hong Kong, 1972.
 This file was prepared by Fung Fung Lee ($(0,XFS76(B).
 Originally converted from CTCPS3.tit
 Last modified: June 2, 1993.

 Some infrequent characters are accessed by typing \\, followed by
 the Cantonese romanization of the respective radical ($(0?f5}(B)."))

1114
;; `dos-8+3-filename' is called by `miscdic-convert' below; it is
;; declared here as coming from dos-fns.el and taking one argument,
;; FILENAME.
(declare-function dos-8+3-filename "dos-fns.el" (filename))
1115

1116
(defun miscdic-convert (filename &optional dirname)
  "Convert a dictionary file FILENAME into a Quail package.
Optional argument DIRNAME if specified is the directory name under which
the generated Quail package is saved.

Every entry of `quail-misc-package-ext-info' whose dictionary file
name occurs in FILENAME is processed; an entry is skipped with a
message when its Quail file is already newer than FILENAME."
  (interactive "FInput method dictionary file: ")
  (or (file-readable-p filename)
      (error "%s does not exist" filename))
  (let (coding-system-for-write)
    (dolist (slot quail-misc-package-ext-info)
      (let ((dicfile (nth 2 slot))
	    (quailfile (nth 4 slot)))
	;; DICFILE is a literal file name, so quote it before using it
	;; as a regexp; otherwise its "." would match any character.
	(when (and (or (string-match (regexp-quote dicfile) filename)
		       ;; MS-DOS filesystem truncates file names to 8+3
		       ;; limits, so "cangjie-table.cns" becomes
		       ;; "cangjie-.cns", and the above string-match
		       ;; fails.  Give DOS users a chance...
		       (and (fboundp 'msdos-long-file-names)
			    (not (msdos-long-file-names))
			    (string-match
			     (regexp-quote (dos-8+3-filename dicfile))
			     filename)))
		   (if (file-newer-than-file-p
			filename (expand-file-name quailfile dirname))
		       t
		     (message "%s is up to date" quailfile)
		     nil))
	  (let ((name (car slot))
		(title (nth 1 slot))
		(coding (nth 3 slot))
		(converter (nth 5 slot))
		(copyright (nth 6 slot)))
	    (message "Converting %s to %s..." dicfile quailfile)
	    ;; Explicitly set eol format to `unix'.
	    (setq coding-system-for-write
		  (coding-system-change-eol-conversion coding 'unix))
	    (with-temp-file (expand-file-name quailfile dirname)
	      (insert (format ";; Quail package `%s'\n" name))
	      (insert ";;   Generated by the command `miscdic-convert'\n")
	      (insert ";;   Source dictionary file: " dicfile "\n")
	      (insert ";;   Copyright notice of the source file\n")
	      (insert ";;------------------------------------------------------\n")
	      (insert copyright "\n")
	      (insert ";;------------------------------------------------------\n")
	      (insert "\n")
	      (insert ";;; Code:\n\n")
	      (insert "(require 'quail)\n")
	      (insert "(quail-define-package \"" name "\" \""
		      (if (eq coding 'big5) "Chinese-BIG5"
			(if (eq coding 'iso-2022-cn-ext) "Chinese-CNS"
			  "Chinese-GB"))
		      "\" \"" title "\" t\n")
	      (let* ((coding-system-for-read
		      (coding-system-change-eol-conversion coding 'unix))
		     (dicbuf (find-file-noselect filename)))
		;; Kill the dictionary buffer even if the converter
		;; signals an error, so it does not linger.
		(unwind-protect
		    (funcall converter dicbuf name title)
		  (kill-buffer dicbuf)))
	      (insert ";; Local Variables:\n"
		      ";; version-control: never\n"
		      ";; no-update-autoloads: t\n"
		      (format ";; coding: %s\n" coding)
		      ";; End:\n\n"
		      ";;; " quailfile " ends here\n"))
	    (message "Converting %s to %s...done" dicfile quailfile)))))))

(defun batch-miscdic-convert ()
Paul Eggert's avatar
Paul Eggert committed
1185
  "Run `miscdic-convert' on the files remaining on the command line.
1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198
Use this from the command line, with `-batch';
it won't work in an interactive Emacs.
If there's an argument \"-dir\", the next argument specifies a directory
to store generated Quail packages."
  (defvar command-line-args-left)	; Avoid compiler warning.
  (if (not noninteractive)
      (error "`batch-miscdic-convert' should be used only with -batch"))
  (let ((dir default-directory)
	filename)
    (while command-line-args-left
      (if (string= (car command-line-args-left) "-dir")
	  (progn
	    (setq command-line-args-left (cdr command-line-args-l