Commit 13d813b1 authored by Michal Nazarewicz's avatar Michal Nazarewicz

Add support for title-casing letters (bug#24603)

* src/casefiddle.c (struct casing_context, prepare_casing_context): Add
titlecase_char_table member.  It’s set to the ‘titlecase’ Unicode
property table if capitalisation has been requested.
(case_character): Make use of the titlecase_char_table to title-case
initial characters when capitalising.

* test/src/casefiddle-tests.el (casefiddle-tests--characters,
casefiddle-tests-casing): Update test cases which are now passing.
parent 8e5b909f
......@@ -355,7 +355,7 @@ same as in modes where the character is not whitespace.
Instead of only checking the modification time, Emacs now also checks
the file's actual content before prompting the user.
** Title case characters are properly converted to upper case.
** Title case characters are properly cased (from and into).
'upcase', 'upcase-region' et al. convert title case characters (such
as the single character "Dz") into their upper case form (such as "DZ").
As a downside, 'capitalize' and 'upcase-initials' produce awkward
......
......@@ -33,6 +33,9 @@ enum case_action {CASE_UP, CASE_DOWN, CASE_CAPITALIZE, CASE_CAPITALIZE_UP};
/* State for casing individual characters. */
struct casing_context {
/* A char-table with title-case character mappings or nil. Non-nil implies
flag is CASE_CAPITALIZE or CASE_CAPITALIZE_UP. */
Lisp_Object titlecase_char_table;
/* User-requested action. */
enum case_action flag;
/* If true, function operates on a buffer as opposed to a string or character.
......@@ -53,6 +56,8 @@ prepare_casing_context (struct casing_context *ctx,
ctx->flag = flag;
ctx->inbuffer = inbuffer;
ctx->inword = flag == CASE_DOWN;
ctx->titlecase_char_table = (int)flag < (int)CASE_CAPITALIZE ? Qnil :
uniprop_table (intern_c_string ("titlecase"));
/* If the case table is flagged as modified, rescan it. */
if (NILP (XCHAR_TABLE (BVAR (current_buffer, downcase_table))->extras[1]))
......@@ -67,10 +72,16 @@ prepare_casing_context (struct casing_context *ctx,
static int
case_character (struct casing_context *ctx, int ch)
{
Lisp_Object prop;
if (ctx->inword)
ch = ctx->flag == CASE_CAPITALIZE_UP ? ch : downcase (ch);
else if (!NILP (ctx->titlecase_char_table) &&
CHARACTERP (prop = CHAR_TABLE_REF (ctx->titlecase_char_table, ch)))
ch = XFASTINT (prop);
else
ch = upcase(ch);
if ((int) ctx->flag >= (int) CASE_CAPITALIZE)
ctx->inword = SYNTAX (ch) == Sword &&
(!ctx->inbuffer || ctx->inword || !syntax_prefix_flag_p (ch));
......@@ -198,8 +209,8 @@ The argument object is not altered--the value is a copy. */)
DEFUN ("capitalize", Fcapitalize, Scapitalize, 1, 1, 0,
doc: /* Convert argument to capitalized form and return that.
This means that each word's first character is upper case
and the rest is lower case.
This means that each word's first character is converted to either
title case or upper case, and the rest to lower case.
The argument may be a character or string. The result has the same type.
The argument object is not altered--the value is a copy. */)
(Lisp_Object obj)
......@@ -211,7 +222,8 @@ The argument object is not altered--the value is a copy. */)
DEFUN ("upcase-initials", Fupcase_initials, Supcase_initials, 1, 1, 0,
doc: /* Convert the initial of each word in the argument to upper case.
Do not change the other letters of each word.
This means that each word's first character is converted to either
title case or upper case, and the rest are left unchanged.
The argument may be a character or string. The result has the same type.
The argument object is not altered--the value is a copy. */)
(Lisp_Object obj)
......@@ -375,8 +387,8 @@ point and the mark is operated on. */)
DEFUN ("capitalize-region", Fcapitalize_region, Scapitalize_region, 2, 2, "r",
doc: /* Convert the region to capitalized form.
Capitalized form means each word's first character is upper case
and the rest of it is lower case.
This means that each word's first character is converted to either
title case or upper case, and the rest to lower case.
In programs, give two arguments, the starting and ending
character positions to operate on. */)
(Lisp_Object beg, Lisp_Object end)
......@@ -390,7 +402,8 @@ character positions to operate on. */)
DEFUN ("upcase-initials-region", Fupcase_initials_region,
Supcase_initials_region, 2, 2, "r",
doc: /* Upcase the initial of each word in the region.
Subsequent letters of each word are not changed.
This means that each word's first character is converted to either
title case or upper case, and the rest are left unchanged.
In programs, give two arguments, the starting and ending
character positions to operate on. */)
(Lisp_Object beg, Lisp_Object end)
......
......@@ -63,13 +63,9 @@
(?Ł ?Ł ?ł ?Ł)
(?ł ?Ł ?ł ?Ł)
;; FIXME(bug#24603): Commented ones are what we want.
;;(?DŽ ?DŽ ?dž ?Dž)
(?DŽ ?DŽ ?dž ?DŽ)
;;(?Dž ?DŽ ?dž ?Dž)
(?Dž ?DŽ ?dž ?DŽ)
;;(?dž ?DŽ ?dž ?Dž)
(?dž ?DŽ ?dž ?DŽ)
(?DŽ ?DŽ ?dž ?Dž)
(?Dž ?DŽ ?dž ?Dž)
(?dž ?DŽ ?dž ?Dž)
(?Σ ?Σ ?σ ?Σ)
(?σ ?Σ ?σ ?Σ)
......@@ -186,19 +182,19 @@
;; input upper lower capitalize up-initials
'(("Foo baR" "FOO BAR" "foo bar" "Foo Bar" "Foo BaR")
("Ⅷ ⅷ" "Ⅷ Ⅷ" "ⅷ ⅷ" "Ⅷ Ⅷ" "Ⅷ Ⅷ")
;; "DžUNGLA" is an unfortunate result but it’s really best we can
;; do while still being consistent. Hopefully, users only ever
;; use upcase-initials on camelCase identifiers not real words.
("DŽUNGLA" "DŽUNGLA" "džungla" "Džungla" "DžUNGLA")
("Džungla" "DŽUNGLA" "džungla" "Džungla" "Džungla")
("džungla" "DŽUNGLA" "džungla" "Džungla" "Džungla")
;; FIXME(bug#24603): Everything below is broken at the moment.
;; Here’s what should happen:
;;("DŽUNGLA" "DŽUNGLA" "džungla" "Džungla" "DžUNGLA")
;;("Džungla" "DŽUNGLA" "džungla" "Džungla" "Džungla")
;;("džungla" "DŽUNGLA" "džungla" "Džungla" "Džungla")
;;("define" "DEFINE" "define" "Define" "Define")
;;("fish" "FIsh" "fish" "Fish" "Fish")
;;("Straße" "STRASSE" "straße" "Straße" "Straße")
;;("ΌΣΟΣ" "ΌΣΟΣ" "όσος" "Όσος" "Όσος")
;; And here’s what is actually happening:
("DŽUNGLA" "DŽUNGLA" "džungla" "DŽungla" "DŽUNGLA")
("Džungla" "DŽUNGLA" "džungla" "DŽungla" "DŽungla")
("džungla" "DŽUNGLA" "džungla" "DŽungla" "DŽungla")
("define" "DEfiNE" "define" "Define" "Define")
("fish" "fiSH" "fish" "fish" "fish")
("Straße" "STRAßE" "straße" "Straße" "Straße")
......@@ -243,4 +239,21 @@
"\xef\xff\xef Zażółć GĘŚlą \xcf\xcf")))))))
(ert-deftest casefiddle-tests-char-casing ()
;; input upcase downcase [titlecase]
(dolist (test '((?a ?A ?a) (?A ?A ?a)
(?ł ?Ł ?ł) (?Ł ?Ł ?ł)
(?ß ?ß ?ß) (? ? ?ß)
(? ? ?) (? ? ?)
(?DŽ ?DŽ ?dž ?Dž) (?Dž ?DŽ ?dž ?Dž) (?dž ?DŽ ?dž ?Dž)))
(let ((ch (car test))
(up (nth 1 test))
(lo (nth 2 test))
(tc (or (nth 3 test) (nth 1 test))))
(should (eq up (upcase ch)))
(should (eq lo (downcase ch)))
(should (eq tc (capitalize ch)))
(should (eq tc (upcase-initials ch))))))
;;; casefiddle-tests.el ends here
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment