casefiddle.c 15.4 KB
Newer Older
Jim Blandy's avatar
Jim Blandy committed
1
/* GNU Emacs case conversion functions.
2

Paul Eggert's avatar
Paul Eggert committed
3
Copyright (C) 1985, 1994, 1997-1999, 2001-2017 Free Software Foundation,
4
Inc.
Jim Blandy's avatar
Jim Blandy committed
5 6 7

This file is part of GNU Emacs.

8
GNU Emacs is free software: you can redistribute it and/or modify
Jim Blandy's avatar
Jim Blandy committed
9
it under the terms of the GNU General Public License as published by
10 11
the Free Software Foundation, either version 3 of the License, or (at
your option) any later version.
Jim Blandy's avatar
Jim Blandy committed
12 13 14 15 16 17 18

GNU Emacs is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
19
along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
Jim Blandy's avatar
Jim Blandy committed
20 21


22
#include <config.h>
23

Jim Blandy's avatar
Jim Blandy committed
24
#include "lisp.h"
25
#include "character.h"
26
#include "buffer.h"
Jim Blandy's avatar
Jim Blandy committed
27 28
#include "commands.h"
#include "syntax.h"
Kenichi Handa's avatar
Kenichi Handa committed
29
#include "composite.h"
Stefan Monnier's avatar
Stefan Monnier committed
30
#include "keymap.h"
Jim Blandy's avatar
Jim Blandy committed
31 32

enum case_action {CASE_UP, CASE_DOWN, CASE_CAPITALIZE, CASE_CAPITALIZE_UP};
33 34 35

/* State for casing individual characters.  */
struct casing_context {
36 37 38
  /* A char-table with title-case character mappings or nil.  Non-nil implies
     flag is CASE_CAPITALIZE or CASE_CAPITALIZE_UP.  */
  Lisp_Object titlecase_char_table;
39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58
  /* User-requested action. */
  enum case_action flag;
  /* If true, function operates on a buffer as opposed to a string or character.
     When run on a buffer, syntax_prefix_flag_p is taken into account when
     determined inword flag. */
  bool inbuffer;
  /* Conceptually, this denotes whether we are inside of a word except
     that if flag is CASE_UP it’s always false and if flag is CASE_DOWN
     this is always true. */
  bool inword;
};

/* Initialise CTX structure for casing characters. */
static void
prepare_casing_context (struct casing_context *ctx,
			enum case_action flag, bool inbuffer)
{
  ctx->flag = flag;
  ctx->inbuffer = inbuffer;
  ctx->inword = flag == CASE_DOWN;
59 60
  ctx->titlecase_char_table = (int)flag < (int)CASE_CAPITALIZE ? Qnil :
    uniprop_table (intern_c_string ("titlecase"));
61 62 63 64 65 66 67 68 69 70 71 72 73 74

  /* If the case table is flagged as modified, rescan it.  */
  if (NILP (XCHAR_TABLE (BVAR (current_buffer, downcase_table))->extras[1]))
    Fset_case_table (BVAR (current_buffer, downcase_table));

  if (inbuffer && (int) flag >= (int) CASE_CAPITALIZE)
    SETUP_BUFFER_SYNTAX_TABLE ();	/* For syntax_prefix_flag_p.  */
}

/* Based on CTX, case character CH accordingly.  Update CTX as necessary.
   Return cased character. */
static int
case_character (struct casing_context *ctx, int ch)
{
75 76
  Lisp_Object prop;

77 78
  if (ctx->inword)
    ch = ctx->flag == CASE_CAPITALIZE_UP ? ch : downcase (ch);
79 80 81
  else if (!NILP (ctx->titlecase_char_table) &&
	   CHARACTERP (prop = CHAR_TABLE_REF (ctx->titlecase_char_table, ch)))
    ch = XFASTINT (prop);
82 83
  else
    ch = upcase(ch);
84

85 86 87 88 89
  if ((int) ctx->flag >= (int) CASE_CAPITALIZE)
    ctx->inword = SYNTAX (ch) == Sword &&
      (!ctx->inbuffer || ctx->inword || !syntax_prefix_flag_p (ch));
  return ch;
}
Jim Blandy's avatar
Jim Blandy committed
90

91
static Lisp_Object
92
do_casify_natnum (struct casing_context *ctx, Lisp_Object obj)
Jim Blandy's avatar
Jim Blandy committed
93
{
94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114
  int flagbits = (CHAR_ALT | CHAR_SUPER | CHAR_HYPER
		  | CHAR_SHIFT | CHAR_CTL | CHAR_META);
  int flags, ch = XFASTINT (obj), cased;
  bool multibyte;

  /* If the character has higher bits set above the flags, return it unchanged.
     It is not a real character.  */
  if (UNSIGNED_CMP (ch, >, flagbits))
    return obj;

  flags = ch & flagbits;
  ch = ch & ~flagbits;

  /* FIXME: Even if enable-multibyte-characters is nil, we may manipulate
     multibyte chars.  This means we have a bug for latin-1 chars since when we
     receive an int 128-255 we can't tell whether it's an eight-bit byte or
     a latin-1 char.  */
  multibyte = ch >= 256
    || !NILP (BVAR (current_buffer, enable_multibyte_characters));
  if (! multibyte)
    MAKE_CHAR_MULTIBYTE (ch);
115
  cased = case_character (ctx, ch);
116 117 118 119 120 121 122 123 124 125
  if (cased == ch)
    return obj;

  if (! multibyte)
    MAKE_CHAR_UNIBYTE (cased);
  XSETFASTINT (obj, cased | flags);
  return obj;
}

static Lisp_Object
126
do_casify_multibyte_string (struct casing_context *ctx, Lisp_Object obj)
127 128 129 130 131 132 133 134 135 136 137 138 139 140 141
{
  ptrdiff_t i, i_byte, size = SCHARS (obj);
  int len, ch, cased;
  USE_SAFE_ALLOCA;
  ptrdiff_t o_size;
  if (INT_MULTIPLY_WRAPV (size, MAX_MULTIBYTE_LENGTH, &o_size))
    o_size = PTRDIFF_MAX;
  unsigned char *dst = SAFE_ALLOCA (o_size);
  unsigned char *o = dst;

  for (i = i_byte = 0; i < size; i++, i_byte += len)
    {
      if (o_size - MAX_MULTIBYTE_LENGTH < o - dst)
	string_overflow ();
      ch = STRING_CHAR_AND_LENGTH (SDATA (obj) + i_byte, len);
142
      cased = case_character (ctx, ch);
143 144 145 146 147 148 149 150 151
      o += CHAR_STRING (cased, o);
    }
  eassert (o - dst <= o_size);
  obj = make_multibyte_string ((char *) dst, size, o - dst);
  SAFE_FREE ();
  return obj;
}

static Lisp_Object
152
do_casify_unibyte_string (struct casing_context *ctx, Lisp_Object obj)
153 154 155 156 157 158 159 160 161
{
  ptrdiff_t i, size = SCHARS (obj);
  int ch, cased;

  obj = Fcopy_sequence (obj);
  for (i = 0; i < size; i++)
    {
      ch = SREF (obj, i);
      MAKE_CHAR_MULTIBYTE (ch);
162
      cased = case_character (ctx, ch);
163 164
      if (ch == cased)
	continue;
165
      MAKE_CHAR_UNIBYTE (cased);
166
      /* If the char can't be converted to a valid byte, just don't change it */
167 168
      if (cased >= 0 && cased < 256)
	SSET (obj, i, cased);
169 170 171
    }
  return obj;
}
Jim Blandy's avatar
Jim Blandy committed
172

173 174 175
static Lisp_Object
casify_object (enum case_action flag, Lisp_Object obj)
{
176 177
  struct casing_context ctx;
  prepare_casing_context (&ctx, flag, false);
178

179
  if (NATNUMP (obj))
180
    return do_casify_natnum (&ctx, obj);
181
  else if (!STRINGP (obj))
182
    wrong_type_argument (Qchar_or_string_p, obj);
183 184 185
  else if (!SCHARS (obj))
    return obj;
  else if (STRING_MULTIBYTE (obj))
186
    return do_casify_multibyte_string (&ctx, obj);
187
  else
188
    return do_casify_unibyte_string (&ctx, obj);
Jim Blandy's avatar
Jim Blandy committed
189 190
}

Paul Eggert's avatar
Paul Eggert committed
191
DEFUN ("upcase", Fupcase, Supcase, 1, 1, 0,
192 193 194 195
       doc: /* Convert argument to upper case and return that.
The argument may be a character or string.  The result has the same type.
The argument object is not altered--the value is a copy.
See also `capitalize', `downcase' and `upcase-initials'.  */)
Dan Nicolaescu's avatar
Dan Nicolaescu committed
196
  (Lisp_Object obj)
Jim Blandy's avatar
Jim Blandy committed
197 198 199 200
{
  return casify_object (CASE_UP, obj);
}

Paul Eggert's avatar
Paul Eggert committed
201
DEFUN ("downcase", Fdowncase, Sdowncase, 1, 1, 0,
202 203 204
       doc: /* Convert argument to lower case and return that.
The argument may be a character or string.  The result has the same type.
The argument object is not altered--the value is a copy.  */)
Dan Nicolaescu's avatar
Dan Nicolaescu committed
205
  (Lisp_Object obj)
Jim Blandy's avatar
Jim Blandy committed
206 207 208 209 210
{
  return casify_object (CASE_DOWN, obj);
}

DEFUN ("capitalize", Fcapitalize, Scapitalize, 1, 1, 0,
211
       doc: /* Convert argument to capitalized form and return that.
212 213
This means that each word's first character is converted to either
title case or upper case, and the rest to lower case.
214 215
The argument may be a character or string.  The result has the same type.
The argument object is not altered--the value is a copy.  */)
Dan Nicolaescu's avatar
Dan Nicolaescu committed
216
  (Lisp_Object obj)
Jim Blandy's avatar
Jim Blandy committed
217 218 219
{
  return casify_object (CASE_CAPITALIZE, obj);
}
220

221 222
/* Like Fcapitalize but change only the initials.  */

Paul Eggert's avatar
Paul Eggert committed
223
DEFUN ("upcase-initials", Fupcase_initials, Supcase_initials, 1, 1, 0,
224
       doc: /* Convert the initial of each word in the argument to upper case.
225 226
This means that each word's first character is converted to either
title case or upper case, and the rest are left unchanged.
227 228
The argument may be a character or string.  The result has the same type.
The argument object is not altered--the value is a copy.  */)
Dan Nicolaescu's avatar
Dan Nicolaescu committed
229
  (Lisp_Object obj)
230 231 232
{
  return casify_object (CASE_CAPITALIZE_UP, obj);
}
Jim Blandy's avatar
Jim Blandy committed
233

234 235 236 237 238 239
/* Based on CTX, case region in a unibyte buffer from POS to *ENDP.  Return
   first position that has changed and save last position in *ENDP.  If no
   characters were changed, return -1 and *ENDP is unspecified. */
static ptrdiff_t
do_casify_unibyte_region (struct casing_context *ctx,
			  ptrdiff_t pos, ptrdiff_t *endp)
Jim Blandy's avatar
Jim Blandy committed
240
{
241 242 243
  ptrdiff_t first = -1, last = -1;  /* Position of first and last changes. */
  ptrdiff_t end = *endp;
  int ch, cased;
244

245 246 247 248
  for (; pos < end; ++pos)
    {
      ch = FETCH_BYTE (pos);
      MAKE_CHAR_MULTIBYTE (ch);
249

250 251 252
      cased = case_character (ctx, ch);
      if (cased == ch)
	continue;
Jim Blandy's avatar
Jim Blandy committed
253

254 255 256
      last = pos;
      if (first < 0)
	first = pos;
257

258 259 260
      MAKE_CHAR_UNIBYTE (cased);
      FETCH_BYTE (pos) = cased;
    }
Jim Blandy's avatar
Jim Blandy committed
261

262 263 264
  *endp = last + 1;
  return first;
}
Jim Blandy's avatar
Jim Blandy committed
265

266 267 268 269 270 271 272 273 274 275 276
/* Based on CTX, case region in a multibyte buffer from POS to *ENDP.  Return
   first position that has changed and save last position in *ENDP.  If no
   characters were changed, return -1 and *ENDP is unspecified. */
static ptrdiff_t
do_casify_multibyte_region (struct casing_context *ctx,
                           ptrdiff_t pos, ptrdiff_t *endp)
{
  ptrdiff_t first = -1, last = -1;  /* Position of first and last changes. */
  ptrdiff_t pos_byte = CHAR_TO_BYTE (pos), end = *endp;
  ptrdiff_t opoint = PT;
  int ch, cased, len;
277

278
  while (pos < end)
Karl Heuer's avatar
Karl Heuer committed
279
    {
280 281 282
      ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (pos_byte), len);
      cased = case_character (ctx, ch);
      if (cased != ch)
Karl Heuer's avatar
Karl Heuer committed
283
	{
284
	  last = pos;
285
	  if (first < 0)
286
	    first = pos;
287

288 289
	  if (ASCII_CHAR_P (cased) && ASCII_CHAR_P (ch))
	    FETCH_BYTE (pos_byte) = cased;
290
	  else
Karl Heuer's avatar
Karl Heuer committed
291
	    {
Kenichi Handa's avatar
Kenichi Handa committed
292
	      unsigned char str[MAX_MULTIBYTE_LENGTH];
293 294 295
	      int totlen = CHAR_STRING (cased, str);
	      if (len == totlen)
		memcpy (BYTE_POS_ADDR (pos_byte), str, len);
296
	      else
297 298 299 300 301
		/* Replace one character with the other(s), keeping text
		   properties the same.  */
		replace_range_2 (pos, pos_byte, pos + 1, pos_byte + len,
				 (char *) str, 9, totlen, 0);
	      len = totlen;
Karl Heuer's avatar
Karl Heuer committed
302 303
	    }
	}
304 305
      pos++;
      pos_byte += len;
Jim Blandy's avatar
Jim Blandy committed
306 307
    }

Kenichi Handa's avatar
Kenichi Handa committed
308
  if (PT != opoint)
309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337
    TEMP_SET_PT_BOTH (opoint, CHAR_TO_BYTE (opoint));

  *endp = last;
  return first;
}

/* flag is CASE_UP, CASE_DOWN or CASE_CAPITALIZE or CASE_CAPITALIZE_UP.
   b and e specify range of buffer to operate on. */
static void
casify_region (enum case_action flag, Lisp_Object b, Lisp_Object e)
{
  struct casing_context ctx;
  ptrdiff_t start, end;

  if (EQ (b, e))
    /* Not modifying because nothing marked */
    return;

  validate_region (&b, &e);
  start = XFASTINT (b);
  end = XFASTINT (e);
  modify_text (start, end);
  record_change (start, end - start);
  prepare_casing_context (&ctx, flag, true);

  if (NILP (BVAR (current_buffer, enable_multibyte_characters)))
    start = do_casify_unibyte_region (&ctx, start, &end);
  else
    start = do_casify_multibyte_region (&ctx, start, &end);
Kenichi Handa's avatar
Kenichi Handa committed
338

339
  if (start >= 0)
Kenichi Handa's avatar
Kenichi Handa committed
340
    {
341 342
      signal_after_change (start, end + 1 - start, end + 1 - start);
      update_compositions (start, end + 1, CHECK_ALL);
Kenichi Handa's avatar
Kenichi Handa committed
343
    }
Jim Blandy's avatar
Jim Blandy committed
344 345
}

346 347
DEFUN ("upcase-region", Fupcase_region, Supcase_region, 2, 3,
       "(list (region-beginning) (region-end) (region-noncontiguous-p))",
348 349 350 351 352
       doc: /* Convert the region to upper case.  In programs, wants two arguments.
These arguments specify the starting and ending character numbers of
the region to operate on.  When used as a command, the text between
point and the mark is operated on.
See also `capitalize-region'.  */)
353
  (Lisp_Object beg, Lisp_Object end, Lisp_Object region_noncontiguous_p)
Jim Blandy's avatar
Jim Blandy committed
354
{
355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370
  Lisp_Object bounds = Qnil;

  if (!NILP (region_noncontiguous_p))
    {
      bounds = call1 (Fsymbol_value (intern ("region-extract-function")),
		      intern ("bounds"));

      while (CONSP (bounds))
	{
	  casify_region (CASE_UP, XCAR (XCAR (bounds)), XCDR (XCAR (bounds)));
	  bounds = XCDR (bounds);
	}
    }
  else
    casify_region (CASE_UP, beg, end);

Jim Blandy's avatar
Jim Blandy committed
371 372 373
  return Qnil;
}

374 375
DEFUN ("downcase-region", Fdowncase_region, Sdowncase_region, 2, 3,
       "(list (region-beginning) (region-end) (region-noncontiguous-p))",
376 377 378 379
       doc: /* Convert the region to lower case.  In programs, wants two arguments.
These arguments specify the starting and ending character numbers of
the region to operate on.  When used as a command, the text between
point and the mark is operated on.  */)
380
  (Lisp_Object beg, Lisp_Object end, Lisp_Object region_noncontiguous_p)
Jim Blandy's avatar
Jim Blandy committed
381
{
382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397
  Lisp_Object bounds = Qnil;

  if (!NILP (region_noncontiguous_p))
    {
      bounds = call1 (Fsymbol_value (intern ("region-extract-function")),
		      intern ("bounds"));

      while (CONSP (bounds))
	{
	  casify_region (CASE_DOWN, XCAR (XCAR (bounds)), XCDR (XCAR (bounds)));
	  bounds = XCDR (bounds);
	}
    }
  else
    casify_region (CASE_DOWN, beg, end);

Jim Blandy's avatar
Jim Blandy committed
398 399 400 401
  return Qnil;
}

DEFUN ("capitalize-region", Fcapitalize_region, Scapitalize_region, 2, 2, "r",
402
       doc: /* Convert the region to capitalized form.
403 404
This means that each word's first character is converted to either
title case or upper case, and the rest to lower case.
405 406
In programs, give two arguments, the starting and ending
character positions to operate on.  */)
Dan Nicolaescu's avatar
Dan Nicolaescu committed
407
  (Lisp_Object beg, Lisp_Object end)
Jim Blandy's avatar
Jim Blandy committed
408
{
409
  casify_region (CASE_CAPITALIZE, beg, end);
Jim Blandy's avatar
Jim Blandy committed
410 411 412
  return Qnil;
}

413 414
/* Like Fcapitalize_region but change only the initials.  */

Paul Eggert's avatar
Paul Eggert committed
415
DEFUN ("upcase-initials-region", Fupcase_initials_region,
416
       Supcase_initials_region, 2, 2, "r",
417
       doc: /* Upcase the initial of each word in the region.
418 419
This means that each word's first character is converted to either
title case or upper case, and the rest are left unchanged.
420 421
In programs, give two arguments, the starting and ending
character positions to operate on.  */)
Dan Nicolaescu's avatar
Dan Nicolaescu committed
422
  (Lisp_Object beg, Lisp_Object end)
423
{
424
  casify_region (CASE_CAPITALIZE_UP, beg, end);
425 426
  return Qnil;
}
Jim Blandy's avatar
Jim Blandy committed
427

428
static Lisp_Object
429
casify_word (enum case_action flag, Lisp_Object arg)
Jim Blandy's avatar
Jim Blandy committed
430
{
431
  CHECK_NUMBER (arg);
432 433 434 435 436
  ptrdiff_t farend = scan_words (PT, XINT (arg));
  if (!farend)
    farend = XINT (arg) <= 0 ? BEGV : ZV;
  ptrdiff_t newpoint = max (PT, farend);
  casify_region (flag, make_number (PT), make_number (farend));
437
  SET_PT (newpoint);
438
  return Qnil;
Jim Blandy's avatar
Jim Blandy committed
439 440 441
}

DEFUN ("upcase-word", Fupcase_word, Supcase_word, 1, 1, "p",
442 443 444 445 446
       doc: /* Convert to upper case from point to end of word, moving over.

If point is in the middle of a word, the part of that word before point
is ignored when moving forward.

447 448
With negative argument, convert previous words but do not move.
See also `capitalize-word'.  */)
Dan Nicolaescu's avatar
Dan Nicolaescu committed
449
  (Lisp_Object arg)
Jim Blandy's avatar
Jim Blandy committed
450
{
451
  return casify_word (CASE_UP, arg);
Jim Blandy's avatar
Jim Blandy committed
452 453 454
}

DEFUN ("downcase-word", Fdowncase_word, Sdowncase_word, 1, 1, "p",
455 456 457 458 459
       doc: /* Convert to lower case from point to end of word, moving over.

If point is in the middle of a word, the part of that word before point
is ignored when moving forward.

460
With negative argument, convert previous words but do not move.  */)
Dan Nicolaescu's avatar
Dan Nicolaescu committed
461
  (Lisp_Object arg)
Jim Blandy's avatar
Jim Blandy committed
462
{
463
  return casify_word (CASE_DOWN, arg);
Jim Blandy's avatar
Jim Blandy committed
464 465 466
}

DEFUN ("capitalize-word", Fcapitalize_word, Scapitalize_word, 1, 1, "p",
467 468
       doc: /* Capitalize from point to the end of word, moving over.
With numerical argument ARG, capitalize the next ARG-1 words as well.
469 470
This gives the word(s) a first character in upper case
and the rest lower case.
471 472 473 474

If point is in the middle of a word, the part of that word before point
is ignored when moving forward.

475
With negative argument, capitalize previous words but do not move.  */)
Dan Nicolaescu's avatar
Dan Nicolaescu committed
476
  (Lisp_Object arg)
Jim Blandy's avatar
Jim Blandy committed
477
{
478
  return casify_word (CASE_CAPITALIZE, arg);
Jim Blandy's avatar
Jim Blandy committed
479 480
}

Andreas Schwab's avatar
Andreas Schwab committed
481
void
482
syms_of_casefiddle (void)
Jim Blandy's avatar
Jim Blandy committed
483
{
484
  DEFSYM (Qidentity, "identity");
Jim Blandy's avatar
Jim Blandy committed
485 486 487
  defsubr (&Supcase);
  defsubr (&Sdowncase);
  defsubr (&Scapitalize);
488
  defsubr (&Supcase_initials);
Jim Blandy's avatar
Jim Blandy committed
489 490 491
  defsubr (&Supcase_region);
  defsubr (&Sdowncase_region);
  defsubr (&Scapitalize_region);
492
  defsubr (&Supcase_initials_region);
Jim Blandy's avatar
Jim Blandy committed
493 494 495 496 497
  defsubr (&Supcase_word);
  defsubr (&Sdowncase_word);
  defsubr (&Scapitalize_word);
}

Andreas Schwab's avatar
Andreas Schwab committed
498
void
499
keys_of_casefiddle (void)
Jim Blandy's avatar
Jim Blandy committed
500
{
Juanma Barranquero's avatar
Juanma Barranquero committed
501
  initial_define_key (control_x_map, Ctl ('U'), "upcase-region");
Jim Blandy's avatar
Jim Blandy committed
502
  Fput (intern ("upcase-region"), Qdisabled, Qt);
Juanma Barranquero's avatar
Juanma Barranquero committed
503
  initial_define_key (control_x_map, Ctl ('L'), "downcase-region");
Jim Blandy's avatar
Jim Blandy committed
504 505
  Fput (intern ("downcase-region"), Qdisabled, Qt);

Jim Blandy's avatar
Jim Blandy committed
506 507 508 509
  initial_define_key (meta_map, 'u', "upcase-word");
  initial_define_key (meta_map, 'l', "downcase-word");
  initial_define_key (meta_map, 'c', "capitalize-word");
}