casefiddle.c 13.9 KB
Newer Older
Jim Blandy's avatar
Jim Blandy committed
1
/* GNU Emacs case conversion functions.
2

Paul Eggert's avatar
Paul Eggert committed
3
Copyright (C) 1985, 1994, 1997-1999, 2001-2017 Free Software Foundation,
4
Inc.
Jim Blandy's avatar
Jim Blandy committed
5 6 7

This file is part of GNU Emacs.

8
GNU Emacs is free software: you can redistribute it and/or modify
Jim Blandy's avatar
Jim Blandy committed
9
it under the terms of the GNU General Public License as published by
10 11
the Free Software Foundation, either version 3 of the License, or (at
your option) any later version.
Jim Blandy's avatar
Jim Blandy committed
12 13 14 15 16 17 18

GNU Emacs is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
19
along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
Jim Blandy's avatar
Jim Blandy committed
20 21


22
#include <config.h>
23

Jim Blandy's avatar
Jim Blandy committed
24
#include "lisp.h"
25
#include "character.h"
26
#include "buffer.h"
Jim Blandy's avatar
Jim Blandy committed
27 28
#include "commands.h"
#include "syntax.h"
Kenichi Handa's avatar
Kenichi Handa committed
29
#include "composite.h"
Stefan Monnier's avatar
Stefan Monnier committed
30
#include "keymap.h"
Jim Blandy's avatar
Jim Blandy committed
31 32

enum case_action {CASE_UP, CASE_DOWN, CASE_CAPITALIZE, CASE_CAPITALIZE_UP};
33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78

/* State for casing individual characters.  */
struct casing_context {
  /* User-requested action. */
  enum case_action flag;
  /* If true, function operates on a buffer as opposed to a string or character.
     When run on a buffer, syntax_prefix_flag_p is taken into account when
     determined inword flag. */
  bool inbuffer;
  /* Conceptually, this denotes whether we are inside of a word except
     that if flag is CASE_UP it’s always false and if flag is CASE_DOWN
     this is always true. */
  bool inword;
};

/* Initialise CTX structure for casing characters. */
static void
prepare_casing_context (struct casing_context *ctx,
			enum case_action flag, bool inbuffer)
{
  ctx->flag = flag;
  ctx->inbuffer = inbuffer;
  ctx->inword = flag == CASE_DOWN;

  /* If the case table is flagged as modified, rescan it.  */
  if (NILP (XCHAR_TABLE (BVAR (current_buffer, downcase_table))->extras[1]))
    Fset_case_table (BVAR (current_buffer, downcase_table));

  if (inbuffer && (int) flag >= (int) CASE_CAPITALIZE)
    SETUP_BUFFER_SYNTAX_TABLE ();	/* For syntax_prefix_flag_p.  */
}

/* Based on CTX, case character CH accordingly.  Update CTX as necessary.
   Return cased character. */
static int
case_character (struct casing_context *ctx, int ch)
{
  if (ctx->inword)
    ch = ctx->flag == CASE_CAPITALIZE_UP ? ch : downcase (ch);
  else
    ch = upcase(ch);
  if ((int) ctx->flag >= (int) CASE_CAPITALIZE)
    ctx->inword = SYNTAX (ch) == Sword &&
      (!ctx->inbuffer || ctx->inword || !syntax_prefix_flag_p (ch));
  return ch;
}
Jim Blandy's avatar
Jim Blandy committed
79

80
static Lisp_Object
81
do_casify_natnum (struct casing_context *ctx, Lisp_Object obj)
Jim Blandy's avatar
Jim Blandy committed
82
{
83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103
  int flagbits = (CHAR_ALT | CHAR_SUPER | CHAR_HYPER
		  | CHAR_SHIFT | CHAR_CTL | CHAR_META);
  int flags, ch = XFASTINT (obj), cased;
  bool multibyte;

  /* If the character has higher bits set above the flags, return it unchanged.
     It is not a real character.  */
  if (UNSIGNED_CMP (ch, >, flagbits))
    return obj;

  flags = ch & flagbits;
  ch = ch & ~flagbits;

  /* FIXME: Even if enable-multibyte-characters is nil, we may manipulate
     multibyte chars.  This means we have a bug for latin-1 chars since when we
     receive an int 128-255 we can't tell whether it's an eight-bit byte or
     a latin-1 char.  */
  multibyte = ch >= 256
    || !NILP (BVAR (current_buffer, enable_multibyte_characters));
  if (! multibyte)
    MAKE_CHAR_MULTIBYTE (ch);
104
  cased = case_character (ctx, ch);
105 106 107 108 109 110 111 112 113 114
  if (cased == ch)
    return obj;

  if (! multibyte)
    MAKE_CHAR_UNIBYTE (cased);
  XSETFASTINT (obj, cased | flags);
  return obj;
}

static Lisp_Object
115
do_casify_multibyte_string (struct casing_context *ctx, Lisp_Object obj)
116 117 118 119 120 121 122 123 124 125 126 127 128 129 130
{
  ptrdiff_t i, i_byte, size = SCHARS (obj);
  int len, ch, cased;
  USE_SAFE_ALLOCA;
  ptrdiff_t o_size;
  if (INT_MULTIPLY_WRAPV (size, MAX_MULTIBYTE_LENGTH, &o_size))
    o_size = PTRDIFF_MAX;
  unsigned char *dst = SAFE_ALLOCA (o_size);
  unsigned char *o = dst;

  for (i = i_byte = 0; i < size; i++, i_byte += len)
    {
      if (o_size - MAX_MULTIBYTE_LENGTH < o - dst)
	string_overflow ();
      ch = STRING_CHAR_AND_LENGTH (SDATA (obj) + i_byte, len);
131
      cased = case_character (ctx, ch);
132 133 134 135 136 137 138 139 140
      o += CHAR_STRING (cased, o);
    }
  eassert (o - dst <= o_size);
  obj = make_multibyte_string ((char *) dst, size, o - dst);
  SAFE_FREE ();
  return obj;
}

static Lisp_Object
141
do_casify_unibyte_string (struct casing_context *ctx, Lisp_Object obj)
142 143 144 145 146 147 148 149 150
{
  ptrdiff_t i, size = SCHARS (obj);
  int ch, cased;

  obj = Fcopy_sequence (obj);
  for (i = 0; i < size; i++)
    {
      ch = SREF (obj, i);
      MAKE_CHAR_MULTIBYTE (ch);
151
      cased = case_character (ctx, ch);
152 153
      if (ch == cased)
	continue;
154
      MAKE_CHAR_UNIBYTE (cased);
155
      /* If the char can't be converted to a valid byte, just don't change it */
156 157
      if (cased >= 0 && cased < 256)
	SSET (obj, i, cased);
158 159 160
    }
  return obj;
}
Jim Blandy's avatar
Jim Blandy committed
161

162 163 164
static Lisp_Object
casify_object (enum case_action flag, Lisp_Object obj)
{
165 166
  struct casing_context ctx;
  prepare_casing_context (&ctx, flag, false);
167

168
  if (NATNUMP (obj))
169
    return do_casify_natnum (&ctx, obj);
170
  else if (!STRINGP (obj))
171
    wrong_type_argument (Qchar_or_string_p, obj);
172 173 174
  else if (!SCHARS (obj))
    return obj;
  else if (STRING_MULTIBYTE (obj))
175
    return do_casify_multibyte_string (&ctx, obj);
176
  else
177
    return do_casify_unibyte_string (&ctx, obj);
Jim Blandy's avatar
Jim Blandy committed
178 179
}

Paul Eggert's avatar
Paul Eggert committed
180
DEFUN ("upcase", Fupcase, Supcase, 1, 1, 0,
181 182 183 184
       doc: /* Convert argument to upper case and return that.
The argument may be a character or string.  The result has the same type.
The argument object is not altered--the value is a copy.
See also `capitalize', `downcase' and `upcase-initials'.  */)
Dan Nicolaescu's avatar
Dan Nicolaescu committed
185
  (Lisp_Object obj)
Jim Blandy's avatar
Jim Blandy committed
186 187 188 189
{
  return casify_object (CASE_UP, obj);
}

Paul Eggert's avatar
Paul Eggert committed
190
DEFUN ("downcase", Fdowncase, Sdowncase, 1, 1, 0,
191 192 193
       doc: /* Convert argument to lower case and return that.
The argument may be a character or string.  The result has the same type.
The argument object is not altered--the value is a copy.  */)
Dan Nicolaescu's avatar
Dan Nicolaescu committed
194
  (Lisp_Object obj)
Jim Blandy's avatar
Jim Blandy committed
195 196 197 198 199
{
  return casify_object (CASE_DOWN, obj);
}

DEFUN ("capitalize", Fcapitalize, Scapitalize, 1, 1, 0,
200 201 202 203 204
       doc: /* Convert argument to capitalized form and return that.
This means that each word's first character is upper case
and the rest is lower case.
The argument may be a character or string.  The result has the same type.
The argument object is not altered--the value is a copy.  */)
Dan Nicolaescu's avatar
Dan Nicolaescu committed
205
  (Lisp_Object obj)
Jim Blandy's avatar
Jim Blandy committed
206 207 208
{
  return casify_object (CASE_CAPITALIZE, obj);
}
209

210 211
/* Like Fcapitalize but change only the initials.  */

Paul Eggert's avatar
Paul Eggert committed
212
DEFUN ("upcase-initials", Fupcase_initials, Supcase_initials, 1, 1, 0,
213 214 215 216
       doc: /* Convert the initial of each word in the argument to upper case.
Do not change the other letters of each word.
The argument may be a character or string.  The result has the same type.
The argument object is not altered--the value is a copy.  */)
Dan Nicolaescu's avatar
Dan Nicolaescu committed
217
  (Lisp_Object obj)
218 219 220
{
  return casify_object (CASE_CAPITALIZE_UP, obj);
}
Jim Blandy's avatar
Jim Blandy committed
221 222 223 224

/* flag is CASE_UP, CASE_DOWN or CASE_CAPITALIZE or CASE_CAPITALIZE_UP.
   b and e specify range of buffer to operate on. */

225
static void
226
casify_region (enum case_action flag, Lisp_Object b, Lisp_Object e)
Jim Blandy's avatar
Jim Blandy committed
227
{
228
  bool multibyte = !NILP (BVAR (current_buffer, enable_multibyte_characters));
229 230
  ptrdiff_t start, end;
  ptrdiff_t start_byte;
231 232

  /* Position of first and last changes.  */
233
  ptrdiff_t first = -1, last;
234

235 236
  ptrdiff_t opoint = PT;
  ptrdiff_t opoint_byte = PT_BYTE;
Jim Blandy's avatar
Jim Blandy committed
237

238 239
  struct casing_context ctx;

Jim Blandy's avatar
Jim Blandy committed
240 241 242 243 244
  if (EQ (b, e))
    /* Not modifying because nothing marked */
    return;

  validate_region (&b, &e);
245 246
  start = XFASTINT (b);
  end = XFASTINT (e);
247
  modify_text (start, end);
248
  record_change (start, end - start);
249
  start_byte = CHAR_TO_BYTE (start);
Jim Blandy's avatar
Jim Blandy committed
250

251
  prepare_casing_context (&ctx, flag, true);
252

253
  while (start < end)
Karl Heuer's avatar
Karl Heuer committed
254
    {
255
      int ch, cased, len;
256 257 258

      if (multibyte)
	{
259 260
	  ch = FETCH_MULTIBYTE_CHAR (start_byte);
	  len = CHAR_BYTES (ch);
261 262 263
	}
      else
	{
264 265
	  ch = FETCH_BYTE (start_byte);
	  MAKE_CHAR_MULTIBYTE (ch);
266 267
	  len = 1;
	}
268 269
      cased = case_character (&ctx, ch);
      if (ch != cased)
Karl Heuer's avatar
Karl Heuer committed
270
	{
271 272 273 274
	  last = start;
	  if (first < 0)
	    first = start;

275 276
	  if (! multibyte)
	    {
277 278
	      MAKE_CHAR_UNIBYTE (cased);
	      FETCH_BYTE (start_byte) = cased;
279
	    }
280 281
	  else if (ASCII_CHAR_P (cased) && ASCII_CHAR_P (ch))
	    FETCH_BYTE (start_byte) = cased;
282
	  else
Karl Heuer's avatar
Karl Heuer committed
283
	    {
284
	      int tolen = CHAR_BYTES (cased);
285
	      int j;
Kenichi Handa's avatar
Kenichi Handa committed
286
	      unsigned char str[MAX_MULTIBYTE_LENGTH];
Karl Heuer's avatar
Karl Heuer committed
287

288
	      CHAR_STRING (cased, str);
289 290 291 292 293 294 295 296 297 298 299 300
	      if (len == tolen)
		{
		  /* Length is unchanged.  */
		  for (j = 0; j < len; ++j)
		    FETCH_BYTE (start_byte + j) = str[j];
		}
	      else
		{
		  /* Replace one character with the other,
		     keeping text properties the same.  */
		  replace_range_2 (start, start_byte,
				   start + 1, start_byte + len,
301
				   (char *) str, 1, tolen,
302 303 304
				   0);
		  len = tolen;
		}
Karl Heuer's avatar
Karl Heuer committed
305 306
	    }
	}
307 308
      start++;
      start_byte += len;
Jim Blandy's avatar
Jim Blandy committed
309 310
    }

Kenichi Handa's avatar
Kenichi Handa committed
311 312 313
  if (PT != opoint)
    TEMP_SET_PT_BOTH (opoint, opoint_byte);

314
  if (first >= 0)
Kenichi Handa's avatar
Kenichi Handa committed
315
    {
316 317
      signal_after_change (first, last + 1 - first, last + 1 - first);
      update_compositions (first, last + 1, CHECK_ALL);
Kenichi Handa's avatar
Kenichi Handa committed
318
    }
Jim Blandy's avatar
Jim Blandy committed
319 320
}

321 322
DEFUN ("upcase-region", Fupcase_region, Supcase_region, 2, 3,
       "(list (region-beginning) (region-end) (region-noncontiguous-p))",
323 324 325 326 327
       doc: /* Convert the region to upper case.  In programs, wants two arguments.
These arguments specify the starting and ending character numbers of
the region to operate on.  When used as a command, the text between
point and the mark is operated on.
See also `capitalize-region'.  */)
328
  (Lisp_Object beg, Lisp_Object end, Lisp_Object region_noncontiguous_p)
Jim Blandy's avatar
Jim Blandy committed
329
{
330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345
  Lisp_Object bounds = Qnil;

  if (!NILP (region_noncontiguous_p))
    {
      bounds = call1 (Fsymbol_value (intern ("region-extract-function")),
		      intern ("bounds"));

      while (CONSP (bounds))
	{
	  casify_region (CASE_UP, XCAR (XCAR (bounds)), XCDR (XCAR (bounds)));
	  bounds = XCDR (bounds);
	}
    }
  else
    casify_region (CASE_UP, beg, end);

Jim Blandy's avatar
Jim Blandy committed
346 347 348
  return Qnil;
}

349 350
DEFUN ("downcase-region", Fdowncase_region, Sdowncase_region, 2, 3,
       "(list (region-beginning) (region-end) (region-noncontiguous-p))",
351 352 353 354
       doc: /* Convert the region to lower case.  In programs, wants two arguments.
These arguments specify the starting and ending character numbers of
the region to operate on.  When used as a command, the text between
point and the mark is operated on.  */)
355
  (Lisp_Object beg, Lisp_Object end, Lisp_Object region_noncontiguous_p)
Jim Blandy's avatar
Jim Blandy committed
356
{
357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372
  Lisp_Object bounds = Qnil;

  if (!NILP (region_noncontiguous_p))
    {
      bounds = call1 (Fsymbol_value (intern ("region-extract-function")),
		      intern ("bounds"));

      while (CONSP (bounds))
	{
	  casify_region (CASE_DOWN, XCAR (XCAR (bounds)), XCDR (XCAR (bounds)));
	  bounds = XCDR (bounds);
	}
    }
  else
    casify_region (CASE_DOWN, beg, end);

Jim Blandy's avatar
Jim Blandy committed
373 374 375 376
  return Qnil;
}

DEFUN ("capitalize-region", Fcapitalize_region, Scapitalize_region, 2, 2, "r",
377 378 379 380 381
       doc: /* Convert the region to capitalized form.
Capitalized form means each word's first character is upper case
and the rest of it is lower case.
In programs, give two arguments, the starting and ending
character positions to operate on.  */)
Dan Nicolaescu's avatar
Dan Nicolaescu committed
382
  (Lisp_Object beg, Lisp_Object end)
Jim Blandy's avatar
Jim Blandy committed
383
{
384
  casify_region (CASE_CAPITALIZE, beg, end);
Jim Blandy's avatar
Jim Blandy committed
385 386 387
  return Qnil;
}

388 389
/* Like Fcapitalize_region but change only the initials.  */

Paul Eggert's avatar
Paul Eggert committed
390
DEFUN ("upcase-initials-region", Fupcase_initials_region,
391
       Supcase_initials_region, 2, 2, "r",
392 393 394 395
       doc: /* Upcase the initial of each word in the region.
Subsequent letters of each word are not changed.
In programs, give two arguments, the starting and ending
character positions to operate on.  */)
Dan Nicolaescu's avatar
Dan Nicolaescu committed
396
  (Lisp_Object beg, Lisp_Object end)
397
{
398
  casify_region (CASE_CAPITALIZE_UP, beg, end);
399 400
  return Qnil;
}
Jim Blandy's avatar
Jim Blandy committed
401

402
static Lisp_Object
403
casify_word (enum case_action flag, Lisp_Object arg)
Jim Blandy's avatar
Jim Blandy committed
404
{
405
  CHECK_NUMBER (arg);
406 407 408 409 410
  ptrdiff_t farend = scan_words (PT, XINT (arg));
  if (!farend)
    farend = XINT (arg) <= 0 ? BEGV : ZV;
  ptrdiff_t newpoint = max (PT, farend);
  casify_region (flag, make_number (PT), make_number (farend));
411
  SET_PT (newpoint);
412
  return Qnil;
Jim Blandy's avatar
Jim Blandy committed
413 414 415
}

DEFUN ("upcase-word", Fupcase_word, Supcase_word, 1, 1, "p",
416 417 418 419 420
       doc: /* Convert to upper case from point to end of word, moving over.

If point is in the middle of a word, the part of that word before point
is ignored when moving forward.

421 422
With negative argument, convert previous words but do not move.
See also `capitalize-word'.  */)
Dan Nicolaescu's avatar
Dan Nicolaescu committed
423
  (Lisp_Object arg)
Jim Blandy's avatar
Jim Blandy committed
424
{
425
  return casify_word (CASE_UP, arg);
Jim Blandy's avatar
Jim Blandy committed
426 427 428
}

DEFUN ("downcase-word", Fdowncase_word, Sdowncase_word, 1, 1, "p",
429 430 431 432 433
       doc: /* Convert to lower case from point to end of word, moving over.

If point is in the middle of a word, the part of that word before point
is ignored when moving forward.

434
With negative argument, convert previous words but do not move.  */)
Dan Nicolaescu's avatar
Dan Nicolaescu committed
435
  (Lisp_Object arg)
Jim Blandy's avatar
Jim Blandy committed
436
{
437
  return casify_word (CASE_DOWN, arg);
Jim Blandy's avatar
Jim Blandy committed
438 439 440
}

DEFUN ("capitalize-word", Fcapitalize_word, Scapitalize_word, 1, 1, "p",
441 442
       doc: /* Capitalize from point to the end of word, moving over.
With numerical argument ARG, capitalize the next ARG-1 words as well.
443 444
This gives the word(s) a first character in upper case
and the rest lower case.
445 446 447 448

If point is in the middle of a word, the part of that word before point
is ignored when moving forward.

449
With negative argument, capitalize previous words but do not move.  */)
Dan Nicolaescu's avatar
Dan Nicolaescu committed
450
  (Lisp_Object arg)
Jim Blandy's avatar
Jim Blandy committed
451
{
452
  return casify_word (CASE_CAPITALIZE, arg);
Jim Blandy's avatar
Jim Blandy committed
453 454
}

Andreas Schwab's avatar
Andreas Schwab committed
455
void
456
syms_of_casefiddle (void)
Jim Blandy's avatar
Jim Blandy committed
457
{
458
  DEFSYM (Qidentity, "identity");
Jim Blandy's avatar
Jim Blandy committed
459 460 461
  defsubr (&Supcase);
  defsubr (&Sdowncase);
  defsubr (&Scapitalize);
462
  defsubr (&Supcase_initials);
Jim Blandy's avatar
Jim Blandy committed
463 464 465
  defsubr (&Supcase_region);
  defsubr (&Sdowncase_region);
  defsubr (&Scapitalize_region);
466
  defsubr (&Supcase_initials_region);
Jim Blandy's avatar
Jim Blandy committed
467 468 469 470 471
  defsubr (&Supcase_word);
  defsubr (&Sdowncase_word);
  defsubr (&Scapitalize_word);
}

Andreas Schwab's avatar
Andreas Schwab committed
472
void
473
keys_of_casefiddle (void)
Jim Blandy's avatar
Jim Blandy committed
474
{
Juanma Barranquero's avatar
Juanma Barranquero committed
475
  initial_define_key (control_x_map, Ctl ('U'), "upcase-region");
Jim Blandy's avatar
Jim Blandy committed
476
  Fput (intern ("upcase-region"), Qdisabled, Qt);
Juanma Barranquero's avatar
Juanma Barranquero committed
477
  initial_define_key (control_x_map, Ctl ('L'), "downcase-region");
Jim Blandy's avatar
Jim Blandy committed
478 479
  Fput (intern ("downcase-region"), Qdisabled, Qt);

Jim Blandy's avatar
Jim Blandy committed
480 481 482 483
  initial_define_key (meta_map, 'u', "upcase-word");
  initial_define_key (meta_map, 'l', "downcase-word");
  initial_define_key (meta_map, 'c', "capitalize-word");
}