casefiddle.c 13.5 KB
Newer Older
Jim Blandy's avatar
Jim Blandy committed
1
/* GNU Emacs case conversion functions.
2

Paul Eggert's avatar
Paul Eggert committed
3
Copyright (C) 1985, 1994, 1997-1999, 2001-2017 Free Software Foundation,
4
Inc.
Jim Blandy's avatar
Jim Blandy committed
5 6 7

This file is part of GNU Emacs.

8
GNU Emacs is free software: you can redistribute it and/or modify
Jim Blandy's avatar
Jim Blandy committed
9
it under the terms of the GNU General Public License as published by
10 11
the Free Software Foundation, either version 3 of the License, or (at
your option) any later version.
Jim Blandy's avatar
Jim Blandy committed
12 13 14 15 16 17 18

GNU Emacs is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
19
along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
Jim Blandy's avatar
Jim Blandy committed
20 21


22
#include <config.h>
23

Jim Blandy's avatar
Jim Blandy committed
24
#include "lisp.h"
25
#include "character.h"
26
#include "buffer.h"
Jim Blandy's avatar
Jim Blandy committed
27 28
#include "commands.h"
#include "syntax.h"
Kenichi Handa's avatar
Kenichi Handa committed
29
#include "composite.h"
Stefan Monnier's avatar
Stefan Monnier committed
30
#include "keymap.h"
Jim Blandy's avatar
Jim Blandy committed
31 32 33

enum case_action {CASE_UP, CASE_DOWN, CASE_CAPITALIZE, CASE_CAPITALIZE_UP};

34
static Lisp_Object
35
do_casify_natnum (enum case_action flag, Lisp_Object obj)
Jim Blandy's avatar
Jim Blandy committed
36
{
37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71
  int flagbits = (CHAR_ALT | CHAR_SUPER | CHAR_HYPER
		  | CHAR_SHIFT | CHAR_CTL | CHAR_META);
  int flags, ch = XFASTINT (obj), cased;
  bool multibyte;

  /* If the character has higher bits set above the flags, return it unchanged.
     It is not a real character.  */
  if (UNSIGNED_CMP (ch, >, flagbits))
    return obj;

  flags = ch & flagbits;
  ch = ch & ~flagbits;

  /* FIXME: Even if enable-multibyte-characters is nil, we may manipulate
     multibyte chars.  This means we have a bug for latin-1 chars since when we
     receive an int 128-255 we can't tell whether it's an eight-bit byte or
     a latin-1 char.  */
  multibyte = ch >= 256
    || !NILP (BVAR (current_buffer, enable_multibyte_characters));
  if (! multibyte)
    MAKE_CHAR_MULTIBYTE (ch);
  cased = flag == CASE_DOWN ? downcase (ch) : upcase (ch);
  if (cased == ch)
    return obj;

  if (! multibyte)
    MAKE_CHAR_UNIBYTE (cased);
  XSETFASTINT (obj, cased | flags);
  return obj;
}

static Lisp_Object
do_casify_multibyte_string (enum case_action flag, Lisp_Object obj)
{
  ptrdiff_t i, i_byte, size = SCHARS (obj);
72
  bool inword = flag == CASE_DOWN;
73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130
  int len, ch, cased;
  USE_SAFE_ALLOCA;
  ptrdiff_t o_size;
  if (INT_MULTIPLY_WRAPV (size, MAX_MULTIBYTE_LENGTH, &o_size))
    o_size = PTRDIFF_MAX;
  unsigned char *dst = SAFE_ALLOCA (o_size);
  unsigned char *o = dst;

  for (i = i_byte = 0; i < size; i++, i_byte += len)
    {
      if (o_size - MAX_MULTIBYTE_LENGTH < o - dst)
	string_overflow ();
      ch = STRING_CHAR_AND_LENGTH (SDATA (obj) + i_byte, len);
      if (inword && flag != CASE_CAPITALIZE_UP)
	cased = downcase (ch);
      else if (!inword || flag != CASE_CAPITALIZE_UP)
	cased = upcase (ch);
      else
	cased = ch;
      if ((int) flag >= (int) CASE_CAPITALIZE)
	inword = (SYNTAX (ch) == Sword);
      o += CHAR_STRING (cased, o);
    }
  eassert (o - dst <= o_size);
  obj = make_multibyte_string ((char *) dst, size, o - dst);
  SAFE_FREE ();
  return obj;
}

static Lisp_Object
do_casify_unibyte_string (enum case_action flag, Lisp_Object obj)
{
  ptrdiff_t i, size = SCHARS (obj);
  bool inword = flag == CASE_DOWN;
  int ch, cased;

  obj = Fcopy_sequence (obj);
  for (i = 0; i < size; i++)
    {
      ch = SREF (obj, i);
      MAKE_CHAR_MULTIBYTE (ch);
      cased = ch;
      if (inword && flag != CASE_CAPITALIZE_UP)
	ch = downcase (ch);
      else if (!uppercasep (ch)
	       && (!inword || flag != CASE_CAPITALIZE_UP))
	ch = upcase (cased);
      if ((int) flag >= (int) CASE_CAPITALIZE)
	inword = (SYNTAX (ch) == Sword);
      if (ch == cased)
	continue;
      MAKE_CHAR_UNIBYTE (ch);
      /* If the char can't be converted to a valid byte, just don't change it */
      if (ch >= 0 && ch < 256)
	SSET (obj, i, ch);
    }
  return obj;
}
Jim Blandy's avatar
Jim Blandy committed
131

132 133 134
static Lisp_Object
casify_object (enum case_action flag, Lisp_Object obj)
{
135
  /* If the case table is flagged as modified, rescan it.  */
Tom Tromey's avatar
Tom Tromey committed
136 137
  if (NILP (XCHAR_TABLE (BVAR (current_buffer, downcase_table))->extras[1]))
    Fset_case_table (BVAR (current_buffer, downcase_table));
138

139
  if (NATNUMP (obj))
140 141
    return do_casify_natnum (flag, obj);
  else if (!STRINGP (obj))
142
    wrong_type_argument (Qchar_or_string_p, obj);
143 144 145 146
  else if (!SCHARS (obj))
    return obj;
  else if (STRING_MULTIBYTE (obj))
    return do_casify_multibyte_string (flag, obj);
147
  else
148
    return do_casify_unibyte_string (flag, obj);
Jim Blandy's avatar
Jim Blandy committed
149 150
}

Paul Eggert's avatar
Paul Eggert committed
151
DEFUN ("upcase", Fupcase, Supcase, 1, 1, 0,
152 153 154 155
       doc: /* Convert argument to upper case and return that.
The argument may be a character or string.  The result has the same type.
The argument object is not altered--the value is a copy.
See also `capitalize', `downcase' and `upcase-initials'.  */)
Dan Nicolaescu's avatar
Dan Nicolaescu committed
156
  (Lisp_Object obj)
Jim Blandy's avatar
Jim Blandy committed
157 158 159 160
{
  return casify_object (CASE_UP, obj);
}

Paul Eggert's avatar
Paul Eggert committed
161
DEFUN ("downcase", Fdowncase, Sdowncase, 1, 1, 0,
162 163 164
       doc: /* Convert argument to lower case and return that.
The argument may be a character or string.  The result has the same type.
The argument object is not altered--the value is a copy.  */)
Dan Nicolaescu's avatar
Dan Nicolaescu committed
165
  (Lisp_Object obj)
Jim Blandy's avatar
Jim Blandy committed
166 167 168 169 170
{
  return casify_object (CASE_DOWN, obj);
}

DEFUN ("capitalize", Fcapitalize, Scapitalize, 1, 1, 0,
171 172 173 174 175
       doc: /* Convert argument to capitalized form and return that.
This means that each word's first character is upper case
and the rest is lower case.
The argument may be a character or string.  The result has the same type.
The argument object is not altered--the value is a copy.  */)
Dan Nicolaescu's avatar
Dan Nicolaescu committed
176
  (Lisp_Object obj)
Jim Blandy's avatar
Jim Blandy committed
177 178 179
{
  return casify_object (CASE_CAPITALIZE, obj);
}
180

181 182
/* Like Fcapitalize but change only the initials.  */

Paul Eggert's avatar
Paul Eggert committed
183
DEFUN ("upcase-initials", Fupcase_initials, Supcase_initials, 1, 1, 0,
184 185 186 187
       doc: /* Convert the initial of each word in the argument to upper case.
Do not change the other letters of each word.
The argument may be a character or string.  The result has the same type.
The argument object is not altered--the value is a copy.  */)
Dan Nicolaescu's avatar
Dan Nicolaescu committed
188
  (Lisp_Object obj)
189 190 191
{
  return casify_object (CASE_CAPITALIZE_UP, obj);
}
Jim Blandy's avatar
Jim Blandy committed
192 193 194 195

/* flag is CASE_UP, CASE_DOWN or CASE_CAPITALIZE or CASE_CAPITALIZE_UP.
   b and e specify range of buffer to operate on. */

196
static void
197
casify_region (enum case_action flag, Lisp_Object b, Lisp_Object e)
Jim Blandy's avatar
Jim Blandy committed
198
{
199 200 201
  int c;
  bool inword = flag == CASE_DOWN;
  bool multibyte = !NILP (BVAR (current_buffer, enable_multibyte_characters));
202 203
  ptrdiff_t start, end;
  ptrdiff_t start_byte;
204 205

  /* Position of first and last changes.  */
206
  ptrdiff_t first = -1, last;
207

208 209
  ptrdiff_t opoint = PT;
  ptrdiff_t opoint_byte = PT_BYTE;
Jim Blandy's avatar
Jim Blandy committed
210 211 212 213 214

  if (EQ (b, e))
    /* Not modifying because nothing marked */
    return;

215
  /* If the case table is flagged as modified, rescan it.  */
Tom Tromey's avatar
Tom Tromey committed
216 217
  if (NILP (XCHAR_TABLE (BVAR (current_buffer, downcase_table))->extras[1]))
    Fset_case_table (BVAR (current_buffer, downcase_table));
218

Jim Blandy's avatar
Jim Blandy committed
219
  validate_region (&b, &e);
220 221
  start = XFASTINT (b);
  end = XFASTINT (e);
222
  modify_text (start, end);
223
  record_change (start, end - start);
224
  start_byte = CHAR_TO_BYTE (start);
Jim Blandy's avatar
Jim Blandy committed
225

Juanma Barranquero's avatar
Juanma Barranquero committed
226
  SETUP_BUFFER_SYNTAX_TABLE ();	/* For syntax_prefix_flag_p.  */
227

228
  while (start < end)
Karl Heuer's avatar
Karl Heuer committed
229
    {
230 231 232 233 234 235 236 237 238 239 240 241 242 243
      int c2, len;

      if (multibyte)
	{
	  c = FETCH_MULTIBYTE_CHAR (start_byte);
	  len = CHAR_BYTES (c);
	}
      else
	{
	  c = FETCH_BYTE (start_byte);
	  MAKE_CHAR_MULTIBYTE (c);
	  len = 1;
	}
      c2 = c;
244
      if (inword && flag != CASE_CAPITALIZE_UP)
245
	c = downcase (c);
246 247
      else if (!inword || flag != CASE_CAPITALIZE_UP)
	c = upcase (c);
248
      if ((int) flag >= (int) CASE_CAPITALIZE)
249 250
	inword = ((SYNTAX (c) == Sword)
		  && (inword || !syntax_prefix_flag_p (c)));
251
      if (c != c2)
Karl Heuer's avatar
Karl Heuer committed
252
	{
253 254 255 256
	  last = start;
	  if (first < 0)
	    first = start;

257 258 259 260 261 262 263
	  if (! multibyte)
	    {
	      MAKE_CHAR_UNIBYTE (c);
	      FETCH_BYTE (start_byte) = c;
	    }
	  else if (ASCII_CHAR_P (c2) && ASCII_CHAR_P (c))
	    FETCH_BYTE (start_byte) = c;
264
	  else
Karl Heuer's avatar
Karl Heuer committed
265
	    {
266
	      int tolen = CHAR_BYTES (c);
267
	      int j;
Kenichi Handa's avatar
Kenichi Handa committed
268
	      unsigned char str[MAX_MULTIBYTE_LENGTH];
Karl Heuer's avatar
Karl Heuer committed
269

270
	      CHAR_STRING (c, str);
271 272 273 274 275 276 277 278 279 280 281 282
	      if (len == tolen)
		{
		  /* Length is unchanged.  */
		  for (j = 0; j < len; ++j)
		    FETCH_BYTE (start_byte + j) = str[j];
		}
	      else
		{
		  /* Replace one character with the other,
		     keeping text properties the same.  */
		  replace_range_2 (start, start_byte,
				   start + 1, start_byte + len,
283
				   (char *) str, 1, tolen,
284 285 286
				   0);
		  len = tolen;
		}
Karl Heuer's avatar
Karl Heuer committed
287 288
	    }
	}
289 290
      start++;
      start_byte += len;
Jim Blandy's avatar
Jim Blandy committed
291 292
    }

Kenichi Handa's avatar
Kenichi Handa committed
293 294 295
  if (PT != opoint)
    TEMP_SET_PT_BOTH (opoint, opoint_byte);

296
  if (first >= 0)
Kenichi Handa's avatar
Kenichi Handa committed
297
    {
298 299
      signal_after_change (first, last + 1 - first, last + 1 - first);
      update_compositions (first, last + 1, CHECK_ALL);
Kenichi Handa's avatar
Kenichi Handa committed
300
    }
Jim Blandy's avatar
Jim Blandy committed
301 302
}

303 304
DEFUN ("upcase-region", Fupcase_region, Supcase_region, 2, 3,
       "(list (region-beginning) (region-end) (region-noncontiguous-p))",
305 306 307 308 309
       doc: /* Convert the region to upper case.  In programs, wants two arguments.
These arguments specify the starting and ending character numbers of
the region to operate on.  When used as a command, the text between
point and the mark is operated on.
See also `capitalize-region'.  */)
310
  (Lisp_Object beg, Lisp_Object end, Lisp_Object region_noncontiguous_p)
Jim Blandy's avatar
Jim Blandy committed
311
{
312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327
  Lisp_Object bounds = Qnil;

  if (!NILP (region_noncontiguous_p))
    {
      bounds = call1 (Fsymbol_value (intern ("region-extract-function")),
		      intern ("bounds"));

      while (CONSP (bounds))
	{
	  casify_region (CASE_UP, XCAR (XCAR (bounds)), XCDR (XCAR (bounds)));
	  bounds = XCDR (bounds);
	}
    }
  else
    casify_region (CASE_UP, beg, end);

Jim Blandy's avatar
Jim Blandy committed
328 329 330
  return Qnil;
}

331 332
DEFUN ("downcase-region", Fdowncase_region, Sdowncase_region, 2, 3,
       "(list (region-beginning) (region-end) (region-noncontiguous-p))",
333 334 335 336
       doc: /* Convert the region to lower case.  In programs, wants two arguments.
These arguments specify the starting and ending character numbers of
the region to operate on.  When used as a command, the text between
point and the mark is operated on.  */)
337
  (Lisp_Object beg, Lisp_Object end, Lisp_Object region_noncontiguous_p)
Jim Blandy's avatar
Jim Blandy committed
338
{
339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354
  Lisp_Object bounds = Qnil;

  if (!NILP (region_noncontiguous_p))
    {
      bounds = call1 (Fsymbol_value (intern ("region-extract-function")),
		      intern ("bounds"));

      while (CONSP (bounds))
	{
	  casify_region (CASE_DOWN, XCAR (XCAR (bounds)), XCDR (XCAR (bounds)));
	  bounds = XCDR (bounds);
	}
    }
  else
    casify_region (CASE_DOWN, beg, end);

Jim Blandy's avatar
Jim Blandy committed
355 356 357 358
  return Qnil;
}

DEFUN ("capitalize-region", Fcapitalize_region, Scapitalize_region, 2, 2, "r",
359 360 361 362 363
       doc: /* Convert the region to capitalized form.
Capitalized form means each word's first character is upper case
and the rest of it is lower case.
In programs, give two arguments, the starting and ending
character positions to operate on.  */)
Dan Nicolaescu's avatar
Dan Nicolaescu committed
364
  (Lisp_Object beg, Lisp_Object end)
Jim Blandy's avatar
Jim Blandy committed
365
{
366
  casify_region (CASE_CAPITALIZE, beg, end);
Jim Blandy's avatar
Jim Blandy committed
367 368 369
  return Qnil;
}

370 371
/* Like Fcapitalize_region but change only the initials.  */

Paul Eggert's avatar
Paul Eggert committed
372
DEFUN ("upcase-initials-region", Fupcase_initials_region,
373
       Supcase_initials_region, 2, 2, "r",
374 375 376 377
       doc: /* Upcase the initial of each word in the region.
Subsequent letters of each word are not changed.
In programs, give two arguments, the starting and ending
character positions to operate on.  */)
Dan Nicolaescu's avatar
Dan Nicolaescu committed
378
  (Lisp_Object beg, Lisp_Object end)
379
{
380
  casify_region (CASE_CAPITALIZE_UP, beg, end);
381 382
  return Qnil;
}
Jim Blandy's avatar
Jim Blandy committed
383

384
static Lisp_Object
385
casify_word (enum case_action flag, Lisp_Object arg)
Jim Blandy's avatar
Jim Blandy committed
386
{
387
  CHECK_NUMBER (arg);
388 389 390 391 392
  ptrdiff_t farend = scan_words (PT, XINT (arg));
  if (!farend)
    farend = XINT (arg) <= 0 ? BEGV : ZV;
  ptrdiff_t newpoint = max (PT, farend);
  casify_region (flag, make_number (PT), make_number (farend));
393
  SET_PT (newpoint);
394
  return Qnil;
Jim Blandy's avatar
Jim Blandy committed
395 396 397
}

DEFUN ("upcase-word", Fupcase_word, Supcase_word, 1, 1, "p",
398 399 400 401 402
       doc: /* Convert to upper case from point to end of word, moving over.

If point is in the middle of a word, the part of that word before point
is ignored when moving forward.

403 404
With negative argument, convert previous words but do not move.
See also `capitalize-word'.  */)
Dan Nicolaescu's avatar
Dan Nicolaescu committed
405
  (Lisp_Object arg)
Jim Blandy's avatar
Jim Blandy committed
406
{
407
  return casify_word (CASE_UP, arg);
Jim Blandy's avatar
Jim Blandy committed
408 409 410
}

DEFUN ("downcase-word", Fdowncase_word, Sdowncase_word, 1, 1, "p",
411 412 413 414 415
       doc: /* Convert to lower case from point to end of word, moving over.

If point is in the middle of a word, the part of that word before point
is ignored when moving forward.

416
With negative argument, convert previous words but do not move.  */)
Dan Nicolaescu's avatar
Dan Nicolaescu committed
417
  (Lisp_Object arg)
Jim Blandy's avatar
Jim Blandy committed
418
{
419
  return casify_word (CASE_DOWN, arg);
Jim Blandy's avatar
Jim Blandy committed
420 421 422
}

DEFUN ("capitalize-word", Fcapitalize_word, Scapitalize_word, 1, 1, "p",
423 424
       doc: /* Capitalize from point to the end of word, moving over.
With numerical argument ARG, capitalize the next ARG-1 words as well.
425 426
This gives the word(s) a first character in upper case
and the rest lower case.
427 428 429 430

If point is in the middle of a word, the part of that word before point
is ignored when moving forward.

431
With negative argument, capitalize previous words but do not move.  */)
Dan Nicolaescu's avatar
Dan Nicolaescu committed
432
  (Lisp_Object arg)
Jim Blandy's avatar
Jim Blandy committed
433
{
434
  return casify_word (CASE_CAPITALIZE, arg);
Jim Blandy's avatar
Jim Blandy committed
435 436
}

Andreas Schwab's avatar
Andreas Schwab committed
437
void
438
syms_of_casefiddle (void)
Jim Blandy's avatar
Jim Blandy committed
439
{
440
  DEFSYM (Qidentity, "identity");
Jim Blandy's avatar
Jim Blandy committed
441 442 443
  defsubr (&Supcase);
  defsubr (&Sdowncase);
  defsubr (&Scapitalize);
444
  defsubr (&Supcase_initials);
Jim Blandy's avatar
Jim Blandy committed
445 446 447
  defsubr (&Supcase_region);
  defsubr (&Sdowncase_region);
  defsubr (&Scapitalize_region);
448
  defsubr (&Supcase_initials_region);
Jim Blandy's avatar
Jim Blandy committed
449 450 451 452 453
  defsubr (&Supcase_word);
  defsubr (&Sdowncase_word);
  defsubr (&Scapitalize_word);
}

Andreas Schwab's avatar
Andreas Schwab committed
454
void
455
keys_of_casefiddle (void)
Jim Blandy's avatar
Jim Blandy committed
456
{
Juanma Barranquero's avatar
Juanma Barranquero committed
457
  initial_define_key (control_x_map, Ctl ('U'), "upcase-region");
Jim Blandy's avatar
Jim Blandy committed
458
  Fput (intern ("upcase-region"), Qdisabled, Qt);
Juanma Barranquero's avatar
Juanma Barranquero committed
459
  initial_define_key (control_x_map, Ctl ('L'), "downcase-region");
Jim Blandy's avatar
Jim Blandy committed
460 461
  Fput (intern ("downcase-region"), Qdisabled, Qt);

Jim Blandy's avatar
Jim Blandy committed
462 463 464 465
  initial_define_key (meta_map, 'u', "upcase-word");
  initial_define_key (meta_map, 'l', "downcase-word");
  initial_define_key (meta_map, 'c', "capitalize-word");
}