casefiddle.c 13.3 KB
Newer Older
Jim Blandy's avatar
Jim Blandy committed
1
/* GNU Emacs case conversion functions.
2

Paul Eggert's avatar
Paul Eggert committed
3
Copyright (C) 1985, 1994, 1997-1999, 2001-2016 Free Software Foundation,
4
Inc.
Jim Blandy's avatar
Jim Blandy committed
5 6 7

This file is part of GNU Emacs.

8
GNU Emacs is free software: you can redistribute it and/or modify
Jim Blandy's avatar
Jim Blandy committed
9
it under the terms of the GNU General Public License as published by
10 11
the Free Software Foundation, either version 3 of the License, or (at
your option) any later version.
Jim Blandy's avatar
Jim Blandy committed
12 13 14 15 16 17 18

GNU Emacs is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
19
along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
Jim Blandy's avatar
Jim Blandy committed
20 21


22
#include <config.h>
23

Jim Blandy's avatar
Jim Blandy committed
24
#include "lisp.h"
25
#include "character.h"
26
#include "buffer.h"
Jim Blandy's avatar
Jim Blandy committed
27 28
#include "commands.h"
#include "syntax.h"
Kenichi Handa's avatar
Kenichi Handa committed
29
#include "composite.h"
Stefan Monnier's avatar
Stefan Monnier committed
30
#include "keymap.h"
Jim Blandy's avatar
Jim Blandy committed
31 32 33

enum case_action {CASE_UP, CASE_DOWN, CASE_CAPITALIZE, CASE_CAPITALIZE_UP};

34
static Lisp_Object
35
casify_object (enum case_action flag, Lisp_Object obj)
Jim Blandy's avatar
Jim Blandy committed
36
{
37 38
  int c, c1;
  bool inword = flag == CASE_DOWN;
Jim Blandy's avatar
Jim Blandy committed
39

40
  /* If the case table is flagged as modified, rescan it.  */
Tom Tromey's avatar
Tom Tromey committed
41 42
  if (NILP (XCHAR_TABLE (BVAR (current_buffer, downcase_table))->extras[1]))
    Fset_case_table (BVAR (current_buffer, downcase_table));
43

44
  if (INTEGERP (obj))
Jim Blandy's avatar
Jim Blandy committed
45
    {
46 47 48
      int flagbits = (CHAR_ALT | CHAR_SUPER | CHAR_HYPER
		      | CHAR_SHIFT | CHAR_CTL | CHAR_META);
      int flags = XINT (obj) & flagbits;
49 50
      bool multibyte = ! NILP (BVAR (current_buffer,
				     enable_multibyte_characters));
51 52 53 54

      /* If the character has higher bits set
	 above the flags, return it unchanged.
	 It is not a real character.  */
55
      if (UNSIGNED_CMP (XFASTINT (obj), >, flagbits))
56 57 58
	return obj;

      c1 = XFASTINT (obj) & ~flagbits;
59 60 61 62 63 64
      /* FIXME: Even if enable-multibyte-characters is nil, we may
	 manipulate multibyte chars.  This means we have a bug for latin-1
	 chars since when we receive an int 128-255 we can't tell whether
	 it's an eight-bit byte or a latin-1 char.  */
      if (c1 >= 256)
	multibyte = 1;
65 66
      if (! multibyte)
	MAKE_CHAR_MULTIBYTE (c1);
67
      c = downcase (c1);
68 69 70
      if (inword)
	XSETFASTINT (obj, c | flags);
      else if (c == (XFASTINT (obj) & ~flagbits))
Jim Blandy's avatar
Jim Blandy committed
71
	{
72
	  if (! inword)
73
	    c = upcase1 (c1);
74
	  if (! multibyte)
75 76
	    MAKE_CHAR_UNIBYTE (c);
	  XSETFASTINT (obj, c | flags);
Jim Blandy's avatar
Jim Blandy committed
77
	}
78 79
      return obj;
    }
80

81 82 83
  if (!STRINGP (obj))
    wrong_type_argument (Qchar_or_string_p, obj);
  else if (!STRING_MULTIBYTE (obj))
84
    {
85 86
      ptrdiff_t i;
      ptrdiff_t size = SCHARS (obj);
87

88
      obj = Fcopy_sequence (obj);
89
      for (i = 0; i < size; i++)
90
	{
91
	  c = SREF (obj, i);
92
	  MAKE_CHAR_MULTIBYTE (c);
93 94
	  c1 = c;
	  if (inword && flag != CASE_CAPITALIZE_UP)
95 96
	    c = downcase (c);
	  else if (!uppercasep (c)
97
		   && (!inword || flag != CASE_CAPITALIZE_UP))
98
	    c = upcase1 (c1);
99 100 101 102
	  if ((int) flag >= (int) CASE_CAPITALIZE)
	    inword = (SYNTAX (c) == Sword);
	  if (c != c1)
	    {
103
	      MAKE_CHAR_UNIBYTE (c);
104 105 106 107 108 109 110
	      /* If the char can't be converted to a valid byte, just don't
		 change it.  */
	      if (c >= 0 && c < 256)
		SSET (obj, i, c);
	    }
	}
      return obj;
111 112 113
    }
  else
    {
114
      ptrdiff_t i, i_byte, size = SCHARS (obj);
115
      int len;
116
      USE_SAFE_ALLOCA;
117 118 119
      ptrdiff_t o_size;
      if (INT_MULTIPLY_WRAPV (size, MAX_MULTIBYTE_LENGTH, &o_size))
	o_size = PTRDIFF_MAX;
120 121
      unsigned char *dst = SAFE_ALLOCA (o_size);
      unsigned char *o = dst;
122 123 124

      for (i = i_byte = 0; i < size; i++, i_byte += len)
	{
125
	  if (o_size - MAX_MULTIBYTE_LENGTH < o - dst)
126
	    string_overflow ();
127
	  c = STRING_CHAR_AND_LENGTH (SDATA (obj) + i_byte, len);
128
	  if (inword && flag != CASE_CAPITALIZE_UP)
129 130
	    c = downcase (c);
	  else if (!uppercasep (c)
131
		   && (!inword || flag != CASE_CAPITALIZE_UP))
132
	    c = upcase1 (c);
133 134 135
	  if ((int) flag >= (int) CASE_CAPITALIZE)
	    inword = (SYNTAX (c) == Sword);
	  o += CHAR_STRING (c, o);
Jim Blandy's avatar
Jim Blandy committed
136
	}
137
      eassert (o - dst <= o_size);
138
      obj = make_multibyte_string ((char *) dst, size, o - dst);
139
      SAFE_FREE ();
140
      return obj;
Jim Blandy's avatar
Jim Blandy committed
141 142 143
    }
}

Paul Eggert's avatar
Paul Eggert committed
144
DEFUN ("upcase", Fupcase, Supcase, 1, 1, 0,
145 146 147 148
       doc: /* Convert argument to upper case and return that.
The argument may be a character or string.  The result has the same type.
The argument object is not altered--the value is a copy.
See also `capitalize', `downcase' and `upcase-initials'.  */)
Dan Nicolaescu's avatar
Dan Nicolaescu committed
149
  (Lisp_Object obj)
Jim Blandy's avatar
Jim Blandy committed
150 151 152 153
{
  return casify_object (CASE_UP, obj);
}

Paul Eggert's avatar
Paul Eggert committed
154
DEFUN ("downcase", Fdowncase, Sdowncase, 1, 1, 0,
155 156 157
       doc: /* Convert argument to lower case and return that.
The argument may be a character or string.  The result has the same type.
The argument object is not altered--the value is a copy.  */)
Dan Nicolaescu's avatar
Dan Nicolaescu committed
158
  (Lisp_Object obj)
Jim Blandy's avatar
Jim Blandy committed
159 160 161 162 163
{
  return casify_object (CASE_DOWN, obj);
}

DEFUN ("capitalize", Fcapitalize, Scapitalize, 1, 1, 0,
164 165 166 167 168
       doc: /* Convert argument to capitalized form and return that.
This means that each word's first character is upper case
and the rest is lower case.
The argument may be a character or string.  The result has the same type.
The argument object is not altered--the value is a copy.  */)
Dan Nicolaescu's avatar
Dan Nicolaescu committed
169
  (Lisp_Object obj)
Jim Blandy's avatar
Jim Blandy committed
170 171 172
{
  return casify_object (CASE_CAPITALIZE, obj);
}
173

174 175
/* Like Fcapitalize but change only the initials.  */

Paul Eggert's avatar
Paul Eggert committed
176
DEFUN ("upcase-initials", Fupcase_initials, Supcase_initials, 1, 1, 0,
177 178 179 180
       doc: /* Convert the initial of each word in the argument to upper case.
Do not change the other letters of each word.
The argument may be a character or string.  The result has the same type.
The argument object is not altered--the value is a copy.  */)
Dan Nicolaescu's avatar
Dan Nicolaescu committed
181
  (Lisp_Object obj)
182 183 184
{
  return casify_object (CASE_CAPITALIZE_UP, obj);
}
Jim Blandy's avatar
Jim Blandy committed
185 186 187 188

/* flag is CASE_UP, CASE_DOWN or CASE_CAPITALIZE or CASE_CAPITALIZE_UP.
   b and e specify range of buffer to operate on. */

189
static void
190
casify_region (enum case_action flag, Lisp_Object b, Lisp_Object e)
Jim Blandy's avatar
Jim Blandy committed
191
{
192 193 194
  int c;
  bool inword = flag == CASE_DOWN;
  bool multibyte = !NILP (BVAR (current_buffer, enable_multibyte_characters));
195 196
  ptrdiff_t start, end;
  ptrdiff_t start_byte;
197 198

  /* Position of first and last changes.  */
199
  ptrdiff_t first = -1, last;
200

201 202
  ptrdiff_t opoint = PT;
  ptrdiff_t opoint_byte = PT_BYTE;
Jim Blandy's avatar
Jim Blandy committed
203 204 205 206 207

  if (EQ (b, e))
    /* Not modifying because nothing marked */
    return;

208
  /* If the case table is flagged as modified, rescan it.  */
Tom Tromey's avatar
Tom Tromey committed
209 210
  if (NILP (XCHAR_TABLE (BVAR (current_buffer, downcase_table))->extras[1]))
    Fset_case_table (BVAR (current_buffer, downcase_table));
211

Jim Blandy's avatar
Jim Blandy committed
212
  validate_region (&b, &e);
213 214
  start = XFASTINT (b);
  end = XFASTINT (e);
215
  modify_text (start, end);
216
  record_change (start, end - start);
217
  start_byte = CHAR_TO_BYTE (start);
Jim Blandy's avatar
Jim Blandy committed
218

Juanma Barranquero's avatar
Juanma Barranquero committed
219
  SETUP_BUFFER_SYNTAX_TABLE ();	/* For syntax_prefix_flag_p.  */
220

221
  while (start < end)
Karl Heuer's avatar
Karl Heuer committed
222
    {
223 224 225 226 227 228 229 230 231 232 233 234 235 236
      int c2, len;

      if (multibyte)
	{
	  c = FETCH_MULTIBYTE_CHAR (start_byte);
	  len = CHAR_BYTES (c);
	}
      else
	{
	  c = FETCH_BYTE (start_byte);
	  MAKE_CHAR_MULTIBYTE (c);
	  len = 1;
	}
      c2 = c;
237
      if (inword && flag != CASE_CAPITALIZE_UP)
238 239
	c = downcase (c);
      else if (!uppercasep (c)
240
	       && (!inword || flag != CASE_CAPITALIZE_UP))
241
	c = upcase1 (c);
242
      if ((int) flag >= (int) CASE_CAPITALIZE)
243 244
	inword = ((SYNTAX (c) == Sword)
		  && (inword || !syntax_prefix_flag_p (c)));
245
      if (c != c2)
Karl Heuer's avatar
Karl Heuer committed
246
	{
247 248 249 250
	  last = start;
	  if (first < 0)
	    first = start;

251 252 253 254 255 256 257
	  if (! multibyte)
	    {
	      MAKE_CHAR_UNIBYTE (c);
	      FETCH_BYTE (start_byte) = c;
	    }
	  else if (ASCII_CHAR_P (c2) && ASCII_CHAR_P (c))
	    FETCH_BYTE (start_byte) = c;
258
	  else
Karl Heuer's avatar
Karl Heuer committed
259
	    {
260
	      int tolen = CHAR_BYTES (c);
261
	      int j;
Kenichi Handa's avatar
Kenichi Handa committed
262
	      unsigned char str[MAX_MULTIBYTE_LENGTH];
Karl Heuer's avatar
Karl Heuer committed
263

264
	      CHAR_STRING (c, str);
265 266 267 268 269 270 271 272 273 274 275 276
	      if (len == tolen)
		{
		  /* Length is unchanged.  */
		  for (j = 0; j < len; ++j)
		    FETCH_BYTE (start_byte + j) = str[j];
		}
	      else
		{
		  /* Replace one character with the other,
		     keeping text properties the same.  */
		  replace_range_2 (start, start_byte,
				   start + 1, start_byte + len,
277
				   (char *) str, 1, tolen,
278 279 280
				   0);
		  len = tolen;
		}
Karl Heuer's avatar
Karl Heuer committed
281 282
	    }
	}
283 284
      start++;
      start_byte += len;
Jim Blandy's avatar
Jim Blandy committed
285 286
    }

Kenichi Handa's avatar
Kenichi Handa committed
287 288 289
  if (PT != opoint)
    TEMP_SET_PT_BOTH (opoint, opoint_byte);

290
  if (first >= 0)
Kenichi Handa's avatar
Kenichi Handa committed
291
    {
292 293
      signal_after_change (first, last + 1 - first, last + 1 - first);
      update_compositions (first, last + 1, CHECK_ALL);
Kenichi Handa's avatar
Kenichi Handa committed
294
    }
Jim Blandy's avatar
Jim Blandy committed
295 296
}

297 298
DEFUN ("upcase-region", Fupcase_region, Supcase_region, 2, 3,
       "(list (region-beginning) (region-end) (region-noncontiguous-p))",
299 300 301 302 303
       doc: /* Convert the region to upper case.  In programs, wants two arguments.
These arguments specify the starting and ending character numbers of
the region to operate on.  When used as a command, the text between
point and the mark is operated on.
See also `capitalize-region'.  */)
304
  (Lisp_Object beg, Lisp_Object end, Lisp_Object region_noncontiguous_p)
Jim Blandy's avatar
Jim Blandy committed
305
{
306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321
  Lisp_Object bounds = Qnil;

  if (!NILP (region_noncontiguous_p))
    {
      bounds = call1 (Fsymbol_value (intern ("region-extract-function")),
		      intern ("bounds"));

      while (CONSP (bounds))
	{
	  casify_region (CASE_UP, XCAR (XCAR (bounds)), XCDR (XCAR (bounds)));
	  bounds = XCDR (bounds);
	}
    }
  else
    casify_region (CASE_UP, beg, end);

Jim Blandy's avatar
Jim Blandy committed
322 323 324
  return Qnil;
}

325 326
DEFUN ("downcase-region", Fdowncase_region, Sdowncase_region, 2, 3,
       "(list (region-beginning) (region-end) (region-noncontiguous-p))",
327 328 329 330
       doc: /* Convert the region to lower case.  In programs, wants two arguments.
These arguments specify the starting and ending character numbers of
the region to operate on.  When used as a command, the text between
point and the mark is operated on.  */)
331
  (Lisp_Object beg, Lisp_Object end, Lisp_Object region_noncontiguous_p)
Jim Blandy's avatar
Jim Blandy committed
332
{
333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348
  Lisp_Object bounds = Qnil;

  if (!NILP (region_noncontiguous_p))
    {
      bounds = call1 (Fsymbol_value (intern ("region-extract-function")),
		      intern ("bounds"));

      while (CONSP (bounds))
	{
	  casify_region (CASE_DOWN, XCAR (XCAR (bounds)), XCDR (XCAR (bounds)));
	  bounds = XCDR (bounds);
	}
    }
  else
    casify_region (CASE_DOWN, beg, end);

Jim Blandy's avatar
Jim Blandy committed
349 350 351 352
  return Qnil;
}

DEFUN ("capitalize-region", Fcapitalize_region, Scapitalize_region, 2, 2, "r",
353 354 355 356 357
       doc: /* Convert the region to capitalized form.
Capitalized form means each word's first character is upper case
and the rest of it is lower case.
In programs, give two arguments, the starting and ending
character positions to operate on.  */)
Dan Nicolaescu's avatar
Dan Nicolaescu committed
358
  (Lisp_Object beg, Lisp_Object end)
Jim Blandy's avatar
Jim Blandy committed
359
{
360
  casify_region (CASE_CAPITALIZE, beg, end);
Jim Blandy's avatar
Jim Blandy committed
361 362 363
  return Qnil;
}

364 365
/* Like Fcapitalize_region but change only the initials.  */

Paul Eggert's avatar
Paul Eggert committed
366
DEFUN ("upcase-initials-region", Fupcase_initials_region,
367
       Supcase_initials_region, 2, 2, "r",
368 369 370 371
       doc: /* Upcase the initial of each word in the region.
Subsequent letters of each word are not changed.
In programs, give two arguments, the starting and ending
character positions to operate on.  */)
Dan Nicolaescu's avatar
Dan Nicolaescu committed
372
  (Lisp_Object beg, Lisp_Object end)
373
{
374
  casify_region (CASE_CAPITALIZE_UP, beg, end);
375 376
  return Qnil;
}
Jim Blandy's avatar
Jim Blandy committed
377

378
static Lisp_Object
379
casify_word (enum case_action flag, Lisp_Object arg)
Jim Blandy's avatar
Jim Blandy committed
380
{
381 382
  Lisp_Object beg, end;
  ptrdiff_t newpoint;
383
  EMACS_INT iarg;
Jim Blandy's avatar
Jim Blandy committed
384

385
  CHECK_NUMBER (arg);
386
  iarg = XINT (arg);
Jim Blandy's avatar
Jim Blandy committed
387

388 389 390 391 392 393 394 395 396 397
  newpoint = scan_words (PT, iarg);
  if (!newpoint)
    newpoint = iarg > 0 ? ZV : BEGV;

  XSETFASTINT (beg, PT);
  XSETFASTINT (end, newpoint);
  if (PT > newpoint)
    newpoint = PT;

  casify_region (flag, beg, end);
Jim Blandy's avatar
Jim Blandy committed
398

399
  SET_PT (newpoint);
400
  return Qnil;
Jim Blandy's avatar
Jim Blandy committed
401 402 403
}

DEFUN ("upcase-word", Fupcase_word, Supcase_word, 1, 1, "p",
404 405 406 407 408
       doc: /* Convert to upper case from point to end of word, moving over.

If point is in the middle of a word, the part of that word before point
is ignored when moving forward.

409 410
With negative argument, convert previous words but do not move.
See also `capitalize-word'.  */)
Dan Nicolaescu's avatar
Dan Nicolaescu committed
411
  (Lisp_Object arg)
Jim Blandy's avatar
Jim Blandy committed
412
{
413
  return casify_word (CASE_UP, arg);
Jim Blandy's avatar
Jim Blandy committed
414 415 416
}

DEFUN ("downcase-word", Fdowncase_word, Sdowncase_word, 1, 1, "p",
417 418 419 420 421
       doc: /* Convert to lower case from point to end of word, moving over.

If point is in the middle of a word, the part of that word before point
is ignored when moving forward.

422
With negative argument, convert previous words but do not move.  */)
Dan Nicolaescu's avatar
Dan Nicolaescu committed
423
  (Lisp_Object arg)
Jim Blandy's avatar
Jim Blandy committed
424
{
425
  return casify_word (CASE_DOWN, arg);
Jim Blandy's avatar
Jim Blandy committed
426 427 428
}

DEFUN ("capitalize-word", Fcapitalize_word, Scapitalize_word, 1, 1, "p",
429 430
       doc: /* Capitalize from point to the end of word, moving over.
With numerical argument ARG, capitalize the next ARG-1 words as well.
431 432
This gives the word(s) a first character in upper case
and the rest lower case.
433 434 435 436

If point is in the middle of a word, the part of that word before point
is ignored when moving forward.

437
With negative argument, capitalize previous words but do not move.  */)
Dan Nicolaescu's avatar
Dan Nicolaescu committed
438
  (Lisp_Object arg)
Jim Blandy's avatar
Jim Blandy committed
439
{
440
  return casify_word (CASE_CAPITALIZE, arg);
Jim Blandy's avatar
Jim Blandy committed
441 442
}

Andreas Schwab's avatar
Andreas Schwab committed
443
void
444
syms_of_casefiddle (void)
Jim Blandy's avatar
Jim Blandy committed
445
{
446
  DEFSYM (Qidentity, "identity");
Jim Blandy's avatar
Jim Blandy committed
447 448 449
  defsubr (&Supcase);
  defsubr (&Sdowncase);
  defsubr (&Scapitalize);
450
  defsubr (&Supcase_initials);
Jim Blandy's avatar
Jim Blandy committed
451 452 453
  defsubr (&Supcase_region);
  defsubr (&Sdowncase_region);
  defsubr (&Scapitalize_region);
454
  defsubr (&Supcase_initials_region);
Jim Blandy's avatar
Jim Blandy committed
455 456 457 458 459
  defsubr (&Supcase_word);
  defsubr (&Sdowncase_word);
  defsubr (&Scapitalize_word);
}

Andreas Schwab's avatar
Andreas Schwab committed
460
void
461
keys_of_casefiddle (void)
Jim Blandy's avatar
Jim Blandy committed
462
{
Juanma Barranquero's avatar
Juanma Barranquero committed
463
  initial_define_key (control_x_map, Ctl ('U'), "upcase-region");
Jim Blandy's avatar
Jim Blandy committed
464
  Fput (intern ("upcase-region"), Qdisabled, Qt);
Juanma Barranquero's avatar
Juanma Barranquero committed
465
  initial_define_key (control_x_map, Ctl ('L'), "downcase-region");
Jim Blandy's avatar
Jim Blandy committed
466 467
  Fput (intern ("downcase-region"), Qdisabled, Qt);

Jim Blandy's avatar
Jim Blandy committed
468 469 470 471
  initial_define_key (meta_map, 'u', "upcase-word");
  initial_define_key (meta_map, 'l', "downcase-word");
  initial_define_key (meta_map, 'c', "capitalize-word");
}