casefiddle.c 12.9 KB
Newer Older
Jim Blandy's avatar
Jim Blandy committed
1
/* GNU Emacs case conversion functions.
2 3

Copyright (C) 1985, 1994, 1997-1999, 2001-2011 Free Software Foundation, Inc.
Jim Blandy's avatar
Jim Blandy committed
4 5 6

This file is part of GNU Emacs.

7
GNU Emacs is free software: you can redistribute it and/or modify
Jim Blandy's avatar
Jim Blandy committed
8
it under the terms of the GNU General Public License as published by
9 10
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Jim Blandy's avatar
Jim Blandy committed
11 12 13 14 15 16 17

GNU Emacs is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
18
along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
Jim Blandy's avatar
Jim Blandy committed
19 20


21
#include <config.h>
22
#include <setjmp.h>
Jim Blandy's avatar
Jim Blandy committed
23 24
#include "lisp.h"
#include "buffer.h"
25
#include "character.h"
Jim Blandy's avatar
Jim Blandy committed
26 27
#include "commands.h"
#include "syntax.h"
Kenichi Handa's avatar
Kenichi Handa committed
28
#include "composite.h"
Stefan Monnier's avatar
Stefan Monnier committed
29
#include "keymap.h"
Jim Blandy's avatar
Jim Blandy committed
30 31

enum case_action {CASE_UP, CASE_DOWN, CASE_CAPITALIZE, CASE_CAPITALIZE_UP};
Karl Heuer's avatar
Karl Heuer committed
32 33

Lisp_Object Qidentity;
Jim Blandy's avatar
Jim Blandy committed
34 35

Lisp_Object
36
casify_object (enum case_action flag, Lisp_Object obj)
Jim Blandy's avatar
Jim Blandy committed
37
{
38
  register int c, c1;
Jim Blandy's avatar
Jim Blandy committed
39 40
  register int inword = flag == CASE_DOWN;

41
  /* If the case table is flagged as modified, rescan it.  */
Tom Tromey's avatar
Tom Tromey committed
42 43
  if (NILP (XCHAR_TABLE (BVAR (current_buffer, downcase_table))->extras[1]))
    Fset_case_table (BVAR (current_buffer, downcase_table));
44

45
  if (INTEGERP (obj))
Jim Blandy's avatar
Jim Blandy committed
46
    {
47 48 49
      int flagbits = (CHAR_ALT | CHAR_SUPER | CHAR_HYPER
		      | CHAR_SHIFT | CHAR_CTL | CHAR_META);
      int flags = XINT (obj) & flagbits;
Tom Tromey's avatar
Tom Tromey committed
50
      int multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
51 52 53 54 55 56 57 58

      /* If the character has higher bits set
	 above the flags, return it unchanged.
	 It is not a real character.  */
      if ((unsigned) XFASTINT (obj) > (unsigned) flagbits)
	return obj;

      c1 = XFASTINT (obj) & ~flagbits;
59 60 61 62 63 64
      /* FIXME: Even if enable-multibyte-characters is nil, we may
	 manipulate multibyte chars.  This means we have a bug for latin-1
	 chars since when we receive an int 128-255 we can't tell whether
	 it's an eight-bit byte or a latin-1 char.  */
      if (c1 >= 256)
	multibyte = 1;
65 66 67 68 69 70
      if (! multibyte)
	MAKE_CHAR_MULTIBYTE (c1);
      c = DOWNCASE (c1);
      if (inword)
	XSETFASTINT (obj, c | flags);
      else if (c == (XFASTINT (obj) & ~flagbits))
Jim Blandy's avatar
Jim Blandy committed
71
	{
72 73
	  if (! inword)
	    c = UPCASE1 (c1);
74
	  if (! multibyte)
75 76
	    MAKE_CHAR_UNIBYTE (c);
	  XSETFASTINT (obj, c | flags);
Jim Blandy's avatar
Jim Blandy committed
77
	}
78 79
      return obj;
    }
80

81 82 83
  if (!STRINGP (obj))
    wrong_type_argument (Qchar_or_string_p, obj);
  else if (!STRING_MULTIBYTE (obj))
84
    {
85 86
      EMACS_INT i;
      EMACS_INT size = SCHARS (obj);
87

88
      obj = Fcopy_sequence (obj);
89
      for (i = 0; i < size; i++)
90
	{
91
	  c = SREF (obj, i);
92
	  MAKE_CHAR_MULTIBYTE (c);
93 94 95 96 97 98 99 100 101 102 103
	  c1 = c;
	  if (inword && flag != CASE_CAPITALIZE_UP)
	    c = DOWNCASE (c);
	  else if (!UPPERCASEP (c)
		   && (!inword || flag != CASE_CAPITALIZE_UP))
	    c = UPCASE1 (c1);
	  if ((int) flag >= (int) CASE_CAPITALIZE)
	    inword = (SYNTAX (c) == Sword);
	  if (c != c1)
	    {
		  MAKE_CHAR_UNIBYTE (c);
104 105 106 107 108 109 110
	      /* If the char can't be converted to a valid byte, just don't
		 change it.  */
	      if (c >= 0 && c < 256)
		SSET (obj, i, c);
	    }
	}
      return obj;
111 112 113 114 115
    }
  else
    {
      EMACS_INT i, i_byte, size = SCHARS (obj);
      int len;
116 117 118 119 120 121 122 123 124 125 126 127 128 129 130
      USE_SAFE_ALLOCA;
      unsigned char *dst, *o;
      /* Over-allocate by 12%: this is a minor overhead, but should be
	 sufficient in 99.999% of the cases to avoid a reallocation.  */
      EMACS_INT o_size = SBYTES (obj) + SBYTES (obj) / 8 + MAX_MULTIBYTE_LENGTH;
      SAFE_ALLOCA (dst, void *, o_size);
      o = dst;

      for (i = i_byte = 0; i < size; i++, i_byte += len)
	{
	  if ((o - dst) + MAX_MULTIBYTE_LENGTH > o_size)
	    { /* Not enough space for the next char: grow the destination.  */
	      unsigned char *old_dst = dst;
	      o_size += o_size;	/* Probably overkill, but extremely rare.  */
	      SAFE_ALLOCA (dst, void *, o_size);
131
	      memcpy (dst, old_dst, o - old_dst);
132
	      o = dst + (o - old_dst);
133
	    }
134
	  c = STRING_CHAR_AND_LENGTH (SDATA (obj) + i_byte, len);
135 136 137 138 139 140 141 142
	  if (inword && flag != CASE_CAPITALIZE_UP)
	    c = DOWNCASE (c);
	  else if (!UPPERCASEP (c)
		   && (!inword || flag != CASE_CAPITALIZE_UP))
	    c = UPCASE1 (c);
	  if ((int) flag >= (int) CASE_CAPITALIZE)
	    inword = (SYNTAX (c) == Sword);
	  o += CHAR_STRING (c, o);
Jim Blandy's avatar
Jim Blandy committed
143
	}
144
      eassert (o - dst <= o_size);
145
      obj = make_multibyte_string ((char *) dst, size, o - dst);
146
      SAFE_FREE ();
147
      return obj;
Jim Blandy's avatar
Jim Blandy committed
148 149 150 151
    }
}

DEFUN ("upcase", Fupcase, Supcase, 1, 1, 0,
152 153 154 155
       doc: /* Convert argument to upper case and return that.
The argument may be a character or string.  The result has the same type.
The argument object is not altered--the value is a copy.
See also `capitalize', `downcase' and `upcase-initials'.  */)
Dan Nicolaescu's avatar
Dan Nicolaescu committed
156
  (Lisp_Object obj)
Jim Blandy's avatar
Jim Blandy committed
157 158 159 160 161
{
  return casify_object (CASE_UP, obj);
}

DEFUN ("downcase", Fdowncase, Sdowncase, 1, 1, 0,
162 163 164
       doc: /* Convert argument to lower case and return that.
The argument may be a character or string.  The result has the same type.
The argument object is not altered--the value is a copy.  */)
Dan Nicolaescu's avatar
Dan Nicolaescu committed
165
  (Lisp_Object obj)
Jim Blandy's avatar
Jim Blandy committed
166 167 168 169 170
{
  return casify_object (CASE_DOWN, obj);
}

DEFUN ("capitalize", Fcapitalize, Scapitalize, 1, 1, 0,
171 172 173 174 175
       doc: /* Convert argument to capitalized form and return that.
This means that each word's first character is upper case
and the rest is lower case.
The argument may be a character or string.  The result has the same type.
The argument object is not altered--the value is a copy.  */)
Dan Nicolaescu's avatar
Dan Nicolaescu committed
176
  (Lisp_Object obj)
Jim Blandy's avatar
Jim Blandy committed
177 178 179
{
  return casify_object (CASE_CAPITALIZE, obj);
}
180

181 182
/* Like Fcapitalize but change only the initials.  */

183
DEFUN ("upcase-initials", Fupcase_initials, Supcase_initials, 1, 1, 0,
184 185 186 187
       doc: /* Convert the initial of each word in the argument to upper case.
Do not change the other letters of each word.
The argument may be a character or string.  The result has the same type.
The argument object is not altered--the value is a copy.  */)
Dan Nicolaescu's avatar
Dan Nicolaescu committed
188
  (Lisp_Object obj)
189 190 191
{
  return casify_object (CASE_CAPITALIZE_UP, obj);
}
Jim Blandy's avatar
Jim Blandy committed
192 193 194 195

/* flag is CASE_UP, CASE_DOWN or CASE_CAPITALIZE or CASE_CAPITALIZE_UP.
   b and e specify range of buffer to operate on. */

Andreas Schwab's avatar
Andreas Schwab committed
196
void
197
casify_region (enum case_action flag, Lisp_Object b, Lisp_Object e)
Jim Blandy's avatar
Jim Blandy committed
198 199 200
{
  register int c;
  register int inword = flag == CASE_DOWN;
Tom Tromey's avatar
Tom Tromey committed
201
  register int multibyte = !NILP (BVAR (current_buffer, enable_multibyte_characters));
202 203 204 205 206
  EMACS_INT start, end;
  EMACS_INT start_byte, end_byte;
  EMACS_INT first = -1, last;	/* Position of first and last changes.  */
  EMACS_INT opoint = PT;
  EMACS_INT opoint_byte = PT_BYTE;
Jim Blandy's avatar
Jim Blandy committed
207 208 209 210 211

  if (EQ (b, e))
    /* Not modifying because nothing marked */
    return;

212
  /* If the case table is flagged as modified, rescan it.  */
Tom Tromey's avatar
Tom Tromey committed
213 214
  if (NILP (XCHAR_TABLE (BVAR (current_buffer, downcase_table))->extras[1]))
    Fset_case_table (BVAR (current_buffer, downcase_table));
215

Jim Blandy's avatar
Jim Blandy committed
216
  validate_region (&b, &e);
217 218
  start = XFASTINT (b);
  end = XFASTINT (e);
219
  modify_region (current_buffer, start, end, 0);
220
  record_change (start, end - start);
221 222
  start_byte = CHAR_TO_BYTE (start);
  end_byte = CHAR_TO_BYTE (end);
Jim Blandy's avatar
Jim Blandy committed
223

224 225
  SETUP_BUFFER_SYNTAX_TABLE();	/* For syntax_prefix_flag_p.  */

226
  while (start < end)
Karl Heuer's avatar
Karl Heuer committed
227
    {
228 229 230 231 232 233 234 235 236 237 238 239 240 241
      int c2, len;

      if (multibyte)
	{
	  c = FETCH_MULTIBYTE_CHAR (start_byte);
	  len = CHAR_BYTES (c);
	}
      else
	{
	  c = FETCH_BYTE (start_byte);
	  MAKE_CHAR_MULTIBYTE (c);
	  len = 1;
	}
      c2 = c;
242 243 244 245 246 247
      if (inword && flag != CASE_CAPITALIZE_UP)
	c = DOWNCASE (c);
      else if (!UPPERCASEP (c)
	       && (!inword || flag != CASE_CAPITALIZE_UP))
	c = UPCASE1 (c);
      if ((int) flag >= (int) CASE_CAPITALIZE)
248 249
	inword = ((SYNTAX (c) == Sword)
		  && (inword || !syntax_prefix_flag_p (c)));
250
      if (c != c2)
Karl Heuer's avatar
Karl Heuer committed
251
	{
252 253 254 255
	  last = start;
	  if (first < 0)
	    first = start;

256 257 258 259 260 261 262
	  if (! multibyte)
	    {
	      MAKE_CHAR_UNIBYTE (c);
	      FETCH_BYTE (start_byte) = c;
	    }
	  else if (ASCII_CHAR_P (c2) && ASCII_CHAR_P (c))
	    FETCH_BYTE (start_byte) = c;
263
	  else
Karl Heuer's avatar
Karl Heuer committed
264
	    {
265
	      int tolen = CHAR_BYTES (c);
266
	      int j;
Kenichi Handa's avatar
Kenichi Handa committed
267
	      unsigned char str[MAX_MULTIBYTE_LENGTH];
Karl Heuer's avatar
Karl Heuer committed
268

269
	      CHAR_STRING (c, str);
270 271 272 273 274 275 276 277 278 279 280 281
	      if (len == tolen)
		{
		  /* Length is unchanged.  */
		  for (j = 0; j < len; ++j)
		    FETCH_BYTE (start_byte + j) = str[j];
		}
	      else
		{
		  /* Replace one character with the other,
		     keeping text properties the same.  */
		  replace_range_2 (start, start_byte,
				   start + 1, start_byte + len,
282
				   (char *) str, 1, tolen,
283 284 285
				   0);
		  len = tolen;
		}
Karl Heuer's avatar
Karl Heuer committed
286 287
	    }
	}
288 289
      start++;
      start_byte += len;
Jim Blandy's avatar
Jim Blandy committed
290 291
    }

Kenichi Handa's avatar
Kenichi Handa committed
292 293 294
  if (PT != opoint)
    TEMP_SET_PT_BOTH (opoint, opoint_byte);

295
  if (first >= 0)
Kenichi Handa's avatar
Kenichi Handa committed
296
    {
297 298
      signal_after_change (first, last + 1 - first, last + 1 - first);
      update_compositions (first, last + 1, CHECK_ALL);
Kenichi Handa's avatar
Kenichi Handa committed
299
    }
Jim Blandy's avatar
Jim Blandy committed
300 301 302
}

DEFUN ("upcase-region", Fupcase_region, Supcase_region, 2, 2, "r",
303 304 305 306 307
       doc: /* Convert the region to upper case.  In programs, wants two arguments.
These arguments specify the starting and ending character numbers of
the region to operate on.  When used as a command, the text between
point and the mark is operated on.
See also `capitalize-region'.  */)
Dan Nicolaescu's avatar
Dan Nicolaescu committed
308
  (Lisp_Object beg, Lisp_Object end)
Jim Blandy's avatar
Jim Blandy committed
309
{
310
  casify_region (CASE_UP, beg, end);
Jim Blandy's avatar
Jim Blandy committed
311 312 313 314
  return Qnil;
}

DEFUN ("downcase-region", Fdowncase_region, Sdowncase_region, 2, 2, "r",
315 316 317 318
       doc: /* Convert the region to lower case.  In programs, wants two arguments.
These arguments specify the starting and ending character numbers of
the region to operate on.  When used as a command, the text between
point and the mark is operated on.  */)
Dan Nicolaescu's avatar
Dan Nicolaescu committed
319
  (Lisp_Object beg, Lisp_Object end)
Jim Blandy's avatar
Jim Blandy committed
320
{
321
  casify_region (CASE_DOWN, beg, end);
Jim Blandy's avatar
Jim Blandy committed
322 323 324 325
  return Qnil;
}

DEFUN ("capitalize-region", Fcapitalize_region, Scapitalize_region, 2, 2, "r",
326 327 328 329 330
       doc: /* Convert the region to capitalized form.
Capitalized form means each word's first character is upper case
and the rest of it is lower case.
In programs, give two arguments, the starting and ending
character positions to operate on.  */)
Dan Nicolaescu's avatar
Dan Nicolaescu committed
331
  (Lisp_Object beg, Lisp_Object end)
Jim Blandy's avatar
Jim Blandy committed
332
{
333
  casify_region (CASE_CAPITALIZE, beg, end);
Jim Blandy's avatar
Jim Blandy committed
334 335 336
  return Qnil;
}

337 338
/* Like Fcapitalize_region but change only the initials.  */

339 340
DEFUN ("upcase-initials-region", Fupcase_initials_region,
       Supcase_initials_region, 2, 2, "r",
341 342 343 344
       doc: /* Upcase the initial of each word in the region.
Subsequent letters of each word are not changed.
In programs, give two arguments, the starting and ending
character positions to operate on.  */)
Dan Nicolaescu's avatar
Dan Nicolaescu committed
345
  (Lisp_Object beg, Lisp_Object end)
346
{
347
  casify_region (CASE_CAPITALIZE_UP, beg, end);
348 349
  return Qnil;
}
Jim Blandy's avatar
Jim Blandy committed
350

351
static Lisp_Object
352
operate_on_word (Lisp_Object arg, EMACS_INT *newpoint)
Jim Blandy's avatar
Jim Blandy committed
353
{
354
  Lisp_Object val;
355 356
  EMACS_INT farend;
  EMACS_INT iarg;
Jim Blandy's avatar
Jim Blandy committed
357

358
  CHECK_NUMBER (arg);
359
  iarg = XINT (arg);
360
  farend = scan_words (PT, iarg);
Jim Blandy's avatar
Jim Blandy committed
361
  if (!farend)
362
    farend = iarg > 0 ? ZV : BEGV;
Jim Blandy's avatar
Jim Blandy committed
363

364
  *newpoint = PT > farend ? PT : farend;
365
  XSETFASTINT (val, farend);
Jim Blandy's avatar
Jim Blandy committed
366 367 368 369 370

  return val;
}

DEFUN ("upcase-word", Fupcase_word, Supcase_word, 1, 1, "p",
371 372 373
       doc: /* Convert following word (or ARG words) to upper case, moving over.
With negative argument, convert previous words but do not move.
See also `capitalize-word'.  */)
Dan Nicolaescu's avatar
Dan Nicolaescu committed
374
  (Lisp_Object arg)
Jim Blandy's avatar
Jim Blandy committed
375
{
376
  Lisp_Object beg, end;
377
  EMACS_INT newpoint;
378
  XSETFASTINT (beg, PT);
379 380 381
  end = operate_on_word (arg, &newpoint);
  casify_region (CASE_UP, beg, end);
  SET_PT (newpoint);
Jim Blandy's avatar
Jim Blandy committed
382 383 384 385
  return Qnil;
}

DEFUN ("downcase-word", Fdowncase_word, Sdowncase_word, 1, 1, "p",
386 387
       doc: /* Convert following word (or ARG words) to lower case, moving over.
With negative argument, convert previous words but do not move.  */)
Dan Nicolaescu's avatar
Dan Nicolaescu committed
388
  (Lisp_Object arg)
Jim Blandy's avatar
Jim Blandy committed
389
{
390
  Lisp_Object beg, end;
391
  EMACS_INT newpoint;
392
  XSETFASTINT (beg, PT);
393 394 395
  end = operate_on_word (arg, &newpoint);
  casify_region (CASE_DOWN, beg, end);
  SET_PT (newpoint);
Jim Blandy's avatar
Jim Blandy committed
396 397 398 399
  return Qnil;
}

DEFUN ("capitalize-word", Fcapitalize_word, Scapitalize_word, 1, 1, "p",
400 401 402 403
       doc: /* Capitalize the following word (or ARG words), moving over.
This gives the word(s) a first character in upper case
and the rest lower case.
With negative argument, capitalize previous words but do not move.  */)
Dan Nicolaescu's avatar
Dan Nicolaescu committed
404
  (Lisp_Object arg)
Jim Blandy's avatar
Jim Blandy committed
405
{
406
  Lisp_Object beg, end;
407
  EMACS_INT newpoint;
408
  XSETFASTINT (beg, PT);
409 410 411
  end = operate_on_word (arg, &newpoint);
  casify_region (CASE_CAPITALIZE, beg, end);
  SET_PT (newpoint);
Jim Blandy's avatar
Jim Blandy committed
412 413 414
  return Qnil;
}

Andreas Schwab's avatar
Andreas Schwab committed
415
void
416
syms_of_casefiddle (void)
Jim Blandy's avatar
Jim Blandy committed
417
{
Dan Nicolaescu's avatar
Dan Nicolaescu committed
418
  Qidentity = intern_c_string ("identity");
Karl Heuer's avatar
Karl Heuer committed
419
  staticpro (&Qidentity);
Jim Blandy's avatar
Jim Blandy committed
420 421 422
  defsubr (&Supcase);
  defsubr (&Sdowncase);
  defsubr (&Scapitalize);
423
  defsubr (&Supcase_initials);
Jim Blandy's avatar
Jim Blandy committed
424 425 426
  defsubr (&Supcase_region);
  defsubr (&Sdowncase_region);
  defsubr (&Scapitalize_region);
427
  defsubr (&Supcase_initials_region);
Jim Blandy's avatar
Jim Blandy committed
428 429 430 431 432
  defsubr (&Supcase_word);
  defsubr (&Sdowncase_word);
  defsubr (&Scapitalize_word);
}

Andreas Schwab's avatar
Andreas Schwab committed
433
void
434
keys_of_casefiddle (void)
Jim Blandy's avatar
Jim Blandy committed
435 436
{
  initial_define_key (control_x_map, Ctl('U'), "upcase-region");
Jim Blandy's avatar
Jim Blandy committed
437
  Fput (intern ("upcase-region"), Qdisabled, Qt);
Jim Blandy's avatar
Jim Blandy committed
438
  initial_define_key (control_x_map, Ctl('L'), "downcase-region");
Jim Blandy's avatar
Jim Blandy committed
439 440
  Fput (intern ("downcase-region"), Qdisabled, Qt);

Jim Blandy's avatar
Jim Blandy committed
441 442 443 444
  initial_define_key (meta_map, 'u', "upcase-word");
  initial_define_key (meta_map, 'l', "downcase-word");
  initial_define_key (meta_map, 'c', "capitalize-word");
}