casefiddle.c 13.1 KB
Newer Older
Jim Blandy's avatar
Jim Blandy committed
1
/* GNU Emacs case conversion functions.
2
   Copyright (C) 1985, 1994, 1997, 1998, 1999, 2001, 2002, 2003, 2004,
Glenn Morris's avatar
Glenn Morris committed
3
                 2005, 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
Jim Blandy's avatar
Jim Blandy committed
4 5 6

This file is part of GNU Emacs.

7
GNU Emacs is free software: you can redistribute it and/or modify
Jim Blandy's avatar
Jim Blandy committed
8
it under the terms of the GNU General Public License as published by
9 10
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Jim Blandy's avatar
Jim Blandy committed
11 12 13 14 15 16 17

GNU Emacs is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
18
along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
Jim Blandy's avatar
Jim Blandy committed
19 20


21
#include <config.h>
22
#include <setjmp.h>
Jim Blandy's avatar
Jim Blandy committed
23 24
#include "lisp.h"
#include "buffer.h"
25
#include "character.h"
Jim Blandy's avatar
Jim Blandy committed
26 27
#include "commands.h"
#include "syntax.h"
Kenichi Handa's avatar
Kenichi Handa committed
28
#include "composite.h"
Stefan Monnier's avatar
Stefan Monnier committed
29
#include "keymap.h"
Jim Blandy's avatar
Jim Blandy committed
30 31

enum case_action {CASE_UP, CASE_DOWN, CASE_CAPITALIZE, CASE_CAPITALIZE_UP};
Karl Heuer's avatar
Karl Heuer committed
32 33

Lisp_Object Qidentity;
Jim Blandy's avatar
Jim Blandy committed
34 35 36 37 38 39

Lisp_Object
casify_object (flag, obj)
     enum case_action flag;
     Lisp_Object obj;
{
40
  register int c, c1;
Jim Blandy's avatar
Jim Blandy committed
41 42
  register int inword = flag == CASE_DOWN;

43 44 45 46
  /* If the case table is flagged as modified, rescan it.  */
  if (NILP (XCHAR_TABLE (current_buffer->downcase_table)->extras[1]))
    Fset_case_table (current_buffer->downcase_table);

47
  if (INTEGERP (obj))
Jim Blandy's avatar
Jim Blandy committed
48
    {
49 50 51 52 53 54 55 56 57 58 59 60
      int flagbits = (CHAR_ALT | CHAR_SUPER | CHAR_HYPER
		      | CHAR_SHIFT | CHAR_CTL | CHAR_META);
      int flags = XINT (obj) & flagbits;
      int multibyte = ! NILP (current_buffer->enable_multibyte_characters);

      /* If the character has higher bits set
	 above the flags, return it unchanged.
	 It is not a real character.  */
      if ((unsigned) XFASTINT (obj) > (unsigned) flagbits)
	return obj;

      c1 = XFASTINT (obj) & ~flagbits;
61 62 63 64 65 66
      /* FIXME: Even if enable-multibyte-characters is nil, we may
	 manipulate multibyte chars.  This means we have a bug for latin-1
	 chars since when we receive an int 128-255 we can't tell whether
	 it's an eight-bit byte or a latin-1 char.  */
      if (c1 >= 256)
	multibyte = 1;
67 68 69 70 71 72
      if (! multibyte)
	MAKE_CHAR_MULTIBYTE (c1);
      c = DOWNCASE (c1);
      if (inword)
	XSETFASTINT (obj, c | flags);
      else if (c == (XFASTINT (obj) & ~flagbits))
Jim Blandy's avatar
Jim Blandy committed
73
	{
74 75
	  if (! inword)
	    c = UPCASE1 (c1);
76
	  if (! multibyte)
77 78
	    MAKE_CHAR_UNIBYTE (c);
	  XSETFASTINT (obj, c | flags);
Jim Blandy's avatar
Jim Blandy committed
79
	}
80 81
      return obj;
    }
82

83 84 85
  if (!STRINGP (obj))
    wrong_type_argument (Qchar_or_string_p, obj);
  else if (!STRING_MULTIBYTE (obj))
86
    {
87 88
      EMACS_INT i;
      EMACS_INT size = SCHARS (obj);
89

90
      obj = Fcopy_sequence (obj);
91
      for (i = 0; i < size; i++)
92
	{
93
	  c = SREF (obj, i);
94
	  MAKE_CHAR_MULTIBYTE (c);
95 96 97 98 99 100 101 102 103 104 105
	  c1 = c;
	  if (inword && flag != CASE_CAPITALIZE_UP)
	    c = DOWNCASE (c);
	  else if (!UPPERCASEP (c)
		   && (!inword || flag != CASE_CAPITALIZE_UP))
	    c = UPCASE1 (c1);
	  if ((int) flag >= (int) CASE_CAPITALIZE)
	    inword = (SYNTAX (c) == Sword);
	  if (c != c1)
	    {
		  MAKE_CHAR_UNIBYTE (c);
106 107 108 109 110 111 112
	      /* If the char can't be converted to a valid byte, just don't
		 change it.  */
	      if (c >= 0 && c < 256)
		SSET (obj, i, c);
	    }
	}
      return obj;
113 114 115 116 117
    }
  else
    {
      EMACS_INT i, i_byte, size = SCHARS (obj);
      int len;
118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134
      USE_SAFE_ALLOCA;
      unsigned char *dst, *o;
      /* Over-allocate by 12%: this is a minor overhead, but should be
	 sufficient in 99.999% of the cases to avoid a reallocation.  */
      EMACS_INT o_size = SBYTES (obj) + SBYTES (obj) / 8 + MAX_MULTIBYTE_LENGTH;
      SAFE_ALLOCA (dst, void *, o_size);
      o = dst;

      for (i = i_byte = 0; i < size; i++, i_byte += len)
	{
	  if ((o - dst) + MAX_MULTIBYTE_LENGTH > o_size)
	    { /* Not enough space for the next char: grow the destination.  */
	      unsigned char *old_dst = dst;
	      o_size += o_size;	/* Probably overkill, but extremely rare.  */
	      SAFE_ALLOCA (dst, void *, o_size);
	      bcopy (old_dst, dst, o - old_dst);
	      o = dst + (o - old_dst);
135
	    }
136
	  c = STRING_CHAR_AND_LENGTH (SDATA (obj) + i_byte, len);
137 138 139 140 141 142 143 144
	  if (inword && flag != CASE_CAPITALIZE_UP)
	    c = DOWNCASE (c);
	  else if (!UPPERCASEP (c)
		   && (!inword || flag != CASE_CAPITALIZE_UP))
	    c = UPCASE1 (c);
	  if ((int) flag >= (int) CASE_CAPITALIZE)
	    inword = (SYNTAX (c) == Sword);
	  o += CHAR_STRING (c, o);
Jim Blandy's avatar
Jim Blandy committed
145
	}
146 147 148
      eassert (o - dst <= o_size);
      obj = make_multibyte_string (dst, size, o - dst);
      SAFE_FREE ();
149
      return obj;
Jim Blandy's avatar
Jim Blandy committed
150 151 152 153
    }
}

DEFUN ("upcase", Fupcase, Supcase, 1, 1, 0,
154 155 156 157 158
       doc: /* Convert argument to upper case and return that.
The argument may be a character or string.  The result has the same type.
The argument object is not altered--the value is a copy.
See also `capitalize', `downcase' and `upcase-initials'.  */)
     (obj)
Jim Blandy's avatar
Jim Blandy committed
159 160 161 162 163 164
     Lisp_Object obj;
{
  return casify_object (CASE_UP, obj);
}

DEFUN ("downcase", Fdowncase, Sdowncase, 1, 1, 0,
165 166 167 168
       doc: /* Convert argument to lower case and return that.
The argument may be a character or string.  The result has the same type.
The argument object is not altered--the value is a copy.  */)
     (obj)
Jim Blandy's avatar
Jim Blandy committed
169 170 171 172 173 174
     Lisp_Object obj;
{
  return casify_object (CASE_DOWN, obj);
}

DEFUN ("capitalize", Fcapitalize, Scapitalize, 1, 1, 0,
175 176 177 178 179 180
       doc: /* Convert argument to capitalized form and return that.
This means that each word's first character is upper case
and the rest is lower case.
The argument may be a character or string.  The result has the same type.
The argument object is not altered--the value is a copy.  */)
     (obj)
Jim Blandy's avatar
Jim Blandy committed
181 182 183 184
     Lisp_Object obj;
{
  return casify_object (CASE_CAPITALIZE, obj);
}
185

186 187
/* Like Fcapitalize but change only the initials.  */

188
DEFUN ("upcase-initials", Fupcase_initials, Supcase_initials, 1, 1, 0,
189 190 191 192 193
       doc: /* Convert the initial of each word in the argument to upper case.
Do not change the other letters of each word.
The argument may be a character or string.  The result has the same type.
The argument object is not altered--the value is a copy.  */)
     (obj)
194 195 196 197
     Lisp_Object obj;
{
  return casify_object (CASE_CAPITALIZE_UP, obj);
}
Jim Blandy's avatar
Jim Blandy committed
198 199 200 201

/* flag is CASE_UP, CASE_DOWN or CASE_CAPITALIZE or CASE_CAPITALIZE_UP.
   b and e specify range of buffer to operate on. */

Andreas Schwab's avatar
Andreas Schwab committed
202
void
Jim Blandy's avatar
Jim Blandy committed
203 204 205 206 207 208
casify_region (flag, b, e)
     enum case_action flag;
     Lisp_Object b, e;
{
  register int c;
  register int inword = flag == CASE_DOWN;
209
  register int multibyte = !NILP (current_buffer->enable_multibyte_characters);
210 211 212 213 214
  EMACS_INT start, end;
  EMACS_INT start_byte, end_byte;
  EMACS_INT first = -1, last;	/* Position of first and last changes.  */
  EMACS_INT opoint = PT;
  EMACS_INT opoint_byte = PT_BYTE;
Jim Blandy's avatar
Jim Blandy committed
215 216 217 218 219

  if (EQ (b, e))
    /* Not modifying because nothing marked */
    return;

220 221 222 223
  /* If the case table is flagged as modified, rescan it.  */
  if (NILP (XCHAR_TABLE (current_buffer->downcase_table)->extras[1]))
    Fset_case_table (current_buffer->downcase_table);

Jim Blandy's avatar
Jim Blandy committed
224
  validate_region (&b, &e);
225 226
  start = XFASTINT (b);
  end = XFASTINT (e);
227
  modify_region (current_buffer, start, end, 0);
228
  record_change (start, end - start);
229 230
  start_byte = CHAR_TO_BYTE (start);
  end_byte = CHAR_TO_BYTE (end);
Jim Blandy's avatar
Jim Blandy committed
231

232
  while (start < end)
Karl Heuer's avatar
Karl Heuer committed
233
    {
234 235 236 237 238 239 240 241 242 243 244 245 246 247
      int c2, len;

      if (multibyte)
	{
	  c = FETCH_MULTIBYTE_CHAR (start_byte);
	  len = CHAR_BYTES (c);
	}
      else
	{
	  c = FETCH_BYTE (start_byte);
	  MAKE_CHAR_MULTIBYTE (c);
	  len = 1;
	}
      c2 = c;
248 249 250 251 252 253
      if (inword && flag != CASE_CAPITALIZE_UP)
	c = DOWNCASE (c);
      else if (!UPPERCASEP (c)
	       && (!inword || flag != CASE_CAPITALIZE_UP))
	c = UPCASE1 (c);
      if ((int) flag >= (int) CASE_CAPITALIZE)
Kenichi Handa's avatar
Kenichi Handa committed
254
	inword = ((SYNTAX (c) == Sword) && (inword || !SYNTAX_PREFIX (c)));
255
      if (c != c2)
Karl Heuer's avatar
Karl Heuer committed
256
	{
257 258 259 260
	  last = start;
	  if (first < 0)
	    first = start;

261 262 263 264 265 266 267
	  if (! multibyte)
	    {
	      MAKE_CHAR_UNIBYTE (c);
	      FETCH_BYTE (start_byte) = c;
	    }
	  else if (ASCII_CHAR_P (c2) && ASCII_CHAR_P (c))
	    FETCH_BYTE (start_byte) = c;
268
	  else
Karl Heuer's avatar
Karl Heuer committed
269
	    {
270
	      int tolen = CHAR_BYTES (c);
271
	      int j;
Kenichi Handa's avatar
Kenichi Handa committed
272
	      unsigned char str[MAX_MULTIBYTE_LENGTH];
Karl Heuer's avatar
Karl Heuer committed
273

274
	      CHAR_STRING (c, str);
275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290
	      if (len == tolen)
		{
		  /* Length is unchanged.  */
		  for (j = 0; j < len; ++j)
		    FETCH_BYTE (start_byte + j) = str[j];
		}
	      else
		{
		  /* Replace one character with the other,
		     keeping text properties the same.  */
		  replace_range_2 (start, start_byte,
				   start + 1, start_byte + len,
				   str, 1, tolen,
				   0);
		  len = tolen;
		}
Karl Heuer's avatar
Karl Heuer committed
291 292
	    }
	}
293 294
      start++;
      start_byte += len;
Jim Blandy's avatar
Jim Blandy committed
295 296
    }

Kenichi Handa's avatar
Kenichi Handa committed
297 298 299
  if (PT != opoint)
    TEMP_SET_PT_BOTH (opoint, opoint_byte);

300
  if (first >= 0)
Kenichi Handa's avatar
Kenichi Handa committed
301
    {
302 303
      signal_after_change (first, last + 1 - first, last + 1 - first);
      update_compositions (first, last + 1, CHECK_ALL);
Kenichi Handa's avatar
Kenichi Handa committed
304
    }
Jim Blandy's avatar
Jim Blandy committed
305 306 307
}

DEFUN ("upcase-region", Fupcase_region, Supcase_region, 2, 2, "r",
308 309 310 311 312 313
       doc: /* Convert the region to upper case.  In programs, wants two arguments.
These arguments specify the starting and ending character numbers of
the region to operate on.  When used as a command, the text between
point and the mark is operated on.
See also `capitalize-region'.  */)
     (beg, end)
314
     Lisp_Object beg, end;
Jim Blandy's avatar
Jim Blandy committed
315
{
316
  casify_region (CASE_UP, beg, end);
Jim Blandy's avatar
Jim Blandy committed
317 318 319 320
  return Qnil;
}

DEFUN ("downcase-region", Fdowncase_region, Sdowncase_region, 2, 2, "r",
321 322 323 324 325
       doc: /* Convert the region to lower case.  In programs, wants two arguments.
These arguments specify the starting and ending character numbers of
the region to operate on.  When used as a command, the text between
point and the mark is operated on.  */)
     (beg, end)
326
     Lisp_Object beg, end;
Jim Blandy's avatar
Jim Blandy committed
327
{
328
  casify_region (CASE_DOWN, beg, end);
Jim Blandy's avatar
Jim Blandy committed
329 330 331 332
  return Qnil;
}

DEFUN ("capitalize-region", Fcapitalize_region, Scapitalize_region, 2, 2, "r",
333 334 335 336 337 338
       doc: /* Convert the region to capitalized form.
Capitalized form means each word's first character is upper case
and the rest of it is lower case.
In programs, give two arguments, the starting and ending
character positions to operate on.  */)
     (beg, end)
339
     Lisp_Object beg, end;
Jim Blandy's avatar
Jim Blandy committed
340
{
341
  casify_region (CASE_CAPITALIZE, beg, end);
Jim Blandy's avatar
Jim Blandy committed
342 343 344
  return Qnil;
}

345 346
/* Like Fcapitalize_region but change only the initials.  */

347 348
DEFUN ("upcase-initials-region", Fupcase_initials_region,
       Supcase_initials_region, 2, 2, "r",
349 350 351 352 353
       doc: /* Upcase the initial of each word in the region.
Subsequent letters of each word are not changed.
In programs, give two arguments, the starting and ending
character positions to operate on.  */)
     (beg, end)
354
     Lisp_Object beg, end;
355
{
356
  casify_region (CASE_CAPITALIZE_UP, beg, end);
357 358
  return Qnil;
}
Jim Blandy's avatar
Jim Blandy committed
359

360
static Lisp_Object
361
operate_on_word (arg, newpoint)
Jim Blandy's avatar
Jim Blandy committed
362
     Lisp_Object arg;
363
     EMACS_INT *newpoint;
Jim Blandy's avatar
Jim Blandy committed
364
{
365
  Lisp_Object val;
366
  int farend;
367
  int iarg;
Jim Blandy's avatar
Jim Blandy committed
368

369
  CHECK_NUMBER (arg);
370
  iarg = XINT (arg);
371
  farend = scan_words (PT, iarg);
Jim Blandy's avatar
Jim Blandy committed
372
  if (!farend)
373
    farend = iarg > 0 ? ZV : BEGV;
Jim Blandy's avatar
Jim Blandy committed
374

375
  *newpoint = PT > farend ? PT : farend;
376
  XSETFASTINT (val, farend);
Jim Blandy's avatar
Jim Blandy committed
377 378 379 380 381

  return val;
}

DEFUN ("upcase-word", Fupcase_word, Supcase_word, 1, 1, "p",
382 383 384 385
       doc: /* Convert following word (or ARG words) to upper case, moving over.
With negative argument, convert previous words but do not move.
See also `capitalize-word'.  */)
     (arg)
Jim Blandy's avatar
Jim Blandy committed
386 387
     Lisp_Object arg;
{
388
  Lisp_Object beg, end;
389
  EMACS_INT newpoint;
390
  XSETFASTINT (beg, PT);
391 392 393
  end = operate_on_word (arg, &newpoint);
  casify_region (CASE_UP, beg, end);
  SET_PT (newpoint);
Jim Blandy's avatar
Jim Blandy committed
394 395 396 397
  return Qnil;
}

DEFUN ("downcase-word", Fdowncase_word, Sdowncase_word, 1, 1, "p",
398 399 400
       doc: /* Convert following word (or ARG words) to lower case, moving over.
With negative argument, convert previous words but do not move.  */)
     (arg)
Jim Blandy's avatar
Jim Blandy committed
401 402
     Lisp_Object arg;
{
403
  Lisp_Object beg, end;
404
  EMACS_INT newpoint;
405
  XSETFASTINT (beg, PT);
406 407 408
  end = operate_on_word (arg, &newpoint);
  casify_region (CASE_DOWN, beg, end);
  SET_PT (newpoint);
Jim Blandy's avatar
Jim Blandy committed
409 410 411 412
  return Qnil;
}

DEFUN ("capitalize-word", Fcapitalize_word, Scapitalize_word, 1, 1, "p",
413 414 415 416 417
       doc: /* Capitalize the following word (or ARG words), moving over.
This gives the word(s) a first character in upper case
and the rest lower case.
With negative argument, capitalize previous words but do not move.  */)
     (arg)
Jim Blandy's avatar
Jim Blandy committed
418 419
     Lisp_Object arg;
{
420
  Lisp_Object beg, end;
421
  EMACS_INT newpoint;
422
  XSETFASTINT (beg, PT);
423 424 425
  end = operate_on_word (arg, &newpoint);
  casify_region (CASE_CAPITALIZE, beg, end);
  SET_PT (newpoint);
Jim Blandy's avatar
Jim Blandy committed
426 427 428
  return Qnil;
}

Andreas Schwab's avatar
Andreas Schwab committed
429
void
Jim Blandy's avatar
Jim Blandy committed
430 431
syms_of_casefiddle ()
{
Dan Nicolaescu's avatar
Dan Nicolaescu committed
432
  Qidentity = intern_c_string ("identity");
Karl Heuer's avatar
Karl Heuer committed
433
  staticpro (&Qidentity);
Jim Blandy's avatar
Jim Blandy committed
434 435 436
  defsubr (&Supcase);
  defsubr (&Sdowncase);
  defsubr (&Scapitalize);
437
  defsubr (&Supcase_initials);
Jim Blandy's avatar
Jim Blandy committed
438 439 440
  defsubr (&Supcase_region);
  defsubr (&Sdowncase_region);
  defsubr (&Scapitalize_region);
441
  defsubr (&Supcase_initials_region);
Jim Blandy's avatar
Jim Blandy committed
442 443 444 445 446
  defsubr (&Supcase_word);
  defsubr (&Sdowncase_word);
  defsubr (&Scapitalize_word);
}

Andreas Schwab's avatar
Andreas Schwab committed
447
void
Jim Blandy's avatar
Jim Blandy committed
448 449 450
keys_of_casefiddle ()
{
  initial_define_key (control_x_map, Ctl('U'), "upcase-region");
Jim Blandy's avatar
Jim Blandy committed
451
  Fput (intern ("upcase-region"), Qdisabled, Qt);
Jim Blandy's avatar
Jim Blandy committed
452
  initial_define_key (control_x_map, Ctl('L'), "downcase-region");
Jim Blandy's avatar
Jim Blandy committed
453 454
  Fput (intern ("downcase-region"), Qdisabled, Qt);

Jim Blandy's avatar
Jim Blandy committed
455 456 457 458
  initial_define_key (meta_map, 'u', "upcase-word");
  initial_define_key (meta_map, 'l', "downcase-word");
  initial_define_key (meta_map, 'c', "capitalize-word");
}
Kenichi Handa's avatar
Kenichi Handa committed
459 460 461

/* arch-tag: 60a73c66-5489-47e7-a81f-cead4057c526
   (do not change this comment) */