etags.c 159 KB
Newer Older
Eli Zaretskii's avatar
Eli Zaretskii committed
1
/* Tags file maker to go with GNU Emacs           -*- coding: latin-1 -*-
2
   Copyright (C) 1984, 1987-1989, 1993-1995, 1998-2001, 2002
Karl Heuer's avatar
Karl Heuer committed
3
   Free Software Foundation, Inc. and Ken Arnold
4

5
 This file is not considered part of GNU Emacs.
Jim Blandy's avatar
Jim Blandy committed
6

7 8 9 10
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation; either version 2 of the License, or
 (at your option) any later version.
Jim Blandy's avatar
Jim Blandy committed
11

12 13 14 15
 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.
Jim Blandy's avatar
Jim Blandy committed
16

17 18 19
 You should have received a copy of the GNU General Public License
 along with this program; if not, write to the Free Software Foundation,
 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
Jim Blandy's avatar
Jim Blandy committed
20 21 22 23

/*
 * Authors:
 *	Ctags originally by Ken Arnold.
24
 *	Fortran added by Jim Kleckner.
Jim Blandy's avatar
Jim Blandy committed
25 26
 *	Ed Pelegri-Llopart added C typedefs.
 *	Gnu Emacs TAGS format and modifications by RMS?
27
 * 1989	Sam Kendall added C++.
Francesco Potortì's avatar
Francesco Potortì committed
28 29
 * 1992 Joseph B. Wells improved C and C++ parsing.
 * 1993	Francesco Potortì reorganised C and C++.
30
 * 1994	Line-by-line regexp tags by Tom Tromey.
31 32
 * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
 * 2002 #line directives by Francesco Potortì.
33
 *
34 35
 * Francesco Potortì <pot@gnu.org> has maintained and improved it since 1993.
 *
Jim Blandy's avatar
Jim Blandy committed
36 37
 */

38
/* Revision identification string kept in the object file.  The leading
   "@(#)" is presumably the marker searched for by what(1) -- confirm
   before relying on it. */
char pot_etags_version[] = "@(#) pot revision number is 16.29";
39 40 41

#define	TRUE	1
#define	FALSE	0
42

43 44 45 46 47 48
#ifdef DEBUG
#  undef DEBUG
#  define DEBUG TRUE
#else
#  define DEBUG  FALSE
#  define NDEBUG		/* disable assert */
49
#endif
50

51 52 53 54 55
#ifdef HAVE_CONFIG_H
# include <config.h>
  /* On some systems, Emacs defines static as nothing for the sake
     of unexec.  We don't want that here since we don't use unexec. */
# undef static
56 57
# define ETAGS_REGEXPS		/* use the regexp features */
# define LONG_OPTIONS		/* accept long options */
Francesco Potortì's avatar
Francesco Potortì committed
58 59 60 61 62 63
# ifndef PTR			/* for Xemacs */
#   define PTR void *
# endif
# ifndef __P			/* for Xemacs */
#   define __P(args) args
# endif
64
#else
Francesco Potortì's avatar
Francesco Potortì committed
65 66 67 68 69 70 71
# if defined(__STDC__) && (__STDC__ || defined(__SUNPRO_C))
#   define __P(args) args	/* use prototypes */
#   define PTR void *		/* for generic pointers */
# else
#   define __P(args) ()		/* no prototypes */
#   define const		/* remove const for old compilers' sake */
#   define PTR long *		/* don't use void* */
72 73
# endif
#endif /* !HAVE_CONFIG_H */
74

75 76 77 78
#ifndef _GNU_SOURCE
# define _GNU_SOURCE 1		/* enables some compiler checks on GNU */
#endif

79 80 81 82 83 84 85 86
/* WIN32_NATIVE is for Xemacs.
   MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
#ifdef WIN32_NATIVE
# undef MSDOS
# undef  WINDOWSNT
# define WINDOWSNT
#endif /* WIN32_NATIVE */

87
#ifdef MSDOS
88
# undef MSDOS
89
# define MSDOS TRUE
90 91
# include <fcntl.h>
# include <sys/param.h>
92 93 94 95 96
# include <io.h>
# ifndef HAVE_CONFIG_H
#   define DOS_NT
#   include <sys/config.h>
# endif
97 98
#else
# define MSDOS FALSE
99 100
#endif /* MSDOS */

101
#ifdef WINDOWSNT
102 103 104
# include <stdlib.h>
# include <fcntl.h>
# include <string.h>
105
# include <direct.h>
106
# include <io.h>
107
# define MAXPATHLEN _MAX_PATH
108 109 110
# undef HAVE_NTGUI
# undef  DOS_NT
# define DOS_NT
111 112 113
# ifndef HAVE_GETCWD
#   define HAVE_GETCWD
# endif /* undef HAVE_GETCWD */
114 115 116 117 118 119 120 121
#else /* !WINDOWSNT */
# ifdef STDC_HEADERS
#  include <stdlib.h>
#  include <string.h>
# else
    extern char *getenv ();
# endif
#endif /* !WINDOWSNT */
122

123 124 125
#ifdef HAVE_UNISTD_H
# include <unistd.h>
#else
126
# if defined (HAVE_GETCWD) && !defined (WINDOWSNT)
127
    extern char *getcwd (char *buf, size_t size);
128 129 130
# endif
#endif /* HAVE_UNISTD_H */

131 132
#include <stdio.h>
#include <ctype.h>
133 134
#include <errno.h>
#ifndef errno
135
  extern int errno;
136
#endif
137 138 139
#include <sys/types.h>
#include <sys/stat.h>

140 141 142 143
#include <assert.h>
#ifdef NDEBUG
# undef  assert			/* some systems have a buggy assert.h */
# define assert(x) ((void) 0)
144 145
#endif

146 147 148 149
#if !defined (S_ISREG) && defined (S_IFREG)
# define S_ISREG(m)	(((m) & S_IFMT) == S_IFREG)
#endif

150 151 152 153 154 155 156
#ifdef LONG_OPTIONS
# include <getopt.h>
#else
# define getopt_long(argc,argv,optstr,lopts,lind) getopt (argc, argv, optstr)
  extern char *optarg;
  extern int optind, opterr;
#endif /* LONG_OPTIONS */
157 158

#ifdef ETAGS_REGEXPS
159 160 161 162 163 164 165 166
# ifndef HAVE_CONFIG_H		/* this is a standalone compilation */
#   ifdef __CYGWIN__         	/* compiling on Cygwin */
			     !!! NOTICE !!!
 the regex.h distributed with Cygwin is not compatible with etags, alas!
If you want regular expression support, you should delete this notice and
	      arrange to use the GNU regex.h and regex.c.
#   endif
# endif
167
# include <regex.h>
168
#endif /* ETAGS_REGEXPS */
169

170
/* Define CTAGS to make the program "ctags" compatible with the usual one.
171
 Leave it undefined to make the program "etags", which makes emacs-style
172 173 174 175 176 177
 tag tables and tags typedefs, #defines and struct/union/enum by default. */
#ifdef CTAGS
# undef  CTAGS
# define CTAGS TRUE
#else
# define CTAGS FALSE
Jim Blandy's avatar
Jim Blandy committed
178 179 180 181
#endif

/* Exit codes for success and failure.  */
#ifdef VMS
182 183
# define	GOOD	1
# define	BAD	0
Jim Blandy's avatar
Jim Blandy committed
184
#else
185 186
# define	GOOD	0
# define	BAD	1
Jim Blandy's avatar
Jim Blandy committed
187 188
#endif

189 190
/*
 *	streq, strneq -- string equality predicates
 *
 * streq (s, t) is non-zero iff the strings S and T compare equal;
 * strneq (s, t, n) is non-zero iff their first N characters compare equal.
 *
 * Both S and T must be non-NULL, because they are handed straight to
 * strcmp/strncmp.  When assertions are enabled this is checked, and in
 * that case each argument is evaluated more than once, so do not pass
 * expressions with side effects.
 */
#define streq(s,t)	(assert((s)!=NULL && (t)!=NULL), !strcmp (s, t))
#define strneq(s,t,n)	(assert((s)!=NULL && (t)!=NULL), !strncmp (s, t, n))
Jim Blandy's avatar
Jim Blandy committed
191

192
/*
 * Character classification through lookup tables.
 *
 * CHARS is the size of each table.  CHAR (x) reduces X to an index in
 * the range 0 .. CHARS-1 by masking, so that a negative (signed) char
 * value still yields a valid table index.
 */
#define CHARS 256		/* number of distinct char values (2^8) */
#define CHAR(x)		((unsigned int)(x) & (CHARS - 1))
#define	iswhite(c)	(_wht[CHAR(c)]) /* c is white */
#define notinname(c)	(_nin[CHAR(c)]) /* c is not in a name */
#define	begtoken(c)	(_btk[CHAR(c)]) /* c can start token */
#define	intoken(c)	(_itk[CHAR(c)]) /* c can be in token */
#define	endtoken(c)	(_etk[CHAR(c)]) /* c ends tokens */
Jim Blandy's avatar
Jim Blandy committed
199

200 201 202 203 204 205 206
/* Wrappers around the <ctype.h> classification and case-mapping
   functions.  The argument is first passed through CHAR, so the library
   function always receives a non-negative value below CHARS. */
#define ISALNUM(c)	(isalnum (CHAR (c)))
#define ISALPHA(c)	(isalpha (CHAR (c)))
#define ISDIGIT(c)	(isdigit (CHAR (c)))
#define ISLOWER(c)	(islower (CHAR (c)))

#define lowcase(c)	(tolower (CHAR (c)))
#define upcase(c)	(toupper (CHAR (c)))
207

208

209
/*
210
 *	xnew, xrnew -- allocate, reallocate storage
211 212
 *
 * SYNOPSIS:	Type *xnew (int n, Type);
213
 *		void xrnew (OldPointer, int n, Type);
214
 */
215
/* xnew (n, Type) yields a `Type *' to room for N objects of type Type.
   xrnew (op, n, Type) resizes the storage OP points to so that it holds
   N objects of type Type, and stores the new address back into OP.
   DEBUG builds route both through the tracing allocator declared in
   chkmalloc.h; otherwise they use xmalloc and xrealloc. */
#if DEBUG
# include "chkmalloc.h"
# define xnew(n,Type)	  ((Type *) trace_malloc (__FILE__, __LINE__, \
						  (n) * sizeof (Type)))
# define xrnew(op,n,Type) ((op) = (Type *) trace_realloc (__FILE__, __LINE__, \
					(char *) (op), (n) * sizeof (Type)))
#else
# define xnew(n,Type)	  ((Type *) xmalloc ((n) * sizeof (Type)))
# define xrnew(op,n,Type) ((op) = (Type *) xrealloc ( \
					(char *) (op), (n) * sizeof (Type)))
#endif
Jim Blandy's avatar
Jim Blandy committed
226

Francesco Potortì's avatar
Francesco Potortì committed
227
/* Booleans are plain ints in this file.  Note that the address of
   several bool flags is stored in the longopts[] table, so the
   underlying type must stay compatible with what getopt_long expects
   there (presumably `int *' -- confirm against getopt.h). */
#define bool int
228

Francesco Potortì's avatar
Francesco Potortì committed
229
/* Type of a language parse function: it takes the input stream and
   returns nothing.  The `function' member of `language' points to one. */
typedef void Lang_function __P((FILE *));
Jim Blandy's avatar
Jim Blandy committed
230

231 232
/* Associates a file name suffix with the command that decompresses
   files carrying that suffix. */
typedef struct
{
  char *suffix;			/* file name suffix for this compressor */
  char *command;		/* takes one arg and decompresses to stdout */
} compressor;
Jim Blandy's avatar
Jim Blandy committed
236

237 238
typedef struct
{
239 240 241 242 243 244
  char *name;			/* language name */
  bool metasource;		/* source used to generate other sources */
  Lang_function *function;	/* parse function */
  char **filenames;		/* names of this language's files */
  char **suffixes;		/* name suffixes of this language's files */
  char **interpreters;		/* interpreters for this language */
245 246
} language;

247 248 249 250 251 252 253 254 255 256 257 258
/* Description of one input file.  Descriptions are chained into a
   singly linked list through the `next' member. */
typedef struct fdesc
{
  struct fdesc *next;		/* for the linked list */
  char *infname;		/* uncompressed input file name */
  char *infabsname;		/* absolute uncompressed input file name */
  char *infabsdir;		/* absolute dir of input file */
  char *taggedfname;		/* file name to write in tagfile */
  language *lang;		/* language of file */
  char *prop;			/* file properties to write in tagfile */
  bool usecharno;		/* etags tags shall contain char number */
} fdesc;

259
typedef struct node_st
260 261 262 263 264 265 266 267 268
{				/* sorting structure */
  struct node_st *left, *right;	/* left and right sons */
  fdesc *fdp;			/* description of file to whom tag belongs */
  char *name;			/* tag name */
  char *pat;			/* search pattern */
  bool valid;			/* write this tag on the tag file */
  bool is_func;			/* function tag: use pattern in CTAGS mode */
  bool been_warned;		/* warning already given for duplicated tag */
  int lno;			/* line number tag is on */
269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284
  long cno;			/* character number line starts on */
} node;

/*
 * A `linebuffer' is a structure which holds a line of text.
 * `readline_internal' reads a line from a stream into a linebuffer
 * and works regardless of the length of the line.
 * SIZE is the size of BUFFER, LEN is the length of the string in
 * BUFFER after readline reads it.
 */
typedef struct
{
  long size;			/* size of BUFFER */
  int len;			/* length of the string held in BUFFER */
  char *buffer;			/* the text itself */
} linebuffer;
285

286 287 288 289 290 291
/* Used to support mixing of --lang and file names. */
/* One command line argument together with its kind, so that language
   and regexp specifications can be interleaved with file names. */
typedef struct
{
  enum {
    at_language,		/* a language specification */
    at_regexp,			/* a regular expression */
    at_filename,		/* a file name */
    at_stdin			/* read from stdin here */
  } arg_type;			/* argument type */
  language *lang;		/* language associated with the argument */
  char *what;			/* the argument itself */
} argument;

#ifdef ETAGS_REGEXPS
/* Structure defining a regular expression. */
typedef struct pattern
{
  struct pattern *p_next;
  language *lang;
  char *regex;
  struct re_pattern_buffer *pat;
  struct re_registers regs;
  char *name_pattern;
  bool error_signaled;
  bool ignore_case;
311
  bool multi_line;
312 313 314 315
} pattern;
#endif /* ETAGS_REGEXPS */


316
/* Many compilers barf on this:
317
	Lang_function Ada_funcs;
318
   so let's write it this way */
Francesco Potortì's avatar
Francesco Potortì committed
319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351
static void Ada_funcs __P((FILE *));
static void Asm_labels __P((FILE *));
static void C_entries __P((int c_ext, FILE *));
static void default_C_entries __P((FILE *));
static void plain_C_entries __P((FILE *));
static void Cjava_entries __P((FILE *));
static void Cobol_paragraphs __P((FILE *));
static void Cplusplus_entries __P((FILE *));
static void Cstar_entries __P((FILE *));
static void Erlang_functions __P((FILE *));
static void Fortran_functions __P((FILE *));
static void Yacc_entries __P((FILE *));
static void Lisp_functions __P((FILE *));
static void Makefile_targets __P((FILE *));
static void Pascal_functions __P((FILE *));
static void Perl_functions __P((FILE *));
static void PHP_functions __P((FILE *));
static void Postscript_functions __P((FILE *));
static void Prolog_functions __P((FILE *));
static void Python_functions __P((FILE *));
static void Scheme_functions __P((FILE *));
static void TeX_commands __P((FILE *));
static void Texinfo_nodes __P((FILE *));
static void just_read_file __P((FILE *));

static void print_language_names __P((void));
static void print_version __P((void));
static void print_help __P((void));
int main __P((int, char **));

static compressor *get_compressor_from_suffix __P((char *, char **));
static language *get_language_from_langname __P((const char *));
static language *get_language_from_interpreter __P((char *));
352
static language *get_language_from_filename __P((char *, bool));
353
static void readline __P((linebuffer *, FILE *));
Francesco Potortì's avatar
Francesco Potortì committed
354 355 356
static long readline_internal __P((linebuffer *, FILE *));
static bool nocase_tail __P((char *));
static char *get_tag __P((char *));
357

358
#ifdef ETAGS_REGEXPS
359
static void analyse_regex __P((char *));
Francesco Potortì's avatar
Francesco Potortì committed
360
static void free_patterns __P((void));
361
static void regex_tag_multiline __P((void));
362
#endif /* ETAGS_REGEXPS */
Francesco Potortì's avatar
Francesco Potortì committed
363 364 365 366 367 368 369 370
static void error __P((const char *, const char *));
static void suggest_asking_for_help __P((void));
void fatal __P((char *, char *));
static void pfatal __P((char *));
static void add_node __P((node *, node **));

static void init __P((void));
static void initbuffer __P((linebuffer *));
371 372
static void process_file_name __P((char *, language *));
static void process_file __P((FILE *, char *, language *));
373
static void find_entries __P((FILE *));
Francesco Potortì's avatar
Francesco Potortì committed
374
static void free_tree __P((node *));
375
static void free_fdesc __P((fdesc *));
Francesco Potortì's avatar
Francesco Potortì committed
376
static void pfnote __P((char *, bool, char *, int, int, long));
377
static void make_tag __P((char *, int, bool, char *, int, int, long));
378
static void invalidate_nodes __P((fdesc *, node **));
Francesco Potortì's avatar
Francesco Potortì committed
379 380 381 382 383 384 385 386 387
static void put_entries __P((node *));

static char *concat __P((char *, char *, char *));
static char *skip_spaces __P((char *));
static char *skip_non_spaces __P((char *));
static char *savenstr __P((char *, int));
static char *savestr __P((char *));
static char *etags_strchr __P((const char *, int));
static char *etags_strrchr __P((const char *, int));
388
static bool strcaseeq __P((const char *, const char *));
Francesco Potortì's avatar
Francesco Potortì committed
389 390 391 392 393 394 395
static char *etags_getcwd __P((void));
static char *relative_filename __P((char *, char *));
static char *absolute_filename __P((char *, char *));
static char *absolute_dirname __P((char *, char *));
static bool filename_is_absolute __P((char *f));
static void canonicalize_filename __P((char *));
static void linebuffer_setlen __P((linebuffer *, int));
396 397
static PTR xmalloc __P((unsigned int));
static PTR xrealloc __P((char *, unsigned int));
398

Jim Blandy's avatar
Jim Blandy committed
399

400
static char searchar = '/';	/* use /.../ searches */
Jim Blandy's avatar
Jim Blandy committed
401

402 403 404 405 406
static char *tagfile;		/* output file */
static char *progname;		/* name this program was invoked with */
static char *cwd;		/* current working directory */
static char *tagfiledir;	/* directory of tagfile */
static FILE *tagf;		/* ioptr for tags file */
407

408 409
static fdesc *fdhead;		/* head of file description list */
static fdesc *curfdp;		/* current file description */
410 411 412 413
static int lineno;		/* line number of current line */
static long charno;		/* current character number */
static long linecharno;		/* charno of start of current line */
static char *dbp;		/* pointer to start of current tag */
414

415
static const int invalidcharno = -1;
416

417
static node *nodehead;		/* the head of the binary tree of tags */
418
static node *last_node;		/* the last node created */
Jim Blandy's avatar
Jim Blandy committed
419

420
static linebuffer lb;		/* the current line */
421
static linebuffer filebuf;	/* a buffer containing the whole file */
Jim Blandy's avatar
Jim Blandy committed
422

423
/* boolean "functions" (see init)	*/
424 425
static bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS];
static char
426
  /* white chars */
427
  *white = " \f\t\n\r\v",
428
  /* not in a name */
429
  *nonam = " \f\t\n\r()=,;",	/* look at make_tag before modifying! */
430
  /* token ending chars */
431
  *endtk = " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?",
432 433 434
  /* token starting chars */
  *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~@",
  /* valid in-token chars */
435
  *midtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789";
Jim Blandy's avatar
Jim Blandy committed
436

437
/* Flags set from the command line options. */
static bool append_to_tagfile;	/* -a: append to tags */
/* The next four default to TRUE for etags, but to FALSE for ctags.  */
static bool typedefs;		/* -t: create tags for C and Ada typedefs */
static bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
				/* 0 struct/enum/union decls, and C++ */
				/* member functions. */
static bool constantypedefs;	/* -d: create tags for C #define, enum */
				/* constants and variables. */
				/* -D: opposite of -d.  Default under ctags. */
static bool globals;		/* create tags for global variables */
static bool declarations;	/* --declarations: tag them and extern in C&Co*/
static bool members;		/* create tags for C member variables */
static bool no_line_directive;	/* ignore #line directives (undocumented) */
static bool update;		/* -u: update tags */
static bool vgrind_style;	/* -v: create vgrind style index output */
static bool no_warnings;	/* -w: suppress warnings */
static bool cxref_style;	/* -x: create cxref style output */
static bool cplusplus;		/* .[hc] means C++, not C */
static bool noindentypedefs;	/* -I: ignore indentation in C */
static bool packages_only;	/* --packages-only: in Ada, only tag packages*/
457

458 459 460
/* Option code for --parse-stdin, which has no single-letter form.  The
   value is larger than any character code, so it cannot clash with the
   single-letter option codes used in longopts[]. */
#define STDIN 0x1001		/* returned by getopt_long on --parse-stdin */
static bool parsing_stdin;	/* --parse-stdin used */

461
/* Regexp state.  Without regexp support need_filebuf is the constant
   FALSE, so code testing it needs no #ifdef. */
#ifdef ETAGS_REGEXPS
static pattern *p_head;		/* list of all regexps */
static bool need_filebuf;	/* some regexes are multi-line */
#else
# define need_filebuf FALSE
#endif /* ETAGS_REGEXPS */

468
#ifdef LONG_OPTIONS
469
static struct option longopts[] =
470
{
471 472 473 474 475 476 477 478 479 480 481
  { "packages-only",      no_argument,	     &packages_only, 	 TRUE  },
  { "c++",		  no_argument,	     NULL,	     	 'C'   },
  { "declarations",	  no_argument,	     &declarations,  	 TRUE  },
  { "no-line-directive",  no_argument,	     &no_line_directive, TRUE  },
  { "help",		  no_argument,	     NULL,     	     	 'h'   },
  { "help",		  no_argument,	     NULL,     	     	 'H'   },
  { "ignore-indentation", no_argument,	     NULL,     	     	 'I'   },
  { "language",           required_argument, NULL,     	     	 'l'   },
  { "members",		  no_argument,	     &members, 	     	 TRUE  },
  { "no-members",	  no_argument,	     &members, 	     	 FALSE },
  { "output",		  required_argument, NULL,	     	 'o'   },
482
#ifdef ETAGS_REGEXPS
483 484 485
  { "regex",		  required_argument, NULL,	     	 'r'   },
  { "no-regex",		  no_argument,	     NULL,	     	 'R'   },
  { "ignore-case-regex",  required_argument, NULL,	     	 'c'   },
486
#endif /* ETAGS_REGEXPS */
487
  { "parse-stdin",        required_argument, NULL,               STDIN },
488 489 490 491 492 493 494
  { "version",		  no_argument,	     NULL,     	     	 'V'   },

#if CTAGS /* Etags options */
  { "backward-search",	  no_argument,	     NULL,	     	 'B'   },
  { "cxref",		  no_argument,	     NULL,	     	 'x'   },
  { "defines",		  no_argument,	     NULL,	     	 'd'   },
  { "globals",		  no_argument,	     &globals, 	     	 TRUE  },
495 496 497 498
  { "typedefs",		  no_argument,	     NULL,	     	 't'   },
  { "typedefs-and-c++",	  no_argument,	     NULL,     	     	 'T'   },
  { "update",		  no_argument,	     NULL,     	     	 'u'   },
  { "vgrind",		  no_argument,	     NULL,     	     	 'v'   },
499 500 501 502 503 504 505 506
  { "no-warn",		  no_argument,	     NULL,	     	 'w'   },

#else /* Ctags options */
  { "append",		  no_argument,	     NULL,	     	 'a'   },
  { "no-defines",	  no_argument,	     NULL,	     	 'D'   },
  { "no-globals",	  no_argument,	     &globals, 	     	 FALSE },
  { "include",		  required_argument, NULL,     	     	 'i'   },
#endif
507
  { NULL }
Jim Blandy's avatar
Jim Blandy committed
508
};
509
#endif /* LONG_OPTIONS */
Jim Blandy's avatar
Jim Blandy committed
510

511
static compressor compressors[] =
512 513 514 515 516 517 518 519 520
{
  { "z", "gzip -d -c"},
  { "Z", "gzip -d -c"},
  { "gz", "gzip -d -c"},
  { "GZ", "gzip -d -c"},
  { "bz2", "bzip2 -d -c" },
  { NULL }
};

521 522 523
/*
 * Language stuff.
 */
524

525
/* Per-language tables of file name suffixes (without the dot), file
   names and interpreter names.  Every table is NULL-terminated. */

/* Ada code */
static char *Ada_suffixes [] =
  { "ads", "adb", "ada", NULL };

/* Assembly code */
static char *Asm_suffixes [] =
  { "a",	/* Unix assembler */
    "asm", /* Microcontroller assembly */
    "def", /* BSO/Tasking definition includes  */
    "inc", /* Microcontroller include files */
    "ins", /* Microcontroller include files */
    "s", "sa", /* Unix assembler */
    "S",   /* cpp-processed Unix assembler */
    "src", /* BSO/Tasking C compiler output */
    NULL
  };

/* Note that .c and .h can be considered C++, if the --c++ flag was
   given, or if the `class' keyword is met inside the file.
   That is why default_C_entries is called for these. */
static char *default_C_suffixes [] =
  { "c", "h", NULL };

static char *Cplusplus_suffixes [] =
  { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
    "M",			/* Objective C++ */
    "pdb",			/* Postscript with C syntax */
    NULL };

static char *Cjava_suffixes [] =
  { "java", NULL };

static char *Cobol_suffixes [] =
  { "COB", "cob", NULL };

static char *Cstar_suffixes [] =
  { "cs", "hs", NULL };

static char *Erlang_suffixes [] =
  { "erl", "hrl", NULL };

static char *Fortran_suffixes [] =
  { "F", "f", "f90", "for", NULL };

static char *Lisp_suffixes [] =
  { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };

static char *Makefile_filenames [] =
  { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL};

static char *Pascal_suffixes [] =
  { "p", "pas", NULL };

static char *Perl_suffixes [] =
  { "pl", "pm", NULL };

static char *Perl_interpreters [] =
  { "perl", "@PERL@", NULL };

static char *PHP_suffixes [] =
  { "php", "php3", "php4", NULL };

static char *plain_C_suffixes [] =
  { "lm",			/* Objective lex file */
    "m",			/* Objective C file */
    "pc",			/* Pro*C file */
     NULL };

static char *Postscript_suffixes [] =
  { "ps", "psw", NULL };	/* .psw is for PSWrap */

static char *Prolog_suffixes [] =
  { "prolog", NULL };

static char *Python_suffixes [] =
  { "py", NULL };

/* Can't do the `SCM' or `scm' prefix with a version number. */
static char *Scheme_suffixes [] =
  { "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL };

static char *TeX_suffixes [] =
  { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };

static char *Texinfo_suffixes [] =
  { "texi", "texinfo", "txi", NULL };

static char *Yacc_suffixes [] =
  { "y", "y++", "ym", "yxx", "yy", NULL }; /* .ym is Objective yacc file */
614

615 616 617 618 619 620
/*
 * Table of languages.
 *
 * It is ok for a given function to be listed under more than one
 * name.  I just didn't.
 */
621

622
static language lang_names [] =
623
{
624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648
  { "ada",      FALSE, Ada_funcs,            NULL, Ada_suffixes,        NULL },
  { "asm",      FALSE, Asm_labels,           NULL, Asm_suffixes,        NULL },
  { "c",        FALSE, default_C_entries,    NULL, default_C_suffixes,  NULL },
  { "c++",      FALSE, Cplusplus_entries,    NULL, Cplusplus_suffixes,  NULL },
  { "c*",       FALSE, Cstar_entries,        NULL, Cstar_suffixes,      NULL },
  { "cobol",    FALSE, Cobol_paragraphs,     NULL, Cobol_suffixes,      NULL },
  { "erlang",   FALSE, Erlang_functions,     NULL, Erlang_suffixes,     NULL },
  { "fortran",  FALSE, Fortran_functions,    NULL, Fortran_suffixes,    NULL },
  { "java",     FALSE, Cjava_entries,        NULL, Cjava_suffixes,      NULL },
  { "lisp",     FALSE, Lisp_functions,       NULL, Lisp_suffixes,       NULL },
  { "makefile", FALSE, Makefile_targets,     Makefile_filenames, NULL,  NULL },
  { "pascal",   FALSE, Pascal_functions,     NULL, Pascal_suffixes,     NULL },
  { "perl",     FALSE, Perl_functions,NULL, Perl_suffixes, Perl_interpreters },
  { "php",      FALSE, PHP_functions,        NULL, PHP_suffixes,        NULL },
  { "postscript",FALSE, Postscript_functions,NULL, Postscript_suffixes, NULL },
  { "proc",     FALSE, plain_C_entries,      NULL, plain_C_suffixes,    NULL },
  { "prolog",   FALSE, Prolog_functions,     NULL, Prolog_suffixes,     NULL },
  { "python",   FALSE, Python_functions,     NULL, Python_suffixes,     NULL },
  { "scheme",   FALSE, Scheme_functions,     NULL, Scheme_suffixes,     NULL },
  { "tex",      FALSE, TeX_commands,         NULL, TeX_suffixes,        NULL },
  { "texinfo",  FALSE, Texinfo_nodes,        NULL, Texinfo_suffixes,    NULL },
  { "yacc",      TRUE, Yacc_entries,         NULL, Yacc_suffixes,       NULL },
  { "auto", FALSE, NULL },             /* default guessing scheme */
  { "none", FALSE, just_read_file },   /* regexp matching only */
  { NULL, FALSE, NULL }                /* end of list */
649
};
650

Jim Blandy's avatar
Jim Blandy committed
651

652
static void
653 654
print_language_names ()
{
655
  language *lang;
656
  char **name, **ext;
657 658

  puts ("\nThese are the currently supported languages, along with the\n\
659
default file names and dot suffixes:");
660
  for (lang = lang_names; lang->name != NULL; lang++)
661
    {
662 663 664 665
      printf ("  %-*s", 10, lang->name);
      if (lang->filenames != NULL)
	for (name = lang->filenames; *name != NULL; name++)
	  printf (" %s", *name);
666 667 668
      if (lang->suffixes != NULL)
	for (ext = lang->suffixes; *ext != NULL; ext++)
	  printf (" .%s", *ext);
669 670
      puts ("");
    }
671 672 673
  puts ("Where `auto' means use default language for files based on file\n\
name suffix, and `none' means only do regexp processing on files.\n\
If no language is specified and no matching suffix is found,\n\
674 675
the first line of the file is read for a sharp-bang (#!) sequence\n\
followed by the name of an interpreter.  If no such sequence is found,\n\
676
Fortran is tried first; if no tags are found, C is tried next.\n\
677
When parsing any C file, a \"class\" keyword switches to C++.\n\
678
Compressed files are supported using gzip and bzip2.");
679 680
}

681
#ifndef EMACS_NAME
682
# define EMACS_NAME "standalone"
683
#endif
684
#ifndef VERSION
685
# define VERSION "version"
686
#endif
687
static void
Jim Blandy's avatar
Jim Blandy committed
688 689
print_version ()
{
690
  printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
691
  puts ("Copyright (C) 2002 Free Software Foundation, Inc. and Ken Arnold");
692
  puts ("This program is distributed under the same terms as Emacs");
Jim Blandy's avatar
Jim Blandy committed
693

694
  exit (GOOD);
Jim Blandy's avatar
Jim Blandy committed
695 696
}

697
static void
Jim Blandy's avatar
Jim Blandy committed
698 699
print_help ()
{
700 701 702 703 704 705 706 707 708
  printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
\n\
These are the options accepted by %s.\n", progname, progname);
#ifdef LONG_OPTIONS
  puts ("You may use unambiguous abbreviations for the long option names.");
#else
  puts ("Long option names do not work with this executable, as it is not\n\
linked with GNU getopt.");
#endif /* LONG_OPTIONS */
709 710 711
  puts ("  A - as file name means read names from stdin (one per line).\n\
Absolute names are stored in the output file as they are.\n\
Relative ones are stored relative to the output file's directory.\n");
Jim Blandy's avatar
Jim Blandy committed
712

713 714
  if (!CTAGS)
    puts ("-a, --append\n\
715
        Append tag entries to existing tags file.");
716

717
  puts ("--packages-only\n\
718
        For Ada files, only generate tags for packages.");
719

720 721
  if (CTAGS)
    puts ("-B, --backward-search\n\
722
        Write the search commands for the tag entries using '?', the\n\
723
        backward-search command instead of '/', the forward-search command.");
724

725 726 727 728
  /* This option is mostly obsolete, because etags can now automatically
     detect C++.  Retained for backward compatibility and for debugging and
     experimentation.  In principle, we could want to tag as C++ even
     before any "class" keyword.
729
  puts ("-C, --c++\n\
730
        Treat files whose name suffix defaults to C language as C++ files.");
731
  */
Jim Blandy's avatar
Jim Blandy committed
732

733 734 735 736 737 738 739 740
  puts ("--declarations\n\
	In C and derived languages, create tags for function declarations,");
  if (CTAGS)
    puts ("\tand create tags for extern variables if --globals is used.");
  else
    puts
      ("\tand create tags for extern variables unless --no-globals is used.");

741 742
  if (CTAGS)
    puts ("-d, --defines\n\
743
        Create tag entries for C #define constants and enum constants, too.");
744 745
  else
    puts ("-D, --no-defines\n\
746 747
        Don't create tag entries for C #define constants and enum constants.\n\
	This makes the tags file smaller.");
Jim Blandy's avatar
Jim Blandy committed
748

749
  if (!CTAGS)
750
    puts ("-i FILE, --include=FILE\n\
751 752 753
        Include a note in tag file indicating that, when searching for\n\
        a tag, one should also consult the tags file FILE after\n\
        checking the current file.");
754 755

  puts ("-l LANG, --language=LANG\n\
756 757
        Force the following files to be considered as written in the\n\
	named language up to the next --language=LANG option.");
758

759 760
  if (CTAGS)
    puts ("--globals\n\
761
	Create tag entries for global variables in some languages.");
762 763
  else
    puts ("--no-globals\n\
764 765
	Do not create tag entries for global variables in some\n\
	languages.  This makes the tags file smaller.");
766 767 768
  puts ("--members\n\
	Create tag entries for member variables in C and derived languages.");

769
#ifdef ETAGS_REGEXPS
770 771 772 773 774 775 776
  puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
        Make a tag for each line matching the regular expression pattern\n\
	in the following files.  {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
	files only.  REGEXFILE is a file containing one REGEXP per line.\n\
	REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
	optional.  The TAGREGEXP pattern is anchored (as if preceded by ^).");
  puts ("	If TAGNAME/ is present, the tags created are named.\n\
777
	For example Tcl named tags can be created with:\n\
778 779 780 781
	  --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
	MODS are optional one-letter modifiers: `i' means to ignore case,\n\
	`m' means to allow multi-line matches, `s' implies `m' and\n\
	causes dot to match the newline character as well.");
782
  puts ("-R, --no-regex\n\
783 784
        Don't create tags from regexps for the following files.");
#endif /* ETAGS_REGEXPS */
785
  puts ("-I, --ignore-indentation\n\
Jim Blandy's avatar
Jim Blandy committed
786 787 788
        Don't rely on indentation quite as much as normal.  Currently,\n\
        this means not to assume that a closing brace in the first\n\
        column is the final brace of a function or structure\n\
789
        definition in C and C++.");
790 791 792 793
  puts ("-o FILE, --output=FILE\n\
        Write the tags to FILE.");
  puts ("--parse-stdin=NAME\n\
        Read from standard input and record tags as belonging to file NAME.");
Jim Blandy's avatar
Jim Blandy committed
794

795 796 797
  if (CTAGS)
    {
      puts ("-t, --typedefs\n\
798
        Generate tag entries for C and Ada typedefs.");
799 800 801
      puts ("-T, --typedefs-and-c++\n\
        Generate tag entries for C typedefs, C struct/enum/union tags,\n\
        and C++ member functions.");
802 803 804 805
    }

  if (CTAGS)
    puts ("-u, --update\n\
Jim Blandy's avatar
Jim Blandy committed
806 807 808 809 810
        Update the tag entries for the given files, leaving tag\n\
        entries for other files in place.  Currently, this is\n\
        implemented by deleting the existing entries for the given\n\
        files and then rewriting the new entries at the end of the\n\
        tags file.  It is often faster to simply rebuild the entire\n\
811
        tag file than to use this.");
812 813 814

  if (CTAGS)
    {
815
      puts ("-v, --vgrind\n\
Jim Blandy's avatar
Jim Blandy committed
816 817
        Generates an index of items intended for human consumption,\n\
        similar to the output of vgrind.  The index is sorted, and\n\
818
        gives the page number of each item.");
819 820 821
      puts ("-w, --no-warn\n\
        Suppress warning messages about entries defined in multiple\n\
        files.");
822
      puts ("-x, --cxref\n\
Jim Blandy's avatar
Jim Blandy committed
823 824 825
        Like --vgrind, but in the style of cxref, rather than vgrind.\n\
        The output uses line numbers instead of page numbers, but\n\
        beyond that the differences are cosmetic; try both to see\n\
826
        which you like.");
827
    }
Jim Blandy's avatar
Jim Blandy committed
828 829 830

  puts ("-V, --version\n\
        Print the version of the program.\n\
831
-h, --help\n\
Jim Blandy's avatar
Jim Blandy committed
832 833
        Print this help message.");

834 835
  print_language_names ();

836
  puts ("");
837
  puts ("Report bugs to bug-gnu-emacs@gnu.org");
838

839
  exit (GOOD);
Jim Blandy's avatar
Jim Blandy committed
840 841
}

842 843 844 845 846 847 848 849 850 851 852 853 854 855 856

#ifdef VMS			/* VMS specific functions */

#define	EOS	'\0'

/* This is a BUG!  ANY arbitrary limit is a BUG!
   Won't someone please fix this?  */
#define	MAX_FILE_SPEC_LEN	255
/* File-name buffer used by the VMS wildcard expansion code.  fn_exp
   hands the address of the whole structure to lib$find_file through
   a descriptor of class DSC$K_CLASS_VS, and afterwards uses curlen as
   the index at which body gets its terminating EOS, so the order and
   types of the two members must not be changed.  */
typedef struct	{
  /* Length of the text currently held in body (read by fn_exp after
     a successful lib$find_file call).  */
  short   curlen;
  /* The file name itself; one extra byte leaves room for EOS.  */
  char    body[MAX_FILE_SPEC_LEN + 1];
} vspec;

/*
 v1.05 nmm 26-Jun-86 fn_exp - expand specification of list of file names,
 returning in each successive call the next file name matching the input
 spec.  The function expects that each in_spec passed
 to it will be processed to completion; in particular, up to and
 including the call following that in which the last matching name
 is returned, the function ignores the value of in_spec, and will
 only start processing a new spec with the following call.
 If an error occurs, on return out_spec contains the value
 of in_spec when the error occurred.

 With each successive file name returned in out_spec, the
 function's return value is one.  When there are no more matching
 names the function returns zero.  If on the first call no file
 matches in_spec, or there is any other error, -1 is returned.
*/

#include	<rmsdef.h>
#include	<descrip.h>
#define		OUTSIZE	MAX_FILE_SPEC_LEN
875
static short
876 877 878 879 880 881 882
fn_exp (out, in)
     vspec *out;
     char *in;
{
  static long context = 0;
  static struct dsc$descriptor_s o;
  static struct dsc$descriptor_s i;
883
  static bool pass1 = TRUE;
884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913
  long status;
  short retval;

  if (pass1)
    {
      pass1 = FALSE;
      o.dsc$a_pointer = (char *) out;
      o.dsc$w_length = (short)OUTSIZE;
      i.dsc$a_pointer = in;
      i.dsc$w_length = (short)strlen(in);
      i.dsc$b_dtype = DSC$K_DTYPE_T;
      i.dsc$b_class = DSC$K_CLASS_S;
      o.dsc$b_dtype = DSC$K_DTYPE_VT;
      o.dsc$b_class = DSC$K_CLASS_VS;
    }
  if ((status = lib$find_file(&i, &o, &context, 0, 0)) == RMS$_NORMAL)
    {
      out->body[out->curlen] = EOS;
      return 1;
    }
  else if (status == RMS$_NMF)
    retval = 0;
  else
    {
      strcpy(out->body, in);
      retval = -1;
    }
  lib$find_file_end(&context);
  pass1 = TRUE;
  return retval;
914
}
915 916

/*
  v1.01 nmm 19-Aug-85 gfnames - return in successive calls the
  name of each file specified by the provided arg, expanding wildcards.
*/
920
static char *
921 922
gfnames (arg, p_error)
     char *arg;
923
     bool *p_error;
924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944
{
  static vspec filename = {MAX_FILE_SPEC_LEN, "\0"};

  switch (fn_exp (&filename, arg))
    {
    case 1:
      *p_error = FALSE;
      return filename.body;
    case 0:
      *p_error = FALSE;
      return NULL;
    default:
      *p_error = TRUE;
      return filename.body;
    }
}

#ifndef OLD  /* Newer versions of VMS do provide `system'.  */
/* Replacement for the C library `system' on VMS configurations that
   get this stub.  CMD is never executed: the failure is reported
   through error () and -1, the conventional "could not run the
   command" result of system, is returned so that callers which test
   or propagate the result see a well-defined value.  */
int
system (cmd)
     char *cmd;
{
  error ("%s", "system() function not implemented under VMS");
  return -1;
}
#endif

#define	VERSION_DELIM	';'
char *massage_name (s)
     char *s;
{
953
  char *start = s;
954 955 956 957 958 959 960 961

  for ( ; *s; s++)
    if (*s == VERSION_DELIM)
      {
	*s = EOS;
	break;
      }
    else
962
      *s = lowcase (*s);
963 964 965 966
  return start;
}
#endif /* VMS */

Jim Blandy's avatar
Jim Blandy committed
967

Karl Heuer's avatar
Karl Heuer committed
968
int
Jim Blandy's avatar
Jim Blandy committed
969 970 971 972 973
main (argc, argv)
     int argc;
     char *argv[];
{
  int i;
974 975
  unsigned int nincluded_files;
  char **included_files;
976
  argument *argbuffer;
977
  int current_arg, file_count;
978
  linebuffer filename_lb;
Jim Blandy's avatar
Jim Blandy committed
979
#ifdef VMS
980
  bool got_err;
Jim Blandy's avatar
Jim Blandy committed
981
#endif
982 983 984
 char *optstring;
 int opt;

985

986
#ifdef DOS_NT
987
  _fmode = O_BINARY;   /* all of files are treated as binary files */
988
#endif /* DOS_NT */
989

Jim Blandy's avatar
Jim Blandy committed
990
  progname = argv[0];
991 992 993 994
  nincluded_files = 0;
  included_files = xnew (argc, char *);
  current_arg = 0;
  file_count = 0;
Jim Blandy's avatar
Jim Blandy committed
995

996 997
  /* Allocate enough no matter what happens.  Overkill, but each one
     is small. */
998
  argbuffer = xnew (argc, argument);
999

Jim Blandy's avatar
Jim Blandy committed
1000 1001
  /*
   * If etags, always find typedefs and structure tags.  Why not?
1002
   * Also default to find macro constants, enum constants and
1003
   * global variables.
Jim Blandy's avatar
Jim Blandy committed
1004
   */
1005
  if (!CTAGS)
1006
    {
1007
      typedefs = typedefs_or_cplusplus = constantypedefs = TRUE;
1008 1009
      globals = TRUE;
    }
Jim Blandy's avatar
Jim Blandy committed
1010

1011
  optstring = "-";
1012
#ifdef ETAGS_REGEXPS
1013
  optstring = "-r:Rc:";
1014 1015
#endif /* ETAGS_REGEXPS */
#ifndef LONG_OPTIONS
1016
  optstring = optstring + 1;
1017
#endif /* LONG_OPTIONS */
1018 1019 1020
  optstring = concat (optstring,
		      "Cf:Il:o:SVhH",
		      (CTAGS) ? "BxdtTuvw" : "aDi:");
Jim Blandy's avatar
Jim Blandy committed
1021

1022 1023 1024 1025 1026 1027
  while ((opt = getopt_long (argc, argv, optstring, longopts, 0)) != EOF)
    switch (opt)
      {
      case 0:
	/* If getopt returns 0, then it has already processed a
	   long-named option.  We should do nothing.  */
Jim Blandy's avatar
Jim Blandy committed
1028 1029
	break;

1030 1031 1032 1033 1034 1035 1036
      case 1:
	/* This means that a file name has been seen.  Record it. */
	argbuffer[current_arg].arg_type = at_filename;
	argbuffer[current_arg].what     = optarg;
	++current_arg;
	++file_count;
	break;
Jim Blandy's avatar
Jim Blandy committed
1037

1038 1039 1040 1041 1042 1043
      case STDIN:
	/* Parse standard input.  Idea by Vivek <vivek@etla.org>. */
	argbuffer[current_arg].arg_type = at_stdin;
	argbuffer[current_arg].what     = optarg;
	++current_arg;
	++file_count;