etags.c 175 KB
Newer Older
Eli Zaretskii's avatar
Eli Zaretskii committed
1
/* Tags file maker to go with GNU Emacs           -*- coding: latin-1 -*-
2

3
Copyright (C) 1984 The Regents of the University of California
Jim Blandy's avatar
Jim Blandy committed
4

5 6 7 8 9 10 11 12 13 14 15 16
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the
   distribution.
3. Neither the name of the University nor the names of its
   contributors may be used to endorse or promote products derived
   from this software without specific prior written permission.
Jim Blandy's avatar
Jim Blandy committed
17

18 19 20 21 22 23 24 25 26 27 28 29 30
THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS''
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS
BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


31 32
Copyright (C) 1984, 1987-1989, 1993-1995, 1998-2011
  Free Software Foundation, Inc.
33 34 35

This file is not considered part of GNU Emacs.

36
This program is free software: you can redistribute it and/or modify
37
it under the terms of the GNU General Public License as published by
38 39
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
40 41 42 43 44 45 46

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
47
along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
48 49 50 51 52 53 54 55 56


/* NB To comply with the above BSD license, copyright information is
reproduced in etc/ETAGS.README.  That file should be updated when the
above notices are.

To the best of our knowledge, this code was originally based on the
ctags.c distributed with BSD4.2, which was copyrighted by the
University of California, as described above. */
Jim Blandy's avatar
Jim Blandy committed
57 58 59 60


/*
 * Authors:
61 62 63 64 65
 * 1983 Ctags originally by Ken Arnold.
 * 1984 Fortran added by Jim Kleckner.
 * 1984 Ed Pelegri-Llopart added C typedefs.
 * 1985 Emacs TAGS format by Richard Stallman.
 * 1989 Sam Kendall added C++.
Francesco Potortì's avatar
Francesco Potortì committed
66
 * 1992 Joseph B. Wells improved C and C++ parsing.
67 68
 * 1993 Francesco Potortì reorganized C and C++.
 * 1994 Line-by-line regexp tags by Tom Tromey.
69 70
 * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
 * 2002 #line directives by Francesco Potortì.
71
 *
72
 * Francesco Potortì <pot@gnu.org> has maintained and improved it since 1993.
Jim Blandy's avatar
Jim Blandy committed
73 74
 */

75 76
/*
 * If you want to add support for a new language, start by looking at the LUA
77 78
 * language, which is the simplest.  Alternatively, consider distributing etags
 * together with a configuration file containing regexp definitions for etags.
79 80
 */

81
char pot_etags_version[] = "@(#) pot revision number is 17.38.1.4";
82 83 84

#define	TRUE	1
#define	FALSE	0
85

86 87 88 89 90 91
#ifdef DEBUG
#  undef DEBUG
#  define DEBUG TRUE
#else
#  define DEBUG  FALSE
#  define NDEBUG		/* disable assert */
92
#endif
93

94 95 96 97 98
#ifdef HAVE_CONFIG_H
# include <config.h>
  /* On some systems, Emacs defines static as nothing for the sake
     of unexec.  We don't want that here since we don't use unexec. */
# undef static
99
# ifndef PTR			/* for XEmacs */
Francesco Potortì's avatar
Francesco Potortì committed
100 101
#   define PTR void *
# endif
102
#else  /* no config.h */
Francesco Potortì's avatar
Francesco Potortì committed
103 104
# if defined(__STDC__) && (__STDC__ || defined(__SUNPRO_C))
#   define PTR void *		/* for generic pointers */
105
# else /* not standard C */
Francesco Potortì's avatar
Francesco Potortì committed
106 107
#   define const		/* remove const for old compilers' sake */
#   define PTR long *		/* don't use void* */
108 109
# endif
#endif /* !HAVE_CONFIG_H */
110

111 112 113 114
#ifndef _GNU_SOURCE
# define _GNU_SOURCE 1		/* enables some compiler checks on GNU */
#endif

115
/* WIN32_NATIVE is for XEmacs.
116 117 118 119 120 121 122
   MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
#ifdef WIN32_NATIVE
# undef MSDOS
# undef  WINDOWSNT
# define WINDOWSNT
#endif /* WIN32_NATIVE */

123
#ifdef MSDOS
124
# undef MSDOS
125
# define MSDOS TRUE
126 127
# include <fcntl.h>
# include <sys/param.h>
128 129 130 131 132
# include <io.h>
# ifndef HAVE_CONFIG_H
#   define DOS_NT
#   include <sys/config.h>
# endif
133 134
#else
# define MSDOS FALSE
135 136
#endif /* MSDOS */

137
#ifdef WINDOWSNT
138 139 140
# include <stdlib.h>
# include <fcntl.h>
# include <string.h>
141
# include <direct.h>
142
# include <io.h>
143
# define MAXPATHLEN _MAX_PATH
144 145 146
# undef HAVE_NTGUI
# undef  DOS_NT
# define DOS_NT
147 148 149
# ifndef HAVE_GETCWD
#   define HAVE_GETCWD
# endif /* undef HAVE_GETCWD */
150
#else /* not WINDOWSNT */
151 152 153
# ifdef STDC_HEADERS
#  include <stdlib.h>
#  include <string.h>
154
# else /* no standard C headers */
155 156 157 158 159 160 161 162 163 164 165 166 167 168
   extern char *getenv (const char *);
   extern char *strcpy (char *, const char *);
   extern char *strncpy (char *, const char *, unsigned long);
   extern char *strcat (char *, const char *);
   extern char *strncat (char *, const char *, unsigned long);
   extern int strcmp (const char *, const char *);
   extern int strncmp (const char *, const char *, unsigned long);
   extern int system (const char *);
   extern unsigned long strlen (const char *);
   extern void *malloc (unsigned long);
   extern void *realloc (void *, unsigned long);
   extern void exit (int);
   extern void free (void *);
   extern void *memmove (void *, const void *, unsigned long);
Dan Nicolaescu's avatar
Dan Nicolaescu committed
169 170
#  define EXIT_SUCCESS	0
#  define EXIT_FAILURE	1
171 172
# endif
#endif /* !WINDOWSNT */
173

Paul Eggert's avatar
Paul Eggert committed
174 175
#include <unistd.h>
#ifndef HAVE_UNISTD_H
176
# if defined (HAVE_GETCWD) && !defined (WINDOWSNT)
177
    extern char *getcwd (char *buf, size_t size);
178 179 180
# endif
#endif /* HAVE_UNISTD_H */

181 182
#include <stdio.h>
#include <ctype.h>
183
#include <errno.h>
184 185 186
#include <sys/types.h>
#include <sys/stat.h>

187 188 189 190
#include <assert.h>
#ifdef NDEBUG
# undef  assert			/* some systems have a buggy assert.h */
# define assert(x) ((void) 0)
191 192
#endif

193 194
#ifdef NO_LONG_OPTIONS		/* define this if you don't have GNU getopt */
# define NO_LONG_OPTIONS TRUE
195 196 197
# define getopt_long(argc,argv,optstr,lopts,lind) getopt (argc, argv, optstr)
  extern char *optarg;
  extern int optind, opterr;
198 199 200 201
#else
# define NO_LONG_OPTIONS FALSE
# include <getopt.h>
#endif /* NO_LONG_OPTIONS */
202

203 204
#ifndef HAVE_CONFIG_H		/* this is a standalone compilation */
# ifdef __CYGWIN__         	/* compiling on Cygwin */
205 206 207 208 209
			     !!! NOTICE !!!
 the regex.h distributed with Cygwin is not compatible with etags, alas!
If you want regular expression support, you should delete this notice and
	      arrange to use the GNU regex.h and regex.c.
# endif
210 211
#endif
#include <regex.h>
212

213
/* Define CTAGS to make the program "ctags" compatible with the usual one.
214
 Leave it undefined to make the program "etags", which makes emacs-style
215 216 217 218 219 220
 tag tables and tags typedefs, #defines and struct/union/enum by default. */
#ifdef CTAGS
# undef  CTAGS
# define CTAGS TRUE
#else
# define CTAGS FALSE
Jim Blandy's avatar
Jim Blandy committed
221 222
#endif

223
/* Non-zero iff the strings S and T are equal.  Both arguments are
   handed to strcmp, so both must be non-NULL; the assertion therefore
   requires each of them to be non-NULL, as strcaseeq does.  The check
   is compiled out when NDEBUG is defined.  */
#define streq(s,t)	(assert((s)!=NULL && (t)!=NULL), !strcmp (s, t))
224
#define strcaseeq(s,t)	(assert((s)!=NULL && (t)!=NULL), !etags_strcasecmp (s, t))
225
/* Non-zero iff the first N characters of S and T are equal.  Both
   arguments are handed to strncmp, so both must be non-NULL; the
   assertion therefore requires each of them to be non-NULL, as
   strncaseeq does.  The check is compiled out when NDEBUG is defined.  */
#define strneq(s,t,n)	(assert((s)!=NULL && (t)!=NULL), !strncmp (s, t, n))
226
#define strncaseeq(s,t,n) (assert((s)!=NULL && (t)!=NULL), !etags_strncasecmp (s, t, n))
Jim Blandy's avatar
Jim Blandy committed
227

228
#define CHARS 256		/* number of distinct char values: 2^CHAR_BIT for 8-bit chars */
229
#define CHAR(x)		((unsigned int)(x) & (CHARS - 1))
230 231 232 233 234
#define	iswhite(c)	(_wht[CHAR(c)]) /* c is white (see white) */
#define notinname(c)	(_nin[CHAR(c)]) /* c is not in a name (see nonam) */
#define	begtoken(c)	(_btk[CHAR(c)]) /* c can start token (see begtk) */
#define	intoken(c)	(_itk[CHAR(c)]) /* c can be in token (see midtk) */
#define	endtoken(c)	(_etk[CHAR(c)]) /* c ends tokens (see endtk) */
Jim Blandy's avatar
Jim Blandy committed
235

236 237 238 239 240 241
/* <ctype.h> classification and case conversion for plain `char' values.
   The argument is first reduced to the range 0..CHARS-1 by CHAR, so a
   negative `char' is never handed to the ctype functions.  */
#define ISALNUM(c)	isalnum (CHAR(c))
#define ISALPHA(c)	isalpha (CHAR(c))
#define ISDIGIT(c)	isdigit (CHAR(c))
#define ISLOWER(c)	islower (CHAR(c))

#define lowcase(c)	tolower (CHAR(c))
242

243

244
/*
245
 *	xnew, xrnew -- allocate, reallocate storage
246 247
 *
 * SYNOPSIS:	Type *xnew (int n, Type);
248
 *		void xrnew (OldPointer, int n, Type);
249
 */
250
#if DEBUG
251
# include "chkmalloc.h"
252 253
# define xnew(n,Type)	  ((Type *) trace_malloc (__FILE__, __LINE__, \
						  (n) * sizeof (Type)))
254 255
# define xrnew(op,n,Type) ((op) = (Type *) trace_realloc (__FILE__, __LINE__, \
					(char *) (op), (n) * sizeof (Type)))
256
#else
257
# define xnew(n,Type)	  ((Type *) xmalloc ((n) * sizeof (Type)))
258 259
# define xrnew(op,n,Type) ((op) = (Type *) xrealloc ( \
					(char *) (op), (n) * sizeof (Type)))
260
#endif
Jim Blandy's avatar
Jim Blandy committed
261

Francesco Potortì's avatar
Francesco Potortì committed
262
#define bool int
263

264
typedef void Lang_function (FILE *);
Jim Blandy's avatar
Jim Blandy committed
265

266 267
typedef struct
{
268 269
  const char *suffix;           /* file name suffix for this compressor */
  const char *command;		/* takes one arg and decompresses to stdout */
270
} compressor;
Jim Blandy's avatar
Jim Blandy committed
271

272 273
typedef struct
{
274 275
  const char *name;             /* language name */
  const char *help; 		/* detailed help for the language */
276
  Lang_function *function;	/* parse function */
277 278 279
  const char **suffixes;        /* name suffixes of this language's files */
  const char **filenames;       /* names of this language's files */
  const char **interpreters;    /* interpreters for this language */
280
  bool metasource;		/* source used to generate other sources */
281 282
} language;

283 284 285 286 287 288 289 290 291 292
typedef struct fdesc
{
  struct fdesc *next;		/* for the linked list */
  char *infname;		/* uncompressed input file name */
  char *infabsname;		/* absolute uncompressed input file name */
  char *infabsdir;		/* absolute dir of input file */
  char *taggedfname;		/* file name to write in tagfile */
  language *lang;		/* language of file */
  char *prop;			/* file properties to write in tagfile */
  bool usecharno;		/* etags tags shall contain char number */
293
  bool written;			/* entry written in the tags file */
294 295
} fdesc;

296
typedef struct node_st
297 298 299
{				/* sorting structure */
  struct node_st *left, *right;	/* left and right sons */
  fdesc *fdp;			/* description of file to whom tag belongs */
300
  char *name; 			/* tag name */
301
  char *regex;			/* search regexp */
302
  bool valid;			/* write this tag on the tag file */
303
  bool is_func;			/* function tag: use regexp in CTAGS mode */
304 305
  bool been_warned;		/* warning already given for duplicated tag */
  int lno;			/* line number tag is on */
306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321
  long cno;			/* character number line starts on */
} node;

/*
 * A `linebuffer' is a structure which holds a line of text.
 * `readline_internal' reads a line from a stream into a linebuffer
 * and works regardless of the length of the line.
 * SIZE is the size of BUFFER, LEN is the length of the string in
 * BUFFER after readline reads it.
 */
typedef struct
{
  long size;			/* size of BUFFER */
  int len;			/* length of the string held in BUFFER */
  char *buffer;			/* the text of the line */
} linebuffer;
322

323 324 325 326 327 328
/* Used to support mixing of --lang and file names. */
typedef struct
{
  enum {
    at_language,		/* a language specification */
    at_regexp,			/* a regular expression */
329
    at_filename,		/* a file name */
330 331
    at_stdin,			/* read from stdin here */
    at_end			/* stop parsing the list */
332 333 334 335 336 337
  } arg_type;			/* argument type */
  language *lang;		/* language associated with the argument */
  char *what;			/* the argument itself */
} argument;

/* Structure defining a regular expression. */
338
typedef struct regexp
339
{
340 341 342 343 344 345 346 347 348 349 350
  struct regexp *p_next;	/* pointer to next in list */
  language *lang;		/* if set, use only for this language */
  char *pattern;		/* the regexp pattern */
  char *name;			/* tag name */
  struct re_pattern_buffer *pat; /* the compiled pattern */
  struct re_registers regs;	/* re registers */
  bool error_signaled;		/* already signaled for this regexp */
  bool force_explicit_name;	/* do not allow implicit tag name */
  bool ignore_case;		/* ignore case when matching */
  bool multi_line;		/* do a multi-line match on the whole file */
} regexp;
351 352


353
/* Many compilers barf on this:
354
	Lang_function Ada_funcs;
355
   so let's write it this way */
356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394
static void Ada_funcs (FILE *);
static void Asm_labels (FILE *);
static void C_entries (int c_ext, FILE *);
static void default_C_entries (FILE *);
static void plain_C_entries (FILE *);
static void Cjava_entries (FILE *);
static void Cobol_paragraphs (FILE *);
static void Cplusplus_entries (FILE *);
static void Cstar_entries (FILE *);
static void Erlang_functions (FILE *);
static void Forth_words (FILE *);
static void Fortran_functions (FILE *);
static void HTML_labels (FILE *);
static void Lisp_functions (FILE *);
static void Lua_functions (FILE *);
static void Makefile_targets (FILE *);
static void Pascal_functions (FILE *);
static void Perl_functions (FILE *);
static void PHP_functions (FILE *);
static void PS_functions (FILE *);
static void Prolog_functions (FILE *);
static void Python_functions (FILE *);
static void Scheme_functions (FILE *);
static void TeX_commands (FILE *);
static void Texinfo_nodes (FILE *);
static void Yacc_entries (FILE *);
static void just_read_file (FILE *);

static void print_language_names (void);
static void print_version (void);
static void print_help (argument *);
int main (int, char **);

static compressor *get_compressor_from_suffix (char *, char **);
static language *get_language_from_langname (const char *);
static language *get_language_from_interpreter (char *);
static language *get_language_from_filename (char *, bool);
static void readline (linebuffer *, FILE *);
static long readline_internal (linebuffer *, FILE *);
395
static bool nocase_tail (const char *);
396 397 398 399 400 401
static void get_tag (char *, char **);

static void analyse_regex (char *);
static void free_regexps (void);
static void regex_tag_multiline (void);
static void error (const char *, const char *);
402
static void suggest_asking_for_help (void) NO_RETURN;
403 404
void fatal (const char *, const char *) NO_RETURN;
static void pfatal (const char *) NO_RETURN;
405 406 407 408 409 410 411 412 413
static void add_node (node *, node **);

static void init (void);
static void process_file_name (char *, language *);
static void process_file (FILE *, char *, language *);
static void find_entries (FILE *);
static void free_tree (node *);
static void free_fdesc (fdesc *);
static void pfnote (char *, bool, char *, int, int, long);
414
static void make_tag (const char *, int, bool, char *, int, int, long);
415 416 417
static void invalidate_nodes (fdesc *, node **);
static void put_entries (node *);

418
static char *concat (const char *, const char *, const char *);
419 420
static char *skip_spaces (char *);
static char *skip_non_spaces (char *);
421 422
static char *savenstr (const char *, int);
static char *savestr (const char *);
423 424 425 426 427 428 429 430 431 432 433 434 435 436
static char *etags_strchr (const char *, int);
static char *etags_strrchr (const char *, int);
static int etags_strcasecmp (const char *, const char *);
static int etags_strncasecmp (const char *, const char *, int);
static char *etags_getcwd (void);
static char *relative_filename (char *, char *);
static char *absolute_filename (char *, char *);
static char *absolute_dirname (char *, char *);
static bool filename_is_absolute (char *f);
static void canonicalize_filename (char *);
static void linebuffer_init (linebuffer *);
static void linebuffer_setlen (linebuffer *, int);
static PTR xmalloc (unsigned int);
static PTR xrealloc (char *, unsigned int);
437

Jim Blandy's avatar
Jim Blandy committed
438

439
static char searchar = '/';	/* use /.../ searches */
Jim Blandy's avatar
Jim Blandy committed
440

441 442 443 444 445
static char *tagfile;		/* output file */
static char *progname;		/* name this program was invoked with */
static char *cwd;		/* current working directory */
static char *tagfiledir;	/* directory of tagfile */
static FILE *tagf;		/* ioptr for tags file */
446

447 448
static fdesc *fdhead;		/* head of file description list */
static fdesc *curfdp;		/* current file description */
449 450 451 452
static int lineno;		/* line number of current line */
static long charno;		/* current character number */
static long linecharno;		/* charno of start of current line */
static char *dbp;		/* pointer to start of current tag */
453

454
static const int invalidcharno = -1;
455

456
static node *nodehead;		/* the head of the binary tree of tags */
457
static node *last_node;		/* the last node created */
Jim Blandy's avatar
Jim Blandy committed
458

459
static linebuffer lb;		/* the current line */
460
static linebuffer filebuf;	/* a buffer containing the whole file */
461
static linebuffer token_name;	/* a buffer containing a tag name */
Jim Blandy's avatar
Jim Blandy committed
462

463
/* boolean "functions" (see init)	*/
464
static bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS];
465
static const char
466
  /* white chars */
467
  *white = " \f\t\n\r\v",
468
  /* not in a name */
469
  *nonam = " \f\t\n\r()=,;",	/* look at make_tag before modifying! */
470
  /* token ending chars */
471
  *endtk = " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?",
472 473 474
  /* token starting chars */
  *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~@",
  /* valid in-token chars */
475
  *midtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789";
Jim Blandy's avatar
Jim Blandy committed
476

477
static bool append_to_tagfile;	/* -a: append to tags */
478
/* The next five default to TRUE in C and derived languages.  */
479 480
static bool typedefs;		/* -t: create tags for C and Ada typedefs */
static bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
Jim Blandy's avatar
Jim Blandy committed
481
				/* 0 struct/enum/union decls, and C++ */
482
				/* member functions. */
483
static bool constantypedefs;	/* -d: create tags for C #define, enum */
484
				/* constants and variables. */
Jim Blandy's avatar
Jim Blandy committed
485
				/* -D: opposite of -d.  Default under ctags. */
486 487
static bool globals;		/* create tags for global variables */
static bool members;		/* create tags for C member variables */
488
static bool declarations;	/* --declarations: tag them and extern in C&Co*/
489
static bool no_line_directive;	/* ignore #line directives (undocumented) */
490
static bool no_duplicates;	/* no duplicate tags for ctags (undocumented) */
491 492
static bool update;		/* -u: update tags */
static bool vgrind_style;	/* -v: create vgrind style index output */
493
static bool no_warnings;	/* -w: suppress warnings (undocumented) */
494
static bool cxref_style;	/* -x: create cxref style output */
495
static bool cplusplus;		/* .[hc] means C++, not C (undocumented) */
496
static bool ignoreindent;	/* -I: ignore indentation in C */
497
static bool packages_only;	/* --packages-only: in Ada, only tag packages*/
498

499 500
/* STDIN is defined in LynxOS system headers */
#ifdef STDIN
501
# undef STDIN
502 503
#endif

504 505 506
#define STDIN 0x1001		/* returned by getopt_long on --parse-stdin */
static bool parsing_stdin;	/* --parse-stdin used */

507
static regexp *p_head;		/* list of all regexps */
508
static bool need_filebuf;	/* some regexes are multi-line */
509

510
static struct option longopts[] =
511
{
512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527
  { "append",             no_argument,       NULL,               'a'   },
  { "packages-only",      no_argument,       &packages_only,     TRUE  },
  { "c++",                no_argument,       NULL,               'C'   },
  { "declarations",       no_argument,       &declarations,      TRUE  },
  { "no-line-directive",  no_argument,       &no_line_directive, TRUE  },
  { "no-duplicates",      no_argument,       &no_duplicates,     TRUE  },
  { "help",               no_argument,       NULL,               'h'   },
  { "help",               no_argument,       NULL,               'H'   },
  { "ignore-indentation", no_argument,       NULL,               'I'   },
  { "language",           required_argument, NULL,               'l'   },
  { "members",            no_argument,       &members,           TRUE  },
  { "no-members",         no_argument,       &members,           FALSE },
  { "output",             required_argument, NULL,               'o'   },
  { "regex",              required_argument, NULL,               'r'   },
  { "no-regex",           no_argument,       NULL,               'R'   },
  { "ignore-case-regex",  required_argument, NULL,               'c'   },
528
  { "parse-stdin",        required_argument, NULL,               STDIN },
529
  { "version",            no_argument,       NULL,               'V'   },
530

531
#if CTAGS /* Ctags options */
532 533 534 535 536 537 538 539 540
  { "backward-search",    no_argument,       NULL,               'B'   },
  { "cxref",              no_argument,       NULL,               'x'   },
  { "defines",            no_argument,       NULL,               'd'   },
  { "globals",            no_argument,       &globals,           TRUE  },
  { "typedefs",           no_argument,       NULL,               't'   },
  { "typedefs-and-c++",   no_argument,       NULL,               'T'   },
  { "update",             no_argument,       NULL,               'u'   },
  { "vgrind",             no_argument,       NULL,               'v'   },
  { "no-warn",            no_argument,       NULL,               'w'   },
541

542
#else /* Etags options */
543 544 545
  { "no-defines",         no_argument,       NULL,               'D'   },
  { "no-globals",         no_argument,       &globals,           FALSE },
  { "include",            required_argument, NULL,               'i'   },
546
#endif
547
  { NULL }
Jim Blandy's avatar
Jim Blandy committed
548 549
};

550
static compressor compressors[] =
551 552 553 554 555 556
{
  { "z", "gzip -d -c"},
  { "Z", "gzip -d -c"},
  { "gz", "gzip -d -c"},
  { "GZ", "gzip -d -c"},
  { "bz2", "bzip2 -d -c" },
557
  { "xz", "xz -d -c" },
558 559 560
  { NULL }
};

561 562 563
/*
 * Language stuff.
 */
564

565
/* Ada code */
566
static const char *Ada_suffixes [] =
567
  { "ads", "adb", "ada", NULL };
568
static const char Ada_help [] =
569 570 571 572 573 574 575 576 577 578 579 580 581 582 583
"In Ada code, functions, procedures, packages, tasks and types are\n\
tags.  Use the `--packages-only' option to create tags for\n\
packages only.\n\
Ada tag names have suffixes indicating the type of entity:\n\
	Entity type:	Qualifier:\n\
	------------	----------\n\
	function	/f\n\
	procedure	/p\n\
	package spec	/s\n\
	package body	/b\n\
	type		/t\n\
	task		/k\n\
Thus, `M-x find-tag <RET> bidule/b <RET>' will go directly to the\n\
body of the package `bidule', while `M-x find-tag <RET> bidule <RET>'\n\
will just search for any tag `bidule'.";
584 585

/* Assembly code */
586
static const char *Asm_suffixes [] =
587 588 589 590 591 592 593 594 595 596
  { "a",	/* Unix assembler */
    "asm", /* Microcontroller assembly */
    "def", /* BSO/Tasking definition includes  */
    "inc", /* Microcontroller include files */
    "ins", /* Microcontroller include files */
    "s", "sa", /* Unix assembler */
    "S",   /* cpp-processed Unix assembler */
    "src", /* BSO/Tasking C compiler output */
    NULL
  };
597
static const char Asm_help [] =
598 599 600
"In assembler code, labels appearing at the beginning of a line,\n\
followed by a colon, are tags.";

601 602

/* Note that .c and .h can be considered C++, if the --c++ flag was
603
   given, or if the `class' or `template' keywords are met inside the file.
604
   That is why default_C_entries is called for these. */
605
static const char *default_C_suffixes [] =
606
  { "c", "h", NULL };
607
#if CTAGS				/* C help for Ctags */
608
static const char default_C_help [] =
609 610 611 612 613 614 615
"In C code, any C function is a tag.  Use -t to tag typedefs.\n\
Use -T to tag definitions of `struct', `union' and `enum'.\n\
Use -d to tag `#define' macro definitions and `enum' constants.\n\
Use --globals to tag global variables.\n\
You can tag function declarations and external variables by\n\
using `--declarations', and struct members by using `--members'.";
#else					/* C help for Etags */
616
static const char default_C_help [] =
617 618 619 620
"In C code, any C function or typedef is a tag, and so are\n\
definitions of `struct', `union' and `enum'.  `#define' macro\n\
definitions and `enum' constants are tags unless you specify\n\
`--no-defines'.  Global variables are tags unless you specify\n\
621 622 623
`--no-globals' and so are struct members unless you specify\n\
`--no-members'.  Use of `--no-globals', `--no-defines' and\n\
`--no-members' can make the tags table file much smaller.\n\
624
You can tag function declarations and external variables by\n\
625
using `--declarations'.";
626
#endif	/* C help for Ctags and Etags */
627

628
static const char *Cplusplus_suffixes [] =
629
  { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
630 631 632
    "M",			/* Objective C++ */
    "pdb",			/* Postscript with C syntax */
    NULL };
633
static const char Cplusplus_help [] =
634 635
"In C++ code, all the tag constructs of C code are tagged.  (Use\n\
--help --lang=c --lang=c++ for full help.)\n\
636 637
In addition to C tags, member functions are also recognized.  Member\n\
variables are recognized unless you use the `--no-members' option.\n\
638 639 640
Tags for variables and functions in classes are named `CLASS::VARIABLE'\n\
and `CLASS::FUNCTION'.  `operator' definitions have tag names like\n\
`operator+'.";
641

642
static const char *Cjava_suffixes [] =
643
  { "java", NULL };
644 645
/* Help text for Java.  Declared const like the other *_help strings,
   matching the `const char *help' member of `language'.  */
static const char Cjava_help [] =
"In Java code, all the tags constructs of C and C++ code are\n\
tagged.  (Use --help --lang=c --lang=c++ --lang=java for full help.)";
647

648

649
static const char *Cobol_suffixes [] =
650
  { "COB", "cob", NULL };
651 652 653
/* Help text for Cobol.  Declared const like the other *_help strings,
   matching the `const char *help' member of `language'.  */
static const char Cobol_help [] =
"In Cobol code, tags are paragraph names; that is, any word\n\
starting in column 8 and followed by a period.";
654

655
static const char *Cstar_suffixes [] =
656 657
  { "cs", "hs", NULL };

658
static const char *Erlang_suffixes [] =
659
  { "erl", "hrl", NULL };
660
static const char Erlang_help [] =
661 662
"In Erlang code, the tags are the functions, records and macros\n\
defined in the file.";
663

664
/* File name suffixes of Forth sources, NULL-terminated.  Given internal
   linkage like every other *_suffixes table in this file.  */
static const char *Forth_suffixes [] =
  { "fth", "tok", NULL };
666
static const char Forth_help [] =
667 668 669
"In Forth code, tags are words defined by `:',\n\
constant, code, create, defer, value, variable, buffer:, field.";

670
static const char *Fortran_suffixes [] =
671
  { "F", "f", "f90", "for", NULL };
672
static const char Fortran_help [] =
673
"In Fortran code, functions, subroutines and block data are tags.";
674

675
static const char *HTML_suffixes [] =
676
  { "htm", "html", "shtml", NULL };
677
static const char HTML_help [] =
678 679 680
"In HTML input files, the tags are the `title' and the `h1', `h2',\n\
`h3' headers.  Also, tags are `name=' in anchors and all\n\
occurrences of `id='.";
681

682
static const char *Lisp_suffixes [] =
683
  { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };
684
static const char Lisp_help [] =
685 686 687 688
"In Lisp code, any function defined with `defun', any variable\n\
defined with `defvar' or `defconst', and in general the first\n\
argument of any expression that starts with `(def' in column zero\n\
is a tag.";
689

690
static const char *Lua_suffixes [] =
691
  { "lua", "LUA", NULL };
692
static const char Lua_help [] =
693 694
"In Lua scripts, all functions are tags.";

695
static const char *Makefile_filenames [] =
696
  { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL};
697
static const char Makefile_help [] =
698 699 700
"In makefiles, targets are tags; additionally, variables are tags\n\
unless you specify `--no-globals'.";

701
static const char *Objc_suffixes [] =
702 703 704
  { "lm",			/* Objective lex file */
    "m",			/* Objective C file */
     NULL };
705
static const char Objc_help [] =
706 707
"In Objective C code, tags include Objective C definitions for classes,\n\
class categories, methods and protocols.  Tags for variables and\n\
708 709
functions in classes are named `CLASS::VARIABLE' and `CLASS::FUNCTION'.\n\
(Use --help --lang=c --lang=objc --lang=java for full help.)";
710

711
static const char *Pascal_suffixes [] =
712
  { "p", "pas", NULL };
713
static const char Pascal_help [] =
714 715
"In Pascal code, the tags are the functions and procedures defined\n\
in the file.";
716
/* " // this is for working around an Emacs highlighting bug... */
717

718
static const char *Perl_suffixes [] =
719
  { "pl", "pm", NULL };
720
static const char *Perl_interpreters [] =
721
  { "perl", "@PERL@", NULL };
722
static const char Perl_help [] =
723 724 725 726 727
"In Perl code, the tags are the packages, subroutines and variables\n\
defined by the `package', `sub', `my' and `local' keywords.  Use\n\
`--globals' if you want to tag global variables.  Tags for\n\
subroutines are named `PACKAGE::SUB'.  The name for subroutines\n\
defined in the default package is `main::SUB'.";
728

729
static const char *PHP_suffixes [] =
730
  { "php", "php3", "php4", NULL };
731
static const char PHP_help [] =
732 733
"In PHP code, tags are functions, classes and defines.  Unless you use\n\
the `--no-members' option, vars are tags too.";
734

735
static const char *plain_C_suffixes [] =
736
  { "pc",			/* Pro*C file */
737
     NULL };
738

739
/* File name suffixes for PostScript; NULL-terminated.  */
static const char *PS_suffixes [] =
  { "ps", "psw", NULL };	/* .psw is for PSWrap */

/* Help text for PostScript.  */
static const char PS_help [] =
"In PostScript code, the tags are the functions.";
743

744
/* File name suffixes for Prolog; NULL-terminated.  */
static const char *Prolog_suffixes [] =
  { "prolog", NULL };

/* Help text for Prolog.  */
static const char Prolog_help [] =
"In Prolog code, tags are predicates and rules at the beginning of\n\
line.";
749

750
/* File name suffixes for Python; NULL-terminated.  */
static const char *Python_suffixes [] =
  { "py", NULL };

/* Help text for Python.  */
static const char Python_help [] =
"In Python code, `def' or `class' at the beginning of a line\n\
generate a tag.";
755

756
/* Can't do the `SCM' or `scm' prefix with a version number. */
/* File name suffixes for Scheme; NULL-terminated.  */
static const char *Scheme_suffixes [] =
  { "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL };

/* Help text for Scheme.  */
static const char Scheme_help [] =
"In Scheme code, tags include anything defined with `def' or with a\n\
construct whose name starts with `def'.  They also include\n\
variables set with `set!' at top level in the file.";
763

764
/* File name suffixes for TeX; NULL-terminated.  */
static const char *TeX_suffixes [] =
  { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };

/* Help text for TeX.  */
static const char TeX_help [] =
"In LaTeX text, the argument of any of the commands `\\chapter',\n\
`\\section', `\\subsection', `\\subsubsection', `\\eqno', `\\label',\n\
`\\ref', `\\cite', `\\bibitem', `\\part', `\\appendix', `\\entry',\n\
`\\index', `\\def', `\\newcommand', `\\renewcommand',\n\
`\\newenvironment' or `\\renewenvironment' is a tag.\n\
\n\
Other commands can be specified by setting the environment variable\n\
`TEXTAGS' to a colon-separated list like, for example,\n\
     TEXTAGS=\"mycommand:myothercommand\".";

777

778
/* File name suffixes for Texinfo; NULL-terminated.  */
static const char *Texinfo_suffixes [] =
  { "texi", "texinfo", "txi", NULL };

/* Help text for Texinfo.  */
static const char Texinfo_help [] =
"for texinfo files, lines starting with @node are tagged.";
Dave Love's avatar
Dave Love committed
782

783
/* File name suffixes for Bison/Yacc input; NULL-terminated.  */
static const char *Yacc_suffixes [] =
  { "y", "y++", "ym", "yxx", "yy", NULL }; /* .ym is Objective yacc file */

/* Help text for Bison/Yacc input.  */
static const char Yacc_help [] =
"In Bison or Yacc input files, each rule defines as a tag the\n\
nonterminal it constructs.  The portions of the file that contain\n\
C code are parsed as C code (use --help --lang=c --lang=yacc\n\
for full help).";

791
/* Help text for the `auto' pseudo-language.  */
static const char auto_help [] =
"`auto' is not a real language, it indicates to use\n\
a default language for files based on file name suffix and file contents.";

795
/* Help text for the `none' pseudo-language.  */
static const char none_help [] =
"`none' is not a real language, it indicates to only do\n\
regexp processing on files.";

/* Help text for languages that have no detailed help of their own.  */
static const char no_lang_help [] =
"No detailed help available for this language.";

802

803 804 805 806 807 808
/*
 * Table of languages.
 *
 * It is ok for a given function to be listed under more than one
 * name; plain_C_entries, for example, serves both `objc' and `proc'.
 */
809

810
static language lang_names [] =
811
{
812 813 814 815 816 817 818
  { "ada",       Ada_help,       Ada_funcs,         Ada_suffixes       },
  { "asm",       Asm_help,       Asm_labels,        Asm_suffixes       },
  { "c",         default_C_help, default_C_entries, default_C_suffixes },
  { "c++",       Cplusplus_help, Cplusplus_entries, Cplusplus_suffixes },
  { "c*",        no_lang_help,   Cstar_entries,     Cstar_suffixes     },
  { "cobol",     Cobol_help,     Cobol_paragraphs,  Cobol_suffixes     },
  { "erlang",    Erlang_help,    Erlang_functions,  Erlang_suffixes    },
819
  { "forth",     Forth_help,     Forth_words,       Forth_suffixes     },
820 821 822 823
  { "fortran",   Fortran_help,   Fortran_functions, Fortran_suffixes   },
  { "html",      HTML_help,      HTML_labels,       HTML_suffixes      },
  { "java",      Cjava_help,     Cjava_entries,     Cjava_suffixes     },
  { "lisp",      Lisp_help,      Lisp_functions,    Lisp_suffixes      },
824
  { "lua",       Lua_help,       Lua_functions,     Lua_suffixes       },
825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840
  { "makefile",  Makefile_help,Makefile_targets,NULL,Makefile_filenames},
  { "objc",      Objc_help,      plain_C_entries,   Objc_suffixes      },
  { "pascal",    Pascal_help,    Pascal_functions,  Pascal_suffixes    },
  { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters},
  { "php",       PHP_help,       PHP_functions,     PHP_suffixes       },
  { "postscript",PS_help,        PS_functions,      PS_suffixes        },
  { "proc",      no_lang_help,   plain_C_entries,   plain_C_suffixes   },
  { "prolog",    Prolog_help,    Prolog_functions,  Prolog_suffixes    },
  { "python",    Python_help,    Python_functions,  Python_suffixes    },
  { "scheme",    Scheme_help,    Scheme_functions,  Scheme_suffixes    },
  { "tex",       TeX_help,       TeX_commands,      TeX_suffixes       },
  { "texinfo",   Texinfo_help,   Texinfo_nodes,     Texinfo_suffixes   },
  { "yacc",      Yacc_help,Yacc_entries,Yacc_suffixes,NULL,NULL,TRUE},
  { "auto",      auto_help },                      /* default guessing scheme */
  { "none",      none_help,      just_read_file }, /* regexp matching only */
  { NULL }                /* end of list */
841
};
842

Jim Blandy's avatar
Jim Blandy committed
843

844
static void
845
print_language_names (void)
846
{
847
  language *lang;
848
  const char **name, **ext;
849 850

  puts ("\nThese are the currently supported languages, along with the\n\
851
default file names and dot suffixes:");
852
  for (lang = lang_names; lang->name != NULL; lang++)
853
    {
854 855 856 857
      printf ("  %-*s", 10, lang->name);
      if (lang->filenames != NULL)
	for (name = lang->filenames; *name != NULL; name++)
	  printf (" %s", *name);
858 859 860
      if (lang->suffixes != NULL)
	for (ext = lang->suffixes; *ext != NULL; ext++)
	  printf (" .%s", *ext);
861 862
      puts ("");
    }
863
  puts ("where `auto' means use default language for files based on file\n\
864 865
name suffix, and `none' means only do regexp processing on files.\n\
If no language is specified and no matching suffix is found,\n\
866 867
the first line of the file is read for a sharp-bang (#!) sequence\n\
followed by the name of an interpreter.  If no such sequence is found,\n\
868
Fortran is tried first; if no tags are found, C is tried next.\n\
869 870
When parsing any C file, a \"class\" or \"template\" keyword\n\
switches to C++.");
871
  puts ("Compressed files are supported using gzip, bzip2, and xz.\n\
872 873 874
\n\
For detailed help on a given language use, for example,\n\
etags --help --lang=ada.");
875 876
}

877
/* Fallback values for the name and version strings, used when they
   have not been defined already.  */
#ifndef EMACS_NAME
# define EMACS_NAME "standalone"
#endif
#ifndef VERSION
# define VERSION "17.38.1.4"
#endif
883
static void
884
print_version (void)
Jim Blandy's avatar
Jim Blandy committed
885
{
886
  /* Makes it easier to update automatically. */
887
  char emacs_copyright[] = "Copyright (C) 2011 Free Software Foundation, Inc.";
888

889
  printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
890
  puts (emacs_copyright);
891
  puts ("This program is distributed under the terms in ETAGS.README");
Jim Blandy's avatar
Jim Blandy committed
892

893
  exit (EXIT_SUCCESS);
Jim Blandy's avatar
Jim Blandy committed
894 895
}

896 897 898 899
/* Default to FALSE when PRINT_UNDOCUMENTED_OPTIONS_HELP has not been
   defined already.  */
#if !defined PRINT_UNDOCUMENTED_OPTIONS_HELP
# define PRINT_UNDOCUMENTED_OPTIONS_HELP FALSE
#endif