etags.c 188 KB
Newer Older
1
/* Tags file maker to go with GNU Emacs           -*- coding: utf-8 -*-
2

3
Copyright (C) 1984 The Regents of the University of California
Jim Blandy's avatar
Jim Blandy committed
4

5 6 7 8 9 10 11 12 13 14 15 16
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the
   distribution.
3. Neither the name of the University nor the names of its
   contributors may be used to endorse or promote products derived
   from this software without specific prior written permission.
Jim Blandy's avatar
Jim Blandy committed
17

18 19 20 21 22 23 24 25 26 27 28 29 30
THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS''
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS
BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


Paul Eggert's avatar
Paul Eggert committed
31
Copyright (C) 1984, 1987-1989, 1993-1995, 1998-2020 Free Software
32
Foundation, Inc.
33 34 35

This file is not considered part of GNU Emacs.

36
This program is free software: you can redistribute it and/or modify
37
it under the terms of the GNU General Public License as published by
38 39
the Free Software Foundation, either version 3 of the License, or (at
your option) any later version.
40 41 42 43 44 45 46

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
47
along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
48 49 50 51 52 53 54 55 56


/* NB To comply with the above BSD license, copyright information is
reproduced in etc/ETAGS.README.  That file should be updated when the
above notices are.

To the best of our knowledge, this code was originally based on the
ctags.c distributed with BSD4.2, which was copyrighted by the
University of California, as described above. */
Jim Blandy's avatar
Jim Blandy committed
57 58 59 60


/*
 * Authors:
 * 1983 Ctags originally by Ken Arnold.
 * 1984 Fortran added by Jim Kleckner.
 * 1984 Ed Pelegri-Llopart added C typedefs.
 * 1985 Emacs TAGS format by Richard Stallman.
 * 1989 Sam Kendall added C++.
 * 1992 Joseph B. Wells improved C and C++ parsing.
 * 1993 Francesco Potortì reorganized C and C++.
 * 1994 Line-by-line regexp tags by Tom Tromey.
 * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
 * 2002 #line directives by Francesco Potortì.
 * Francesco Potortì maintained and improved it for many years
   starting in 1993.
 */

75 76
/*
 * If you want to add support for a new language, start by looking at the LUA
 * language, which is the simplest.  Alternatively, consider distributing etags
 * together with a configuration file containing regexp definitions for etags.
 */

81 82
#ifdef DEBUG
#  undef DEBUG
83
#  define DEBUG true
84
#else
85
#  define DEBUG false
86
#endif
87

88
#include <config.h>
89

90
/* WIN32_NATIVE is for XEmacs.
   MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
#ifdef WIN32_NATIVE
# undef MSDOS
# undef  WINDOWSNT
# define WINDOWSNT
#endif /* WIN32_NATIVE */

98
#ifdef MSDOS
99
# undef MSDOS
100
# define MSDOS true
101
# include <sys/param.h>
102
#else
103
# define MSDOS false
104 105
#endif /* MSDOS */

106
#ifdef WINDOWSNT
107
# include <direct.h>
108 109 110
# undef HAVE_NTGUI
# undef  DOS_NT
# define DOS_NT
Eli Zaretskii's avatar
Eli Zaretskii committed
111 112
/* The WINDOWSNT build doesn't use Gnulib's fcntl.h.  */
# define O_CLOEXEC O_NOINHERIT
113
#endif /* WINDOWSNT */
114

115
#include <inttypes.h>
116
#include <limits.h>
117
#include <unistd.h>
118
#include <stdarg.h>
119 120
#include <stdlib.h>
#include <string.h>
121
#include <sysstdio.h>
122
#include <errno.h>
123
#include <fcntl.h>
124
#include <binary-io.h>
125
#include <intprops.h>
126
#include <unlocked-io.h>
127
#include <c-ctype.h>
128
#include <c-strcase.h>
129

130
#include <assert.h>
131
#include <getopt.h>
Paul Eggert's avatar
Paul Eggert committed
132
#include <regex.h>
133

134
/* Define CTAGS to make the program "ctags" compatible with the usual one.
 Leave it undefined to make the program "etags", which makes emacs-style
 tag tables and tags typedefs, #defines and struct/union/enum by default. */
#ifdef CTAGS
# undef  CTAGS
139
# define CTAGS true
140
#else
141
# define CTAGS false
Jim Blandy's avatar
Jim Blandy committed
142 143
#endif

144 145 146 147 148 149 150 151
/* Store the LEN bytes found at SRC into DEST, then terminate DEST
   with a NUL byte.  LEN + 1 bytes are written to DEST in total.  */
static void
memcpyz (void *dest, void const *src, ptrdiff_t len)
{
  char *out = dest;

  memcpy (out, src, len);
  out[len] = '\0';
}

152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174
/* Return true if the strings S and T have identical contents.  */
static bool
streq (char const *s, char const *t)
{
  return !strcmp (s, t);
}

/* Return true if c_strcasecmp reports the strings S and T as equal.  */
static bool
strcaseeq (char const *s, char const *t)
{
  return !c_strcasecmp (s, t);
}

/* Return true if the first N bytes of the strings S and T compare
   equal according to strncmp.  */
static bool
strneq (char const *s, char const *t, size_t n)
{
  return !strncmp (s, t, n);
}

/* Return true if c_strncasecmp reports the first N bytes of the
   strings S and T as equal.  */
static bool
strncaseeq (char const *s, char const *t, size_t n)
{
  return !c_strncasecmp (s, t, n);
}
Jim Blandy's avatar
Jim Blandy committed
175

176 177 178 179 180 181 182 183 184 185 186
/* Return true if C is one of the characters that cannot be part of
   a name: NUL, white space, parentheses, ',', ';' or '='.  */
static bool
notinname (unsigned char c)
{
  /* Look at make_tag before modifying!  */
  switch (c)
    {
    case '\0': case '\t': case '\n': case '\f': case '\r': case ' ':
    case '(': case ')': case ',': case ';': case '=':
      return true;

    default:
      return false;
    }
}
Jim Blandy's avatar
Jim Blandy committed
187

188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206
/* Return true if C may be the first character of a token: a letter
   A-Z or a-z, or one of '$', '@', '_' and '~'.  */
static bool
begtoken (unsigned char c)
{
  static char const starters[] =
    "$@"
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "_"
    "abcdefghijklmnopqrstuvwxyz"
    "~";

  /* Leave the terminating NUL out of the search so '\0' is rejected.  */
  return memchr (starters, c, sizeof starters - 1) != NULL;
}
207

208 209 210 211 212
/* Return true if C can appear in the middle of a token: '$', '_',
   a decimal digit, or a letter A-Z or a-z.  */
static bool
intoken (unsigned char c)
{
  /* Indexed by character value; entries not listed are false.  */
  static bool const table[UCHAR_MAX + 1] = {
    ['$']=1,
    ['0']=1, ['1']=1, ['2']=1, ['3']=1, ['4']=1,
    ['5']=1, ['6']=1, ['7']=1, ['8']=1, ['9']=1,
    ['A']=1, ['B']=1, ['C']=1, ['D']=1, ['E']=1, ['F']=1, ['G']=1, ['H']=1,
    ['I']=1, ['J']=1, ['K']=1, ['L']=1, ['M']=1, ['N']=1, ['O']=1, ['P']=1,
    ['Q']=1, ['R']=1, ['S']=1, ['T']=1, ['U']=1, ['V']=1, ['W']=1, ['X']=1,
    ['Y']=1, ['Z']=1,
    ['_']=1,
    ['a']=1, ['b']=1, ['c']=1, ['d']=1, ['e']=1, ['f']=1, ['g']=1, ['h']=1,
    ['i']=1, ['j']=1, ['k']=1, ['l']=1, ['m']=1, ['n']=1, ['o']=1, ['p']=1,
    ['q']=1, ['r']=1, ['s']=1, ['t']=1, ['u']=1, ['v']=1, ['w']=1, ['x']=1,
    ['y']=1, ['z']=1
  };
  return table[c];
}
228

229 230 231 232 233 234 235 236 237 238 239 240 241
/* Return true if C terminates a token: NUL, a blank, tab, newline or
   carriage return, or one of the punctuation characters listed below.  */
static bool
endtoken (unsigned char c)
{
  switch (c)
    {
    case '\0': case '\t': case '\n': case '\r': case ' ':
    case '!': case '"': case '#': case '%': case '&': case '\'':
    case '(': case ')': case '*': case '+': case ',': case '-':
    case '.': case '/': case ':': case ';': case '<': case '=':
    case '>': case '?': case '[': case ']': case '^': case '{':
    case '|': case '}': case '~':
      return true;

    default:
      return false;
    }
}
242

243
/*
 *	xnew, xrnew -- allocate, reallocate storage
 *
 * SYNOPSIS:	Type *xnew (ptrdiff_t n, Type);
 *		void xrnew (OldPointer, ptrdiff_t n, int multiplier);
 */
249 250
/* Expand to a call of xnmalloc (N, sizeof (Type)) whose result is
   cast to Type *.  */
#define xnew(n, Type) ((Type *) xnmalloc (n, sizeof (Type)))
/* Assign to OP the result of xnrealloc (OP, N, M * sizeof *OP).
   OP appears several times in the expansion, so it should be free of
   side effects.  */
#define xrnew(op, n, m) ((op) = xnrealloc (op, n, (m) * sizeof *(op)))
Jim Blandy's avatar
Jim Blandy committed
251

252
/* Function type used for the per-language parse functions: takes a
   FILE * and returns nothing.  */
typedef void Lang_function (FILE *);
Jim Blandy's avatar
Jim Blandy committed
253

254 255
/* Associates a file name suffix with the command that decompresses
   files carrying that suffix.  */
typedef struct
{
  const char *suffix;		/* file name suffix for this compressor */
  const char *command;		/* takes one arg and decompresses to stdout */
} compressor;
Jim Blandy's avatar
Jim Blandy committed
259

260 261
typedef struct
{
262 263
  const char *name;             /* language name */
  const char *help; 		/* detailed help for the language */
264
  Lang_function *function;	/* parse function */
265 266 267
  const char **suffixes;        /* name suffixes of this language's files */
  const char **filenames;       /* names of this language's files */
  const char **interpreters;    /* interpreters for this language */
268
  bool metasource;		/* source used to generate other sources */
269 270
} language;

271 272 273 274 275 276 277 278 279 280
/* Description of one input file; descriptions are chained through
   the NEXT member.  */
typedef struct fdesc
{
  struct fdesc *next;		/* for the linked list */
  char *infname;		/* uncompressed input file name */
  char *infabsname;		/* absolute uncompressed input file name */
  char *infabsdir;		/* absolute dir of input file */
  char *taggedfname;		/* file name to write in tagfile */
  language *lang;		/* language of file */
  char *prop;			/* file properties to write in tagfile */
  bool usecharno;		/* etags tags shall contain char number */
  bool written;			/* entry written in the tags file */
} fdesc;

284
typedef struct node_st
285 286 287
{				/* sorting structure */
  struct node_st *left, *right;	/* left and right sons */
  fdesc *fdp;			/* description of file to whom tag belongs */
288
  char *name; 			/* tag name */
289
  char *regex;			/* search regexp */
290
  bool valid;			/* write this tag on the tag file */
291
  bool is_func;			/* function tag: use regexp in CTAGS mode */
292
  bool been_warned;		/* warning already given for duplicated tag */
293 294
  intmax_t lno;			/* line number tag is on */
  intmax_t cno;			/* character number line starts on */
295 296 297 298 299 300 301 302 303 304 305
} node;

/*
 * A `linebuffer' is a structure which holds a line of text.
 * `readline_internal' reads a line from a stream into a linebuffer
 * and works regardless of the length of the line.
 * SIZE is the size of BUFFER, LEN is the length of the string in
 * BUFFER after readline reads it.
 */
typedef struct
{
  ptrdiff_t size;		/* size of BUFFER */
  ptrdiff_t len;		/* length of the string held in BUFFER */
  char *buffer;			/* the text itself */
} linebuffer;
310

311 312 313 314 315 316
/* Used to support mixing of --lang and file names.  Each element
   records what kind of argument was seen, together with its language
   and text.  */
typedef struct
{
  enum {
    at_language,		/* a language specification */
    at_regexp,			/* a regular expression */
    at_filename,		/* a file name */
    at_stdin,			/* read from stdin here */
    at_end			/* stop parsing the list */
  } arg_type;			/* argument type */
  language *lang;		/* language associated with the argument */
  char *what;			/* the argument itself */
} argument;

/* Structure defining a regular expression. */
326
typedef struct regexp
327
{
328 329 330 331 332 333 334
  struct regexp *p_next;	/* pointer to next in list */
  language *lang;		/* if set, use only for this language */
  char *pattern;		/* the regexp pattern */
  char *name;			/* tag name */
  struct re_pattern_buffer *pat; /* the compiled pattern */
  struct re_registers regs;	/* re registers */
  bool error_signaled;		/* already signaled for this regexp */
Paul Eggert's avatar
Paul Eggert committed
335
  bool force_explicit_name;	/* do not allow implicit tag name */
336 337 338
  bool ignore_case;		/* ignore case when matching */
  bool multi_line;		/* do a multi-line match on the whole file */
} regexp;
339 340


341
/* Many compilers barf on this:
	Lang_function Ada_funcs;
   so let's write it this way */
344 345 346 347 348 349 350 351 352 353 354 355
static void Ada_funcs (FILE *);
static void Asm_labels (FILE *);
static void C_entries (int c_ext, FILE *);
static void default_C_entries (FILE *);
static void plain_C_entries (FILE *);
static void Cjava_entries (FILE *);
static void Cobol_paragraphs (FILE *);
static void Cplusplus_entries (FILE *);
static void Cstar_entries (FILE *);
static void Erlang_functions (FILE *);
static void Forth_words (FILE *);
static void Fortran_functions (FILE *);
lu4nx's avatar
lu4nx committed
356
static void Go_functions (FILE *);
357 358 359 360 361 362 363 364 365 366
static void HTML_labels (FILE *);
static void Lisp_functions (FILE *);
static void Lua_functions (FILE *);
static void Makefile_targets (FILE *);
static void Pascal_functions (FILE *);
static void Perl_functions (FILE *);
static void PHP_functions (FILE *);
static void PS_functions (FILE *);
static void Prolog_functions (FILE *);
static void Python_functions (FILE *);
Xi Lu's avatar
Xi Lu committed
367
static void Ruby_functions (FILE *);
368 369 370 371 372 373 374 375
static void Scheme_functions (FILE *);
static void TeX_commands (FILE *);
static void Texinfo_nodes (FILE *);
static void Yacc_entries (FILE *);
static void just_read_file (FILE *);

static language *get_language_from_langname (const char *);
static void readline (linebuffer *, FILE *);
376
static ptrdiff_t readline_internal (linebuffer *, FILE *, char const *);
377
static bool nocase_tail (const char *);
378
static void get_tag (char *, char **);
379
static void get_lispy_tag (char *);
380

Paul Eggert's avatar
Paul Eggert committed
381
static void analyze_regex (char *);
382 383
static void free_regexps (void);
static void regex_tag_multiline (void);
384
static void error (const char *, ...) ATTRIBUTE_FORMAT_PRINTF (1, 2);
385
static void verror (char const *, va_list) ATTRIBUTE_FORMAT_PRINTF (1, 0);
386
static _Noreturn void suggest_asking_for_help (void);
387
static _Noreturn void fatal (char const *, ...) ATTRIBUTE_FORMAT_PRINTF (1, 2);
388
static _Noreturn void pfatal (const char *);
389 390 391 392 393 394 395
static void add_node (node *, node **);

static void process_file_name (char *, language *);
static void process_file (FILE *, char *, language *);
static void find_entries (FILE *);
static void free_tree (node *);
static void free_fdesc (fdesc *);
396
static void pfnote (char *, bool, char *, ptrdiff_t, intmax_t, intmax_t);
397 398 399
static void invalidate_nodes (fdesc *, node **);
static void put_entries (node *);

400
static char *concat (const char *, const char *, const char *);
401 402
static char *skip_spaces (char *);
static char *skip_non_spaces (char *);
403
static char *skip_name (char *);
404
static char *savenstr (const char *, ptrdiff_t);
405
static char *savestr (const char *);
406 407 408 409 410 411
static char *etags_getcwd (void);
static char *relative_filename (char *, char *);
static char *absolute_filename (char *, char *);
static char *absolute_dirname (char *, char *);
static bool filename_is_absolute (char *f);
static void canonicalize_filename (char *);
412
static char *etags_mktmp (void);
413
static void linebuffer_init (linebuffer *);
414 415 416 417 418
static void linebuffer_setlen (linebuffer *, ptrdiff_t);
static void *xmalloc (ptrdiff_t) ATTRIBUTE_MALLOC_SIZE ((1));
static void *xnmalloc (ptrdiff_t, ptrdiff_t) ATTRIBUTE_MALLOC_SIZE ((1,2));
static void *xnrealloc (void *, ptrdiff_t, ptrdiff_t)
  ATTRIBUTE_ALLOC_SIZE ((2,3));
419

Jim Blandy's avatar
Jim Blandy committed
420

421
static char searchar = '/';	/* use /.../ searches */
Jim Blandy's avatar
Jim Blandy committed
422

423 424 425 426 427
static char *tagfile;		/* output file */
static char *progname;		/* name this program was invoked with */
static char *cwd;		/* current working directory */
static char *tagfiledir;	/* directory of tagfile */
static FILE *tagf;		/* ioptr for tags file */
428
static ptrdiff_t whatlen_max;	/* maximum length of any 'what' member */
429

430 431
static fdesc *fdhead;		/* head of file description list */
static fdesc *curfdp;		/* current file description */
432
static char *infilename;	/* current input file name */
433 434 435
static intmax_t lineno;		/* line number of current line */
static intmax_t charno;		/* current character number */
static intmax_t linecharno;	/* charno of start of current line */
436
static char *dbp;		/* pointer to start of current tag */
437

438
static intmax_t const invalidcharno = -1;
439

440
static node *nodehead;		/* the head of the binary tree of tags */
441
static node *last_node;		/* the last node created */
Jim Blandy's avatar
Jim Blandy committed
442

443
static linebuffer lb;		/* the current line */
444
static linebuffer filebuf;	/* a buffer containing the whole file */
445
static linebuffer token_name;	/* a buffer containing a tag name */
Jim Blandy's avatar
Jim Blandy committed
446

447
static bool append_to_tagfile;	/* -a: append to tags */
448
/* The next five default to true in C and derived languages.  */
449 450
static bool typedefs;		/* -t: create tags for C and Ada typedefs */
static bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
Jim Blandy's avatar
Jim Blandy committed
451
				/* 0 struct/enum/union decls, and C++ */
452
				/* member functions. */
453
static bool constantypedefs;	/* -d: create tags for C #define, enum */
454
				/* constants and variables. */
Jim Blandy's avatar
Jim Blandy committed
455
				/* -D: opposite of -d.  Default under ctags. */
456 457 458 459 460
static int globals;		/* create tags for global variables */
static int members;		/* create tags for C member variables */
static int declarations;	/* --declarations: tag them and extern in C&Co*/
static int no_line_directive;	/* ignore #line directives (undocumented) */
static int no_duplicates;	/* no duplicate tags for ctags (undocumented) */
461 462
static bool update;		/* -u: update tags */
static bool vgrind_style;	/* -v: create vgrind style index output */
463
static bool no_warnings;	/* -w: suppress warnings (undocumented) */
464
static bool cxref_style;	/* -x: create cxref style output */
465
static bool cplusplus;		/* .[hc] means C++, not C (undocumented) */
466
static bool ignoreindent;	/* -I: ignore indentation in C */
467
static int packages_only;	/* --packages-only: in Ada, only tag packages*/
468
static int class_qualify;	/* -Q: produce class-qualified tags in C++/Java */
Eli Zaretskii's avatar
Eli Zaretskii committed
469
static int debug;		/* --debug */
470

471 472
/* STDIN is defined in LynxOS system headers */
#ifdef STDIN
473
# undef STDIN
474 475
#endif

476 477 478
#define STDIN 0x1001		/* returned by getopt_long on --parse-stdin */
static bool parsing_stdin;	/* --parse-stdin used */

479
static regexp *p_head;		/* list of all regexps */
480
static bool need_filebuf;	/* some regexes are multi-line */
481

482
static struct option longopts[] =
483
{
484
  { "append",             no_argument,       NULL,               'a'   },
485
  { "packages-only",      no_argument,       &packages_only,     1     },
486
  { "c++",                no_argument,       NULL,               'C'   },
Eli Zaretskii's avatar
Eli Zaretskii committed
487
  { "debug",              no_argument,       &debug,             1     },
488 489 490
  { "declarations",       no_argument,       &declarations,      1     },
  { "no-line-directive",  no_argument,       &no_line_directive, 1     },
  { "no-duplicates",      no_argument,       &no_duplicates,     1     },
491 492 493 494
  { "help",               no_argument,       NULL,               'h'   },
  { "help",               no_argument,       NULL,               'H'   },
  { "ignore-indentation", no_argument,       NULL,               'I'   },
  { "language",           required_argument, NULL,               'l'   },
495 496
  { "members",            no_argument,       &members,           1     },
  { "no-members",         no_argument,       &members,           0     },
497
  { "output",             required_argument, NULL,               'o'   },
498
  { "class-qualify",      no_argument,       &class_qualify,     'Q'   },
499 500 501
  { "regex",              required_argument, NULL,               'r'   },
  { "no-regex",           no_argument,       NULL,               'R'   },
  { "ignore-case-regex",  required_argument, NULL,               'c'   },
502
  { "parse-stdin",        required_argument, NULL,               STDIN },
503
  { "version",            no_argument,       NULL,               'V'   },
504

505
#if CTAGS /* Ctags options */
506 507 508
  { "backward-search",    no_argument,       NULL,               'B'   },
  { "cxref",              no_argument,       NULL,               'x'   },
  { "defines",            no_argument,       NULL,               'd'   },
509
  { "globals",            no_argument,       &globals,           1     },
510 511 512 513 514
  { "typedefs",           no_argument,       NULL,               't'   },
  { "typedefs-and-c++",   no_argument,       NULL,               'T'   },
  { "update",             no_argument,       NULL,               'u'   },
  { "vgrind",             no_argument,       NULL,               'v'   },
  { "no-warn",            no_argument,       NULL,               'w'   },
515

516
#else /* Etags options */
517
  { "no-defines",         no_argument,       NULL,               'D'   },
518
  { "no-globals",         no_argument,       &globals,           0     },
519
  { "include",            required_argument, NULL,               'i'   },
520
#endif
521
  { NULL }
Jim Blandy's avatar
Jim Blandy committed
522 523
};

524
static compressor compressors[] =
525 526 527 528 529 530
{
  { "z", "gzip -d -c"},
  { "Z", "gzip -d -c"},
  { "gz", "gzip -d -c"},
  { "GZ", "gzip -d -c"},
  { "bz2", "bzip2 -d -c" },
531
  { "xz", "xz -d -c" },
532
  { "zst", "zstd -d -c" },
533 534 535
  { NULL }
};

536 537 538
/*
 * Language stuff.
 */
539

540
/* Ada code */
541
static const char *Ada_suffixes [] =
542
  { "ads", "adb", "ada", NULL };
543
static const char Ada_help [] =
544
"In Ada code, functions, procedures, packages, tasks and types are\n\
545
tags.  Use the '--packages-only' option to create tags for\n\
546 547 548 549 550 551 552 553 554 555
packages only.\n\
Ada tag names have suffixes indicating the type of entity:\n\
	Entity type:	Qualifier:\n\
	------------	----------\n\
	function	/f\n\
	procedure	/p\n\
	package spec	/s\n\
	package body	/b\n\
	type		/t\n\
	task		/k\n\
556 557 558
Thus, 'M-x find-tag <RET> bidule/b <RET>' will go directly to the\n\
body of the package 'bidule', while 'M-x find-tag <RET> bidule <RET>'\n\
will just search for any tag 'bidule'.";
559 560

/* Assembly code */
561
static const char *Asm_suffixes [] =
562 563 564 565 566 567 568 569 570 571
  { "a",	/* Unix assembler */
    "asm", /* Microcontroller assembly */
    "def", /* BSO/Tasking definition includes  */
    "inc", /* Microcontroller include files */
    "ins", /* Microcontroller include files */
    "s", "sa", /* Unix assembler */
    "S",   /* cpp-processed Unix assembler */
    "src", /* BSO/Tasking C compiler output */
    NULL
  };
572
static const char Asm_help [] =
573 574 575
"In assembler code, labels appearing at the beginning of a line,\n\
followed by a colon, are tags.";

576 577

/* Note that .c and .h can be considered C++, if the --c++ flag was
   given, or if the `class' or `template' keywords are met inside the file.
   That is why default_C_entries is called for these. */
580
static const char *default_C_suffixes [] =
581
  { "c", "h", NULL };
582
#if CTAGS				/* C help for Ctags */
583
static const char default_C_help [] =
584
"In C code, any C function is a tag.  Use -t to tag typedefs.\n\
585 586
Use -T to tag definitions of 'struct', 'union' and 'enum'.\n\
Use -d to tag '#define' macro definitions and 'enum' constants.\n\
587 588
Use --globals to tag global variables.\n\
You can tag function declarations and external variables by\n\
589
using '--declarations', and struct members by using '--members'.";
590
#else					/* C help for Etags */
591
static const char default_C_help [] =
592
"In C code, any C function or typedef is a tag, and so are\n\
593 594 595 596 597 598
definitions of 'struct', 'union' and 'enum'.  '#define' macro\n\
definitions and 'enum' constants are tags unless you specify\n\
'--no-defines'.  Global variables are tags unless you specify\n\
'--no-globals' and so are struct members unless you specify\n\
'--no-members'.  Use of '--no-globals', '--no-defines' and\n\
'--no-members' can make the tags table file much smaller.\n\
599
You can tag function declarations and external variables by\n\
600
using '--declarations'.";
601
#endif	/* C help for Ctags and Etags */
602

603
static const char *Cplusplus_suffixes [] =
604
  { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
605
    "M",			/* Objective C++ */
Juanma Barranquero's avatar
Juanma Barranquero committed
606
    "pdb",			/* PostScript with C syntax */
607
    NULL };
608
static const char Cplusplus_help [] =
609 610
"In C++ code, all the tag constructs of C code are tagged.  (Use\n\
--help --lang=c --lang=c++ for full help.)\n\
611
In addition to C tags, member functions are also recognized.  Member\n\
612 613 614 615
variables are recognized unless you use the '--no-members' option.\n\
Tags for variables and functions in classes are named 'CLASS::VARIABLE'\n\
and 'CLASS::FUNCTION'.  'operator' definitions have tag names like\n\
'operator+'.";
616

617
static const char *Cjava_suffixes [] =
618
  { "java", NULL };
619 620
/* Help text for Java.  Const-qualified like the other *_help strings,
   since it is never modified.  */
static const char Cjava_help [] =
"In Java code, all the tags constructs of C and C++ code are\n\
tagged.  (Use --help --lang=c --lang=c++ --lang=java for full help.)";
622

623

624
static const char *Cobol_suffixes [] =
625
  { "COB", "cob", NULL };
626 627 628
/* Help text for Cobol.  Const-qualified like the other *_help strings,
   since it is never modified.  */
static const char Cobol_help [] =
"In Cobol code, tags are paragraph names; that is, any word\n\
starting in column 8 and followed by a period.";
629

630
static const char *Cstar_suffixes [] =
631 632
  { "cs", "hs", NULL };

633
static const char *Erlang_suffixes [] =
634
  { "erl", "hrl", NULL };
635
static const char Erlang_help [] =
636 637
"In Erlang code, the tags are the functions, records and macros\n\
defined in the file.";
638 639
static const char *Erlang_interpreters [] =
  { "escript", NULL };
640

641
static const char *Forth_suffixes [] =
642
  { "fth", "tok", NULL };
643
static const char Forth_help [] =
644
"In Forth code, tags are words defined by ':',\n\
645 646
constant, code, create, defer, value, variable, buffer:, field.";

647
static const char *Fortran_suffixes [] =
648
  { "F", "f", "f90", "for", NULL };
649
static const char Fortran_help [] =
650
"In Fortran code, functions, subroutines and block data are tags.";
651

lu4nx's avatar
lu4nx committed
652 653 654 655
static const char *Go_suffixes [] = {"go", NULL};
static const char Go_help [] =
  "In Go code, functions, interfaces and packages are tags.";

656
static const char *HTML_suffixes [] =
657
  { "htm", "html", "shtml", NULL };
658
static const char HTML_help [] =
659 660 661
"In HTML input files, the tags are the 'title' and the 'h1', 'h2',\n\
'h3' headers.  Also, tags are 'name=' in anchors and all\n\
occurrences of 'id='.";
662

663
static const char *Lisp_suffixes [] =
664
  { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };
665
static const char Lisp_help [] =
666 667 668
"In Lisp code, any function defined with 'defun', any variable\n\
defined with 'defvar' or 'defconst', and in general the first\n\
argument of any expression that starts with '(def' in column zero\n\
669
is a tag.\n\
670
The '--declarations' option tags \"(defvar foo)\" constructs too.";
671

672
static const char *Lua_suffixes [] =
673
  { "lua", "LUA", NULL };
674
static const char Lua_help [] =
675
"In Lua scripts, all functions are tags.";
676 677
static const char *Lua_interpreters [] =
  { "lua", NULL };
678

679
static const char *Makefile_filenames [] =
680
  { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL};
681
static const char Makefile_help [] =
682
"In makefiles, targets are tags; additionally, variables are tags\n\
683
unless you specify '--no-globals'.";
684

685
static const char *Objc_suffixes [] =
686 687 688
  { "lm",			/* Objective lex file */
    "m",			/* Objective C file */
     NULL };
689
static const char Objc_help [] =
690 691
"In Objective C code, tags include Objective C definitions for classes,\n\
class categories, methods and protocols.  Tags for variables and\n\
692 693
functions in classes are named 'CLASS::VARIABLE' and 'CLASS::FUNCTION'.\
\n(Use --help --lang=c --lang=objc --lang=java for full help.)";
694

695
static const char *Pascal_suffixes [] =
696
  { "p", "pas", NULL };
697
static const char Pascal_help [] =
698 699
"In Pascal code, the tags are the functions and procedures defined\n\
in the file.";
700
/* " // this is for working around an Emacs highlighting bug... */
701

702
static const char *Perl_suffixes [] =
703
  { "pl", "pm", NULL };
704
static const char *Perl_interpreters [] =
705
  { "perl", "@PERL@", NULL };
706
static const char Perl_help [] =
707
"In Perl code, the tags are the packages, subroutines and variables\n\
708 709 710 711
defined by the 'package', 'sub', 'my' and 'local' keywords.  Use\n\
'--globals' if you want to tag global variables.  Tags for\n\
subroutines are named 'PACKAGE::SUB'.  The name for subroutines\n\
defined in the default package is 'main::SUB'.";
712

713
static const char *PHP_suffixes [] =
714
  { "php", "php3", "php4", NULL };
715
static const char PHP_help [] =
716
"In PHP code, tags are functions, classes and defines.  Unless you use\n\
717
the '--no-members' option, vars are tags too.";
718

719
static const char *plain_C_suffixes [] =
720
  { "pc",			/* Pro*C file */
721
     NULL };
722

723
static const char *PS_suffixes [] =
724
  { "ps", "psw", NULL };	/* .psw is for PSWrap */
725
static const char PS_help [] =
726
"In PostScript code, the tags are the functions.";
727

728
static const char *Prolog_suffixes [] =
729
  { "prolog", NULL };
730
static const char Prolog_help [] =
731 732
"In Prolog code, tags are predicates and rules at the beginning of\n\
line.";
733 734
static const char *Prolog_interpreters [] =
  { "gprolog", "pl", "yap", "swipl", "prolog", NULL };
735

736
static const char *Python_suffixes [] =
737
  { "py", NULL };
738
static const char Python_help [] =
739
"In Python code, 'def' or 'class' at the beginning of a line\n\
740
generate a tag.";
741 742
static const char *Python_interpreters [] =
  { "python", NULL };
743

Xi Lu's avatar
Xi Lu committed
744
/* Ruby: file-name suffixes, whole file names, detailed help text, and
   interpreter names looked for on a "#!" line.  */
static const char *Ruby_suffixes [] =
  { "rb", "ru", "rbw", NULL };
static const char *Ruby_filenames [] =
  { "Rakefile", "Thorfile", NULL };
static const char Ruby_help [] =
  "In Ruby code, 'def' or 'class' or 'module' at the beginning of\n\
a line generate a tag.  Constants also generate a tag.";
static const char *Ruby_interpreters [] =
  { "ruby", NULL };

/* Scheme: file-name suffixes and the detailed help text.
   Can't do the `SCM' or `scm' prefix with a version number. */
static const char *Scheme_suffixes [] =
  { "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL };
static const char Scheme_help [] =
"In Scheme code, tags include anything defined with 'def' or with a\n\
construct whose name starts with 'def'.  They also include\n\
variables set with 'set!' at top level in the file.";

/* TeX/LaTeX: file-name suffixes and the detailed help text.  */
static const char *TeX_suffixes [] =
  { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };
static const char TeX_help [] =
"In LaTeX text, the argument of any of the commands '\\chapter',\n\
'\\section', '\\subsection', '\\subsubsection', '\\eqno', '\\label',\n\
'\\ref', '\\cite', '\\bibitem', '\\part', '\\appendix', '\\entry',\n\
'\\index', '\\def', '\\newcommand', '\\renewcommand',\n\
'\\newenvironment' or '\\renewenvironment' is a tag.\n\
\n\
Other commands can be specified by setting the environment variable\n\
'TEXTAGS' to a colon-separated list like, for example,\n\
     TEXTAGS=\"mycommand:myothercommand\".";


/* Texinfo: file-name suffixes and the detailed help text.  */
static const char *Texinfo_suffixes [] =
  { "texi", "texinfo", "txi", NULL };
static const char Texinfo_help [] =
"for texinfo files, lines starting with @node are tagged.";

/* Bison/Yacc: file-name suffixes and the detailed help text.  */
static const char *Yacc_suffixes [] =
  { "y", "y++", "ym", "yxx", "yy", NULL }; /* .ym is Objective yacc file */
static const char Yacc_help [] =
"In Bison or Yacc input files, each rule defines as a tag the\n\
nonterminal it constructs.  The portions of the file that contain\n\
C code are parsed as C code (use --help --lang=c --lang=yacc\n\
for full help).";

/* Help text for the pseudo-language "auto" (see the "auto" entry of
   the language table).  */
static const char auto_help [] =
"'auto' is not a real language, it indicates to use\n\
a default language for files based on file name suffix and file contents.";

/* Help text for the pseudo-language "none" (see the "none" entry of
   the language table).  */
static const char none_help [] =
"'none' is not a real language, it indicates to only do\n\
regexp processing on files.";

/* Placeholder help text for language-table entries that have no
   detailed help of their own.  */
static const char no_lang_help [] =
"No detailed help available for this language.";


/*
 * Table of languages.
 *
 * It is ok for a given function to be listed under more than one
 * name.  I just didn't.
 */
807

808
static language lang_names [] =
809
{
810 811 812 813 814 815
  { "ada",       Ada_help,       Ada_funcs,         Ada_suffixes       },
  { "asm",       Asm_help,       Asm_labels,        Asm_suffixes       },
  { "c",         default_C_help, default_C_entries, default_C_suffixes },
  { "c++",       Cplusplus_help, Cplusplus_entries, Cplusplus_suffixes },
  { "c*",        no_lang_help,   Cstar_entries,     Cstar_suffixes     },
  { "cobol",     Cobol_help,     Cobol_paragraphs,  Cobol_suffixes     },
816 817
  { "erlang",    Erlang_help,    Erlang_functions,  Erlang_suffixes,
                 NULL,           Erlang_interpreters },
818
  { "forth",     Forth_help,     Forth_words,       Forth_suffixes     },
819
  { "fortran",   Fortran_help,   Fortran_functions, Fortran_suffixes   },
lu4nx's avatar
lu4nx committed
820
  { "go",        Go_help,        Go_functions,      Go_suffixes        },
821 822 823
  { "html",      HTML_help,      HTML_labels,       HTML_suffixes      },
  { "java",      Cjava_help,     Cjava_entries,     Cjava_suffixes     },
  { "lisp",      Lisp_help,      Lisp_functions,    Lisp_suffixes      },
824
  { "lua",       Lua_help,Lua_functions,Lua_suffixes,NULL,Lua_interpreters},
825 826 827 828 829 830 831
  { "makefile",  Makefile_help,Makefile_targets,NULL,Makefile_filenames},
  { "objc",      Objc_help,      plain_C_entries,   Objc_suffixes      },
  { "pascal",    Pascal_help,    Pascal_functions,  Pascal_suffixes    },
  { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters},
  { "php",       PHP_help,       PHP_functions,     PHP_suffixes       },
  { "postscript",PS_help,        PS_functions,      PS_suffixes        },
  { "proc",      no_lang_help,   plain_C_entries,   plain_C_suffixes   },
832 833 834 835 836 837
  { "prolog",    Prolog_help,    Prolog_functions,  Prolog_suffixes,
                 NULL,           Prolog_interpreters },
  { "python",    Python_help,    Python_functions,  Python_suffixes,
                 NULL,           Python_interpreters },
  { "ruby",      Ruby_help,      Ruby_functions,    Ruby_suffixes,
                 Ruby_filenames, Ruby_interpreters },
838 839 840
  { "scheme",    Scheme_help,    Scheme_functions,  Scheme_suffixes    },
  { "tex",       TeX_help,       TeX_commands,      TeX_suffixes       },
  { "texinfo",   Texinfo_help,   Texinfo_nodes,     Texinfo_suffixes   },
841
  { "yacc",      Yacc_help,Yacc_entries,Yacc_suffixes,NULL,NULL,true},
842 843 844
  { "auto",      auto_help },                      /* default guessing scheme */
  { "none",      none_help,      just_read_file }, /* regexp matching only */
  { NULL }                /* end of list */
845
};
846

Jim Blandy's avatar
Jim Blandy committed
847

848
static void
849
print_language_names (void)
850
{
851
  language *lang;
852
  const char **name, **ext;
853 854

  puts ("\nThese are the currently supported languages, along with the\n\
855
default file names and dot suffixes:");
856
  for (lang = lang_names; lang->name != NULL; lang++)
857
    {
858 859 860 861
      printf ("  %-*s", 10, lang->name);
      if (lang->filenames != NULL)
	for (name = lang->filenames; *name != NULL; name++)
	  printf (" %s", *name);
862 863 864
      if (lang->suffixes != NULL)
	for (ext = lang->suffixes; *ext != NULL; ext++)
	  printf (" .%s", *ext);
865 866
      puts ("");
    }
867 868
  puts ("where 'auto' means use default language for files based on file\n\
name suffix, and 'none' means only do regexp processing on files.\n\
869
If no language is specified and no matching suffix is found,\n\
870 871
the first line of the file is read for a sharp-bang (#!) sequence\n\
followed by the name of an interpreter.  If no such sequence is found,\n\
872
Fortran is tried first; if no tags are found, C is tried next.\n\
873 874
When parsing any C file, a \"class\" or \"template\" keyword\n\
switches to C++.");
875
  puts ("Compressed files are supported using gzip, bzip2, xz, and zstd.\n\
876 877 878
\n\
For detailed help on a given language use, for example,\n\
etags --help --lang=ada.");
879 880
}

881 882 883 884
/* Name the program reports for itself: "ctags" when CTAGS is nonzero,
   "etags" otherwise.  */
#if CTAGS
# define PROGRAM_NAME "ctags"
#else
# define PROGRAM_NAME "etags"
#endif
static _Noreturn void
887
print_version (void)
Jim Blandy's avatar
Jim Blandy committed
888
{
889 890 891 892
  fputs ((PROGRAM_NAME " (" PACKAGE_NAME " " PACKAGE_VERSION ")\n"
	  COPYRIGHT "\n"
	  "This program is distributed under the terms in ETAGS.README\n"),
	 stdout);
893
  exit (EXIT_SUCCESS);
Jim Blandy's avatar
Jim Blandy committed
894 895
}

896
/* Defaults to false unless the build already defines it.
   NOTE(review): presumably consulted by print_help to decide whether
   undocumented options are listed -- confirm against its body.  */
#ifndef PRINT_UNDOCUMENTED_OPTIONS_HELP
# define PRINT_UNDOCUMENTED_OPTIONS_HELP false
#endif

static _Noreturn void
901
print_help (argument *argbuffer)
Jim Blandy's avatar
Jim Blandy committed
902
{
903
  bool help_for_lang = false;
904 905 906 907 908 909 910

  for (; argbuffer->arg_type != at_end; argbuffer++)
    if (argbuffer->arg_type == at_language)
      {
	if (help_for_lang)
	  puts ("");
	puts (argbuffer->lang->help);
911
	help_for_lang = true;
912 913 914
      }

  if (help_for_lang)
915
    exit (EXIT_SUCCESS);
916