etags.c 178 KB
Newer Older
Eli Zaretskii's avatar
Eli Zaretskii committed
1
/* Tags file maker to go with GNU Emacs           -*- coding: latin-1 -*-
2

3
Copyright (C) 1984 The Regents of the University of California
Jim Blandy's avatar
Jim Blandy committed
4

5 6 7 8 9 10 11 12 13 14 15 16
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the
   distribution.
3. Neither the name of the University nor the names of its
   contributors may be used to endorse or promote products derived
   from this software without specific prior written permission.
Jim Blandy's avatar
Jim Blandy committed
17

18 19 20 21 22 23 24 25 26 27 28 29 30 31
THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS''
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS
BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


Copyright (C) 1984, 1987, 1988, 1989, 1993, 1994, 1995, 1998, 1999,
Glenn Morris's avatar
Glenn Morris committed
32
  2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008
33 34 35 36 37 38
  Free Software Foundation, Inc.

This file is not considered part of GNU Emacs.

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
39 40
the Free Software Foundation; either version 3, or (at your option)
any later version.
41 42 43 44 45 46 47

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
48 49 50
along with this program; see the file COPYING.  If not, write to the
Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
Boston, MA 02110-1301, USA. */
51 52 53 54 55 56 57 58 59


/* NB To comply with the above BSD license, copyright information is
reproduced in etc/ETAGS.README.  That file should be updated when the
above notices are.

To the best of our knowledge, this code was originally based on the
ctags.c distributed with BSD4.2, which was copyrighted by the
University of California, as described above. */
Jim Blandy's avatar
Jim Blandy committed
60 61 62 63


/*
 * Authors:
64 65 66 67
 * 1983	Ctags originally by Ken Arnold.
 * 1984	Fortran added by Jim Kleckner.
 * 1984	Ed Pelegri-Llopart added C typedefs.
 * 1985	Emacs TAGS format by Richard Stallman.
68
 * 1989	Sam Kendall added C++.
Francesco Potortì's avatar
Francesco Potortì committed
69 70
 * 1992 Joseph B. Wells improved C and C++ parsing.
 * 1993	Francesco Potortì reorganised C and C++.
71
 * 1994	Line-by-line regexp tags by Tom Tromey.
72 73
 * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
 * 2002 #line directives by Francesco Potortì.
74
 *
75
 * Francesco Potortì <pot@gnu.org> has maintained and improved it since 1993.
Jim Blandy's avatar
Jim Blandy committed
76 77
 */

78 79 80 81 82 83
/*
 * If you want to add support for a new language, start by looking at the LUA
 * language, which is the simplest.  Alternatively, consider shipping a
 * configuration file containing regexp definitions for etags.
 */

84
char pot_etags_version[] = "@(#) pot revision number is 17.26";
85 86 87

#define	TRUE	1
#define	FALSE	0
88

89 90 91 92 93 94
#ifdef DEBUG
#  undef DEBUG
#  define DEBUG TRUE
#else
#  define DEBUG  FALSE
#  define NDEBUG		/* disable assert */
95
#endif
96

97 98 99 100 101
#ifdef HAVE_CONFIG_H
# include <config.h>
  /* On some systems, Emacs defines static as nothing for the sake
     of unexec.  We don't want that here since we don't use unexec. */
# undef static
102
# ifndef PTR			/* for XEmacs */
Francesco Potortì's avatar
Francesco Potortì committed
103 104
#   define PTR void *
# endif
105
# ifndef __P			/* for XEmacs */
Francesco Potortì's avatar
Francesco Potortì committed
106 107
#   define __P(args) args
# endif
108
#else  /* no config.h */
Francesco Potortì's avatar
Francesco Potortì committed
109 110 111
# if defined(__STDC__) && (__STDC__ || defined(__SUNPRO_C))
#   define __P(args) args	/* use prototypes */
#   define PTR void *		/* for generic pointers */
112
# else /* not standard C */
Francesco Potortì's avatar
Francesco Potortì committed
113 114 115
#   define __P(args) ()		/* no prototypes */
#   define const		/* remove const for old compilers' sake */
#   define PTR long *		/* don't use void* */
116 117
# endif
#endif /* !HAVE_CONFIG_H */
118

119 120 121 122
#ifndef _GNU_SOURCE
# define _GNU_SOURCE 1		/* enables some compiler checks on GNU */
#endif

123
/* WIN32_NATIVE is for XEmacs.
124 125 126 127 128 129 130
   MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
#ifdef WIN32_NATIVE
# undef MSDOS
# undef  WINDOWSNT
# define WINDOWSNT
#endif /* WIN32_NATIVE */

131
#ifdef MSDOS
132
# undef MSDOS
133
# define MSDOS TRUE
134 135
# include <fcntl.h>
# include <sys/param.h>
136 137 138 139 140
# include <io.h>
# ifndef HAVE_CONFIG_H
#   define DOS_NT
#   include <sys/config.h>
# endif
141 142
#else
# define MSDOS FALSE
143 144
#endif /* MSDOS */

145
#ifdef WINDOWSNT
146 147 148
# include <stdlib.h>
# include <fcntl.h>
# include <string.h>
149
# include <direct.h>
150
# include <io.h>
151
# define MAXPATHLEN _MAX_PATH
152 153 154
# undef HAVE_NTGUI
# undef  DOS_NT
# define DOS_NT
155 156 157
# ifndef HAVE_GETCWD
#   define HAVE_GETCWD
# endif /* undef HAVE_GETCWD */
158
#else /* not WINDOWSNT */
159 160 161
# ifdef STDC_HEADERS
#  include <stdlib.h>
#  include <string.h>
162
# else /* no standard C headers */
163
    extern char *getenv ();
164 165 166 167 168 169 170
#  ifdef VMS
#   define EXIT_SUCCESS	1
#   define EXIT_FAILURE	0
#  else /* no VMS */
#   define EXIT_SUCCESS	0
#   define EXIT_FAILURE	1
#  endif
171 172
# endif
#endif /* !WINDOWSNT */
173

174 175 176
#ifdef HAVE_UNISTD_H
# include <unistd.h>
#else
177
# if defined (HAVE_GETCWD) && !defined (WINDOWSNT)
178
    extern char *getcwd (char *buf, size_t size);
179 180 181
# endif
#endif /* HAVE_UNISTD_H */

182 183
#include <stdio.h>
#include <ctype.h>
184 185
#include <errno.h>
#ifndef errno
186
  extern int errno;
187
#endif
188 189 190
#include <sys/types.h>
#include <sys/stat.h>

191 192 193 194
#include <assert.h>
#ifdef NDEBUG
# undef  assert			/* some systems have a buggy assert.h */
# define assert(x) ((void) 0)
195 196
#endif

197 198 199 200
#if !defined (S_ISREG) && defined (S_IFREG)
# define S_ISREG(m)	(((m) & S_IFMT) == S_IFREG)
#endif

201 202
/* Long-option support.  Define NO_LONG_OPTIONS if you don't have GNU
   getopt; the macro is then normalised to TRUE or FALSE, in the same
   way as DEBUG and CTAGS.  */
#ifdef NO_LONG_OPTIONS
  /* Discard whatever value the build supplied (-DNO_LONG_OPTIONS gives 1)
     before redefining: redefining a macro with a different replacement
     list is a constraint violation.  */
# undef  NO_LONG_OPTIONS
# define NO_LONG_OPTIONS TRUE
  /* Without getopt_long, fall back to plain getopt and ignore the
     long-option table and index arguments.  */
# define getopt_long(argc,argv,optstr,lopts,lind) getopt (argc, argv, optstr)
  extern char *optarg;
  extern int optind, opterr;
#else
# define NO_LONG_OPTIONS FALSE
# include <getopt.h>
#endif /* NO_LONG_OPTIONS */
210

211 212
#ifndef HAVE_CONFIG_H		/* this is a standalone compilation */
# ifdef __CYGWIN__         	/* compiling on Cygwin */
213 214 215 216 217
			     !!! NOTICE !!!
 the regex.h distributed with Cygwin is not compatible with etags, alas!
If you want regular expression support, you should delete this notice and
	      arrange to use the GNU regex.h and regex.c.
# endif
218 219
#endif
#include <regex.h>
220

221
/* Define CTAGS to make the program "ctags" compatible with the usual one.
222
 Leave it undefined to make the program "etags", which makes emacs-style
223 224 225 226 227 228
 tag tables and tags typedefs, #defines and struct/union/enum by default. */
#ifdef CTAGS
# undef  CTAGS
# define CTAGS TRUE
#else
# define CTAGS FALSE
Jim Blandy's avatar
Jim Blandy committed
229 230
#endif

231
/* True if the strings S and T are equal.  Both arguments are handed to
   strcmp, so BOTH must be non-NULL (hence `&&', not `||').  */
#define streq(s,t)	(assert((s)!=NULL && (t)!=NULL), !strcmp (s, t))
232
#define strcaseeq(s,t)	(assert((s)!=NULL && (t)!=NULL), !etags_strcasecmp (s, t))
233
/* True if the first N characters of S and T are equal.  Both arguments
   are handed to strncmp, so BOTH must be non-NULL (hence `&&').  */
#define strneq(s,t,n)	(assert((s)!=NULL && (t)!=NULL), !strncmp (s, t, n))
234
#define strncaseeq(s,t,n) (assert((s)!=NULL && (t)!=NULL), !etags_strncasecmp (s, t, n))
Jim Blandy's avatar
Jim Blandy committed
235

236
#define CHARS 256		/* 2^CHAR_BIT, i.e. number of 8-bit char values */
237
#define CHAR(x)		((unsigned int)(x) & (CHARS - 1))
238 239 240 241 242
#define	iswhite(c)	(_wht[CHAR(c)]) /* c is white (see white) */
#define notinname(c)	(_nin[CHAR(c)]) /* c is not in a name (see nonam) */
#define	begtoken(c)	(_btk[CHAR(c)]) /* c can start token (see begtk) */
#define	intoken(c)	(_itk[CHAR(c)]) /* c can be in token (see midtk) */
#define	endtoken(c)	(_etk[CHAR(c)]) /* c ends tokens (see endtk) */
Jim Blandy's avatar
Jim Blandy committed
243

244 245 246 247 248 249 250
/* Wrappers around the <ctype.h> classification and case-conversion
   functions.  The argument is first passed through CHAR(), which masks
   it to the range 0..CHARS-1, so a negative (signed) char value is
   never handed to the library function.  */
#define ISALNUM(c)	isalnum (CHAR(c))
#define ISALPHA(c)	isalpha (CHAR(c))
#define ISDIGIT(c)	isdigit (CHAR(c))
#define ISLOWER(c)	islower (CHAR(c))

#define lowcase(c)	tolower (CHAR(c))
#define upcase(c)	toupper (CHAR(c))
251

252

253
/*
254
 *	xnew, xrnew -- allocate, reallocate storage
255 256
 *
 * SYNOPSIS:	Type *xnew (int n, Type);
257
 *		void xrnew (OldPointer, int n, Type);
258
 */
259
#if DEBUG
260
# include "chkmalloc.h"
261 262
# define xnew(n,Type)	  ((Type *) trace_malloc (__FILE__, __LINE__, \
						  (n) * sizeof (Type)))
263 264
# define xrnew(op,n,Type) ((op) = (Type *) trace_realloc (__FILE__, __LINE__, \
					(char *) (op), (n) * sizeof (Type)))
265
#else
266
# define xnew(n,Type)	  ((Type *) xmalloc ((n) * sizeof (Type)))
267 268
# define xrnew(op,n,Type) ((op) = (Type *) xrealloc ( \
					(char *) (op), (n) * sizeof (Type)))
269
#endif
Jim Blandy's avatar
Jim Blandy committed
270

Francesco Potortì's avatar
Francesco Potortì committed
271
#define bool int
272

Francesco Potortì's avatar
Francesco Potortì committed
273
typedef void Lang_function __P((FILE *));
Jim Blandy's avatar
Jim Blandy committed
274

275 276
typedef struct
{
277 278
  char *suffix;			/* file name suffix for this compressor */
  char *command;		/* takes one arg and decompresses to stdout */
279
} compressor;
Jim Blandy's avatar
Jim Blandy committed
280

281 282
typedef struct
{
283
  char *name;			/* language name */
284
  char *help;                   /* detailed help for the language */
285 286
  Lang_function *function;	/* parse function */
  char **suffixes;		/* name suffixes of this language's files */
287
  char **filenames;		/* names of this language's files */
288
  char **interpreters;		/* interpreters for this language */
289
  bool metasource;		/* source used to generate other sources */
290 291
} language;

292 293 294 295 296 297 298 299 300 301
typedef struct fdesc
{
  struct fdesc *next;		/* for the linked list */
  char *infname;		/* uncompressed input file name */
  char *infabsname;		/* absolute uncompressed input file name */
  char *infabsdir;		/* absolute dir of input file */
  char *taggedfname;		/* file name to write in tagfile */
  language *lang;		/* language of file */
  char *prop;			/* file properties to write in tagfile */
  bool usecharno;		/* etags tags shall contain char number */
302
  bool written;			/* entry written in the tags file */
303 304
} fdesc;

305
typedef struct node_st
306 307 308 309
{				/* sorting structure */
  struct node_st *left, *right;	/* left and right sons */
  fdesc *fdp;			/* description of file to whom tag belongs */
  char *name;			/* tag name */
310
  char *regex;			/* search regexp */
311
  bool valid;			/* write this tag on the tag file */
312
  bool is_func;			/* function tag: use regexp in CTAGS mode */
313 314
  bool been_warned;		/* warning already given for duplicated tag */
  int lno;			/* line number tag is on */
315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330
  long cno;			/* character number line starts on */
} node;

/*
 * A `linebuffer' is a structure which holds a line of text.
 * `readline_internal' reads a line from a stream into a linebuffer
 * and works regardless of the length of the line.
 * SIZE is the size of BUFFER, LEN is the length of the string in
 * BUFFER after readline reads it.
 */
typedef struct
{
  long size;			/* size of BUFFER */
  int len;			/* length of the string held in BUFFER */
  char *buffer;			/* storage holding the line of text */
} linebuffer;
331

332 333 334 335 336 337
/* Used to support mixing of --lang and file names. */
typedef struct
{
  enum {
    at_language,		/* a language specification */
    at_regexp,			/* a regular expression */
338
    at_filename,		/* a file name */
339 340
    at_stdin,			/* read from stdin here */
    at_end			/* stop parsing the list */
341 342 343 344 345 346
  } arg_type;			/* argument type */
  language *lang;		/* language associated with the argument */
  char *what;			/* the argument itself */
} argument;

/* Structure defining a regular expression. */
347
typedef struct regexp
348
{
349 350 351 352 353 354 355 356 357 358 359
  struct regexp *p_next;	/* pointer to next in list */
  language *lang;		/* if set, use only for this language */
  char *pattern;		/* the regexp pattern */
  char *name;			/* tag name */
  struct re_pattern_buffer *pat; /* the compiled pattern */
  struct re_registers regs;	/* re registers */
  bool error_signaled;		/* already signaled for this regexp */
  bool force_explicit_name;	/* do not allow implicit tag name */
  bool ignore_case;		/* ignore case when matching */
  bool multi_line;		/* do a multi-line match on the whole file */
} regexp;
360 361


362
/* Many compilers barf on this:
363
	Lang_function Ada_funcs;
364
   so let's write it this way */
Francesco Potortì's avatar
Francesco Potortì committed
365 366 367 368 369 370 371 372 373 374
static void Ada_funcs __P((FILE *));
static void Asm_labels __P((FILE *));
static void C_entries __P((int c_ext, FILE *));
static void default_C_entries __P((FILE *));
static void plain_C_entries __P((FILE *));
static void Cjava_entries __P((FILE *));
static void Cobol_paragraphs __P((FILE *));
static void Cplusplus_entries __P((FILE *));
static void Cstar_entries __P((FILE *));
static void Erlang_functions __P((FILE *));
375
static void Forth_words __P((FILE *));
Francesco Potortì's avatar
Francesco Potortì committed
376
static void Fortran_functions __P((FILE *));
377
static void HTML_labels __P((FILE *));
Francesco Potortì's avatar
Francesco Potortì committed
378
static void Lisp_functions __P((FILE *));
379
static void Lua_functions __P((FILE *));
Francesco Potortì's avatar
Francesco Potortì committed
380 381 382 383
static void Makefile_targets __P((FILE *));
static void Pascal_functions __P((FILE *));
static void Perl_functions __P((FILE *));
static void PHP_functions __P((FILE *));
384
static void PS_functions __P((FILE *));
Francesco Potortì's avatar
Francesco Potortì committed
385 386 387 388 389
static void Prolog_functions __P((FILE *));
static void Python_functions __P((FILE *));
static void Scheme_functions __P((FILE *));
static void TeX_commands __P((FILE *));
static void Texinfo_nodes __P((FILE *));
390
static void Yacc_entries __P((FILE *));
Francesco Potortì's avatar
Francesco Potortì committed
391 392 393 394
static void just_read_file __P((FILE *));

static void print_language_names __P((void));
static void print_version __P((void));
395
static void print_help __P((argument *));
Francesco Potortì's avatar
Francesco Potortì committed
396 397 398 399 400
int main __P((int, char **));

static compressor *get_compressor_from_suffix __P((char *, char **));
static language *get_language_from_langname __P((const char *));
static language *get_language_from_interpreter __P((char *));
401
static language *get_language_from_filename __P((char *, bool));
402
static void readline __P((linebuffer *, FILE *));
Francesco Potortì's avatar
Francesco Potortì committed
403 404
static long readline_internal __P((linebuffer *, FILE *));
static bool nocase_tail __P((char *));
405
static void get_tag __P((char *, char **));
406

407
static void analyse_regex __P((char *));
408
static void free_regexps __P((void));
409
static void regex_tag_multiline __P((void));
Francesco Potortì's avatar
Francesco Potortì committed
410 411 412 413 414 415 416
static void error __P((const char *, const char *));
static void suggest_asking_for_help __P((void));
void fatal __P((char *, char *));
static void pfatal __P((char *));
static void add_node __P((node *, node **));

static void init __P((void));
417 418
static void process_file_name __P((char *, language *));
static void process_file __P((FILE *, char *, language *));
419
static void find_entries __P((FILE *));
Francesco Potortì's avatar
Francesco Potortì committed
420
static void free_tree __P((node *));
421
static void free_fdesc __P((fdesc *));
Francesco Potortì's avatar
Francesco Potortì committed
422
static void pfnote __P((char *, bool, char *, int, int, long));
423
static void make_tag __P((char *, int, bool, char *, int, int, long));
424
static void invalidate_nodes __P((fdesc *, node **));
Francesco Potortì's avatar
Francesco Potortì committed
425 426 427 428 429 430 431 432 433
static void put_entries __P((node *));

static char *concat __P((char *, char *, char *));
static char *skip_spaces __P((char *));
static char *skip_non_spaces __P((char *));
static char *savenstr __P((char *, int));
static char *savestr __P((char *));
static char *etags_strchr __P((const char *, int));
static char *etags_strrchr __P((const char *, int));
434 435
static int etags_strcasecmp __P((const char *, const char *));
static int etags_strncasecmp __P((const char *, const char *, int));
Francesco Potortì's avatar
Francesco Potortì committed
436 437 438 439 440 441
static char *etags_getcwd __P((void));
static char *relative_filename __P((char *, char *));
static char *absolute_filename __P((char *, char *));
static char *absolute_dirname __P((char *, char *));
static bool filename_is_absolute __P((char *f));
static void canonicalize_filename __P((char *));
442
static void linebuffer_init __P((linebuffer *));
Francesco Potortì's avatar
Francesco Potortì committed
443
static void linebuffer_setlen __P((linebuffer *, int));
444 445
static PTR xmalloc __P((unsigned int));
static PTR xrealloc __P((char *, unsigned int));
446

Jim Blandy's avatar
Jim Blandy committed
447

448
static char searchar = '/';	/* use /.../ searches */
Jim Blandy's avatar
Jim Blandy committed
449

450 451 452 453 454
static char *tagfile;		/* output file */
static char *progname;		/* name this program was invoked with */
static char *cwd;		/* current working directory */
static char *tagfiledir;	/* directory of tagfile */
static FILE *tagf;		/* ioptr for tags file */
455

456 457
static fdesc *fdhead;		/* head of file description list */
static fdesc *curfdp;		/* current file description */
458 459 460 461
static int lineno;		/* line number of current line */
static long charno;		/* current character number */
static long linecharno;		/* charno of start of current line */
static char *dbp;		/* pointer to start of current tag */
462

463
static const int invalidcharno = -1;
464

465
static node *nodehead;		/* the head of the binary tree of tags */
466
static node *last_node;		/* the last node created */
Jim Blandy's avatar
Jim Blandy committed
467

468
static linebuffer lb;		/* the current line */
469
static linebuffer filebuf;	/* a buffer containing the whole file */
470
static linebuffer token_name;	/* a buffer containing a tag name */
Jim Blandy's avatar
Jim Blandy committed
471

472
/* boolean "functions" (see init)	*/
473 474
static bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS];
static char
475
  /* white chars */
476
  *white = " \f\t\n\r\v",
477
  /* not in a name */
478
  *nonam = " \f\t\n\r()=,;",	/* look at make_tag before modifying! */
479
  /* token ending chars */
480
  *endtk = " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?",
481 482 483
  /* token starting chars */
  *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~@",
  /* valid in-token chars */
484
  *midtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789";
Jim Blandy's avatar
Jim Blandy committed
485

486
static bool append_to_tagfile;	/* -a: append to tags */
487
/* The next four default to TRUE for etags, but to FALSE for ctags.  */
488 489
static bool typedefs;		/* -t: create tags for C and Ada typedefs */
static bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
Jim Blandy's avatar
Jim Blandy committed
490
				/* 0 struct/enum/union decls, and C++ */
491
				/* member functions. */
492
static bool constantypedefs;	/* -d: create tags for C #define, enum */
493
				/* constants and variables. */
Jim Blandy's avatar
Jim Blandy committed
494
				/* -D: opposite of -d.  Default under ctags. */
495 496
static bool globals;		/* create tags for global variables */
static bool members;		/* create tags for C member variables */
497
static bool declarations;	/* --declarations: tag them and extern in C&Co*/
498
static bool no_line_directive;	/* ignore #line directives (undocumented) */
499
static bool no_duplicates;	/* no duplicate tags for ctags (undocumented) */
500 501
static bool update;		/* -u: update tags */
static bool vgrind_style;	/* -v: create vgrind style index output */
502
static bool no_warnings;	/* -w: suppress warnings (undocumented) */
503
static bool cxref_style;	/* -x: create cxref style output */
504
static bool cplusplus;		/* .[hc] means C++, not C (undocumented) */
505
static bool ignoreindent;	/* -I: ignore indentation in C */
506
static bool packages_only;	/* --packages-only: in Ada, only tag packages*/
507

508 509
/* STDIN is defined in LynxOS system headers */
#ifdef STDIN
510
# undef STDIN
511 512
#endif

513 514 515
#define STDIN 0x1001		/* returned by getopt_long on --parse-stdin */
static bool parsing_stdin;	/* --parse-stdin used */

516
static regexp *p_head;		/* list of all regexps */
517
static bool need_filebuf;	/* some regexes are multi-line */
518

519
static struct option longopts[] =
520
{
521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536
  { "append",             no_argument,       NULL,               'a'   },
  { "packages-only",      no_argument,       &packages_only,     TRUE  },
  { "c++",                no_argument,       NULL,               'C'   },
  { "declarations",       no_argument,       &declarations,      TRUE  },
  { "no-line-directive",  no_argument,       &no_line_directive, TRUE  },
  { "no-duplicates",      no_argument,       &no_duplicates,     TRUE  },
  { "help",               no_argument,       NULL,               'h'   },
  { "help",               no_argument,       NULL,               'H'   },
  { "ignore-indentation", no_argument,       NULL,               'I'   },
  { "language",           required_argument, NULL,               'l'   },
  { "members",            no_argument,       &members,           TRUE  },
  { "no-members",         no_argument,       &members,           FALSE },
  { "output",             required_argument, NULL,               'o'   },
  { "regex",              required_argument, NULL,               'r'   },
  { "no-regex",           no_argument,       NULL,               'R'   },
  { "ignore-case-regex",  required_argument, NULL,               'c'   },
537
  { "parse-stdin",        required_argument, NULL,               STDIN },
538
  { "version",            no_argument,       NULL,               'V'   },
539

540
#if CTAGS /* Ctags options */
541 542 543 544 545 546 547 548 549
  { "backward-search",    no_argument,       NULL,               'B'   },
  { "cxref",              no_argument,       NULL,               'x'   },
  { "defines",            no_argument,       NULL,               'd'   },
  { "globals",            no_argument,       &globals,           TRUE  },
  { "typedefs",           no_argument,       NULL,               't'   },
  { "typedefs-and-c++",   no_argument,       NULL,               'T'   },
  { "update",             no_argument,       NULL,               'u'   },
  { "vgrind",             no_argument,       NULL,               'v'   },
  { "no-warn",            no_argument,       NULL,               'w'   },
550

551
#else /* Etags options */
552 553 554
  { "no-defines",         no_argument,       NULL,               'D'   },
  { "no-globals",         no_argument,       &globals,           FALSE },
  { "include",            required_argument, NULL,               'i'   },
555
#endif
556
  { NULL }
Jim Blandy's avatar
Jim Blandy committed
557 558
};

559
static compressor compressors[] =
560 561 562 563 564 565 566 567 568
{
  { "z", "gzip -d -c"},
  { "Z", "gzip -d -c"},
  { "gz", "gzip -d -c"},
  { "GZ", "gzip -d -c"},
  { "bz2", "bzip2 -d -c" },
  { NULL }
};

569 570 571
/*
 * Language stuff.
 */
572

573
/* Ada code */
574
static char *Ada_suffixes [] =
575
  { "ads", "adb", "ada", NULL };
576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591
/* Detailed help text for Ada: what gets tagged and the meaning of the
   entity-type qualifiers appended to tag names.  */
static char Ada_help[] =
  "In Ada code, functions, procedures, packages, tasks and types are\n"
  "tags.  Use the `--packages-only' option to create tags for\n"
  "packages only.\n"
  "Ada tag names have suffixes indicating the type of entity:\n"
  "\tEntity type:\tQualifier:\n"
  "\t------------\t----------\n"
  "\tfunction\t/f\n"
  "\tprocedure\t/p\n"
  "\tpackage spec\t/s\n"
  "\tpackage body\t/b\n"
  "\ttype\t\t/t\n"
  "\ttask\t\t/k\n"
  "Thus, `M-x find-tag <RET> bidule/b <RET>' will go directly to the\n"
  "body of the package `bidule', while `M-x find-tag <RET> bidule <RET>'\n"
  "will just search for any tag `bidule'.";
592 593

/* Assembly code */
594 595 596 597 598 599 600 601 602 603 604
static char *Asm_suffixes [] =
  { "a",	/* Unix assembler */
    "asm", /* Microcontroller assembly */
    "def", /* BSO/Tasking definition includes  */
    "inc", /* Microcontroller include files */
    "ins", /* Microcontroller include files */
    "s", "sa", /* Unix assembler */
    "S",   /* cpp-processed Unix assembler */
    "src", /* BSO/Tasking C compiler output */
    NULL
  };
605 606 607 608
/* Detailed help text for assembler sources.  */
static char Asm_help[] =
  "In assembler code, labels appearing at the beginning of a line,\n"
  "followed by a colon, are tags.";

609 610

/* Note that .c and .h can be considered C++, if the --c++ flag was
611
   given, or if the `class' or `template' keywords are met inside the file.
612
   That is why default_C_entries is called for these. */
613
static char *default_C_suffixes [] =
614
  { "c", "h", NULL };
615 616 617 618 619 620 621 622 623
#if CTAGS				/* C help for Ctags */
static char default_C_help [] =
"In C code, any C function is a tag.  Use -t to tag typedefs.\n\
Use -T to tag definitions of `struct', `union' and `enum'.\n\
Use -d to tag `#define' macro definitions and `enum' constants.\n\
Use --globals to tag global variables.\n\
You can tag function declarations and external variables by\n\
using `--declarations', and struct members by using `--members'.";
#else					/* C help for Etags */
624 625 626 627 628
static char default_C_help [] =
"In C code, any C function or typedef is a tag, and so are\n\
definitions of `struct', `union' and `enum'.  `#define' macro\n\
definitions and `enum' constants are tags unless you specify\n\
`--no-defines'.  Global variables are tags unless you specify\n\
629 630
`--no-globals'.  Use of `--no-globals' and `--no-defines'\n\
can make the tags table file much smaller.\n\
631
You can tag function declarations and external variables by\n\
632 633
using `--declarations', and struct members by using `--members'.";
#endif	/* C help for Ctags and Etags */
634

635
static char *Cplusplus_suffixes [] =
636
  { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
637 638 639
    "M",			/* Objective C++ */
    "pdb",			/* Postscript with C syntax */
    NULL };
640
static char Cplusplus_help [] =
641 642
"In C++ code, all the tag constructs of C code are tagged.  (Use\n\
--help --lang=c --lang=c++ for full help.)\n\
643
In addition to C tags, member functions are also recognized.  Member\n\
644
variables are also recognized if you use the `--members' option.\n\
645 646 647
Tags for variables and functions in classes are named `CLASS::VARIABLE'\n\
and `CLASS::FUNCTION'.  `operator' definitions have tag names like\n\
`operator+'.";
648

649
static char *Cjava_suffixes [] =
650
  { "java", NULL };
651 652
static char Cjava_help [] =
"In Java code, all the tags constructs of C and C++ code are\n\
653
tagged.  (Use --help --lang=c --lang=c++ --lang=java for full help.)";
654

655

656
static char *Cobol_suffixes [] =
657
  { "COB", "cob", NULL };
658 659 660
/* Detailed help text for Cobol sources.  */
static char Cobol_help[] =
  "In Cobol code, tags are paragraph names; that is, any word\n"
  "starting in column 8 and followed by a period.";
661

662
static char *Cstar_suffixes [] =
663 664
  { "cs", "hs", NULL };

665
static char *Erlang_suffixes [] =
666
  { "erl", "hrl", NULL };
667 668 669
/* Detailed help text for Erlang sources.  */
static char Erlang_help[] =
  "In Erlang code, the tags are the functions, records and macros\n"
  "defined in the file.";
670

671 672 673 674 675 676
/* File name suffixes recognised as Forth.  Declared static, like the
   suffix table of every other language in this file.
   NOTE(review): assumes no other translation unit refers to this
   symbol -- confirm before merging.  */
static char *Forth_suffixes [] =
  { "fth", "tok", NULL };
/* Detailed help text for Forth sources.  */
static char Forth_help[] =
  "In Forth code, tags are words defined by `:',\n"
  "constant, code, create, defer, value, variable, buffer:, field.";

677
static char *Fortran_suffixes [] =
678
  { "F", "f", "f90", "for", NULL };
679 680
static char Fortran_help [] =
"In Fortran code, functions, subroutines and block data are tags.";
681

682 683
static char *HTML_suffixes [] =
  { "htm", "html", "shtml", NULL };
684 685 686 687
/* Detailed help text for HTML input files.  */
static char HTML_help[] =
  "In HTML input files, the tags are the `title' and the `h1', `h2',\n"
  "`h3' headers.  Also, tags are `name=' in anchors and all\n"
  "occurrences of `id='.";
688

689
static char *Lisp_suffixes [] =
690
  { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };
691 692 693 694 695
/* Detailed help text for Lisp sources.  */
static char Lisp_help[] =
  "In Lisp code, any function defined with `defun', any variable\n"
  "defined with `defvar' or `defconst', and in general the first\n"
  "argument of any expression that starts with `(def' in column zero\n"
  "is a tag.";
696

697 698 699 700 701
static char *Lua_suffixes [] =
  { "lua", "LUA", NULL };
static char Lua_help [] =
"In Lua scripts, all functions are tags.";

702
static char *Makefile_filenames [] =
703
  { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL};
704 705 706 707 708 709 710 711 712 713 714
/* Detailed help text for makefiles.  */
static char Makefile_help[] =
  "In makefiles, targets are tags; additionally, variables are tags\n"
  "unless you specify `--no-globals'.";

static char *Objc_suffixes [] =
  { "lm",			/* Objective lex file */
    "m",			/* Objective C file */
     NULL };
static char Objc_help [] =
"In Objective C code, tags include Objective C definitions for classes,\n\
class categories, methods and protocols.  Tags for variables and\n\
715 716
functions in classes are named `CLASS::VARIABLE' and `CLASS::FUNCTION'.\n\
(Use --help --lang=c --lang=objc --lang=java for full help.)";
717

718
static char *Pascal_suffixes [] =
719
  { "p", "pas", NULL };
720 721 722
/* Detailed help text for Pascal sources.  */
static char Pascal_help[] =
  "In Pascal code, the tags are the functions and procedures defined\n"
  "in the file.";
723
/* " // this is for working around an Emacs highlighting bug... */
724

725
static char *Perl_suffixes [] =
726
  { "pl", "pm", NULL };
727
static char *Perl_interpreters [] =
728
  { "perl", "@PERL@", NULL };
729 730 731 732 733 734
/* Help text for the `perl' language entry.  */
static char Perl_help [] =
  "In Perl code, the tags are the packages, subroutines and variables\n"
  "defined by the `package', `sub', `my' and `local' keywords.  Use\n"
  "`--globals' if you want to tag global variables.  Tags for\n"
  "subroutines are named `PACKAGE::SUB'.  The name for subroutines\n"
  "defined in the default package is `main::SUB'.";
735

736
/* File-name suffixes for the `php' language entry; NULL ends the list.  */
static char *PHP_suffixes [] =
  { "php", "php3", "php4", NULL };
738
/* Help text for the `php' language entry.  */
static char PHP_help [] =
  "In PHP code, tags are functions, classes and defines.  When using\n"
  "the `--members' option, vars are tags too.";
741

742
/* File-name suffixes for the `proc' language entry; NULL ends the list.  */
static char *plain_C_suffixes [] =
  { "pc",			/* Pro*C file */
     NULL };
745

746
/* File-name suffixes for the `postscript' language entry; NULL ends
   the list.  */
static char *PS_suffixes [] =
  { "ps", "psw", NULL };	/* .psw is for PSWrap */
748 749
/* Help text for the `postscript' language entry.  */
static char PS_help [] = "In PostScript code, the tags are the functions.";
750

751
/* File-name suffixes for the `prolog' language entry; NULL ends the list.  */
static char *Prolog_suffixes [] =
  { "prolog", NULL };
753 754 755
/* Help text for the `prolog' language entry.  */
static char Prolog_help [] =
  "In Prolog code, tags are predicates and rules at the beginning of\n"
  "line.";
756

757
/* File-name suffixes for the `python' language entry; NULL ends the list.  */
static char *Python_suffixes [] =
  { "py", NULL };
759 760 761
/* Help text for the `python' language entry.  */
static char Python_help [] =
  "In Python code, `def' or `class' at the beginning of a line\n"
  "generate a tag.";
762

763
/* Can't do the `SCM' or `scm' prefix with a version number. */
764
/* File-name suffixes for the `scheme' language entry; NULL ends the list.  */
static char *Scheme_suffixes [] =
  { "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL };
766 767 768 769
/* Help text for the `scheme' language entry.  */
static char Scheme_help [] =
  "In Scheme code, tags include anything defined with `def' or with a\n"
  "construct whose name starts with `def'.  They also include\n"
  "variables set with `set!' at top level in the file.";
770

771
/* File-name suffixes for the `tex' language entry; NULL ends the list.  */
static char *TeX_suffixes [] =
  { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };
773 774 775 776 777 778 779 780 781 782 783
/* Help text for the `tex' language entry.  */
static char TeX_help [] =
  "In LaTeX text, the argument of any of the commands `\\chapter',\n"
  "`\\section', `\\subsection', `\\subsubsection', `\\eqno', `\\label',\n"
  "`\\ref', `\\cite', `\\bibitem', `\\part', `\\appendix', `\\entry',\n"
  "`\\index', `\\def', `\\newcommand', `\\renewcommand',\n"
  "`\\newenvironment' or `\\renewenvironment' is a tag.\n"
  "\n"
  "Other commands can be specified by setting the environment variable\n"
  "`TEXTAGS' to a colon-separated list like, for example,\n"
  "     TEXTAGS=\"mycommand:myothercommand\".";

784

785
/* File-name suffixes for the `texinfo' language entry; NULL ends the list.  */
static char *Texinfo_suffixes [] =
  { "texi", "texinfo", "txi", NULL };
787 788
/* Help text for the `texinfo' language entry.  */
static char Texinfo_help [] = "for texinfo files, lines starting with @node are tagged.";
Dave Love's avatar
Dave Love committed
789

790
/* File-name suffixes for the `yacc' language entry; NULL ends the list.  */
static char *Yacc_suffixes [] =
  { "y", "y++", "ym", "yxx", "yy", NULL }; /* .ym is Objective yacc file */
792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808
/* Help text for the `yacc' language entry.  */
static char Yacc_help [] =
  "In Bison or Yacc input files, each rule defines as a tag the\n"
  "nonterminal it constructs.  The portions of the file that contain\n"
  "C code are parsed as C code (use --help --lang=c --lang=yacc\n"
  "for full help).";

/* Help text for the `auto' pseudo-language entry.  */
static char auto_help [] =
  "`auto' is not a real language, it indicates to use\n"
  "a default language for files based on file name suffix and file contents.";

/* Help text for the `none' pseudo-language entry.  */
static char none_help [] =
  "`none' is not a real language, it indicates to only do\n"
  "regexp processing on files.";

/* Placeholder help text for language entries that have no text of
   their own.  */
static char no_lang_help [] = "No detailed help available for this language.";

809

810 811 812 813 814 815
/*
 * Table of languages.
 *
 * It is ok for a given function to be listed under more than one
 * name.  I just didn't.
 */
816

817
static language lang_names [] =
818
{
819 820 821 822 823 824 825
  { "ada",       Ada_help,       Ada_funcs,         Ada_suffixes       },
  { "asm",       Asm_help,       Asm_labels,        Asm_suffixes       },
  { "c",         default_C_help, default_C_entries, default_C_suffixes },
  { "c++",       Cplusplus_help, Cplusplus_entries, Cplusplus_suffixes },
  { "c*",        no_lang_help,   Cstar_entries,     Cstar_suffixes     },
  { "cobol",     Cobol_help,     Cobol_paragraphs,  Cobol_suffixes     },
  { "erlang",    Erlang_help,    Erlang_functions,  Erlang_suffixes    },
826
  { "forth",     Forth_help,     Forth_words,       Forth_suffixes     },
827 828 829 830
  { "fortran",   Fortran_help,   Fortran_functions, Fortran_suffixes   },
  { "html",      HTML_help,      HTML_labels,       HTML_suffixes      },
  { "java",      Cjava_help,     Cjava_entries,     Cjava_suffixes     },
  { "lisp",      Lisp_help,      Lisp_functions,    Lisp_suffixes      },
831
  { "lua",       Lua_help,       Lua_functions,     Lua_suffixes       },
832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847
  { "makefile",  Makefile_help,Makefile_targets,NULL,Makefile_filenames},
  { "objc",      Objc_help,      plain_C_entries,   Objc_suffixes      },
  { "pascal",    Pascal_help,    Pascal_functions,  Pascal_suffixes    },
  { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters},
  { "php",       PHP_help,       PHP_functions,     PHP_suffixes       },
  { "postscript",PS_help,        PS_functions,      PS_suffixes        },
  { "proc",      no_lang_help,   plain_C_entries,   plain_C_suffixes   },
  { "prolog",    Prolog_help,    Prolog_functions,  Prolog_suffixes    },
  { "python",    Python_help,    Python_functions,  Python_suffixes    },
  { "scheme",    Scheme_help,    Scheme_functions,  Scheme_suffixes    },
  { "tex",       TeX_help,       TeX_commands,      TeX_suffixes       },
  { "texinfo",   Texinfo_help,   Texinfo_nodes,     Texinfo_suffixes   },
  { "yacc",      Yacc_help,Yacc_entries,Yacc_suffixes,NULL,NULL,TRUE},
  { "auto",      auto_help },                      /* default guessing scheme */
  { "none",      none_help,      just_read_file }, /* regexp matching only */
  { NULL }                /* end of list */
848
};
849

Jim Blandy's avatar
Jim Blandy committed
850

851
static void
852 853
print_language_names ()
{
854
  language *lang;
855
  char **name, **ext;
856 857

  puts ("\nThese are the currently supported languages, along with the\n\
858
default file names and dot suffixes:");
859
  for (lang = lang_names; lang->name != NULL; lang++)
860
    {
861 862 863 864
      printf ("  %-*s", 10, lang->name);
      if (lang->filenames != NULL)
	for (name = lang->filenames; *name != NULL; name++)
	  printf (" %s", *name);
865 866 867
      if (lang->suffixes != NULL)
	for (ext = lang->suffixes; *ext != NULL; ext++)
	  printf (" .%s", *ext);
868 869
      puts ("");
    }
870
  puts ("where `auto' means use default language for files based on file\n\
871 872
name suffix, and `none' means only do regexp processing on files.\n\
If no language is specified and no matching suffix is found,\n\
873 874
the first line of the file is read for a sharp-bang (#!) sequence\n\
followed by the name of an interpreter.  If no such sequence is found,\n\
875
Fortran is tried first; if no tags are found, C is tried next.\n\
876 877 878 879 880 881
When parsing any C file, a \"class\" or \"template\" keyword\n\
switches to C++.");
  puts ("Compressed files are supported using gzip and bzip2.\n\
\n\
For detailed help on a given language use, for example,\n\
etags --help --lang=ada.");
882 883
}

884
/* Fallback identification strings, used only when EMACS_NAME or
   VERSION has not already been defined.  */
#ifndef EMACS_NAME
# define EMACS_NAME "standalone"
#endif
#ifndef VERSION
# define VERSION "17.26"
#endif
890
static void
Jim Blandy's avatar
Jim Blandy committed
891 892
print_version ()
{
893
  printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
894
  puts ("Copyright (C) 2008 Free Software Foundation, Inc.");
895
  puts ("This program is distributed under the terms in ETAGS.README");
Jim Blandy's avatar
Jim Blandy committed
896

897
  exit (EXIT_SUCCESS);
Jim Blandy's avatar
Jim Blandy committed
898 899
}

900
static void
901 902
print_help (argbuffer)
     argument *argbuffer;
Jim Blandy's avatar
Jim Blandy committed
903
{
904 905 906 907 908 909 910 911 912 913 914 915
  bool help_for_lang = FALSE;

  for (; argbuffer->arg_type != at_end; argbuffer++)
    if (argbuffer->arg_type == at_language)
      {
	if (help_for_lang)
	  puts ("");
	puts (argbuffer->lang->help);
	help_for_lang = TRUE;
      }

  if (help_for_lang)
916
    exit (EXIT_SUCCESS);
917

918 919 920
  printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
\n\
These are the options accepted by %s.\n", progname, progname);
921 922 923
  if (NO_LONG_OPTIONS)
    puts ("WARNING: long option names do not work with this executable,\n\
as it is not linked with GNU getopt.");
924
  else
925
    puts ("You may use unambiguous abbreviations for the long option names.");
926 927 928
  puts ("  A - as file name means read names from stdin (one per line).\n\
Absolute names are stored in the output file as they are.\n\
Relative ones are stored relative to the output file's directory.\n");
Jim Blandy's avatar
Jim Blandy committed
929

930
  puts ("-a, --append\n\
931
        Append tag entries to existing tags file.");
932

933
  puts ("--packages-only\n\
934
        For Ada files, only generate tags for packages.");