etags.c 179 KB
Newer Older
Eli Zaretskii's avatar
Eli Zaretskii committed
1
/* Tags file maker to go with GNU Emacs           -*- coding: latin-1 -*-
2

3
Copyright (C) 1984 The Regents of the University of California
Jim Blandy's avatar
Jim Blandy committed
4

5 6 7 8 9 10 11 12 13 14 15 16
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the
   distribution.
3. Neither the name of the University nor the names of its
   contributors may be used to endorse or promote products derived
   from this software without specific prior written permission.
Jim Blandy's avatar
Jim Blandy committed
17

18 19 20 21 22 23 24 25 26 27 28 29 30 31
THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS''
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS
BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


Copyright (C) 1984, 1987, 1988, 1989, 1993, 1994, 1995, 1998, 1999,
32
  2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008
33 34 35 36
  Free Software Foundation, Inc.

This file is not considered part of GNU Emacs.

37
This program is free software: you can redistribute it and/or modify
38
it under the terms of the GNU General Public License as published by
39 40
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
41 42 43 44 45 46 47

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
48
along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
49 50 51 52 53 54 55 56 57


/* NB To comply with the above BSD license, copyright information is
reproduced in etc/ETAGS.README.  That file should be updated when the
above notices are.

To the best of our knowledge, this code was originally based on the
ctags.c distributed with BSD4.2, which was copyrighted by the
University of California, as described above. */
Jim Blandy's avatar
Jim Blandy committed
58 59 60 61


/*
 * Authors:
62 63 64 65
 * 1983	Ctags originally by Ken Arnold.
 * 1984	Fortran added by Jim Kleckner.
 * 1984	Ed Pelegri-Llopart added C typedefs.
 * 1985	Emacs TAGS format by Richard Stallman.
66
 * 1989	Sam Kendall added C++.
Francesco Potortì's avatar
Francesco Potortì committed
67 68
 * 1992 Joseph B. Wells improved C and C++ parsing.
 * 1993	Francesco Potortì reorganised C and C++.
69
 * 1994	Line-by-line regexp tags by Tom Tromey.
70 71
 * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
 * 2002 #line directives by Francesco Potortì.
72
 *
73
 * Francesco Potortì <pot@gnu.org> has maintained and improved it since 1993.
Jim Blandy's avatar
Jim Blandy committed
74 75
 */

76 77 78 79 80 81
/*
 * If you want to add support for a new language, start by looking at the LUA
 * language, which is the simplest.  Alternatively, consider shipping a
 * configuration file containing regexp definitions for etags.
 */

82
char pot_etags_version[] = "@(#) pot revision number is 17.38";
83 84 85

#define	TRUE	1
#define	FALSE	0
86

87 88 89 90 91 92
#ifdef DEBUG
#  undef DEBUG
#  define DEBUG TRUE
#else
#  define DEBUG  FALSE
#  define NDEBUG		/* disable assert */
93
#endif
94

95 96 97 98 99
#ifdef HAVE_CONFIG_H
# include <config.h>
  /* On some systems, Emacs defines static as nothing for the sake
     of unexec.  We don't want that here since we don't use unexec. */
# undef static
100
# ifndef PTR			/* for XEmacs */
Francesco Potortì's avatar
Francesco Potortì committed
101 102
#   define PTR void *
# endif
103
# ifndef __P			/* for XEmacs */
Francesco Potortì's avatar
Francesco Potortì committed
104 105
#   define __P(args) args
# endif
106
#else  /* no config.h */
Francesco Potortì's avatar
Francesco Potortì committed
107 108 109
# if defined(__STDC__) && (__STDC__ || defined(__SUNPRO_C))
#   define __P(args) args	/* use prototypes */
#   define PTR void *		/* for generic pointers */
110
# else /* not standard C */
Francesco Potortì's avatar
Francesco Potortì committed
111 112 113
#   define __P(args) ()		/* no prototypes */
#   define const		/* remove const for old compilers' sake */
#   define PTR long *		/* don't use void* */
114 115
# endif
#endif /* !HAVE_CONFIG_H */
116

117 118 119 120
#ifndef _GNU_SOURCE
# define _GNU_SOURCE 1		/* enables some compiler checks on GNU */
#endif

121
/* WIN32_NATIVE is for XEmacs.
122 123 124 125 126 127 128
   MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
#ifdef WIN32_NATIVE
# undef MSDOS
# undef  WINDOWSNT
# define WINDOWSNT
#endif /* WIN32_NATIVE */

129
#ifdef MSDOS
130
# undef MSDOS
131
# define MSDOS TRUE
132 133
# include <fcntl.h>
# include <sys/param.h>
134 135 136 137 138
# include <io.h>
# ifndef HAVE_CONFIG_H
#   define DOS_NT
#   include <sys/config.h>
# endif
139 140
#else
# define MSDOS FALSE
141 142
#endif /* MSDOS */

143
#ifdef WINDOWSNT
144 145 146
# include <stdlib.h>
# include <fcntl.h>
# include <string.h>
147
# include <direct.h>
148
# include <io.h>
149
# define MAXPATHLEN _MAX_PATH
150 151 152
# undef HAVE_NTGUI
# undef  DOS_NT
# define DOS_NT
153 154 155
# ifndef HAVE_GETCWD
#   define HAVE_GETCWD
# endif /* undef HAVE_GETCWD */
156
#else /* not WINDOWSNT */
157 158 159
# ifdef STDC_HEADERS
#  include <stdlib.h>
#  include <string.h>
160
# else /* no standard C headers */
161 162 163 164 165 166 167 168 169 170 171 172 173 174
   extern char *getenv __P((const char *));
   extern char *strcpy __P((char *, const char *));
   extern char *strncpy __P((char *, const char *, unsigned long));
   extern char *strcat __P((char *, const char *));
   extern char *strncat __P((char *, const char *, unsigned long));
   extern int strcmp __P((const char *, const char *));
   extern int strncmp __P((const char *, const char *, unsigned long));
   extern int system __P((const char *));
   extern unsigned long strlen __P((const char *));
   extern void *malloc __P((unsigned long));
   extern void *realloc __P((void *, unsigned long));
   extern void exit __P((int));
   extern void free __P((void *));
   extern void *memmove __P((void *, const void *, unsigned long));
175 176 177 178 179 180 181
#  ifdef VMS
#   define EXIT_SUCCESS	1
#   define EXIT_FAILURE	0
#  else /* no VMS */
#   define EXIT_SUCCESS	0
#   define EXIT_FAILURE	1
#  endif
182 183
# endif
#endif /* !WINDOWSNT */
184

185 186 187
#ifdef HAVE_UNISTD_H
# include <unistd.h>
#else
188
# if defined (HAVE_GETCWD) && !defined (WINDOWSNT)
189
    extern char *getcwd (char *buf, size_t size);
190 191 192
# endif
#endif /* HAVE_UNISTD_H */

193 194
#include <stdio.h>
#include <ctype.h>
195 196
#include <errno.h>
#ifndef errno
197
  extern int errno;
198
#endif
199 200 201
#include <sys/types.h>
#include <sys/stat.h>

202 203 204 205
#include <assert.h>
#ifdef NDEBUG
# undef  assert			/* some systems have a buggy assert.h */
# define assert(x) ((void) 0)
206 207
#endif

208 209 210 211
#if !defined (S_ISREG) && defined (S_IFREG)
# define S_ISREG(m)	(((m) & S_IFMT) == S_IFREG)
#endif

212 213
#ifdef NO_LONG_OPTIONS		/* define this if you don't have GNU getopt */
# define NO_LONG_OPTIONS TRUE
214 215 216
# define getopt_long(argc,argv,optstr,lopts,lind) getopt (argc, argv, optstr)
  extern char *optarg;
  extern int optind, opterr;
217 218 219 220
#else
# define NO_LONG_OPTIONS FALSE
# include <getopt.h>
#endif /* NO_LONG_OPTIONS */
221

222 223
#ifndef HAVE_CONFIG_H		/* this is a standalone compilation */
# ifdef __CYGWIN__         	/* compiling on Cygwin */
224 225 226 227 228
			     !!! NOTICE !!!
 the regex.h distributed with Cygwin is not compatible with etags, alas!
If you want regular expression support, you should delete this notice and
	      arrange to use the GNU regex.h and regex.c.
# endif
229 230
#endif
#include <regex.h>
231

232
/* Define CTAGS to make the program "ctags" compatible with the usual one.
233
 Leave it undefined to make the program "etags", which makes emacs-style
234 235 236 237 238 239
 tag tables and tags typedefs, #defines and struct/union/enum by default. */
#ifdef CTAGS
# undef  CTAGS
# define CTAGS TRUE
#else
# define CTAGS FALSE
Jim Blandy's avatar
Jim Blandy committed
240 241
#endif

242
/* Nonzero iff the strings S and T are equal.  Both arguments must be
   non-NULL because strcmp reads through each of them; when assertions
   are enabled this is checked before the comparison.  */
#define streq(s,t)	(assert((s)!=NULL && (t)!=NULL), !strcmp (s, t))
243
#define strcaseeq(s,t)	(assert((s)!=NULL && (t)!=NULL), !etags_strcasecmp (s, t))
244
/* Nonzero iff the first N characters of S and T are equal.  Both
   arguments must be non-NULL because strncmp reads through each of
   them; when assertions are enabled this is checked first.  */
#define strneq(s,t,n)	(assert((s)!=NULL && (t)!=NULL), !strncmp (s, t, n))
245
#define strncaseeq(s,t,n) (assert((s)!=NULL && (t)!=NULL), !etags_strncasecmp (s, t, n))
Jim Blandy's avatar
Jim Blandy committed
246

247
#define CHARS 256		/* 2^8: one entry per 8-bit char value */
248
#define CHAR(x)		((unsigned int)(x) & (CHARS - 1))
249 250 251 252 253
#define	iswhite(c)	(_wht[CHAR(c)]) /* c is white (see white) */
#define notinname(c)	(_nin[CHAR(c)]) /* c is not in a name (see nonam) */
#define	begtoken(c)	(_btk[CHAR(c)]) /* c can start token (see begtk) */
#define	intoken(c)	(_itk[CHAR(c)]) /* c can be in token (see midtk) */
#define	endtoken(c)	(_etk[CHAR(c)]) /* c ends tokens (see endtk) */
Jim Blandy's avatar
Jim Blandy committed
254

255 256 257 258 259 260 261
#define ISALNUM(c)	isalnum (CHAR(c))
#define ISALPHA(c)	isalpha (CHAR(c))
#define ISDIGIT(c)	isdigit (CHAR(c))
#define ISLOWER(c)	islower (CHAR(c))

#define lowcase(c)	tolower (CHAR(c))
#define upcase(c)	toupper (CHAR(c))
262

263

264
/*
265
 *	xnew, xrnew -- allocate, reallocate storage
266 267
 *
 * SYNOPSIS:	Type *xnew (int n, Type);
268
 *		void xrnew (OldPointer, int n, Type);
269
 */
270
#if DEBUG
271
# include "chkmalloc.h"
272 273
# define xnew(n,Type)	  ((Type *) trace_malloc (__FILE__, __LINE__, \
						  (n) * sizeof (Type)))
274 275
# define xrnew(op,n,Type) ((op) = (Type *) trace_realloc (__FILE__, __LINE__, \
					(char *) (op), (n) * sizeof (Type)))
276
#else
277
# define xnew(n,Type)	  ((Type *) xmalloc ((n) * sizeof (Type)))
278 279
# define xrnew(op,n,Type) ((op) = (Type *) xrealloc ( \
					(char *) (op), (n) * sizeof (Type)))
280
#endif
Jim Blandy's avatar
Jim Blandy committed
281

Francesco Potortì's avatar
Francesco Potortì committed
282
#define bool int
283

Francesco Potortì's avatar
Francesco Potortì committed
284
typedef void Lang_function __P((FILE *));
Jim Blandy's avatar
Jim Blandy committed
285

286 287
typedef struct
{
288 289
  char *suffix;			/* file name suffix for this compressor */
  char *command;		/* takes one arg and decompresses to stdout */
290
} compressor;
Jim Blandy's avatar
Jim Blandy committed
291

292 293
typedef struct
{
294
  char *name;			/* language name */
295
  char *help;                   /* detailed help for the language */
296 297
  Lang_function *function;	/* parse function */
  char **suffixes;		/* name suffixes of this language's files */
298
  char **filenames;		/* names of this language's files */
299
  char **interpreters;		/* interpreters for this language */
300
  bool metasource;		/* source used to generate other sources */
301 302
} language;

303 304 305 306 307 308 309 310 311 312
typedef struct fdesc
{
  struct fdesc *next;		/* for the linked list */
  char *infname;		/* uncompressed input file name */
  char *infabsname;		/* absolute uncompressed input file name */
  char *infabsdir;		/* absolute dir of input file */
  char *taggedfname;		/* file name to write in tagfile */
  language *lang;		/* language of file */
  char *prop;			/* file properties to write in tagfile */
  bool usecharno;		/* etags tags shall contain char number */
313
  bool written;			/* entry written in the tags file */
314 315
} fdesc;

316
typedef struct node_st
317 318 319 320
{				/* sorting structure */
  struct node_st *left, *right;	/* left and right sons */
  fdesc *fdp;			/* description of file to whom tag belongs */
  char *name;			/* tag name */
321
  char *regex;			/* search regexp */
322
  bool valid;			/* write this tag on the tag file */
323
  bool is_func;			/* function tag: use regexp in CTAGS mode */
324 325
  bool been_warned;		/* warning already given for duplicated tag */
  int lno;			/* line number tag is on */
326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341
  long cno;			/* character number line starts on */
} node;

/*
 * A `linebuffer' is a structure which holds a line of text.
 * `readline_internal' reads a line from a stream into a linebuffer
 * and works regardless of the length of the line.
 * SIZE is the size of BUFFER, LEN is the length of the string in
 * BUFFER after readline reads it.
 */
typedef struct
{
  long size;			/* size of BUFFER */
  int len;			/* length of the string in BUFFER after a read */
  char *buffer;			/* the text of the line */
} linebuffer;
342

343 344 345 346 347 348
/* Used to support mixing of --lang and file names. */
typedef struct
{
  enum {
    at_language,		/* a language specification */
    at_regexp,			/* a regular expression */
349
    at_filename,		/* a file name */
350 351
    at_stdin,			/* read from stdin here */
    at_end			/* stop parsing the list */
352 353 354 355 356 357
  } arg_type;			/* argument type */
  language *lang;		/* language associated with the argument */
  char *what;			/* the argument itself */
} argument;

/* Structure defining a regular expression. */
358
typedef struct regexp
359
{
360 361 362 363 364 365 366 367 368 369 370
  struct regexp *p_next;	/* pointer to next in list */
  language *lang;		/* if set, use only for this language */
  char *pattern;		/* the regexp pattern */
  char *name;			/* tag name */
  struct re_pattern_buffer *pat; /* the compiled pattern */
  struct re_registers regs;	/* re registers */
  bool error_signaled;		/* already signaled for this regexp */
  bool force_explicit_name;	/* do not allow implicit tag name */
  bool ignore_case;		/* ignore case when matching */
  bool multi_line;		/* do a multi-line match on the whole file */
} regexp;
371 372


373
/* Many compilers barf on this:
374
	Lang_function Ada_funcs;
375
   so let's write it this way */
Francesco Potortì's avatar
Francesco Potortì committed
376 377 378 379 380 381 382 383 384 385
static void Ada_funcs __P((FILE *));
static void Asm_labels __P((FILE *));
static void C_entries __P((int c_ext, FILE *));
static void default_C_entries __P((FILE *));
static void plain_C_entries __P((FILE *));
static void Cjava_entries __P((FILE *));
static void Cobol_paragraphs __P((FILE *));
static void Cplusplus_entries __P((FILE *));
static void Cstar_entries __P((FILE *));
static void Erlang_functions __P((FILE *));
386
static void Forth_words __P((FILE *));
Francesco Potortì's avatar
Francesco Potortì committed
387
static void Fortran_functions __P((FILE *));
388
static void HTML_labels __P((FILE *));
Francesco Potortì's avatar
Francesco Potortì committed
389
static void Lisp_functions __P((FILE *));
390
static void Lua_functions __P((FILE *));
Francesco Potortì's avatar
Francesco Potortì committed
391 392 393 394
static void Makefile_targets __P((FILE *));
static void Pascal_functions __P((FILE *));
static void Perl_functions __P((FILE *));
static void PHP_functions __P((FILE *));
395
static void PS_functions __P((FILE *));
Francesco Potortì's avatar
Francesco Potortì committed
396 397 398 399 400
static void Prolog_functions __P((FILE *));
static void Python_functions __P((FILE *));
static void Scheme_functions __P((FILE *));
static void TeX_commands __P((FILE *));
static void Texinfo_nodes __P((FILE *));
401
static void Yacc_entries __P((FILE *));
Francesco Potortì's avatar
Francesco Potortì committed
402 403 404 405
static void just_read_file __P((FILE *));

static void print_language_names __P((void));
static void print_version __P((void));
406
static void print_help __P((argument *));
Francesco Potortì's avatar
Francesco Potortì committed
407 408 409 410 411
int main __P((int, char **));

static compressor *get_compressor_from_suffix __P((char *, char **));
static language *get_language_from_langname __P((const char *));
static language *get_language_from_interpreter __P((char *));
412
static language *get_language_from_filename __P((char *, bool));
413
static void readline __P((linebuffer *, FILE *));
Francesco Potortì's avatar
Francesco Potortì committed
414 415
static long readline_internal __P((linebuffer *, FILE *));
static bool nocase_tail __P((char *));
416
static void get_tag __P((char *, char **));
417

418
static void analyse_regex __P((char *));
419
static void free_regexps __P((void));
420
static void regex_tag_multiline __P((void));
Francesco Potortì's avatar
Francesco Potortì committed
421 422 423 424 425 426 427
static void error __P((const char *, const char *));
static void suggest_asking_for_help __P((void));
void fatal __P((char *, char *));
static void pfatal __P((char *));
static void add_node __P((node *, node **));

static void init __P((void));
428 429
static void process_file_name __P((char *, language *));
static void process_file __P((FILE *, char *, language *));
430
static void find_entries __P((FILE *));
Francesco Potortì's avatar
Francesco Potortì committed
431
static void free_tree __P((node *));
432
static void free_fdesc __P((fdesc *));
Francesco Potortì's avatar
Francesco Potortì committed
433
static void pfnote __P((char *, bool, char *, int, int, long));
434
static void make_tag __P((char *, int, bool, char *, int, int, long));
435
static void invalidate_nodes __P((fdesc *, node **));
Francesco Potortì's avatar
Francesco Potortì committed
436 437 438 439 440 441 442 443 444
static void put_entries __P((node *));

static char *concat __P((char *, char *, char *));
static char *skip_spaces __P((char *));
static char *skip_non_spaces __P((char *));
static char *savenstr __P((char *, int));
static char *savestr __P((char *));
static char *etags_strchr __P((const char *, int));
static char *etags_strrchr __P((const char *, int));
445 446
static int etags_strcasecmp __P((const char *, const char *));
static int etags_strncasecmp __P((const char *, const char *, int));
Francesco Potortì's avatar
Francesco Potortì committed
447 448 449 450 451 452
static char *etags_getcwd __P((void));
static char *relative_filename __P((char *, char *));
static char *absolute_filename __P((char *, char *));
static char *absolute_dirname __P((char *, char *));
static bool filename_is_absolute __P((char *f));
static void canonicalize_filename __P((char *));
453
static void linebuffer_init __P((linebuffer *));
Francesco Potortì's avatar
Francesco Potortì committed
454
static void linebuffer_setlen __P((linebuffer *, int));
455 456
static PTR xmalloc __P((unsigned int));
static PTR xrealloc __P((char *, unsigned int));
457

Jim Blandy's avatar
Jim Blandy committed
458

459
static char searchar = '/';	/* use /.../ searches */
Jim Blandy's avatar
Jim Blandy committed
460

461 462 463 464 465
static char *tagfile;		/* output file */
static char *progname;		/* name this program was invoked with */
static char *cwd;		/* current working directory */
static char *tagfiledir;	/* directory of tagfile */
static FILE *tagf;		/* ioptr for tags file */
466

467 468
static fdesc *fdhead;		/* head of file description list */
static fdesc *curfdp;		/* current file description */
469 470 471 472
static int lineno;		/* line number of current line */
static long charno;		/* current character number */
static long linecharno;		/* charno of start of current line */
static char *dbp;		/* pointer to start of current tag */
473

474
static const int invalidcharno = -1;
475

476
static node *nodehead;		/* the head of the binary tree of tags */
477
static node *last_node;		/* the last node created */
Jim Blandy's avatar
Jim Blandy committed
478

479
static linebuffer lb;		/* the current line */
480
static linebuffer filebuf;	/* a buffer containing the whole file */
481
static linebuffer token_name;	/* a buffer containing a tag name */
Jim Blandy's avatar
Jim Blandy committed
482

483
/* boolean "functions" (see init)	*/
484 485
static bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS];
static char
486
  /* white chars */
487
  *white = " \f\t\n\r\v",
488
  /* not in a name */
489
  *nonam = " \f\t\n\r()=,;",	/* look at make_tag before modifying! */
490
  /* token ending chars */
491
  *endtk = " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?",
492 493 494
  /* token starting chars */
  *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~@",
  /* valid in-token chars */
495
  *midtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789";
Jim Blandy's avatar
Jim Blandy committed
496

497
static bool append_to_tagfile;	/* -a: append to tags */
498
/* The next five default to TRUE in C and derived languages.  */
499 500
static bool typedefs;		/* -t: create tags for C and Ada typedefs */
static bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
Jim Blandy's avatar
Jim Blandy committed
501
				/* 0 struct/enum/union decls, and C++ */
502
				/* member functions. */
503
static bool constantypedefs;	/* -d: create tags for C #define, enum */
504
				/* constants and variables. */
Jim Blandy's avatar
Jim Blandy committed
505
				/* -D: opposite of -d.  Default under ctags. */
506 507
static bool globals;		/* create tags for global variables */
static bool members;		/* create tags for C member variables */
508
static bool declarations;	/* --declarations: tag them and extern in C&Co*/
509
static bool no_line_directive;	/* ignore #line directives (undocumented) */
510
static bool no_duplicates;	/* no duplicate tags for ctags (undocumented) */
511 512
static bool update;		/* -u: update tags */
static bool vgrind_style;	/* -v: create vgrind style index output */
513
static bool no_warnings;	/* -w: suppress warnings (undocumented) */
514
static bool cxref_style;	/* -x: create cxref style output */
515
static bool cplusplus;		/* .[hc] means C++, not C (undocumented) */
516
static bool ignoreindent;	/* -I: ignore indentation in C */
517
static bool packages_only;	/* --packages-only: in Ada, only tag packages*/
518

519 520
/* STDIN is defined in LynxOS system headers */
#ifdef STDIN
521
# undef STDIN
522 523
#endif

524 525 526
#define STDIN 0x1001		/* returned by getopt_long on --parse-stdin */
static bool parsing_stdin;	/* --parse-stdin used */

527
static regexp *p_head;		/* list of all regexps */
528
static bool need_filebuf;	/* some regexes are multi-line */
529

530
static struct option longopts[] =
531
{
532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547
  { "append",             no_argument,       NULL,               'a'   },
  { "packages-only",      no_argument,       &packages_only,     TRUE  },
  { "c++",                no_argument,       NULL,               'C'   },
  { "declarations",       no_argument,       &declarations,      TRUE  },
  { "no-line-directive",  no_argument,       &no_line_directive, TRUE  },
  { "no-duplicates",      no_argument,       &no_duplicates,     TRUE  },
  { "help",               no_argument,       NULL,               'h'   },
  { "help",               no_argument,       NULL,               'H'   },
  { "ignore-indentation", no_argument,       NULL,               'I'   },
  { "language",           required_argument, NULL,               'l'   },
  { "members",            no_argument,       &members,           TRUE  },
  { "no-members",         no_argument,       &members,           FALSE },
  { "output",             required_argument, NULL,               'o'   },
  { "regex",              required_argument, NULL,               'r'   },
  { "no-regex",           no_argument,       NULL,               'R'   },
  { "ignore-case-regex",  required_argument, NULL,               'c'   },
548
  { "parse-stdin",        required_argument, NULL,               STDIN },
549
  { "version",            no_argument,       NULL,               'V'   },
550

551
#if CTAGS /* Ctags options */
552 553 554 555 556 557 558 559 560
  { "backward-search",    no_argument,       NULL,               'B'   },
  { "cxref",              no_argument,       NULL,               'x'   },
  { "defines",            no_argument,       NULL,               'd'   },
  { "globals",            no_argument,       &globals,           TRUE  },
  { "typedefs",           no_argument,       NULL,               't'   },
  { "typedefs-and-c++",   no_argument,       NULL,               'T'   },
  { "update",             no_argument,       NULL,               'u'   },
  { "vgrind",             no_argument,       NULL,               'v'   },
  { "no-warn",            no_argument,       NULL,               'w'   },
561

562
#else /* Etags options */
563 564 565
  { "no-defines",         no_argument,       NULL,               'D'   },
  { "no-globals",         no_argument,       &globals,           FALSE },
  { "include",            required_argument, NULL,               'i'   },
566
#endif
567
  { NULL }
Jim Blandy's avatar
Jim Blandy committed
568 569
};

570
static compressor compressors[] =
571 572 573 574 575 576 577 578 579
{
  { "z", "gzip -d -c"},
  { "Z", "gzip -d -c"},
  { "gz", "gzip -d -c"},
  { "GZ", "gzip -d -c"},
  { "bz2", "bzip2 -d -c" },
  { NULL }
};

580 581 582
/*
 * Language stuff.
 */
583

584
/* Ada code */
585
static char *Ada_suffixes [] =
586
  { "ads", "adb", "ada", NULL };
587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602
/* Help text for Ada: which constructs are tagged and what the
   qualifiers appended to tag names mean.  */
static char Ada_help [] =
  "In Ada code, functions, procedures, packages, tasks and types are\n"
  "tags.  Use the `--packages-only' option to create tags for\n"
  "packages only.\n"
  "Ada tag names have suffixes indicating the type of entity:\n"
  "\tEntity type:\tQualifier:\n"
  "\t------------\t----------\n"
  "\tfunction\t/f\n"
  "\tprocedure\t/p\n"
  "\tpackage spec\t/s\n"
  "\tpackage body\t/b\n"
  "\ttype\t\t/t\n"
  "\ttask\t\t/k\n"
  "Thus, `M-x find-tag <RET> bidule/b <RET>' will go directly to the\n"
  "body of the package `bidule', while `M-x find-tag <RET> bidule <RET>'\n"
  "will just search for any tag `bidule'.";
603 604

/* Assembly code */
605 606 607 608 609 610 611 612 613 614 615
/* File name suffixes treated as assembly source, NULL-terminated.  */
static char *Asm_suffixes [] =
{
  "a",				/* Unix assembler */
  "asm",			/* Microcontroller assembly */
  "def",			/* BSO/Tasking definition includes */
  "inc",			/* Microcontroller include files */
  "ins",			/* Microcontroller include files */
  "s",				/* Unix assembler */
  "sa",				/* Unix assembler */
  "S",				/* cpp-processed Unix assembler */
  "src",			/* BSO/Tasking C compiler output */
  NULL				/* end of list */
};
616 617 618 619
/* Help text for assembler files.  */
static char Asm_help [] =
  "In assembler code, labels appearing at the beginning of a line,\n"
  "followed by a colon, are tags.";

620 621

/* Note that .c and .h can be considered C++, if the --c++ flag was
622
   given, or if the `class' or `template' keywords are met inside the file.
623
   That is why default_C_entries is called for these. */
624
static char *default_C_suffixes [] =
625
  { "c", "h", NULL };
626 627 628 629 630 631 632 633 634
#if CTAGS				/* C help for Ctags */
static char default_C_help [] =
"In C code, any C function is a tag.  Use -t to tag typedefs.\n\
Use -T to tag definitions of `struct', `union' and `enum'.\n\
Use -d to tag `#define' macro definitions and `enum' constants.\n\
Use --globals to tag global variables.\n\
You can tag function declarations and external variables by\n\
using `--declarations', and struct members by using `--members'.";
#else					/* C help for Etags */
635 636 637 638 639
static char default_C_help [] =
"In C code, any C function or typedef is a tag, and so are\n\
definitions of `struct', `union' and `enum'.  `#define' macro\n\
definitions and `enum' constants are tags unless you specify\n\
`--no-defines'.  Global variables are tags unless you specify\n\
640 641 642
`--no-globals' and so are struct members unless you specify\n\
`--no-members'.  Use of `--no-globals', `--no-defines' and\n\
`--no-members' can make the tags table file much smaller.\n\
643
You can tag function declarations and external variables by\n\
644
using `--declarations'.";
645
#endif	/* C help for Ctags and Etags */
646

647
static char *Cplusplus_suffixes [] =
648
  { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
649 650 651
    "M",			/* Objective C++ */
    "pdb",			/* Postscript with C syntax */
    NULL };
652
static char Cplusplus_help [] =
653 654
"In C++ code, all the tag constructs of C code are tagged.  (Use\n\
--help --lang=c --lang=c++ for full help.)\n\
655 656
In addition to C tags, member functions are also recognized.  Member\n\
variables are recognized unless you use the `--no-members' option.\n\
657 658 659
Tags for variables and functions in classes are named `CLASS::VARIABLE'\n\
and `CLASS::FUNCTION'.  `operator' definitions have tag names like\n\
`operator+'.";
660

661
static char *Cjava_suffixes [] =
662
  { "java", NULL };
663 664
static char Cjava_help [] =
"In Java code, all the tags constructs of C and C++ code are\n\
665
tagged.  (Use --help --lang=c --lang=c++ --lang=java for full help.)";
666

667

668
static char *Cobol_suffixes [] =
669
  { "COB", "cob", NULL };
670 671 672
/* Help text for Cobol files.  */
static char Cobol_help [] =
  "In Cobol code, tags are paragraph names; that is, any word\n"
  "starting in column 8 and followed by a period.";
673

674
static char *Cstar_suffixes [] =
675 676
  { "cs", "hs", NULL };

677
static char *Erlang_suffixes [] =
678
  { "erl", "hrl", NULL };
679 680 681
/* Help text for Erlang files.  */
static char Erlang_help [] =
  "In Erlang code, the tags are the functions, records and macros\n"
  "defined in the file.";
682

683 684 685 686 687 688
/* File name suffixes treated as Forth source, NULL-terminated.
   Given internal linkage like the other language suffix tables
   in this file.  */
static char *Forth_suffixes [] =
  { "fth", "tok", NULL };
/* Help text for Forth files.  */
static char Forth_help [] =
  "In Forth code, tags are words defined by `:',\n"
  "constant, code, create, defer, value, variable, buffer:, field.";

689
static char *Fortran_suffixes [] =
690
  { "F", "f", "f90", "for", NULL };
691 692
static char Fortran_help [] =
"In Fortran code, functions, subroutines and block data are tags.";
693

694 695
static char *HTML_suffixes [] =
  { "htm", "html", "shtml", NULL };
696 697 698 699
/* Help text for HTML files.  */
static char HTML_help [] =
  "In HTML input files, the tags are the `title' and the `h1', `h2',\n"
  "`h3' headers.  Also, tags are `name=' in anchors and all\n"
  "occurrences of `id='.";
700

701
static char *Lisp_suffixes [] =
702
  { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };
703 704 705 706 707
/* Help text for Lisp files.  */
static char Lisp_help [] =
  "In Lisp code, any function defined with `defun', any variable\n"
  "defined with `defvar' or `defconst', and in general the first\n"
  "argument of any expression that starts with `(def' in column zero\n"
  "is a tag.";
708

709 710 711 712 713
static char *Lua_suffixes [] =
  { "lua", "LUA", NULL };
static char Lua_help [] =
"In Lua scripts, all functions are tags.";

714
static char *Makefile_filenames [] =
715
  { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL};
716 717 718 719 720 721 722 723 724 725 726
/* Help text for makefiles.  */
static char Makefile_help [] =
  "In makefiles, targets are tags; additionally, variables are tags\n"
  "unless you specify `--no-globals'.";

/* File name suffixes treated as Objective C source, NULL-terminated.  */
static char *Objc_suffixes [] =
{
  "lm",				/* Objective lex file */
  "m",				/* Objective C file */
  NULL				/* end of list */
};
static char Objc_help [] =
"In Objective C code, tags include Objective C definitions for classes,\n\
class categories, methods and protocols.  Tags for variables and\n\
727 728
functions in classes are named `CLASS::VARIABLE' and `CLASS::FUNCTION'.\n\
(Use --help --lang=c --lang=objc --lang=java for full help.)";
729

730
/* Pascal: NULL-terminated list of file-name suffixes, followed by the
   help text describing what gets tagged.  */
static char *Pascal_suffixes [] = {
  "p",
  "pas",
  NULL				/* end of list */
};

static char Pascal_help [] =
  "In Pascal code, the tags are the functions and procedures defined\n"
  "in the file.";
735
/* " // this is for working around an Emacs highlighting bug... */
736

737
/* Perl: NULL-terminated lists of file-name suffixes and of interpreter
   names, followed by the help text describing what gets tagged.  */
static char *Perl_suffixes [] = {
  "pl",
  "pm",
  NULL				/* end of list */
};

static char *Perl_interpreters [] = {
  "perl",
  "@PERL@",
  NULL				/* end of list */
};

static char Perl_help [] =
  "In Perl code, the tags are the packages, subroutines and variables\n"
  "defined by the `package', `sub', `my' and `local' keywords.  Use\n"
  "`--globals' if you want to tag global variables.  Tags for\n"
  "subroutines are named `PACKAGE::SUB'.  The name for subroutines\n"
  "defined in the default package is `main::SUB'.";
747

748
/* PHP: NULL-terminated list of file-name suffixes, followed by the
   help text describing what gets tagged.  */
static char *PHP_suffixes [] = {
  "php",
  "php3",
  "php4",
  NULL				/* end of list */
};

static char PHP_help [] =
  "In PHP code, tags are functions, classes and defines.  Unless you use\n"
  "the `--no-members' option, vars are tags too.";
753

754
/* NULL-terminated list of file-name suffixes used by the "proc" row of
   the language table.  */
static char *plain_C_suffixes [] = {
  "pc",				/* Pro*C file */
  NULL				/* end of list */
};
757

758
/* PostScript: NULL-terminated list of file-name suffixes, followed by
   the help text describing what gets tagged.  */
static char *PS_suffixes [] = {
  "ps",
  "psw",			/* .psw is for PSWrap */
  NULL				/* end of list */
};

static char PS_help [] = "In PostScript code, the tags are the functions.";
762

763
/* Prolog: NULL-terminated list of file-name suffixes, followed by the
   help text describing what gets tagged.  */
static char *Prolog_suffixes [] = {
  "prolog",
  NULL				/* end of list */
};

static char Prolog_help [] =
  "In Prolog code, tags are predicates and rules at the beginning of\n"
  "line.";
768

769
/* Python: NULL-terminated list of file-name suffixes, followed by the
   help text describing what gets tagged.  */
static char *Python_suffixes [] = {
  "py",
  NULL				/* end of list */
};

static char Python_help [] =
  "In Python code, `def' or `class' at the beginning of a line\n"
  "generate a tag.";
774

775
/* Can't do the `SCM' or `scm' prefix with a version number. */
776
/* Scheme: NULL-terminated list of file-name suffixes, followed by the
   help text describing what gets tagged.  */
static char *Scheme_suffixes [] = {
  "oak",
  "sch",
  "scheme",
  "SCM",
  "scm",
  "SM",
  "sm",
  "ss",
  "t",
  NULL				/* end of list */
};

static char Scheme_help [] =
  "In Scheme code, tags include anything defined with `def' or with a\n"
  "construct whose name starts with `def'.  They also include\n"
  "variables set with `set!' at top level in the file.";
782

783
static char *TeX_suffixes [] =
784
  { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };
785 786 787 788 789 790 791 792 793 794 795
static char TeX_help [] =
"In LaTeX text, the argument of any of the commands `\\chapter',\n\
`\\section', `\\subsection', `\\subsubsection', `\\eqno', `\\label',\n\
`\\ref', `\\cite', `\\bibitem', `\\part', `\\appendix', `\\entry',\n\
`\\index', `\\def', `\\newcommand', `\\renewcommand',\n\
`\\newenvironment' or `\\renewenvironment' is a tag.\n\
\n\
Other commands can be specified by setting the environment variable\n\
`TEXTAGS' to a colon-separated list like, for example,\n\
     TEXTAGS=\"mycommand:myothercommand\".";

796

797
/* Texinfo: NULL-terminated list of file-name suffixes, followed by the
   help text describing what gets tagged.  */
static char *Texinfo_suffixes [] = {
  "texi",
  "texinfo",
  "txi",
  NULL				/* end of list */
};

static char Texinfo_help [] =
  "for texinfo files, lines starting with @node are tagged.";
Dave Love's avatar
Dave Love committed
801

802
/* Yacc/Bison: NULL-terminated list of file-name suffixes, followed by
   the help text describing what gets tagged.  */
static char *Yacc_suffixes [] = {
  "y",
  "y++",
  "ym",				/* .ym is Objective yacc file */
  "yxx",
  "yy",
  NULL				/* end of list */
};

static char Yacc_help [] =
  "In Bison or Yacc input files, each rule defines as a tag the\n"
  "nonterminal it constructs.  The portions of the file that contain\n"
  "C code are parsed as C code (use --help --lang=c --lang=yacc\n"
  "for full help).";

/* Help text for the "auto" pseudo-language row of the language table.
   The wording reads "based on" (the original text had the typo
   "base on").  */
static char auto_help [] =
  "`auto' is not a real language, it indicates to use\n"
  "a default language for files based on file name suffix and file contents.";

/* Help text for the "none" pseudo-language row of the language table.  */
static char none_help [] =
  "`none' is not a real language, it indicates to only do\n"
  "regexp processing on files.";

/* Help text used by the language-table rows that have no description
   of their own ("c*" and "proc").  */
static char no_lang_help [] = "No detailed help available for this language.";

821

822 823 824 825 826 827
/*
 * Table of languages.
 *
 * It is ok for a given function to be listed under more than one
 * name.  I just didn't.
 */
828

829
static language lang_names [] =
830
{
831 832 833 834 835 836 837
  { "ada",       Ada_help,       Ada_funcs,         Ada_suffixes       },
  { "asm",       Asm_help,       Asm_labels,        Asm_suffixes       },
  { "c",         default_C_help, default_C_entries, default_C_suffixes },
  { "c++",       Cplusplus_help, Cplusplus_entries, Cplusplus_suffixes },
  { "c*",        no_lang_help,   Cstar_entries,     Cstar_suffixes     },
  { "cobol",     Cobol_help,     Cobol_paragraphs,  Cobol_suffixes     },
  { "erlang",    Erlang_help,    Erlang_functions,  Erlang_suffixes    },
838
  { "forth",     Forth_help,     Forth_words,       Forth_suffixes     },
839 840 841 842
  { "fortran",   Fortran_help,   Fortran_functions, Fortran_suffixes   },
  { "html",      HTML_help,      HTML_labels,       HTML_suffixes      },
  { "java",      Cjava_help,     Cjava_entries,     Cjava_suffixes     },
  { "lisp",      Lisp_help,      Lisp_functions,    Lisp_suffixes      },
843
  { "lua",       Lua_help,       Lua_functions,     Lua_suffixes       },
844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859
  { "makefile",  Makefile_help,Makefile_targets,NULL,Makefile_filenames},
  { "objc",      Objc_help,      plain_C_entries,   Objc_suffixes      },
  { "pascal",    Pascal_help,    Pascal_functions,  Pascal_suffixes    },
  { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters},
  { "php",       PHP_help,       PHP_functions,     PHP_suffixes       },
  { "postscript",PS_help,        PS_functions,      PS_suffixes        },
  { "proc",      no_lang_help,   plain_C_entries,   plain_C_suffixes   },
  { "prolog",    Prolog_help,    Prolog_functions,  Prolog_suffixes    },
  { "python",    Python_help,    Python_functions,  Python_suffixes    },
  { "scheme",    Scheme_help,    Scheme_functions,  Scheme_suffixes    },
  { "tex",       TeX_help,       TeX_commands,      TeX_suffixes       },
  { "texinfo",   Texinfo_help,   Texinfo_nodes,     Texinfo_suffixes   },
  { "yacc",      Yacc_help,Yacc_entries,Yacc_suffixes,NULL,NULL,TRUE},
  { "auto",      auto_help },                      /* default guessing scheme */
  { "none",      none_help,      just_read_file }, /* regexp matching only */
  { NULL }                /* end of list */
860
};
861

Jim Blandy's avatar
Jim Blandy committed
862

863
static void
864 865
print_language_names ()
{
866
  language *lang;
867
  char **name, **ext;
868 869

  puts ("\nThese are the currently supported languages, along with the\n\
870
default file names and dot suffixes:");
871
  for (lang = lang_names; lang->name != NULL; lang++)
872
    {
Francesco Potortì's avatar