xref: /illumos-gate/usr/src/cmd/vi/misc/ctags.c (revision 581cede61ac9c14d8d4ea452562a567189eead78)
1 /*
2  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
3  * Use is subject to license terms.
4  */
5 
6 /*	Copyright (c) 1988 AT&T	*/
7 /*	All Rights Reserved	*/
8 
9 /*
10  * Copyright (c) 1980 Regents of the University of California.
11  * All rights reserved.  The Berkeley software License Agreement
12  * specifies the terms and conditions for redistribution.
13  */
14 
15 /*
16  *   Modify ctags to handle C++ in C_entries(), etc:
17  *	-  Handles C++ comment token "//"
18  *	-  Handles C++ scope operator "::".
19  *		This helps to distinguish between xyz()
20  *	   definition and X::xyz() definition.
21  *	-  Recognizes C++ reserved word "class" in typedef processing
22  *		(for "-t" option)
23  *	-  Handles Sun C++ special file name extensions: .c, .C, .cc, and .cxx.
24  *	-  Handles overloaded unary/binary operator names
25  *   Doesn't handle yet:
26  *	-  inline functions in class definition (currently they get
27  *		swallowed within a class definition)
28  *	-  Tags with scope operator :: with spaces in between,
29  *		e.g. classz ::afunc
30  *
31  *   Enhance operator functions support:
32  *  	-  Control flow involving operator tokens scanning are
33  *	   consistent with that of other function tokens - original
34  *	   hacking method for 2.0 is removed.  This will accurately
35  *	   identify tags for declarations of the form 'operator+()'
36  *	   (bugid 1027806) as well as allowing spaces in between
37  *	   'operator' and 'oprtk', e.g. 'operator + ()'.
38  *
39  */
40 
41 #ifndef lint
42 char copyright[] = "@(#) Copyright (c) 1980 Regents of the University of "
43 			"California.\nAll rights reserved.\n";
44 #endif
45 
46 #pragma ident	"%Z%%M%	%I%	%E% SMI"
47 		/* from UCB 5.1 5/31/85 */
48 
49 #include <stdio.h>
50 #include <ctype.h>
51 #include <locale.h>
52 #include <unistd.h>
53 #include <stdlib.h>
54 #include <string.h>
55 #include <limits.h>
56 #include <sys/types.h>
57 #include <sys/stat.h>
58 
59 /*
60  * ctags: create a tags file
61  */
62 
#define	bool	char

#define	TRUE	(1)
#define	FALSE	(0)

#define	CPFLAG	3			/* # of bytes in a flag		*/

/*
 * Character classification is done with lookup tables indexed by
 * character value.  The scanners pass both plain chars and getc()
 * results (0..UCHAR_MAX, or EOF) to these macros, so every table has
 * one slot per unsigned char value and every argument is reduced to
 * an unsigned char before it is used as a subscript.  EOF therefore
 * aliases the slot for UCHAR_MAX.  Slots that init() does not write
 * keep their static zero (FALSE) value.
 */
#define	NCHARCLASS	(UCHAR_MAX + 1)	/* # of slots in a class table	*/
#define	chslot(arg)	((unsigned char)(arg))

#define	iswhite(arg)	(_wht[chslot(arg)])	/* T if char is white	*/
#define	begtoken(arg)	(_btk[chslot(arg)])	/* T if char can start token */
#define	intoken(arg)	(_itk[chslot(arg)])	/* T if char can be in token */
#define	endtoken(arg)	(_etk[chslot(arg)])	/* T if char ends tokens */
#define	isgood(arg)	(_gd[chslot(arg)])	/* T if char can be after ')' */

#define	optoken(arg)	(_opr[chslot(arg)])	/* T if char can be	*/
					/* an overloaded operator token	*/

/* arguments are parenthesized so that expressions can be passed safely */
#define	max(I1, I2)	((I1) > (I2) ? (I1) : (I2))

struct	nd_st {			/* sorting structure			*/
	char	*entry;			/* function or type name	*/
	char	*file;			/* file name			*/
	bool	f;			/* use pattern or line no	*/
	int	lno;			/* for -x option		*/
	char	*pat;			/* search pattern		*/
	bool	been_warned;		/* set if noticed dup		*/
	struct	nd_st	*left, *right;	/* left and right sons		*/
};

long	ftell();
typedef	struct	nd_st	NODE;

static bool
	number,				/* T if on line starting with #	*/
	gotone,				/* found a func already on line	*/
					/* boolean "func" (see init)	*/
	_wht[NCHARCLASS], _etk[NCHARCLASS], _itk[NCHARCLASS],
	_btk[NCHARCLASS], _gd[NCHARCLASS];

/* boolean array for overloadable operator symbols			*/
static bool	_opr[NCHARCLASS];
102 
103 	/*
104 	 * typedefs are recognized using a simple finite automata,
105 	 * tydef is its state variable.
106 	 */
107 typedef enum {none, begin, begin_rec, begin_tag, middle, end } TYST;
108 
109 static TYST tydef = none;
110 
111 static char	searchar = '/';		/* use /.../ searches 		*/
112 
113 static int	lineno;			/* line number of current line */
114 static char
115 	line[4*BUFSIZ],		/* current input line			*/
116 	*curfile,		/* current input file name		*/
117 	*outfile = "tags",	/* output file				*/
118 	*white	= " \f\t\n",	/* white chars				*/
119 	*endtk	= " \t\n\"'#()[]{}=-+%*/&|^~!<>;,.:?",
120 				/* token ending chars			*/
121 	*begtk	= "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz",
122 				/* token starting chars			*/
123 	*intk	= "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz"
124 		    "0123456789",
125 				/* valid in-token chars			*/
126 	*notgd	= ",;";		/* non-valid after-function chars	*/
127 
128 static char	*oprtk	= " =-+%*/&|^~!<>[]()";	/* overloadable operators */
129 
130 static int	file_num;	/* current file number			*/
131 static int	aflag;		/* -a: append to tags */
132 
133 #ifndef XPG4			/* XPG4: handle typedefs by default	*/
134 static int	tflag;		/* -t: create tags for typedefs		*/
135 #endif /*  !XPG4 */
136 
137 static int	uflag;		/* -u: update tags			*/
138 static int	wflag;		/* -w: suppress warnings		*/
139 static int	vflag;		/* -v: create vgrind style index output */
140 static int	xflag;		/* -x: create cxref style output	*/
141 
142 static char	lbuf[LINE_MAX];
143 
144 static FILE
145 	*inf,			/* ioptr for current input file		*/
146 	*outf;			/* ioptr for tags file			*/
147 
148 static long	lineftell;	/* ftell after getc( inf ) == '\n' 	*/
149 
150 static NODE	*head;		/* the head of the sorted binary tree	*/
151 
152 #ifdef __STDC__
153 char	*strrchr(), *strchr();
154 #else
155 char	*rindex(), *index();
156 #endif
157 
158 static int	infile_fail;	/* Count of bad opens. Fix bug ID #1082298 */
159 
160 static char	*dbp = lbuf;
161 static int	pfcnt;
162 
163 static int	mac;		/* our modified argc, after parseargs() */
164 static char	**mav;		/* our modified argv, after parseargs() */
165 
166 
167 /* our local functions:							*/
168 static void	init();
169 static void	find_entries(char *file);
170 static void	pfnote();
171 static void	C_entries();
172 static int	start_entry(char **lp, char *token, int *f);
173 static void	Y_entries();
174 static char	*toss_comment(char *start);
175 static void	getline(long int where);
176 static void	free_tree(NODE *node);
177 static void	add_node(NODE *node, NODE *cur_node);
178 static void	put_entries(NODE *node);
179 static int	PF_funcs(FILE *fi);
180 static int	tail(char *cp);
181 static void	takeprec();
182 static void	getit();
183 static char	*savestr(char *cp);
184 static void	L_funcs(FILE *fi);
185 static void	L_getit(int special);
186 static int	striccmp(char *str, char *pat);
187 static int	first_char();
188 static void	toss_yysec();
189 static void	Usage();
190 static void	parseargs(int ac, char **av);
191 
192 int
193 main(int ac, char *av[])
194 {
195 	int i;
196 	char cmd[100];
197 
198 	(void) setlocale(LC_ALL, "");
199 #if !defined(TEXT_DOMAIN)
200 #define	TEXT_DOMAIN "SYS_TEST"
201 #endif
202 	(void) textdomain(TEXT_DOMAIN);
203 
204 	parseargs(ac, av);
205 
206 	while ((i = getopt(mac, mav, "aBFtuvwxf:")) != EOF) {
207 		switch (i) {
208 		case 'a':	/* -a: Append output to existing tags file */
209 			aflag++;
210 			break;
211 
212 		case 'B':	/* -B: Use backward search patterns (?...?) */
213 			searchar = '?';
214 			break;
215 
216 		case 'F':	/* -F: Use forward search patterns (/.../) */
217 			searchar = '/';
218 			break;
219 
220 		case 't':	/* -t: Create tags for typedefs.	*/
221 				/* for XPG4 , we silently ignore "-t".	*/
222 #ifndef XPG4
223 			tflag++;
224 #endif /*  !XPG4 */
225 			break;
226 
227 		case 'u':	/* -u: Update the specified tags file	*/
228 			uflag++;
229 			break;
230 
231 		case 'v':	/* -v: Index listing on stdout		*/
232 			vflag++;
233 			xflag++;
234 			break;
235 
236 		case 'w':	/* -w: Suppress warnings		*/
237 			wflag++;
238 			break;
239 
240 		case 'x':	/* -x: Produce a simple index		*/
241 			xflag++;
242 			break;
243 
244 		case 'f':	/* -f tagsfile: output to tagsfile	*/
245 			outfile = strdup(optarg);
246 			break;
247 
248 		default:
249 			Usage();	/* never returns		*/
250 			break;
251 		}
252 	}
253 
254 	/* if we didn't specify any source code to parse, complain and die. */
255 	if (optind == mac) {
256 		Usage();	/* never returns		*/
257 	}
258 
259 
260 	init();			/* set up boolean "functions"		*/
261 	/*
262 	 * loop through files finding functions
263 	 */
264 	for (file_num = optind; file_num < mac; file_num++)
265 		find_entries(mav[file_num]);
266 
267 	if (xflag) {
268 		put_entries(head);
269 		exit(infile_fail > 0 ? 2 : 0); /* Fix for 1082298 */
270 	}
271 	if (uflag) {
272 		for (i = 1; i < mac; i++) {
273 			(void) sprintf(cmd,
274 			"mv %s OTAGS;fgrep -v '\t%s\t' OTAGS >%s;rm OTAGS",
275 				outfile, mav[i], outfile);
276 			(void) system(cmd);
277 		}
278 		aflag++;
279 	}
280 	outf = fopen(outfile, aflag ? "a" : "w");
281 	if (outf == NULL) {
282 		perror(outfile);
283 		exit(1);
284 	}
285 	put_entries(head);
286 	(void) fclose(outf);
287 	if (uflag) {
288 		(void) sprintf(cmd, "sort %s -o %s", outfile, outfile);
289 		(void) system(cmd);
290 	}
291 	return (infile_fail > 0 ? 2 : 0); /* Fix for #1082298 */
292 }
293 
294 /*
295  * This routine sets up the boolean psuedo-functions which work
296  * by seting boolean flags dependent upon the corresponding character
297  * Every char which is NOT in that string is not a white char.  Therefore,
298  * all of the array "_wht" is set to FALSE, and then the elements
299  * subscripted by the chars in "white" are set to TRUE.  Thus "_wht"
300  * of a char is TRUE if it is the string "white", else FALSE.
301  */
302 static void
303 init()
304 {
305 	char	*sp;
306 	int	i;
307 
308 	for (i = 0; i < 0177; i++) {
309 		_wht[i] = _etk[i] = _itk[i] = _btk[i] = FALSE;
310 		_opr[i] = FALSE;	/* initialize boolean		*/
311 					/* array of operator symbols	*/
312 		_gd[i] = TRUE;
313 	}
314 	for (sp = white; *sp; sp++)
315 		_wht[*sp] = TRUE;
316 	for (sp = endtk; *sp; sp++)
317 		_etk[*sp] = TRUE;
318 	for (sp = intk; *sp; sp++)
319 		_itk[*sp] = TRUE;
320 	for (sp = begtk; *sp; sp++)
321 		_btk[*sp] = TRUE;
322 
323 	/* mark overloadable operator symbols				*/
324 	for (sp = oprtk; *sp; sp++)
325 		_opr[*sp] = TRUE;
326 
327 	for (sp = notgd; *sp; sp++)
328 		_gd[*sp] = FALSE;
329 }
330 
331 /*
332  * This routine opens the specified file and calls the function
333  * which finds the function and type definitions.
334  */
335 static void
336 find_entries(file)
337 char	*file;
338 {
339 	char *cp;
340 	struct stat st;
341 
342 	/* skip anything that isn't a regular file */
343 	if (stat(file, &st) == 0 && !S_ISREG(st.st_mode))
344 		return;
345 
346 	if ((inf = fopen(file, "r")) == NULL) {
347 		perror(file);
348 		infile_fail++;		/* Count bad opens. ID #1082298 */
349 		return;
350 	}
351 	curfile = savestr(file);
352 	lineno = 0;
353 #ifdef __STDC__
354 	cp = strrchr(file, '.');
355 #else
356 	cp = rindex(file, '.');
357 #endif
358 	/* .l implies lisp or lex source code */
359 	if (cp && cp[1] == 'l' && cp[2] == '\0') {
360 #ifdef __STDC__
361 		if (strchr(";([", first_char()) != NULL) 	/* lisp */
362 #else
363 		if (index(";([", first_char()) != NULL) 	/* lisp */
364 #endif
365 		{
366 			L_funcs(inf);
367 			(void) fclose(inf);
368 			return;
369 		} else {					/* lex */
370 			/*
371 			 * throw away all the code before the second "%%"
372 			 */
373 			toss_yysec();
374 			getline(lineftell);
375 			pfnote("yylex", lineno, TRUE);
376 			toss_yysec();
377 			C_entries();
378 			(void) fclose(inf);
379 			return;
380 		}
381 	}
382 	/* .y implies a yacc file */
383 	if (cp && cp[1] == 'y' && cp[2] == '\0') {
384 		toss_yysec();
385 		Y_entries();
386 		C_entries();
387 		(void) fclose(inf);
388 		return;
389 	}
390 
391 	/*
392 	 * Add in file name extension support for Sun C++ which
393 	 * permits .C/.c (AT&T), .cc (G++) and .cxx (Gloksp.)
394 	 */
395 
396 	/* if not a .c, .C, .cc, .cxx or .h file, try fortran */
397 	if (cp && (cp[1] != 'C' && cp[1] != 'c' && cp[1] != 'h') &&
398 	    cp[2] == '\0' && (strcmp(cp, ".cc") == 0) &&
399 	    (strcmp(cp, ".cxx") == 0)) {
400 		if (PF_funcs(inf) != 0) {
401 			(void) fclose(inf);
402 			return;
403 		}
404 		rewind(inf);	/* no fortran tags found, try C */
405 	}
406 	C_entries();
407 	(void) fclose(inf);
408 }
409 
410 static void
411 pfnote(name, ln, f)
412 char	*name;
413 int	ln;
414 bool	f;		/* f == TRUE when function */
415 {
416 	char *fp;
417 	NODE *np;
418 	char *nametk;	/* hold temporary tokens from name */
419 	char nbuf[BUFSIZ];
420 
421 	if ((np = malloc(sizeof (NODE))) == NULL) {
422 		(void) fprintf(stderr,
423 				gettext("ctags: too many entries to sort\n"));
424 		put_entries(head);
425 		free_tree(head);
426 		head = np = (NODE *) malloc(sizeof (NODE));
427 	}
428 	if (xflag == 0 && (strcmp(name, "main") == 0)) {
429 #ifdef __STDC__
430 		fp = strrchr(curfile, '/');
431 #else
432 		fp = rindex(curfile, '/');
433 #endif
434 		if (fp == 0)
435 			fp = curfile;
436 		else
437 			fp++;
438 		(void) sprintf(nbuf, "M%s", fp);
439 #ifdef __STDC__
440 		fp = strrchr(nbuf, '.');
441 #else
442 		fp = rindex(nbuf, '.');
443 #endif
444 		/* Chop off .cc and .cxx as well as .c, .h, etc		*/
445 		if (fp && ((fp[2] == 0) || (fp[2] == 'c' && fp[3] == 0) ||
446 			    (fp[3] == 'x' && fp[4] == 0)))
447 			*fp = 0;
448 		name = nbuf;
449 	}
450 
451 	/* remove in-between blanks operator function tags */
452 #ifdef __STDC__
453 	if (strchr(name, ' ') != NULL)
454 #else
455 	if (index(name, ' ') != NULL)
456 #endif
457 	{
458 		(void) strcpy(name, strtok(name, " "));
459 		while (nametk = strtok(0, " "))
460 			(void) strcat(name, nametk);
461 	}
462 	np->entry = savestr(name);
463 	np->file = curfile;
464 	np->f = f;
465 	np->lno = ln;
466 	np->left = np->right = 0;
467 	if (xflag == 0) {
468 		lbuf[50] = 0;
469 		(void) strcat(lbuf, "$");
470 		lbuf[50] = 0;
471 	}
472 	np->pat = savestr(lbuf);
473 	if (head == NULL)
474 		head = np;
475 	else
476 		add_node(np, head);
477 }
478 
479 /*
480  * This routine finds functions and typedefs in C syntax and adds them
481  * to the list.
482  */
483 static void
484 C_entries()
485 {
486 	int c;
487 	char *token, *tp;
488 	bool incomm, inquote, inchar, midtoken, isoperator, optfound;
489 	int level;
490 	char *sp;
491 	char tok[BUFSIZ];
492 	long int tokftell;
493 
494 	number = gotone = midtoken = inquote = inchar =
495 	incomm = isoperator = optfound = FALSE;
496 
497 	level = 0;
498 	sp = tp = token = line;
499 	lineno++;
500 	lineftell = tokftell = ftell(inf);
501 	for (;;) {
502 		*sp = c = getc(inf);
503 		if (feof(inf))
504 			break;
505 		if (c == '\n') {
506 			lineftell = ftell(inf);
507 			lineno++;
508 		} else if (c == '\\') {
509 			c = *++sp = getc(inf);
510 			if ((c == '\n') || (c == EOF)) { /* c == EOF, 1091005 */
511 				lineftell = ftell(inf);
512 				lineno++;
513 				c = ' ';
514 			}
515 		} else if (incomm) {
516 			if (c == '*') {
517 				while ((*++sp = c = getc(inf)) == '*')
518 					continue;
519 
520 				/* c == EOF 1091005			*/
521 				if ((c == '\n') || (c == EOF)) {
522 					lineftell = ftell(inf);
523 					lineno++;
524 				}
525 
526 				if (c == '/')
527 					incomm = FALSE;
528 			}
529 		} else if (inquote) {
530 			/*
531 			 * Too dumb to know about \" not being magic, but
532 			 * they usually occur in pairs anyway.
533 			 */
534 			if (c == '"')
535 				inquote = FALSE;
536 			continue;
537 		} else if (inchar) {
538 			if (c == '\'')
539 				inchar = FALSE;
540 			continue;
541 		} else if (midtoken == TRUE) {	/* if white space omitted */
542 			goto dotoken;
543 		} else switch (c) {
544 		    case '"':
545 			inquote = TRUE;
546 			continue;
547 		    case '\'':
548 			inchar = TRUE;
549 			continue;
550 		    case '/':
551 			*++sp = c = getc(inf);
552 			/* Handles the C++ comment token "//" 		*/
553 			if (c == '*')
554 				incomm = TRUE;
555 			else if (c == '/') {
556 				/*
557 				 * Skip over all the characters after
558 				 * "//" until a newline character. Now also
559 				 * includes fix for 1091005, check for EOF.
560 				 */
561 				do  {
562 					c = getc(inf);
563 				/* 1091005:				*/
564 				} while ((c != '\n') && (c != EOF));
565 
566 
567 				/*
568 				 * Fixed bugid 1030014
569 				 * Return the current position of the
570 				 * file after the newline.
571 				 */
572 				lineftell = ftell(inf);
573 				lineno++;
574 				*--sp = c;
575 			}
576 			else
577 				(void) ungetc(*sp, inf);
578 			continue;
579 		    case '#':
580 			if (sp == line)
581 				number = TRUE;
582 			continue;
583 		    case '{':
584 			if ((tydef == begin_rec) || (tydef == begin_tag)) {
585 				tydef = middle;
586 			}
587 			level++;
588 			continue;
589 		    case '}':
590 			/*
591 			 * Heuristic for function or structure end;
592 			 * common for #ifdef/#else blocks to add extra "{"
593 			 */
594 			if (sp == line)
595 				level = 0;	/* reset */
596 			else
597 				level--;
598 			if (!level && tydef == middle) {
599 				tydef = end;
600 			}
601 			if (!level && tydef == none) /* Fix for #1034126 */
602 				goto dotoken;
603 			continue;
604 		}
605 
606 dotoken:
607 
608 
609 		if (!level && !inquote && !incomm && gotone == FALSE) {
610 			if (midtoken) {
611 				if (endtoken(c)) {
612 
613 				/*
614 				 *
615 				 *    ':'  +---> ':' -> midtok
616 				 *
617 				 *    +---> operator{+,-, etc} -> midtok
618 				 *		(continue)
619 				 *    +---> endtok
620 				 */
621 		/*
622 		 * Enhance operator function support and
623 		 *	fix bugid 1027806
624 		 *
625 		 *  For operator token, scanning will continue until
626 		 *  '(' is found.  Spaces between 'operater' and
627 		 *  'oprtk' are allowed (e.g. 'operator + ()'), but
628 		 *  will be removed when the actual entry for the tag
629 		 *  is made.
630 		 *  Note that functions of the form 'operator ()(int)'
631 		 *  will be recognized, but 'operator ()' will not,
632 		 *  even though this is legitimate in C.
633 		 */
634 
635 					if (optoken(c)) {
636 					    if (isoperator) {
637 					    if (optfound) {
638 						    if (c != '(') {
639 						    tp++;
640 						    goto next_char;
641 						    }
642 					    } else {
643 						    if (c != ' ') {
644 						    optfound = TRUE;
645 						    }
646 						    tp++;
647 						    goto next_char;
648 					    }
649 					    } else {
650 				/* start: this code shifted left for cstyle */
651 				char *backptr = tp - 7;
652 				if (strncmp(backptr, "operator", 8) == 0) {
653 					/* This is an overloaded operator */
654 					isoperator = TRUE;
655 					if (c != ' ') {
656 						optfound = TRUE;
657 					}
658 
659 					tp++;
660 					goto next_char;
661 				} else if (c == '~') {
662 					/* This is a destructor		*/
663 					tp++;
664 					goto next_char;
665 				}
666 				/* end: above code shifted left for cstyle */
667 					}
668 					} else if (c == ':') {
669 					    if ((*++sp = getc(inf)) == ':') {
670 						tp += 2;
671 						c = *sp;
672 						goto next_char;
673 					    } else {
674 						(void) ungetc (*sp, inf);
675 						--sp;
676 					    }
677 					}
678 
679 				/* start: this code shifted left for cstyle */
680 				{
681 				int f;
682 				int pfline = lineno;
683 
684 				if (start_entry(&sp, token, &f)) {
685 					(void) strncpy(tok, token, tp-token+1);
686 					tok[tp-token+1] = 0;
687 					getline(tokftell);
688 					pfnote(tok, pfline, f);
689 					gotone = f;	/* function */
690 				}
691 
692 				isoperator = optfound = midtoken = FALSE;
693 				token = sp;
694 				}
695 				/* end: above code shifted left for cstyle */
696 				} else if (intoken(c))
697 					tp++;
698 			} else if (begtoken(c)) {
699 				token = tp = sp;
700 				midtoken = TRUE;
701 				tokftell = lineftell;
702 			}
703 		}
704 	next_char:
705 		if (c == ';' && tydef == end)	/* clean with typedefs */
706 			tydef = none;
707 		sp++;
708 			/* The "c == }" was added to fix #1034126 */
709 		if (c == '\n' ||c == '}'|| sp > &line[sizeof (line) - BUFSIZ]) {
710 			tp = token = sp = line;
711 			number = gotone = midtoken = inquote =
712 			inchar = isoperator = optfound = FALSE;
713 		}
714 	}
715 }
716 
717 /*
718  * This routine  checks to see if the current token is
719  * at the start of a function, or corresponds to a typedef
720  * It updates the input line * so that the '(' will be
721  * in it when it returns.
722  */
723 static int
724 start_entry(lp, token, f)
725 char	**lp, *token;
726 int	*f;
727 {
728 	char	*sp;
729 	int	c;
730 	static	bool	found;
731 	bool	firsttok;	/* T if have seen first token in ()'s	*/
732 	int	bad;
733 
734 	*f = 1;			/* a function */
735 	sp = *lp;
736 	c = *sp;
737 	bad = FALSE;
738 	if (!number) {		/* space is not allowed in macro defs	*/
739 		while (iswhite(c)) {
740 			*++sp = c = getc(inf);
741 			if ((c == '\n') || (c == EOF)) { /* c==EOF, #1091005 */
742 				lineno++;
743 				lineftell = ftell(inf);
744 				if (sp > &line[sizeof (line) - BUFSIZ])
745 					goto ret;
746 			}
747 		}
748 	/* the following tries to make it so that a #define	a b(c)	*/
749 	/* doesn't count as a define of b.				*/
750 	} else {
751 		if (strncmp(token, "define", 6) == 0)
752 			found = 0;
753 		else
754 			found++;
755 		if (found >= 2) {
756 			gotone = TRUE;
757 badone:			bad = TRUE;
758 			goto ret;
759 		}
760 	}
761 	/* check for the typedef cases		*/
762 #ifdef XPG4
763 	if (strncmp(token, "typedef", 7) == 0) {
764 #else /*  !XPG4 */
765 	if (tflag && (strncmp(token, "typedef", 7) == 0)) {
766 #endif /*  XPG4 */
767 		tydef = begin;
768 		goto badone;
769 	}
770 	/* Handles 'class' besides 'struct' etc.			*/
771 	if (tydef == begin && ((strncmp(token, "struct", 6) == 0) ||
772 			    (strncmp(token, "class", 5) == 0) ||
773 			    (strncmp(token, "union", 5) == 0)||
774 			    (strncmp(token, "enum", 4) == 0))) {
775 		tydef = begin_rec;
776 		goto badone;
777 	}
778 	if (tydef == begin) {
779 		tydef = end;
780 		goto badone;
781 	}
782 	if (tydef == begin_rec) {
783 		tydef = begin_tag;
784 		goto badone;
785 	}
786 	if (tydef == begin_tag) {
787 		tydef = end;
788 		goto gottydef;	/* Fall through to "tydef==end" */
789 	}
790 
791 gottydef:
792 	if (tydef == end) {
793 		*f = 0;
794 		goto ret;
795 	}
796 	if (c != '(')
797 		goto badone;
798 	firsttok = FALSE;
799 	while ((*++sp = c = getc(inf)) != ')') {
800 		if ((c == '\n') || (c == EOF)) { /* c == EOF Fix for #1091005 */
801 			lineftell = ftell(inf);
802 			lineno++;
803 			if (sp > &line[sizeof (line) - BUFSIZ])
804 				goto ret;
805 		}
806 		/*
807 		 * This line used to confuse ctags:
808 		 *	int	(*oldhup)();
809 		 * This fixes it. A nonwhite char before the first
810 		 * token, other than a / (in case of a comment in there)
811 		 * makes this not a declaration.
812 		 */
813 		if (begtoken(c) || c == '/')
814 			firsttok = TRUE;
815 		else if (!iswhite(c) && !firsttok)
816 			goto badone;
817 	}
818 	while (iswhite(*++sp = c = getc(inf)))
819 		if ((c == '\n') || (c == EOF)) { /* c == EOF fix for #1091005 */
820 			lineno++;
821 			lineftell = ftell(inf);
822 			if (sp > &line[sizeof (line) - BUFSIZ])
823 				break;
824 		}
825 ret:
826 	*lp = --sp;
827 	if (c == '\n')
828 		lineno--;
829 	(void) ungetc(c, inf);
830 	return (!bad && (!*f || isgood(c)));
831 					/* hack for typedefs */
832 }
833 
834 /*
835  * Y_entries:
836  *	Find the yacc tags and put them in.
837  */
838 static void
839 Y_entries()
840 {
841 	char	*sp, *orig_sp;
842 	int	brace;
843 	bool	in_rule, toklen;
844 	char		tok[BUFSIZ];
845 
846 	brace = 0;
847 	getline(lineftell);
848 	pfnote("yyparse", lineno, TRUE);
849 	while (fgets(line, sizeof (line), inf) != NULL)
850 		for (sp = line; *sp; sp++)
851 			switch (*sp) {
852 			    case '\n':
853 				lineno++;
854 				/* FALLTHROUGH */
855 			    case ' ':
856 			    case '\t':
857 			    case '\f':
858 			    case '\r':
859 				break;
860 			    case '"':
861 				do {
862 					while (*++sp != '"')
863 						continue;
864 				} while (sp[-1] == '\\');
865 				break;
866 			    case '\'':
867 				do {
868 					while (*++sp != '\'')
869 						continue;
870 				} while (sp[-1] == '\\');
871 				break;
872 			    case '/':
873 				if (*++sp == '*')
874 					sp = toss_comment(sp);
875 				else
876 					--sp;
877 				break;
878 			    case '{':
879 				brace++;
880 				break;
881 			    case '}':
882 				brace--;
883 				break;
884 			    case '%':
885 				if (sp[1] == '%' && sp == line)
886 					return;
887 				break;
888 			    case '|':
889 			    case ';':
890 				in_rule = FALSE;
891 				break;
892 			    default:
893 				if (brace == 0 && !in_rule && (isalpha(*sp) ||
894 								*sp == '.' ||
895 								*sp == '_')) {
896 					orig_sp = sp;
897 					++sp;
898 					while (isalnum(*sp) || *sp == '_' ||
899 						*sp == '.')
900 						sp++;
901 					toklen = sp - orig_sp;
902 					while (isspace(*sp))
903 						sp++;
904 					if (*sp == ':' || (*sp == '\0' &&
905 						    first_char() == ':')) {
906 						(void) strncpy(tok,
907 							orig_sp, toklen);
908 						tok[toklen] = '\0';
909 						(void) strcpy(lbuf, line);
910 						lbuf[strlen(lbuf) - 1] = '\0';
911 						pfnote(tok, lineno, TRUE);
912 						in_rule = TRUE;
913 					}
914 					else
915 						sp--;
916 				}
917 				break;
918 			}
919 }
920 
921 static char *
922 toss_comment(start)
923 char	*start;
924 {
925 	char	*sp;
926 
927 	/*
928 	 * first, see if the end-of-comment is on the same line
929 	 */
930 	do {
931 #ifdef __STDC__
932 		while ((sp = strchr(start, '*')) != NULL)
933 #else
934 		while ((sp = index(start, '*')) != NULL)
935 #endif
936 			if (sp[1] == '/')
937 				return (++sp);
938 			else
939 				start = (++sp);
940 		start = line;
941 		lineno++;
942 	} while (fgets(line, sizeof (line), inf) != NULL);
943 
944 	/*
945 	 * running this through lint revealed that the original version
946 	 * of this routine didn't explicitly return something; while
947 	 * the return value was always used!. so i've added this
948 	 * next line.
949 	 */
950 	return (sp);
951 }
952 
953 static void
954 getline(where)
955 long int where;
956 {
957 	long saveftell = ftell(inf);
958 	char *cp;
959 
960 	(void) fseek(inf, where, 0);
961 	(void) fgets(lbuf, sizeof (lbuf), inf);
962 #ifdef __STDC__
963 	cp = strrchr(lbuf, '\n');
964 #else
965 	cp = rindex(lbuf, '\n');
966 #endif
967 	if (cp)
968 		*cp = 0;
969 	(void) fseek(inf, saveftell, 0);
970 }
971 
972 static void
973 free_tree(node)
974 NODE	*node;
975 {
976 	while (node) {
977 		free_tree(node->right);
978 		free(node);
979 		node = node->left;
980 	}
981 }
982 
983 static void
984 add_node(node, cur_node)
985 NODE *node, *cur_node;
986 {
987 	int dif;
988 
989 	dif = strcmp(node->entry, cur_node->entry);
990 	if (dif == 0) {
991 		if (node->file == cur_node->file) {
992 			if (!wflag) {
993 			(void) fprintf(stderr,
994 			gettext("Duplicate entry in file %s, line %d: %s\n"),
995 			node->file, lineno, node->entry);
996 			(void) fprintf(stderr,
997 					gettext("Second entry ignored\n"));
998 			}
999 			return;
1000 		}
1001 		if (!cur_node->been_warned)
1002 			if (!wflag) {
1003 				(void) fprintf(stderr, gettext("Duplicate "
1004 					    "entry in files %s and %s: %s "
1005 					    "(Warning only)\n"),
1006 					    node->file, cur_node->file,
1007 					    node->entry);
1008 			}
1009 		cur_node->been_warned = TRUE;
1010 		return;
1011 	}
1012 
1013 	if (dif < 0) {
1014 		if (cur_node->left != NULL)
1015 			add_node(node, cur_node->left);
1016 		else
1017 			cur_node->left = node;
1018 		return;
1019 	}
1020 	if (cur_node->right != NULL)
1021 		add_node(node, cur_node->right);
1022 	else
1023 		cur_node->right = node;
1024 }
1025 
1026 static void
1027 put_entries(node)
1028 NODE	*node;
1029 {
1030 	char	*sp;
1031 
1032 	if (node == NULL)
1033 		return;
1034 	put_entries(node->left);
1035 
1036 	/*
1037 	 * while the code in the following #ifdef section could be combined,
1038 	 * it's explicitly separated here to make maintainance easier.
1039 	 */
1040 #ifdef XPG4
1041 	/*
1042 	 * POSIX 2003: we no longer have a "-t" flag; the logic is
1043 	 * automatically assumed to be "turned on" here.
1044 	 */
1045 	if (xflag == 0) {
1046 			(void) fprintf(outf, "%s\t%s\t%c^",
1047 				node->entry, node->file, searchar);
1048 			for (sp = node->pat; *sp; sp++)
1049 				if (*sp == '\\')
1050 					(void) fprintf(outf, "\\\\");
1051 				else if (*sp == searchar)
1052 					(void) fprintf(outf, "\\%c", searchar);
1053 				else
1054 					(void) putc(*sp, outf);
1055 			(void) fprintf(outf, "%c\n", searchar);
1056 	} else if (vflag)
1057 		(void) fprintf(stdout, "%s %s %d\n",
1058 				node->entry, node->file, (node->lno+63)/64);
1059 	else
1060 		(void) fprintf(stdout, "%-16s %4d %-16s %s\n",
1061 			node->entry, node->lno, node->file, node->pat);
1062 #else /* XPG4 */
1063 	/*
1064 	 * original way of doing things. "-t" logic is only turned on
1065 	 * when the user has specified it via a command-line argument.
1066 	 */
1067 	if (xflag == 0)
1068 		if (node->f) {		/* a function */
1069 			(void) fprintf(outf, "%s\t%s\t%c^",
1070 				node->entry, node->file, searchar);
1071 			for (sp = node->pat; *sp; sp++)
1072 				if (*sp == '\\')
1073 					(void) fprintf(outf, "\\\\");
1074 				else if (*sp == searchar)
1075 					(void) fprintf(outf, "\\%c", searchar);
1076 				else
1077 					(void) putc(*sp, outf);
1078 			(void) fprintf(outf, "%c\n", searchar);
1079 		} else {		/* a typedef; text pattern inadequate */
1080 			(void) fprintf(outf, "%s\t%s\t%d\n",
1081 				node->entry, node->file, node->lno);
1082 		} else if (vflag)
1083 		(void) fprintf(stdout, "%s %s %d\n",
1084 				node->entry, node->file, (node->lno+63)/64);
1085 	else
1086 		(void) fprintf(stdout, "%-16s %4d %-16s %s\n",
1087 			node->entry, node->lno, node->file, node->pat);
1088 #endif /* XPG4 */
1089 	put_entries(node->right);
1090 }
1091 
1092 
1093 static int
1094 PF_funcs(fi)
1095 FILE *fi;
1096 {
1097 
1098 	pfcnt = 0;
1099 	while (fgets(lbuf, sizeof (lbuf), fi)) {
1100 		lineno++;
1101 		dbp = lbuf;
1102 		if (*dbp == '%') dbp++;	/* Ratfor escape to fortran */
1103 		while (isspace(*dbp))
1104 			dbp++;
1105 		if (*dbp == 0)
1106 			continue;
1107 		switch (*dbp |' ') {
1108 
1109 		    case 'i':
1110 			if (tail("integer"))
1111 				takeprec();
1112 			break;
1113 		    case 'r':
1114 			if (tail("real"))
1115 				takeprec();
1116 			break;
1117 		    case 'l':
1118 			if (tail("logical"))
1119 				takeprec();
1120 			break;
1121 		    case 'c':
1122 			if (tail("complex") || tail("character"))
1123 				takeprec();
1124 			break;
1125 		    case 'd':
1126 			if (tail("double")) {
1127 				while (isspace(*dbp))
1128 					dbp++;
1129 				if (*dbp == 0)
1130 					continue;
1131 				if (tail("precision"))
1132 					break;
1133 				continue;
1134 			}
1135 			break;
1136 		}
1137 		while (isspace(*dbp))
1138 			dbp++;
1139 		if (*dbp == 0)
1140 			continue;
1141 		switch (*dbp|' ') {
1142 
1143 		    case 'f':
1144 			if (tail("function"))
1145 				getit();
1146 			continue;
1147 		    case 's':
1148 			if (tail("subroutine"))
1149 				getit();
1150 			continue;
1151 		    case 'p':
1152 			if (tail("program")) {
1153 				getit();
1154 				continue;
1155 			}
1156 			if (tail("procedure"))
1157 				getit();
1158 			continue;
1159 		}
1160 	}
1161 	return (pfcnt);
1162 }
1163 
1164 static int
1165 tail(cp)
1166 char *cp;
1167 {
1168 	int len = 0;
1169 
1170 	while (*cp && (*cp&~' ') == ((*(dbp+len))&~' '))
1171 		cp++, len++;
1172 	if (*cp == 0) {
1173 		dbp += len;
1174 		return (1);
1175 	}
1176 	return (0);
1177 }
1178 
1179 static void
1180 takeprec()
1181 {
1182 
1183 	while (isspace(*dbp))
1184 		dbp++;
1185 	if (*dbp != '*')
1186 		return;
1187 	dbp++;
1188 	while (isspace(*dbp))
1189 		dbp++;
1190 	if (!isdigit(*dbp)) {
1191 		--dbp;		/* force failure */
1192 		return;
1193 	}
1194 	do
1195 		dbp++;
1196 	while (isdigit(*dbp));
1197 }
1198 
1199 static void
1200 getit()
1201 {
1202 	char *cp;
1203 	char c;
1204 	char nambuf[BUFSIZ];
1205 
1206 	for (cp = lbuf; *cp; cp++)
1207 		;
1208 	*--cp = 0;	/* zap newline */
1209 	while (isspace(*dbp))
1210 		dbp++;
1211 	if (*dbp == 0 || !isalpha(*dbp) || !isascii(*dbp))
1212 		return;
1213 	for (cp = dbp+1; *cp && (isalpha(*cp) || isdigit(*cp)); cp++)
1214 		continue;
1215 	c = cp[0];
1216 	cp[0] = 0;
1217 	(void) strcpy(nambuf, dbp);
1218 	cp[0] = c;
1219 	pfnote(nambuf, lineno, TRUE);
1220 	pfcnt++;
1221 }
1222 
/*
 * savestr:
 *	Return a freshly malloc'ed copy of the NUL-terminated string cp.
 *	The caller owns the returned storage.  If memory cannot be
 *	obtained a diagnostic is printed and the program exits with
 *	status 1, so the return value is never NULL.
 */
static char *
savestr(cp)
char *cp;
{
	size_t len;
	char *dp;

	len = strlen(cp);
	dp = malloc(len + 1);
	if (dp == NULL) {
		perror("Can't malloc string space");
		exit(1);
	}
	(void) strcpy(dp, cp);

	return (dp);
}
1236 
1237 #ifndef __STDC__
1238 /*
1239  * Return the ptr in sp at which the character c last
1240  * appears; NULL if not found
1241  *
1242  * Identical to v7 rindex, included for portability.
1243  */
1244 
static char *
rindex(sp, c)
char *sp, c;
{
	char *last = NULL;

	/*
	 * Walk the whole string, terminator included, remembering the
	 * most recent position that holds c.
	 */
	for (;;) {
		if (*sp == c)
			last = sp;
		if (*sp == '\0')
			break;
		sp++;
	}
	return (last);
}
1258 #endif
1259 
1260 /*
1261  * lisp tag functions
1262  * just look for (def or (DEF
1263  */
1264 
1265 static void
1266 L_funcs(fi)
1267 FILE *fi;
1268 {
1269 	int	special;
1270 
1271 	pfcnt = 0;
1272 	while (fgets(lbuf, sizeof (lbuf), fi)) {
1273 		lineno++;
1274 		dbp = lbuf;
1275 		if (dbp[0] == '(' &&
1276 		    (dbp[1] == 'D' || dbp[1] == 'd') &&
1277 		    (dbp[2] == 'E' || dbp[2] == 'e') &&
1278 		    (dbp[3] == 'F' || dbp[3] == 'f')) {
1279 			dbp += 4;
1280 			if (striccmp(dbp, "method") == 0 ||
1281 			    striccmp(dbp, "wrapper") == 0 ||
1282 			    striccmp(dbp, "whopper") == 0)
1283 				special = TRUE;
1284 			else
1285 				special = FALSE;
1286 			while (!isspace(*dbp))
1287 				dbp++;
1288 			while (isspace(*dbp))
1289 				dbp++;
1290 			L_getit(special);
1291 		}
1292 	}
1293 }
1294 
1295 static void
1296 L_getit(special)
1297 int	special;
1298 {
1299 	char	*cp;
1300 	char	c;
1301 	char		nambuf[BUFSIZ];
1302 
1303 	for (cp = lbuf; *cp; cp++)
1304 		continue;
1305 	*--cp = 0;		/* zap newline */
1306 	if (*dbp == 0)
1307 		return;
1308 	if (special) {
1309 #ifdef __STDC__
1310 		if ((cp = strchr(dbp, ')')) == NULL)
1311 #else
1312 		if ((cp = index(dbp, ')')) == NULL)
1313 #endif
1314 			return;
1315 		while (cp >= dbp && *cp != ':')
1316 			cp--;
1317 		if (cp < dbp)
1318 			return;
1319 		dbp = cp;
1320 		while (*cp && *cp != ')' && *cp != ' ')
1321 			cp++;
1322 	}
1323 	else
1324 		for (cp = dbp + 1; *cp && *cp != '(' && *cp != ' '; cp++)
1325 			continue;
1326 	c = cp[0];
1327 	cp[0] = 0;
1328 	(void) strcpy(nambuf, dbp);
1329 	cp[0] = c;
1330 	pfnote(nambuf, lineno, TRUE);
1331 	pfcnt++;
1332 }
1333 
/*
 * striccmp:
 *	Case-insensitive prefix comparison.  Only as many characters as
 *	pat contains are examined; upper case characters of str are
 *	folded to lower case first, pat is used as is and is therefore
 *	expected to be all lower case.  Returns 0 when str starts with
 *	pat, otherwise the difference between the first pair of
 *	characters that differ (folded str character minus pat
 *	character).
 */
static int
striccmp(str, pat)
char	*str, *pat;
{
	int	folded;

	for (; *pat != '\0'; str++, pat++) {
		folded = isupper(*str) ? tolower(*str) : *str;
		if (folded != *pat)
			return (folded - *pat);
	}
	return (0);
}
1360 
1361 /*
1362  * first_char:
1363  *	Return the first non-blank character in the file.  After
1364  *	finding it, rewind the input file so we start at the beginning
1365  *	again.
1366  */
1367 static int
1368 first_char()
1369 {
1370 	int	c;
1371 	long	off;
1372 
1373 	off = ftell(inf);
1374 	while ((c = getc(inf)) != EOF)
1375 		if (!isspace(c) && c != '\r') {
1376 			(void) fseek(inf, off, 0);
1377 			return (c);
1378 		}
1379 	(void) fseek(inf, off, 0);
1380 	return (EOF);
1381 }
1382 
1383 /*
1384  * toss_yysec:
1385  *	Toss away code until the next "%%" line.
1386  */
1387 static void
1388 toss_yysec()
1389 {
1390 	char		buf[BUFSIZ];
1391 
1392 	for (;;) {
1393 		lineftell = ftell(inf);
1394 		if (fgets(buf, BUFSIZ, inf) == NULL)
1395 			return;
1396 		lineno++;
1397 		if (strncmp(buf, "%%", 2) == 0)
1398 			return;
1399 	}
1400 }
1401 
/*
 * Usage:
 *	Print the command synopsis on stderr and exit with status 1.
 *	The XPG4 build does not advertise the -t flag.
 */
static void
Usage()
{
	/* Each variant is a complete statement; no #ifdef inside a call. */
#ifdef XPG4
	(void) fprintf(stderr, gettext("Usage:\tctags [-aBFuvw] "
	    "[-f tagsfile] file ...\n"));
#else /*  !XPG4 */
	(void) fprintf(stderr, gettext("Usage:\tctags [-aBFtuvw] "
	    "[-f tagsfile] file ...\n"));
#endif /*  XPG4 */
	(void) fprintf(stderr, gettext("OR:\tctags [-x] file ...\n"));
	exit(1);
}
1414 
1415 
1416 /*
1417  * parseargs():		modify the args
1418  *	the purpose of this routine is to transform any ancient argument
1419  *	usage into a format which is acceptable to getopt(3C), so that we
1420  *	retain backwards Solaris 2.[0-4] compatibility.
1421  *
1422  *	This routine allows us to make full use of getopts, without any
1423  *	funny argument processing in main().
1424  *
1425  *	The other alternative would be to hand-craft the processed arguments
1426  *	during and after getopt(3C) - which usually leads to uglier code
1427  *	in main(). I've opted to keep the ugliness isolated down here,
1428  *	instead of in main().
1429  *
1430  *	In a nutshell, if the user has used the old Solaris syntax of:
1431  *		ctags [-aBFtuvwx] [-f tagsfile] filename ...
1432  *	We simply change this into:
1433  *		ctags [-a] [-B] [-F] [-t] [-u] [-v] [-w] [-x] [-f tags] file...
1434  *
1435  *	If the user has specified the new getopt(3C) syntax, we merely
1436  *	copy that into our modified argument space.
1437  */
1438 static void
1439 parseargs(ac, av)
1440 int ac;				/* argument count			*/
1441 char **av;			/* ptr to original argument space	*/
1442 {
1443 	int i;			/* current argument			*/
1444 	int a;			/* used to parse combined arguments	*/
1445 	int fflag;		/* 1 = we're only parsing filenames	*/
1446 	size_t sz;		/* size of the argument			*/
1447 	size_t mav_sz;		/* size of our psuedo argument space	*/
1448 
1449 	i = mac = fflag = 0;	/* proper initializations */
1450 
1451 	mav_sz = ((ac + 1) * sizeof (char *));
1452 	if ((mav = malloc(mav_sz)) == (char **)NULL) {
1453 		perror("Can't malloc argument space");
1454 		exit(1);
1455 	}
1456 
1457 	/* for each argument, see if we need to change things:		*/
1458 	for (; (av[i] != (char *)NULL) && (av[i][0] != (char)NULL); i++) {
1459 
1460 		if (strcmp(av[i], "--") == 0) {
1461 			fflag = 1;	/* just handle filenames now	*/
1462 		}
1463 
1464 		sz = strlen(&av[i][0]);	/* get this arg's size		*/
1465 
1466 		/*
1467 		 * if the argument starts with a "-", and has more than
1468 		 * 1 flag, then we have to search through each character,
1469 		 * and separate any flags which have been combined.
1470 		 *
1471 		 * so, if we've found a "-" string which needs separating:
1472 		 */
1473 		if (fflag == 0 && 	/* not handling filename args	*/
1474 		    av[i][0] == '-' &&	/* and this is a flag		*/
1475 		    sz > 2) {		/* and there's more than 1 flag	*/
1476 			/* then for each flag after the "-" sign:	*/
1477 			for (a = 1; av[i][a]; a++) {
1478 				/* copy the flag into mav space.	*/
1479 				if (a > 1) {
1480 					/*
1481 					 * we need to call realloc() after the
1482 					 * 1st combined flag, because "ac"
1483 					 * doesn't include combined args.
1484 					 */
1485 					mav_sz += sizeof (char *);
1486 					if ((mav = realloc(mav, mav_sz)) ==
1487 					    (char **)NULL) {
1488 						perror("Can't realloc "
1489 							"argument space");
1490 						exit(1);
1491 					}
1492 				}
1493 
1494 				if ((mav[mac] = malloc((size_t)CPFLAG)) ==
1495 				    (char *)NULL) {
1496 					perror("Can't malloc argument space");
1497 					exit(1);
1498 				}
1499 				(void) sprintf(mav[mac], "-%c", av[i][a]);
1500 				++mac;
1501 			}
1502 		} else {
1503 			/* otherwise, just copy the argument:		*/
1504 			if ((mav[mac] = malloc(sz + 1)) == (char *)NULL) {
1505 				perror("Can't malloc argument space");
1506 				exit(1);
1507 			}
1508 			(void) strcpy(mav[mac], av[i]);
1509 			++mac;
1510 		}
1511 	}
1512 
1513 	mav[mac] = (char *)NULL;
1514 }
1515