xref: /illumos-gate/usr/src/tools/cscope-fast/crossref.c (revision 581cede61ac9c14d8d4ea452562a567189eead78)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*	Copyright (c) 1988 AT&T	*/
23 /*	  All Rights Reserved  	*/
24 
25 
26 /*
27  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
28  * Use is subject to license terms.
29  */
30 
31 #pragma ident	"%Z%%M%	%I%	%E% SMI"
32 
33 /*
34  *	cscope - interactive C symbol cross-reference
35  *
36  *	build cross-reference file
37  */
38 
39 #include "global.h"
40 
/*
 * convert long to a string
 *
 * The value is written in base BASE, most significant digit first, with
 * each digit d stored as the character d + '!'.
 *
 * The macro works on variables that must already exist in the calling
 * function:
 *	n, i	integer scratch variables
 *	buf	char array that receives the digits, filled from the end
 *	s	char *; left pointing at the first digit of the
 *		NUL-terminated result inside buf
 *	digits	int; left holding the number of digits produced
 *
 * buf must be large enough for every digit of the largest value plus the
 * terminating NUL.  The loop only handles values >= 0.
 *
 * The body is wrapped in do { } while (0) so that "ltobase(x);" is a
 * single statement and can safely be used as the body of an unbraced
 * if/else or loop.
 */
#define	ltobase(value)	do { \
				n = (value); \
				s = buf + (sizeof (buf) - 1); \
				*s = '\0'; \
				digits = 1; \
				while (n >= BASE) { \
					++digits; \
					i = n; \
					n /= BASE; \
					*--s = i - n * BASE + '!'; \
				} \
				*--s = n + '!'; \
			} while (0)
53 
54 #define	SYMBOLINC	20	/* symbol list size increment */
55 #define	FREAD	"r"		/* fopen for reading */
56 
57 long	dboffset;		/* new database offset */
58 BOOL	errorsfound;		/* prompt before clearing messages */
59 long	fileindex;		/* source file name index */
60 long	lineoffset;		/* source line database offset */
61 long	npostings;		/* number of postings */
62 int	nsrcoffset;		/* number of file name database offsets */
63 long	*srcoffset;		/* source file name database offsets */
64 int	symbols;		/* number of symbols */
65 
66 static	char	*filename;	/* file name for warning messages */
67 static	long	fcnoffset;	/* function name database offset */
68 static	long	macrooffset;	/* macro name database offset */
69 static	int	msymbols = SYMBOLINC;	/* maximum number of symbols */
70 static	struct	symbol {	/* symbol data */
71 	int	type;		/* type */
72 	int	first;		/* index of first character in text */
73 	int	last;		/* index of last+1 character in text */
74 	int	length;		/* symbol length */
75 } *symbol;
76 
77 static void putcrossref(void);
78 
79 void
80 crossref(char *srcfile)
81 {
82 	int	i;
83 	int	length;		/* symbol length */
84 	int	token;			/* current token */
85 
86 	/* open the source file */
87 	if ((yyin = vpfopen(srcfile, FREAD)) == NULL) {
88 		cannotopen(srcfile);
89 		errorsfound = YES;
90 		return;
91 	}
92 	filename = srcfile;	/* save the file name for warning messages */
93 	putfilename(srcfile);	/* output the file name */
94 	dbputc('\n');
95 	dbputc('\n');
96 
97 	/* read the source file */
98 	initscanner(srcfile);
99 	fcnoffset = macrooffset = 0;
100 	symbols = 0;
101 	if (symbol == NULL) {
102 		symbol = mymalloc(msymbols * sizeof (struct symbol));
103 	}
104 	for (;;) {
105 
106 		/* get the next token */
107 		switch (token = yylex()) {
108 		default:
109 			/* if requested, truncate C symbols */
110 			length = last - first;
111 			if (truncatesyms && length > 8 &&
112 			    token != INCLUDE && token != NEWFILE) {
113 				length = 8;
114 				last = first + 8;
115 			}
116 			/* see if the token has a symbol */
117 			if (length == 0) {
118 				savesymbol(token);
119 				break;
120 			}
121 			/* see if the symbol is already in the list */
122 			for (i = 0; i < symbols; ++i) {
123 				if (length == symbol[i].length &&
124 				    strncmp(yytext + first, yytext +
125 					symbol[i].first, length) == 0 &&
126 				    (token == IDENT ||
127 					token == symbol[i].type)) {
128 					first = yyleng;
129 					break;
130 				}
131 			}
132 			if (i == symbols) {	/* if not already in list */
133 				savesymbol(token);
134 			}
135 			break;
136 
137 		case NEWLINE:	/* end of line containing symbols */
138 			--yyleng;	/* remove the newline */
139 			putcrossref();	/* output the symbols and source line */
140 			lineno = yylineno; /* save the symbol line number */
141 			break;
142 
143 		case LEXEOF:	/* end of file; last line may not have \n */
144 
145 			/*
146 			 * if there were symbols, output them and the
147 			 * source line
148 			 */
149 			if (symbols > 0) {
150 				putcrossref();
151 			}
152 			(void) fclose(yyin);	/* close the source file */
153 
154 			/* output the leading tab expected by the next call */
155 			dbputc('\t');
156 			return;
157 		}
158 	}
159 }
160 
161 /* save the symbol in the list */
162 
163 void
164 savesymbol(int token)
165 {
166 	/* make sure there is room for the symbol */
167 	if (symbols == msymbols) {
168 		msymbols += SYMBOLINC;
169 		symbol = (struct symbol *)myrealloc(symbol,
170 		    msymbols * sizeof (struct symbol));
171 	}
172 	/* save the symbol */
173 	symbol[symbols].type = token;
174 	symbol[symbols].first = first;
175 	symbol[symbols].last = last;
176 	symbol[symbols].length = last - first;
177 	++symbols;
178 	first = yyleng;
179 }
180 
181 /* output the file name */
182 
183 void
184 putfilename(char *srcfile)
185 {
186 	/* check for file system out of space */
187 	/* note: dbputc is not used to avoid lint complaint */
188 	if (putc(NEWFILE, newrefs) == EOF) {
189 		cannotwrite(newreffile);
190 		/* NOTREACHED */
191 	}
192 	++dboffset;
193 	if (invertedindex) {
194 		srcoffset[nsrcoffset++] = dboffset;
195 	}
196 	dbfputs(srcfile);
197 	fcnoffset = macrooffset = 0;
198 }
199 
200 /* output the symbols and source line */
201 
202 static void
203 putcrossref(void)
204 {
205 	int	i, j;
206 	unsigned c;
207 	BOOL	blank = NO;	/* output blank */
208 	BOOL	newline = NO;	/* output newline */
209 	int	symput = 0;	/* symbols output */
210 	int	type;
211 
212 	/* output the source line */
213 	lineoffset = dboffset;
214 	dbfprintf(newrefs, "%d ", lineno);
215 	for (i = 0; i < yyleng; ++i) {
216 
217 		/* change a tab to a blank and compress blanks */
218 		if ((c = yytext[i]) == ' ' || c == '\t') {
219 			blank = YES;
220 		}
221 		/* look for the start of a symbol */
222 		else if (symput < symbols && i == symbol[symput].first) {
223 
224 			/* check for compressed blanks */
225 			if (blank) {
226 				blank = NO;
227 				if (newline) {
228 					dbputc('\n');
229 				}
230 				dbputc(' ');
231 			}
232 			dbputc('\n');	/* symbols start on a new line */
233 
234 			/* output any symbol type */
235 			if ((type = symbol[symput].type) != IDENT) {
236 				dbputc('\t');
237 				dbputc(type);
238 			} else {
239 				type = ' ';
240 			}
241 			/* output the symbol */
242 			j = symbol[symput].last;
243 			c = yytext[j];
244 			yytext[j] = '\0';
245 			if (invertedindex) {
246 				putposting(yytext + i, type);
247 			}
248 			putstring(yytext + i);
249 			newline = YES;
250 			yytext[j] = (char)c;
251 			i = j - 1;
252 			++symput;
253 		} else {
254 			if (newline) {
255 				newline = NO;
256 				dbputc('\n');
257 			}
258 			/* check for compressed blanks */
259 			if (blank) {
260 				if (dicode2[c]) {
261 					c = (0200 - 2) + dicode1[' '] +
262 					    dicode2[c];
263 				} else {
264 					dbputc(' ');
265 				}
266 			} else if (dicode1[c] &&
267 			    (j = dicode2[(unsigned)yytext[i + 1]]) != 0 &&
268 			    symput < symbols && i + 1 != symbol[symput].first) {
269 				/* compress digraphs */
270 				c = (0200 - 2) + dicode1[c] + j;
271 				++i;
272 			}
273 			/*
274 			 * if the last line of the file is a '}' without a
275 			 * newline, the lex EOF code overwrites it with a 0
276 			 */
277 			if (c) {
278 				dbputc((int)c);
279 			} else {
280 				dbputc(' ');
281 			}
282 			blank = NO;
283 
284 			/* skip compressed characters */
285 			if (c < ' ') {
286 				++i;
287 
288 				/* skip blanks before a preprocesor keyword */
289 				/*
290 				 * note: don't use isspace() because \f and \v
291 				 * are used for keywords
292 				 */
293 				while ((j = yytext[i]) == ' ' || j == '\t') {
294 					++i;
295 				}
296 				/* skip the rest of the keyword */
297 				while (isalpha(yytext[i])) {
298 					++i;
299 				}
300 				/* skip space after certain keywords */
301 				if (keyword[c].delim != '\0') {
302 					while ((j = yytext[i]) == ' ' ||
303 					    j == '\t') {
304 						++i;
305 					}
306 				}
307 				/* skip a '(' after certain keywords */
308 				if (keyword[c].delim == '(' &&
309 				    yytext[i] == '(') {
310 					++i;
311 				}
312 				--i;	/* compensate for ++i in for() */
313 			}
314 		}
315 	}
316 	/* ignore trailing blanks */
317 	dbputc('\n');
318 	dbputc('\n');
319 
320 	/* output any #define end marker */
321 	/*
322 	 * note: must not be part of #define so putsource() doesn't discard it
323 	 * so findcalledbysub() can find it and return
324 	 */
325 	if (symput < symbols && symbol[symput].type == DEFINEEND) {
326 		dbputc('\t');
327 		dbputc(DEFINEEND);
328 		dbputc('\n');
329 		dbputc('\n');	/* mark beginning of next source line */
330 		macrooffset = 0;
331 	}
332 	symbols = 0;
333 }
334 
335 /* output the inverted index posting */
336 
337 void
338 putposting(char *term, int type)
339 {
340 	long	i, n;
341 	char	*s;
342 	int	digits;		/* digits output */
343 	long	offset;		/* function/macro database offset */
344 	char	buf[11];		/* number buffer */
345 
346 	/* get the function or macro name offset */
347 	offset = fcnoffset;
348 	if (macrooffset != 0) {
349 		offset = macrooffset;
350 	}
351 	/* then update them to avoid negative relative name offset */
352 	switch (type) {
353 	case DEFINE:
354 		macrooffset = dboffset;
355 		break;
356 	case DEFINEEND:
357 		macrooffset = 0;
358 		return;		/* null term */
359 	case FCNDEF:
360 		fcnoffset = dboffset;
361 		break;
362 	case FCNEND:
363 		fcnoffset = 0;
364 		return;		/* null term */
365 	}
366 	/* ignore a null term caused by a enum/struct/union without a tag */
367 	if (*term == '\0') {
368 		return;
369 	}
370 	/* skip any #include secondary type char (< or ") */
371 	if (type == INCLUDE) {
372 		++term;
373 	}
374 	/*
375 	 * output the posting, which should be as small as possible to reduce
376 	 * the temp file size and sort time
377 	 */
378 	(void) fputs(term, postings);
379 	(void) putc(' ', postings);
380 
381 	/*
382 	 * the line offset is padded so postings for the same term will sort
383 	 * in ascending line offset order to order the references as they
384 	 * appear withing a source file
385 	 */
386 	ltobase(lineoffset);
387 	for (i = PRECISION - digits; i > 0; --i) {
388 		(void) putc('!', postings);
389 	}
390 	do {
391 		(void) putc(*s, postings);
392 	} while (*++s != '\0');
393 
394 	/* postings are also sorted by type */
395 	(void) putc(type, postings);
396 
397 	/* function or macro name offset */
398 	if (offset > 0) {
399 		(void) putc(' ', postings);
400 		ltobase(offset);
401 		do {
402 			(void) putc(*s, postings);
403 		} while (*++s != '\0');
404 	}
405 	if (putc('\n', postings) == EOF) {
406 		cannotwrite(temp1);
407 		/* NOTREACHED */
408 	}
409 	++npostings;
410 }
411 
412 /* put the string into the new database */
413 
414 void
415 putstring(char *s)
416 {
417 	unsigned c;
418 	int	i;
419 
420 	/* compress digraphs */
421 	for (i = 0; (c = s[i]) != '\0'; ++i) {
422 		if (dicode1[c] && dicode2[(unsigned)s[i + 1]]) {
423 			c = (0200 - 2) + dicode1[c] +
424 			    dicode2[(unsigned)s[i + 1]];
425 			++i;
426 		}
427 		dbputc((int)c);
428 	}
429 }
430 
431 /* print a warning message with the file name and line number */
432 
433 void
434 warning(text)
435 char	*text;
436 {
437 	extern	int	yylineno;
438 
439 	(void) fprintf(stderr, "cscope: \"%s\", line %d: warning: %s\n",
440 	    filename, yylineno, text);
441 	errorsfound = YES;
442 }
443