xref: /illumos-gate/usr/src/cmd/checknr/checknr.c (revision 581cede61ac9c14d8d4ea452562a567189eead78)
1 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
2 /*	  All Rights Reserved  	*/
3 
4 
5 /*
6  * Copyright (c) 1980 Regents of the University of California.
7  * All rights reserved. The Berkeley software License Agreement
8  * specifies the terms and conditions for redistribution.
9  */
10 
11 /*
12  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
13  * Use is subject to license terms.
14  */
15 
16 #pragma ident	"%Z%%M%	%I%	%E% SMI"
17 
/*
 * checknr: check an nroff/troff input file for matching macro calls.
 * We also attempt to match size and font changes, but only the embedded
 * kind.  These must end in \s0 and \fP, respectively.  Maybe more
 * sophistication later, but for now think of these restrictions as
 * contributions to structured typesetting.
 */
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <unistd.h>
28 #include <string.h>
29 #include <ctype.h>
30 #include <locale.h>
31 
/* Compile-time sizing limits. */
#define	MAXSTK	100	/* Stack size */
#define	MAXBR	100	/* Max number of bracket pairs known */
#define	MAXCMDS	500	/* Max number of commands known */

/* Number of entries currently allocated for the stack. */
static int maxstk;
36 
/*
 * One entry of the stack of openers seen so far and not yet closed.
 */
struct stkstr {
	int opno;	/* index into br[] of the opening bracket */
	int pl;		/* '+', '-', ' ' for \s; 1 for \f; 0 for a macro line */
	int parm;	/* parameter given to the size or font change */
	int lno;	/* input line number on which the opener appeared */
};

static struct stkstr *stk;	/* the stack itself, allocated in main */
static int stktop;		/* index of the top entry; -1 when empty */
47 
/*
 * Table of opener/closer pairs.  The list ends at the first entry whose
 * opener is NULL; main appends pairs given with -a in the unused slots.
 * SZ and FT are the fixed indices of the size and font pairs.
 */
#define	SZ	0
#define	FT	1

static struct brstr {
	char *opbr;	/* name of the opening macro */
	char *clbr;	/* name of the macro that closes it */
} br[MAXBR] = {
	/* A few bare bones troff commands */
	{ "sz", "sz" },		/* SZ: also \s */
	{ "ft", "ft" },		/* FT: also \f */
	/* the -mm package */
	{ "AL", "LE" },
	{ "AS", "AE" },
	{ "BL", "LE" },
	{ "BS", "BE" },
	{ "DF", "DE" },
	{ "DL", "LE" },
	{ "DS", "DE" },
	{ "FS", "FE" },
	{ "ML", "LE" },
	{ "NS", "NE" },
	{ "RL", "LE" },
	{ "VL", "LE" },
	/* the -ms package */
	{ "AB", "AE" },
	{ "BD", "DE" },
	{ "CD", "DE" },
	{ "DS", "DE" },
	{ "FS", "FE" },
	{ "ID", "DE" },
	{ "KF", "KE" },
	{ "KS", "KE" },
	{ "LD", "DE" },
	{ "LG", "NL" },
	{ "QS", "QE" },
	{ "RS", "RE" },
	{ "SM", "NL" },
	{ "XA", "XE" },
	{ "XS", "XE" },
	/* The -me package */
	{ "(b", ")b" },
	{ "(c", ")c" },
	{ "(d", ")d" },
	{ "(f", ")f" },
	{ "(l", ")l" },
	{ "(q", ")q" },
	{ "(x", ")x" },
	{ "(z", ")z" },
	/* Things needed by preprocessors */
	{ "EQ", "EN" },
	{ "TS", "TE" },
	/* Refer */
	{ "[", "]" },
	/* end-of-table marker */
	{ NULL, NULL }
};
105 
106 /*
107  * All commands known to nroff, plus macro packages.
108  * Used so we can complain about unrecognized commands.
109  */
110 static char *knowncmds[MAXCMDS] = {
111 "$c", "$f", "$h", "$p", "$s", "(b", "(c", "(d", "(f", "(l", "(q", "(t",
112 "(x", "(z", ")b", ")c", ")d", ")f", ")l", ")q", ")t", ")x", ")z", "++",
113 "+c", "1C", "1c", "2C", "2c", "@(", "@)", "@C", "@D", "@F", "@I", "@M",
114 "@c", "@e", "@f", "@h", "@m", "@n", "@o", "@p", "@r", "@t", "@z", "AB",
115 "AE", "AF", "AI", "AL", "AM", "AS", "AT", "AU", "AX", "B",  "B1", "B2",
116 "BD", "BE", "BG", "BL", "BS", "BT", "BX", "C1", "C2", "CD", "CM", "CT",
117 "D",  "DA", "DE", "DF", "DL", "DS", "DT", "EC", "EF", "EG", "EH", "EM",
118 "EN", "EQ", "EX", "FA", "FD", "FE", "FG", "FJ", "FK", "FL", "FN", "FO",
119 "FQ", "FS", "FV", "FX", "H",  "HC", "HD", "HM", "HO", "HU", "I",  "ID",
120 "IE", "IH", "IM", "IP", "IX", "IZ", "KD", "KE", "KF", "KQ", "KS", "LB",
121 "LC", "LD", "LE", "LG", "LI", "LP", "MC", "ME", "MF", "MH", "ML", "MR",
122 "MT", "ND", "NE", "NH", "NL", "NP", "NS", "OF", "OH", "OK", "OP", "P",
123 "P1", "PF", "PH", "PP", "PT", "PX", "PY", "QE", "QP", "QS", "R",  "RA",
124 "RC", "RE", "RL", "RP", "RQ", "RS", "RT", "S",  "S0", "S2", "S3", "SA",
125 "SG", "SH", "SK", "SM", "SP", "SY", "T&", "TA", "TB", "TC", "TD", "TE",
126 "TH", "TL", "TM", "TP", "TQ", "TR", "TS", "TX", "UL", "US", "UX", "VL",
127 "WC", "WH", "XA", "XD", "XE", "XF", "XK", "XP", "XS", "[",  "[-", "[0",
128 "[1", "[2", "[3", "[4", "[5", "[<", "[>", "[]", "]",  "]-", "]<", "]>",
129 "][", "ab", "ac", "ad", "af", "am", "ar", "as", "b",  "ba", "bc", "bd",
130 "bi", "bl", "bp", "br", "bx", "c.", "c2", "cc", "ce", "cf", "ch", "cs",
131 "ct", "cu", "da", "de", "di", "dl", "dn", "ds", "dt", "dw", "dy", "ec",
132 "ef", "eh", "el", "em", "eo", "ep", "ev", "ex", "fc", "fi", "fl", "fo",
133 "fp", "ft", "fz", "hc", "he", "hl", "hp", "ht", "hw", "hx", "hy", "i",
134 "ie", "if", "ig", "in", "ip", "it", "ix", "lc", "lg", "li", "ll", "ln",
135 "lo", "lp", "ls", "lt", "m1", "m2", "m3", "m4", "mc", "mk", "mo", "n1",
136 "n2", "na", "ne", "nf", "nh", "nl", "nm", "nn", "np", "nr", "ns", "nx",
137 "of", "oh", "os", "pa", "pc", "pi", "pl", "pm", "pn", "po", "pp", "ps",
138 "q",  "r",  "rb", "rd", "re", "rm", "rn", "ro", "rr", "rs", "rt", "sb",
139 "sc", "sh", "sk", "so", "sp", "ss", "st", "sv", "sz", "ta", "tc", "th",
140 "ti", "tl", "tm", "tp", "tr", "u",  "uf", "uh", "ul", "vs", "wh", "xp",
141 "yr", 0
142 };
143 
/* State describing the input currently being checked. */
static char line[256];		/* text of the line just read */
static int lineno;		/* its line number within the file */
static char *cfilename;		/* name of the file it came from */
static int nfiles;		/* how many files were named on the command line */

/* Command-line switches. */
static int fflag;		/* -f: ignore \f */
static int sflag;		/* -s: ignore \s */

/* Bookkeeping for the knowncmds table. */
static int ncmds;		/* number of entries in use */
static int slot;		/* insertion point left by binsrch on a miss */
152 
/*
 * Forward declarations of the file-local functions.  Functions that take
 * no arguments are declared with (void) so that each declaration is a real
 * prototype and a call with stray arguments is diagnosed by the compiler.
 */
static void growstk(void);
static void usage(void);
static void process(FILE *f);
static void complain(int i);
static void prop(int i);
static void chkcmd(char *line, char *mac);
static void nomatch(char *mac);
static int eq(char *s1, char *s2);
static void pe(int lineno);
static void checkknown(char *mac);
static void addcmd(char *line);
static void addmac(char *mac);
static int binsrch(char *mac);
166 
167 static void
168 growstk()
169 {
170 	stktop++;
171 	if (stktop >= maxstk) {
172 		maxstk *= 2;
173 		stk = (struct stkstr *)realloc(stk,
174 		    sizeof (struct stkstr) * maxstk);
175 	}
176 }
177 
178 int
179 main(argc, argv)
180 int argc;
181 char **argv;
182 {
183 	FILE *f;
184 	int i;
185 	char *cp;
186 	char b1[4];
187 
188 	(void) setlocale(LC_ALL, "");
189 #if !defined(TEXT_DOMAIN)
190 #define	TEXT_DOMAIN	"SYS_TEST"
191 #endif
192 	(void) textdomain(TEXT_DOMAIN);
193 	stk = (struct stkstr *)calloc(sizeof (struct stkstr), 100);
194 	maxstk = 100;
195 	/* Figure out how many known commands there are */
196 	while (knowncmds[ncmds])
197 		ncmds++;
198 	while (argc > 1 && argv[1][0] == '-') {
199 		switch (argv[1][1]) {
200 
201 		/* -a: add pairs of macros */
202 		case 'a':
203 			i = strlen(argv[1]) - 2;
204 			if (i % 6 != 0)
205 				usage();
206 			/* look for empty macro slots */
207 			for (i = 0; br[i].opbr; i++)
208 				;
209 			for (cp = argv[1]+3; cp[-1]; cp += 6) {
210 				br[i].opbr = malloc(3);
211 				(void) strncpy(br[i].opbr, cp, 2);
212 				br[i].clbr = malloc(3);
213 				(void) strncpy(br[i].clbr, cp+3, 2);
214 				/* knows pairs are also known cmds */
215 				addmac(br[i].opbr);
216 				addmac(br[i].clbr);
217 				i++;
218 			}
219 			break;
220 
221 		/* -c: add known commands */
222 		case 'c':
223 			i = strlen(argv[1]) - 2;
224 			if (i % 3 != 0)
225 				usage();
226 			for (cp = argv[1]+3; cp[-1]; cp += 3) {
227 				if (cp[2] && cp[2] != '.')
228 					usage();
229 				(void) strncpy(b1, cp, 2);
230 				addmac(b1);
231 			}
232 			break;
233 
234 		/* -f: ignore font changes */
235 		case 'f':
236 			fflag = 1;
237 			break;
238 
239 		/* -s: ignore size changes */
240 		case 's':
241 			sflag = 1;
242 			break;
243 		default:
244 			usage();
245 		}
246 		argc--; argv++;
247 	}
248 
249 	nfiles = argc - 1;
250 
251 	if (nfiles > 0) {
252 		for (i = 1; i < argc; i++) {
253 			cfilename = argv[i];
254 			f = fopen(cfilename, "r");
255 			if (f == NULL) {
256 				perror(cfilename);
257 				exit(1);
258 				}
259 			else
260 				process(f);
261 		}
262 	} else {
263 		cfilename = "stdin";
264 		process(stdin);
265 	}
266 	return (0);
267 }
268 
/*
 * usage: print the command synopsis on standard output and exit with
 * status 1.  Never returns.
 */
static void
usage()
{
	(void) printf(gettext("Usage: "
	    "checknr [ -fs ] [ -a.xx.yy.xx.yy...] [-c.xx.xx.xx...] "
	    "[ filename .. ]\n"));
	exit(1);
}
276 
277 static void
278 process(FILE *f)
279 {
280 	int i, n;
281 	char mac[5];	/* The current macro or nroff command */
282 	int pl;
283 
284 	stktop = -1;
285 	for (lineno = 1; fgets(line, sizeof (line), f); lineno++) {
286 		if (line[0] == '.') {
287 			/*
288 			 * find and isolate the macro/command name.
289 			 */
290 			(void) strncpy(mac, line+1, 4);
291 			if (isspace(mac[0])) {
292 				pe(lineno);
293 				(void) printf(gettext("Empty command\n"));
294 			} else if (isspace(mac[1])) {
295 				mac[1] = 0;
296 			} else if (isspace(mac[2])) {
297 				mac[2] = 0;
298 			} else if (mac[0] != '\\' || mac[1] != '\"') {
299 				pe(lineno);
300 				(void) printf(gettext("Command too long\n"));
301 			}
302 
303 			/*
304 			 * Is it a known command?
305 			 */
306 			checkknown(mac);
307 
308 			/*
309 			 * Should we add it?
310 			 */
311 			if (eq(mac, "de"))
312 				addcmd(line);
313 
314 			chkcmd(line, mac);
315 		}
316 
317 		/*
318 		 * At this point we process the line looking
319 		 * for \s and \f.
320 		 */
321 		for (i = 0; line[i]; i++)
322 			if (line[i] == '\\' && (i == 0 || line[i-1] != '\\')) {
323 				if (!sflag && line[++i] == 's') {
324 					pl = line[++i];
325 					if (isdigit(pl)) {
326 						n = pl - '0';
327 						pl = ' ';
328 					} else
329 						n = 0;
330 					while (isdigit(line[++i]))
331 						n = 10 * n + line[i] - '0';
332 					i--;
333 					if (n == 0) {
334 						if (stk[stktop].opno == SZ) {
335 							stktop--;
336 						} else {
337 							pe(lineno);
338 							(void) printf(
339 						gettext("unmatched \\s0\n"));
340 						}
341 					} else {
342 						growstk();
343 						stk[stktop].opno = SZ;
344 						stk[stktop].pl = pl;
345 						stk[stktop].parm = n;
346 						stk[stktop].lno = lineno;
347 					}
348 				} else if (!fflag && line[i] == 'f') {
349 					n = line[++i];
350 					if (n == 'P') {
351 						if (stk[stktop].opno == FT) {
352 							stktop--;
353 						} else {
354 							pe(lineno);
355 							(void) printf(
356 						gettext("unmatched \\fP\n"));
357 						}
358 					} else {
359 						growstk();
360 						stk[stktop].opno = FT;
361 						stk[stktop].pl = 1;
362 						stk[stktop].parm = n;
363 						stk[stktop].lno = lineno;
364 					}
365 				}
366 			}
367 	}
368 	/*
369 	 * We've hit the end and look at all this stuff that hasn't been
370 	 * matched yet!  Complain, complain.
371 	 */
372 	for (i = stktop; i >= 0; i--) {
373 		complain(i);
374 	}
375 }
376 
377 static void
378 complain(int i)
379 {
380 	pe(stk[i].lno);
381 	(void) printf(gettext("Unmatched "));
382 	prop(i);
383 	(void) printf("\n");
384 }
385 
386 static void
387 prop(int i)
388 {
389 	if (stk[i].pl == 0)
390 		(void) printf(".%s", br[stk[i].opno].opbr);
391 	else switch (stk[i].opno) {
392 	case SZ:
393 		(void) printf("\\s%c%d", stk[i].pl, stk[i].parm);
394 		break;
395 	case FT:
396 		(void) printf("\\f%c", stk[i].parm);
397 		break;
398 	default:
399 		(void) printf(gettext("Bug: stk[%d].opno = %d = .%s, .%s"),
400 			i, stk[i].opno, br[stk[i].opno].opbr,
401 			br[stk[i].opno].clbr);
402 	}
403 }
404 
405 /* ARGSUSED */
406 static void
407 chkcmd(char *line, char *mac)
408 {
409 	int i;
410 
411 	/*
412 	 * Check to see if it matches top of stack.
413 	 */
414 	if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr))
415 		stktop--;	/* OK. Pop & forget */
416 	else {
417 		/* No. Maybe it's an opener */
418 		for (i = 0; br[i].opbr; i++) {
419 			if (eq(mac, br[i].opbr)) {
420 				/* Found. Push it. */
421 				growstk();
422 				stk[stktop].opno = i;
423 				stk[stktop].pl = 0;
424 				stk[stktop].parm = 0;
425 				stk[stktop].lno = lineno;
426 				break;
427 			}
428 			/*
429 			 * Maybe it's an unmatched closer.
430 			 * NOTE: this depends on the fact
431 			 * that none of the closers can be
432 			 * openers too.
433 			 */
434 			if (eq(mac, br[i].clbr)) {
435 				nomatch(mac);
436 				break;
437 			}
438 		}
439 	}
440 }
441 
442 static void
443 nomatch(char *mac)
444 {
445 	int i, j;
446 
447 	/*
448 	 * Look for a match further down on stack
449 	 * If we find one, it suggests that the stuff in
450 	 * between is supposed to match itself.
451 	 */
452 	for (j = stktop; j >= 0; j--)
453 		if (eq(mac, br[stk[j].opno].clbr)) {
454 			/* Found.  Make a good diagnostic. */
455 			if (j == stktop-2) {
456 				/*
457 				 * Check for special case \fx..\fR and don't
458 				 * complain.
459 				 */
460 				if (stk[j+1].opno == FT &&
461 				    stk[j+1].parm != 'R' &&
462 				    stk[j+2].opno == FT &&
463 				    stk[j+2].parm == 'R') {
464 					stktop = j -1;
465 					return;
466 				}
467 				/*
468 				 * We have two unmatched frobs.  Chances are
469 				 * they were intended to match, so we mention
470 				 * them together.
471 				 */
472 				pe(stk[j+1].lno);
473 				prop(j+1);
474 				(void) printf(gettext(" does not match %d: "),
475 					stk[j+2].lno);
476 				prop(j+2);
477 				(void) printf("\n");
478 			} else for (i = j+1; i <= stktop; i++) {
479 				complain(i);
480 			}
481 			stktop = j-1;
482 			return;
483 		}
484 	/* Didn't find one.  Throw this away. */
485 	pe(lineno);
486 	(void) printf(gettext("Unmatched .%s\n"), mac);
487 }
488 
/*
 * eq: string equality.  Returns 1 when s1 and s2 hold the same
 * characters and 0 otherwise.
 */
static int
eq(char *s1, char *s2)
{
	if (strcmp(s1, s2) != 0)
		return (0);
	return (1);
}
495 
496 /* print the first part of an error message, given the line number */
497 static void
498 pe(int lineno)
499 {
500 	if (nfiles > 1)
501 		(void) printf("%s: ", cfilename);
502 	(void) printf("%d: ", lineno);
503 }
504 
505 static void
506 checkknown(char *mac)
507 {
508 
509 	if (eq(mac, "."))
510 		return;
511 	if (binsrch(mac) >= 0)
512 		return;
513 	if (mac[0] == '\\' && mac[1] == '"')	/* comments */
514 		return;
515 
516 	pe(lineno);
517 	(void) printf(gettext("Unknown command: .%s\n"), mac);
518 }
519 
520 /*
521  * We have a .de xx line in "line".  Add xx to the list of known commands.
522  */
523 static void
524 addcmd(char *line)
525 {
526 	char *mac;
527 
528 	/* grab the macro being defined */
529 	mac = line+4;
530 	while (isspace(*mac))
531 		mac++;
532 	if (*mac == 0) {
533 		pe(lineno);
534 		(void) printf(gettext("illegal define: %s\n"), line);
535 		return;
536 	}
537 	mac[2] = 0;
538 	if (isspace(mac[1]) || mac[1] == '\\')
539 		mac[1] = 0;
540 	if (ncmds >= MAXCMDS) {
541 		(void) printf(gettext("Only %d known commands allowed\n"),
542 		    MAXCMDS);
543 		exit(1);
544 	}
545 	addmac(mac);
546 }
547 
548 /*
549  * Add mac to the list.  We should really have some kind of tree
550  * structure here but this is a quick-and-dirty job and I just don't
551  * have time to mess with it.  (I wonder if this will come back to haunt
552  * me someday?)  Anyway, I claim that .de is fairly rare in user
553  * nroff programs, and the loop below is pretty fast.
554  */
555 static void
556 addmac(char *mac)
557 {
558 	char **src, **dest, **loc;
559 
560 	if (binsrch(mac) >= 0) {	/* it's OK to redefine something */
561 #ifdef DEBUG
562 		(void) printf("binsrch(%s) -> already in table\n", mac);
563 #endif
564 		return;
565 	}
566 	/* binsrch sets slot as a side effect */
567 #ifdef DEBUG
568 printf("binsrch(%s) -> %d\n", mac, slot);
569 #endif
570 	loc = &knowncmds[slot];
571 	src = &knowncmds[ncmds-1];
572 	dest = src+1;
573 	while (dest > loc)
574 		*dest-- = *src--;
575 	*loc = malloc(3);
576 	(void) strcpy(*loc, mac);
577 	ncmds++;
578 #ifdef DEBUG
579 	(void) printf("after: %s %s %s %s %s, %d cmds\n",
580 	    knowncmds[slot-2], knowncmds[slot-1], knowncmds[slot],
581 	    knowncmds[slot+1], knowncmds[slot+2], ncmds);
582 #endif
583 }
584 
585 /*
586  * Do a binary search in knowncmds for mac.
587  * If found, return the index.  If not, return -1.
588  */
589 static int
590 binsrch(char *mac)
591 {
592 	char *p;	/* pointer to current cmd in list */
593 	int d;		/* difference if any */
594 	int mid;	/* mid point in binary search */
595 	int top, bot;	/* boundaries of bin search, inclusive */
596 
597 	top = ncmds-1;
598 	bot = 0;
599 	while (top >= bot) {
600 		mid = (top+bot)/2;
601 		p = knowncmds[mid];
602 		d = p[0] - mac[0];
603 		if (d == 0)
604 			d = p[1] - mac[1];
605 		if (d == 0)
606 			return (mid);
607 		if (d < 0)
608 			bot = mid + 1;
609 		else
610 			top = mid - 1;
611 	}
612 	slot = bot;	/* place it would have gone */
613 	return (-1);
614 }
615