Annotation of src/usr.bin/checknr/checknr.c, Revision 1.3
1.3 ! aaron 1: /* $OpenBSD: checknr.c,v 1.2 1996/06/26 05:31:50 deraadt Exp $ */
1.1 deraadt 2: /* $NetBSD: checknr.c,v 1.4 1995/03/26 04:10:19 glass Exp $ */
3:
4: /*
5: * Copyright (c) 1980, 1993
6: * The Regents of the University of California. All rights reserved.
7: *
8: * Redistribution and use in source and binary forms, with or without
9: * modification, are permitted provided that the following conditions
10: * are met:
11: * 1. Redistributions of source code must retain the above copyright
12: * notice, this list of conditions and the following disclaimer.
13: * 2. Redistributions in binary form must reproduce the above copyright
14: * notice, this list of conditions and the following disclaimer in the
15: * documentation and/or other materials provided with the distribution.
16: * 3. All advertising materials mentioning features or use of this software
17: * must display the following acknowledgement:
18: * This product includes software developed by the University of
19: * California, Berkeley and its contributors.
20: * 4. Neither the name of the University nor the names of its contributors
21: * may be used to endorse or promote products derived from this software
22: * without specific prior written permission.
23: *
24: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34: * SUCH DAMAGE.
35: */
36:
/*
 * Identification strings embedded in the object file.  They are left
 * out when the source is run through lint.
 */
#ifndef lint
static char copyright[] =
	"@(#) Copyright (c) 1980, 1993\n"
	"The Regents of the University of California. All rights reserved.\n";

#if 0
static char sccsid[] = "@(#)checknr.c 8.1 (Berkeley) 6/6/93";
#else
static char rcsid[] = "$OpenBSD: checknr.c,v 1.2 1996/06/26 05:31:50 deraadt Exp $";
#endif
#endif /* not lint */
50:
51: /*
52: * checknr: check an nroff/troff input file for matching macro calls.
53: * we also attempt to match size and font changes, but only the embedded
54: * kind. These must end in \s0 and \fP resp. Maybe more sophistication
55: * later but for now think of these restrictions as contributions to
56: * structured typesetting.
57: */
58: #include <stdio.h>
59: #include <string.h>
60: #include <ctype.h>
61:
/* Capacities of the fixed-size tables used throughout this file. */
#define MAXSTK	100	/* Stack size */
#define MAXBR	100	/* Max number of bracket pairs known */
#define MAXCMDS	500	/* Max number of commands known */

/*
 * One remembered opener.  Entries are pushed when an opening macro,
 * \s size change or \f font change is seen and popped again when the
 * matching closer turns up.
 */
struct stkstr {
	int opno;	/* number of opening bracket (index into br[]) */
	int pl;		/* '+', '-', ' ' for \s, 1 for \f, 0 for macros such as .ft */
	int parm;	/* parm to size, font, etc */
	int lno;	/* line number the thing came in in */
};

/* The stack itself; stktop indexes the top entry, -1 meaning empty. */
struct stkstr stk[MAXSTK];
int stktop;
76:
/*
 * The kinds of opening and closing brackets.  The table is searched
 * linearly and ends at the first entry whose opbr is null; the unused
 * tail of the array is where main() stores pairs given with -a.
 */
#define SZ	0	/* slot of the size pair, shared with \s */
#define FT	1	/* slot of the font pair, shared with \f */

struct brstr {
	char *opbr;	/* opening macro name */
	char *clbr;	/* closing macro name */
};

struct brstr br[MAXBR] = {
	/* A few bare bones troff commands */
	{ "sz", "sz" },		/* SZ: also \s */
	{ "ft", "ft" },		/* FT: also \f */
	/* the -mm package */
	{ "AL", "LE" },
	{ "AS", "AE" },
	{ "BL", "LE" },
	{ "BS", "BE" },
	{ "DF", "DE" },
	{ "DL", "LE" },
	{ "DS", "DE" },
	{ "FS", "FE" },
	{ "ML", "LE" },
	{ "NS", "NE" },
	{ "RL", "LE" },
	{ "VL", "LE" },
	/* the -ms package */
	{ "AB", "AE" },
	{ "BD", "DE" },
	{ "CD", "DE" },
	{ "DS", "DE" },
	{ "FS", "FE" },
	{ "ID", "DE" },
	{ "KF", "KE" },
	{ "KS", "KE" },
	{ "LD", "DE" },
	{ "LG", "NL" },
	{ "QS", "QE" },
	{ "RS", "RE" },
	{ "SM", "NL" },
	{ "XA", "XE" },
	{ "XS", "XE" },
	/* The -me package */
	{ "(b", ")b" },
	{ "(c", ")c" },
	{ "(d", ")d" },
	{ "(f", ")f" },
	{ "(l", ")l" },
	{ "(q", ")q" },
	{ "(x", ")x" },
	{ "(z", ")z" },
	/* Things needed by preprocessors */
	{ "EQ", "EN" },
	{ "TS", "TE" },
	/* Refer */
	{ "[", "]" },
	/* end-of-table marker */
	{ NULL, NULL }
};
134:
135: /*
136: * All commands known to nroff, plus macro packages.
137: * Used so we can complain about unrecognized commands.
138: */
139: char *knowncmds[MAXCMDS] = {
140: "$c", "$f", "$h", "$p", "$s", "(b", "(c", "(d", "(f", "(l", "(q", "(t",
141: "(x", "(z", ")b", ")c", ")d", ")f", ")l", ")q", ")t", ")x", ")z", "++",
142: "+c", "1C", "1c", "2C", "2c", "@(", "@)", "@C", "@D", "@F", "@I", "@M",
143: "@c", "@e", "@f", "@h", "@m", "@n", "@o", "@p", "@r", "@t", "@z", "AB",
144: "AE", "AF", "AI", "AL", "AM", "AS", "AT", "AU", "AX", "B", "B1", "B2",
145: "BD", "BE", "BG", "BL", "BS", "BT", "BX", "C1", "C2", "CD", "CM", "CT",
146: "D", "DA", "DE", "DF", "DL", "DS", "DT", "EC", "EF", "EG", "EH", "EM",
147: "EN", "EQ", "EX", "FA", "FD", "FE", "FG", "FJ", "FK", "FL", "FN", "FO",
148: "FQ", "FS", "FV", "FX", "H", "HC", "HD", "HM", "HO", "HU", "I", "ID",
149: "IE", "IH", "IM", "IP", "IX", "IZ", "KD", "KE", "KF", "KQ", "KS", "LB",
150: "LC", "LD", "LE", "LG", "LI", "LP", "MC", "ME", "MF", "MH", "ML", "MR",
151: "MT", "ND", "NE", "NH", "NL", "NP", "NS", "OF", "OH", "OK", "OP", "P",
152: "P1", "PF", "PH", "PP", "PT", "PX", "PY", "QE", "QP", "QS", "R", "RA",
153: "RC", "RE", "RL", "RP", "RQ", "RS", "RT", "S", "S0", "S2", "S3", "SA",
154: "SG", "SH", "SK", "SM", "SP", "SY", "T&", "TA", "TB", "TC", "TD", "TE",
155: "TH", "TL", "TM", "TP", "TQ", "TR", "TS", "TX", "UL", "US", "UX", "VL",
156: "WC", "WH", "XA", "XD", "XE", "XF", "XK", "XP", "XS", "[", "[-", "[0",
157: "[1", "[2", "[3", "[4", "[5", "[<", "[>", "[]", "]", "]-", "]<", "]>",
158: "][", "ab", "ac", "ad", "af", "am", "ar", "as", "b", "ba", "bc", "bd",
159: "bi", "bl", "bp", "br", "bx", "c.", "c2", "cc", "ce", "cf", "ch", "cs",
160: "ct", "cu", "da", "de", "di", "dl", "dn", "ds", "dt", "dw", "dy", "ec",
161: "ef", "eh", "el", "em", "eo", "ep", "ev", "ex", "fc", "fi", "fl", "fo",
162: "fp", "ft", "fz", "hc", "he", "hl", "hp", "ht", "hw", "hx", "hy", "i",
163: "ie", "if", "ig", "in", "ip", "it", "ix", "lc", "lg", "li", "ll", "ln",
164: "lo", "lp", "ls", "lt", "m1", "m2", "m3", "m4", "mc", "mk", "mo", "n1",
165: "n2", "na", "ne", "nf", "nh", "nl", "nm", "nn", "np", "nr", "ns", "nx",
166: "of", "oh", "os", "pa", "pc", "pi", "pl", "pm", "pn", "po", "pp", "ps",
167: "q", "r", "rb", "rd", "re", "rm", "rn", "ro", "rr", "rs", "rt", "sb",
168: "sc", "sh", "sk", "so", "sp", "ss", "st", "sv", "sz", "ta", "tc", "th",
169: "ti", "tl", "tm", "tp", "tr", "u", "uf", "uh", "ul", "vs", "wh", "xp",
170: "yr", 0
171: };
172:
/* State shared by all the routines below. */
int lineno;		/* current line number in input file */
char line[256];		/* the current line */
char *cfilename;	/* name of current file */
int nfiles;		/* number of files to process */
int fflag;		/* -f: ignore \f */
int sflag;		/* -s: ignore \s */
int ncmds;		/* size of knowncmds */
int slot;		/* slot in knowncmds found by binsrch */

/*
 * NOTE(review): malloc() and exit() should come from <stdlib.h>; this
 * hand-written declaration has to be removed in the same change that
 * adds the header, since the two conflict.
 */
char *malloc();

/*
 * Forward declarations.  Every routine in this file is called before
 * its definition, which used to rely on implicit declarations; those
 * were removed from the language in C99.
 */
int usage(void);
int process(FILE *f);
int complain(int i);
int prop(int i);
int chkcmd(char *line, char *mac);
int nomatch(char *mac);
int eq(char *s1, char *s2);
int pe(int lineno);
int checkknown(char *mac);
int addcmd(char *line);
int addmac(char *mac);
int binsrch(char *mac);
183:
184: main(argc, argv)
185: int argc;
186: char **argv;
187: {
188: FILE *f;
189: int i;
190: char *cp;
191: char b1[4];
192:
193: /* Figure out how many known commands there are */
194: while (knowncmds[ncmds])
195: ncmds++;
196: while (argc > 1 && argv[1][0] == '-') {
197: switch(argv[1][1]) {
198:
199: /* -a: add pairs of macros */
200: case 'a':
201: i = strlen(argv[1]) - 2;
202: if (i % 6 != 0)
203: usage();
204: /* look for empty macro slots */
205: for (i=0; br[i].opbr; i++)
206: ;
207: for (cp=argv[1]+3; cp[-1]; cp += 6) {
208: br[i].opbr = malloc(3);
209: strncpy(br[i].opbr, cp, 2);
210: br[i].clbr = malloc(3);
211: strncpy(br[i].clbr, cp+3, 2);
212: addmac(br[i].opbr); /* knows pairs are also known cmds */
213: addmac(br[i].clbr);
214: i++;
215: }
216: break;
217:
218: /* -c: add known commands */
219: case 'c':
220: i = strlen(argv[1]) - 2;
221: if (i % 3 != 0)
222: usage();
223: for (cp=argv[1]+3; cp[-1]; cp += 3) {
224: if (cp[2] && cp[2] != '.')
225: usage();
226: strncpy(b1, cp, 2);
227: addmac(b1);
228: }
229: break;
230:
231: /* -f: ignore font changes */
232: case 'f':
233: fflag = 1;
234: break;
235:
236: /* -s: ignore size changes */
237: case 's':
238: sflag = 1;
239: break;
240: default:
241: usage();
242: }
243: argc--; argv++;
244: }
245:
246: nfiles = argc - 1;
247:
248: if (nfiles > 0) {
249: for (i=1; i<argc; i++) {
250: cfilename = argv[i];
251: f = fopen(cfilename, "r");
252: if (f == NULL)
253: perror(cfilename);
254: else
255: process(f);
256: }
257: } else {
258: cfilename = "stdin";
259: process(stdin);
260: }
261: exit(0);
262: }
263:
/*
 * Print the command synopsis on stderr and terminate with exit status
 * 1.  Does not return; the int return type only keeps the definition
 * compatible with callers that use it without a prototype.
 */
int
usage(void)
{
	(void)fprintf(stderr,
	    "usage: checknr [-sf] [-a.x1.y1.x2.y2. ... .xn.yn] "
	    "[-c.x1.x2.x3. ... .xn] [file ...]\n");
	exit(1);
}
271:
272: process(f)
273: FILE *f;
274: {
275: register int i, n;
276: char mac[5]; /* The current macro or nroff command */
277: int pl;
278:
279: stktop = -1;
280: for (lineno = 1; fgets(line, sizeof line, f); lineno++) {
281: if (line[0] == '.') {
282: /*
283: * find and isolate the macro/command name.
284: */
285: strncpy(mac, line+1, 4);
286: if (isspace(mac[0])) {
287: pe(lineno);
288: printf("Empty command\n");
289: } else if (isspace(mac[1])) {
290: mac[1] = 0;
291: } else if (isspace(mac[2])) {
292: mac[2] = 0;
293: } else if (mac[0] != '\\' || mac[1] != '\"') {
294: pe(lineno);
295: printf("Command too long\n");
296: }
297:
298: /*
299: * Is it a known command?
300: */
301: checkknown(mac);
302:
303: /*
304: * Should we add it?
305: */
306: if (eq(mac, "de"))
307: addcmd(line);
308:
309: chkcmd(line, mac);
310: }
311:
312: /*
313: * At this point we process the line looking
314: * for \s and \f.
315: */
316: for (i=0; line[i]; i++)
317: if (line[i]=='\\' && (i==0 || line[i-1]!='\\')) {
318: if (!sflag && line[++i]=='s') {
319: pl = line[++i];
320: if (isdigit(pl)) {
321: n = pl - '0';
322: pl = ' ';
323: } else
324: n = 0;
325: while (isdigit(line[++i]))
326: n = 10 * n + line[i] - '0';
327: i--;
328: if (n == 0) {
329: if (stk[stktop].opno == SZ) {
330: stktop--;
331: } else {
332: pe(lineno);
333: printf("unmatched \\s0\n");
334: }
335: } else {
336: stk[++stktop].opno = SZ;
337: stk[stktop].pl = pl;
338: stk[stktop].parm = n;
339: stk[stktop].lno = lineno;
340: }
341: } else if (!fflag && line[i]=='f') {
342: n = line[++i];
343: if (n == 'P') {
344: if (stk[stktop].opno == FT) {
345: stktop--;
346: } else {
347: pe(lineno);
348: printf("unmatched \\fP\n");
349: }
350: } else {
351: stk[++stktop].opno = FT;
352: stk[stktop].pl = 1;
353: stk[stktop].parm = n;
354: stk[stktop].lno = lineno;
355: }
356: }
357: }
358: }
359: /*
360: * We've hit the end and look at all this stuff that hasn't been
361: * matched yet! Complain, complain.
362: */
363: for (i=stktop; i>=0; i--) {
364: complain(i);
365: }
366: }
367:
368: complain(i)
369: {
370: pe(stk[i].lno);
371: printf("Unmatched ");
372: prop(i);
373: printf("\n");
374: }
375:
376: prop(i)
377: {
378: if (stk[i].pl == 0)
379: printf(".%s", br[stk[i].opno].opbr);
380: else switch(stk[i].opno) {
381: case SZ:
382: printf("\\s%c%d", stk[i].pl, stk[i].parm);
383: break;
384: case FT:
385: printf("\\f%c", stk[i].parm);
386: break;
387: default:
388: printf("Bug: stk[%d].opno = %d = .%s, .%s",
389: i, stk[i].opno, br[stk[i].opno].opbr, br[stk[i].opno].clbr);
390: }
391: }
392:
393: chkcmd(line, mac)
394: char *line;
395: char *mac;
396: {
397: register int i, n;
398:
399: /*
400: * Check to see if it matches top of stack.
401: */
402: if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr))
403: stktop--; /* OK. Pop & forget */
404: else {
405: /* No. Maybe it's an opener */
406: for (i=0; br[i].opbr; i++) {
407: if (eq(mac, br[i].opbr)) {
408: /* Found. Push it. */
409: stktop++;
410: stk[stktop].opno = i;
411: stk[stktop].pl = 0;
412: stk[stktop].parm = 0;
413: stk[stktop].lno = lineno;
414: break;
415: }
416: /*
417: * Maybe it's an unmatched closer.
418: * NOTE: this depends on the fact
419: * that none of the closers can be
420: * openers too.
421: */
422: if (eq(mac, br[i].clbr)) {
423: nomatch(mac);
424: break;
425: }
426: }
427: }
428: }
429:
430: nomatch(mac)
431: char *mac;
432: {
433: register int i, j;
434:
435: /*
436: * Look for a match further down on stack
437: * If we find one, it suggests that the stuff in
438: * between is supposed to match itself.
439: */
440: for (j=stktop; j>=0; j--)
441: if (eq(mac,br[stk[j].opno].clbr)) {
442: /* Found. Make a good diagnostic. */
443: if (j == stktop-2) {
444: /*
445: * Check for special case \fx..\fR and don't
446: * complain.
447: */
448: if (stk[j+1].opno==FT && stk[j+1].parm!='R'
449: && stk[j+2].opno==FT && stk[j+2].parm=='R') {
450: stktop = j -1;
451: return;
452: }
453: /*
454: * We have two unmatched frobs. Chances are
455: * they were intended to match, so we mention
456: * them together.
457: */
458: pe(stk[j+1].lno);
459: prop(j+1);
460: printf(" does not match %d: ", stk[j+2].lno);
461: prop(j+2);
462: printf("\n");
463: } else for (i=j+1; i <= stktop; i++) {
464: complain(i);
465: }
466: stktop = j-1;
467: return;
468: }
469: /* Didn't find one. Throw this away. */
470: pe(lineno);
471: printf("Unmatched .%s\n", mac);
472: }
473:
/*
 * eq: are two strings equal?
 * Returns 1 when s1 and s2 hold the same characters, 0 otherwise.
 * Neither argument may be a null pointer.
 */
int
eq(char *s1, char *s2)
{
	return strcmp(s1, s2) == 0;
}
480:
481: /* print the first part of an error message, given the line number */
482: pe(lineno)
483: int lineno;
484: {
485: if (nfiles > 1)
486: printf("%s: ", cfilename);
487: printf("%d: ", lineno);
488: }
489:
490: checkknown(mac)
491: char *mac;
492: {
493:
494: if (eq(mac, "."))
495: return;
496: if (binsrch(mac) >= 0)
497: return;
498: if (mac[0] == '\\' && mac[1] == '"') /* comments */
499: return;
500:
501: pe(lineno);
502: printf("Unknown command: .%s\n", mac);
503: }
504:
505: /*
506: * We have a .de xx line in "line". Add xx to the list of known commands.
507: */
508: addcmd(line)
509: char *line;
510: {
511: char *mac;
512:
513: /* grab the macro being defined */
514: mac = line+4;
515: while (isspace(*mac))
516: mac++;
517: if (*mac == 0) {
518: pe(lineno);
519: printf("illegal define: %s\n", line);
520: return;
521: }
522: mac[2] = 0;
523: if (isspace(mac[1]) || mac[1] == '\\')
524: mac[1] = 0;
525: if (ncmds >= MAXCMDS) {
526: printf("Only %d known commands allowed\n", MAXCMDS);
527: exit(1);
528: }
529: addmac(mac);
530: }
531:
532: /*
533: * Add mac to the list. We should really have some kind of tree
534: * structure here but this is a quick-and-dirty job and I just don't
535: * have time to mess with it. (I wonder if this will come back to haunt
536: * me someday?) Anyway, I claim that .de is fairly rare in user
537: * nroff programs, and the register loop below is pretty fast.
538: */
539: addmac(mac)
540: char *mac;
541: {
542: register char **src, **dest, **loc;
543:
544: if (binsrch(mac) >= 0){ /* it's OK to redefine something */
545: #ifdef DEBUG
546: printf("binsrch(%s) -> already in table\n", mac);
547: #endif DEBUG
548: return;
549: }
550: /* binsrch sets slot as a side effect */
551: #ifdef DEBUG
552: printf("binsrch(%s) -> %d\n", mac, slot);
553: #endif
554: loc = &knowncmds[slot];
555: src = &knowncmds[ncmds-1];
556: dest = src+1;
557: while (dest > loc)
558: *dest-- = *src--;
559: *loc = malloc(3);
560: strcpy(*loc, mac);
561: ncmds++;
562: #ifdef DEBUG
563: printf("after: %s %s %s %s %s, %d cmds\n", knowncmds[slot-2], knowncmds[slot-1], knowncmds[slot], knowncmds[slot+1], knowncmds[slot+2], ncmds);
564: #endif
565: }
566:
567: /*
568: * Do a binary search in knowncmds for mac.
569: * If found, return the index. If not, return -1.
570: */
571: binsrch(mac)
572: char *mac;
573: {
574: register char *p; /* pointer to current cmd in list */
575: register int d; /* difference if any */
576: register int mid; /* mid point in binary search */
577: register int top, bot; /* boundaries of bin search, inclusive */
578:
579: top = ncmds-1;
580: bot = 0;
581: while (top >= bot) {
582: mid = (top+bot)/2;
583: p = knowncmds[mid];
584: d = p[0] - mac[0];
585: if (d == 0)
586: d = p[1] - mac[1];
587: if (d == 0)
588: return mid;
589: if (d < 0)
590: bot = mid + 1;
591: else
592: top = mid - 1;
593: }
594: slot = bot; /* place it would have gone */
595: return -1;
596: }