Annotation of src/usr.bin/checknr/checknr.c, Revision 1.6
1.6 ! heko 1: /* $OpenBSD: checknr.c,v 1.5 2001/07/12 05:16:56 deraadt Exp $ */
1.1 deraadt 2: /* $NetBSD: checknr.c,v 1.4 1995/03/26 04:10:19 glass Exp $ */
3:
4: /*
5: * Copyright (c) 1980, 1993
6: * The Regents of the University of California. All rights reserved.
7: *
8: * Redistribution and use in source and binary forms, with or without
9: * modification, are permitted provided that the following conditions
10: * are met:
11: * 1. Redistributions of source code must retain the above copyright
12: * notice, this list of conditions and the following disclaimer.
13: * 2. Redistributions in binary form must reproduce the above copyright
14: * notice, this list of conditions and the following disclaimer in the
15: * documentation and/or other materials provided with the distribution.
16: * 3. All advertising materials mentioning features or use of this software
17: * must display the following acknowledgement:
18: * This product includes software developed by the University of
19: * California, Berkeley and its contributors.
20: * 4. Neither the name of the University nor the names of its contributors
21: * may be used to endorse or promote products derived from this software
22: * without specific prior written permission.
23: *
24: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34: * SUCH DAMAGE.
35: */
36:
37: #ifndef lint
38: static char copyright[] =
39: "@(#) Copyright (c) 1980, 1993\n\
40: The Regents of the University of California. All rights reserved.\n";
41: #endif /* not lint */
42:
43: #ifndef lint
44: #if 0
45: static char sccsid[] = "@(#)checknr.c 8.1 (Berkeley) 6/6/93";
46: #else
1.6 ! heko 47: static char rcsid[] = "$OpenBSD: checknr.c,v 1.5 2001/07/12 05:16:56 deraadt Exp $";
1.1 deraadt 48: #endif
49: #endif /* not lint */
50:
51: /*
52: * checknr: check an nroff/troff input file for matching macro calls.
53: * we also attempt to match size and font changes, but only the embedded
54: * kind. These must end in \s0 and \fP resp. Maybe more sophistication
55: * later but for now think of these restrictions as contributions to
56: * structured typesetting.
57: */
58: #include <stdio.h>
59: #include <string.h>
60: #include <ctype.h>
61:
#define MAXSTK	100	/* capacity of the opener stack */
#define MAXBR	100	/* capacity of the bracket-pair table */
#define MAXCMDS	500	/* capacity of the known-command table */

/*
 * One remembered opener: something we have seen that still needs
 * a matching closer.
 */
struct stkstr {
	int opno;	/* index of the opening bracket in br[] */
	int pl;		/* '+', '-', ' ' for \s, 1 for \f, 0 for a macro opener */
	int parm;	/* argument of the size or font change */
	int lno;	/* input line on which the opener appeared */
};

struct stkstr stk[MAXSTK];	/* the stack itself */
int stktop;			/* index of the top entry */
76:
/* Forward declarations for every function defined in this file. */
void	usage(void);
void	process(FILE *f);
void	chkcmd(char *line, char *mac);
void	nomatch(char *mac);
void	complain(int i);
void	prop(int i);
void	pe(int lineno);
int	eq(char *s1, char *s2);
void	checkknown(char *mac);
void	addcmd(char *line);
void	addmac(char *mac);
int	binsrch(char *mac);
89:
/*
 * Table of bracket pairs: each entry names a request that opens a
 * construct and the request that closes it.  The first entry whose
 * opbr is a null pointer ends the table.
 */
struct brstr {
	char *opbr;	/* opening request */
	char *clbr;	/* closing request */
};

#define SZ	0	/* index of the size-change pair */
#define FT	1	/* index of the font-change pair */

struct brstr br[MAXBR] = {
	/* A few bare bones troff commands */
	{ "sz", "sz" },		/* slot SZ; also \s */
	{ "ft", "ft" },		/* slot FT; also \f */

	/* the -mm package */
	{ "AL", "LE" },
	{ "AS", "AE" },
	{ "BL", "LE" },
	{ "BS", "BE" },
	{ "DF", "DE" },
	{ "DL", "LE" },
	{ "DS", "DE" },
	{ "FS", "FE" },
	{ "ML", "LE" },
	{ "NS", "NE" },
	{ "RL", "LE" },
	{ "VL", "LE" },

	/* the -ms package */
	{ "AB", "AE" },
	{ "BD", "DE" },
	{ "CD", "DE" },
	{ "DS", "DE" },
	{ "FS", "FE" },
	{ "ID", "DE" },
	{ "KF", "KE" },
	{ "KS", "KE" },
	{ "LD", "DE" },
	{ "LG", "NL" },
	{ "QS", "QE" },
	{ "RS", "RE" },
	{ "SM", "NL" },
	{ "XA", "XE" },
	{ "XS", "XE" },

	/* The -me package */
	{ "(b", ")b" },
	{ "(c", ")c" },
	{ "(d", ")d" },
	{ "(f", ")f" },
	{ "(l", ")l" },
	{ "(q", ")q" },
	{ "(x", ")x" },
	{ "(z", ")z" },

	/* Things needed by preprocessors */
	{ "EQ", "EN" },
	{ "TS", "TE" },

	/* Refer */
	{ "[", "]" },

	/* end of table */
	{ NULL, NULL },
};
147:
148: /*
149: * All commands known to nroff, plus macro packages.
150: * Used so we can complain about unrecognized commands.
151: */
152: char *knowncmds[MAXCMDS] = {
153: "$c", "$f", "$h", "$p", "$s", "(b", "(c", "(d", "(f", "(l", "(q", "(t",
154: "(x", "(z", ")b", ")c", ")d", ")f", ")l", ")q", ")t", ")x", ")z", "++",
155: "+c", "1C", "1c", "2C", "2c", "@(", "@)", "@C", "@D", "@F", "@I", "@M",
156: "@c", "@e", "@f", "@h", "@m", "@n", "@o", "@p", "@r", "@t", "@z", "AB",
157: "AE", "AF", "AI", "AL", "AM", "AS", "AT", "AU", "AX", "B", "B1", "B2",
158: "BD", "BE", "BG", "BL", "BS", "BT", "BX", "C1", "C2", "CD", "CM", "CT",
159: "D", "DA", "DE", "DF", "DL", "DS", "DT", "EC", "EF", "EG", "EH", "EM",
160: "EN", "EQ", "EX", "FA", "FD", "FE", "FG", "FJ", "FK", "FL", "FN", "FO",
161: "FQ", "FS", "FV", "FX", "H", "HC", "HD", "HM", "HO", "HU", "I", "ID",
162: "IE", "IH", "IM", "IP", "IX", "IZ", "KD", "KE", "KF", "KQ", "KS", "LB",
163: "LC", "LD", "LE", "LG", "LI", "LP", "MC", "ME", "MF", "MH", "ML", "MR",
164: "MT", "ND", "NE", "NH", "NL", "NP", "NS", "OF", "OH", "OK", "OP", "P",
165: "P1", "PF", "PH", "PP", "PT", "PX", "PY", "QE", "QP", "QS", "R", "RA",
166: "RC", "RE", "RL", "RP", "RQ", "RS", "RT", "S", "S0", "S2", "S3", "SA",
167: "SG", "SH", "SK", "SM", "SP", "SY", "T&", "TA", "TB", "TC", "TD", "TE",
168: "TH", "TL", "TM", "TP", "TQ", "TR", "TS", "TX", "UL", "US", "UX", "VL",
169: "WC", "WH", "XA", "XD", "XE", "XF", "XK", "XP", "XS", "[", "[-", "[0",
170: "[1", "[2", "[3", "[4", "[5", "[<", "[>", "[]", "]", "]-", "]<", "]>",
171: "][", "ab", "ac", "ad", "af", "am", "ar", "as", "b", "ba", "bc", "bd",
172: "bi", "bl", "bp", "br", "bx", "c.", "c2", "cc", "ce", "cf", "ch", "cs",
173: "ct", "cu", "da", "de", "di", "dl", "dn", "ds", "dt", "dw", "dy", "ec",
174: "ef", "eh", "el", "em", "eo", "ep", "ev", "ex", "fc", "fi", "fl", "fo",
175: "fp", "ft", "fz", "hc", "he", "hl", "hp", "ht", "hw", "hx", "hy", "i",
176: "ie", "if", "ig", "in", "ip", "it", "ix", "lc", "lg", "li", "ll", "ln",
177: "lo", "lp", "ls", "lt", "m1", "m2", "m3", "m4", "mc", "mk", "mo", "n1",
178: "n2", "na", "ne", "nf", "nh", "nl", "nm", "nn", "np", "nr", "ns", "nx",
179: "of", "oh", "os", "pa", "pc", "pi", "pl", "pm", "pn", "po", "pp", "ps",
180: "q", "r", "rb", "rd", "re", "rm", "rn", "ro", "rr", "rs", "rt", "sb",
181: "sc", "sh", "sk", "so", "sp", "ss", "st", "sv", "sz", "ta", "tc", "th",
182: "ti", "tl", "tm", "tp", "tr", "u", "uf", "uh", "ul", "vs", "wh", "xp",
183: "yr", 0
184: };
185:
int lineno;		/* current line number in input file */
char line[256];		/* the current line */
char *cfilename;	/* name of current file */
int nfiles;		/* number of files to process */
int fflag;		/* -f: ignore \f */
int sflag;		/* -s: ignore \s */
int ncmds;		/* number of entries in knowncmds */
int slot;		/* slot in knowncmds found by binsrch */

/*
 * Library routines used below.  The declarations match the ones in
 * <stdlib.h>: an unprototyped "char *malloc()" has the wrong return
 * type and passes its argument as an int instead of a size_t.
 */
void *malloc(size_t);
void exit(int);
196:
1.5 deraadt 197: int
1.1 deraadt 198: main(argc, argv)
199: int argc;
200: char **argv;
201: {
202: FILE *f;
203: int i;
204: char *cp;
205: char b1[4];
206:
207: /* Figure out how many known commands there are */
208: while (knowncmds[ncmds])
209: ncmds++;
210: while (argc > 1 && argv[1][0] == '-') {
211: switch(argv[1][1]) {
212:
213: /* -a: add pairs of macros */
214: case 'a':
215: i = strlen(argv[1]) - 2;
216: if (i % 6 != 0)
217: usage();
218: /* look for empty macro slots */
219: for (i=0; br[i].opbr; i++)
220: ;
221: for (cp=argv[1]+3; cp[-1]; cp += 6) {
222: br[i].opbr = malloc(3);
223: strncpy(br[i].opbr, cp, 2);
224: br[i].clbr = malloc(3);
225: strncpy(br[i].clbr, cp+3, 2);
226: addmac(br[i].opbr); /* knows pairs are also known cmds */
227: addmac(br[i].clbr);
228: i++;
229: }
230: break;
231:
232: /* -c: add known commands */
233: case 'c':
234: i = strlen(argv[1]) - 2;
235: if (i % 3 != 0)
236: usage();
237: for (cp=argv[1]+3; cp[-1]; cp += 3) {
238: if (cp[2] && cp[2] != '.')
239: usage();
240: strncpy(b1, cp, 2);
241: addmac(b1);
242: }
243: break;
244:
245: /* -f: ignore font changes */
246: case 'f':
247: fflag = 1;
248: break;
249:
250: /* -s: ignore size changes */
251: case 's':
252: sflag = 1;
253: break;
254: default:
255: usage();
256: }
257: argc--; argv++;
258: }
259:
260: nfiles = argc - 1;
261:
262: if (nfiles > 0) {
263: for (i=1; i<argc; i++) {
264: cfilename = argv[i];
265: f = fopen(cfilename, "r");
266: if (f == NULL)
267: perror(cfilename);
268: else
269: process(f);
270: }
271: } else {
272: cfilename = "stdin";
273: process(stdin);
274: }
275: exit(0);
276: }
277:
/*
 * Print the command synopsis on standard error and terminate with
 * exit status 1.
 */
void
usage(void)
{
	static const char synopsis[] =
	    "usage: checknr [-fs] [-a.x1.y1.x2.y2. ... .xn.yn] "
	    "[-c.x1.x2.x3. ... .xn] [file]\n";

	(void)fputs(synopsis, stderr);
	exit(1);
}
286:
1.5 deraadt 287: void
1.1 deraadt 288: process(f)
289: FILE *f;
290: {
291: register int i, n;
292: char mac[5]; /* The current macro or nroff command */
293: int pl;
294:
295: stktop = -1;
296: for (lineno = 1; fgets(line, sizeof line, f); lineno++) {
297: if (line[0] == '.') {
298: /*
299: * find and isolate the macro/command name.
300: */
301: strncpy(mac, line+1, 4);
302: if (isspace(mac[0])) {
303: pe(lineno);
304: printf("Empty command\n");
305: } else if (isspace(mac[1])) {
306: mac[1] = 0;
307: } else if (isspace(mac[2])) {
308: mac[2] = 0;
309: } else if (mac[0] != '\\' || mac[1] != '\"') {
310: pe(lineno);
311: printf("Command too long\n");
312: }
313:
314: /*
315: * Is it a known command?
316: */
317: checkknown(mac);
318:
319: /*
320: * Should we add it?
321: */
322: if (eq(mac, "de"))
323: addcmd(line);
324:
325: chkcmd(line, mac);
326: }
327:
328: /*
329: * At this point we process the line looking
330: * for \s and \f.
331: */
332: for (i=0; line[i]; i++)
333: if (line[i]=='\\' && (i==0 || line[i-1]!='\\')) {
334: if (!sflag && line[++i]=='s') {
335: pl = line[++i];
336: if (isdigit(pl)) {
337: n = pl - '0';
338: pl = ' ';
339: } else
340: n = 0;
341: while (isdigit(line[++i]))
342: n = 10 * n + line[i] - '0';
343: i--;
344: if (n == 0) {
345: if (stk[stktop].opno == SZ) {
346: stktop--;
347: } else {
348: pe(lineno);
349: printf("unmatched \\s0\n");
350: }
351: } else {
352: stk[++stktop].opno = SZ;
353: stk[stktop].pl = pl;
354: stk[stktop].parm = n;
355: stk[stktop].lno = lineno;
356: }
357: } else if (!fflag && line[i]=='f') {
358: n = line[++i];
359: if (n == 'P') {
360: if (stk[stktop].opno == FT) {
361: stktop--;
362: } else {
363: pe(lineno);
364: printf("unmatched \\fP\n");
365: }
366: } else {
367: stk[++stktop].opno = FT;
368: stk[stktop].pl = 1;
369: stk[stktop].parm = n;
370: stk[stktop].lno = lineno;
371: }
372: }
373: }
374: }
375: /*
376: * We've hit the end and look at all this stuff that hasn't been
377: * matched yet! Complain, complain.
378: */
379: for (i=stktop; i>=0; i--) {
380: complain(i);
381: }
382: }
383:
1.5 deraadt 384: void
1.1 deraadt 385: complain(i)
386: {
387: pe(stk[i].lno);
388: printf("Unmatched ");
389: prop(i);
390: printf("\n");
391: }
392:
1.5 deraadt 393: void
1.1 deraadt 394: prop(i)
1.5 deraadt 395: int i;
1.1 deraadt 396: {
397: if (stk[i].pl == 0)
398: printf(".%s", br[stk[i].opno].opbr);
399: else switch(stk[i].opno) {
400: case SZ:
401: printf("\\s%c%d", stk[i].pl, stk[i].parm);
402: break;
403: case FT:
404: printf("\\f%c", stk[i].parm);
405: break;
406: default:
407: printf("Bug: stk[%d].opno = %d = .%s, .%s",
408: i, stk[i].opno, br[stk[i].opno].opbr, br[stk[i].opno].clbr);
409: }
410: }
411:
1.5 deraadt 412: void
1.1 deraadt 413: chkcmd(line, mac)
414: char *line;
415: char *mac;
416: {
1.5 deraadt 417: register int i;
1.1 deraadt 418:
419: /*
420: * Check to see if it matches top of stack.
421: */
422: if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr))
423: stktop--; /* OK. Pop & forget */
424: else {
425: /* No. Maybe it's an opener */
426: for (i=0; br[i].opbr; i++) {
427: if (eq(mac, br[i].opbr)) {
428: /* Found. Push it. */
429: stktop++;
430: stk[stktop].opno = i;
431: stk[stktop].pl = 0;
432: stk[stktop].parm = 0;
433: stk[stktop].lno = lineno;
434: break;
435: }
436: /*
437: * Maybe it's an unmatched closer.
438: * NOTE: this depends on the fact
439: * that none of the closers can be
440: * openers too.
441: */
442: if (eq(mac, br[i].clbr)) {
443: nomatch(mac);
444: break;
445: }
446: }
447: }
448: }
449:
1.5 deraadt 450: void
1.1 deraadt 451: nomatch(mac)
452: char *mac;
453: {
454: register int i, j;
455:
456: /*
457: * Look for a match further down on stack
458: * If we find one, it suggests that the stuff in
459: * between is supposed to match itself.
460: */
461: for (j=stktop; j>=0; j--)
462: if (eq(mac,br[stk[j].opno].clbr)) {
463: /* Found. Make a good diagnostic. */
464: if (j == stktop-2) {
465: /*
466: * Check for special case \fx..\fR and don't
467: * complain.
468: */
469: if (stk[j+1].opno==FT && stk[j+1].parm!='R'
470: && stk[j+2].opno==FT && stk[j+2].parm=='R') {
471: stktop = j -1;
472: return;
473: }
474: /*
475: * We have two unmatched frobs. Chances are
476: * they were intended to match, so we mention
477: * them together.
478: */
479: pe(stk[j+1].lno);
480: prop(j+1);
481: printf(" does not match %d: ", stk[j+2].lno);
482: prop(j+2);
483: printf("\n");
484: } else for (i=j+1; i <= stktop; i++) {
485: complain(i);
486: }
487: stktop = j-1;
488: return;
489: }
490: /* Didn't find one. Throw this away. */
491: pe(lineno);
492: printf("Unmatched .%s\n", mac);
493: }
494:
/* eq: return 1 when s1 and s2 hold the same string, 0 otherwise */
int
eq(char *s1, char *s2)
{
	return !strcmp(s1, s2);
}
502:
503: /* print the first part of an error message, given the line number */
1.5 deraadt 504: void
1.1 deraadt 505: pe(lineno)
506: int lineno;
507: {
508: if (nfiles > 1)
509: printf("%s: ", cfilename);
510: printf("%d: ", lineno);
511: }
512:
1.5 deraadt 513: void
1.1 deraadt 514: checkknown(mac)
515: char *mac;
516: {
517:
518: if (eq(mac, "."))
519: return;
520: if (binsrch(mac) >= 0)
521: return;
522: if (mac[0] == '\\' && mac[1] == '"') /* comments */
523: return;
524:
525: pe(lineno);
526: printf("Unknown command: .%s\n", mac);
527: }
528:
529: /*
530: * We have a .de xx line in "line". Add xx to the list of known commands.
531: */
1.5 deraadt 532: void
1.1 deraadt 533: addcmd(line)
534: char *line;
535: {
536: char *mac;
537:
538: /* grab the macro being defined */
539: mac = line+4;
540: while (isspace(*mac))
541: mac++;
542: if (*mac == 0) {
543: pe(lineno);
544: printf("illegal define: %s\n", line);
545: return;
546: }
547: mac[2] = 0;
548: if (isspace(mac[1]) || mac[1] == '\\')
549: mac[1] = 0;
550: if (ncmds >= MAXCMDS) {
551: printf("Only %d known commands allowed\n", MAXCMDS);
552: exit(1);
553: }
554: addmac(mac);
555: }
556:
557: /*
558: * Add mac to the list. We should really have some kind of tree
559: * structure here but this is a quick-and-dirty job and I just don't
560: * have time to mess with it. (I wonder if this will come back to haunt
561: * me someday?) Anyway, I claim that .de is fairly rare in user
562: * nroff programs, and the register loop below is pretty fast.
563: */
1.5 deraadt 564: void
1.1 deraadt 565: addmac(mac)
566: char *mac;
567: {
568: register char **src, **dest, **loc;
569:
570: if (binsrch(mac) >= 0){ /* it's OK to redefine something */
571: #ifdef DEBUG
572: printf("binsrch(%s) -> already in table\n", mac);
1.6 ! heko 573: #endif /* DEBUG */
1.1 deraadt 574: return;
575: }
576: /* binsrch sets slot as a side effect */
577: #ifdef DEBUG
578: printf("binsrch(%s) -> %d\n", mac, slot);
579: #endif
580: loc = &knowncmds[slot];
581: src = &knowncmds[ncmds-1];
582: dest = src+1;
583: while (dest > loc)
584: *dest-- = *src--;
585: *loc = malloc(3);
586: strcpy(*loc, mac);
587: ncmds++;
588: #ifdef DEBUG
589: printf("after: %s %s %s %s %s, %d cmds\n", knowncmds[slot-2], knowncmds[slot-1], knowncmds[slot], knowncmds[slot+1], knowncmds[slot+2], ncmds);
590: #endif
591: }
592:
593: /*
594: * Do a binary search in knowncmds for mac.
595: * If found, return the index. If not, return -1.
596: */
1.5 deraadt 597: int
1.1 deraadt 598: binsrch(mac)
599: char *mac;
600: {
601: register char *p; /* pointer to current cmd in list */
602: register int d; /* difference if any */
603: register int mid; /* mid point in binary search */
604: register int top, bot; /* boundaries of bin search, inclusive */
605:
606: top = ncmds-1;
607: bot = 0;
608: while (top >= bot) {
609: mid = (top+bot)/2;
610: p = knowncmds[mid];
611: d = p[0] - mac[0];
612: if (d == 0)
613: d = p[1] - mac[1];
614: if (d == 0)
615: return mid;
616: if (d < 0)
617: bot = mid + 1;
618: else
619: top = mid - 1;
620: }
621: slot = bot; /* place it would have gone */
622: return -1;
623: }