Annotation of src/usr.bin/checknr/checknr.c, Revision 1.9
1.9 ! millert 1: /* $OpenBSD: checknr.c,v 1.8 2003/04/03 22:09:04 deraadt Exp $ */
1.1 deraadt 2: /* $NetBSD: checknr.c,v 1.4 1995/03/26 04:10:19 glass Exp $ */
3:
4: /*
5: * Copyright (c) 1980, 1993
6: * The Regents of the University of California. All rights reserved.
7: *
8: * Redistribution and use in source and binary forms, with or without
9: * modification, are permitted provided that the following conditions
10: * are met:
11: * 1. Redistributions of source code must retain the above copyright
12: * notice, this list of conditions and the following disclaimer.
13: * 2. Redistributions in binary form must reproduce the above copyright
14: * notice, this list of conditions and the following disclaimer in the
15: * documentation and/or other materials provided with the distribution.
1.9 ! millert 16: * 3. Neither the name of the University nor the names of its contributors
1.1 deraadt 17: * may be used to endorse or promote products derived from this software
18: * without specific prior written permission.
19: *
20: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30: * SUCH DAMAGE.
31: */
32:
/*
 * Identification strings.  Both definitions below are skipped entirely
 * when `lint' is defined.
 */
33: #ifndef lint
34: static char copyright[] =
35: "@(#) Copyright (c) 1980, 1993\n\
36: The Regents of the University of California. All rights reserved.\n";
37: #endif /* not lint */
38:
39: #ifndef lint
/*
 * The `#if 0' arm keeps the Berkeley SCCS id in the source without
 * compiling it; the `#else' arm (the RCS id) is the one that is built.
 */
40: #if 0
41: static char sccsid[] = "@(#)checknr.c 8.1 (Berkeley) 6/6/93";
42: #else
1.9 ! millert 43: static char rcsid[] = "$OpenBSD: checknr.c,v 1.8 2003/04/03 22:09:04 deraadt Exp $";
1.1 deraadt 44: #endif
45: #endif /* not lint */
46:
/*
 * checknr: check an nroff/troff input file for matching macro calls.
 * We also attempt to match size and font changes, but only the embedded
 * kind.  These must end in \s0 and \fP respectively.  Maybe more
 * sophistication later, but for now think of these restrictions as
 * contributions to structured typesetting.
 */
54: #include <stdio.h>
55: #include <string.h>
56: #include <ctype.h>
57:
/* Fixed capacities of the tables used throughout this file. */
#define MAXSTK	100	/* Stack size */
#define MAXBR	100	/* Max number of bracket pairs known */
#define MAXCMDS	500	/* Max number of commands known */

/*
 * One remembered "opener": a macro, size change or font change that has
 * been seen but not yet closed.
 */
struct stkstr {
	int opno;	/* index into br[] of the opening bracket */
	int pl;		/* '+', '-', ' ' for \s, 1 for \f, 0 for a macro */
	int parm;	/* parm to size, font, etc */
	int lno;	/* input line number the opener was seen on */
};

/* The stack itself, and the index of its top entry. */
struct stkstr stk[MAXSTK];
int stktop;
72:
/* Forward declarations for the routines defined further down. */
int	binsrch(char *mac);
int	eq(char *s1, char *s2);
void	addcmd(char *line);
void	addmac(char *mac);
void	checkknown(char *mac);
void	chkcmd(char *line, char *mac);
void	complain(int i);
void	nomatch(char *mac);
void	pe(int lineno);
void	process(FILE *f);
void	prop(int i);
void	usage(void);
85:
/*
 * The kinds of opening and closing brackets.
 *
 * Each entry pairs the request that opens a construct with the request
 * that closes it.  The table ends at the first entry whose opbr is null;
 * the unused tail leaves room for pairs added at run time.  Entry order
 * matters: stack entries refer to this table by index.
 */
#define SZ	0	/* index of the size-change pair */
#define FT	1	/* index of the font-change pair */

struct brstr {
	char *opbr;	/* opening request */
	char *clbr;	/* closing request */
};

struct brstr br[MAXBR] = {
	/* A few bare bones troff commands */
	{ "sz", "sz" },		/* SZ: also \s */
	{ "ft", "ft" },		/* FT: also \f */

	/* the -mm package */
	{ "AL", "LE" },	{ "AS", "AE" },	{ "BL", "LE" },	{ "BS", "BE" },
	{ "DF", "DE" },	{ "DL", "LE" },	{ "DS", "DE" },	{ "FS", "FE" },
	{ "ML", "LE" },	{ "NS", "NE" },	{ "RL", "LE" },	{ "VL", "LE" },

	/* the -ms package */
	{ "AB", "AE" },	{ "BD", "DE" },	{ "CD", "DE" },	{ "DS", "DE" },
	{ "FS", "FE" },	{ "ID", "DE" },	{ "KF", "KE" },	{ "KS", "KE" },
	{ "LD", "DE" },	{ "LG", "NL" },	{ "QS", "QE" },	{ "RS", "RE" },
	{ "SM", "NL" },	{ "XA", "XE" },	{ "XS", "XE" },

	/* The -me package */
	{ "(b", ")b" },	{ "(c", ")c" },	{ "(d", ")d" },	{ "(f", ")f" },
	{ "(l", ")l" },	{ "(q", ")q" },	{ "(x", ")x" },	{ "(z", ")z" },

	/* Things needed by preprocessors */
	{ "EQ", "EN" },	{ "TS", "TE" },

	/* Refer */
	{ "[", "]" },

	/* end-of-table marker */
	{ 0, 0 }
};
143:
144: /*
145: * All commands known to nroff, plus macro packages.
146: * Used so we can complain about unrecognized commands.
147: */
148: char *knowncmds[MAXCMDS] = {
149: "$c", "$f", "$h", "$p", "$s", "(b", "(c", "(d", "(f", "(l", "(q", "(t",
150: "(x", "(z", ")b", ")c", ")d", ")f", ")l", ")q", ")t", ")x", ")z", "++",
151: "+c", "1C", "1c", "2C", "2c", "@(", "@)", "@C", "@D", "@F", "@I", "@M",
152: "@c", "@e", "@f", "@h", "@m", "@n", "@o", "@p", "@r", "@t", "@z", "AB",
153: "AE", "AF", "AI", "AL", "AM", "AS", "AT", "AU", "AX", "B", "B1", "B2",
154: "BD", "BE", "BG", "BL", "BS", "BT", "BX", "C1", "C2", "CD", "CM", "CT",
155: "D", "DA", "DE", "DF", "DL", "DS", "DT", "EC", "EF", "EG", "EH", "EM",
156: "EN", "EQ", "EX", "FA", "FD", "FE", "FG", "FJ", "FK", "FL", "FN", "FO",
157: "FQ", "FS", "FV", "FX", "H", "HC", "HD", "HM", "HO", "HU", "I", "ID",
158: "IE", "IH", "IM", "IP", "IX", "IZ", "KD", "KE", "KF", "KQ", "KS", "LB",
159: "LC", "LD", "LE", "LG", "LI", "LP", "MC", "ME", "MF", "MH", "ML", "MR",
160: "MT", "ND", "NE", "NH", "NL", "NP", "NS", "OF", "OH", "OK", "OP", "P",
161: "P1", "PF", "PH", "PP", "PT", "PX", "PY", "QE", "QP", "QS", "R", "RA",
162: "RC", "RE", "RL", "RP", "RQ", "RS", "RT", "S", "S0", "S2", "S3", "SA",
163: "SG", "SH", "SK", "SM", "SP", "SY", "T&", "TA", "TB", "TC", "TD", "TE",
164: "TH", "TL", "TM", "TP", "TQ", "TR", "TS", "TX", "UL", "US", "UX", "VL",
165: "WC", "WH", "XA", "XD", "XE", "XF", "XK", "XP", "XS", "[", "[-", "[0",
166: "[1", "[2", "[3", "[4", "[5", "[<", "[>", "[]", "]", "]-", "]<", "]>",
167: "][", "ab", "ac", "ad", "af", "am", "ar", "as", "b", "ba", "bc", "bd",
168: "bi", "bl", "bp", "br", "bx", "c.", "c2", "cc", "ce", "cf", "ch", "cs",
169: "ct", "cu", "da", "de", "di", "dl", "dn", "ds", "dt", "dw", "dy", "ec",
170: "ef", "eh", "el", "em", "eo", "ep", "ev", "ex", "fc", "fi", "fl", "fo",
171: "fp", "ft", "fz", "hc", "he", "hl", "hp", "ht", "hw", "hx", "hy", "i",
172: "ie", "if", "ig", "in", "ip", "it", "ix", "lc", "lg", "li", "ll", "ln",
173: "lo", "lp", "ls", "lt", "m1", "m2", "m3", "m4", "mc", "mk", "mo", "n1",
174: "n2", "na", "ne", "nf", "nh", "nl", "nm", "nn", "np", "nr", "ns", "nx",
175: "of", "oh", "os", "pa", "pc", "pi", "pl", "pm", "pn", "po", "pp", "ps",
176: "q", "r", "rb", "rd", "re", "rm", "rn", "ro", "rr", "rs", "rt", "sb",
177: "sc", "sh", "sk", "so", "sp", "ss", "st", "sv", "sz", "ta", "tc", "th",
178: "ti", "tl", "tm", "tp", "tr", "u", "uf", "uh", "ul", "vs", "wh", "xp",
179: "yr", 0
180: };
181:
/* Input state. */
char	*cfilename;	/* name of current file */
char	line[256];	/* the current line */
int	lineno;		/* current line number in input file */
int	nfiles;		/* number of files to process */

/* Command-line switches. */
int	fflag;		/* -f: ignore \f */
int	sflag;		/* -s: ignore \s */

/* Bookkeeping for the knowncmds table. */
int	ncmds;		/* size of knowncmds */
int	slot;		/* slot in knowncmds found by binsrch */

char	*malloc();
192:
1.5 deraadt 193: int
1.1 deraadt 194: main(argc, argv)
195: int argc;
196: char **argv;
197: {
198: FILE *f;
199: int i;
200: char *cp;
201: char b1[4];
202:
203: /* Figure out how many known commands there are */
204: while (knowncmds[ncmds])
205: ncmds++;
206: while (argc > 1 && argv[1][0] == '-') {
207: switch(argv[1][1]) {
208:
209: /* -a: add pairs of macros */
210: case 'a':
211: i = strlen(argv[1]) - 2;
212: if (i % 6 != 0)
213: usage();
214: /* look for empty macro slots */
215: for (i=0; br[i].opbr; i++)
216: ;
217: for (cp=argv[1]+3; cp[-1]; cp += 6) {
218: br[i].opbr = malloc(3);
219: strncpy(br[i].opbr, cp, 2);
220: br[i].clbr = malloc(3);
221: strncpy(br[i].clbr, cp+3, 2);
222: addmac(br[i].opbr); /* knows pairs are also known cmds */
223: addmac(br[i].clbr);
224: i++;
225: }
226: break;
227:
228: /* -c: add known commands */
229: case 'c':
230: i = strlen(argv[1]) - 2;
231: if (i % 3 != 0)
232: usage();
233: for (cp=argv[1]+3; cp[-1]; cp += 3) {
234: if (cp[2] && cp[2] != '.')
235: usage();
236: strncpy(b1, cp, 2);
237: addmac(b1);
238: }
239: break;
240:
241: /* -f: ignore font changes */
242: case 'f':
243: fflag = 1;
244: break;
245:
246: /* -s: ignore size changes */
247: case 's':
248: sflag = 1;
249: break;
250: default:
251: usage();
252: }
253: argc--; argv++;
254: }
255:
256: nfiles = argc - 1;
257:
258: if (nfiles > 0) {
259: for (i=1; i<argc; i++) {
260: cfilename = argv[i];
261: f = fopen(cfilename, "r");
262: if (f == NULL)
263: perror(cfilename);
264: else
265: process(f);
266: }
267: } else {
268: cfilename = "stdin";
269: process(stdin);
270: }
271: exit(0);
272: }
273:
/*
 * usage: write the command synopsis to stderr and exit with status 1.
 */
void
usage(void)
{
	static const char synopsis[] =
	    "usage: checknr [-fs] [-a.x1.y1.x2.y2. ... .xn.yn] "
	    "[-c.x1.x2.x3. ... .xn] [file]\n";

	(void)fputs(synopsis, stderr);
	exit(1);
}
282:
1.5 deraadt 283: void
1.1 deraadt 284: process(f)
285: FILE *f;
286: {
1.7 mpech 287: int i, n;
1.1 deraadt 288: char mac[5]; /* The current macro or nroff command */
289: int pl;
290:
291: stktop = -1;
292: for (lineno = 1; fgets(line, sizeof line, f); lineno++) {
293: if (line[0] == '.') {
294: /*
295: * find and isolate the macro/command name.
296: */
297: strncpy(mac, line+1, 4);
298: if (isspace(mac[0])) {
299: pe(lineno);
300: printf("Empty command\n");
301: } else if (isspace(mac[1])) {
302: mac[1] = 0;
303: } else if (isspace(mac[2])) {
304: mac[2] = 0;
305: } else if (mac[0] != '\\' || mac[1] != '\"') {
306: pe(lineno);
307: printf("Command too long\n");
308: }
309:
310: /*
311: * Is it a known command?
312: */
313: checkknown(mac);
314:
315: /*
316: * Should we add it?
317: */
318: if (eq(mac, "de"))
319: addcmd(line);
320:
321: chkcmd(line, mac);
322: }
323:
324: /*
325: * At this point we process the line looking
326: * for \s and \f.
327: */
328: for (i=0; line[i]; i++)
329: if (line[i]=='\\' && (i==0 || line[i-1]!='\\')) {
330: if (!sflag && line[++i]=='s') {
331: pl = line[++i];
332: if (isdigit(pl)) {
333: n = pl - '0';
334: pl = ' ';
335: } else
336: n = 0;
337: while (isdigit(line[++i]))
338: n = 10 * n + line[i] - '0';
339: i--;
340: if (n == 0) {
341: if (stk[stktop].opno == SZ) {
342: stktop--;
343: } else {
344: pe(lineno);
345: printf("unmatched \\s0\n");
346: }
347: } else {
348: stk[++stktop].opno = SZ;
349: stk[stktop].pl = pl;
350: stk[stktop].parm = n;
351: stk[stktop].lno = lineno;
352: }
353: } else if (!fflag && line[i]=='f') {
354: n = line[++i];
355: if (n == 'P') {
356: if (stk[stktop].opno == FT) {
357: stktop--;
358: } else {
359: pe(lineno);
360: printf("unmatched \\fP\n");
361: }
362: } else {
363: stk[++stktop].opno = FT;
364: stk[stktop].pl = 1;
365: stk[stktop].parm = n;
366: stk[stktop].lno = lineno;
367: }
368: }
369: }
370: }
371: /*
372: * We've hit the end and look at all this stuff that hasn't been
373: * matched yet! Complain, complain.
374: */
375: for (i=stktop; i>=0; i--) {
376: complain(i);
377: }
378: }
379:
1.5 deraadt 380: void
1.1 deraadt 381: complain(i)
382: {
383: pe(stk[i].lno);
384: printf("Unmatched ");
385: prop(i);
386: printf("\n");
387: }
388:
1.5 deraadt 389: void
1.1 deraadt 390: prop(i)
1.5 deraadt 391: int i;
1.1 deraadt 392: {
393: if (stk[i].pl == 0)
394: printf(".%s", br[stk[i].opno].opbr);
395: else switch(stk[i].opno) {
396: case SZ:
397: printf("\\s%c%d", stk[i].pl, stk[i].parm);
398: break;
399: case FT:
400: printf("\\f%c", stk[i].parm);
401: break;
402: default:
403: printf("Bug: stk[%d].opno = %d = .%s, .%s",
404: i, stk[i].opno, br[stk[i].opno].opbr, br[stk[i].opno].clbr);
405: }
406: }
407:
1.5 deraadt 408: void
1.1 deraadt 409: chkcmd(line, mac)
410: char *line;
411: char *mac;
412: {
1.7 mpech 413: int i;
1.1 deraadt 414:
415: /*
416: * Check to see if it matches top of stack.
417: */
418: if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr))
419: stktop--; /* OK. Pop & forget */
420: else {
421: /* No. Maybe it's an opener */
422: for (i=0; br[i].opbr; i++) {
423: if (eq(mac, br[i].opbr)) {
424: /* Found. Push it. */
425: stktop++;
426: stk[stktop].opno = i;
427: stk[stktop].pl = 0;
428: stk[stktop].parm = 0;
429: stk[stktop].lno = lineno;
430: break;
431: }
432: /*
433: * Maybe it's an unmatched closer.
434: * NOTE: this depends on the fact
435: * that none of the closers can be
436: * openers too.
437: */
438: if (eq(mac, br[i].clbr)) {
439: nomatch(mac);
440: break;
441: }
442: }
443: }
444: }
445:
1.5 deraadt 446: void
1.1 deraadt 447: nomatch(mac)
448: char *mac;
449: {
1.7 mpech 450: int i, j;
1.1 deraadt 451:
452: /*
453: * Look for a match further down on stack
454: * If we find one, it suggests that the stuff in
455: * between is supposed to match itself.
456: */
457: for (j=stktop; j>=0; j--)
458: if (eq(mac,br[stk[j].opno].clbr)) {
459: /* Found. Make a good diagnostic. */
460: if (j == stktop-2) {
461: /*
462: * Check for special case \fx..\fR and don't
463: * complain.
464: */
465: if (stk[j+1].opno==FT && stk[j+1].parm!='R'
466: && stk[j+2].opno==FT && stk[j+2].parm=='R') {
467: stktop = j -1;
468: return;
469: }
470: /*
471: * We have two unmatched frobs. Chances are
472: * they were intended to match, so we mention
473: * them together.
474: */
475: pe(stk[j+1].lno);
476: prop(j+1);
477: printf(" does not match %d: ", stk[j+2].lno);
478: prop(j+2);
479: printf("\n");
480: } else for (i=j+1; i <= stktop; i++) {
481: complain(i);
482: }
483: stktop = j-1;
484: return;
485: }
486: /* Didn't find one. Throw this away. */
487: pe(lineno);
488: printf("Unmatched .%s\n", mac);
489: }
490:
/* eq: return 1 when s1 and s2 hold the same string, 0 otherwise */
int
eq(char *s1, char *s2)
{
	if (strcmp(s1, s2) != 0)
		return 0;
	return 1;
}
498:
499: /* print the first part of an error message, given the line number */
1.5 deraadt 500: void
1.1 deraadt 501: pe(lineno)
502: int lineno;
503: {
504: if (nfiles > 1)
505: printf("%s: ", cfilename);
506: printf("%d: ", lineno);
507: }
508:
1.5 deraadt 509: void
1.1 deraadt 510: checkknown(mac)
511: char *mac;
512: {
513:
514: if (eq(mac, "."))
515: return;
516: if (binsrch(mac) >= 0)
517: return;
518: if (mac[0] == '\\' && mac[1] == '"') /* comments */
519: return;
520:
521: pe(lineno);
522: printf("Unknown command: .%s\n", mac);
523: }
524:
525: /*
526: * We have a .de xx line in "line". Add xx to the list of known commands.
527: */
1.5 deraadt 528: void
1.1 deraadt 529: addcmd(line)
530: char *line;
531: {
532: char *mac;
533:
534: /* grab the macro being defined */
535: mac = line+4;
536: while (isspace(*mac))
537: mac++;
538: if (*mac == 0) {
539: pe(lineno);
540: printf("illegal define: %s\n", line);
541: return;
542: }
543: mac[2] = 0;
544: if (isspace(mac[1]) || mac[1] == '\\')
545: mac[1] = 0;
546: if (ncmds >= MAXCMDS) {
547: printf("Only %d known commands allowed\n", MAXCMDS);
548: exit(1);
549: }
550: addmac(mac);
551: }
552:
553: /*
554: * Add mac to the list. We should really have some kind of tree
555: * structure here but this is a quick-and-dirty job and I just don't
556: * have time to mess with it. (I wonder if this will come back to haunt
557: * me someday?) Anyway, I claim that .de is fairly rare in user
558: * nroff programs, and the register loop below is pretty fast.
559: */
1.5 deraadt 560: void
1.1 deraadt 561: addmac(mac)
562: char *mac;
563: {
1.7 mpech 564: char **src, **dest, **loc;
1.1 deraadt 565:
566: if (binsrch(mac) >= 0){ /* it's OK to redefine something */
567: #ifdef DEBUG
568: printf("binsrch(%s) -> already in table\n", mac);
1.6 heko 569: #endif /* DEBUG */
1.1 deraadt 570: return;
571: }
572: /* binsrch sets slot as a side effect */
573: #ifdef DEBUG
574: printf("binsrch(%s) -> %d\n", mac, slot);
575: #endif
576: loc = &knowncmds[slot];
577: src = &knowncmds[ncmds-1];
578: dest = src+1;
579: while (dest > loc)
580: *dest-- = *src--;
1.8 deraadt 581: *loc = strdup(mac);
1.1 deraadt 582: ncmds++;
583: #ifdef DEBUG
584: printf("after: %s %s %s %s %s, %d cmds\n", knowncmds[slot-2], knowncmds[slot-1], knowncmds[slot], knowncmds[slot+1], knowncmds[slot+2], ncmds);
585: #endif
586: }
587:
588: /*
589: * Do a binary search in knowncmds for mac.
590: * If found, return the index. If not, return -1.
591: */
1.5 deraadt 592: int
1.1 deraadt 593: binsrch(mac)
594: char *mac;
595: {
1.7 mpech 596: char *p; /* pointer to current cmd in list */
597: int d; /* difference if any */
598: int mid; /* mid point in binary search */
599: int top, bot; /* boundaries of bin search, inclusive */
1.1 deraadt 600:
601: top = ncmds-1;
602: bot = 0;
603: while (top >= bot) {
604: mid = (top+bot)/2;
605: p = knowncmds[mid];
606: d = p[0] - mac[0];
607: if (d == 0)
608: d = p[1] - mac[1];
609: if (d == 0)
610: return mid;
611: if (d < 0)
612: bot = mid + 1;
613: else
614: top = mid - 1;
615: }
616: slot = bot; /* place it would have gone */
617: return -1;
618: }