Annotation of src/usr.bin/checknr/checknr.c, Revision 1.15
1.15 ! ray 1: /* $OpenBSD: checknr.c,v 1.14 2005/03/29 23:46:19 jaredy Exp $ */
1.1 deraadt 2: /* $NetBSD: checknr.c,v 1.4 1995/03/26 04:10:19 glass Exp $ */
3:
4: /*
5: * Copyright (c) 1980, 1993
6: * The Regents of the University of California. All rights reserved.
7: *
8: * Redistribution and use in source and binary forms, with or without
9: * modification, are permitted provided that the following conditions
10: * are met:
11: * 1. Redistributions of source code must retain the above copyright
12: * notice, this list of conditions and the following disclaimer.
13: * 2. Redistributions in binary form must reproduce the above copyright
14: * notice, this list of conditions and the following disclaimer in the
15: * documentation and/or other materials provided with the distribution.
1.9 millert 16: * 3. Neither the name of the University nor the names of its contributors
1.1 deraadt 17: * may be used to endorse or promote products derived from this software
18: * without specific prior written permission.
19: *
20: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30: * SUCH DAMAGE.
31: */
32:
33: #ifndef lint
1.12 mickey 34: static const char copyright[] =
1.1 deraadt 35: "@(#) Copyright (c) 1980, 1993\n\
36: The Regents of the University of California. All rights reserved.\n";
37: #endif /* not lint */
38:
39: #ifndef lint
40: #if 0
1.12 mickey 41: static const char sccsid[] = "@(#)checknr.c 8.1 (Berkeley) 6/6/93";
1.1 deraadt 42: #else
1.15 ! ray 43: static const char rcsid[] = "$OpenBSD: checknr.c,v 1.14 2005/03/29 23:46:19 jaredy Exp $";
1.1 deraadt 44: #endif
45: #endif /* not lint */
46:
47: /*
48: * checknr: check an nroff/troff input file for matching macro calls.
49: * we also attempt to match size and font changes, but only the embedded
50: * kind. These must end in \s0 and \fP resp. Maybe more sophistication
51: * later but for now think of these restrictions as contributions to
52: * structured typesetting.
53: */
54: #include <stdio.h>
55: #include <string.h>
1.11 deraadt 56: #include <stdlib.h>
57: #include <unistd.h>
1.1 deraadt 58: #include <ctype.h>
1.12 mickey 59: #include <err.h>
1.1 deraadt 60:
61: #define MAXSTK 100 /* Stack size */
62: #define MAXBR 100 /* Max number of bracket pairs known */
63: #define MAXCMDS 500 /* Max number of commands known */
64:
/*
 * Stack of openers that have been seen but not yet closed.
 * stktop is the index of the newest entry; process() sets it to -1
 * (empty stack) before reading a file.
 */
struct stkstr {
	int	opno;	/* index into br[] of the opening bracket */
	int	pl;	/* 0 for a macro request, 1 for \f, else the
			   character after \s (' ' when it was a digit) */
	int	parm;	/* size for \s, font letter for \f, 0 otherwise */
	int	lno;	/* input line number where the opener appeared */
};

struct stkstr	stk[MAXSTK];
int		stktop;
75:
void	usage(void);
void	addmac(char *mac);
void	process(FILE *f);
void	pe(int lineno);
int	eq(char *s1, char *s2);
void	complain(int i);
void	prop(int i);
void	chkcmd(char *line, char *mac);
void	addcmd(char *line);
void	nomatch(char *mac);
void	checkknown(char *mac);
int	binsrch(char *mac);
88:
/*
 * The kinds of opening and closing brackets.
 *
 * Slots SZ and FT are referred to by index from the \s and \f handling.
 * The table is scanned up to the first entry whose opbr is null; main()
 * appends the pairs given with -a at that position.
 */
#define SZ	0	/* slot used for size changes */
#define FT	1	/* slot used for font changes */

struct brstr {
	char	*opbr;	/* request that opens the bracket */
	char	*clbr;	/* request that closes it */
} br[MAXBR] = {
	/* A few bare bones troff commands */
	{ "sz", "sz" },		/* SZ: also \s */
	{ "ft", "ft" },		/* FT: also \f */

	/* the -mm package */
	{ "AL", "LE" },
	{ "AS", "AE" },
	{ "BL", "LE" },
	{ "BS", "BE" },
	{ "DF", "DE" },
	{ "DL", "LE" },
	{ "DS", "DE" },
	{ "FS", "FE" },
	{ "ML", "LE" },
	{ "NS", "NE" },
	{ "RL", "LE" },
	{ "VL", "LE" },

	/* the -ms package */
	{ "AB", "AE" },
	{ "BD", "DE" },
	{ "CD", "DE" },
	{ "DS", "DE" },
	{ "FS", "FE" },
	{ "ID", "DE" },
	{ "KF", "KE" },
	{ "KS", "KE" },
	{ "LD", "DE" },
	{ "LG", "NL" },
	{ "QS", "QE" },
	{ "RS", "RE" },
	{ "SM", "NL" },
	{ "XA", "XE" },
	{ "XS", "XE" },

	/* The -me package */
	{ "(b", ")b" },
	{ "(c", ")c" },
	{ "(d", ")d" },
	{ "(f", ")f" },
	{ "(l", ")l" },
	{ "(q", ")q" },
	{ "(x", ")x" },
	{ "(z", ")z" },

	/* Things needed by preprocessors */
	{ "EQ", "EN" },
	{ "TS", "TE" },

	/* Refer */
	{ "[", "]" },

	/* end of list */
	{ NULL, NULL }
};
146:
147: /*
148: * All commands known to nroff, plus macro packages.
149: * Used so we can complain about unrecognized commands.
150: */
151: char *knowncmds[MAXCMDS] = {
152: "$c", "$f", "$h", "$p", "$s", "(b", "(c", "(d", "(f", "(l", "(q", "(t",
153: "(x", "(z", ")b", ")c", ")d", ")f", ")l", ")q", ")t", ")x", ")z", "++",
154: "+c", "1C", "1c", "2C", "2c", "@(", "@)", "@C", "@D", "@F", "@I", "@M",
155: "@c", "@e", "@f", "@h", "@m", "@n", "@o", "@p", "@r", "@t", "@z", "AB",
156: "AE", "AF", "AI", "AL", "AM", "AS", "AT", "AU", "AX", "B", "B1", "B2",
157: "BD", "BE", "BG", "BL", "BS", "BT", "BX", "C1", "C2", "CD", "CM", "CT",
158: "D", "DA", "DE", "DF", "DL", "DS", "DT", "EC", "EF", "EG", "EH", "EM",
159: "EN", "EQ", "EX", "FA", "FD", "FE", "FG", "FJ", "FK", "FL", "FN", "FO",
160: "FQ", "FS", "FV", "FX", "H", "HC", "HD", "HM", "HO", "HU", "I", "ID",
161: "IE", "IH", "IM", "IP", "IX", "IZ", "KD", "KE", "KF", "KQ", "KS", "LB",
162: "LC", "LD", "LE", "LG", "LI", "LP", "MC", "ME", "MF", "MH", "ML", "MR",
163: "MT", "ND", "NE", "NH", "NL", "NP", "NS", "OF", "OH", "OK", "OP", "P",
164: "P1", "PF", "PH", "PP", "PT", "PX", "PY", "QE", "QP", "QS", "R", "RA",
165: "RC", "RE", "RL", "RP", "RQ", "RS", "RT", "S", "S0", "S2", "S3", "SA",
166: "SG", "SH", "SK", "SM", "SP", "SY", "T&", "TA", "TB", "TC", "TD", "TE",
167: "TH", "TL", "TM", "TP", "TQ", "TR", "TS", "TX", "UL", "US", "UX", "VL",
168: "WC", "WH", "XA", "XD", "XE", "XF", "XK", "XP", "XS", "[", "[-", "[0",
169: "[1", "[2", "[3", "[4", "[5", "[<", "[>", "[]", "]", "]-", "]<", "]>",
170: "][", "ab", "ac", "ad", "af", "am", "ar", "as", "b", "ba", "bc", "bd",
171: "bi", "bl", "bp", "br", "bx", "c.", "c2", "cc", "ce", "cf", "ch", "cs",
172: "ct", "cu", "da", "de", "di", "dl", "dn", "ds", "dt", "dw", "dy", "ec",
173: "ef", "eh", "el", "em", "eo", "ep", "ev", "ex", "fc", "fi", "fl", "fo",
174: "fp", "ft", "fz", "hc", "he", "hl", "hp", "ht", "hw", "hx", "hy", "i",
175: "ie", "if", "ig", "in", "ip", "it", "ix", "lc", "lg", "li", "ll", "ln",
176: "lo", "lp", "ls", "lt", "m1", "m2", "m3", "m4", "mc", "mk", "mo", "n1",
177: "n2", "na", "ne", "nf", "nh", "nl", "nm", "nn", "np", "nr", "ns", "nx",
178: "of", "oh", "os", "pa", "pc", "pi", "pl", "pm", "pn", "po", "pp", "ps",
179: "q", "r", "rb", "rd", "re", "rm", "rn", "ro", "rr", "rs", "rt", "sb",
180: "sc", "sh", "sk", "so", "sp", "ss", "st", "sv", "sz", "ta", "tc", "th",
181: "ti", "tl", "tm", "tp", "tr", "u", "uf", "uh", "ul", "vs", "wh", "xp",
182: "yr", 0
183: };
184:
/* State of the input currently being checked. */
int	 lineno;	/* number of the line now in "line" */
char	 line[256];	/* text of the current input line */
char	*cfilename;	/* name of the file being read */
int	 nfiles;	/* how many files were named on the command line */

/* Command-line switches. */
int	 fflag;		/* -f given: do not check \f */
int	 sflag;		/* -s given: do not check \s */

/* Bookkeeping for the knowncmds list. */
int	 ncmds;		/* number of entries in knowncmds */
int	 slot;		/* insertion point left behind by binsrch */
193:
1.5 deraadt 194: int
1.10 deraadt 195: main(int argc, char *argv[])
1.1 deraadt 196: {
197: FILE *f;
198: int i;
199: char *cp;
200: char b1[4];
201:
202: /* Figure out how many known commands there are */
203: while (knowncmds[ncmds])
204: ncmds++;
205: while (argc > 1 && argv[1][0] == '-') {
206: switch(argv[1][1]) {
207:
208: /* -a: add pairs of macros */
209: case 'a':
210: i = strlen(argv[1]) - 2;
1.15 ! ray 211: if (i == 0 || i % 6 != 0)
1.1 deraadt 212: usage();
213: /* look for empty macro slots */
214: for (i=0; br[i].opbr; i++)
215: ;
216: for (cp=argv[1]+3; cp[-1]; cp += 6) {
1.14 jaredy 217: if (i >= MAXBR)
218: errx(1, "too many pairs");
219: if ((br[i].opbr = malloc(3)) == NULL)
220: err(1, "malloc");
221: strlcpy(br[i].opbr, cp, 3);
222: if ((br[i].clbr = malloc(3)) == NULL)
223: err(1, "malloc");
224: strlcpy(br[i].clbr, cp+3, 3);
1.1 deraadt 225: addmac(br[i].opbr); /* knows pairs are also known cmds */
226: addmac(br[i].clbr);
227: i++;
228: }
229: break;
230:
231: /* -c: add known commands */
232: case 'c':
233: i = strlen(argv[1]) - 2;
1.15 ! ray 234: if (i == 0 || i % 3 != 0)
1.1 deraadt 235: usage();
236: for (cp=argv[1]+3; cp[-1]; cp += 3) {
237: if (cp[2] && cp[2] != '.')
238: usage();
239: strncpy(b1, cp, 2);
240: addmac(b1);
241: }
242: break;
243:
244: /* -f: ignore font changes */
245: case 'f':
246: fflag = 1;
247: break;
248:
249: /* -s: ignore size changes */
250: case 's':
251: sflag = 1;
252: break;
253: default:
254: usage();
255: }
256: argc--; argv++;
257: }
258:
259: nfiles = argc - 1;
260:
261: if (nfiles > 0) {
262: for (i=1; i<argc; i++) {
263: cfilename = argv[i];
264: f = fopen(cfilename, "r");
265: if (f == NULL)
1.13 cloder 266: warn("%s", cfilename);
1.15 ! ray 267: else {
1.1 deraadt 268: process(f);
1.15 ! ray 269: fclose(f);
! 270: }
1.1 deraadt 271: }
272: } else {
273: cfilename = "stdin";
274: process(stdin);
275: }
276: exit(0);
277: }
278:
/*
 * usage: write the synopsis to standard error and exit with status 1.
 */
void
usage(void)
{
	extern char *__progname;

	fputs("usage: ", stderr);
	fputs(__progname, stderr);
	fputs(" [-fs] [-a.x1.y1.x2.y2. ... .xn.yn] "
	    "[-c.x1.x2.x3. ... .xn] [file]\n", stderr);
	exit(1);
}
288:
1.5 deraadt 289: void
1.10 deraadt 290: process(FILE *f)
1.1 deraadt 291: {
1.7 mpech 292: int i, n;
1.1 deraadt 293: char mac[5]; /* The current macro or nroff command */
294: int pl;
295:
296: stktop = -1;
297: for (lineno = 1; fgets(line, sizeof line, f); lineno++) {
298: if (line[0] == '.') {
299: /*
300: * find and isolate the macro/command name.
301: */
302: strncpy(mac, line+1, 4);
303: if (isspace(mac[0])) {
304: pe(lineno);
305: printf("Empty command\n");
306: } else if (isspace(mac[1])) {
307: mac[1] = 0;
308: } else if (isspace(mac[2])) {
309: mac[2] = 0;
310: } else if (mac[0] != '\\' || mac[1] != '\"') {
311: pe(lineno);
312: printf("Command too long\n");
313: }
314:
315: /*
316: * Is it a known command?
317: */
318: checkknown(mac);
319:
320: /*
321: * Should we add it?
322: */
323: if (eq(mac, "de"))
324: addcmd(line);
325:
326: chkcmd(line, mac);
327: }
328:
329: /*
330: * At this point we process the line looking
331: * for \s and \f.
332: */
333: for (i=0; line[i]; i++)
334: if (line[i]=='\\' && (i==0 || line[i-1]!='\\')) {
335: if (!sflag && line[++i]=='s') {
336: pl = line[++i];
337: if (isdigit(pl)) {
338: n = pl - '0';
339: pl = ' ';
340: } else
341: n = 0;
342: while (isdigit(line[++i]))
343: n = 10 * n + line[i] - '0';
344: i--;
345: if (n == 0) {
346: if (stk[stktop].opno == SZ) {
347: stktop--;
348: } else {
349: pe(lineno);
350: printf("unmatched \\s0\n");
351: }
352: } else {
353: stk[++stktop].opno = SZ;
354: stk[stktop].pl = pl;
355: stk[stktop].parm = n;
356: stk[stktop].lno = lineno;
357: }
358: } else if (!fflag && line[i]=='f') {
359: n = line[++i];
360: if (n == 'P') {
361: if (stk[stktop].opno == FT) {
362: stktop--;
363: } else {
364: pe(lineno);
365: printf("unmatched \\fP\n");
366: }
367: } else {
368: stk[++stktop].opno = FT;
369: stk[stktop].pl = 1;
370: stk[stktop].parm = n;
371: stk[stktop].lno = lineno;
372: }
373: }
374: }
375: }
376: /*
377: * We've hit the end and look at all this stuff that hasn't been
378: * matched yet! Complain, complain.
379: */
380: for (i=stktop; i>=0; i--) {
381: complain(i);
382: }
383: }
384:
1.5 deraadt 385: void
1.10 deraadt 386: complain(int i)
1.1 deraadt 387: {
388: pe(stk[i].lno);
389: printf("Unmatched ");
390: prop(i);
391: printf("\n");
392: }
393:
1.5 deraadt 394: void
1.10 deraadt 395: prop(int i)
1.1 deraadt 396: {
397: if (stk[i].pl == 0)
398: printf(".%s", br[stk[i].opno].opbr);
399: else switch(stk[i].opno) {
400: case SZ:
401: printf("\\s%c%d", stk[i].pl, stk[i].parm);
402: break;
403: case FT:
404: printf("\\f%c", stk[i].parm);
405: break;
406: default:
407: printf("Bug: stk[%d].opno = %d = .%s, .%s",
408: i, stk[i].opno, br[stk[i].opno].opbr, br[stk[i].opno].clbr);
409: }
410: }
411:
1.5 deraadt 412: void
1.10 deraadt 413: chkcmd(char *line, char *mac)
1.1 deraadt 414: {
1.7 mpech 415: int i;
1.1 deraadt 416:
417: /*
418: * Check to see if it matches top of stack.
419: */
420: if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr))
421: stktop--; /* OK. Pop & forget */
422: else {
423: /* No. Maybe it's an opener */
424: for (i=0; br[i].opbr; i++) {
425: if (eq(mac, br[i].opbr)) {
426: /* Found. Push it. */
427: stktop++;
428: stk[stktop].opno = i;
429: stk[stktop].pl = 0;
430: stk[stktop].parm = 0;
431: stk[stktop].lno = lineno;
432: break;
433: }
434: /*
435: * Maybe it's an unmatched closer.
436: * NOTE: this depends on the fact
437: * that none of the closers can be
438: * openers too.
439: */
440: if (eq(mac, br[i].clbr)) {
441: nomatch(mac);
442: break;
443: }
444: }
445: }
446: }
447:
1.5 deraadt 448: void
1.10 deraadt 449: nomatch(char *mac)
1.1 deraadt 450: {
1.7 mpech 451: int i, j;
1.1 deraadt 452:
453: /*
454: * Look for a match further down on stack
455: * If we find one, it suggests that the stuff in
456: * between is supposed to match itself.
457: */
458: for (j=stktop; j>=0; j--)
459: if (eq(mac,br[stk[j].opno].clbr)) {
460: /* Found. Make a good diagnostic. */
461: if (j == stktop-2) {
462: /*
463: * Check for special case \fx..\fR and don't
464: * complain.
465: */
466: if (stk[j+1].opno==FT && stk[j+1].parm!='R'
467: && stk[j+2].opno==FT && stk[j+2].parm=='R') {
468: stktop = j -1;
469: return;
470: }
471: /*
472: * We have two unmatched frobs. Chances are
473: * they were intended to match, so we mention
474: * them together.
475: */
476: pe(stk[j+1].lno);
477: prop(j+1);
478: printf(" does not match %d: ", stk[j+2].lno);
479: prop(j+2);
480: printf("\n");
481: } else for (i=j+1; i <= stktop; i++) {
482: complain(i);
483: }
484: stktop = j-1;
485: return;
486: }
487: /* Didn't find one. Throw this away. */
488: pe(lineno);
489: printf("Unmatched .%s\n", mac);
490: }
491:
/* eq: return 1 when s1 and s2 hold the same string, 0 otherwise */
int
eq(char *s1, char *s2)
{
	if (strcmp(s1, s2) != 0)
		return 0;
	return 1;
}
498:
499: /* print the first part of an error message, given the line number */
1.5 deraadt 500: void
1.10 deraadt 501: pe(int lineno)
1.1 deraadt 502: {
503: if (nfiles > 1)
504: printf("%s: ", cfilename);
505: printf("%d: ", lineno);
506: }
507:
1.5 deraadt 508: void
1.10 deraadt 509: checkknown(char *mac)
1.1 deraadt 510: {
511:
512: if (eq(mac, "."))
513: return;
514: if (binsrch(mac) >= 0)
515: return;
516: if (mac[0] == '\\' && mac[1] == '"') /* comments */
517: return;
518:
519: pe(lineno);
520: printf("Unknown command: .%s\n", mac);
521: }
522:
523: /*
524: * We have a .de xx line in "line". Add xx to the list of known commands.
525: */
1.5 deraadt 526: void
1.10 deraadt 527: addcmd(char *line)
1.1 deraadt 528: {
529: char *mac;
530:
531: /* grab the macro being defined */
532: mac = line+4;
533: while (isspace(*mac))
534: mac++;
535: if (*mac == 0) {
536: pe(lineno);
537: printf("illegal define: %s\n", line);
538: return;
539: }
540: mac[2] = 0;
541: if (isspace(mac[1]) || mac[1] == '\\')
542: mac[1] = 0;
543: if (ncmds >= MAXCMDS) {
544: printf("Only %d known commands allowed\n", MAXCMDS);
545: exit(1);
546: }
547: addmac(mac);
548: }
549:
550: /*
551: * Add mac to the list. We should really have some kind of tree
552: * structure here but this is a quick-and-dirty job and I just don't
553: * have time to mess with it. (I wonder if this will come back to haunt
554: * me someday?) Anyway, I claim that .de is fairly rare in user
555: * nroff programs, and the register loop below is pretty fast.
556: */
1.5 deraadt 557: void
1.10 deraadt 558: addmac(char *mac)
1.1 deraadt 559: {
1.7 mpech 560: char **src, **dest, **loc;
1.1 deraadt 561:
562: if (binsrch(mac) >= 0){ /* it's OK to redefine something */
563: #ifdef DEBUG
564: printf("binsrch(%s) -> already in table\n", mac);
1.6 heko 565: #endif /* DEBUG */
1.1 deraadt 566: return;
567: }
568: /* binsrch sets slot as a side effect */
569: #ifdef DEBUG
570: printf("binsrch(%s) -> %d\n", mac, slot);
571: #endif
572: loc = &knowncmds[slot];
573: src = &knowncmds[ncmds-1];
574: dest = src+1;
575: while (dest > loc)
576: *dest-- = *src--;
1.14 jaredy 577: if ((*loc = strdup(mac)) == NULL)
578: err(1, "strdup");
1.1 deraadt 579: ncmds++;
580: #ifdef DEBUG
581: printf("after: %s %s %s %s %s, %d cmds\n", knowncmds[slot-2], knowncmds[slot-1], knowncmds[slot], knowncmds[slot+1], knowncmds[slot+2], ncmds);
582: #endif
583: }
584:
585: /*
586: * Do a binary search in knowncmds for mac.
587: * If found, return the index. If not, return -1.
588: */
1.5 deraadt 589: int
1.10 deraadt 590: binsrch(char *mac)
1.1 deraadt 591: {
1.7 mpech 592: char *p; /* pointer to current cmd in list */
593: int d; /* difference if any */
594: int mid; /* mid point in binary search */
595: int top, bot; /* boundaries of bin search, inclusive */
1.1 deraadt 596:
597: top = ncmds-1;
598: bot = 0;
599: while (top >= bot) {
600: mid = (top+bot)/2;
601: p = knowncmds[mid];
602: d = p[0] - mac[0];
603: if (d == 0)
604: d = p[1] - mac[1];
605: if (d == 0)
606: return mid;
607: if (d < 0)
608: bot = mid + 1;
609: else
610: top = mid - 1;
611: }
612: slot = bot; /* place it would have gone */
613: return -1;
614: }