Annotation of src/usr.bin/mandoc/man_validate.c, Revision 1.114
1.114 ! schwarze 1: /* $OpenBSD: man_validate.c,v 1.113 2018/12/31 04:55:42 schwarze Exp $ */
1.1 kristaps 2: /*
1.38 schwarze 3: * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
1.105 schwarze 4: * Copyright (c) 2010, 2012-2018 Ingo Schwarze <schwarze@openbsd.org>
1.1 kristaps 5: *
6: * Permission to use, copy, modify, and distribute this software for any
1.2 schwarze 7: * purpose with or without fee is hereby granted, provided that the above
8: * copyright notice and this permission notice appear in all copies.
1.1 kristaps 9: *
1.85 schwarze 10: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
1.2 schwarze 11: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
1.85 schwarze 12: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
1.2 schwarze 13: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
1.1 kristaps 17: */
18: #include <sys/types.h>
19:
20: #include <assert.h>
21: #include <ctype.h>
1.6 schwarze 22: #include <errno.h>
23: #include <limits.h>
1.1 kristaps 24: #include <stdarg.h>
1.111 schwarze 25: #include <stdio.h>
1.1 kristaps 26: #include <stdlib.h>
1.28 schwarze 27: #include <string.h>
1.36 schwarze 28: #include <time.h>
1.1 kristaps 29:
1.85 schwarze 30: #include "mandoc_aux.h"
31: #include "mandoc.h"
32: #include "roff.h"
1.44 schwarze 33: #include "man.h"
1.85 schwarze 34: #include "libmandoc.h"
1.89 schwarze 35: #include "roff_int.h"
1.1 kristaps 36: #include "libman.h"
37:
/*
 * Common parameter list shared by every validation handler in this file:
 * the parser state and the node currently being validated.
 */
#define	CHKARGS	  struct roff_man *man, struct roff_node *n

/* Signature of a per-macro validation handler. */
typedef	void	(*v_check)(CHKARGS);

/* Generic checks, selected by node type or reused by several macros. */
static	void	  check_abort(CHKARGS);
static	void	  check_par(CHKARGS);
static	void	  check_part(CHKARGS);
static	void	  check_root(CHKARGS);
static	void	  check_text(CHKARGS);

/* Macro-specific handlers, run after the children have been validated. */
static	void	  post_AT(CHKARGS);
static	void	  post_IP(CHKARGS);
static	void	  post_OP(CHKARGS);
static	void	  post_SH(CHKARGS);
static	void	  post_TH(CHKARGS);
static	void	  post_UC(CHKARGS);
static	void	  post_UR(CHKARGS);
static	void	  post_in(CHKARGS);
1.34 schwarze 56:
1.106 schwarze 57: static const v_check man_valids[MAN_MAX - MAN_TH] = {
1.76 schwarze 58: post_TH, /* TH */
1.110 schwarze 59: post_SH, /* SH */
60: post_SH, /* SS */
1.76 schwarze 61: NULL, /* TP */
1.107 schwarze 62: NULL, /* TQ */
1.109 schwarze 63: check_abort,/* LP */
1.76 schwarze 64: check_par, /* PP */
1.109 schwarze 65: check_abort,/* P */
1.76 schwarze 66: post_IP, /* IP */
67: NULL, /* HP */
68: NULL, /* SM */
69: NULL, /* SB */
70: NULL, /* BI */
71: NULL, /* IB */
72: NULL, /* BR */
73: NULL, /* RB */
74: NULL, /* R */
75: NULL, /* B */
76: NULL, /* I */
77: NULL, /* IR */
78: NULL, /* RI */
79: NULL, /* RE */
80: check_part, /* RS */
81: NULL, /* DT */
82: post_UC, /* UC */
1.83 schwarze 83: NULL, /* PD */
1.76 schwarze 84: post_AT, /* AT */
1.100 schwarze 85: post_in, /* in */
1.108 schwarze 86: NULL, /* SY */
87: NULL, /* YS */
1.84 schwarze 88: post_OP, /* OP */
1.92 schwarze 89: NULL, /* EX */
90: NULL, /* EE */
1.76 schwarze 91: post_UR, /* UR */
92: NULL, /* UE */
1.103 bentley 93: post_UR, /* MT */
94: NULL, /* ME */
1.1 kristaps 95: };
96:
97:
1.109 schwarze 98: /* Validate the subtree rooted at man->last. */
1.78 schwarze 99: void
1.113 schwarze 100: man_validate(struct roff_man *man)
1.1 kristaps 101: {
1.86 schwarze 102: struct roff_node *n;
1.94 schwarze 103: const v_check *cp;
1.1 kristaps 104:
1.109 schwarze 105: /*
106: * Translate obsolete macros such that later code
107: * does not need to look for them.
108: */
109:
1.76 schwarze 110: n = man->last;
1.109 schwarze 111: switch (n->tok) {
112: case MAN_LP:
113: case MAN_P:
114: n->tok = MAN_PP;
115: break;
116: default:
117: break;
118: }
119:
120: /*
121: * Iterate over all children, recursing into each one
122: * in turn, depth-first.
123: */
124:
1.92 schwarze 125: man->last = man->last->child;
126: while (man->last != NULL) {
1.113 schwarze 127: man_validate(man);
1.92 schwarze 128: if (man->last == n)
129: man->last = man->last->child;
130: else
131: man->last = man->last->next;
132: }
1.1 kristaps 133:
1.109 schwarze 134: /* Finally validate the macro itself. */
135:
1.92 schwarze 136: man->last = n;
137: man->next = ROFF_NEXT_SIBLING;
1.76 schwarze 138: switch (n->type) {
1.85 schwarze 139: case ROFFT_TEXT:
1.78 schwarze 140: check_text(man, n);
141: break;
1.85 schwarze 142: case ROFFT_ROOT:
1.78 schwarze 143: check_root(man, n);
144: break;
1.105 schwarze 145: case ROFFT_COMMENT:
1.85 schwarze 146: case ROFFT_EQN:
147: case ROFFT_TBL:
1.78 schwarze 148: break;
1.1 kristaps 149: default:
1.95 schwarze 150: if (n->tok < ROFF_MAX) {
1.110 schwarze 151: roff_validate(man);
1.114 ! schwarze 152: man_state(man, n);
1.95 schwarze 153: break;
154: }
155: assert(n->tok >= MAN_TH && n->tok < MAN_MAX);
1.106 schwarze 156: cp = man_valids + (n->tok - MAN_TH);
1.78 schwarze 157: if (*cp)
158: (*cp)(man, n);
1.92 schwarze 159: if (man->last == n)
160: man_state(man, n);
1.78 schwarze 161: break;
1.1 kristaps 162: }
163: }
164:
1.78 schwarze 165: static void
1.63 schwarze 166: check_root(CHKARGS)
1.4 schwarze 167: {
1.73 schwarze 168: assert((man->flags & (MAN_BLINE | MAN_ELINE)) == 0);
1.7 schwarze 169:
1.105 schwarze 170: if (n->last == NULL || n->last->type == ROFFT_COMMENT)
1.112 schwarze 171: mandoc_msg(MANDOCERR_DOC_EMPTY, n->line, n->pos, NULL);
1.65 schwarze 172: else
173: man->meta.hasbody = 1;
174:
175: if (NULL == man->meta.title) {
1.112 schwarze 176: mandoc_msg(MANDOCERR_TH_NOTITLE, n->line, n->pos, NULL);
1.35 schwarze 177:
1.18 schwarze 178: /*
179: * If a title hasn't been set, do so now (by
180: * implication, date and section also aren't set).
181: */
1.35 schwarze 182:
1.77 schwarze 183: man->meta.title = mandoc_strdup("");
184: man->meta.msec = mandoc_strdup("");
1.60 schwarze 185: man->meta.date = man->quick ? mandoc_strdup("") :
1.99 schwarze 186: mandoc_normdate(man, NULL, n->line, n->pos);
1.17 schwarze 187: }
1.101 schwarze 188:
189: if (man->meta.os_e &&
190: (man->meta.rcsids & (1 << man->meta.os_e)) == 0)
1.112 schwarze 191: mandoc_msg(MANDOCERR_RCS_MISSING, 0, 0,
1.102 schwarze 192: man->meta.os_e == MANDOC_OS_OPENBSD ?
193: "(OpenBSD)" : "(NetBSD)");
1.15 schwarze 194: }
195:
/*
 * Placeholder handler for table slots that must never be dispatched to
 * (LP and P, which man_validate() rewrites to PP beforehand).
 * Reaching it indicates a logic error, so terminate immediately.
 */
static void
check_abort(CHKARGS)
{
	abort();
}
201:
202: static void
1.47 schwarze 203: check_text(CHKARGS)
1.4 schwarze 204: {
1.47 schwarze 205: char *cp, *p;
206:
1.114 ! schwarze 207: if (man->flags & ROFF_NOFILL)
1.78 schwarze 208: return;
1.48 schwarze 209:
210: cp = n->string;
211: for (p = cp; NULL != (p = strchr(p, '\t')); p++)
1.112 schwarze 212: mandoc_msg(MANDOCERR_FI_TAB,
213: n->line, n->pos + (int)(p - cp), NULL);
1.1 kristaps 214: }
215:
1.84 schwarze 216: static void
217: post_OP(CHKARGS)
218: {
219:
1.93 schwarze 220: if (n->child == NULL)
1.112 schwarze 221: mandoc_msg(MANDOCERR_OP_EMPTY, n->line, n->pos, "OP");
1.93 schwarze 222: else if (n->child->next != NULL && n->child->next->next != NULL) {
1.84 schwarze 223: n = n->child->next->next;
1.112 schwarze 224: mandoc_msg(MANDOCERR_ARG_EXCESS,
1.84 schwarze 225: n->line, n->pos, "OP ... %s", n->string);
226: }
1.1 kristaps 227: }
228:
1.78 schwarze 229: static void
1.110 schwarze 230: post_SH(CHKARGS)
231: {
232: struct roff_node *nc;
233:
234: if (n->type != ROFFT_BODY || (nc = n->child) == NULL)
235: return;
236:
237: if (nc->tok == MAN_PP && nc->body->child != NULL) {
238: while (nc->body->last != NULL) {
239: man->next = ROFF_NEXT_CHILD;
240: roff_node_relink(man, nc->body->last);
241: man->last = n;
242: }
243: }
244:
245: if (nc->tok == MAN_PP || nc->tok == ROFF_sp || nc->tok == ROFF_br) {
1.112 schwarze 246: mandoc_msg(MANDOCERR_PAR_SKIP, nc->line, nc->pos,
247: "%s after %s", roff_name[nc->tok], roff_name[n->tok]);
1.110 schwarze 248: roff_node_delete(man, nc);
249: }
250:
251: /*
252: * Trailing PP is empty, so it is deleted by check_par().
253: * Trailing sp is significant.
254: */
255:
256: if ((nc = n->last) != NULL && nc->tok == ROFF_br) {
1.112 schwarze 257: mandoc_msg(MANDOCERR_PAR_SKIP,
1.110 schwarze 258: nc->line, nc->pos, "%s at the end of %s",
259: roff_name[nc->tok], roff_name[n->tok]);
260: roff_node_delete(man, nc);
261: }
262: }
263:
264: static void
1.76 schwarze 265: post_UR(CHKARGS)
1.58 schwarze 266: {
1.85 schwarze 267: if (n->type == ROFFT_HEAD && n->child == NULL)
1.112 schwarze 268: mandoc_msg(MANDOCERR_UR_NOHEAD, n->line, n->pos,
269: "%s", roff_name[n->tok]);
1.78 schwarze 270: check_part(man, n);
1.32 schwarze 271: }
1.7 schwarze 272:
1.78 schwarze 273: static void
1.8 schwarze 274: check_part(CHKARGS)
275: {
276:
1.85 schwarze 277: if (n->type == ROFFT_BODY && n->child == NULL)
1.112 schwarze 278: mandoc_msg(MANDOCERR_BLK_EMPTY, n->line, n->pos,
279: "%s", roff_name[n->tok]);
1.8 schwarze 280: }
281:
1.78 schwarze 282: static void
1.36 schwarze 283: check_par(CHKARGS)
284: {
285:
1.39 schwarze 286: switch (n->type) {
1.85 schwarze 287: case ROFFT_BLOCK:
1.93 schwarze 288: if (n->body->child == NULL)
1.89 schwarze 289: roff_node_delete(man, n);
1.39 schwarze 290: break;
1.85 schwarze 291: case ROFFT_BODY:
1.110 schwarze 292: if (n->child != NULL &&
293: (n->child->tok == ROFF_sp || n->child->tok == ROFF_br)) {
1.112 schwarze 294: mandoc_msg(MANDOCERR_PAR_SKIP,
295: n->child->line, n->child->pos,
1.110 schwarze 296: "%s after %s", roff_name[n->child->tok],
297: roff_name[n->tok]);
298: roff_node_delete(man, n->child);
299: }
1.93 schwarze 300: if (n->child == NULL)
1.112 schwarze 301: mandoc_msg(MANDOCERR_PAR_SKIP, n->line, n->pos,
1.94 schwarze 302: "%s empty", roff_name[n->tok]);
1.39 schwarze 303: break;
1.85 schwarze 304: case ROFFT_HEAD:
1.93 schwarze 305: if (n->child != NULL)
1.112 schwarze 306: mandoc_msg(MANDOCERR_ARG_SKIP,
307: n->line, n->pos, "%s %s%s",
1.94 schwarze 308: roff_name[n->tok], n->child->string,
1.93 schwarze 309: n->child->next != NULL ? " ..." : "");
1.39 schwarze 310: break;
311: default:
312: break;
313: }
1.36 schwarze 314: }
315:
1.78 schwarze 316: static void
1.55 schwarze 317: post_IP(CHKARGS)
318: {
319:
320: switch (n->type) {
1.85 schwarze 321: case ROFFT_BLOCK:
1.93 schwarze 322: if (n->head->child == NULL && n->body->child == NULL)
1.89 schwarze 323: roff_node_delete(man, n);
1.55 schwarze 324: break;
1.85 schwarze 325: case ROFFT_BODY:
1.93 schwarze 326: if (n->parent->head->child == NULL && n->child == NULL)
1.112 schwarze 327: mandoc_msg(MANDOCERR_PAR_SKIP, n->line, n->pos,
1.94 schwarze 328: "%s empty", roff_name[n->tok]);
1.55 schwarze 329: break;
330: default:
331: break;
332: }
333: }
1.36 schwarze 334:
1.78 schwarze 335: static void
1.34 schwarze 336: post_TH(CHKARGS)
337: {
1.86 schwarze 338: struct roff_node *nb;
1.40 schwarze 339: const char *p;
1.34 schwarze 340:
1.57 schwarze 341: free(man->meta.title);
342: free(man->meta.vol);
1.87 schwarze 343: free(man->meta.os);
1.57 schwarze 344: free(man->meta.msec);
345: free(man->meta.date);
1.34 schwarze 346:
1.57 schwarze 347: man->meta.title = man->meta.vol = man->meta.date =
1.87 schwarze 348: man->meta.msec = man->meta.os = NULL;
1.34 schwarze 349:
1.64 schwarze 350: nb = n;
351:
1.87 schwarze 352: /* ->TITLE<- MSEC DATE OS VOL */
1.34 schwarze 353:
354: n = n->child;
1.40 schwarze 355: if (n && n->string) {
356: for (p = n->string; '\0' != *p; p++) {
357: /* Only warn about this once... */
1.63 schwarze 358: if (isalpha((unsigned char)*p) &&
359: ! isupper((unsigned char)*p)) {
1.112 schwarze 360: mandoc_msg(MANDOCERR_TITLE_CASE, n->line,
361: n->pos + (int)(p - n->string),
1.74 schwarze 362: "TH %s", n->string);
1.40 schwarze 363: break;
364: }
365: }
1.57 schwarze 366: man->meta.title = mandoc_strdup(n->string);
1.77 schwarze 367: } else {
1.57 schwarze 368: man->meta.title = mandoc_strdup("");
1.112 schwarze 369: mandoc_msg(MANDOCERR_TH_NOTITLE, nb->line, nb->pos, "TH");
1.77 schwarze 370: }
1.34 schwarze 371:
1.87 schwarze 372: /* TITLE ->MSEC<- DATE OS VOL */
1.34 schwarze 373:
1.40 schwarze 374: if (n)
375: n = n->next;
376: if (n && n->string)
1.57 schwarze 377: man->meta.msec = mandoc_strdup(n->string);
1.77 schwarze 378: else {
1.57 schwarze 379: man->meta.msec = mandoc_strdup("");
1.112 schwarze 380: mandoc_msg(MANDOCERR_MSEC_MISSING,
1.77 schwarze 381: nb->line, nb->pos, "TH %s", man->meta.title);
382: }
1.34 schwarze 383:
1.87 schwarze 384: /* TITLE MSEC ->DATE<- OS VOL */
1.34 schwarze 385:
1.40 schwarze 386: if (n)
387: n = n->next;
1.49 schwarze 388: if (n && n->string && '\0' != n->string[0]) {
1.60 schwarze 389: man->meta.date = man->quick ?
390: mandoc_strdup(n->string) :
1.99 schwarze 391: mandoc_normdate(man, n->string, n->line, n->pos);
1.64 schwarze 392: } else {
1.57 schwarze 393: man->meta.date = mandoc_strdup("");
1.112 schwarze 394: mandoc_msg(MANDOCERR_DATE_MISSING,
1.74 schwarze 395: n ? n->line : nb->line,
396: n ? n->pos : nb->pos, "TH");
1.64 schwarze 397: }
1.34 schwarze 398:
1.87 schwarze 399: /* TITLE MSEC DATE ->OS<- VOL */
1.34 schwarze 400:
401: if (n && (n = n->next))
1.87 schwarze 402: man->meta.os = mandoc_strdup(n->string);
1.102 schwarze 403: else if (man->os_s != NULL)
404: man->meta.os = mandoc_strdup(man->os_s);
405: if (man->meta.os_e == MANDOC_OS_OTHER && man->meta.os != NULL) {
406: if (strstr(man->meta.os, "OpenBSD") != NULL)
407: man->meta.os_e = MANDOC_OS_OPENBSD;
408: else if (strstr(man->meta.os, "NetBSD") != NULL)
409: man->meta.os_e = MANDOC_OS_NETBSD;
410: }
1.34 schwarze 411:
1.87 schwarze 412: /* TITLE MSEC DATE OS ->VOL<- */
1.51 schwarze 413: /* If missing, use the default VOL name for MSEC. */
1.34 schwarze 414:
415: if (n && (n = n->next))
1.57 schwarze 416: man->meta.vol = mandoc_strdup(n->string);
417: else if ('\0' != man->meta.msec[0] &&
418: (NULL != (p = mandoc_a2msec(man->meta.msec))))
419: man->meta.vol = mandoc_strdup(p);
1.84 schwarze 420:
421: if (n != NULL && (n = n->next) != NULL)
1.112 schwarze 422: mandoc_msg(MANDOCERR_ARG_EXCESS,
1.84 schwarze 423: n->line, n->pos, "TH ... %s", n->string);
1.34 schwarze 424:
425: /*
426: * Remove the `TH' node after we've processed it for our
427: * meta-data.
428: */
1.89 schwarze 429: roff_node_delete(man, man->last);
1.34 schwarze 430: }
431:
1.78 schwarze 432: static void
1.34 schwarze 433: post_UC(CHKARGS)
434: {
435: static const char * const bsd_versions[] = {
436: "3rd Berkeley Distribution",
437: "4th Berkeley Distribution",
438: "4.2 Berkeley Distribution",
439: "4.3 Berkeley Distribution",
440: "4.4 Berkeley Distribution",
441: };
442:
443: const char *p, *s;
444:
445: n = n->child;
446:
1.85 schwarze 447: if (n == NULL || n->type != ROFFT_TEXT)
1.34 schwarze 448: p = bsd_versions[0];
449: else {
450: s = n->string;
451: if (0 == strcmp(s, "3"))
452: p = bsd_versions[0];
453: else if (0 == strcmp(s, "4"))
454: p = bsd_versions[1];
455: else if (0 == strcmp(s, "5"))
456: p = bsd_versions[2];
457: else if (0 == strcmp(s, "6"))
458: p = bsd_versions[3];
459: else if (0 == strcmp(s, "7"))
460: p = bsd_versions[4];
461: else
462: p = bsd_versions[0];
463: }
464:
1.87 schwarze 465: free(man->meta.os);
466: man->meta.os = mandoc_strdup(p);
1.34 schwarze 467: }
468:
1.78 schwarze 469: static void
1.34 schwarze 470: post_AT(CHKARGS)
471: {
472: static const char * const unix_versions[] = {
473: "7th Edition",
474: "System III",
475: "System V",
476: "System V Release 2",
477: };
478:
1.86 schwarze 479: struct roff_node *nn;
1.34 schwarze 480: const char *p, *s;
481:
482: n = n->child;
483:
1.85 schwarze 484: if (n == NULL || n->type != ROFFT_TEXT)
1.34 schwarze 485: p = unix_versions[0];
486: else {
487: s = n->string;
488: if (0 == strcmp(s, "3"))
489: p = unix_versions[0];
490: else if (0 == strcmp(s, "4"))
491: p = unix_versions[1];
492: else if (0 == strcmp(s, "5")) {
493: nn = n->next;
1.85 schwarze 494: if (nn != NULL &&
495: nn->type == ROFFT_TEXT &&
496: nn->string[0] != '\0')
1.34 schwarze 497: p = unix_versions[3];
498: else
499: p = unix_versions[2];
500: } else
501: p = unix_versions[0];
502: }
503:
1.87 schwarze 504: free(man->meta.os);
505: man->meta.os = mandoc_strdup(p);
1.100 schwarze 506: }
507:
508: static void
509: post_in(CHKARGS)
510: {
511: char *s;
512:
513: if (n->parent->tok != MAN_TP ||
514: n->parent->type != ROFFT_HEAD ||
515: n->child == NULL ||
516: *n->child->string == '+' ||
517: *n->child->string == '-')
518: return;
519: mandoc_asprintf(&s, "+%s", n->child->string);
520: free(n->child->string);
521: n->child->string = s;
1.34 schwarze 522: }