Annotation of src/usr.bin/mandoc/mdoc.c, Revision 1.143
1.143 ! schwarze 1: /* $OpenBSD: mdoc.c,v 1.142 2015/10/06 18:30:44 schwarze Exp $ */
1.1 kristaps 2: /*
1.77 schwarze 3: * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
1.126 schwarze 4: * Copyright (c) 2010, 2012-2015 Ingo Schwarze <schwarze@openbsd.org>
1.1 kristaps 5: *
6: * Permission to use, copy, modify, and distribute this software for any
1.3 schwarze 7: * purpose with or without fee is hereby granted, provided that the above
8: * copyright notice and this permission notice appear in all copies.
1.1 kristaps 9: *
1.130 schwarze 10: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
1.3 schwarze 11: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
1.130 schwarze 12: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
1.3 schwarze 13: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
1.1 kristaps 17: */
1.28 schwarze 18: #include <sys/types.h>
19:
1.1 kristaps 20: #include <assert.h>
1.101 schwarze 21: #include <ctype.h>
1.1 kristaps 22: #include <stdarg.h>
23: #include <stdio.h>
24: #include <stdlib.h>
25: #include <string.h>
1.39 schwarze 26: #include <time.h>
1.1 kristaps 27:
1.128 schwarze 28: #include "mandoc_aux.h"
29: #include "mandoc.h"
30: #include "roff.h"
1.83 schwarze 31: #include "mdoc.h"
1.128 schwarze 32: #include "libmandoc.h"
1.136 schwarze 33: #include "roff_int.h"
1.1 kristaps 34: #include "libmdoc.h"
35:
1.106 schwarze 36: const char *const __mdoc_macronames[MDOC_MAX + 1] = {
1.137 schwarze 37: "text",
1.7 schwarze 38: "Ap", "Dd", "Dt", "Os",
1.1 kristaps 39: "Sh", "Ss", "Pp", "D1",
40: "Dl", "Bd", "Ed", "Bl",
41: "El", "It", "Ad", "An",
42: "Ar", "Cd", "Cm", "Dv",
43: "Er", "Ev", "Ex", "Fa",
44: "Fd", "Fl", "Fn", "Ft",
45: "Ic", "In", "Li", "Nd",
46: "Nm", "Op", "Ot", "Pa",
47: "Rv", "St", "Va", "Vt",
1.33 schwarze 48: "Xr", "%A", "%B", "%D",
49: "%I", "%J", "%N", "%O",
50: "%P", "%R", "%T", "%V",
1.1 kristaps 51: "Ac", "Ao", "Aq", "At",
52: "Bc", "Bf", "Bo", "Bq",
53: "Bsx", "Bx", "Db", "Dc",
54: "Do", "Dq", "Ec", "Ef",
55: "Em", "Eo", "Fx", "Ms",
56: "No", "Ns", "Nx", "Ox",
57: "Pc", "Pf", "Po", "Pq",
58: "Qc", "Ql", "Qo", "Qq",
59: "Re", "Rs", "Sc", "So",
60: "Sq", "Sm", "Sx", "Sy",
61: "Tn", "Ux", "Xc", "Xo",
62: "Fo", "Fc", "Oo", "Oc",
63: "Bk", "Ek", "Bt", "Hf",
1.7 schwarze 64: "Fr", "Ud", "Lb", "Lp",
65: "Lk", "Mt", "Brq", "Bro",
1.33 schwarze 66: "Brc", "%C", "Es", "En",
67: "Dx", "%Q", "br", "sp",
1.137 schwarze 68: "%U", "Ta", "ll",
69: };
1.1 kristaps 70:
1.103 schwarze 71: const char *const __mdoc_argnames[MDOC_ARG_MAX] = {
1.1 kristaps 72: "split", "nosplit", "ragged",
1.103 schwarze 73: "unfilled", "literal", "file",
74: "offset", "bullet", "dash",
75: "hyphen", "item", "enum",
76: "tag", "diag", "hang",
77: "ohang", "inset", "column",
78: "width", "compact", "std",
1.1 kristaps 79: "filled", "words", "emphasis",
1.30 schwarze 80: "symbolic", "nested", "centered"
1.1 kristaps 81: };
82:
1.137 schwarze 83: const char * const *mdoc_macronames = __mdoc_macronames + 1;
1.1 kristaps 84: const char * const *mdoc_argnames = __mdoc_argnames;
85:
1.131 schwarze 86: static int mdoc_ptext(struct roff_man *, int, char *, int);
87: static int mdoc_pmacro(struct roff_man *, int, char *, int);
1.1 kristaps 88:
1.81 schwarze 89:
1.1 kristaps 90: /*
91: * Main parse routine. Parses a single line -- really just hands off to
1.45 schwarze 92: * the macro (mdoc_pmacro()) or text parser (mdoc_ptext()).
1.1 kristaps 93: */
94: int
1.131 schwarze 95: mdoc_parseln(struct roff_man *mdoc, int ln, char *buf, int offs)
1.1 kristaps 96: {
97:
1.128 schwarze 98: if (mdoc->last->type != ROFFT_EQN || ln > mdoc->last->line)
1.117 schwarze 99: mdoc->flags |= MDOC_NEWLINE;
1.60 schwarze 100:
101: /*
102: * Let the roff nS register switch SYNOPSIS mode early,
103: * such that the parser knows at all times
104: * whether this mode is on or off.
105: * Note that this mode is also switched by the Sh macro.
106: */
1.94 schwarze 107: if (roff_getreg(mdoc->roff, "nS"))
108: mdoc->flags |= MDOC_SYNOPSIS;
109: else
110: mdoc->flags &= ~MDOC_SYNOPSIS;
1.60 schwarze 111:
1.142 schwarze 112: return roff_getcontrol(mdoc->roff, buf, &offs) ?
1.103 schwarze 113: mdoc_pmacro(mdoc, ln, buf, offs) :
1.142 schwarze 114: mdoc_ptext(mdoc, ln, buf, offs);
1.1 kristaps 115: }
116:
1.121 schwarze 117: void
1.58 schwarze 118: mdoc_macro(MACRO_PROT_ARGS)
1.1 kristaps 119: {
1.137 schwarze 120: assert(tok > TOKEN_NONE && tok < MDOC_MAX);
1.46 schwarze 121:
1.113 schwarze 122: if (mdoc->flags & MDOC_PBODY) {
123: if (tok == MDOC_Dt) {
124: mandoc_vmsg(MANDOCERR_DT_LATE,
125: mdoc->parse, line, ppos,
126: "Dt %s", buf + *pos);
1.121 schwarze 127: return;
1.113 schwarze 128: }
129: } else if ( ! (mdoc_macros[tok].flags & MDOC_PROLOGUE)) {
130: if (mdoc->meta.title == NULL) {
131: mandoc_vmsg(MANDOCERR_DT_NOTITLE,
132: mdoc->parse, line, ppos, "%s %s",
133: mdoc_macronames[tok], buf + *pos);
134: mdoc->meta.title = mandoc_strdup("UNTITLED");
135: }
1.93 schwarze 136: if (NULL == mdoc->meta.vol)
137: mdoc->meta.vol = mandoc_strdup("LOCAL");
138: mdoc->flags |= MDOC_PBODY;
1.39 schwarze 139: }
1.121 schwarze 140: (*mdoc_macros[tok].fp)(mdoc, tok, line, ppos, pos, buf);
1.1 kristaps 141: }
142:
1.120 schwarze 143: void
1.131 schwarze 144: mdoc_tail_alloc(struct roff_man *mdoc, int line, int pos, int tok)
1.1 kristaps 145: {
1.129 schwarze 146: struct roff_node *p;
1.1 kristaps 147:
1.136 schwarze 148: p = roff_node_alloc(mdoc, line, pos, ROFFT_TAIL, tok);
149: roff_node_append(mdoc, p);
1.131 schwarze 150: mdoc->next = ROFF_NEXT_CHILD;
1.59 schwarze 151: }
152:
1.129 schwarze 153: struct roff_node *
1.131 schwarze 154: mdoc_endbody_alloc(struct roff_man *mdoc, int line, int pos, int tok,
1.129 schwarze 155: struct roff_node *body, enum mdoc_endbody end)
1.59 schwarze 156: {
1.129 schwarze 157: struct roff_node *p;
1.59 schwarze 158:
1.126 schwarze 159: body->flags |= MDOC_ENDED;
160: body->parent->flags |= MDOC_ENDED;
1.136 schwarze 161: p = roff_node_alloc(mdoc, line, pos, ROFFT_BODY, tok);
1.126 schwarze 162: p->body = body;
1.92 schwarze 163: p->norm = body->norm;
1.59 schwarze 164: p->end = end;
1.136 schwarze 165: roff_node_append(mdoc, p);
1.131 schwarze 166: mdoc->next = ROFF_NEXT_SIBLING;
1.142 schwarze 167: return p;
1.1 kristaps 168: }
169:
1.129 schwarze 170: struct roff_node *
1.131 schwarze 171: mdoc_block_alloc(struct roff_man *mdoc, int line, int pos,
1.129 schwarze 172: int tok, struct mdoc_arg *args)
1.1 kristaps 173: {
1.129 schwarze 174: struct roff_node *p;
1.1 kristaps 175:
1.136 schwarze 176: p = roff_node_alloc(mdoc, line, pos, ROFFT_BLOCK, tok);
1.4 schwarze 177: p->args = args;
178: if (p->args)
1.1 kristaps 179: (args->refcnt)++;
1.75 schwarze 180:
181: switch (tok) {
1.103 schwarze 182: case MDOC_Bd:
183: case MDOC_Bf:
184: case MDOC_Bl:
1.107 schwarze 185: case MDOC_En:
1.103 schwarze 186: case MDOC_Rs:
1.75 schwarze 187: p->norm = mandoc_calloc(1, sizeof(union mdoc_data));
188: break;
189: default:
190: break;
191: }
1.136 schwarze 192: roff_node_append(mdoc, p);
1.131 schwarze 193: mdoc->next = ROFF_NEXT_CHILD;
1.142 schwarze 194: return p;
1.1 kristaps 195: }
196:
1.120 schwarze 197: void
1.131 schwarze 198: mdoc_elem_alloc(struct roff_man *mdoc, int line, int pos,
1.129 schwarze 199: int tok, struct mdoc_arg *args)
1.1 kristaps 200: {
1.129 schwarze 201: struct roff_node *p;
1.1 kristaps 202:
1.136 schwarze 203: p = roff_node_alloc(mdoc, line, pos, ROFFT_ELEM, tok);
1.4 schwarze 204: p->args = args;
205: if (p->args)
1.1 kristaps 206: (args->refcnt)++;
1.75 schwarze 207:
208: switch (tok) {
1.103 schwarze 209: case MDOC_An:
1.75 schwarze 210: p->norm = mandoc_calloc(1, sizeof(union mdoc_data));
211: break;
212: default:
213: break;
214: }
1.136 schwarze 215: roff_node_append(mdoc, p);
1.131 schwarze 216: mdoc->next = ROFF_NEXT_CHILD;
1.1 kristaps 217: }
218:
/*
 * Move the node p within the tree: unlink it from where it is
 * and append it again with roff_node_append().
 */
void
mdoc_node_relink(struct roff_man *mdoc, struct roff_node *p)
{
	roff_node_unlink(mdoc, p);
	roff_node_append(mdoc, p);
}
226:
227: /*
228: * Parse free-form text, that is, a line that does not begin with the
229: * control character.
230: */
231: static int
1.131 schwarze 232: mdoc_ptext(struct roff_man *mdoc, int line, char *buf, int offs)
1.1 kristaps 233: {
1.129 schwarze 234: struct roff_node *n;
1.56 schwarze 235: char *c, *ws, *end;
1.46 schwarze 236:
1.93 schwarze 237: assert(mdoc->last);
238: n = mdoc->last;
1.56 schwarze 239:
240: /*
241: * Divert directly to list processing if we're encountering a
1.128 schwarze 242: * columnar ROFFT_BLOCK with or without a prior ROFFT_BLOCK entry
243: * (a ROFFT_BODY means it's already open, in which case we should
1.56 schwarze 244: * process within its context in the normal way).
245: */
246:
1.128 schwarze 247: if (n->tok == MDOC_Bl && n->type == ROFFT_BODY &&
1.127 schwarze 248: n->end == ENDBODY_NOT && n->norm->Bl.type == LIST_column) {
1.56 schwarze 249: /* `Bl' is open without any children. */
1.93 schwarze 250: mdoc->flags |= MDOC_FREECOL;
1.121 schwarze 251: mdoc_macro(mdoc, MDOC_It, line, offs, &offs, buf);
1.142 schwarze 252: return 1;
1.56 schwarze 253: }
254:
1.128 schwarze 255: if (n->tok == MDOC_It && n->type == ROFFT_BLOCK &&
1.103 schwarze 256: NULL != n->parent &&
257: MDOC_Bl == n->parent->tok &&
258: LIST_column == n->parent->norm->Bl.type) {
1.56 schwarze 259: /* `Bl' has block-level `It' children. */
1.93 schwarze 260: mdoc->flags |= MDOC_FREECOL;
1.121 schwarze 261: mdoc_macro(mdoc, MDOC_It, line, offs, &offs, buf);
1.142 schwarze 262: return 1;
1.56 schwarze 263: }
264:
1.52 schwarze 265: /*
266: * Search for the beginning of unescaped trailing whitespace (ws)
267: * and for the first character not to be output (end).
268: */
1.56 schwarze 269:
270: /* FIXME: replace with strcspn(). */
1.52 schwarze 271: ws = NULL;
1.53 schwarze 272: for (c = end = buf + offs; *c; c++) {
1.52 schwarze 273: switch (*c) {
274: case ' ':
275: if (NULL == ws)
276: ws = c;
277: continue;
278: case '\t':
279: /*
280: * Always warn about trailing tabs,
281: * even outside literal context,
282: * where they should be put on the next line.
283: */
284: if (NULL == ws)
285: ws = c;
286: /*
287: * Strip trailing tabs in literal context only;
288: * outside, they affect the next line.
289: */
1.93 schwarze 290: if (MDOC_LITERAL & mdoc->flags)
1.52 schwarze 291: continue;
292: break;
293: case '\\':
294: /* Skip the escaped character, too, if any. */
295: if (c[1])
296: c++;
297: /* FALLTHROUGH */
298: default:
299: ws = NULL;
300: break;
301: }
302: end = c + 1;
303: }
304: *end = '\0';
1.19 schwarze 305:
1.52 schwarze 306: if (ws)
1.108 schwarze 307: mandoc_msg(MANDOCERR_SPACE_EOL, mdoc->parse,
308: line, (int)(ws-buf), NULL);
1.34 schwarze 309:
1.120 schwarze 310: if (buf[offs] == '\0' && ! (mdoc->flags & MDOC_LITERAL)) {
1.108 schwarze 311: mandoc_msg(MANDOCERR_FI_BLANK, mdoc->parse,
312: line, (int)(c - buf), NULL);
1.46 schwarze 313:
1.40 schwarze 314: /*
1.66 schwarze 315: * Insert a `sp' in the case of a blank line. Technically,
1.46 schwarze 316: * blank lines aren't allowed, but enough manuals assume this
317: * behaviour that we want to work around it.
1.40 schwarze 318: */
1.139 schwarze 319: roff_elem_alloc(mdoc, line, offs, MDOC_sp);
1.131 schwarze 320: mdoc->next = ROFF_NEXT_SIBLING;
1.119 schwarze 321: mdoc_valid_post(mdoc);
1.142 schwarze 322: return 1;
1.38 schwarze 323: }
1.1 kristaps 324:
1.138 schwarze 325: roff_word_alloc(mdoc, line, offs, buf+offs);
1.1 kristaps 326:
1.120 schwarze 327: if (mdoc->flags & MDOC_LITERAL)
1.142 schwarze 328: return 1;
1.52 schwarze 329:
1.35 schwarze 330: /*
1.48 schwarze 331: * End-of-sentence check. If the last character is an unescaped
332: * EOS character, then flag the node as being the end of a
333: * sentence. The front-end will know how to interpret this.
1.35 schwarze 334: */
335:
1.52 schwarze 336: assert(buf < end);
1.48 schwarze 337:
1.97 schwarze 338: if (mandoc_eos(buf+offs, (size_t)(end-buf-offs)))
1.93 schwarze 339: mdoc->last->flags |= MDOC_EOS;
1.142 schwarze 340: return 1;
1.1 kristaps 341: }
1.19 schwarze 342:
1.1 kristaps 343: /*
344: * Parse a macro line, that is, a line beginning with the control
345: * character.
346: */
1.61 schwarze 347: static int
1.131 schwarze 348: mdoc_pmacro(struct roff_man *mdoc, int ln, char *buf, int offs)
1.1 kristaps 349: {
1.129 schwarze 350: struct roff_node *n;
1.118 schwarze 351: const char *cp;
1.129 schwarze 352: int tok;
1.83 schwarze 353: int i, sv;
1.56 schwarze 354: char mac[5];
1.1 kristaps 355:
1.83 schwarze 356: sv = offs;
1.47 schwarze 357:
1.103 schwarze 358: /*
1.64 schwarze 359: * Copy the first word into a nil-terminated buffer.
1.118 schwarze 360: * Stop when a space, tab, escape, or eoln is encountered.
1.63 schwarze 361: */
1.1 kristaps 362:
1.83 schwarze 363: i = 0;
1.118 schwarze 364: while (i < 4 && strchr(" \t\\", buf[offs]) == NULL)
1.83 schwarze 365: mac[i++] = buf[offs++];
366:
367: mac[i] = '\0';
368:
1.137 schwarze 369: tok = (i > 1 && i < 4) ? mdoc_hash_find(mac) : TOKEN_NONE;
1.1 kristaps 370:
1.137 schwarze 371: if (tok == TOKEN_NONE) {
1.112 schwarze 372: mandoc_msg(MANDOCERR_MACRO, mdoc->parse,
373: ln, sv, buf + sv - 1);
1.142 schwarze 374: return 1;
1.1 kristaps 375: }
376:
1.118 schwarze 377: /* Skip a leading escape sequence or tab. */
1.63 schwarze 378:
1.118 schwarze 379: switch (buf[offs]) {
380: case '\\':
381: cp = buf + offs + 1;
382: mandoc_escape(&cp, NULL, NULL);
383: offs = cp - buf;
384: break;
385: case '\t':
1.83 schwarze 386: offs++;
1.118 schwarze 387: break;
388: default:
389: break;
390: }
1.63 schwarze 391:
392: /* Jump to the next non-whitespace word. */
1.1 kristaps 393:
1.83 schwarze 394: while (buf[offs] && ' ' == buf[offs])
395: offs++;
1.34 schwarze 396:
1.103 schwarze 397: /*
1.46 schwarze 398: * Trailing whitespace. Note that tabs are allowed to be passed
399: * into the parser as "text", so we only warn about spaces here.
400: */
1.34 schwarze 401:
1.83 schwarze 402: if ('\0' == buf[offs] && ' ' == buf[offs - 1])
1.108 schwarze 403: mandoc_msg(MANDOCERR_SPACE_EOL, mdoc->parse,
404: ln, offs - 1, NULL);
1.1 kristaps 405:
1.56 schwarze 406: /*
407: * If an initial macro or a list invocation, divert directly
408: * into macro processing.
409: */
410:
1.121 schwarze 411: if (NULL == mdoc->last || MDOC_It == tok || MDOC_El == tok) {
412: mdoc_macro(mdoc, tok, ln, sv, &offs, buf);
1.142 schwarze 413: return 1;
1.121 schwarze 414: }
1.56 schwarze 415:
1.93 schwarze 416: n = mdoc->last;
417: assert(mdoc->last);
1.56 schwarze 418:
419: /*
420: * If the first macro of a `Bl -column', open an `It' block
421: * context around the parsed macro.
422: */
423:
1.128 schwarze 424: if (n->tok == MDOC_Bl && n->type == ROFFT_BODY &&
1.127 schwarze 425: n->end == ENDBODY_NOT && n->norm->Bl.type == LIST_column) {
1.93 schwarze 426: mdoc->flags |= MDOC_FREECOL;
1.121 schwarze 427: mdoc_macro(mdoc, MDOC_It, ln, sv, &sv, buf);
1.142 schwarze 428: return 1;
1.56 schwarze 429: }
430:
431: /*
432: * If we're following a block-level `It' within a `Bl -column'
433: * context (perhaps opened in the above block or in ptext()),
434: * then open an `It' block context around the parsed macro.
1.24 schwarze 435: */
1.56 schwarze 436:
1.128 schwarze 437: if (n->tok == MDOC_It && n->type == ROFFT_BLOCK &&
1.103 schwarze 438: NULL != n->parent &&
439: MDOC_Bl == n->parent->tok &&
440: LIST_column == n->parent->norm->Bl.type) {
1.93 schwarze 441: mdoc->flags |= MDOC_FREECOL;
1.121 schwarze 442: mdoc_macro(mdoc, MDOC_It, ln, sv, &sv, buf);
1.142 schwarze 443: return 1;
1.56 schwarze 444: }
445:
446: /* Normal processing of a macro. */
447:
1.121 schwarze 448: mdoc_macro(mdoc, tok, ln, sv, &offs, buf);
1.98 schwarze 449:
450: /* In quick mode (for mandocdb), abort after the NAME section. */
451:
452: if (mdoc->quick && MDOC_Sh == tok &&
453: SEC_NAME != mdoc->last->sec)
1.142 schwarze 454: return 2;
1.1 kristaps 455:
1.142 schwarze 456: return 1;
1.1 kristaps 457: }
1.24 schwarze 458:
1.83 schwarze 459: enum mdelim
460: mdoc_isdelim(const char *p)
461: {
462:
463: if ('\0' == p[0])
1.142 schwarze 464: return DELIM_NONE;
1.83 schwarze 465:
466: if ('\0' == p[1])
467: switch (p[0]) {
1.103 schwarze 468: case '(':
469: case '[':
1.142 schwarze 470: return DELIM_OPEN;
1.103 schwarze 471: case '|':
1.142 schwarze 472: return DELIM_MIDDLE;
1.103 schwarze 473: case '.':
474: case ',':
475: case ';':
476: case ':':
477: case '?':
478: case '!':
479: case ')':
480: case ']':
1.142 schwarze 481: return DELIM_CLOSE;
1.83 schwarze 482: default:
1.142 schwarze 483: return DELIM_NONE;
1.83 schwarze 484: }
485:
486: if ('\\' != p[0])
1.142 schwarze 487: return DELIM_NONE;
1.24 schwarze 488:
1.83 schwarze 489: if (0 == strcmp(p + 1, "."))
1.142 schwarze 490: return DELIM_CLOSE;
1.90 schwarze 491: if (0 == strcmp(p + 1, "fR|\\fP"))
1.142 schwarze 492: return DELIM_MIDDLE;
1.83 schwarze 493:
1.142 schwarze 494: return DELIM_NONE;
1.83 schwarze 495: }