Annotation of src/usr.bin/mandoc/read.c, Revision 1.61
1.61 ! schwarze 1: /* $OpenBSD: read.c,v 1.60 2014/09/06 23:24:27 schwarze Exp $ */
1.1 schwarze 2: /*
3: * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
1.18 schwarze 4: * Copyright (c) 2010-2014 Ingo Schwarze <schwarze@openbsd.org>
1.20 schwarze 5: * Copyright (c) 2010, 2012 Joerg Sonnenberger <joerg@netbsd.org>
1.1 schwarze 6: *
7: * Permission to use, copy, modify, and distribute this software for any
8: * purpose with or without fee is hereby granted, provided that the above
9: * copyright notice and this permission notice appear in all copies.
10: *
11: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18: */
1.58 schwarze 19: #include <sys/types.h>
20: #include <sys/mman.h>
1.1 schwarze 21: #include <sys/stat.h>
1.58 schwarze 22: #include <sys/wait.h>
1.1 schwarze 23:
24: #include <assert.h>
25: #include <ctype.h>
1.18 schwarze 26: #include <errno.h>
1.1 schwarze 27: #include <fcntl.h>
28: #include <stdarg.h>
29: #include <stdio.h>
30: #include <stdlib.h>
31: #include <string.h>
32: #include <unistd.h>
33:
34: #include "mandoc.h"
1.24 schwarze 35: #include "mandoc_aux.h"
1.1 schwarze 36: #include "libmandoc.h"
37: #include "mdoc.h"
38: #include "man.h"
39:
/*
 * Upper bound for curp->reparse_count: how many times one input line
 * may be handed back for reparsing before MANDOCERR_ROFFLOOP is issued.
 */
#define	REPARSE_LIMIT	1000
41:
/*
 * A block of input bytes together with its size.
 */
struct buf {
	char	*buf;	/* the input bytes; not necessarily a C string */
	size_t	 sz;	/* size of the buffer in bytes */
};
46:
47: struct mparse {
48: struct man *pman; /* persistent man parser */
49: struct mdoc *pmdoc; /* persistent mdoc parser */
50: struct man *man; /* man parser */
51: struct mdoc *mdoc; /* mdoc parser */
52: struct roff *roff; /* roff parser (!NULL) */
1.23 schwarze 53: char *sodest; /* filename pointed to by .so */
1.59 schwarze 54: const char *file; /* filename of current input file */
55: struct buf *primary; /* buffer currently being parsed */
56: struct buf *secondary; /* preprocessed copy of input */
57: const char *defos; /* default operating system */
58: mandocmsg mmsg; /* warning/error message handler */
59: enum mandoclevel file_status; /* status of current parse */
60: enum mandoclevel wlevel; /* ignore messages below this */
61: int options; /* parser options */
1.1 schwarze 62: int reparse_count; /* finite interp. stack */
1.59 schwarze 63: int line; /* line number in the file */
1.1 schwarze 64: };
65:
/* Functions private to this file. */
static	void	  choose_parser(struct mparse *curp);
static	void	  resize_buf(struct buf *buf, size_t initial);
static	void	  mparse_buf_r(struct mparse *curp, struct buf blk,
			int start);
static	int	  read_whole_file(struct mparse *curp, const char *file,
			int fd, struct buf *fb, int *with_mmap);
static	void	  mparse_end(struct mparse *curp);
static	void	  mparse_parse_buffer(struct mparse *curp, struct buf blk,
			const char *file);
1.1 schwarze 74:
75: static const enum mandocerr mandoclimits[MANDOCLEVEL_MAX] = {
76: MANDOCERR_OK,
77: MANDOCERR_WARNING,
78: MANDOCERR_WARNING,
79: MANDOCERR_ERROR,
80: MANDOCERR_FATAL,
81: MANDOCERR_MAX,
82: MANDOCERR_MAX
83: };
84:
85: static const char * const mandocerrs[MANDOCERR_MAX] = {
86: "ok",
87:
88: "generic warning",
89:
90: /* related to the prologue */
1.57 schwarze 91: "missing manual title, using UNTITLED",
92: "missing manual title, using \"\"",
1.32 schwarze 93: "lower case character in document title",
1.57 schwarze 94: "missing manual section, using \"\"",
1.1 schwarze 95: "unknown manual section",
1.11 schwarze 96: "unknown manual volume or arch",
1.32 schwarze 97: "missing date, using today's date",
1.1 schwarze 98: "cannot parse date, using it verbatim",
1.57 schwarze 99: "missing Os macro, using \"\"",
100: "duplicate prologue macro",
101: "late prologue macro",
102: "skipping late title macro",
1.1 schwarze 103: "prologue macros out of order",
104:
105: /* related to document structure */
106: ".so is fragile, better use ln(1)",
1.28 schwarze 107: "no document body",
1.32 schwarze 108: "content before first section header",
109: "first section is not \"NAME\"",
1.1 schwarze 110: "bad NAME section contents",
111: "sections out of conventional order",
1.32 schwarze 112: "duplicate section title",
113: "unexpected section",
1.1 schwarze 114:
115: /* related to macros and nesting */
1.33 schwarze 116: "obsolete macro",
1.1 schwarze 117: "skipping paragraph macro",
1.10 schwarze 118: "moving paragraph macro out of list",
1.1 schwarze 119: "skipping no-space macro",
120: "blocks badly nested",
121: "nested displays are not portable",
1.35 schwarze 122: "moving content out of list",
123: ".Vt block has child macro",
1.56 schwarze 124: "fill mode already enabled, skipping",
125: "fill mode already disabled, skipping",
1.1 schwarze 126: "line scope broken",
127:
128: /* related to missing macro arguments */
1.36 schwarze 129: "skipping empty request",
130: "conditional request controls empty scope",
1.1 schwarze 131: "skipping empty macro",
1.40 schwarze 132: "empty argument, using 0n",
1.1 schwarze 133: "argument count wrong",
1.38 schwarze 134: "missing display type, using -ragged",
135: "list type is not the first argument",
136: "missing -width in -tag list, using 8n",
1.56 schwarze 137: "missing utility name, using \"\"",
1.38 schwarze 138: "empty head in list item",
139: "empty list item",
1.39 schwarze 140: "missing font type, using \\fR",
141: "unknown font type, using \\fR",
1.38 schwarze 142: "missing -std argument, adding it",
1.1 schwarze 143:
144: /* related to bad macro arguments */
1.42 schwarze 145: "unterminated quoted argument",
1.1 schwarze 146: "duplicate argument",
1.54 schwarze 147: "skipping duplicate argument",
1.41 schwarze 148: "skipping duplicate display type",
149: "skipping duplicate list type",
1.54 schwarze 150: "skipping -width argument",
1.1 schwarze 151: "unknown AT&T UNIX version",
1.45 schwarze 152: "invalid content in Rs block",
1.41 schwarze 153: "invalid Boolean argument",
154: "unknown font, skipping request",
1.1 schwarze 155:
156: /* related to plain text */
1.42 schwarze 157: "blank line in fill mode, using .sp",
158: "tab in filled text",
159: "whitespace at end of input line",
1.1 schwarze 160: "bad comment style",
1.42 schwarze 161: "invalid escape sequence",
162: "undefined string, using \"\"",
1.3 schwarze 163:
1.1 schwarze 164: "generic error",
165:
1.3 schwarze 166: /* related to equations */
167: "unexpected equation scope closure",
168: "equation scope open on exit",
169: "overlapping equation scopes",
170: "unexpected end of equation",
171: "equation syntax error",
172:
1.1 schwarze 173: /* related to tables */
174: "bad table syntax",
175: "bad table option",
176: "bad table layout",
177: "no table layout cells specified",
178: "no table data cells specified",
179: "ignore data in cell",
180: "data block still open",
181: "ignoring extra data cells",
182:
1.46 schwarze 183: /* related to document structure and macros */
1.1 schwarze 184: "input stack limit exceeded, infinite loop?",
185: "skipping bad character",
1.46 schwarze 186: "skipping unknown macro",
1.51 schwarze 187: "skipping item outside list",
1.46 schwarze 188: "skipping column outside column list",
189: "skipping end of block that is not open",
190: "inserting missing end of block",
191: "appending missing end of block",
192:
193: /* related to request and macro arguments */
1.1 schwarze 194: "escaped character not allowed in a name",
195: "argument count wrong",
1.49 schwarze 196: "missing list type, using -item",
1.48 schwarze 197: "missing manual name, using \"\"",
1.49 schwarze 198: "uname(3) system call failed, using UNKNOWN",
1.41 schwarze 199: "unknown standard specifier",
1.49 schwarze 200: "skipping request without numeric argument",
1.39 schwarze 201: "skipping all arguments",
202: "skipping excess arguments",
1.1 schwarze 203:
204: "generic fatal error",
205:
1.18 schwarze 206: "input too large",
1.56 schwarze 207: "NOT IMPLEMENTED: Bd -file",
1.1 schwarze 208: "NOT IMPLEMENTED: .so with absolute path or \"..\"",
1.30 schwarze 209: ".so request failed",
1.18 schwarze 210:
211: /* system errors */
1.58 schwarze 212: "cannot dup file descriptor",
213: "cannot exec",
214: "gunzip failed with code",
215: "cannot fork",
1.29 schwarze 216: NULL,
1.58 schwarze 217: "cannot open pipe",
218: "cannot read file",
219: "gunzip died from signal",
1.18 schwarze 220: "cannot stat file",
1.58 schwarze 221: "wait failed",
1.1 schwarze 222: };
223:
224: static const char * const mandoclevels[MANDOCLEVEL_MAX] = {
225: "SUCCESS",
226: "RESERVED",
227: "WARNING",
228: "ERROR",
229: "FATAL",
230: "BADARG",
231: "SYSERR"
232: };
233:
1.25 schwarze 234:
1.1 schwarze 235: static void
236: resize_buf(struct buf *buf, size_t initial)
237: {
238:
239: buf->sz = buf->sz > initial/2 ? 2 * buf->sz : initial;
240: buf->buf = mandoc_realloc(buf->buf, buf->sz);
241: }
242:
243: static void
1.60 schwarze 244: choose_parser(struct mparse *curp)
1.1 schwarze 245: {
1.59 schwarze 246: char *cp, *ep;
247: int format;
1.1 schwarze 248:
1.59 schwarze 249: /*
250: * If neither command line arguments -mdoc or -man select
251: * a parser nor the roff parser found a .Dd or .TH macro
252: * yet, look ahead in the main input buffer.
253: */
254:
255: if ((format = roff_getformat(curp->roff)) == 0) {
256: cp = curp->primary->buf;
257: ep = cp + curp->primary->sz;
258: while (cp < ep) {
1.61 ! schwarze 259: if (*cp == '.' || *cp == '\'') {
1.59 schwarze 260: cp++;
261: if (cp[0] == 'D' && cp[1] == 'd') {
262: format = MPARSE_MDOC;
263: break;
264: }
265: if (cp[0] == 'T' && cp[1] == 'H') {
266: format = MPARSE_MAN;
267: break;
268: }
269: }
270: cp = memchr(cp, '\n', ep - cp);
271: if (cp == NULL)
272: break;
273: cp++;
274: }
1.1 schwarze 275: }
276:
1.59 schwarze 277: if (format == MPARSE_MDOC) {
1.25 schwarze 278: if (NULL == curp->pmdoc)
1.22 schwarze 279: curp->pmdoc = mdoc_alloc(
280: curp->roff, curp, curp->defos,
281: MPARSE_QUICK & curp->options ? 1 : 0);
1.1 schwarze 282: assert(curp->pmdoc);
283: curp->mdoc = curp->pmdoc;
284: return;
1.25 schwarze 285: }
1.1 schwarze 286:
1.59 schwarze 287: /* Fall back to man(7) as a last resort. */
288:
1.25 schwarze 289: if (NULL == curp->pman)
1.22 schwarze 290: curp->pman = man_alloc(curp->roff, curp,
291: MPARSE_QUICK & curp->options ? 1 : 0);
1.1 schwarze 292: assert(curp->pman);
293: curp->man = curp->pman;
294: }
295:
296: /*
297: * Main parse routine for an opened file. This is called for each
298: * opened file and simply loops around the full input file, possibly
299: * nesting (i.e., with `so').
300: */
/*
 * Parameters:
 *   curp   parse state; this function updates curp->line and
 *          curp->reparse_count and may set curp->sodest and append
 *          to curp->secondary.
 *   blk    the bytes to parse, passed by value.
 *   start  non-zero when blk is the buffer of a whole file (as passed
 *          by mparse_parse_buffer()); zero when this function calls
 *          itself on a line buffer for ROFF_REPARSE, in which case a
 *          NUL byte ends the input as well.
 *
 * Each pass of the outer loop assembles one logical input line in
 * "ln", hands it to roff_parseln() and, depending on the result,
 * to the man or mdoc parser.
 */
301: static void
302: mparse_buf_r(struct mparse *curp, struct buf blk, int start)
303: {
304: const struct tbl_span *span;
305: struct buf ln;
306: enum rofferr rr;
307: int i, of, rc;
308: int pos; /* byte number in the ln buffer */
309: int lnn; /* line number in the real file */
310: unsigned char c;
311:
312: memset(&ln, 0, sizeof(struct buf));
313:
1.25 schwarze 314: lnn = curp->line;
315: pos = 0;
1.1 schwarze 316:
317: for (i = 0; i < (int)blk.sz; ) {
/* A NUL byte at the beginning of a line ends the input. */
318: if (0 == pos && '\0' == blk.buf[i])
319: break;
320:
/*
 * Only a whole-file pass publishes the line number and resets the
 * per-line reparse counter; nested reparse passes leave both alone.
 */
321: if (start) {
322: curp->line = lnn;
323: curp->reparse_count = 0;
324: }
325:
326: while (i < (int)blk.sz && (start || '\0' != blk.buf[i])) {
327:
328: /*
329: * When finding an unescaped newline character,
330: * leave the character loop to process the line.
331: * Skip a preceding carriage return, if any.
332: */
333:
334: if ('\r' == blk.buf[i] && i + 1 < (int)blk.sz &&
335: '\n' == blk.buf[i + 1])
336: ++i;
337: if ('\n' == blk.buf[i]) {
338: ++i;
339: ++lnn;
340: break;
341: }
342:
1.13 schwarze 343: /*
344: * Make sure we have space for at least
345: * one backslash and one other character
346: * and the trailing NUL byte.
347: */
348:
349: if (pos + 2 >= (int)ln.sz)
350: resize_buf(&ln, 256);
351:
1.25 schwarze 352: /*
1.1 schwarze 353: * Warn about bogus characters. If you're using
354: * non-ASCII encoding, you're screwing your
355: * readers. Since I'd rather this not happen,
1.6 schwarze 356: * I'll be helpful and replace these characters
357: * with "?", so we don't display gibberish.
358: * Note to manual writers: use special characters.
1.1 schwarze 359: */
360:
361: c = (unsigned char) blk.buf[i];
362:
1.25 schwarze 363: if ( ! (isascii(c) &&
364: (isgraph(c) || isblank(c)))) {
1.56 schwarze 365: mandoc_vmsg(MANDOCERR_BADCHAR, curp,
366: curp->line, pos, "0x%x", c);
1.1 schwarze 367: i++;
1.6 schwarze 368: ln.buf[pos++] = '?';
1.1 schwarze 369: continue;
370: }
371:
372: /* Trailing backslash = a plain char. */
373:
374: if ('\\' != blk.buf[i] || i + 1 == (int)blk.sz) {
375: ln.buf[pos++] = blk.buf[i++];
376: continue;
377: }
378:
379: /*
380: * Found escape and at least one other character.
381: * When it's a newline character, skip it.
382: * When there is a carriage return in between,
383: * skip that one as well.
384: */
385:
386: if ('\r' == blk.buf[i + 1] && i + 2 < (int)blk.sz &&
387: '\n' == blk.buf[i + 2])
388: ++i;
389: if ('\n' == blk.buf[i + 1]) {
390: i += 2;
391: ++lnn;
392: continue;
393: }
394:
395: if ('"' == blk.buf[i + 1] || '#' == blk.buf[i + 1]) {
396: i += 2;
397: /* Comment, skip to end of line */
398: for (; i < (int)blk.sz; ++i) {
399: if ('\n' == blk.buf[i]) {
400: ++i;
401: ++lnn;
402: break;
403: }
404: }
405:
406: /* Backout trailing whitespaces */
407: for (; pos > 0; --pos) {
408: if (ln.buf[pos - 1] != ' ')
409: break;
410: if (pos > 2 && ln.buf[pos - 2] == '\\')
411: break;
412: }
413: break;
414: }
415:
1.13 schwarze 416: /* Catch escaped bogus characters. */
417:
418: c = (unsigned char) blk.buf[i+1];
419:
1.25 schwarze 420: if ( ! (isascii(c) &&
421: (isgraph(c) || isblank(c)))) {
1.56 schwarze 422: mandoc_vmsg(MANDOCERR_BADCHAR, curp,
423: curp->line, pos, "0x%x", c);
1.13 schwarze 424: i += 2;
425: ln.buf[pos++] = '?';
426: continue;
427: }
428:
1.1 schwarze 429: /* Some other escape sequence, copy & cont. */
430:
431: ln.buf[pos++] = blk.buf[i++];
432: ln.buf[pos++] = blk.buf[i++];
433: }
434:
/* Make room for the terminating NUL byte (also allocates an empty ln). */
1.25 schwarze 435: if (pos >= (int)ln.sz)
1.1 schwarze 436: resize_buf(&ln, 256);
437:
438: ln.buf[pos] = '\0';
439:
440: /*
441: * A significant amount of complexity is contained by
442: * the roff preprocessor. It's line-oriented but can be
443: * expressed on one line, so we need at times to
444: * readjust our starting point and re-run it. The roff
445: * preprocessor can also readjust the buffers with new
446: * data, so we pass them in wholesale.
447: */
448:
449: of = 0;
450:
1.4 schwarze 451: /*
452: * Maintain a lookaside buffer of all parsed lines. We
453: * only do this if mparse_keep() has been invoked (the
454: * buffer may be accessed with mparse_getkeep()).
455: */
456:
457: if (curp->secondary) {
1.25 schwarze 458: curp->secondary->buf = mandoc_realloc(
459: curp->secondary->buf,
460: curp->secondary->sz + pos + 2);
461: memcpy(curp->secondary->buf +
462: curp->secondary->sz,
463: ln.buf, pos);
1.4 schwarze 464: curp->secondary->sz += pos;
465: curp->secondary->buf
466: [curp->secondary->sz] = '\n';
467: curp->secondary->sz++;
468: curp->secondary->buf
469: [curp->secondary->sz] = '\0';
470: }
1.1 schwarze 471: rerun:
1.25 schwarze 472: rr = roff_parseln(curp->roff, curp->line,
473: &ln.buf, &ln.sz, of, &of);
1.1 schwarze 474:
475: switch (rr) {
1.25 schwarze 476: case ROFF_REPARSE:
/*
 * Parse the contents of ln again by recursing with start == 0,
 * unless that already happened more than REPARSE_LIMIT times
 * for the current input line.
 */
1.1 schwarze 477: if (REPARSE_LIMIT >= ++curp->reparse_count)
478: mparse_buf_r(curp, ln, 0);
479: else
480: mandoc_msg(MANDOCERR_ROFFLOOP, curp,
1.25 schwarze 481: curp->line, pos, NULL);
1.1 schwarze 482: pos = 0;
483: continue;
1.25 schwarze 484: case ROFF_APPEND:
/* Keep what is in ln; the next input line is appended behind it. */
1.1 schwarze 485: pos = (int)strlen(ln.buf);
486: continue;
1.25 schwarze 487: case ROFF_RERUN:
1.1 schwarze 488: goto rerun;
1.25 schwarze 489: case ROFF_IGN:
1.1 schwarze 490: pos = 0;
491: continue;
1.25 schwarze 492: case ROFF_ERR:
1.1 schwarze 493: assert(MANDOCLEVEL_FATAL <= curp->file_status);
494: break;
1.25 schwarze 495: case ROFF_SO:
/*
 * Without MPARSE_SO, a .so request that is the last thing in the
 * buffer is not followed: its argument is saved in curp->sodest
 * and parsing of this buffer stops.
 */
1.23 schwarze 496: if (0 == (MPARSE_SO & curp->options) &&
497: (i >= (int)blk.sz || '\0' == blk.buf[i])) {
498: curp->sodest = mandoc_strdup(ln.buf + of);
499: free(ln.buf);
500: return;
501: }
1.4 schwarze 502: /*
503: * We remove `so' clauses from our lookaside
504: * buffer because we're going to descend into
505: * the file recursively.
506: */
1.25 schwarze 507: if (curp->secondary)
1.4 schwarze 508: curp->secondary->sz -= pos + 1;
1.14 schwarze 509: mparse_readfd(curp, -1, ln.buf + of);
1.30 schwarze 510: if (MANDOCLEVEL_FATAL <= curp->file_status) {
511: mandoc_vmsg(MANDOCERR_SO_FAIL,
512: curp, curp->line, pos,
513: ".so %s", ln.buf + of);
1.1 schwarze 514: break;
1.30 schwarze 515: }
1.1 schwarze 516: pos = 0;
517: continue;
518: default:
519: break;
520: }
521:
522: /*
523: * If we encounter errors in the recursive parse, make
524: * sure we don't continue parsing.
525: */
526:
527: if (MANDOCLEVEL_FATAL <= curp->file_status)
528: break;
529:
530: /*
531: * If input parsers have not been allocated, do so now.
1.2 schwarze 532: * We keep these instanced between parsers, but set them
1.1 schwarze 533: * locally per parse routine since we can use different
534: * parsers with each one.
535: */
536:
537: if ( ! (curp->man || curp->mdoc))
1.60 schwarze 538: choose_parser(curp);
1.1 schwarze 539:
1.25 schwarze 540: /*
1.60 schwarze 541: * Lastly, push down into the parsers themselves.
1.1 schwarze 542: * If libroff returns ROFF_TBL, then add it to the
543: * currently open parse. Since we only get here if
544: * there does exist data (see tbl_data.c), we're
545: * guaranteed that something's been allocated.
546: * Do the same for ROFF_EQN.
547: */
548:
/*
 * rc keeps -1 if no parser call is made below.  A result of 0 is
 * treated as a fatal failure and a result of 2 ends the parse loop
 * without further ado; any other value continues with the next line.
 */
549: rc = -1;
550:
551: if (ROFF_TBL == rr)
552: while (NULL != (span = roff_span(curp->roff))) {
553: rc = curp->man ?
1.25 schwarze 554: man_addspan(curp->man, span) :
555: mdoc_addspan(curp->mdoc, span);
1.1 schwarze 556: if (0 == rc)
557: break;
558: }
559: else if (ROFF_EQN == rr)
1.25 schwarze 560: rc = curp->mdoc ?
561: mdoc_addeqn(curp->mdoc,
562: roff_eqn(curp->roff)) :
563: man_addeqn(curp->man,
564: roff_eqn(curp->roff));
1.1 schwarze 565: else if (curp->man || curp->mdoc)
566: rc = curp->man ?
1.25 schwarze 567: man_parseln(curp->man,
568: curp->line, ln.buf, of) :
569: mdoc_parseln(curp->mdoc,
570: curp->line, ln.buf, of);
1.1 schwarze 571:
572: if (0 == rc) {
573: assert(MANDOCLEVEL_FATAL <= curp->file_status);
574: break;
1.19 schwarze 575: } else if (2 == rc)
576: break;
1.1 schwarze 577:
578: /* Temporary buffers typically are not full. */
579:
580: if (0 == start && '\0' == blk.buf[i])
581: break;
582:
583: /* Start the next input line. */
584:
585: pos = 0;
586: }
587:
588: free(ln.buf);
589: }
590:
591: static int
1.18 schwarze 592: read_whole_file(struct mparse *curp, const char *file, int fd,
593: struct buf *fb, int *with_mmap)
1.1 schwarze 594: {
595: struct stat st;
596: size_t off;
597: ssize_t ssz;
598:
599: if (-1 == fstat(fd, &st)) {
1.18 schwarze 600: curp->file_status = MANDOCLEVEL_SYSERR;
601: if (curp->mmsg)
602: (*curp->mmsg)(MANDOCERR_SYSSTAT, curp->file_status,
603: file, 0, 0, strerror(errno));
1.1 schwarze 604: return(0);
605: }
606:
607: /*
608: * If we're a regular file, try just reading in the whole entry
609: * via mmap(). This is faster than reading it into blocks, and
610: * since each file is only a few bytes to begin with, I'm not
611: * concerned that this is going to tank any machines.
612: */
613:
614: if (S_ISREG(st.st_mode)) {
615: if (st.st_size >= (1U << 31)) {
1.18 schwarze 616: curp->file_status = MANDOCLEVEL_FATAL;
617: if (curp->mmsg)
618: (*curp->mmsg)(MANDOCERR_TOOLARGE,
619: curp->file_status, file, 0, 0, NULL);
1.1 schwarze 620: return(0);
621: }
622: *with_mmap = 1;
623: fb->sz = (size_t)st.st_size;
1.15 schwarze 624: fb->buf = mmap(NULL, fb->sz, PROT_READ, MAP_SHARED, fd, 0);
1.1 schwarze 625: if (fb->buf != MAP_FAILED)
626: return(1);
627: }
628:
629: /*
630: * If this isn't a regular file (like, say, stdin), then we must
631: * go the old way and just read things in bit by bit.
632: */
633:
634: *with_mmap = 0;
635: off = 0;
636: fb->sz = 0;
637: fb->buf = NULL;
638: for (;;) {
639: if (off == fb->sz) {
640: if (fb->sz == (1U << 31)) {
1.18 schwarze 641: curp->file_status = MANDOCLEVEL_FATAL;
642: if (curp->mmsg)
643: (*curp->mmsg)(MANDOCERR_TOOLARGE,
644: curp->file_status,
645: file, 0, 0, NULL);
1.1 schwarze 646: break;
647: }
648: resize_buf(fb, 65536);
649: }
650: ssz = read(fd, fb->buf + (int)off, fb->sz - off);
651: if (ssz == 0) {
652: fb->sz = off;
653: return(1);
654: }
655: if (ssz == -1) {
1.18 schwarze 656: curp->file_status = MANDOCLEVEL_SYSERR;
657: if (curp->mmsg)
658: (*curp->mmsg)(MANDOCERR_SYSREAD,
659: curp->file_status, file, 0, 0,
660: strerror(errno));
1.1 schwarze 661: break;
662: }
663: off += (size_t)ssz;
664: }
665:
666: free(fb->buf);
667: fb->buf = NULL;
668: return(0);
669: }
670:
671: static void
672: mparse_end(struct mparse *curp)
673: {
674:
675: if (MANDOCLEVEL_FATAL <= curp->file_status)
676: return;
677:
1.50 schwarze 678: if (curp->mdoc == NULL &&
679: curp->man == NULL &&
680: curp->sodest == NULL) {
681: if (curp->options & MPARSE_MDOC)
682: curp->mdoc = curp->pmdoc;
683: else {
684: if (curp->pman == NULL)
685: curp->pman = man_alloc(curp->roff, curp,
686: curp->options & MPARSE_QUICK ? 1 : 0);
687: curp->man = curp->pman;
688: }
689: }
690:
1.1 schwarze 691: if (curp->mdoc && ! mdoc_endparse(curp->mdoc)) {
692: assert(MANDOCLEVEL_FATAL <= curp->file_status);
693: return;
694: }
695:
696: if (curp->man && ! man_endparse(curp->man)) {
697: assert(MANDOCLEVEL_FATAL <= curp->file_status);
698: return;
699: }
700:
701: roff_endparse(curp->roff);
702: }
703:
1.15 schwarze 704: static void
705: mparse_parse_buffer(struct mparse *curp, struct buf blk, const char *file)
1.1 schwarze 706: {
1.61 ! schwarze 707: struct buf *svprimary;
1.1 schwarze 708: const char *svfile;
1.14 schwarze 709: static int recursion_depth;
710:
711: if (64 < recursion_depth) {
712: mandoc_msg(MANDOCERR_ROFFLOOP, curp, curp->line, 0, NULL);
1.15 schwarze 713: return;
1.14 schwarze 714: }
1.1 schwarze 715:
1.15 schwarze 716: /* Line number is per-file. */
717: svfile = curp->file;
718: curp->file = file;
1.61 ! schwarze 719: svprimary = curp->primary;
1.59 schwarze 720: curp->primary = &blk;
1.15 schwarze 721: curp->line = 1;
722: recursion_depth++;
723:
724: mparse_buf_r(curp, blk, 1);
725:
726: if (0 == --recursion_depth && MANDOCLEVEL_FATAL > curp->file_status)
727: mparse_end(curp);
728:
1.61 ! schwarze 729: curp->primary = svprimary;
1.15 schwarze 730: curp->file = svfile;
731: }
732:
733: enum mandoclevel
734: mparse_readfd(struct mparse *curp, int fd, const char *file)
735: {
736: struct buf blk;
737: int with_mmap;
738:
1.18 schwarze 739: if (-1 == fd && -1 == (fd = open(file, O_RDONLY, 0))) {
740: curp->file_status = MANDOCLEVEL_SYSERR;
741: if (curp->mmsg)
742: (*curp->mmsg)(MANDOCERR_SYSOPEN,
743: curp->file_status,
744: file, 0, 0, strerror(errno));
745: goto out;
746: }
747:
1.15 schwarze 748: /*
749: * Run for each opened file; may be called more than once for
750: * each full parse sequence if the opened file is nested (i.e.,
751: * from `so'). Simply sucks in the whole file and moves into
752: * the parse phase for the file.
753: */
1.1 schwarze 754:
1.18 schwarze 755: if ( ! read_whole_file(curp, file, fd, &blk, &with_mmap))
1.15 schwarze 756: goto out;
1.1 schwarze 757:
1.15 schwarze 758: mparse_parse_buffer(curp, blk, file);
1.1 schwarze 759:
1.15 schwarze 760: if (with_mmap)
761: munmap(blk.buf, blk.sz);
762: else
763: free(blk.buf);
1.1 schwarze 764:
765: if (STDIN_FILENO != fd && -1 == close(fd))
766: perror(file);
1.14 schwarze 767: out:
1.1 schwarze 768: return(curp->file_status);
1.58 schwarze 769: }
770:
771: enum mandoclevel
772: mparse_open(struct mparse *curp, int *fd, const char *file,
773: pid_t *child_pid)
774: {
775: int pfd[2];
776: char *cp;
777: enum mandocerr err;
778:
779: pfd[1] = -1;
780: curp->file = file;
781: if ((cp = strrchr(file, '.')) == NULL ||
782: strcmp(cp + 1, "gz")) {
783: *child_pid = 0;
784: if ((*fd = open(file, O_RDONLY)) == -1) {
785: err = MANDOCERR_SYSOPEN;
786: goto out;
787: }
788: return(MANDOCLEVEL_OK);
789: }
790:
791: if (pipe(pfd) == -1) {
792: err = MANDOCERR_SYSPIPE;
793: goto out;
794: }
795:
796: switch (*child_pid = fork()) {
797: case -1:
798: err = MANDOCERR_SYSFORK;
799: close(pfd[0]);
800: close(pfd[1]);
801: pfd[1] = -1;
802: break;
803: case 0:
804: close(pfd[0]);
805: if (dup2(pfd[1], STDOUT_FILENO) == -1) {
806: err = MANDOCERR_SYSDUP;
807: break;
808: }
809: execlp("gunzip", "gunzip", "-c", file, NULL);
810: err = MANDOCERR_SYSEXEC;
811: break;
812: default:
813: close(pfd[1]);
814: *fd = pfd[0];
815: return(MANDOCLEVEL_OK);
816: }
817:
818: out:
819: *fd = -1;
820: *child_pid = 0;
821: curp->file_status = MANDOCLEVEL_SYSERR;
822: if (curp->mmsg)
823: (*curp->mmsg)(err, curp->file_status, file,
824: 0, 0, strerror(errno));
825: if (pfd[1] != -1)
826: exit(1);
827: return(curp->file_status);
828: }
829:
830: enum mandoclevel
831: mparse_wait(struct mparse *curp, pid_t child_pid)
832: {
833: int status;
834:
835: if (waitpid(child_pid, &status, 0) == -1) {
836: mandoc_msg(MANDOCERR_SYSWAIT, curp, 0, 0,
837: strerror(errno));
838: curp->file_status = MANDOCLEVEL_SYSERR;
839: return(curp->file_status);
840: }
841: if (WIFSIGNALED(status)) {
842: mandoc_vmsg(MANDOCERR_SYSSIG, curp, 0, 0,
843: "%d", WTERMSIG(status));
844: curp->file_status = MANDOCLEVEL_SYSERR;
845: return(curp->file_status);
846: }
847: if (WEXITSTATUS(status)) {
848: mandoc_vmsg(MANDOCERR_SYSEXIT, curp, 0, 0,
849: "%d", WEXITSTATUS(status));
850: curp->file_status = MANDOCLEVEL_SYSERR;
851: return(curp->file_status);
852: }
853: return(MANDOCLEVEL_OK);
1.1 schwarze 854: }
855:
856: struct mparse *
1.22 schwarze 857: mparse_alloc(int options, enum mandoclevel wlevel,
1.47 schwarze 858: mandocmsg mmsg, const char *defos)
1.1 schwarze 859: {
860: struct mparse *curp;
861:
862: assert(wlevel <= MANDOCLEVEL_FATAL);
863:
864: curp = mandoc_calloc(1, sizeof(struct mparse));
865:
1.22 schwarze 866: curp->options = options;
1.1 schwarze 867: curp->wlevel = wlevel;
868: curp->mmsg = mmsg;
1.7 schwarze 869: curp->defos = defos;
1.1 schwarze 870:
1.22 schwarze 871: curp->roff = roff_alloc(curp, options);
1.50 schwarze 872: if (curp->options & MPARSE_MDOC)
873: curp->pmdoc = mdoc_alloc(
874: curp->roff, curp, curp->defos,
875: curp->options & MPARSE_QUICK ? 1 : 0);
876: if (curp->options & MPARSE_MAN)
877: curp->pman = man_alloc(curp->roff, curp,
878: curp->options & MPARSE_QUICK ? 1 : 0);
879:
1.1 schwarze 880: return(curp);
881: }
882:
883: void
884: mparse_reset(struct mparse *curp)
885: {
886:
887: roff_reset(curp->roff);
888:
889: if (curp->mdoc)
890: mdoc_reset(curp->mdoc);
891: if (curp->man)
892: man_reset(curp->man);
1.4 schwarze 893: if (curp->secondary)
894: curp->secondary->sz = 0;
1.1 schwarze 895:
896: curp->file_status = MANDOCLEVEL_OK;
897: curp->mdoc = NULL;
898: curp->man = NULL;
1.23 schwarze 899:
900: free(curp->sodest);
901: curp->sodest = NULL;
1.1 schwarze 902: }
903:
904: void
905: mparse_free(struct mparse *curp)
906: {
907:
908: if (curp->pmdoc)
909: mdoc_free(curp->pmdoc);
910: if (curp->pman)
911: man_free(curp->pman);
912: if (curp->roff)
913: roff_free(curp->roff);
1.4 schwarze 914: if (curp->secondary)
915: free(curp->secondary->buf);
1.1 schwarze 916:
1.4 schwarze 917: free(curp->secondary);
1.23 schwarze 918: free(curp->sodest);
1.1 schwarze 919: free(curp);
920: }
921:
922: void
1.23 schwarze 923: mparse_result(struct mparse *curp,
924: struct mdoc **mdoc, struct man **man, char **sodest)
1.1 schwarze 925: {
926:
1.23 schwarze 927: if (sodest && NULL != (*sodest = curp->sodest)) {
928: *mdoc = NULL;
929: *man = NULL;
930: return;
931: }
1.1 schwarze 932: if (mdoc)
933: *mdoc = curp->mdoc;
934: if (man)
935: *man = curp->man;
936: }
937:
938: void
939: mandoc_vmsg(enum mandocerr t, struct mparse *m,
940: int ln, int pos, const char *fmt, ...)
941: {
942: char buf[256];
943: va_list ap;
944:
945: va_start(ap, fmt);
1.26 schwarze 946: (void)vsnprintf(buf, sizeof(buf), fmt, ap);
1.1 schwarze 947: va_end(ap);
948:
949: mandoc_msg(t, m, ln, pos, buf);
950: }
951:
952: void
1.25 schwarze 953: mandoc_msg(enum mandocerr er, struct mparse *m,
1.1 schwarze 954: int ln, int col, const char *msg)
955: {
956: enum mandoclevel level;
957:
958: level = MANDOCLEVEL_FATAL;
959: while (er < mandoclimits[level])
960: level--;
961:
962: if (level < m->wlevel)
963: return;
964:
965: if (m->mmsg)
966: (*m->mmsg)(er, level, m->file, ln, col, msg);
967:
968: if (m->file_status < level)
969: m->file_status = level;
970: }
971:
972: const char *
973: mparse_strerror(enum mandocerr er)
974: {
975:
976: return(mandocerrs[er]);
977: }
978:
979: const char *
980: mparse_strlevel(enum mandoclevel lvl)
981: {
982: return(mandoclevels[lvl]);
1.4 schwarze 983: }
984:
985: void
986: mparse_keep(struct mparse *p)
987: {
988:
989: assert(NULL == p->secondary);
990: p->secondary = mandoc_calloc(1, sizeof(struct buf));
991: }
992:
993: const char *
994: mparse_getkeep(const struct mparse *p)
995: {
996:
997: assert(p->secondary);
998: return(p->secondary->sz ? p->secondary->buf : NULL);
1.1 schwarze 999: }