Annotation of src/usr.bin/mandoc/main.c, Revision 1.75
1.75 ! schwarze 1: /* $Id: main.c,v 1.74 2011/03/20 23:36:42 schwarze Exp $ */
1.1 kristaps 2: /*
1.64 schwarze 3: * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
1.70 schwarze 4: * Copyright (c) 2010, 2011 Ingo Schwarze <schwarze@openbsd.org>
1.1 kristaps 5: *
6: * Permission to use, copy, modify, and distribute this software for any
1.2 schwarze 7: * purpose with or without fee is hereby granted, provided that the above
8: * copyright notice and this permission notice appear in all copies.
1.1 kristaps 9: *
1.2 schwarze 10: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
1.1 kristaps 17: */
1.27 schwarze 18: #include <sys/types.h>
19: #include <sys/mman.h>
1.1 kristaps 20: #include <sys/stat.h>
21:
22: #include <assert.h>
1.43 schwarze 23: #include <ctype.h>
1.1 kristaps 24: #include <fcntl.h>
25: #include <stdio.h>
1.17 schwarze 26: #include <stdint.h>
1.1 kristaps 27: #include <stdlib.h>
28: #include <string.h>
29: #include <unistd.h>
30:
1.30 schwarze 31: #include "mandoc.h"
1.38 schwarze 32: #include "main.h"
1.1 kristaps 33: #include "mdoc.h"
34: #include "man.h"
1.30 schwarze 35: #include "roff.h"
1.17 schwarze 36:
/*
 * Maximum number of ROFF_REPARSE rounds accepted for one input line
 * before parsebuf() gives up and reports MANDOCERR_ROFFLOOP.
 */
#define	REPARSE_LIMIT	1000

/*
 * Output back-end hooks stored in struct curparse: render a parsed
 * mdoc document, render a parsed man document, and release the
 * back-end's private data.  The first argument is always that
 * private data pointer.
 */
typedef	void	(*out_mdoc)(void *, const struct mdoc *);
typedef	void	(*out_man)(void *, const struct man *);
typedef	void	(*out_free)(void *);
42:
/*
 * A block of bytes and its size.  Used both for whole input files
 * and for the growable per-line scratch buffer.
 */
struct buf {
	char	*buf;	/* start of the bytes (may be NULL when sz is 0) */
	size_t	 sz;	/* size of the region in bytes */
};
47:
/* Which input parser to use; selected with -m (see moptions()). */
enum intt {
	INTT_AUTO,	/* -mandoc: guess from the input (default) */
	INTT_MDOC,	/* -mdoc: always use the mdoc parser */
	INTT_MAN	/* -man: always use the man parser */
};
53:
/* Which output back-end to use; selected with -T (see toptions()). */
enum outt {
	OUTT_ASCII = 0,	/* -Tascii (default) */
	OUTT_TREE,	/* -Ttree */
	OUTT_HTML,	/* -Thtml */
	OUTT_XHTML,	/* -Txhtml */
	OUTT_LINT,	/* -Tlint: no output device is set up */
	OUTT_PS,	/* -Tps */
	OUTT_PDF	/* -Tpdf */
};
63:
64: struct curparse {
1.75 ! schwarze 65: enum mandoclevel exit_status; /* status of all file parses */
! 66: const char *file; /* current file-name */
! 67: enum mandoclevel file_status; /* error status of current parse */
! 68: int fd; /* current file-descriptor */
! 69: int line; /* line number in the file */
! 70: enum mandoclevel wlevel; /* ignore messages below this */
! 71: int wstop; /* stop after a file with a warning */
1.33 schwarze 72: enum intt inttype; /* which parser to use */
1.59 schwarze 73: struct man *pman; /* persistent man parser */
74: struct mdoc *pmdoc; /* persistent mdoc parser */
1.33 schwarze 75: struct man *man; /* man parser */
76: struct mdoc *mdoc; /* mdoc parser */
77: struct roff *roff; /* roff parser (!NULL) */
1.38 schwarze 78: struct regset regs; /* roff registers */
1.61 schwarze 79: int reparse_count; /* finite interpolation stack */
1.33 schwarze 80: enum outt outtype; /* which output to use */
81: out_mdoc outmdoc; /* mdoc output ptr */
82: out_man outman; /* man output ptr */
83: out_free outfree; /* free output ptr */
84: void *outdata; /* data for output */
85: char outopts[BUFSIZ]; /* buf of output opts */
86: };
87:
1.45 schwarze 88: static const char * const mandoclevels[MANDOCLEVEL_MAX] = {
89: "SUCCESS",
90: "RESERVED",
91: "WARNING",
92: "ERROR",
93: "FATAL",
94: "BADARG",
95: "SYSERR"
96: };
97:
98: static const enum mandocerr mandoclimits[MANDOCLEVEL_MAX] = {
99: MANDOCERR_OK,
100: MANDOCERR_WARNING,
101: MANDOCERR_WARNING,
102: MANDOCERR_ERROR,
103: MANDOCERR_FATAL,
104: MANDOCERR_MAX,
105: MANDOCERR_MAX
106: };
107:
1.33 schwarze 108: static const char * const mandocerrs[MANDOCERR_MAX] = {
109: "ok",
1.40 schwarze 110:
111: "generic warning",
112:
1.53 schwarze 113: /* related to the prologue */
114: "no title in document",
115: "document title should be all caps",
116: "unknown manual section",
1.73 schwarze 117: "date missing, using today's date",
118: "cannot parse date, using it verbatim",
1.53 schwarze 119: "prologue macros out of order",
120: "duplicate prologue macro",
121: "macro not allowed in prologue",
122: "macro not allowed in body",
123:
124: /* related to document structure */
1.54 schwarze 125: ".so is fragile, better use ln(1)",
1.53 schwarze 126: "NAME section must come first",
127: "bad NAME section contents",
128: "manual name not yet set",
1.34 schwarze 129: "sections out of conventional order",
1.53 schwarze 130: "duplicate section name",
131: "section not in conventional manual section",
132:
133: /* related to macros and nesting */
134: "skipping obsolete macro",
135: "skipping paragraph macro",
1.72 schwarze 136: "skipping no-space macro",
1.53 schwarze 137: "blocks badly nested",
138: "child violates parent syntax",
139: "nested displays are not portable",
140: "already in literal mode",
141:
142: /* related to missing macro arguments */
143: "skipping empty macro",
1.63 schwarze 144: "argument count wrong",
1.53 schwarze 145: "missing display type",
1.33 schwarze 146: "list type must come first",
1.53 schwarze 147: "tag lists require a width argument",
148: "missing font type",
1.68 schwarze 149: "skipping end of block that is not open",
1.53 schwarze 150:
151: /* related to bad macro arguments */
152: "skipping argument",
153: "duplicate argument",
154: "duplicate display type",
155: "duplicate list type",
156: "unknown AT&T UNIX version",
157: "bad Boolean value",
1.57 schwarze 158: "unknown font",
1.53 schwarze 159: "unknown standard specifier",
160: "bad width argument",
161:
162: /* related to plain text */
163: "blank line in non-literal context",
1.43 schwarze 164: "tab in non-literal context",
1.53 schwarze 165: "end of line whitespace",
166: "bad comment style",
167: "unknown escape sequence",
1.33 schwarze 168: "unterminated quoted string",
1.64 schwarze 169:
1.40 schwarze 170: "generic error",
171:
1.64 schwarze 172: /* related to tables */
173: "bad table syntax",
174: "bad table option",
175: "bad table layout",
176: "no table layout cells specified",
177: "no table data cells specified",
178: "ignore data in cell",
179: "data block still open",
1.67 schwarze 180: "ignoring extra data cells",
1.64 schwarze 181:
1.61 schwarze 182: "input stack limit exceeded, infinite loop?",
1.53 schwarze 183: "skipping bad character",
1.66 schwarze 184: "escaped character not allowed in a name",
1.53 schwarze 185: "skipping text before the first section header",
186: "skipping unknown macro",
1.69 schwarze 187: "NOT IMPLEMENTED, please use groff: skipping request",
1.33 schwarze 188: "line scope broken",
189: "argument count wrong",
1.53 schwarze 190: "skipping end of block that is not open",
1.50 schwarze 191: "missing end of block",
1.47 schwarze 192: "scope open on exit",
1.50 schwarze 193: "uname(3) system call failed",
1.33 schwarze 194: "macro requires line argument(s)",
195: "macro requires body argument(s)",
196: "macro requires argument(s)",
1.34 schwarze 197: "missing list type",
1.33 schwarze 198: "line argument(s) will be lost",
199: "body argument(s) will be lost",
1.40 schwarze 200:
201: "generic fatal error",
202:
1.34 schwarze 203: "column syntax is inconsistent",
1.56 schwarze 204: "NOT IMPLEMENTED: .Bd -file",
1.33 schwarze 205: "line scope broken, syntax violated",
206: "argument count wrong, violates syntax",
207: "child violates parent syntax",
208: "argument count wrong, violates syntax",
1.56 schwarze 209: "NOT IMPLEMENTED: .so with absolute path or \"..\"",
1.33 schwarze 210: "no document body",
211: "no document prologue",
1.45 schwarze 212: "static buffer exhausted",
1.1 kristaps 213: };
214:
/* File-local helpers, in alphabetical order of function name. */
static	void	  fdesc(struct curparse *curp);
static	void	  ffile(const char *file, struct curparse *curp);
static	void	  mmsg(enum mandocerr t, void *arg,
			int ln, int col, const char *msg);
static	int	  moptions(enum intt *tflags, char *arg);
static	void	  parsebuf(struct curparse *curp, struct buf blk, int start);
static	void	  pdesc(struct curparse *curp);
static	int	  pfile(const char *file, struct curparse *curp);
static	void	  pset(const char *buf, int pos, struct curparse *curp);
static	int	  toptions(struct curparse *curp, char *arg);
static	void	  usage(void) __attribute__((noreturn));
static	void	  version(void) __attribute__((noreturn));
static	int	  woptions(struct curparse *curp, char *arg);

/* Last path component of argv[0]; set at the start of main(). */
static	const char	 *progname;
1.1 kristaps 230:
231: int
232: main(int argc, char *argv[])
233: {
1.27 schwarze 234: int c;
1.1 kristaps 235: struct curparse curp;
236:
1.19 schwarze 237: progname = strrchr(argv[0], '/');
238: if (progname == NULL)
239: progname = argv[0];
240: else
241: ++progname;
242:
243: memset(&curp, 0, sizeof(struct curparse));
1.1 kristaps 244:
245: curp.inttype = INTT_AUTO;
246: curp.outtype = OUTT_ASCII;
1.45 schwarze 247: curp.wlevel = MANDOCLEVEL_FATAL;
1.75 ! schwarze 248: curp.exit_status = MANDOCLEVEL_OK;
1.1 kristaps 249:
250: /* LINTED */
1.45 schwarze 251: while (-1 != (c = getopt(argc, argv, "m:O:T:VW:")))
1.1 kristaps 252: switch (c) {
253: case ('m'):
254: if ( ! moptions(&curp.inttype, optarg))
1.47 schwarze 255: return((int)MANDOCLEVEL_BADARG);
1.1 kristaps 256: break;
1.18 schwarze 257: case ('O'):
258: (void)strlcat(curp.outopts, optarg, BUFSIZ);
259: (void)strlcat(curp.outopts, ",", BUFSIZ);
1.17 schwarze 260: break;
1.1 kristaps 261: case ('T'):
1.22 schwarze 262: if ( ! toptions(&curp, optarg))
1.47 schwarze 263: return((int)MANDOCLEVEL_BADARG);
1.1 kristaps 264: break;
265: case ('W'):
1.45 schwarze 266: if ( ! woptions(&curp, optarg))
1.47 schwarze 267: return((int)MANDOCLEVEL_BADARG);
1.1 kristaps 268: break;
1.3 schwarze 269: case ('V'):
270: version();
271: /* NOTREACHED */
1.1 kristaps 272: default:
273: usage();
274: /* NOTREACHED */
275: }
276:
277: argc -= optind;
278: argv += optind;
279:
1.7 schwarze 280: if (NULL == *argv) {
281: curp.file = "<stdin>";
282: curp.fd = STDIN_FILENO;
1.14 schwarze 283:
1.27 schwarze 284: fdesc(&curp);
285: }
286:
287: while (*argv) {
288: ffile(*argv, &curp);
1.75 ! schwarze 289: if (MANDOCLEVEL_OK != curp.exit_status && curp.wstop)
1.27 schwarze 290: break;
291: ++argv;
1.1 kristaps 292: }
293:
294: if (curp.outfree)
295: (*curp.outfree)(curp.outdata);
1.59 schwarze 296: if (curp.pmdoc)
297: mdoc_free(curp.pmdoc);
298: if (curp.pman)
299: man_free(curp.pman);
1.30 schwarze 300: if (curp.roff)
301: roff_free(curp.roff);
1.1 kristaps 302:
1.75 ! schwarze 303: return((int)curp.exit_status);
1.1 kristaps 304: }
305:
306:
1.20 schwarze 307: static void
1.3 schwarze 308: version(void)
309: {
310:
1.19 schwarze 311: (void)printf("%s %s\n", progname, VERSION);
1.47 schwarze 312: exit((int)MANDOCLEVEL_OK);
1.3 schwarze 313: }
314:
315:
1.20 schwarze 316: static void
1.1 kristaps 317: usage(void)
318: {
319:
1.59 schwarze 320: (void)fprintf(stderr, "usage: %s "
321: "[-V] "
322: "[-foption] "
323: "[-mformat] "
324: "[-Ooption] "
325: "[-Toutput] "
326: "[-Werr] "
327: "[file...]\n",
328: progname);
329:
1.47 schwarze 330: exit((int)MANDOCLEVEL_BADARG);
1.1 kristaps 331: }
332:
1.27 schwarze 333: static void
334: ffile(const char *file, struct curparse *curp)
1.1 kristaps 335: {
336:
1.59 schwarze 337: /*
338: * Called once per input file. Get the file ready for reading,
339: * pass it through to the parser-driver, then close it out.
340: * XXX: don't do anything special as this is only called for
341: * files; stdin goes directly to fdesc().
342: */
343:
1.1 kristaps 344: curp->file = file;
1.59 schwarze 345:
1.1 kristaps 346: if (-1 == (curp->fd = open(curp->file, O_RDONLY, 0))) {
1.19 schwarze 347: perror(curp->file);
1.75 ! schwarze 348: curp->exit_status = MANDOCLEVEL_SYSERR;
1.27 schwarze 349: return;
1.1 kristaps 350: }
351:
1.27 schwarze 352: fdesc(curp);
1.1 kristaps 353:
354: if (-1 == close(curp->fd))
1.19 schwarze 355: perror(curp->file);
1.27 schwarze 356: }
1.1 kristaps 357:
1.52 schwarze 358: static int
1.55 schwarze 359: pfile(const char *file, struct curparse *curp)
1.52 schwarze 360: {
361: const char *savefile;
362: int fd, savefd;
363:
364: if (-1 == (fd = open(file, O_RDONLY, 0))) {
365: perror(file);
1.75 ! schwarze 366: curp->file_status = MANDOCLEVEL_SYSERR;
1.52 schwarze 367: return(0);
368: }
369:
370: savefile = curp->file;
371: savefd = curp->fd;
372:
373: curp->file = file;
374: curp->fd = fd;
375:
376: pdesc(curp);
377:
378: curp->file = savefile;
379: curp->fd = savefd;
380:
381: if (-1 == close(fd))
382: perror(file);
383:
1.75 ! schwarze 384: return(MANDOCLEVEL_FATAL > curp->file_status ? 1 : 0);
1.52 schwarze 385: }
386:
1.27 schwarze 387:
1.45 schwarze 388: static void
1.27 schwarze 389: resize_buf(struct buf *buf, size_t initial)
390: {
391:
1.62 schwarze 392: buf->sz = buf->sz > initial/2 ? 2 * buf->sz : initial;
1.75 ! schwarze 393: buf->buf = mandoc_realloc(buf->buf, buf->sz);
1.1 kristaps 394: }
395:
396:
397: static int
1.27 schwarze 398: read_whole_file(struct curparse *curp, struct buf *fb, int *with_mmap)
1.1 kristaps 399: {
1.27 schwarze 400: struct stat st;
401: size_t off;
1.1 kristaps 402: ssize_t ssz;
1.27 schwarze 403:
404: if (-1 == fstat(curp->fd, &st)) {
405: perror(curp->file);
406: return(0);
407: }
408:
409: /*
410: * If we're a regular file, try just reading in the whole entry
411: * via mmap(). This is faster than reading it into blocks, and
412: * since each file is only a few bytes to begin with, I'm not
413: * concerned that this is going to tank any machines.
414: */
415:
416: if (S_ISREG(st.st_mode)) {
417: if (st.st_size >= (1U << 31)) {
418: fprintf(stderr, "%s: input too large\n",
419: curp->file);
420: return(0);
421: }
422: *with_mmap = 1;
1.30 schwarze 423: fb->sz = (size_t)st.st_size;
1.27 schwarze 424: fb->buf = mmap(NULL, fb->sz, PROT_READ,
1.35 schwarze 425: MAP_FILE|MAP_SHARED, curp->fd, 0);
1.27 schwarze 426: if (fb->buf != MAP_FAILED)
427: return(1);
428: }
429:
430: /*
431: * If this isn't a regular file (like, say, stdin), then we must
432: * go the old way and just read things in bit by bit.
433: */
434:
435: *with_mmap = 0;
436: off = 0;
437: fb->sz = 0;
438: fb->buf = NULL;
439: for (;;) {
440: if (off == fb->sz) {
441: if (fb->sz == (1U << 31)) {
442: fprintf(stderr, "%s: input too large\n",
443: curp->file);
444: break;
445: }
1.45 schwarze 446: resize_buf(fb, 65536);
1.27 schwarze 447: }
1.30 schwarze 448: ssz = read(curp->fd, fb->buf + (int)off, fb->sz - off);
1.27 schwarze 449: if (ssz == 0) {
450: fb->sz = off;
451: return(1);
452: }
453: if (ssz == -1) {
454: perror(curp->file);
455: break;
456: }
1.30 schwarze 457: off += (size_t)ssz;
1.27 schwarze 458: }
459:
460: free(fb->buf);
461: fb->buf = NULL;
462: return(0);
463: }
464:
465:
466: static void
467: fdesc(struct curparse *curp)
468: {
1.59 schwarze 469:
470: /*
471: * Called once per file with an opened file descriptor. All
472: * pre-file-parse operations (whether stdin or a file) should go
473: * here.
474: *
475: * This calls down into the nested parser, which drills down and
476: * fully parses a file and all its dependences (i.e., `so'). It
477: * then runs the cleanup validators and pushes to output.
478: */
479:
480: /* Zero the parse type. */
481:
482: curp->mdoc = NULL;
483: curp->man = NULL;
1.75 ! schwarze 484: curp->file_status = MANDOCLEVEL_OK;
1.59 schwarze 485:
486: /* Make sure the mandotory roff parser is initialised. */
487:
488: if (NULL == curp->roff) {
489: curp->roff = roff_alloc(&curp->regs, curp, mmsg);
490: assert(curp->roff);
491: }
492:
493: /* Fully parse the file. */
1.51 schwarze 494:
495: pdesc(curp);
496:
1.75 ! schwarze 497: if (MANDOCLEVEL_FATAL <= curp->file_status)
1.51 schwarze 498: goto cleanup;
499:
500: /* NOTE a parser may not have been assigned, yet. */
501:
1.59 schwarze 502: if ( ! (curp->man || curp->mdoc)) {
1.51 schwarze 503: fprintf(stderr, "%s: Not a manual\n", curp->file);
1.75 ! schwarze 504: curp->file_status = MANDOCLEVEL_FATAL;
1.51 schwarze 505: goto cleanup;
506: }
507:
508: /* Clean up the parse routine ASTs. */
509:
1.59 schwarze 510: if (curp->mdoc && ! mdoc_endparse(curp->mdoc)) {
1.75 ! schwarze 511: assert(MANDOCLEVEL_FATAL <= curp->file_status);
1.51 schwarze 512: goto cleanup;
513: }
1.59 schwarze 514:
515: if (curp->man && ! man_endparse(curp->man)) {
1.75 ! schwarze 516: assert(MANDOCLEVEL_FATAL <= curp->file_status);
1.51 schwarze 517: goto cleanup;
518: }
1.59 schwarze 519:
520: assert(curp->roff);
1.64 schwarze 521: roff_endparse(curp->roff);
1.51 schwarze 522:
523: /*
524: * With -Wstop and warnings or errors of at least
525: * the requested level, do not produce output.
526: */
527:
1.75 ! schwarze 528: if (MANDOCLEVEL_OK != curp->file_status && curp->wstop)
1.51 schwarze 529: goto cleanup;
530:
531: /* If unset, allocate output dev now (if applicable). */
532:
533: if ( ! (curp->outman && curp->outmdoc)) {
534: switch (curp->outtype) {
535: case (OUTT_XHTML):
536: curp->outdata = xhtml_alloc(curp->outopts);
537: break;
538: case (OUTT_HTML):
539: curp->outdata = html_alloc(curp->outopts);
540: break;
541: case (OUTT_ASCII):
542: curp->outdata = ascii_alloc(curp->outopts);
543: curp->outfree = ascii_free;
544: break;
545: case (OUTT_PDF):
546: curp->outdata = pdf_alloc(curp->outopts);
547: curp->outfree = pspdf_free;
548: break;
549: case (OUTT_PS):
550: curp->outdata = ps_alloc(curp->outopts);
551: curp->outfree = pspdf_free;
552: break;
553: default:
554: break;
555: }
556:
557: switch (curp->outtype) {
558: case (OUTT_HTML):
559: /* FALLTHROUGH */
560: case (OUTT_XHTML):
561: curp->outman = html_man;
562: curp->outmdoc = html_mdoc;
563: curp->outfree = html_free;
564: break;
565: case (OUTT_TREE):
566: curp->outman = tree_man;
567: curp->outmdoc = tree_mdoc;
568: break;
569: case (OUTT_PDF):
570: /* FALLTHROUGH */
571: case (OUTT_ASCII):
572: /* FALLTHROUGH */
573: case (OUTT_PS):
574: curp->outman = terminal_man;
575: curp->outmdoc = terminal_mdoc;
576: break;
577: default:
578: break;
579: }
580: }
581:
582: /* Execute the out device, if it exists. */
583:
1.59 schwarze 584: if (curp->man && curp->outman)
585: (*curp->outman)(curp->outdata, curp->man);
586: if (curp->mdoc && curp->outmdoc)
587: (*curp->outmdoc)(curp->outdata, curp->mdoc);
1.51 schwarze 588:
589: cleanup:
1.59 schwarze 590:
1.51 schwarze 591: memset(&curp->regs, 0, sizeof(struct regset));
1.59 schwarze 592:
593: /* Reset the current-parse compilers. */
594:
595: if (curp->mdoc)
596: mdoc_reset(curp->mdoc);
597: if (curp->man)
598: man_reset(curp->man);
599:
600: assert(curp->roff);
601: roff_reset(curp->roff);
1.51 schwarze 602:
1.75 ! schwarze 603: if (curp->exit_status < curp->file_status)
! 604: curp->exit_status = curp->file_status;
1.60 schwarze 605:
1.51 schwarze 606: return;
607: }
608:
609: static void
610: pdesc(struct curparse *curp)
611: {
1.55 schwarze 612: struct buf blk;
613: int with_mmap;
1.1 kristaps 614:
1.59 schwarze 615: /*
616: * Run for each opened file; may be called more than once for
617: * each full parse sequence if the opened file is nested (i.e.,
618: * from `so'). Simply sucks in the whole file and moves into
619: * the parse phase for the file.
620: */
621:
1.45 schwarze 622: if ( ! read_whole_file(curp, &blk, &with_mmap)) {
1.75 ! schwarze 623: curp->file_status = MANDOCLEVEL_SYSERR;
1.27 schwarze 624: return;
1.45 schwarze 625: }
1.27 schwarze 626:
1.59 schwarze 627: /* Line number is per-file. */
1.55 schwarze 628:
629: curp->line = 1;
1.59 schwarze 630:
1.55 schwarze 631: parsebuf(curp, blk, 1);
632:
633: if (with_mmap)
634: munmap(blk.buf, blk.sz);
635: else
636: free(blk.buf);
637: }
638:
1.75 ! schwarze 639: /*
! 640: * Main parse routine for an opened file. This is called for each
! 641: * opened file and simply loops around the full input file, possibly
! 642: * nesting (i.e., with `so').
! 643: */
1.55 schwarze 644: static void
645: parsebuf(struct curparse *curp, struct buf blk, int start)
646: {
1.71 schwarze 647: const struct tbl_span *span;
1.55 schwarze 648: struct buf ln;
1.59 schwarze 649: enum rofferr rr;
650: int i, of, rc;
651: int pos; /* byte number in the ln buffer */
652: int lnn; /* line number in the real file */
1.55 schwarze 653: unsigned char c;
1.59 schwarze 654:
1.55 schwarze 655: memset(&ln, 0, sizeof(struct buf));
656:
1.59 schwarze 657: lnn = curp->line;
658: pos = 0;
1.55 schwarze 659:
1.59 schwarze 660: for (i = 0; i < (int)blk.sz; ) {
1.55 schwarze 661: if (0 == pos && '\0' == blk.buf[i])
662: break;
1.59 schwarze 663:
1.61 schwarze 664: if (start) {
1.55 schwarze 665: curp->line = lnn;
1.61 schwarze 666: curp->reparse_count = 0;
667: }
1.55 schwarze 668:
669: while (i < (int)blk.sz && (start || '\0' != blk.buf[i])) {
1.70 schwarze 670:
671: /*
672: * When finding an unescaped newline character,
673: * leave the character loop to process the line.
674: * Skip a preceding carriage return, if any.
675: */
676:
677: if ('\r' == blk.buf[i] && i + 1 < (int)blk.sz &&
678: '\n' == blk.buf[i + 1])
679: ++i;
1.29 schwarze 680: if ('\n' == blk.buf[i]) {
681: ++i;
682: ++lnn;
683: break;
684: }
1.43 schwarze 685:
686: /*
687: * Warn about bogus characters. If you're using
688: * non-ASCII encoding, you're screwing your
689: * readers. Since I'd rather this not happen,
690: * I'll be helpful and drop these characters so
691: * we don't display gibberish. Note to manual
692: * writers: use special characters.
693: */
694:
1.44 schwarze 695: c = (unsigned char) blk.buf[i];
1.59 schwarze 696:
697: if ( ! (isascii(c) &&
698: (isgraph(c) || isblank(c)))) {
1.45 schwarze 699: mmsg(MANDOCERR_BADCHAR, curp,
1.55 schwarze 700: curp->line, pos, "ignoring byte");
1.43 schwarze 701: i++;
702: continue;
703: }
704:
1.59 schwarze 705: /* Trailing backslash = a plain char. */
706:
1.29 schwarze 707: if ('\\' != blk.buf[i] || i + 1 == (int)blk.sz) {
708: if (pos >= (int)ln.sz)
1.45 schwarze 709: resize_buf(&ln, 256);
1.29 schwarze 710: ln.buf[pos++] = blk.buf[i++];
1.27 schwarze 711: continue;
1.29 schwarze 712: }
1.59 schwarze 713:
1.70 schwarze 714: /*
715: * Found escape and at least one other character.
716: * When it's a newline character, skip it.
717: * When there is a carriage return in between,
718: * skip that one as well.
719: */
1.59 schwarze 720:
1.70 schwarze 721: if ('\r' == blk.buf[i + 1] && i + 2 < (int)blk.sz &&
722: '\n' == blk.buf[i + 2])
723: ++i;
1.29 schwarze 724: if ('\n' == blk.buf[i + 1]) {
1.59 schwarze 725: i += 2;
1.29 schwarze 726: ++lnn;
1.27 schwarze 727: continue;
728: }
1.59 schwarze 729:
1.29 schwarze 730: if ('"' == blk.buf[i + 1]) {
731: i += 2;
732: /* Comment, skip to end of line */
733: for (; i < (int)blk.sz; ++i) {
734: if ('\n' == blk.buf[i]) {
735: ++i;
736: ++lnn;
737: break;
738: }
739: }
1.59 schwarze 740:
1.29 schwarze 741: /* Backout trailing whitespaces */
742: for (; pos > 0; --pos) {
743: if (ln.buf[pos - 1] != ' ')
744: break;
745: if (pos > 2 && ln.buf[pos - 2] == '\\')
746: break;
747: }
748: break;
749: }
1.59 schwarze 750:
751: /* Some other escape sequence, copy & cont. */
752:
1.29 schwarze 753: if (pos + 1 >= (int)ln.sz)
1.45 schwarze 754: resize_buf(&ln, 256);
1.1 kristaps 755:
1.29 schwarze 756: ln.buf[pos++] = blk.buf[i++];
757: ln.buf[pos++] = blk.buf[i++];
1.27 schwarze 758: }
1.1 kristaps 759:
1.29 schwarze 760: if (pos >= (int)ln.sz)
1.45 schwarze 761: resize_buf(&ln, 256);
1.59 schwarze 762:
1.30 schwarze 763: ln.buf[pos] = '\0';
764:
1.32 schwarze 765: /*
766: * A significant amount of complexity is contained by
767: * the roff preprocessor. It's line-oriented but can be
768: * expressed on one line, so we need at times to
769: * readjust our starting point and re-run it. The roff
770: * preprocessor can also readjust the buffers with new
771: * data, so we pass them in wholesale.
772: */
773:
774: of = 0;
1.59 schwarze 775:
1.55 schwarze 776: rerun:
1.59 schwarze 777: rr = roff_parseln
778: (curp->roff, curp->line,
779: &ln.buf, &ln.sz, of, &of);
780:
781: switch (rr) {
1.55 schwarze 782: case (ROFF_REPARSE):
1.61 schwarze 783: if (REPARSE_LIMIT >= ++curp->reparse_count)
784: parsebuf(curp, ln, 0);
785: else
786: mmsg(MANDOCERR_ROFFLOOP, curp,
787: curp->line, pos, NULL);
1.55 schwarze 788: pos = 0;
789: continue;
790: case (ROFF_APPEND):
1.75 ! schwarze 791: pos = (int)strlen(ln.buf);
1.55 schwarze 792: continue;
793: case (ROFF_RERUN):
794: goto rerun;
795: case (ROFF_IGN):
796: pos = 0;
1.30 schwarze 797: continue;
1.55 schwarze 798: case (ROFF_ERR):
1.75 ! schwarze 799: assert(MANDOCLEVEL_FATAL <= curp->file_status);
1.51 schwarze 800: break;
1.55 schwarze 801: case (ROFF_SO):
802: if (pfile(ln.buf + of, curp)) {
803: pos = 0;
1.52 schwarze 804: continue;
1.55 schwarze 805: } else
1.52 schwarze 806: break;
1.64 schwarze 807: default:
1.55 schwarze 808: break;
1.45 schwarze 809: }
1.65 schwarze 810:
811: /*
812: * If we encounter errors in the recursive parsebuf()
813: * call, make sure we don't continue parsing.
814: */
815:
1.75 ! schwarze 816: if (MANDOCLEVEL_FATAL <= curp->file_status)
1.65 schwarze 817: break;
1.5 schwarze 818:
1.32 schwarze 819: /*
820: * If input parsers have not been allocated, do so now.
821: * We keep these instanced betwen parsers, but set them
822: * locally per parse routine since we can use different
823: * parsers with each one.
824: */
1.1 kristaps 825:
1.59 schwarze 826: if ( ! (curp->man || curp->mdoc))
827: pset(ln.buf + of, pos - of, curp);
1.5 schwarze 828:
1.59 schwarze 829: /*
830: * Lastly, push down into the parsers themselves. One
831: * of these will have already been set in the pset()
832: * routine.
1.64 schwarze 833: * If libroff returns ROFF_TBL, then add it to the
834: * currently open parse. Since we only get here if
835: * there does exist data (see tbl_data.c), we're
836: * guaranteed that something's been allocated.
1.74 schwarze 837: * Do the same for ROFF_EQN.
1.59 schwarze 838: */
839:
1.74 schwarze 840: rc = -1;
841:
842: if (ROFF_TBL == rr)
1.71 schwarze 843: while (NULL != (span = roff_span(curp->roff))) {
1.74 schwarze 844: rc = curp->man ?
845: man_addspan(curp->man, span) :
1.71 schwarze 846: mdoc_addspan(curp->mdoc, span);
1.74 schwarze 847: if (0 == rc)
848: break;
1.71 schwarze 849: }
1.74 schwarze 850: else if (ROFF_EQN == rr)
851: rc = curp->mdoc ?
852: mdoc_addeqn(curp->mdoc,
853: roff_eqn(curp->roff)) :
854: man_addeqn(curp->man,
855: roff_eqn(curp->roff));
856: else if (curp->man || curp->mdoc)
1.59 schwarze 857: rc = curp->man ?
858: man_parseln(curp->man,
859: curp->line, ln.buf, of) :
860: mdoc_parseln(curp->mdoc,
861: curp->line, ln.buf, of);
1.1 kristaps 862:
1.74 schwarze 863: if (0 == rc) {
1.75 ! schwarze 864: assert(MANDOCLEVEL_FATAL <= curp->file_status);
1.74 schwarze 865: break;
1.1 kristaps 866: }
1.55 schwarze 867:
868: /* Temporary buffers typically are not full. */
1.59 schwarze 869:
1.55 schwarze 870: if (0 == start && '\0' == blk.buf[i])
871: break;
872:
873: /* Start the next input line. */
1.59 schwarze 874:
1.55 schwarze 875: pos = 0;
1.1 kristaps 876: }
877:
1.51 schwarze 878: free(ln.buf);
1.1 kristaps 879: }
880:
1.45 schwarze 881: static void
1.59 schwarze 882: pset(const char *buf, int pos, struct curparse *curp)
1.1 kristaps 883: {
1.5 schwarze 884: int i;
1.1 kristaps 885:
886: /*
887: * Try to intuit which kind of manual parser should be used. If
888: * passed in by command-line (-man, -mdoc), then use that
889: * explicitly. If passed as -mandoc, then try to guess from the
1.5 schwarze 890: * line: either skip dot-lines, use -mdoc when finding `.Dt', or
1.1 kristaps 891: * default to -man, which is more lenient.
1.59 schwarze 892: *
893: * Separate out pmdoc/pman from mdoc/man: the first persists
894: * through all parsers, while the latter is used per-parse.
1.1 kristaps 895: */
896:
1.31 schwarze 897: if ('.' == buf[0] || '\'' == buf[0]) {
1.5 schwarze 898: for (i = 1; buf[i]; i++)
899: if (' ' != buf[i] && '\t' != buf[i])
900: break;
1.45 schwarze 901: if ('\0' == buf[i])
902: return;
1.5 schwarze 903: }
1.1 kristaps 904:
905: switch (curp->inttype) {
906: case (INTT_MDOC):
1.59 schwarze 907: if (NULL == curp->pmdoc)
908: curp->pmdoc = mdoc_alloc
909: (&curp->regs, curp, mmsg);
910: assert(curp->pmdoc);
911: curp->mdoc = curp->pmdoc;
1.45 schwarze 912: return;
1.1 kristaps 913: case (INTT_MAN):
1.59 schwarze 914: if (NULL == curp->pman)
915: curp->pman = man_alloc
916: (&curp->regs, curp, mmsg);
917: assert(curp->pman);
918: curp->man = curp->pman;
1.45 schwarze 919: return;
1.1 kristaps 920: default:
921: break;
922: }
923:
924: if (pos >= 3 && 0 == memcmp(buf, ".Dd", 3)) {
1.59 schwarze 925: if (NULL == curp->pmdoc)
926: curp->pmdoc = mdoc_alloc
927: (&curp->regs, curp, mmsg);
928: assert(curp->pmdoc);
929: curp->mdoc = curp->pmdoc;
1.45 schwarze 930: return;
1.1 kristaps 931: }
932:
1.59 schwarze 933: if (NULL == curp->pman)
934: curp->pman = man_alloc(&curp->regs, curp, mmsg);
935: assert(curp->pman);
936: curp->man = curp->pman;
1.1 kristaps 937: }
938:
939: static int
940: moptions(enum intt *tflags, char *arg)
941: {
942:
943: if (0 == strcmp(arg, "doc"))
944: *tflags = INTT_MDOC;
945: else if (0 == strcmp(arg, "andoc"))
946: *tflags = INTT_AUTO;
947: else if (0 == strcmp(arg, "an"))
948: *tflags = INTT_MAN;
949: else {
1.20 schwarze 950: fprintf(stderr, "%s: Bad argument\n", arg);
1.1 kristaps 951: return(0);
952: }
953:
954: return(1);
955: }
956:
957: static int
1.22 schwarze 958: toptions(struct curparse *curp, char *arg)
1.1 kristaps 959: {
960:
961: if (0 == strcmp(arg, "ascii"))
1.22 schwarze 962: curp->outtype = OUTT_ASCII;
963: else if (0 == strcmp(arg, "lint")) {
964: curp->outtype = OUTT_LINT;
1.45 schwarze 965: curp->wlevel = MANDOCLEVEL_WARNING;
1.22 schwarze 966: }
1.1 kristaps 967: else if (0 == strcmp(arg, "tree"))
1.22 schwarze 968: curp->outtype = OUTT_TREE;
1.17 schwarze 969: else if (0 == strcmp(arg, "html"))
1.22 schwarze 970: curp->outtype = OUTT_HTML;
1.21 schwarze 971: else if (0 == strcmp(arg, "xhtml"))
1.22 schwarze 972: curp->outtype = OUTT_XHTML;
1.36 schwarze 973: else if (0 == strcmp(arg, "ps"))
974: curp->outtype = OUTT_PS;
1.43 schwarze 975: else if (0 == strcmp(arg, "pdf"))
976: curp->outtype = OUTT_PDF;
1.1 kristaps 977: else {
1.20 schwarze 978: fprintf(stderr, "%s: Bad argument\n", arg);
1.1 kristaps 979: return(0);
980: }
981:
982: return(1);
983: }
984:
985: static int
1.45 schwarze 986: woptions(struct curparse *curp, char *arg)
1.1 kristaps 987: {
1.10 schwarze 988: char *v, *o;
1.45 schwarze 989: const char *toks[6];
1.1 kristaps 990:
1.45 schwarze 991: toks[0] = "stop";
992: toks[1] = "all";
993: toks[2] = "warning";
994: toks[3] = "error";
995: toks[4] = "fatal";
996: toks[5] = NULL;
1.1 kristaps 997:
1.10 schwarze 998: while (*arg) {
999: o = arg;
1.17 schwarze 1000: switch (getsubopt(&arg, UNCONST(toks), &v)) {
1.1 kristaps 1001: case (0):
1.45 schwarze 1002: curp->wstop = 1;
1.1 kristaps 1003: break;
1004: case (1):
1.45 schwarze 1005: /* FALLTHROUGH */
1.1 kristaps 1006: case (2):
1.45 schwarze 1007: curp->wlevel = MANDOCLEVEL_WARNING;
1.1 kristaps 1008: break;
1009: case (3):
1.45 schwarze 1010: curp->wlevel = MANDOCLEVEL_ERROR;
1.1 kristaps 1011: break;
1012: case (4):
1.45 schwarze 1013: curp->wlevel = MANDOCLEVEL_FATAL;
1.19 schwarze 1014: break;
1.1 kristaps 1015: default:
1.45 schwarze 1016: fprintf(stderr, "-W%s: Bad argument\n", o);
1.1 kristaps 1017: return(0);
1018: }
1.10 schwarze 1019: }
1.1 kristaps 1020:
1021: return(1);
1022: }
1023:
1.75 ! schwarze 1024: static void
1.30 schwarze 1025: mmsg(enum mandocerr t, void *arg, int ln, int col, const char *msg)
1026: {
1027: struct curparse *cp;
1.45 schwarze 1028: enum mandoclevel level;
1029:
1030: level = MANDOCLEVEL_FATAL;
1031: while (t < mandoclimits[level])
1.47 schwarze 1032: /* LINTED */
1.45 schwarze 1033: level--;
1.30 schwarze 1034:
1035: cp = (struct curparse *)arg;
1.45 schwarze 1036: if (level < cp->wlevel)
1.75 ! schwarze 1037: return;
1.30 schwarze 1038:
1.45 schwarze 1039: fprintf(stderr, "%s:%d:%d: %s: %s",
1040: cp->file, ln, col + 1, mandoclevels[level], mandocerrs[t]);
1.30 schwarze 1041: if (msg)
1042: fprintf(stderr, ": %s", msg);
1043: fputc('\n', stderr);
1.33 schwarze 1044:
1.75 ! schwarze 1045: if (cp->file_status < level)
! 1046: cp->file_status = level;
1.30 schwarze 1047: }