Annotation of src/usr.bin/mandoc/read.c, Revision 1.189
1.189 ! schwarze 1: /* $OpenBSD: read.c,v 1.188 2020/04/07 22:45:37 schwarze Exp $ */
1.1 schwarze 2: /*
1.189 ! schwarze 3: * Copyright (c) 2010-2020 Ingo Schwarze <schwarze@openbsd.org>
1.1 schwarze 4: * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
1.20 schwarze 5: * Copyright (c) 2010, 2012 Joerg Sonnenberger <joerg@netbsd.org>
1.1 schwarze 6: *
7: * Permission to use, copy, modify, and distribute this software for any
8: * purpose with or without fee is hereby granted, provided that the above
9: * copyright notice and this permission notice appear in all copies.
10: *
1.108 schwarze 11: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
1.1 schwarze 12: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
1.108 schwarze 13: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
1.1 schwarze 14: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
1.186 schwarze 18: *
19: * Top-level functions of the mandoc(3) parser:
20: * Parser and input encoding selection, decompression,
21: * handling of input bytes, characters, lines, and files,
22: * handling of roff(7) loops and file inclusion,
23: * and steering of the various parsers.
1.1 schwarze 24: */
1.58 schwarze 25: #include <sys/types.h>
26: #include <sys/mman.h>
1.1 schwarze 27: #include <sys/stat.h>
28:
29: #include <assert.h>
30: #include <ctype.h>
1.18 schwarze 31: #include <errno.h>
1.1 schwarze 32: #include <fcntl.h>
33: #include <stdarg.h>
34: #include <stdio.h>
35: #include <stdlib.h>
36: #include <string.h>
37: #include <unistd.h>
1.115 schwarze 38: #include <zlib.h>
1.1 schwarze 39:
1.108 schwarze 40: #include "mandoc_aux.h"
1.1 schwarze 41: #include "mandoc.h"
1.108 schwarze 42: #include "roff.h"
1.1 schwarze 43: #include "mdoc.h"
44: #include "man.h"
1.174 schwarze 45: #include "mandoc_parse.h"
1.108 schwarze 46: #include "libmandoc.h"
1.173 schwarze 47: #include "roff_int.h"
1.189 ! schwarze 48: #include "tag.h"
1.1 schwarze 49:
/*
 * Upper bound for mparse::reparse_count.  The counter is reset for
 * every line read from a file and incremented for each ROFF_REPARSE
 * request; exceeding the bound aborts with MANDOCERR_ROFFLOOP.
 */
#define	REPARSE_LIMIT	1000
51:
/*
 * State of one parser instance as created by mparse_alloc().
 */
struct	mparse {
	struct roff	 *roff;		/* roff(7) parser, never NULL */
	struct roff_man	 *man;		/* mdoc(7) / man(7) parser state */
	struct buf	 *primary;	/* whole-file buffer being parsed */
	struct buf	 *secondary;	/* list of copied top-level lines */
	struct buf	 *loop;		/* line of the open .while request */
	const char	 *os_s;		/* default operating system name */
	int		  options;	/* MPARSE_* option bits */
	int		  gzip;		/* nonzero: current file is gzipped */
	int		  filenc;	/* encoding bits of the current file */
	int		  reparse_count; /* reparse passes for this line */
	int		  line;		/* current line number in the file */
};
65:
/* Forward declarations of the file-local helpers, in definition order. */
static	void	  resize_buf(struct buf *buf, size_t initial);
static	void	  free_buf_list(struct buf *buf);
static	void	  choose_parser(struct mparse *curp);
static	int	  mparse_buf_r(struct mparse *curp, struct buf blk,
			size_t i, int start);
static	int	  read_whole_file(struct mparse *curp, int fd,
			struct buf *fb, int *with_mmap);
static	void	  mparse_end(struct mparse *curp);
72:
1.25 schwarze 73:
1.1 schwarze 74: static void
75: resize_buf(struct buf *buf, size_t initial)
76: {
77:
78: buf->sz = buf->sz > initial/2 ? 2 * buf->sz : initial;
79: buf->buf = mandoc_realloc(buf->buf, buf->sz);
80: }
81:
82: static void
1.170 schwarze 83: free_buf_list(struct buf *buf)
84: {
85: struct buf *tmp;
86:
87: while (buf != NULL) {
88: tmp = buf;
89: buf = tmp->next;
90: free(tmp->buf);
91: free(tmp);
92: }
93: }
94:
95: static void
1.60 schwarze 96: choose_parser(struct mparse *curp)
1.1 schwarze 97: {
1.59 schwarze 98: char *cp, *ep;
99: int format;
1.1 schwarze 100:
1.59 schwarze 101: /*
102: * If neither command line arguments -mdoc or -man select
103: * a parser nor the roff parser found a .Dd or .TH macro
104: * yet, look ahead in the main input buffer.
105: */
106:
107: if ((format = roff_getformat(curp->roff)) == 0) {
108: cp = curp->primary->buf;
109: ep = cp + curp->primary->sz;
110: while (cp < ep) {
1.61 schwarze 111: if (*cp == '.' || *cp == '\'') {
1.59 schwarze 112: cp++;
113: if (cp[0] == 'D' && cp[1] == 'd') {
114: format = MPARSE_MDOC;
115: break;
116: }
117: if (cp[0] == 'T' && cp[1] == 'H') {
118: format = MPARSE_MAN;
119: break;
120: }
121: }
122: cp = memchr(cp, '\n', ep - cp);
123: if (cp == NULL)
124: break;
125: cp++;
126: }
1.1 schwarze 127: }
128:
1.59 schwarze 129: if (format == MPARSE_MDOC) {
1.180 schwarze 130: curp->man->meta.macroset = MACROSET_MDOC;
1.136 schwarze 131: if (curp->man->mdocmac == NULL)
132: curp->man->mdocmac = roffhash_alloc(MDOC_Dd, MDOC_MAX);
1.112 schwarze 133: } else {
1.180 schwarze 134: curp->man->meta.macroset = MACROSET_MAN;
1.136 schwarze 135: if (curp->man->manmac == NULL)
136: curp->man->manmac = roffhash_alloc(MAN_TH, MAN_MAX);
1.25 schwarze 137: }
1.180 schwarze 138: curp->man->meta.first->tok = TOKEN_NONE;
1.1 schwarze 139: }
140:
141: /*
1.71 schwarze 142: * Main parse routine for a buffer.
143: * It assumes encoding and line numbering are already set up.
144: * It can recurse directly (for invocations of user-defined
145: * macros, inline equations, and input line traps)
146: * and indirectly (for .so file inclusion).
1.1 schwarze 147: */
1.171 schwarze 148: static int
1.71 schwarze 149: mparse_buf_r(struct mparse *curp, struct buf blk, size_t i, int start)
1.1 schwarze 150: {
151: struct buf ln;
1.171 schwarze 152: struct buf *firstln, *lastln, *thisln, *loop;
1.85 schwarze 153: char *cp;
1.71 schwarze 154: size_t pos; /* byte number in the ln buffer */
1.171 schwarze 155: int line_result, result;
1.76 schwarze 156: int of;
1.1 schwarze 157: int lnn; /* line number in the real file */
1.85 schwarze 158: int fd;
1.171 schwarze 159: int inloop; /* Saw .while on this level. */
1.1 schwarze 160: unsigned char c;
161:
1.170 schwarze 162: ln.sz = 256;
163: ln.buf = mandoc_malloc(ln.sz);
164: ln.next = NULL;
1.184 schwarze 165: firstln = lastln = loop = NULL;
1.25 schwarze 166: lnn = curp->line;
167: pos = 0;
1.171 schwarze 168: inloop = 0;
1.170 schwarze 169: result = ROFF_CONT;
1.1 schwarze 170:
1.171 schwarze 171: while (i < blk.sz && (blk.buf[i] != '\0' || pos != 0)) {
1.1 schwarze 172: if (start) {
173: curp->line = lnn;
174: curp->reparse_count = 0;
1.70 schwarze 175:
176: if (lnn < 3 &&
177: curp->filenc & MPARSE_UTF8 &&
1.71 schwarze 178: curp->filenc & MPARSE_LATIN1)
179: curp->filenc = preconv_cue(&blk, i);
1.1 schwarze 180: }
181:
1.71 schwarze 182: while (i < blk.sz && (start || blk.buf[i] != '\0')) {
1.1 schwarze 183:
184: /*
185: * When finding an unescaped newline character,
186: * leave the character loop to process the line.
187: * Skip a preceding carriage return, if any.
188: */
189:
1.71 schwarze 190: if ('\r' == blk.buf[i] && i + 1 < blk.sz &&
1.1 schwarze 191: '\n' == blk.buf[i + 1])
192: ++i;
193: if ('\n' == blk.buf[i]) {
194: ++i;
195: ++lnn;
196: break;
197: }
198:
1.13 schwarze 199: /*
1.70 schwarze 200: * Make sure we have space for the worst
1.170 schwarze 201: * case of 12 bytes: "\\[u10ffff]\n\0"
1.13 schwarze 202: */
203:
1.170 schwarze 204: if (pos + 12 > ln.sz)
1.13 schwarze 205: resize_buf(&ln, 256);
206:
1.25 schwarze 207: /*
1.70 schwarze 208: * Encode 8-bit input.
1.1 schwarze 209: */
210:
1.70 schwarze 211: c = blk.buf[i];
212: if (c & 0x80) {
1.71 schwarze 213: if ( ! (curp->filenc && preconv_encode(
214: &blk, &i, &ln, &pos, &curp->filenc))) {
1.177 schwarze 215: mandoc_msg(MANDOCERR_CHAR_BAD,
1.89 schwarze 216: curp->line, pos, "0x%x", c);
1.70 schwarze 217: ln.buf[pos++] = '?';
218: i++;
219: }
220: continue;
221: }
222:
223: /*
224: * Exclude control characters.
225: */
1.1 schwarze 226:
1.70 schwarze 227: if (c == 0x7f || (c < 0x20 && c != 0x09)) {
1.177 schwarze 228: mandoc_msg(c == 0x00 || c == 0x04 ||
1.89 schwarze 229: c > 0x0a ? MANDOCERR_CHAR_BAD :
230: MANDOCERR_CHAR_UNSUPP,
1.177 schwarze 231: curp->line, pos, "0x%x", c);
1.1 schwarze 232: i++;
1.102 schwarze 233: if (c != '\r')
234: ln.buf[pos++] = '?';
1.1 schwarze 235: continue;
236: }
237:
238: ln.buf[pos++] = blk.buf[i++];
239: }
1.170 schwarze 240: ln.buf[pos] = '\0';
241:
242: /*
243: * Maintain a lookaside buffer of all lines.
244: * parsed from this input source.
245: */
246:
247: thisln = mandoc_malloc(sizeof(*thisln));
248: thisln->buf = mandoc_strdup(ln.buf);
249: thisln->sz = strlen(ln.buf) + 1;
250: thisln->next = NULL;
251: if (firstln == NULL) {
252: firstln = lastln = thisln;
253: if (curp->secondary == NULL)
254: curp->secondary = firstln;
255: } else {
256: lastln->next = thisln;
257: lastln = thisln;
258: }
1.1 schwarze 259:
1.170 schwarze 260: /* XXX Ugly hack to mark the end of the input. */
1.1 schwarze 261:
1.170 schwarze 262: if (i == blk.sz || blk.buf[i] == '\0') {
1.183 schwarze 263: if (pos + 2 > ln.sz)
264: resize_buf(&ln, 256);
1.142 schwarze 265: ln.buf[pos++] = '\n';
1.170 schwarze 266: ln.buf[pos] = '\0';
267: }
1.1 schwarze 268:
269: /*
270: * A significant amount of complexity is contained by
271: * the roff preprocessor. It's line-oriented but can be
272: * expressed on one line, so we need at times to
273: * readjust our starting point and re-run it. The roff
274: * preprocessor can also readjust the buffers with new
275: * data, so we pass them in wholesale.
276: */
277:
278: of = 0;
279: rerun:
1.169 schwarze 280: line_result = roff_parseln(curp->roff, curp->line, &ln, &of);
1.1 schwarze 281:
1.171 schwarze 282: /* Process options. */
283:
284: if (line_result & ROFF_APPEND)
285: assert(line_result == (ROFF_IGN | ROFF_APPEND));
286:
287: if (line_result & ROFF_USERCALL)
288: assert((line_result & ROFF_MASK) == ROFF_REPARSE);
289:
290: if (line_result & ROFF_USERRET) {
291: assert(line_result == (ROFF_IGN | ROFF_USERRET));
292: if (start == 0) {
293: /* Return from the current macro. */
294: result = ROFF_USERRET;
295: goto out;
296: }
297: }
298:
299: switch (line_result & ROFF_LOOPMASK) {
300: case ROFF_IGN:
301: break;
302: case ROFF_WHILE:
303: if (curp->loop != NULL) {
304: if (loop == curp->loop)
305: break;
306: mandoc_msg(MANDOCERR_WHILE_NEST,
1.177 schwarze 307: curp->line, pos, NULL);
1.171 schwarze 308: }
309: curp->loop = thisln;
310: loop = NULL;
311: inloop = 1;
312: break;
313: case ROFF_LOOPCONT:
314: case ROFF_LOOPEXIT:
315: if (curp->loop == NULL) {
316: mandoc_msg(MANDOCERR_WHILE_FAIL,
1.177 schwarze 317: curp->line, pos, NULL);
1.171 schwarze 318: break;
319: }
320: if (inloop == 0) {
321: mandoc_msg(MANDOCERR_WHILE_INTO,
1.177 schwarze 322: curp->line, pos, NULL);
1.171 schwarze 323: curp->loop = loop = NULL;
324: break;
325: }
326: if (line_result & ROFF_LOOPCONT)
327: loop = curp->loop;
328: else {
329: curp->loop = loop = NULL;
330: inloop = 0;
331: }
332: break;
333: default:
334: abort();
335: }
336:
337: /* Process the main instruction from the roff parser. */
338:
339: switch (line_result & ROFF_MASK) {
340: case ROFF_IGN:
341: break;
342: case ROFF_CONT:
1.180 schwarze 343: if (curp->man->meta.macroset == MACROSET_NONE)
1.171 schwarze 344: choose_parser(curp);
1.180 schwarze 345: if ((curp->man->meta.macroset == MACROSET_MDOC ?
1.171 schwarze 346: mdoc_parseln(curp->man, curp->line, ln.buf, of) :
347: man_parseln(curp->man, curp->line, ln.buf, of)
348: ) == 2)
349: goto out;
350: break;
351: case ROFF_RERUN:
352: goto rerun;
1.25 schwarze 353: case ROFF_REPARSE:
1.169 schwarze 354: if (++curp->reparse_count > REPARSE_LIMIT) {
1.171 schwarze 355: /* Abort and return to the top level. */
1.170 schwarze 356: result = ROFF_IGN;
1.177 schwarze 357: mandoc_msg(MANDOCERR_ROFFLOOP,
1.25 schwarze 358: curp->line, pos, NULL);
1.171 schwarze 359: goto out;
1.169 schwarze 360: }
1.171 schwarze 361: result = mparse_buf_r(curp, ln, of, 0);
362: if (line_result & ROFF_USERCALL) {
363: roff_userret(curp->roff);
364: /* Continue normally. */
365: if (result & ROFF_USERRET)
366: result = ROFF_CONT;
1.135 schwarze 367: }
1.171 schwarze 368: if (start == 0 && result != ROFF_CONT)
369: goto out;
370: break;
1.25 schwarze 371: case ROFF_SO:
1.71 schwarze 372: if ( ! (curp->options & MPARSE_SO) &&
373: (i >= blk.sz || blk.buf[i] == '\0')) {
1.180 schwarze 374: curp->man->meta.sodest =
375: mandoc_strdup(ln.buf + of);
1.170 schwarze 376: goto out;
1.23 schwarze 377: }
1.122 schwarze 378: if ((fd = mparse_open(curp, ln.buf + of)) != -1) {
1.85 schwarze 379: mparse_readfd(curp, fd, ln.buf + of);
1.121 schwarze 380: close(fd);
1.88 schwarze 381: } else {
1.179 schwarze 382: mandoc_msg(MANDOCERR_SO_FAIL,
383: curp->line, of, ".so %s: %s",
384: ln.buf + of, strerror(errno));
1.85 schwarze 385: ln.sz = mandoc_asprintf(&cp,
386: ".sp\nSee the file %s.\n.sp",
387: ln.buf + of);
388: free(ln.buf);
389: ln.buf = cp;
390: of = 0;
391: mparse_buf_r(curp, ln, of, 0);
1.30 schwarze 392: }
1.171 schwarze 393: break;
1.1 schwarze 394: default:
1.171 schwarze 395: abort();
1.1 schwarze 396: }
397:
1.171 schwarze 398: /* Start the next input line. */
1.1 schwarze 399:
1.171 schwarze 400: if (loop != NULL &&
401: (line_result & ROFF_LOOPMASK) == ROFF_IGN)
402: loop = loop->next;
403:
404: if (loop != NULL) {
405: if ((line_result & ROFF_APPEND) == 0)
406: *ln.buf = '\0';
407: if (ln.sz < loop->sz)
408: resize_buf(&ln, loop->sz);
409: (void)strlcat(ln.buf, loop->buf, ln.sz);
410: of = 0;
411: goto rerun;
412: }
1.1 schwarze 413:
1.171 schwarze 414: pos = (line_result & ROFF_APPEND) ? strlen(ln.buf) : 0;
1.1 schwarze 415: }
1.170 schwarze 416: out:
1.171 schwarze 417: if (inloop) {
418: if (result != ROFF_USERRET)
1.177 schwarze 419: mandoc_msg(MANDOCERR_WHILE_OUTOF,
1.171 schwarze 420: curp->line, pos, NULL);
421: curp->loop = NULL;
422: }
1.1 schwarze 423: free(ln.buf);
1.170 schwarze 424: if (firstln != curp->secondary)
425: free_buf_list(firstln);
426: return result;
1.1 schwarze 427: }
428:
429: static int
1.175 schwarze 430: read_whole_file(struct mparse *curp, int fd, struct buf *fb, int *with_mmap)
1.1 schwarze 431: {
432: struct stat st;
1.115 schwarze 433: gzFile gz;
1.1 schwarze 434: size_t off;
435: ssize_t ssz;
1.166 schwarze 436: int gzerrnum, retval;
1.1 schwarze 437:
1.164 schwarze 438: if (fstat(fd, &st) == -1) {
1.185 schwarze 439: mandoc_msg(MANDOCERR_FSTAT, 0, 0, "%s", strerror(errno));
440: return -1;
1.164 schwarze 441: }
1.1 schwarze 442:
443: /*
444: * If we're a regular file, try just reading in the whole entry
445: * via mmap(). This is faster than reading it into blocks, and
446: * since each file is only a few bytes to begin with, I'm not
447: * concerned that this is going to tank any machines.
448: */
449:
1.115 schwarze 450: if (curp->gzip == 0 && S_ISREG(st.st_mode)) {
1.106 schwarze 451: if (st.st_size > 0x7fffffff) {
1.177 schwarze 452: mandoc_msg(MANDOCERR_TOOLARGE, 0, 0, NULL);
1.185 schwarze 453: return -1;
1.1 schwarze 454: }
455: *with_mmap = 1;
456: fb->sz = (size_t)st.st_size;
1.15 schwarze 457: fb->buf = mmap(NULL, fb->sz, PROT_READ, MAP_SHARED, fd, 0);
1.1 schwarze 458: if (fb->buf != MAP_FAILED)
1.185 schwarze 459: return 0;
1.1 schwarze 460: }
461:
1.115 schwarze 462: if (curp->gzip) {
1.166 schwarze 463: /*
464: * Duplicating the file descriptor is required
465: * because we will have to call gzclose(3)
466: * to free memory used internally by zlib,
467: * but that will also close the file descriptor,
468: * which this function must not do.
469: */
470: if ((fd = dup(fd)) == -1) {
1.185 schwarze 471: mandoc_msg(MANDOCERR_DUP, 0, 0,
472: "%s", strerror(errno));
473: return -1;
1.166 schwarze 474: }
1.164 schwarze 475: if ((gz = gzdopen(fd, "rb")) == NULL) {
1.185 schwarze 476: mandoc_msg(MANDOCERR_GZDOPEN, 0, 0,
477: "%s", strerror(errno));
1.166 schwarze 478: close(fd);
1.185 schwarze 479: return -1;
1.164 schwarze 480: }
1.115 schwarze 481: } else
482: gz = NULL;
483:
1.1 schwarze 484: /*
485: * If this isn't a regular file (like, say, stdin), then we must
486: * go the old way and just read things in bit by bit.
487: */
488:
489: *with_mmap = 0;
490: off = 0;
1.185 schwarze 491: retval = -1;
1.1 schwarze 492: fb->sz = 0;
493: fb->buf = NULL;
494: for (;;) {
495: if (off == fb->sz) {
496: if (fb->sz == (1U << 31)) {
1.177 schwarze 497: mandoc_msg(MANDOCERR_TOOLARGE, 0, 0, NULL);
1.1 schwarze 498: break;
499: }
500: resize_buf(fb, 65536);
501: }
1.115 schwarze 502: ssz = curp->gzip ?
503: gzread(gz, fb->buf + (int)off, fb->sz - off) :
504: read(fd, fb->buf + (int)off, fb->sz - off);
1.1 schwarze 505: if (ssz == 0) {
506: fb->sz = off;
1.185 schwarze 507: retval = 0;
1.166 schwarze 508: break;
1.1 schwarze 509: }
1.164 schwarze 510: if (ssz == -1) {
1.166 schwarze 511: if (curp->gzip)
512: (void)gzerror(gz, &gzerrnum);
1.185 schwarze 513: mandoc_msg(MANDOCERR_READ, 0, 0, "%s",
1.166 schwarze 514: curp->gzip && gzerrnum != Z_ERRNO ?
515: zError(gzerrnum) : strerror(errno));
1.164 schwarze 516: break;
517: }
1.1 schwarze 518: off += (size_t)ssz;
519: }
520:
1.166 schwarze 521: if (curp->gzip && (gzerrnum = gzclose(gz)) != Z_OK)
1.185 schwarze 522: mandoc_msg(MANDOCERR_GZCLOSE, 0, 0, "%s",
1.166 schwarze 523: gzerrnum == Z_ERRNO ? strerror(errno) :
524: zError(gzerrnum));
1.185 schwarze 525: if (retval == -1) {
1.166 schwarze 526: free(fb->buf);
527: fb->buf = NULL;
528: }
529: return retval;
1.1 schwarze 530: }
531:
532: static void
533: mparse_end(struct mparse *curp)
534: {
1.180 schwarze 535: if (curp->man->meta.macroset == MACROSET_NONE)
536: curp->man->meta.macroset = MACROSET_MAN;
537: if (curp->man->meta.macroset == MACROSET_MDOC)
1.110 schwarze 538: mdoc_endparse(curp->man);
539: else
1.86 schwarze 540: man_endparse(curp->man);
1.1 schwarze 541: roff_endparse(curp->roff);
542: }
543:
1.176 schwarze 544: /*
545: * Read the whole file into memory and call the parsers.
546: * Called recursively when an .so request is encountered.
547: */
548: void
549: mparse_readfd(struct mparse *curp, int fd, const char *filename)
1.1 schwarze 550: {
1.176 schwarze 551: static int recursion_depth;
552:
553: struct buf blk;
554: struct buf *save_primary;
555: const char *save_filename;
1.71 schwarze 556: size_t offset;
1.176 schwarze 557: int save_filenc, save_lineno;
558: int with_mmap;
1.14 schwarze 559:
1.176 schwarze 560: if (recursion_depth > 64) {
1.177 schwarze 561: mandoc_msg(MANDOCERR_ROFFLOOP, curp->line, 0, NULL);
1.15 schwarze 562: return;
1.14 schwarze 563: }
1.185 schwarze 564: if (read_whole_file(curp, fd, &blk, &with_mmap) == -1)
1.176 schwarze 565: return;
566:
567: /*
568: * Save some properties of the parent file.
569: */
570:
571: save_primary = curp->primary;
572: save_filenc = curp->filenc;
573: save_lineno = curp->line;
574: save_filename = mandoc_msg_getinfilename();
1.1 schwarze 575:
1.59 schwarze 576: curp->primary = &blk;
1.176 schwarze 577: curp->filenc = curp->options & (MPARSE_UTF8 | MPARSE_LATIN1);
1.15 schwarze 578: curp->line = 1;
1.176 schwarze 579: mandoc_msg_setinfilename(filename);
1.15 schwarze 580:
1.70 schwarze 581: /* Skip an UTF-8 byte order mark. */
582: if (curp->filenc & MPARSE_UTF8 && blk.sz > 2 &&
583: (unsigned char)blk.buf[0] == 0xef &&
584: (unsigned char)blk.buf[1] == 0xbb &&
585: (unsigned char)blk.buf[2] == 0xbf) {
1.71 schwarze 586: offset = 3;
1.70 schwarze 587: curp->filenc &= ~MPARSE_LATIN1;
1.71 schwarze 588: } else
589: offset = 0;
1.70 schwarze 590:
1.176 schwarze 591: recursion_depth++;
1.71 schwarze 592: mparse_buf_r(curp, blk, offset, 1);
1.86 schwarze 593: if (--recursion_depth == 0)
1.15 schwarze 594: mparse_end(curp);
595:
1.176 schwarze 596: /*
597: * Clean up and restore saved parent properties.
598: */
1.15 schwarze 599:
1.176 schwarze 600: if (with_mmap)
601: munmap(blk.buf, blk.sz);
602: else
603: free(blk.buf);
1.1 schwarze 604:
1.176 schwarze 605: curp->primary = save_primary;
606: curp->filenc = save_filenc;
607: curp->line = save_lineno;
608: if (save_filename != NULL)
609: mandoc_msg_setinfilename(save_filename);
1.58 schwarze 610: }
611:
1.122 schwarze 612: int
613: mparse_open(struct mparse *curp, const char *file)
1.58 schwarze 614: {
615: char *cp;
1.182 schwarze 616: int fd, save_errno;
1.58 schwarze 617:
1.115 schwarze 618: cp = strrchr(file, '.');
619: curp->gzip = (cp != NULL && ! strcmp(cp + 1, "gz"));
1.74 schwarze 620:
1.115 schwarze 621: /* First try to use the filename as it is. */
1.74 schwarze 622:
1.122 schwarze 623: if ((fd = open(file, O_RDONLY)) != -1)
624: return fd;
1.74 schwarze 625:
1.115 schwarze 626: /*
627: * If that doesn't work and the filename doesn't
628: * already end in .gz, try appending .gz.
629: */
1.74 schwarze 630:
1.115 schwarze 631: if ( ! curp->gzip) {
1.182 schwarze 632: save_errno = errno;
1.74 schwarze 633: mandoc_asprintf(&cp, "%s.gz", file);
1.123 schwarze 634: fd = open(cp, O_RDONLY);
1.83 schwarze 635: free(cp);
1.182 schwarze 636: errno = save_errno;
1.122 schwarze 637: if (fd != -1) {
1.115 schwarze 638: curp->gzip = 1;
1.122 schwarze 639: return fd;
1.58 schwarze 640: }
641: }
642:
1.115 schwarze 643: /* Neither worked, give up. */
1.73 schwarze 644:
1.122 schwarze 645: return -1;
1.1 schwarze 646: }
647:
648: struct mparse *
1.175 schwarze 649: mparse_alloc(int options, enum mandoc_os os_e, const char *os_s)
1.1 schwarze 650: {
651: struct mparse *curp;
652:
653: curp = mandoc_calloc(1, sizeof(struct mparse));
654:
1.22 schwarze 655: curp->options = options;
1.151 schwarze 656: curp->os_s = os_s;
1.1 schwarze 657:
1.178 schwarze 658: curp->roff = roff_alloc(options);
659: curp->man = roff_man_alloc(curp->roff, curp->os_s,
1.112 schwarze 660: curp->options & MPARSE_QUICK ? 1 : 0);
1.111 schwarze 661: if (curp->options & MPARSE_MDOC) {
1.180 schwarze 662: curp->man->meta.macroset = MACROSET_MDOC;
1.136 schwarze 663: if (curp->man->mdocmac == NULL)
664: curp->man->mdocmac = roffhash_alloc(MDOC_Dd, MDOC_MAX);
1.112 schwarze 665: } else if (curp->options & MPARSE_MAN) {
1.180 schwarze 666: curp->man->meta.macroset = MACROSET_MAN;
1.136 schwarze 667: if (curp->man->manmac == NULL)
668: curp->man->manmac = roffhash_alloc(MAN_TH, MAN_MAX);
1.111 schwarze 669: }
1.180 schwarze 670: curp->man->meta.first->tok = TOKEN_NONE;
1.151 schwarze 671: curp->man->meta.os_e = os_e;
1.186 schwarze 672: tag_alloc();
1.117 schwarze 673: return curp;
1.1 schwarze 674: }
675:
676: void
677: mparse_reset(struct mparse *curp)
678: {
1.187 schwarze 679: tag_free();
1.1 schwarze 680: roff_reset(curp->roff);
1.124 schwarze 681: roff_man_reset(curp->man);
1.170 schwarze 682: free_buf_list(curp->secondary);
683: curp->secondary = NULL;
1.132 schwarze 684: curp->gzip = 0;
1.187 schwarze 685: tag_alloc();
1.1 schwarze 686: }
687:
688: void
689: mparse_free(struct mparse *curp)
690: {
1.186 schwarze 691: tag_free();
1.136 schwarze 692: roffhash_free(curp->man->mdocmac);
693: roffhash_free(curp->man->manmac);
1.112 schwarze 694: roff_man_free(curp->man);
1.133 schwarze 695: roff_free(curp->roff);
1.170 schwarze 696: free_buf_list(curp->secondary);
1.1 schwarze 697: free(curp);
698: }
699:
1.180 schwarze 700: struct roff_meta *
701: mparse_result(struct mparse *curp)
1.1 schwarze 702: {
1.181 schwarze 703: roff_state_reset(curp->man);
1.180 schwarze 704: if (curp->options & MPARSE_VALIDATE) {
705: if (curp->man->meta.macroset == MACROSET_MDOC)
706: mdoc_validate(curp->man);
707: else
708: man_validate(curp->man);
1.189 ! schwarze 709: tag_postprocess(curp->man, curp->man->meta.first);
1.23 schwarze 710: }
1.180 schwarze 711: return &curp->man->meta;
1.4 schwarze 712: }
713:
714: void
1.170 schwarze 715: mparse_copy(const struct mparse *p)
1.4 schwarze 716: {
1.170 schwarze 717: struct buf *buf;
1.4 schwarze 718:
1.170 schwarze 719: for (buf = p->secondary; buf != NULL; buf = buf->next)
720: puts(buf->buf);
1.1 schwarze 721: }