Annotation of src/usr.bin/mandoc/roff.c, Revision 1.22
1.22 ! schwarze 1: /* $Id: roff.c,v 1.21 2010/11/28 19:35:33 schwarze Exp $ */
1.1 schwarze 2: /*
3: * Copyright (c) 2010 Kristaps Dzonsons <kristaps@bsd.lv>
1.8 schwarze 4: * Copyright (c) 2010 Ingo Schwarze <schwarze@openbsd.org>
1.1 schwarze 5: *
6: * Permission to use, copy, modify, and distribute this software for any
7: * purpose with or without fee is hereby granted, provided that the above
8: * copyright notice and this permission notice appear in all copies.
9: *
1.16 schwarze 10: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
1.1 schwarze 11: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
1.16 schwarze 12: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
1.1 schwarze 13: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17: */
18: #ifdef HAVE_CONFIG_H
19: #include "config.h"
20: #endif
21:
22: #include <assert.h>
1.6 schwarze 23: #include <errno.h>
1.3 schwarze 24: #include <ctype.h>
1.6 schwarze 25: #include <limits.h>
1.1 schwarze 26: #include <stdlib.h>
27: #include <string.h>
1.2 schwarze 28: #include <stdio.h>
1.1 schwarze 29:
30: #include "mandoc.h"
31: #include "roff.h"
1.8 schwarze 32: #include "libmandoc.h"
1.1 schwarze 33:
/* Number of slots in the `ie' rule stack (rstack in struct roff). */
#define	RSTACK_MAX	128

/* Non-zero if c is one of the two control characters, `.' or `''. */
#define	ROFF_CTL(c) \
	((c) == '.' || (c) == '\'')
38:
/*
 * Tokens for the requests known to this file.  The values are used as
 * indices into roffs[], so the order here must match that table.
 * ROFF_MAX doubles as the "no such request" value.
 */
enum	rofft {
	ROFF_ad,
	ROFF_am, ROFF_ami, ROFF_am1,
	ROFF_de, ROFF_dei, ROFF_de1,
	ROFF_ds,
	ROFF_el,
	ROFF_hy,
	ROFF_ie, ROFF_if,
	ROFF_ig,
	ROFF_ne, ROFF_nh, ROFF_nr,
	ROFF_rm,
	ROFF_so,
	ROFF_tr,
	ROFF_cblock,
	ROFF_ccond, /* FIXME: remove this. */
	ROFF_USERDEF,
	ROFF_MAX
};
64:
/* Outcome of a conditional: let the body through or suppress it. */
enum	roffrule {
	ROFFRULE_ALLOW,
	ROFFRULE_DENY
};
69:
1.8 schwarze 70:
/* One entry of the singly linked list of user-defined strings. */
struct	roffstr {
	char		*name;   /* key of symbol */
	char		*string; /* current value */
	struct roffstr	*next;   /* next in list */
};
76:
1.1 schwarze 77: struct roff {
78: struct roffnode *last; /* leaf of stack */
79: mandocmsg msg; /* err/warn/fatal messages */
80: void *data; /* privdata for messages */
1.2 schwarze 81: enum roffrule rstack[RSTACK_MAX]; /* stack of !`ie' rules */
82: int rstackpos; /* position in rstack */
1.6 schwarze 83: struct regset *regs; /* read/writable registers */
1.16 schwarze 84: struct roffstr *first_string; /* user-defined strings & macros */
85: const char *current_string; /* value of last called user macro */
1.1 schwarze 86: };
87:
88: struct roffnode {
89: enum rofft tok; /* type of node */
90: struct roffnode *parent; /* up one in stack */
91: int line; /* parse line */
92: int col; /* parse col */
1.16 schwarze 93: char *name; /* node name, e.g. macro name */
1.2 schwarze 94: char *end; /* end-rules: custom token */
95: int endspan; /* end-rules: next-line or infty */
96: enum roffrule rule; /* current evaluation rule */
1.1 schwarze 97: };
98:
/*
 * Common parameter list of every request handler:
 *
 *	r	parse ctx
 *	tok	tok of macro
 *	bufp	input buffer
 *	szp	size of input buffer
 *	ln	parse line
 *	ppos	original pos in buffer
 *	pos	current pos in buffer
 *	offs	reset offset of buffer data
 */
#define	ROFF_ARGS \
	struct roff *r, enum rofft tok, char **bufp, size_t *szp, \
	int ln, int ppos, int pos, int *offs

typedef	enum rofferr (*roffproc)(ROFF_ARGS);
109:
110: struct roffmac {
111: const char *name; /* macro name */
1.2 schwarze 112: roffproc proc; /* process new macro */
113: roffproc text; /* process as child text of macro */
114: roffproc sub; /* process as child of macro */
115: int flags;
116: #define ROFFMAC_STRUCT (1 << 0) /* always interpret */
1.3 schwarze 117: struct roffmac *next;
1.1 schwarze 118: };
119:
/* Block requests and their terminator. */
static	enum rofferr	 roff_block(ROFF_ARGS);
static	enum rofferr	 roff_block_text(ROFF_ARGS);
static	enum rofferr	 roff_block_sub(ROFF_ARGS);
static	enum rofferr	 roff_cblock(ROFF_ARGS);

/* Conditionals and their terminator. */
static	enum rofferr	 roff_ccond(ROFF_ARGS);
static	enum rofferr	 roff_cond(ROFF_ARGS);
static	enum rofferr	 roff_cond_text(ROFF_ARGS);
static	enum rofferr	 roff_cond_sub(ROFF_ARGS);
static	enum roffrule	 roff_evalcond(const char *, int *);

/* Single-line requests. */
static	enum rofferr	 roff_ds(ROFF_ARGS);
static	enum rofferr	 roff_line_ignore(ROFF_ARGS);
static	enum rofferr	 roff_line_error(ROFF_ARGS);
static	enum rofferr	 roff_nr(ROFF_ARGS);
static	enum rofferr	 roff_so(ROFF_ARGS);
static	enum rofferr	 roff_userdef(ROFF_ARGS);

/* User-defined string storage and expansion. */
static	void		 roff_freestr(struct roff *);
static	const char	*roff_getstrn(const struct roff *,
				const char *, size_t);
static	int		 roff_res(struct roff *,
				char **, size_t *, int);
static	void		 roff_setstr(struct roff *,
				const char *, const char *, int);
1.1 schwarze 142:
/*
 * Range of first characters accepted by the request hash table and the
 * resulting number of buckets.  See roff_hash_find().
 */
#define	ASCII_LO	33
#define	ASCII_HI	126
#define	HASHWIDTH	(ASCII_HI - ASCII_LO + 1)

static	struct roffmac	*hash[HASHWIDTH];
150:
151: static struct roffmac roffs[ROFF_MAX] = {
1.21 schwarze 152: { "ad", roff_line_ignore, NULL, NULL, 0, NULL },
1.3 schwarze 153: { "am", roff_block, roff_block_text, roff_block_sub, 0, NULL },
154: { "ami", roff_block, roff_block_text, roff_block_sub, 0, NULL },
155: { "am1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
156: { "de", roff_block, roff_block_text, roff_block_sub, 0, NULL },
157: { "dei", roff_block, roff_block_text, roff_block_sub, 0, NULL },
158: { "de1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
1.7 schwarze 159: { "ds", roff_ds, NULL, NULL, 0, NULL },
1.3 schwarze 160: { "el", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
1.21 schwarze 161: { "hy", roff_line_ignore, NULL, NULL, 0, NULL },
1.3 schwarze 162: { "ie", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
163: { "if", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
164: { "ig", roff_block, roff_block_text, roff_block_sub, 0, NULL },
1.21 schwarze 165: { "ne", roff_line_ignore, NULL, NULL, 0, NULL },
166: { "nh", roff_line_ignore, NULL, NULL, 0, NULL },
1.14 schwarze 167: { "nr", roff_nr, NULL, NULL, 0, NULL },
1.21 schwarze 168: { "rm", roff_line_error, NULL, NULL, 0, NULL },
1.14 schwarze 169: { "so", roff_so, NULL, NULL, 0, NULL },
1.21 schwarze 170: { "tr", roff_line_ignore, NULL, NULL, 0, NULL },
1.3 schwarze 171: { ".", roff_cblock, NULL, NULL, 0, NULL },
172: { "\\}", roff_ccond, NULL, NULL, 0, NULL },
1.16 schwarze 173: { NULL, roff_userdef, NULL, NULL, 0, NULL },
1.1 schwarze 174: };
175:
/* Context teardown. */
static	void		 roff_free1(struct roff *);

/* Request lookup. */
static	void		 roff_hash_init(void);
static	enum rofft	 roff_hash_find(const char *, size_t);

/* Node stack maintenance. */
static	void		 roffnode_push(struct roff *, enum rofft,
				const char *, int, int);
static	void		 roffnode_pop(struct roff *);
static	void		 roffnode_cleanscope(struct roff *);

/* Input parsing helpers. */
static	enum rofft	 roff_parse(struct roff *, const char *, int *);
static	int		 roff_parse_nat(const char *, unsigned int *);
1.1 schwarze 185:
/*
 * Bucket index for the name p: its first character relative to
 * ASCII_LO.  See roff_hash_find().  The argument is parenthesized so
 * that an expression such as `buf + n' indexes the intended character.
 */
#define	ROFF_HASH(p)	((p)[0] - ASCII_LO)
188:
189: static void
190: roff_hash_init(void)
191: {
192: struct roffmac *n;
193: int buc, i;
194:
1.16 schwarze 195: for (i = 0; i < (int)ROFF_USERDEF; i++) {
1.3 schwarze 196: assert(roffs[i].name[0] >= ASCII_LO);
197: assert(roffs[i].name[0] <= ASCII_HI);
198:
199: buc = ROFF_HASH(roffs[i].name);
200:
201: if (NULL != (n = hash[buc])) {
202: for ( ; n->next; n = n->next)
203: /* Do nothing. */ ;
204: n->next = &roffs[i];
205: } else
206: hash[buc] = &roffs[i];
207: }
208: }
209:
1.1 schwarze 210:
211: /*
212: * Look up a roff token by its name. Returns ROFF_MAX if no macro by
213: * the nil-terminated string name could be found.
214: */
215: static enum rofft
1.16 schwarze 216: roff_hash_find(const char *p, size_t s)
1.1 schwarze 217: {
1.3 schwarze 218: int buc;
219: struct roffmac *n;
1.1 schwarze 220:
1.3 schwarze 221: /*
222: * libroff has an extremely simple hashtable, for the time
223: * being, which simply keys on the first character, which must
224: * be printable, then walks a chain. It works well enough until
225: * optimised.
226: */
227:
228: if (p[0] < ASCII_LO || p[0] > ASCII_HI)
229: return(ROFF_MAX);
230:
231: buc = ROFF_HASH(p);
232:
233: if (NULL == (n = hash[buc]))
234: return(ROFF_MAX);
235: for ( ; n; n = n->next)
1.16 schwarze 236: if (0 == strncmp(n->name, p, s) && '\0' == n->name[(int)s])
1.3 schwarze 237: return((enum rofft)(n - roffs));
1.1 schwarze 238:
239: return(ROFF_MAX);
240: }
241:
242:
243: /*
244: * Pop the current node off of the stack of roff instructions currently
245: * pending.
246: */
247: static void
248: roffnode_pop(struct roff *r)
249: {
250: struct roffnode *p;
251:
1.2 schwarze 252: assert(r->last);
253: p = r->last;
254:
255: if (ROFF_el == p->tok)
256: if (r->rstackpos > -1)
257: r->rstackpos--;
258:
259: r->last = r->last->parent;
1.16 schwarze 260: free(p->name);
261: free(p->end);
1.1 schwarze 262: free(p);
263: }
264:
265:
266: /*
267: * Push a roff node onto the instruction stack. This must later be
268: * removed with roffnode_pop().
269: */
1.11 schwarze 270: static void
1.16 schwarze 271: roffnode_push(struct roff *r, enum rofft tok, const char *name,
272: int line, int col)
1.1 schwarze 273: {
274: struct roffnode *p;
275:
1.11 schwarze 276: p = mandoc_calloc(1, sizeof(struct roffnode));
1.1 schwarze 277: p->tok = tok;
1.16 schwarze 278: if (name)
279: p->name = mandoc_strdup(name);
1.1 schwarze 280: p->parent = r->last;
281: p->line = line;
282: p->col = col;
1.2 schwarze 283: p->rule = p->parent ? p->parent->rule : ROFFRULE_DENY;
1.1 schwarze 284:
285: r->last = p;
286: }
287:
288:
289: static void
290: roff_free1(struct roff *r)
291: {
292:
293: while (r->last)
294: roffnode_pop(r);
1.8 schwarze 295: roff_freestr(r);
1.1 schwarze 296: }
297:
298:
299: void
300: roff_reset(struct roff *r)
301: {
302:
303: roff_free1(r);
304: }
305:
306:
/* Release everything the context owns, then the context itself. */
void
roff_free(struct roff *r)
{

	roff_free1(r);

	free(r);
}
314:
315:
316: struct roff *
1.11 schwarze 317: roff_alloc(struct regset *regs, void *data, const mandocmsg msg)
1.1 schwarze 318: {
319: struct roff *r;
320:
1.11 schwarze 321: r = mandoc_calloc(1, sizeof(struct roff));
1.6 schwarze 322: r->regs = regs;
1.1 schwarze 323: r->msg = msg;
324: r->data = data;
1.2 schwarze 325: r->rstackpos = -1;
1.3 schwarze 326:
327: roff_hash_init();
1.1 schwarze 328: return(r);
329: }
330:
331:
/*
 * Expand the first user-defined string reference found at or after
 * pos that names a known string.  Recognized forms are `\*x' (one
 * character), `\*(xx' (two characters) and `\*[name]'.
 *
 * Returns 0 if *bufp was replaced by a newly allocated, expanded copy
 * (with *szp updated), so that the caller can parse the line again.
 * Returns 1 if nothing was expanded, which includes the case of a
 * reference that runs into the end of the line.
 */
static int
roff_res(struct roff *r, char **bufp, size_t *szp, int pos)
{
	const char	*cp, *esc, *name, *val;
	char		*nbuf;
	size_t		 nsz;
	int		 len, maxl;

	cp = &(*bufp)[pos];

	/* LINTED */
	while (NULL != (esc = strstr(cp, "\\*"))) {
		cp = esc + 2;

		/* maxl is the fixed name length; 0 means "up to `]'". */
		if ('(' == *cp) {
			cp++;
			maxl = 2;
		} else if ('[' == *cp) {
			cp++;
			maxl = 0;
		} else
			maxl = 1;

		name = cp;

		for (len = 0; 0 == maxl || len < maxl; len++, cp++) {
			if ('\0' == *cp)
				return(1); /* Error. */
			if (0 == maxl && ']' == *cp)
				break;
		}

		val = roff_getstrn(r, name, (size_t)len);

		if (NULL == val) {
			/*
			 * Unknown string: resume the search behind the
			 * name, stepping over the `]' in bracket form.
			 */
			if (0 == maxl)
				cp++;
			continue;
		}

		nsz = *szp + strlen(val) + 1;
		nbuf = mandoc_malloc(nsz);

		*nbuf = '\0';

		/* Text before the escape, the value, then the rest. */
		strlcat(nbuf, *bufp, (size_t)(esc - *bufp + 1));
		strlcat(nbuf, val, nsz);
		strlcat(nbuf, cp + (maxl ? 0 : 1), nsz);

		free(*bufp);

		*bufp = nbuf;
		*szp = nsz;
		return(0);
	}

	return(1);
}
396:
397:
1.1 schwarze 398: enum rofferr
1.6 schwarze 399: roff_parseln(struct roff *r, int ln, char **bufp,
400: size_t *szp, int pos, int *offs)
1.1 schwarze 401: {
402: enum rofft t;
403: int ppos;
404:
1.2 schwarze 405: /*
1.8 schwarze 406: * Run the reserved-word filter only if we have some reserved
407: * words to fill in.
408: */
409:
1.9 schwarze 410: if (r->first_string && ! roff_res(r, bufp, szp, pos))
1.16 schwarze 411: return(ROFF_REPARSE);
1.8 schwarze 412:
413: /*
1.2 schwarze 414: * First, if a scope is open and we're not a macro, pass the
415: * text through the macro's filter. If a scope isn't open and
416: * we're not a macro, just let it through.
417: */
418:
419: if (r->last && ! ROFF_CTL((*bufp)[pos])) {
420: t = r->last->tok;
421: assert(roffs[t].text);
422: return((*roffs[t].text)
1.8 schwarze 423: (r, t, bufp, szp,
424: ln, pos, pos, offs));
1.2 schwarze 425: } else if ( ! ROFF_CTL((*bufp)[pos]))
426: return(ROFF_CONT);
427:
428: /*
429: * If a scope is open, go to the child handler for that macro,
430: * as it may want to preprocess before doing anything with it.
431: */
432:
433: if (r->last) {
1.1 schwarze 434: t = r->last->tok;
435: assert(roffs[t].sub);
1.2 schwarze 436: return((*roffs[t].sub)
1.8 schwarze 437: (r, t, bufp, szp,
438: ln, pos, pos, offs));
1.2 schwarze 439: }
440:
441: /*
442: * Lastly, as we've no scope open, try to look up and execute
443: * the new macro. If no macro is found, simply return and let
444: * the compilers handle it.
445: */
446:
447: ppos = pos;
1.16 schwarze 448: if (ROFF_MAX == (t = roff_parse(r, *bufp, &pos)))
1.1 schwarze 449: return(ROFF_CONT);
450:
1.2 schwarze 451: assert(roffs[t].proc);
452: return((*roffs[t].proc)
1.8 schwarze 453: (r, t, bufp, szp,
454: ln, ppos, pos, offs));
1.2 schwarze 455: }
456:
1.1 schwarze 457:
1.2 schwarze 458: int
459: roff_endparse(struct roff *r)
460: {
1.1 schwarze 461:
1.2 schwarze 462: if (NULL == r->last)
463: return(1);
464: return((*r->msg)(MANDOCERR_SCOPEEXIT, r->data, r->last->line,
465: r->last->col, NULL));
1.1 schwarze 466: }
467:
468:
469: /*
470: * Parse a roff node's type from the input buffer. This must be in the
471: * form of ".foo xxx" in the usual way.
472: */
473: static enum rofft
1.16 schwarze 474: roff_parse(struct roff *r, const char *buf, int *pos)
1.1 schwarze 475: {
1.16 schwarze 476: const char *mac;
477: size_t maclen;
1.1 schwarze 478: enum rofft t;
479:
1.2 schwarze 480: assert(ROFF_CTL(buf[*pos]));
481: (*pos)++;
1.1 schwarze 482:
1.16 schwarze 483: while (' ' == buf[*pos] || '\t' == buf[*pos])
1.1 schwarze 484: (*pos)++;
485:
486: if ('\0' == buf[*pos])
487: return(ROFF_MAX);
488:
1.16 schwarze 489: mac = buf + *pos;
490: maclen = strcspn(mac, " \\\t\0");
1.1 schwarze 491:
1.16 schwarze 492: t = (r->current_string = roff_getstrn(r, mac, maclen))
493: ? ROFF_USERDEF : roff_hash_find(mac, maclen);
1.1 schwarze 494:
1.16 schwarze 495: *pos += maclen;
1.1 schwarze 496: while (buf[*pos] && ' ' == buf[*pos])
497: (*pos)++;
498:
499: return(t);
500: }
501:
502:
/*
 * Convert the decimal string buf into a natural number.  On success
 * the value is stored in *res and 1 is returned.  Returns 0, leaving
 * *res untouched, if buf is empty, has trailing characters after the
 * number, or denotes a value that is negative or above INT_MAX.
 */
static int
roff_parse_nat(const char *buf, unsigned int *res)
{
	char	*end;
	long	 num;

	errno = 0;
	num = strtol(buf, &end, 10);

	if ('\0' == buf[0] || '\0' != *end)
		return(0);

	/* Out of range for long: strtol() clamped the result. */
	if (ERANGE == errno && (LONG_MAX == num || LONG_MIN == num))
		return(0);

	if (num < 0 || num > INT_MAX)
		return(0);

	*res = (unsigned int)num;
	return(1);
}
521:
522:
1.1 schwarze 523: /* ARGSUSED */
524: static enum rofferr
1.2 schwarze 525: roff_cblock(ROFF_ARGS)
1.1 schwarze 526: {
527:
1.2 schwarze 528: /*
529: * A block-close `..' should only be invoked as a child of an
530: * ignore macro, otherwise raise a warning and just ignore it.
531: */
532:
533: if (NULL == r->last) {
534: if ( ! (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL))
535: return(ROFF_ERR);
536: return(ROFF_IGN);
537: }
1.1 schwarze 538:
1.2 schwarze 539: switch (r->last->tok) {
540: case (ROFF_am):
541: /* FALLTHROUGH */
542: case (ROFF_ami):
543: /* FALLTHROUGH */
544: case (ROFF_am1):
545: /* FALLTHROUGH */
546: case (ROFF_de):
547: /* FALLTHROUGH */
548: case (ROFF_dei):
549: /* FALLTHROUGH */
550: case (ROFF_ig):
551: break;
552: default:
553: if ( ! (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL))
554: return(ROFF_ERR);
1.1 schwarze 555: return(ROFF_IGN);
1.2 schwarze 556: }
557:
558: if ((*bufp)[pos])
559: if ( ! (*r->msg)(MANDOCERR_ARGSLOST, r->data, ln, pos, NULL))
560: return(ROFF_ERR);
561:
562: roffnode_pop(r);
563: roffnode_cleanscope(r);
564: return(ROFF_IGN);
565:
566: }
1.1 schwarze 567:
568:
1.2 schwarze 569: static void
570: roffnode_cleanscope(struct roff *r)
571: {
1.1 schwarze 572:
1.2 schwarze 573: while (r->last) {
574: if (--r->last->endspan < 0)
575: break;
576: roffnode_pop(r);
577: }
578: }
1.1 schwarze 579:
580:
1.2 schwarze 581: /* ARGSUSED */
582: static enum rofferr
583: roff_ccond(ROFF_ARGS)
584: {
1.1 schwarze 585:
1.2 schwarze 586: if (NULL == r->last) {
587: if ( ! (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL))
588: return(ROFF_ERR);
1.1 schwarze 589: return(ROFF_IGN);
1.2 schwarze 590: }
1.1 schwarze 591:
1.2 schwarze 592: switch (r->last->tok) {
593: case (ROFF_el):
594: /* FALLTHROUGH */
595: case (ROFF_ie):
596: /* FALLTHROUGH */
597: case (ROFF_if):
598: break;
599: default:
600: if ( ! (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL))
601: return(ROFF_ERR);
602: return(ROFF_IGN);
603: }
1.1 schwarze 604:
1.2 schwarze 605: if (r->last->endspan > -1) {
606: if ( ! (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL))
607: return(ROFF_ERR);
1.1 schwarze 608: return(ROFF_IGN);
1.2 schwarze 609: }
610:
611: if ((*bufp)[pos])
612: if ( ! (*r->msg)(MANDOCERR_ARGSLOST, r->data, ln, pos, NULL))
613: return(ROFF_ERR);
1.1 schwarze 614:
1.2 schwarze 615: roffnode_pop(r);
616: roffnode_cleanscope(r);
1.1 schwarze 617: return(ROFF_IGN);
618: }
619:
620:
621: /* ARGSUSED */
622: static enum rofferr
1.2 schwarze 623: roff_block(ROFF_ARGS)
1.1 schwarze 624: {
1.2 schwarze 625: int sv;
626: size_t sz;
1.16 schwarze 627: char *name;
628:
629: name = NULL;
1.2 schwarze 630:
1.16 schwarze 631: if (ROFF_ig != tok) {
632: if ('\0' == (*bufp)[pos]) {
633: (*r->msg)(MANDOCERR_NOARGS, r->data, ln, ppos, NULL);
634: return(ROFF_IGN);
635: }
1.22 ! schwarze 636:
! 637: /*
! 638: * Re-write `de1', since we don't really care about
! 639: * groff's strange compatibility mode, into `de'.
! 640: */
! 641:
1.18 schwarze 642: if (ROFF_de1 == tok)
643: tok = ROFF_de;
1.16 schwarze 644: if (ROFF_de == tok)
645: name = *bufp + pos;
1.21 schwarze 646: else
647: (*r->msg)(MANDOCERR_REQUEST, r->data, ln, ppos,
648: roffs[tok].name);
1.22 ! schwarze 649:
1.2 schwarze 650: while ((*bufp)[pos] && ' ' != (*bufp)[pos])
651: pos++;
1.22 ! schwarze 652:
1.2 schwarze 653: while (' ' == (*bufp)[pos])
1.16 schwarze 654: (*bufp)[pos++] = '\0';
1.2 schwarze 655: }
656:
1.16 schwarze 657: roffnode_push(r, tok, name, ln, ppos);
658:
659: /*
660: * At the beginning of a `de' macro, clear the existing string
661: * with the same name, if there is one. New content will be
662: * added from roff_block_text() in multiline mode.
663: */
1.22 ! schwarze 664:
1.16 schwarze 665: if (ROFF_de == tok)
1.19 schwarze 666: roff_setstr(r, name, "", 0);
1.2 schwarze 667:
668: if ('\0' == (*bufp)[pos])
669: return(ROFF_IGN);
1.1 schwarze 670:
1.22 ! schwarze 671: /* If present, process the custom end-of-line marker. */
! 672:
1.2 schwarze 673: sv = pos;
1.22 ! schwarze 674: while ((*bufp)[pos] &&
! 675: ' ' != (*bufp)[pos] &&
1.2 schwarze 676: '\t' != (*bufp)[pos])
677: pos++;
678:
679: /*
680: * Note: groff does NOT like escape characters in the input.
681: * Instead of detecting this, we're just going to let it fly and
682: * to hell with it.
683: */
684:
685: assert(pos > sv);
686: sz = (size_t)(pos - sv);
687:
688: if (1 == sz && '.' == (*bufp)[sv])
689: return(ROFF_IGN);
690:
1.11 schwarze 691: r->last->end = mandoc_malloc(sz + 1);
1.2 schwarze 692:
693: memcpy(r->last->end, *bufp + sv, sz);
694: r->last->end[(int)sz] = '\0';
695:
696: if ((*bufp)[pos])
1.22 ! schwarze 697: (*r->msg)(MANDOCERR_ARGSLOST, r->data, ln, pos, NULL);
1.1 schwarze 698:
699: return(ROFF_IGN);
700: }
701:
702:
703: /* ARGSUSED */
704: static enum rofferr
1.2 schwarze 705: roff_block_sub(ROFF_ARGS)
1.1 schwarze 706: {
1.2 schwarze 707: enum rofft t;
708: int i, j;
709:
710: /*
711: * First check whether a custom macro exists at this level. If
712: * it does, then check against it. This is some of groff's
713: * stranger behaviours. If we encountered a custom end-scope
714: * tag and that tag also happens to be a "real" macro, then we
715: * need to try interpreting it again as a real macro. If it's
716: * not, then return ignore. Else continue.
717: */
718:
719: if (r->last->end) {
720: i = pos + 1;
721: while (' ' == (*bufp)[i] || '\t' == (*bufp)[i])
722: i++;
723:
724: for (j = 0; r->last->end[j]; j++, i++)
725: if ((*bufp)[i] != r->last->end[j])
726: break;
1.1 schwarze 727:
1.2 schwarze 728: if ('\0' == r->last->end[j] &&
729: ('\0' == (*bufp)[i] ||
730: ' ' == (*bufp)[i] ||
731: '\t' == (*bufp)[i])) {
732: roffnode_pop(r);
733: roffnode_cleanscope(r);
1.1 schwarze 734:
1.16 schwarze 735: if (ROFF_MAX != roff_parse(r, *bufp, &pos))
1.2 schwarze 736: return(ROFF_RERUN);
737: return(ROFF_IGN);
738: }
1.1 schwarze 739: }
740:
1.2 schwarze 741: /*
742: * If we have no custom end-query or lookup failed, then try
743: * pulling it out of the hashtable.
744: */
1.1 schwarze 745:
1.2 schwarze 746: ppos = pos;
1.16 schwarze 747: t = roff_parse(r, *bufp, &pos);
1.1 schwarze 748:
1.16 schwarze 749: /*
750: * Macros other than block-end are only significant
751: * in `de' blocks; elsewhere, simply throw them away.
752: */
753: if (ROFF_cblock != t) {
754: if (ROFF_de == tok)
755: roff_setstr(r, r->last->name, *bufp + ppos, 1);
1.1 schwarze 756: return(ROFF_IGN);
1.16 schwarze 757: }
1.1 schwarze 758:
1.2 schwarze 759: assert(roffs[t].proc);
1.6 schwarze 760: return((*roffs[t].proc)(r, t, bufp, szp,
761: ln, ppos, pos, offs));
1.2 schwarze 762: }
763:
764:
765: /* ARGSUSED */
766: static enum rofferr
767: roff_block_text(ROFF_ARGS)
768: {
769:
1.16 schwarze 770: if (ROFF_de == tok)
771: roff_setstr(r, r->last->name, *bufp + pos, 1);
772:
1.2 schwarze 773: return(ROFF_IGN);
774: }
775:
776:
777: /* ARGSUSED */
778: static enum rofferr
779: roff_cond_sub(ROFF_ARGS)
780: {
781: enum rofft t;
782: enum roffrule rr;
783:
784: ppos = pos;
785: rr = r->last->rule;
786:
1.5 schwarze 787: /*
788: * Clean out scope. If we've closed ourselves, then don't
789: * continue.
790: */
791:
792: roffnode_cleanscope(r);
793:
1.16 schwarze 794: if (ROFF_MAX == (t = roff_parse(r, *bufp, &pos))) {
1.12 schwarze 795: if ('\\' == (*bufp)[pos] && '}' == (*bufp)[pos + 1])
796: return(roff_ccond
797: (r, ROFF_ccond, bufp, szp,
798: ln, pos, pos + 2, offs));
1.2 schwarze 799: return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);
1.12 schwarze 800: }
1.2 schwarze 801:
802: /*
803: * A denied conditional must evaluate its children if and only
804: * if they're either structurally required (such as loops and
805: * conditionals) or a closing macro.
806: */
807: if (ROFFRULE_DENY == rr)
808: if ( ! (ROFFMAC_STRUCT & roffs[t].flags))
809: if (ROFF_ccond != t)
810: return(ROFF_IGN);
811:
812: assert(roffs[t].proc);
1.6 schwarze 813: return((*roffs[t].proc)(r, t, bufp, szp,
814: ln, ppos, pos, offs));
1.2 schwarze 815: }
816:
817:
818: /* ARGSUSED */
819: static enum rofferr
820: roff_cond_text(ROFF_ARGS)
821: {
822: char *ep, *st;
823: enum roffrule rr;
824:
825: rr = r->last->rule;
1.1 schwarze 826:
827: /*
1.2 schwarze 828: * We display the value of the text if out current evaluation
829: * scope permits us to do so.
1.1 schwarze 830: */
1.13 schwarze 831:
832: /* FIXME: use roff_ccond? */
1.1 schwarze 833:
1.2 schwarze 834: st = &(*bufp)[pos];
835: if (NULL == (ep = strstr(st, "\\}"))) {
836: roffnode_cleanscope(r);
837: return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);
838: }
839:
1.4 schwarze 840: if (ep == st || (ep > st && '\\' != *(ep - 1)))
1.2 schwarze 841: roffnode_pop(r);
842:
843: roffnode_cleanscope(r);
844: return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);
845: }
846:
847:
1.5 schwarze 848: static enum roffrule
849: roff_evalcond(const char *v, int *pos)
850: {
851:
852: switch (v[*pos]) {
853: case ('n'):
854: (*pos)++;
855: return(ROFFRULE_ALLOW);
856: case ('e'):
857: /* FALLTHROUGH */
858: case ('o'):
859: /* FALLTHROUGH */
860: case ('t'):
861: (*pos)++;
862: return(ROFFRULE_DENY);
863: default:
864: break;
865: }
866:
867: while (v[*pos] && ' ' != v[*pos])
868: (*pos)++;
869: return(ROFFRULE_DENY);
870: }
871:
1.2 schwarze 872: /* ARGSUSED */
873: static enum rofferr
1.21 schwarze 874: roff_line_ignore(ROFF_ARGS)
1.6 schwarze 875: {
876:
1.21 schwarze 877: return(ROFF_IGN);
878: }
879:
880: /* ARGSUSED */
881: static enum rofferr
882: roff_line_error(ROFF_ARGS)
883: {
884:
885: (*r->msg)(MANDOCERR_REQUEST, r->data, ln, ppos, roffs[tok].name);
1.6 schwarze 886: return(ROFF_IGN);
887: }
888:
889: /* ARGSUSED */
890: static enum rofferr
1.2 schwarze 891: roff_cond(ROFF_ARGS)
892: {
893: int sv;
1.5 schwarze 894: enum roffrule rule;
1.2 schwarze 895:
896: /* Stack overflow! */
897:
898: if (ROFF_ie == tok && r->rstackpos == RSTACK_MAX - 1) {
1.1 schwarze 899: (*r->msg)(MANDOCERR_MEM, r->data, ln, ppos, NULL);
900: return(ROFF_ERR);
901: }
902:
1.5 schwarze 903: /* First, evaluate the conditional. */
1.2 schwarze 904:
1.5 schwarze 905: if (ROFF_el == tok) {
906: /*
907: * An `.el' will get the value of the current rstack
908: * entry set in prior `ie' calls or defaults to DENY.
909: */
910: if (r->rstackpos < 0)
911: rule = ROFFRULE_DENY;
912: else
913: rule = r->rstack[r->rstackpos];
914: } else
915: rule = roff_evalcond(*bufp, &pos);
1.2 schwarze 916:
917: sv = pos;
1.5 schwarze 918:
1.2 schwarze 919: while (' ' == (*bufp)[pos])
920: pos++;
921:
922: /*
923: * Roff is weird. If we have just white-space after the
924: * conditional, it's considered the BODY and we exit without
925: * really doing anything. Warn about this. It's probably
926: * wrong.
927: */
1.5 schwarze 928:
1.2 schwarze 929: if ('\0' == (*bufp)[pos] && sv != pos) {
1.22 ! schwarze 930: (*r->msg)(MANDOCERR_NOARGS, r->data, ln, ppos, NULL);
! 931: return(ROFF_IGN);
1.2 schwarze 932: }
933:
1.16 schwarze 934: roffnode_push(r, tok, NULL, ln, ppos);
1.2 schwarze 935:
1.5 schwarze 936: r->last->rule = rule;
1.2 schwarze 937:
938: if (ROFF_ie == tok) {
939: /*
940: * An if-else will put the NEGATION of the current
941: * evaluated conditional into the stack.
942: */
943: r->rstackpos++;
944: if (ROFFRULE_DENY == r->last->rule)
945: r->rstack[r->rstackpos] = ROFFRULE_ALLOW;
946: else
947: r->rstack[r->rstackpos] = ROFFRULE_DENY;
948: }
1.5 schwarze 949:
950: /* If the parent has false as its rule, then so do we. */
951:
1.2 schwarze 952: if (r->last->parent && ROFFRULE_DENY == r->last->parent->rule)
953: r->last->rule = ROFFRULE_DENY;
1.5 schwarze 954:
955: /*
956: * Determine scope. If we're invoked with "\{" trailing the
957: * conditional, then we're in a multiline scope. Else our scope
958: * expires on the next line.
959: */
1.2 schwarze 960:
961: r->last->endspan = 1;
962:
963: if ('\\' == (*bufp)[pos] && '{' == (*bufp)[pos + 1]) {
964: r->last->endspan = -1;
965: pos += 2;
966: }
967:
968: /*
969: * If there are no arguments on the line, the next-line scope is
970: * assumed.
971: */
972:
973: if ('\0' == (*bufp)[pos])
974: return(ROFF_IGN);
975:
976: /* Otherwise re-run the roff parser after recalculating. */
1.1 schwarze 977:
1.2 schwarze 978: *offs = pos;
979: return(ROFF_RERUN);
1.1 schwarze 980: }
981:
982:
1.2 schwarze 983: /* ARGSUSED */
984: static enum rofferr
1.7 schwarze 985: roff_ds(ROFF_ARGS)
986: {
1.10 schwarze 987: char *name, *string;
988:
989: /*
990: * A symbol is named by the first word following the macro
991: * invocation up to a space. Its value is anything after the
992: * name's trailing whitespace and optional double-quote. Thus,
993: *
994: * [.ds foo "bar " ]
995: *
996: * will have `bar " ' as its value.
997: */
1.7 schwarze 998:
999: name = *bufp + pos;
1000: if ('\0' == *name)
1001: return(ROFF_IGN);
1002:
1003: string = name;
1.10 schwarze 1004: /* Read until end of name. */
1.7 schwarze 1005: while (*string && ' ' != *string)
1006: string++;
1.10 schwarze 1007:
1008: /* Nil-terminate name. */
1.7 schwarze 1009: if (*string)
1.10 schwarze 1010: *(string++) = '\0';
1011:
1012: /* Read past spaces. */
1013: while (*string && ' ' == *string)
1014: string++;
1015:
1016: /* Read passed initial double-quote. */
1.7 schwarze 1017: if (*string && '"' == *string)
1018: string++;
1019:
1.10 schwarze 1020: /* The rest is the value. */
1.16 schwarze 1021: roff_setstr(r, name, string, 0);
1.7 schwarze 1022: return(ROFF_IGN);
1023: }
1024:
1025:
1026: /* ARGSUSED */
1027: static enum rofferr
1.6 schwarze 1028: roff_nr(ROFF_ARGS)
1.1 schwarze 1029: {
1.6 schwarze 1030: const char *key, *val;
1031: struct reg *rg;
1032:
1033: key = &(*bufp)[pos];
1034: rg = r->regs->regs;
1035:
1036: /* Parse register request. */
1037: while ((*bufp)[pos] && ' ' != (*bufp)[pos])
1038: pos++;
1039:
1040: /*
1041: * Set our nil terminator. Because this line is going to be
1042: * ignored anyway, we can munge it as we please.
1043: */
1044: if ((*bufp)[pos])
1045: (*bufp)[pos++] = '\0';
1046:
1047: /* Skip whitespace to register token. */
1048: while ((*bufp)[pos] && ' ' == (*bufp)[pos])
1049: pos++;
1050:
1051: val = &(*bufp)[pos];
1052:
1053: /* Process register token. */
1054:
1055: if (0 == strcmp(key, "nS")) {
1056: rg[(int)REG_nS].set = 1;
1057: if ( ! roff_parse_nat(val, &rg[(int)REG_nS].v.u))
1058: rg[(int)REG_nS].v.u = 0;
1059: }
1.1 schwarze 1060:
1.2 schwarze 1061: return(ROFF_IGN);
1.14 schwarze 1062: }
1063:
1064: /* ARGSUSED */
1065: static enum rofferr
1066: roff_so(ROFF_ARGS)
1067: {
1068: char *name;
1.15 schwarze 1069:
1070: (*r->msg)(MANDOCERR_SO, r->data, ln, ppos, NULL);
1.14 schwarze 1071:
1.22 ! schwarze 1072: /*
! 1073: * Handle `so'. Be EXTREMELY careful, as we shouldn't be
! 1074: * opening anything that's not in our cwd or anything beneath
! 1075: * it. Thus, explicitly disallow traversing up the file-system
! 1076: * or using absolute paths.
! 1077: */
! 1078:
1.14 schwarze 1079: name = *bufp + pos;
1080: if ('/' == *name || strstr(name, "../") || strstr(name, "/..")) {
1081: (*r->msg)(MANDOCERR_SOPATH, r->data, ln, pos, NULL);
1082: return(ROFF_ERR);
1083: }
1084:
1085: *offs = pos;
1086: return(ROFF_SO);
1.7 schwarze 1087: }
1088:
1.16 schwarze 1089: /* ARGSUSED */
1090: static enum rofferr
1091: roff_userdef(ROFF_ARGS)
1.12 schwarze 1092: {
1.16 schwarze 1093: const char *arg[9];
1094: char *cp, *n1, *n2;
1.17 schwarze 1095: int i, quoted, pairs;
1.12 schwarze 1096:
1.16 schwarze 1097: /*
1098: * Collect pointers to macro argument strings
1099: * and null-terminate them.
1100: */
1101: cp = *bufp + pos;
1102: for (i = 0; i < 9; i++) {
1.17 schwarze 1103: /* Quoting can only start with a new word. */
1104: if ('"' == *cp) {
1105: quoted = 1;
1106: cp++;
1107: } else
1108: quoted = 0;
1.16 schwarze 1109: arg[i] = cp;
1.17 schwarze 1110: for (pairs = 0; '\0' != *cp; cp++) {
1111: /* Unquoted arguments end at blanks. */
1112: if (0 == quoted) {
1113: if (' ' == *cp)
1114: break;
1115: continue;
1116: }
1117: /* After pairs of quotes, move left. */
1118: if (pairs)
1119: cp[-pairs] = cp[0];
1120: /* Pairs of quotes do not end words, ... */
1121: if ('"' == cp[0] && '"' == cp[1]) {
1122: pairs++;
1123: cp++;
1124: continue;
1125: }
1126: /* ... but solitary quotes do. */
1127: if ('"' != *cp)
1128: continue;
1129: if (pairs)
1130: cp[-pairs] = '\0';
1131: *cp = ' ';
1132: break;
1133: }
1134: /* Last argument; the remaining ones are empty strings. */
1.16 schwarze 1135: if ('\0' == *cp)
1136: continue;
1.17 schwarze 1137: /* Null-terminate argument and move to the next one. */
1.16 schwarze 1138: *cp++ = '\0';
1139: while (' ' == *cp)
1140: cp++;
1141: }
1142:
1143: /*
1144: * Expand macro arguments.
1.12 schwarze 1145: */
1.16 schwarze 1146: *szp = 0;
1147: n1 = cp = mandoc_strdup(r->current_string);
1148: while (NULL != (cp = strstr(cp, "\\$"))) {
1149: i = cp[2] - '1';
1150: if (0 > i || 8 < i) {
1151: /* Not an argument invocation. */
1152: cp += 2;
1153: continue;
1154: }
1155:
1156: *szp = strlen(n1) - 3 + strlen(arg[i]) + 1;
1157: n2 = mandoc_malloc(*szp);
1158:
1159: strlcpy(n2, n1, (size_t)(cp - n1 + 1));
1160: strlcat(n2, arg[i], *szp);
1161: strlcat(n2, cp + 3, *szp);
1162:
1163: cp = n2 + (cp - n1);
1164: free(n1);
1165: n1 = n2;
1.12 schwarze 1166: }
1167:
1.16 schwarze 1168: /*
1169: * Replace the macro invocation
1170: * by the expanded macro.
1171: */
1172: free(*bufp);
1173: *bufp = n1;
1174: if (0 == *szp)
1175: *szp = strlen(*bufp) + 1;
1176:
1.19 schwarze 1177: return(*szp > 1 && '\n' == (*bufp)[(int)*szp - 2] ?
1.16 schwarze 1178: ROFF_REPARSE : ROFF_APPEND);
1.12 schwarze 1179: }
1180:
1.16 schwarze 1181: /*
1182: * Store *string into the user-defined string called *name.
1183: * In multiline mode, append to an existing entry and append '\n';
1184: * else replace the existing entry, if there is one.
1185: * To clear an existing entry, call with (*r, *name, NULL, 0).
1186: */
1.8 schwarze 1187: static void
1.16 schwarze 1188: roff_setstr(struct roff *r, const char *name, const char *string,
1189: int multiline)
1.7 schwarze 1190: {
1191: struct roffstr *n;
1.16 schwarze 1192: char *c;
1193: size_t oldch, newch;
1.7 schwarze 1194:
1.16 schwarze 1195: /* Search for an existing string with the same name. */
1.8 schwarze 1196: n = r->first_string;
1.7 schwarze 1197: while (n && strcmp(name, n->name))
1198: n = n->next;
1.8 schwarze 1199:
1200: if (NULL == n) {
1.16 schwarze 1201: /* Create a new string table entry. */
1.8 schwarze 1202: n = mandoc_malloc(sizeof(struct roffstr));
1.16 schwarze 1203: n->name = mandoc_strdup(name);
1204: n->string = NULL;
1.8 schwarze 1205: n->next = r->first_string;
1206: r->first_string = n;
1.16 schwarze 1207: } else if (0 == multiline) {
1208: /* In multiline mode, append; else replace. */
1.7 schwarze 1209: free(n->string);
1.16 schwarze 1210: n->string = NULL;
1211: }
1212:
1213: if (NULL == string)
1214: return;
1215:
1216: /*
1217: * One additional byte for the '\n' in multiline mode,
1218: * and one for the terminating '\0'.
1219: */
1220: newch = strlen(string) + (multiline ? 2 : 1);
1221: if (NULL == n->string) {
1222: n->string = mandoc_malloc(newch);
1223: *n->string = '\0';
1224: oldch = 0;
1225: } else {
1226: oldch = strlen(n->string);
1227: n->string = mandoc_realloc(n->string, oldch + newch);
1228: }
1229:
1230: /* Skip existing content in the destination buffer. */
1231: c = n->string + oldch;
1232:
1233: /* Append new content to the destination buffer. */
1234: while (*string) {
1235: /*
1236: * Rudimentary roff copy mode:
1237: * Handle escaped backslashes.
1238: */
1239: if ('\\' == *string && '\\' == *(string + 1))
1240: string++;
1241: *c++ = *string++;
1242: }
1.8 schwarze 1243:
1.16 schwarze 1244: /* Append terminating bytes. */
1245: if (multiline)
1246: *c++ = '\n';
1247: *c = '\0';
1.7 schwarze 1248: }
1249:
1250:
1.8 schwarze 1251: static const char *
1252: roff_getstrn(const struct roff *r, const char *name, size_t len)
1.7 schwarze 1253: {
1.8 schwarze 1254: const struct roffstr *n;
1.7 schwarze 1255:
1.8 schwarze 1256: n = r->first_string;
1.10 schwarze 1257: while (n && (strncmp(name, n->name, len) || '\0' != n->name[(int)len]))
1.7 schwarze 1258: n = n->next;
1.8 schwarze 1259:
1260: return(n ? n->string : NULL);
1.7 schwarze 1261: }
1262:
1.8 schwarze 1263:
1264: static void
1265: roff_freestr(struct roff *r)
1.7 schwarze 1266: {
1267: struct roffstr *n, *nn;
1268:
1.8 schwarze 1269: for (n = r->first_string; n; n = nn) {
1.7 schwarze 1270: free(n->name);
1271: free(n->string);
1272: nn = n->next;
1273: free(n);
1274: }
1.8 schwarze 1275:
1276: r->first_string = NULL;
1.1 schwarze 1277: }