Annotation of src/usr.bin/mandoc/roff.c, Revision 1.24
1.24 ! schwarze 1: /* $Id: roff.c,v 1.23 2010/12/09 20:56:30 schwarze Exp $ */
1.1 schwarze 2: /*
3: * Copyright (c) 2010 Kristaps Dzonsons <kristaps@bsd.lv>
1.8 schwarze 4: * Copyright (c) 2010 Ingo Schwarze <schwarze@openbsd.org>
1.1 schwarze 5: *
6: * Permission to use, copy, modify, and distribute this software for any
7: * purpose with or without fee is hereby granted, provided that the above
8: * copyright notice and this permission notice appear in all copies.
9: *
1.16 schwarze 10: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
1.1 schwarze 11: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
1.16 schwarze 12: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
1.1 schwarze 13: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17: */
18: #ifdef HAVE_CONFIG_H
19: #include "config.h"
20: #endif
21:
22: #include <assert.h>
1.6 schwarze 23: #include <errno.h>
1.3 schwarze 24: #include <ctype.h>
1.6 schwarze 25: #include <limits.h>
1.1 schwarze 26: #include <stdlib.h>
27: #include <string.h>
1.2 schwarze 28: #include <stdio.h>
1.1 schwarze 29:
30: #include "mandoc.h"
31: #include "roff.h"
1.8 schwarze 32: #include "libmandoc.h"
1.1 schwarze 33:
/* Number of slots in the rstack array of struct roff. */
#define	RSTACK_MAX	128

/* True if c is one of the two control characters, `.' or `''. */
#define	ROFF_CTL(c) \
	('.' == (c) || '\'' == (c))
38:
/*
 * Request tokens.  The values are used as indices into the roffs[]
 * table, so the order here has to match the order of that table.
 * ROFF_USERDEF is what roff_parse() reports for a name that resolves
 * through roff_getstrn(); ROFF_MAX doubles as the "not found" value.
 */
enum	rofft {
	ROFF_ad,
	ROFF_am,
	ROFF_ami,
	ROFF_am1,
	ROFF_de,
	ROFF_dei,
	ROFF_de1,
	ROFF_ds,
	ROFF_el,
	ROFF_hy,
	ROFF_ie,
	ROFF_if,
	ROFF_ig,
	ROFF_ne,
	ROFF_nh,
	ROFF_nr,
	ROFF_rm,
	ROFF_so,
	ROFF_tr,
	ROFF_cblock,
	ROFF_ccond, /* FIXME: remove this. */
	ROFF_USERDEF,
	ROFF_MAX
};
64:
/*
 * Outcome of a conditional.  Handlers running under ROFFRULE_DENY
 * return ROFF_IGN for ordinary input, under ROFFRULE_ALLOW they
 * return ROFF_CONT (see roff_cond_text() and roff_cond_sub()).
 */
enum	roffrule {
	ROFFRULE_ALLOW,
	ROFFRULE_DENY
};
69:
1.8 schwarze 70:
/*
 * One entry of the singly linked list hanging off
 * struct roff.first_string.
 */
struct	roffstr {
	char		*name; /* key of symbol */
	char		*string; /* current value */
	struct roffstr	*next; /* next in list */
};
76:
/*
 * Parser context, created by roff_alloc() and released by roff_free().
 * rstackpos is -1 while rstack is empty.
 */
struct	roff {
	struct roffnode	*last; /* leaf of stack */
	mandocmsg	 msg; /* err/warn/fatal messages */
	void		*data; /* privdata for messages */
	enum roffrule	 rstack[RSTACK_MAX]; /* stack of !`ie' rules */
	int		 rstackpos; /* position in rstack */
	struct regset	*regs; /* read/writable registers */
	struct roffstr	*first_string; /* user-defined strings & macros */
	const char	*current_string; /* value of last called user macro */
};
87:
/*
 * One open block or conditional.  Nodes are linked through `parent'
 * into a stack whose top is struct roff.last; name and end are
 * heap-allocated and released by roffnode_pop().
 */
struct	roffnode {
	enum rofft	 tok; /* type of node */
	struct roffnode	*parent; /* up one in stack */
	int		 line; /* parse line */
	int		 col; /* parse col */
	char		*name; /* node name, e.g. macro name */
	char		*end; /* end-rules: custom token */
	int		 endspan; /* end-rules: next-line or infty */
	enum roffrule	 rule; /* current evaluation rule */
};
98:
/* Parameter list shared by all request handlers (see roffproc). */
#define	ROFF_ARGS	struct roff *r, /* parse ctx */ \
			enum rofft tok, /* tok of macro */ \
			char **bufp, /* input buffer */ \
			size_t *szp, /* size of input buffer */ \
			int ln, /* parse line */ \
			int ppos, /* original pos in buffer */ \
			int pos, /* current pos in buffer */ \
			int *offs /* reset offset of buffer data */

/* Signature of the proc, text and sub handlers stored in struct roffmac. */
typedef	enum rofferr (*roffproc)(ROFF_ARGS);
109:
/*
 * Description of one request: its name, its three handlers and its
 * link in the hash chain built by roff_hash_init().  The roffs[]
 * table initialises these members positionally.
 */
struct	roffmac {
	const char	*name; /* macro name */
	roffproc	 proc; /* process new macro */
	roffproc	 text; /* process as child text of macro */
	roffproc	 sub; /* process as child of macro */
	int		 flags;
#define	ROFFMAC_STRUCT	(1 << 0) /* always interpret */
	struct roffmac	*next;
};
119:
/*
 * Forward declarations: the request handlers referenced by the roffs[]
 * table, plus the string and register helpers they rely on.
 */
static	enum rofferr	 roff_block(ROFF_ARGS);
static	enum rofferr	 roff_block_text(ROFF_ARGS);
static	enum rofferr	 roff_block_sub(ROFF_ARGS);
static	enum rofferr	 roff_cblock(ROFF_ARGS);
static	enum rofferr	 roff_ccond(ROFF_ARGS);
static	enum rofferr	 roff_cond(ROFF_ARGS);
static	enum rofferr	 roff_cond_text(ROFF_ARGS);
static	enum rofferr	 roff_cond_sub(ROFF_ARGS);
static	enum rofferr	 roff_ds(ROFF_ARGS);
static	enum roffrule	 roff_evalcond(const char *, int *);
static	void		 roff_freestr(struct roff *);
static	const char	*roff_getstrn(const struct roff *,
				const char *, size_t);
static	enum rofferr	 roff_line_ignore(ROFF_ARGS);
static	enum rofferr	 roff_line_error(ROFF_ARGS);
static	enum rofferr	 roff_nr(ROFF_ARGS);
static	int		 roff_res(struct roff *,
				char **, size_t *, int);
static	void		 roff_setstr(struct roff *,
				const char *, const char *, int);
static	enum rofferr	 roff_so(ROFF_ARGS);
static	enum rofferr	 roff_userdef(ROFF_ARGS);
1.1 schwarze 142:
/* See roff_hash_find() */

/*
 * The table has one bucket per character in the range
 * ASCII_LO..ASCII_HI; a request is filed under the first character
 * of its name.
 */
#define	ASCII_HI	 126
#define	ASCII_LO	 33
#define	HASHWIDTH	(ASCII_HI - ASCII_LO + 1)

/* Bucket heads; the chains run through struct roffmac.next. */
static	struct roffmac	*hash[HASHWIDTH];
150:
/*
 * Request table, indexed by enum rofft; the rows must stay in the
 * same order as the enumerators.  The columns are: name, proc, text,
 * sub, flags, next.  The last row (ROFF_USERDEF) has no name and is
 * left out of the hash table by roff_hash_init().
 */
static	struct roffmac	 roffs[ROFF_MAX] = {
	{ "ad", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "am", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "ami", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "am1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "de", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "dei", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "de1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "ds", roff_ds, NULL, NULL, 0, NULL },
	{ "el", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
	{ "hy", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "ie", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
	{ "if", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
	{ "ig", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "ne", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "nh", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "nr", roff_nr, NULL, NULL, 0, NULL },
	{ "rm", roff_line_error, NULL, NULL, 0, NULL },
	{ "so", roff_so, NULL, NULL, 0, NULL },
	{ "tr", roff_line_ignore, NULL, NULL, 0, NULL },
	{ ".", roff_cblock, NULL, NULL, 0, NULL },
	{ "\\}", roff_ccond, NULL, NULL, 0, NULL },
	{ NULL, roff_userdef, NULL, NULL, 0, NULL },
};
175:
/* Forward declarations of the node-stack, hash and parsing helpers. */
static	void		 roff_free1(struct roff *);
static	enum rofft	 roff_hash_find(const char *, size_t);
static	void		 roff_hash_init(void);
static	void		 roffnode_cleanscope(struct roff *);
static	void		 roffnode_push(struct roff *, enum rofft,
				const char *, int, int);
static	void		 roffnode_pop(struct roff *);
static	enum rofft	 roff_parse(struct roff *, const char *, int *);
static	int		 roff_parse_nat(const char *, unsigned int *);

/* See roff_hash_find() */
/* Bucket index for the name p: its first character, offset by ASCII_LO. */
#define	ROFF_HASH(p)	(p[0] - ASCII_LO)
188:
189: static void
190: roff_hash_init(void)
191: {
192: struct roffmac *n;
193: int buc, i;
194:
1.16 schwarze 195: for (i = 0; i < (int)ROFF_USERDEF; i++) {
1.3 schwarze 196: assert(roffs[i].name[0] >= ASCII_LO);
197: assert(roffs[i].name[0] <= ASCII_HI);
198:
199: buc = ROFF_HASH(roffs[i].name);
200:
201: if (NULL != (n = hash[buc])) {
202: for ( ; n->next; n = n->next)
203: /* Do nothing. */ ;
204: n->next = &roffs[i];
205: } else
206: hash[buc] = &roffs[i];
207: }
208: }
209:
1.1 schwarze 210:
211: /*
212: * Look up a roff token by its name. Returns ROFF_MAX if no macro by
213: * the nil-terminated string name could be found.
214: */
215: static enum rofft
1.16 schwarze 216: roff_hash_find(const char *p, size_t s)
1.1 schwarze 217: {
1.3 schwarze 218: int buc;
219: struct roffmac *n;
1.1 schwarze 220:
1.3 schwarze 221: /*
222: * libroff has an extremely simple hashtable, for the time
223: * being, which simply keys on the first character, which must
224: * be printable, then walks a chain. It works well enough until
225: * optimised.
226: */
227:
228: if (p[0] < ASCII_LO || p[0] > ASCII_HI)
229: return(ROFF_MAX);
230:
231: buc = ROFF_HASH(p);
232:
233: if (NULL == (n = hash[buc]))
234: return(ROFF_MAX);
235: for ( ; n; n = n->next)
1.16 schwarze 236: if (0 == strncmp(n->name, p, s) && '\0' == n->name[(int)s])
1.3 schwarze 237: return((enum rofft)(n - roffs));
1.1 schwarze 238:
239: return(ROFF_MAX);
240: }
241:
242:
243: /*
244: * Pop the current node off of the stack of roff instructions currently
245: * pending.
246: */
247: static void
248: roffnode_pop(struct roff *r)
249: {
250: struct roffnode *p;
251:
1.2 schwarze 252: assert(r->last);
253: p = r->last;
254:
255: if (ROFF_el == p->tok)
256: if (r->rstackpos > -1)
257: r->rstackpos--;
258:
259: r->last = r->last->parent;
1.16 schwarze 260: free(p->name);
261: free(p->end);
1.1 schwarze 262: free(p);
263: }
264:
265:
266: /*
267: * Push a roff node onto the instruction stack. This must later be
268: * removed with roffnode_pop().
269: */
1.11 schwarze 270: static void
1.16 schwarze 271: roffnode_push(struct roff *r, enum rofft tok, const char *name,
272: int line, int col)
1.1 schwarze 273: {
274: struct roffnode *p;
275:
1.11 schwarze 276: p = mandoc_calloc(1, sizeof(struct roffnode));
1.1 schwarze 277: p->tok = tok;
1.16 schwarze 278: if (name)
279: p->name = mandoc_strdup(name);
1.1 schwarze 280: p->parent = r->last;
281: p->line = line;
282: p->col = col;
1.2 schwarze 283: p->rule = p->parent ? p->parent->rule : ROFFRULE_DENY;
1.1 schwarze 284:
285: r->last = p;
286: }
287:
288:
289: static void
290: roff_free1(struct roff *r)
291: {
292:
293: while (r->last)
294: roffnode_pop(r);
1.8 schwarze 295: roff_freestr(r);
1.1 schwarze 296: }
297:
298:
/*
 * Drop the parse state accumulated in r via roff_free1(); the context
 * itself remains allocated.
 */
void
roff_reset(struct roff *r)
{
	roff_free1(r);
}
305:
306:
/*
 * Release the parse state held by r, then the context itself.
 * The pointer must not be used afterwards.
 */
void
roff_free(struct roff *r)
{
	roff_free1(r);
	free(r);
}
314:
315:
316: struct roff *
1.11 schwarze 317: roff_alloc(struct regset *regs, void *data, const mandocmsg msg)
1.1 schwarze 318: {
319: struct roff *r;
320:
1.11 schwarze 321: r = mandoc_calloc(1, sizeof(struct roff));
1.6 schwarze 322: r->regs = regs;
1.1 schwarze 323: r->msg = msg;
324: r->data = data;
1.2 schwarze 325: r->rstackpos = -1;
1.3 schwarze 326:
327: roff_hash_init();
1.1 schwarze 328: return(r);
329: }
330:
331:
/*
 * Look for the first reference to a defined string (`\*x', `\*(xx'
 * or `\*[name]') at or after pos and expand it.  On expansion, *bufp
 * is replaced by a newly allocated line, *szp is updated and 0 is
 * returned.  If there is nothing to expand, or the line ends in the
 * middle of an escape sequence, the buffer is left alone and 1 is
 * returned.
 */
static int
roff_res(struct roff *r, char **bufp, size_t *szp, int pos)
{
	const char	*esc;	/* the backslash opening the escape */
	const char	*name;	/* first byte of the string name */
	const char	*scan;	/* read position */
	const char	*val;	/* value of the string */
	char		*out;	/* the rewritten line */
	size_t		 outsz;
	int		 len;	/* bytes of the name seen so far */
	int		 want;	/* length of the name, 0 = up to `]' */

	scan = *bufp + pos;

	for (;;) {
		scan = strchr(scan, '\\');
		if (NULL == scan)
			return 1;
		esc = scan++;

		/*
		 * Only an asterisk makes this a string reference.
		 * Any other character is skipped all the same: being
		 * escaped, it cannot open an escape sequence itself.
		 */

		if ('\0' == *scan)
			return 1;
		if ('*' != *scan++)
			continue;

		/* What follows determines how long the name is. */

		if ('\0' == *scan)
			return 1;
		if ('(' == *scan) {
			scan++;
			want = 2;
		} else if ('[' == *scan) {
			scan++;
			want = 0;
		} else
			want = 1;

		name = scan;

		/* Walk over the name. */

		len = 0;
		while (0 == want || len < want) {
			if ('\0' == *scan)
				return 1; /* Error. */
			if (0 == want && ']' == *scan)
				break;
			len++;
			scan++;
		}

		/* Unknown strings are left in place; keep looking. */

		val = roff_getstrn(r, name, (size_t)len);
		if (NULL == val) {
			if (want)
				scan--;
			continue;
		}

		/*
		 * Assemble the new line: everything before the escape,
		 * the value, then everything after the name, minus
		 * the closing bracket in the `[' form.
		 */

		outsz = *szp + strlen(val) + 1;
		out = mandoc_malloc(outsz);

		strlcpy(out, *bufp, (size_t)(esc - *bufp + 1));
		strlcat(out, val, outsz);
		strlcat(out, want ? scan : scan + 1, outsz);

		free(*bufp);

		*bufp = out;
		*szp = outsz;
		return 0;
	}
}
427:
428:
1.1 schwarze 429: enum rofferr
1.6 schwarze 430: roff_parseln(struct roff *r, int ln, char **bufp,
431: size_t *szp, int pos, int *offs)
1.1 schwarze 432: {
433: enum rofft t;
434: int ppos;
435:
1.2 schwarze 436: /*
1.8 schwarze 437: * Run the reserved-word filter only if we have some reserved
438: * words to fill in.
439: */
440:
1.9 schwarze 441: if (r->first_string && ! roff_res(r, bufp, szp, pos))
1.16 schwarze 442: return(ROFF_REPARSE);
1.8 schwarze 443:
444: /*
1.2 schwarze 445: * First, if a scope is open and we're not a macro, pass the
446: * text through the macro's filter. If a scope isn't open and
447: * we're not a macro, just let it through.
448: */
449:
450: if (r->last && ! ROFF_CTL((*bufp)[pos])) {
451: t = r->last->tok;
452: assert(roffs[t].text);
453: return((*roffs[t].text)
1.8 schwarze 454: (r, t, bufp, szp,
455: ln, pos, pos, offs));
1.2 schwarze 456: } else if ( ! ROFF_CTL((*bufp)[pos]))
457: return(ROFF_CONT);
458:
459: /*
460: * If a scope is open, go to the child handler for that macro,
461: * as it may want to preprocess before doing anything with it.
462: */
463:
464: if (r->last) {
1.1 schwarze 465: t = r->last->tok;
466: assert(roffs[t].sub);
1.2 schwarze 467: return((*roffs[t].sub)
1.8 schwarze 468: (r, t, bufp, szp,
469: ln, pos, pos, offs));
1.2 schwarze 470: }
471:
472: /*
473: * Lastly, as we've no scope open, try to look up and execute
474: * the new macro. If no macro is found, simply return and let
475: * the compilers handle it.
476: */
477:
478: ppos = pos;
1.16 schwarze 479: if (ROFF_MAX == (t = roff_parse(r, *bufp, &pos)))
1.1 schwarze 480: return(ROFF_CONT);
481:
1.2 schwarze 482: assert(roffs[t].proc);
483: return((*roffs[t].proc)
1.8 schwarze 484: (r, t, bufp, szp,
485: ln, ppos, pos, offs));
1.2 schwarze 486: }
487:
1.1 schwarze 488:
1.2 schwarze 489: int
490: roff_endparse(struct roff *r)
491: {
1.1 schwarze 492:
1.2 schwarze 493: if (NULL == r->last)
494: return(1);
495: return((*r->msg)(MANDOCERR_SCOPEEXIT, r->data, r->last->line,
496: r->last->col, NULL));
1.1 schwarze 497: }
498:
499:
500: /*
501: * Parse a roff node's type from the input buffer. This must be in the
502: * form of ".foo xxx" in the usual way.
503: */
504: static enum rofft
1.16 schwarze 505: roff_parse(struct roff *r, const char *buf, int *pos)
1.1 schwarze 506: {
1.16 schwarze 507: const char *mac;
508: size_t maclen;
1.1 schwarze 509: enum rofft t;
510:
1.2 schwarze 511: assert(ROFF_CTL(buf[*pos]));
512: (*pos)++;
1.1 schwarze 513:
1.16 schwarze 514: while (' ' == buf[*pos] || '\t' == buf[*pos])
1.1 schwarze 515: (*pos)++;
516:
517: if ('\0' == buf[*pos])
518: return(ROFF_MAX);
519:
1.16 schwarze 520: mac = buf + *pos;
521: maclen = strcspn(mac, " \\\t\0");
1.1 schwarze 522:
1.16 schwarze 523: t = (r->current_string = roff_getstrn(r, mac, maclen))
524: ? ROFF_USERDEF : roff_hash_find(mac, maclen);
1.1 schwarze 525:
1.16 schwarze 526: *pos += maclen;
1.1 schwarze 527: while (buf[*pos] && ' ' == buf[*pos])
528: (*pos)++;
529:
530: return(t);
531: }
532:
533:
/*
 * Convert the decimal number in buf to an unsigned int.  The whole
 * string has to be consumed by strtol(3), and the value has to lie in
 * the range 0 to INT_MAX.  Returns 1 and stores the value in *res on
 * success; returns 0 and leaves *res alone otherwise.
 */
static int
roff_parse_nat(const char *buf, unsigned int *res)
{
	char	*end;
	long	 num;

	errno = 0;
	num = strtol(buf, &end, 10);

	/* Reject empty input and trailing garbage. */
	if ('\0' == buf[0] || '\0' != *end)
		return 0;

	/* Reject what does not fit into a long ... */
	if (ERANGE == errno && (LONG_MAX == num || LONG_MIN == num))
		return 0;

	/* ... and what is negative or does not fit into an int. */
	if (num < 0 || num > INT_MAX)
		return 0;

	*res = (unsigned int)num;
	return 1;
}
552:
553:
1.1 schwarze 554: /* ARGSUSED */
555: static enum rofferr
1.2 schwarze 556: roff_cblock(ROFF_ARGS)
1.1 schwarze 557: {
558:
1.2 schwarze 559: /*
560: * A block-close `..' should only be invoked as a child of an
561: * ignore macro, otherwise raise a warning and just ignore it.
562: */
563:
564: if (NULL == r->last) {
565: if ( ! (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL))
566: return(ROFF_ERR);
567: return(ROFF_IGN);
568: }
1.1 schwarze 569:
1.2 schwarze 570: switch (r->last->tok) {
571: case (ROFF_am):
572: /* FALLTHROUGH */
573: case (ROFF_ami):
574: /* FALLTHROUGH */
575: case (ROFF_am1):
576: /* FALLTHROUGH */
577: case (ROFF_de):
1.23 schwarze 578: /* ROFF_de1 is remapped to ROFF_de in roff_block(). */
1.2 schwarze 579: /* FALLTHROUGH */
580: case (ROFF_dei):
581: /* FALLTHROUGH */
582: case (ROFF_ig):
583: break;
584: default:
585: if ( ! (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL))
586: return(ROFF_ERR);
1.1 schwarze 587: return(ROFF_IGN);
1.2 schwarze 588: }
589:
590: if ((*bufp)[pos])
591: if ( ! (*r->msg)(MANDOCERR_ARGSLOST, r->data, ln, pos, NULL))
592: return(ROFF_ERR);
593:
594: roffnode_pop(r);
595: roffnode_cleanscope(r);
596: return(ROFF_IGN);
597:
598: }
1.1 schwarze 599:
600:
1.2 schwarze 601: static void
602: roffnode_cleanscope(struct roff *r)
603: {
1.1 schwarze 604:
1.2 schwarze 605: while (r->last) {
606: if (--r->last->endspan < 0)
607: break;
608: roffnode_pop(r);
609: }
610: }
1.1 schwarze 611:
612:
1.2 schwarze 613: /* ARGSUSED */
614: static enum rofferr
615: roff_ccond(ROFF_ARGS)
616: {
1.1 schwarze 617:
1.2 schwarze 618: if (NULL == r->last) {
619: if ( ! (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL))
620: return(ROFF_ERR);
1.1 schwarze 621: return(ROFF_IGN);
1.2 schwarze 622: }
1.1 schwarze 623:
1.2 schwarze 624: switch (r->last->tok) {
625: case (ROFF_el):
626: /* FALLTHROUGH */
627: case (ROFF_ie):
628: /* FALLTHROUGH */
629: case (ROFF_if):
630: break;
631: default:
632: if ( ! (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL))
633: return(ROFF_ERR);
634: return(ROFF_IGN);
635: }
1.1 schwarze 636:
1.2 schwarze 637: if (r->last->endspan > -1) {
638: if ( ! (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL))
639: return(ROFF_ERR);
1.1 schwarze 640: return(ROFF_IGN);
1.2 schwarze 641: }
642:
643: if ((*bufp)[pos])
644: if ( ! (*r->msg)(MANDOCERR_ARGSLOST, r->data, ln, pos, NULL))
645: return(ROFF_ERR);
1.1 schwarze 646:
1.2 schwarze 647: roffnode_pop(r);
648: roffnode_cleanscope(r);
1.1 schwarze 649: return(ROFF_IGN);
650: }
651:
652:
653: /* ARGSUSED */
654: static enum rofferr
1.2 schwarze 655: roff_block(ROFF_ARGS)
1.1 schwarze 656: {
1.2 schwarze 657: int sv;
658: size_t sz;
1.16 schwarze 659: char *name;
660:
661: name = NULL;
1.2 schwarze 662:
1.16 schwarze 663: if (ROFF_ig != tok) {
664: if ('\0' == (*bufp)[pos]) {
665: (*r->msg)(MANDOCERR_NOARGS, r->data, ln, ppos, NULL);
666: return(ROFF_IGN);
667: }
1.22 schwarze 668:
669: /*
670: * Re-write `de1', since we don't really care about
671: * groff's strange compatibility mode, into `de'.
672: */
673:
1.18 schwarze 674: if (ROFF_de1 == tok)
675: tok = ROFF_de;
1.16 schwarze 676: if (ROFF_de == tok)
677: name = *bufp + pos;
1.21 schwarze 678: else
679: (*r->msg)(MANDOCERR_REQUEST, r->data, ln, ppos,
680: roffs[tok].name);
1.22 schwarze 681:
1.2 schwarze 682: while ((*bufp)[pos] && ' ' != (*bufp)[pos])
683: pos++;
1.22 schwarze 684:
1.2 schwarze 685: while (' ' == (*bufp)[pos])
1.16 schwarze 686: (*bufp)[pos++] = '\0';
1.2 schwarze 687: }
688:
1.16 schwarze 689: roffnode_push(r, tok, name, ln, ppos);
690:
691: /*
692: * At the beginning of a `de' macro, clear the existing string
693: * with the same name, if there is one. New content will be
694: * added from roff_block_text() in multiline mode.
695: */
1.22 schwarze 696:
1.16 schwarze 697: if (ROFF_de == tok)
1.19 schwarze 698: roff_setstr(r, name, "", 0);
1.2 schwarze 699:
700: if ('\0' == (*bufp)[pos])
701: return(ROFF_IGN);
1.1 schwarze 702:
1.22 schwarze 703: /* If present, process the custom end-of-line marker. */
704:
1.2 schwarze 705: sv = pos;
1.22 schwarze 706: while ((*bufp)[pos] &&
707: ' ' != (*bufp)[pos] &&
1.2 schwarze 708: '\t' != (*bufp)[pos])
709: pos++;
710:
711: /*
712: * Note: groff does NOT like escape characters in the input.
713: * Instead of detecting this, we're just going to let it fly and
714: * to hell with it.
715: */
716:
717: assert(pos > sv);
718: sz = (size_t)(pos - sv);
719:
720: if (1 == sz && '.' == (*bufp)[sv])
721: return(ROFF_IGN);
722:
1.11 schwarze 723: r->last->end = mandoc_malloc(sz + 1);
1.2 schwarze 724:
725: memcpy(r->last->end, *bufp + sv, sz);
726: r->last->end[(int)sz] = '\0';
727:
728: if ((*bufp)[pos])
1.22 schwarze 729: (*r->msg)(MANDOCERR_ARGSLOST, r->data, ln, pos, NULL);
1.1 schwarze 730:
731: return(ROFF_IGN);
732: }
733:
734:
735: /* ARGSUSED */
736: static enum rofferr
1.2 schwarze 737: roff_block_sub(ROFF_ARGS)
1.1 schwarze 738: {
1.2 schwarze 739: enum rofft t;
740: int i, j;
741:
742: /*
743: * First check whether a custom macro exists at this level. If
744: * it does, then check against it. This is some of groff's
745: * stranger behaviours. If we encountered a custom end-scope
746: * tag and that tag also happens to be a "real" macro, then we
747: * need to try interpreting it again as a real macro. If it's
748: * not, then return ignore. Else continue.
749: */
750:
751: if (r->last->end) {
752: i = pos + 1;
753: while (' ' == (*bufp)[i] || '\t' == (*bufp)[i])
754: i++;
755:
756: for (j = 0; r->last->end[j]; j++, i++)
757: if ((*bufp)[i] != r->last->end[j])
758: break;
1.1 schwarze 759:
1.2 schwarze 760: if ('\0' == r->last->end[j] &&
761: ('\0' == (*bufp)[i] ||
762: ' ' == (*bufp)[i] ||
763: '\t' == (*bufp)[i])) {
764: roffnode_pop(r);
765: roffnode_cleanscope(r);
1.1 schwarze 766:
1.16 schwarze 767: if (ROFF_MAX != roff_parse(r, *bufp, &pos))
1.2 schwarze 768: return(ROFF_RERUN);
769: return(ROFF_IGN);
770: }
1.1 schwarze 771: }
772:
1.2 schwarze 773: /*
774: * If we have no custom end-query or lookup failed, then try
775: * pulling it out of the hashtable.
776: */
1.1 schwarze 777:
1.2 schwarze 778: ppos = pos;
1.16 schwarze 779: t = roff_parse(r, *bufp, &pos);
1.1 schwarze 780:
1.16 schwarze 781: /*
782: * Macros other than block-end are only significant
783: * in `de' blocks; elsewhere, simply throw them away.
784: */
785: if (ROFF_cblock != t) {
786: if (ROFF_de == tok)
787: roff_setstr(r, r->last->name, *bufp + ppos, 1);
1.1 schwarze 788: return(ROFF_IGN);
1.16 schwarze 789: }
1.1 schwarze 790:
1.2 schwarze 791: assert(roffs[t].proc);
1.6 schwarze 792: return((*roffs[t].proc)(r, t, bufp, szp,
793: ln, ppos, pos, offs));
1.2 schwarze 794: }
795:
796:
797: /* ARGSUSED */
798: static enum rofferr
799: roff_block_text(ROFF_ARGS)
800: {
801:
1.16 schwarze 802: if (ROFF_de == tok)
803: roff_setstr(r, r->last->name, *bufp + pos, 1);
804:
1.2 schwarze 805: return(ROFF_IGN);
806: }
807:
808:
809: /* ARGSUSED */
810: static enum rofferr
811: roff_cond_sub(ROFF_ARGS)
812: {
813: enum rofft t;
814: enum roffrule rr;
815:
816: ppos = pos;
817: rr = r->last->rule;
818:
1.5 schwarze 819: /*
820: * Clean out scope. If we've closed ourselves, then don't
821: * continue.
822: */
823:
824: roffnode_cleanscope(r);
825:
1.16 schwarze 826: if (ROFF_MAX == (t = roff_parse(r, *bufp, &pos))) {
1.12 schwarze 827: if ('\\' == (*bufp)[pos] && '}' == (*bufp)[pos + 1])
828: return(roff_ccond
829: (r, ROFF_ccond, bufp, szp,
830: ln, pos, pos + 2, offs));
1.2 schwarze 831: return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);
1.12 schwarze 832: }
1.2 schwarze 833:
834: /*
835: * A denied conditional must evaluate its children if and only
836: * if they're either structurally required (such as loops and
837: * conditionals) or a closing macro.
838: */
839: if (ROFFRULE_DENY == rr)
840: if ( ! (ROFFMAC_STRUCT & roffs[t].flags))
841: if (ROFF_ccond != t)
842: return(ROFF_IGN);
843:
844: assert(roffs[t].proc);
1.6 schwarze 845: return((*roffs[t].proc)(r, t, bufp, szp,
846: ln, ppos, pos, offs));
1.2 schwarze 847: }
848:
849:
850: /* ARGSUSED */
851: static enum rofferr
852: roff_cond_text(ROFF_ARGS)
853: {
854: char *ep, *st;
855: enum roffrule rr;
856:
857: rr = r->last->rule;
1.1 schwarze 858:
859: /*
1.2 schwarze 860: * We display the value of the text if out current evaluation
861: * scope permits us to do so.
1.1 schwarze 862: */
1.13 schwarze 863:
864: /* FIXME: use roff_ccond? */
1.1 schwarze 865:
1.2 schwarze 866: st = &(*bufp)[pos];
867: if (NULL == (ep = strstr(st, "\\}"))) {
868: roffnode_cleanscope(r);
869: return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);
870: }
871:
1.4 schwarze 872: if (ep == st || (ep > st && '\\' != *(ep - 1)))
1.2 schwarze 873: roffnode_pop(r);
874:
875: roffnode_cleanscope(r);
876: return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);
877: }
878:
879:
1.5 schwarze 880: static enum roffrule
881: roff_evalcond(const char *v, int *pos)
882: {
883:
884: switch (v[*pos]) {
885: case ('n'):
886: (*pos)++;
887: return(ROFFRULE_ALLOW);
888: case ('e'):
889: /* FALLTHROUGH */
890: case ('o'):
891: /* FALLTHROUGH */
892: case ('t'):
893: (*pos)++;
894: return(ROFFRULE_DENY);
895: default:
896: break;
897: }
898:
899: while (v[*pos] && ' ' != v[*pos])
900: (*pos)++;
901: return(ROFFRULE_DENY);
902: }
903:
1.2 schwarze 904: /* ARGSUSED */
905: static enum rofferr
1.21 schwarze 906: roff_line_ignore(ROFF_ARGS)
1.6 schwarze 907: {
908:
1.21 schwarze 909: return(ROFF_IGN);
910: }
911:
912: /* ARGSUSED */
913: static enum rofferr
914: roff_line_error(ROFF_ARGS)
915: {
916:
917: (*r->msg)(MANDOCERR_REQUEST, r->data, ln, ppos, roffs[tok].name);
1.6 schwarze 918: return(ROFF_IGN);
919: }
920:
921: /* ARGSUSED */
922: static enum rofferr
1.2 schwarze 923: roff_cond(ROFF_ARGS)
924: {
925: int sv;
1.5 schwarze 926: enum roffrule rule;
1.2 schwarze 927:
928: /* Stack overflow! */
929:
930: if (ROFF_ie == tok && r->rstackpos == RSTACK_MAX - 1) {
1.1 schwarze 931: (*r->msg)(MANDOCERR_MEM, r->data, ln, ppos, NULL);
932: return(ROFF_ERR);
933: }
934:
1.5 schwarze 935: /* First, evaluate the conditional. */
1.2 schwarze 936:
1.5 schwarze 937: if (ROFF_el == tok) {
938: /*
939: * An `.el' will get the value of the current rstack
940: * entry set in prior `ie' calls or defaults to DENY.
941: */
942: if (r->rstackpos < 0)
943: rule = ROFFRULE_DENY;
944: else
945: rule = r->rstack[r->rstackpos];
946: } else
947: rule = roff_evalcond(*bufp, &pos);
1.2 schwarze 948:
949: sv = pos;
1.5 schwarze 950:
1.2 schwarze 951: while (' ' == (*bufp)[pos])
952: pos++;
953:
954: /*
955: * Roff is weird. If we have just white-space after the
956: * conditional, it's considered the BODY and we exit without
957: * really doing anything. Warn about this. It's probably
958: * wrong.
959: */
1.5 schwarze 960:
1.2 schwarze 961: if ('\0' == (*bufp)[pos] && sv != pos) {
1.22 schwarze 962: (*r->msg)(MANDOCERR_NOARGS, r->data, ln, ppos, NULL);
963: return(ROFF_IGN);
1.2 schwarze 964: }
965:
1.16 schwarze 966: roffnode_push(r, tok, NULL, ln, ppos);
1.2 schwarze 967:
1.5 schwarze 968: r->last->rule = rule;
1.2 schwarze 969:
970: if (ROFF_ie == tok) {
971: /*
972: * An if-else will put the NEGATION of the current
973: * evaluated conditional into the stack.
974: */
975: r->rstackpos++;
976: if (ROFFRULE_DENY == r->last->rule)
977: r->rstack[r->rstackpos] = ROFFRULE_ALLOW;
978: else
979: r->rstack[r->rstackpos] = ROFFRULE_DENY;
980: }
1.5 schwarze 981:
982: /* If the parent has false as its rule, then so do we. */
983:
1.2 schwarze 984: if (r->last->parent && ROFFRULE_DENY == r->last->parent->rule)
985: r->last->rule = ROFFRULE_DENY;
1.5 schwarze 986:
987: /*
988: * Determine scope. If we're invoked with "\{" trailing the
989: * conditional, then we're in a multiline scope. Else our scope
990: * expires on the next line.
991: */
1.2 schwarze 992:
993: r->last->endspan = 1;
994:
995: if ('\\' == (*bufp)[pos] && '{' == (*bufp)[pos + 1]) {
996: r->last->endspan = -1;
997: pos += 2;
998: }
999:
1000: /*
1001: * If there are no arguments on the line, the next-line scope is
1002: * assumed.
1003: */
1004:
1005: if ('\0' == (*bufp)[pos])
1006: return(ROFF_IGN);
1007:
1008: /* Otherwise re-run the roff parser after recalculating. */
1.1 schwarze 1009:
1.2 schwarze 1010: *offs = pos;
1011: return(ROFF_RERUN);
1.1 schwarze 1012: }
1013:
1014:
1.2 schwarze 1015: /* ARGSUSED */
1016: static enum rofferr
1.7 schwarze 1017: roff_ds(ROFF_ARGS)
1018: {
1.10 schwarze 1019: char *name, *string;
1020:
1021: /*
1022: * A symbol is named by the first word following the macro
1023: * invocation up to a space. Its value is anything after the
1024: * name's trailing whitespace and optional double-quote. Thus,
1025: *
1026: * [.ds foo "bar " ]
1027: *
1028: * will have `bar " ' as its value.
1029: */
1.7 schwarze 1030:
1031: name = *bufp + pos;
1032: if ('\0' == *name)
1033: return(ROFF_IGN);
1034:
1035: string = name;
1.10 schwarze 1036: /* Read until end of name. */
1.7 schwarze 1037: while (*string && ' ' != *string)
1038: string++;
1.10 schwarze 1039:
1040: /* Nil-terminate name. */
1.7 schwarze 1041: if (*string)
1.10 schwarze 1042: *(string++) = '\0';
1043:
1044: /* Read past spaces. */
1045: while (*string && ' ' == *string)
1046: string++;
1047:
1048: /* Read passed initial double-quote. */
1.7 schwarze 1049: if (*string && '"' == *string)
1050: string++;
1051:
1.10 schwarze 1052: /* The rest is the value. */
1.16 schwarze 1053: roff_setstr(r, name, string, 0);
1.7 schwarze 1054: return(ROFF_IGN);
1055: }
1056:
1057:
1058: /* ARGSUSED */
1059: static enum rofferr
1.6 schwarze 1060: roff_nr(ROFF_ARGS)
1.1 schwarze 1061: {
1.6 schwarze 1062: const char *key, *val;
1063: struct reg *rg;
1064:
1065: key = &(*bufp)[pos];
1066: rg = r->regs->regs;
1067:
1068: /* Parse register request. */
1069: while ((*bufp)[pos] && ' ' != (*bufp)[pos])
1070: pos++;
1071:
1072: /*
1073: * Set our nil terminator. Because this line is going to be
1074: * ignored anyway, we can munge it as we please.
1075: */
1076: if ((*bufp)[pos])
1077: (*bufp)[pos++] = '\0';
1078:
1079: /* Skip whitespace to register token. */
1080: while ((*bufp)[pos] && ' ' == (*bufp)[pos])
1081: pos++;
1082:
1083: val = &(*bufp)[pos];
1084:
1085: /* Process register token. */
1086:
1087: if (0 == strcmp(key, "nS")) {
1088: rg[(int)REG_nS].set = 1;
1089: if ( ! roff_parse_nat(val, &rg[(int)REG_nS].v.u))
1090: rg[(int)REG_nS].v.u = 0;
1091: }
1.1 schwarze 1092:
1.2 schwarze 1093: return(ROFF_IGN);
1.14 schwarze 1094: }
1095:
1096: /* ARGSUSED */
1097: static enum rofferr
1098: roff_so(ROFF_ARGS)
1099: {
1100: char *name;
1.15 schwarze 1101:
1102: (*r->msg)(MANDOCERR_SO, r->data, ln, ppos, NULL);
1.14 schwarze 1103:
1.22 schwarze 1104: /*
1105: * Handle `so'. Be EXTREMELY careful, as we shouldn't be
1106: * opening anything that's not in our cwd or anything beneath
1107: * it. Thus, explicitly disallow traversing up the file-system
1108: * or using absolute paths.
1109: */
1110:
1.14 schwarze 1111: name = *bufp + pos;
1112: if ('/' == *name || strstr(name, "../") || strstr(name, "/..")) {
1113: (*r->msg)(MANDOCERR_SOPATH, r->data, ln, pos, NULL);
1114: return(ROFF_ERR);
1115: }
1116:
1117: *offs = pos;
1118: return(ROFF_SO);
1.7 schwarze 1119: }
1120:
1.16 schwarze 1121: /* ARGSUSED */
1122: static enum rofferr
1123: roff_userdef(ROFF_ARGS)
1.12 schwarze 1124: {
1.16 schwarze 1125: const char *arg[9];
1126: char *cp, *n1, *n2;
1.17 schwarze 1127: int i, quoted, pairs;
1.12 schwarze 1128:
1.16 schwarze 1129: /*
1130: * Collect pointers to macro argument strings
1131: * and null-terminate them.
1132: */
1133: cp = *bufp + pos;
1134: for (i = 0; i < 9; i++) {
1.17 schwarze 1135: /* Quoting can only start with a new word. */
1136: if ('"' == *cp) {
1137: quoted = 1;
1138: cp++;
1139: } else
1140: quoted = 0;
1.16 schwarze 1141: arg[i] = cp;
1.17 schwarze 1142: for (pairs = 0; '\0' != *cp; cp++) {
1143: /* Unquoted arguments end at blanks. */
1144: if (0 == quoted) {
1145: if (' ' == *cp)
1146: break;
1147: continue;
1148: }
1149: /* After pairs of quotes, move left. */
1150: if (pairs)
1151: cp[-pairs] = cp[0];
1152: /* Pairs of quotes do not end words, ... */
1153: if ('"' == cp[0] && '"' == cp[1]) {
1154: pairs++;
1155: cp++;
1156: continue;
1157: }
1158: /* ... but solitary quotes do. */
1159: if ('"' != *cp)
1160: continue;
1161: if (pairs)
1162: cp[-pairs] = '\0';
1163: *cp = ' ';
1164: break;
1165: }
1166: /* Last argument; the remaining ones are empty strings. */
1.16 schwarze 1167: if ('\0' == *cp)
1168: continue;
1.17 schwarze 1169: /* Null-terminate argument and move to the next one. */
1.16 schwarze 1170: *cp++ = '\0';
1171: while (' ' == *cp)
1172: cp++;
1173: }
1174:
1175: /*
1176: * Expand macro arguments.
1.12 schwarze 1177: */
1.16 schwarze 1178: *szp = 0;
1179: n1 = cp = mandoc_strdup(r->current_string);
1180: while (NULL != (cp = strstr(cp, "\\$"))) {
1181: i = cp[2] - '1';
1182: if (0 > i || 8 < i) {
1183: /* Not an argument invocation. */
1184: cp += 2;
1185: continue;
1186: }
1187:
1188: *szp = strlen(n1) - 3 + strlen(arg[i]) + 1;
1189: n2 = mandoc_malloc(*szp);
1190:
1191: strlcpy(n2, n1, (size_t)(cp - n1 + 1));
1192: strlcat(n2, arg[i], *szp);
1193: strlcat(n2, cp + 3, *szp);
1194:
1195: cp = n2 + (cp - n1);
1196: free(n1);
1197: n1 = n2;
1.12 schwarze 1198: }
1199:
1.16 schwarze 1200: /*
1201: * Replace the macro invocation
1202: * by the expanded macro.
1203: */
1204: free(*bufp);
1205: *bufp = n1;
1206: if (0 == *szp)
1207: *szp = strlen(*bufp) + 1;
1208:
1.19 schwarze 1209: return(*szp > 1 && '\n' == (*bufp)[(int)*szp - 2] ?
1.16 schwarze 1210: ROFF_REPARSE : ROFF_APPEND);
1.12 schwarze 1211: }
1212:
1.16 schwarze 1213: /*
1214: * Store *string into the user-defined string called *name.
1215: * In multiline mode, append to an existing entry and append '\n';
1216: * else replace the existing entry, if there is one.
1217: * To clear an existing entry, call with (*r, *name, NULL, 0).
1218: */
1.8 schwarze 1219: static void
1.16 schwarze 1220: roff_setstr(struct roff *r, const char *name, const char *string,
1221: int multiline)
1.7 schwarze 1222: {
1223: struct roffstr *n;
1.16 schwarze 1224: char *c;
1225: size_t oldch, newch;
1.7 schwarze 1226:
1.16 schwarze 1227: /* Search for an existing string with the same name. */
1.8 schwarze 1228: n = r->first_string;
1.7 schwarze 1229: while (n && strcmp(name, n->name))
1230: n = n->next;
1.8 schwarze 1231:
1232: if (NULL == n) {
1.16 schwarze 1233: /* Create a new string table entry. */
1.8 schwarze 1234: n = mandoc_malloc(sizeof(struct roffstr));
1.16 schwarze 1235: n->name = mandoc_strdup(name);
1236: n->string = NULL;
1.8 schwarze 1237: n->next = r->first_string;
1238: r->first_string = n;
1.16 schwarze 1239: } else if (0 == multiline) {
1240: /* In multiline mode, append; else replace. */
1.7 schwarze 1241: free(n->string);
1.16 schwarze 1242: n->string = NULL;
1243: }
1244:
1245: if (NULL == string)
1246: return;
1247:
1248: /*
1249: * One additional byte for the '\n' in multiline mode,
1250: * and one for the terminating '\0'.
1251: */
1252: newch = strlen(string) + (multiline ? 2 : 1);
1253: if (NULL == n->string) {
1254: n->string = mandoc_malloc(newch);
1255: *n->string = '\0';
1256: oldch = 0;
1257: } else {
1258: oldch = strlen(n->string);
1259: n->string = mandoc_realloc(n->string, oldch + newch);
1260: }
1261:
1262: /* Skip existing content in the destination buffer. */
1263: c = n->string + oldch;
1264:
1265: /* Append new content to the destination buffer. */
1266: while (*string) {
1267: /*
1268: * Rudimentary roff copy mode:
1269: * Handle escaped backslashes.
1270: */
1271: if ('\\' == *string && '\\' == *(string + 1))
1272: string++;
1273: *c++ = *string++;
1274: }
1.8 schwarze 1275:
1.16 schwarze 1276: /* Append terminating bytes. */
1277: if (multiline)
1278: *c++ = '\n';
1279: *c = '\0';
1.7 schwarze 1280: }
1281:
1282:
1.8 schwarze 1283: static const char *
1284: roff_getstrn(const struct roff *r, const char *name, size_t len)
1.7 schwarze 1285: {
1.8 schwarze 1286: const struct roffstr *n;
1.7 schwarze 1287:
1.8 schwarze 1288: n = r->first_string;
1.10 schwarze 1289: while (n && (strncmp(name, n->name, len) || '\0' != n->name[(int)len]))
1.7 schwarze 1290: n = n->next;
1.8 schwarze 1291:
1292: return(n ? n->string : NULL);
1.7 schwarze 1293: }
1294:
1.8 schwarze 1295:
1296: static void
1297: roff_freestr(struct roff *r)
1.7 schwarze 1298: {
1299: struct roffstr *n, *nn;
1300:
1.8 schwarze 1301: for (n = r->first_string; n; n = nn) {
1.7 schwarze 1302: free(n->name);
1303: free(n->string);
1304: nn = n->next;
1305: free(n);
1306: }
1.8 schwarze 1307:
1308: r->first_string = NULL;
1.1 schwarze 1309: }