Annotation of src/usr.bin/mandoc/roff.c, Revision 1.21
1.21 ! schwarze 1: /* $Id: roff.c,v 1.20 2010/11/28 01:00:40 schwarze Exp $ */
1.1 schwarze 2: /*
3: * Copyright (c) 2010 Kristaps Dzonsons <kristaps@bsd.lv>
1.8 schwarze 4: * Copyright (c) 2010 Ingo Schwarze <schwarze@openbsd.org>
1.1 schwarze 5: *
6: * Permission to use, copy, modify, and distribute this software for any
7: * purpose with or without fee is hereby granted, provided that the above
8: * copyright notice and this permission notice appear in all copies.
9: *
1.16 schwarze 10: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
1.1 schwarze 11: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
1.16 schwarze 12: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
1.1 schwarze 13: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17: */
18: #ifdef HAVE_CONFIG_H
19: #include "config.h"
20: #endif
21:
22: #include <assert.h>
1.6 schwarze 23: #include <errno.h>
1.3 schwarze 24: #include <ctype.h>
1.6 schwarze 25: #include <limits.h>
1.1 schwarze 26: #include <stdlib.h>
27: #include <string.h>
1.2 schwarze 28: #include <stdio.h>
1.1 schwarze 29:
30: #include "mandoc.h"
31: #include "roff.h"
1.8 schwarze 32: #include "libmandoc.h"
1.1 schwarze 33:
/* Capacity of the per-parser stack of pending `ie' rules. */
#define	RSTACK_MAX	128

/* Non-zero if `c' is one of the two roff control characters. */
#define	ROFF_CTL(c) \
	((c) == '.' || (c) == '\'')
38:
/*
 * Request tokens.  The numeric value of each token is also its index
 * into the roffs[] table, so the two must be kept in the same order.
 */
enum	rofft {
	ROFF_ad,
	ROFF_am,
	ROFF_ami,
	ROFF_am1,
	ROFF_de,
	ROFF_dei,
	ROFF_de1,
	ROFF_ds,
	ROFF_el,
	ROFF_hy,
	ROFF_ie,
	ROFF_if,
	ROFF_ig,
	ROFF_ne,
	ROFF_nh,
	ROFF_nr,
	ROFF_rm,
	ROFF_so,
	ROFF_tr,
	ROFF_cblock,	/* block close, spelled `.' in roffs[] */
	ROFF_ccond,	/* conditional close `\}'; FIXME: remove this. */
	ROFF_USERDEF,	/* user-defined macro, has no fixed name */
	ROFF_MAX	/* number of tokens; also "no such token" */
};
64:
/* Outcome of evaluating a conditional request. */
enum	roffrule {
	ROFFRULE_ALLOW,	/* body of the conditional is processed */
	ROFFRULE_DENY	/* body of the conditional is skipped */
};
69:
1.8 schwarze 70:
/* One entry in the singly linked list of user-defined strings. */
struct	roffstr {
	char		*name;		/* key of symbol */
	char		*string;	/* current value */
	struct roffstr	*next;		/* next in list */
};
76:
1.1 schwarze 77: struct roff {
78: struct roffnode *last; /* leaf of stack */
79: mandocmsg msg; /* err/warn/fatal messages */
80: void *data; /* privdata for messages */
1.2 schwarze 81: enum roffrule rstack[RSTACK_MAX]; /* stack of !`ie' rules */
82: int rstackpos; /* position in rstack */
1.6 schwarze 83: struct regset *regs; /* read/writable registers */
1.16 schwarze 84: struct roffstr *first_string; /* user-defined strings & macros */
85: const char *current_string; /* value of last called user macro */
1.1 schwarze 86: };
87:
88: struct roffnode {
89: enum rofft tok; /* type of node */
90: struct roffnode *parent; /* up one in stack */
91: int line; /* parse line */
92: int col; /* parse col */
1.16 schwarze 93: char *name; /* node name, e.g. macro name */
1.2 schwarze 94: char *end; /* end-rules: custom token */
95: int endspan; /* end-rules: next-line or infty */
96: enum roffrule rule; /* current evaluation rule */
1.1 schwarze 97: };
98:
/* Common parameter list shared by all request handlers. */
#define	ROFF_ARGS \
	struct roff *r,		/* parse ctx */ \
	enum rofft tok,		/* tok of macro */ \
	char **bufp,		/* input buffer */ \
	size_t *szp,		/* size of input buffer */ \
	int ln,			/* parse line */ \
	int ppos,		/* original pos in buffer */ \
	int pos,		/* current pos in buffer */ \
	int *offs		/* reset offset of buffer data */

/* Signature of a request handler. */
typedef	enum rofferr (*roffproc)(ROFF_ARGS);
109:
110: struct roffmac {
111: const char *name; /* macro name */
1.2 schwarze 112: roffproc proc; /* process new macro */
113: roffproc text; /* process as child text of macro */
114: roffproc sub; /* process as child of macro */
115: int flags;
116: #define ROFFMAC_STRUCT (1 << 0) /* always interpret */
1.3 schwarze 117: struct roffmac *next;
1.1 schwarze 118: };
119:
/* Request handlers and string-table helpers defined later in this file. */
static	enum rofferr	 roff_block(ROFF_ARGS);
static	enum rofferr	 roff_block_text(ROFF_ARGS);
static	enum rofferr	 roff_block_sub(ROFF_ARGS);
static	enum rofferr	 roff_cblock(ROFF_ARGS);
static	enum rofferr	 roff_ccond(ROFF_ARGS);
static	enum rofferr	 roff_cond(ROFF_ARGS);
static	enum rofferr	 roff_cond_text(ROFF_ARGS);
static	enum rofferr	 roff_cond_sub(ROFF_ARGS);
static	enum rofferr	 roff_ds(ROFF_ARGS);
static	enum roffrule	 roff_evalcond(const char *, int *);
static	void		 roff_freestr(struct roff *);
static	const char	*roff_getstrn(const struct roff *,
				const char *, size_t);
static	enum rofferr	 roff_line_ignore(ROFF_ARGS);
static	enum rofferr	 roff_line_error(ROFF_ARGS);
static	enum rofferr	 roff_nr(ROFF_ARGS);
static	int		 roff_res(struct roff *,
				char **, size_t *, int);
static	void		 roff_setstr(struct roff *,
				const char *, const char *, int);
static	enum rofferr	 roff_so(ROFF_ARGS);
static	enum rofferr	 roff_userdef(ROFF_ARGS);
1.1 schwarze 142:
/*
 * The request hash table has one bucket per printable, non-blank
 * ASCII character.  See roff_hash_find().
 */

#define	ASCII_HI	 126
#define	ASCII_LO	 33
#define	HASHWIDTH	(ASCII_HI - ASCII_LO + 1)

static	struct roffmac	*hash[HASHWIDTH];
150:
151: static struct roffmac roffs[ROFF_MAX] = {
1.21 ! schwarze 152: { "ad", roff_line_ignore, NULL, NULL, 0, NULL },
1.3 schwarze 153: { "am", roff_block, roff_block_text, roff_block_sub, 0, NULL },
154: { "ami", roff_block, roff_block_text, roff_block_sub, 0, NULL },
155: { "am1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
156: { "de", roff_block, roff_block_text, roff_block_sub, 0, NULL },
157: { "dei", roff_block, roff_block_text, roff_block_sub, 0, NULL },
158: { "de1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
1.7 schwarze 159: { "ds", roff_ds, NULL, NULL, 0, NULL },
1.3 schwarze 160: { "el", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
1.21 ! schwarze 161: { "hy", roff_line_ignore, NULL, NULL, 0, NULL },
1.3 schwarze 162: { "ie", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
163: { "if", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
164: { "ig", roff_block, roff_block_text, roff_block_sub, 0, NULL },
1.21 ! schwarze 165: { "ne", roff_line_ignore, NULL, NULL, 0, NULL },
! 166: { "nh", roff_line_ignore, NULL, NULL, 0, NULL },
1.14 schwarze 167: { "nr", roff_nr, NULL, NULL, 0, NULL },
1.21 ! schwarze 168: { "rm", roff_line_error, NULL, NULL, 0, NULL },
1.14 schwarze 169: { "so", roff_so, NULL, NULL, 0, NULL },
1.21 ! schwarze 170: { "tr", roff_line_ignore, NULL, NULL, 0, NULL },
1.3 schwarze 171: { ".", roff_cblock, NULL, NULL, 0, NULL },
172: { "\\}", roff_ccond, NULL, NULL, 0, NULL },
1.16 schwarze 173: { NULL, roff_userdef, NULL, NULL, 0, NULL },
1.1 schwarze 174: };
175:
/* Stack, lookup and parsing helpers defined later in this file. */
static	void		 roff_free1(struct roff *);
static	enum rofft	 roff_hash_find(const char *, size_t);
static	void		 roff_hash_init(void);
static	void		 roffnode_cleanscope(struct roff *);
static	void		 roffnode_push(struct roff *, enum rofft,
				const char *, int, int);
static	void		 roffnode_pop(struct roff *);
static	enum rofft	 roff_parse(struct roff *, const char *, int *);
static	int		 roff_parse_nat(const char *, unsigned int *);

/* Bucket index for the name `p': see roff_hash_find(). */
#define	ROFF_HASH(p)	((p)[0] - ASCII_LO)
188:
189: static void
190: roff_hash_init(void)
191: {
192: struct roffmac *n;
193: int buc, i;
194:
1.16 schwarze 195: for (i = 0; i < (int)ROFF_USERDEF; i++) {
1.3 schwarze 196: assert(roffs[i].name[0] >= ASCII_LO);
197: assert(roffs[i].name[0] <= ASCII_HI);
198:
199: buc = ROFF_HASH(roffs[i].name);
200:
201: if (NULL != (n = hash[buc])) {
202: for ( ; n->next; n = n->next)
203: /* Do nothing. */ ;
204: n->next = &roffs[i];
205: } else
206: hash[buc] = &roffs[i];
207: }
208: }
209:
1.1 schwarze 210:
211: /*
212: * Look up a roff token by its name. Returns ROFF_MAX if no macro by
213: * the nil-terminated string name could be found.
214: */
215: static enum rofft
1.16 schwarze 216: roff_hash_find(const char *p, size_t s)
1.1 schwarze 217: {
1.3 schwarze 218: int buc;
219: struct roffmac *n;
1.1 schwarze 220:
1.3 schwarze 221: /*
222: * libroff has an extremely simple hashtable, for the time
223: * being, which simply keys on the first character, which must
224: * be printable, then walks a chain. It works well enough until
225: * optimised.
226: */
227:
228: if (p[0] < ASCII_LO || p[0] > ASCII_HI)
229: return(ROFF_MAX);
230:
231: buc = ROFF_HASH(p);
232:
233: if (NULL == (n = hash[buc]))
234: return(ROFF_MAX);
235: for ( ; n; n = n->next)
1.16 schwarze 236: if (0 == strncmp(n->name, p, s) && '\0' == n->name[(int)s])
1.3 schwarze 237: return((enum rofft)(n - roffs));
1.1 schwarze 238:
239: return(ROFF_MAX);
240: }
241:
242:
243: /*
244: * Pop the current node off of the stack of roff instructions currently
245: * pending.
246: */
247: static void
248: roffnode_pop(struct roff *r)
249: {
250: struct roffnode *p;
251:
1.2 schwarze 252: assert(r->last);
253: p = r->last;
254:
255: if (ROFF_el == p->tok)
256: if (r->rstackpos > -1)
257: r->rstackpos--;
258:
259: r->last = r->last->parent;
1.16 schwarze 260: free(p->name);
261: free(p->end);
1.1 schwarze 262: free(p);
263: }
264:
265:
266: /*
267: * Push a roff node onto the instruction stack. This must later be
268: * removed with roffnode_pop().
269: */
1.11 schwarze 270: static void
1.16 schwarze 271: roffnode_push(struct roff *r, enum rofft tok, const char *name,
272: int line, int col)
1.1 schwarze 273: {
274: struct roffnode *p;
275:
1.11 schwarze 276: p = mandoc_calloc(1, sizeof(struct roffnode));
1.1 schwarze 277: p->tok = tok;
1.16 schwarze 278: if (name)
279: p->name = mandoc_strdup(name);
1.1 schwarze 280: p->parent = r->last;
281: p->line = line;
282: p->col = col;
1.2 schwarze 283: p->rule = p->parent ? p->parent->rule : ROFFRULE_DENY;
1.1 schwarze 284:
285: r->last = p;
286: }
287:
288:
289: static void
290: roff_free1(struct roff *r)
291: {
292:
293: while (r->last)
294: roffnode_pop(r);
1.8 schwarze 295: roff_freestr(r);
1.1 schwarze 296: }
297:
298:
299: void
300: roff_reset(struct roff *r)
301: {
302:
303: roff_free1(r);
304: }
305:
306:
/*
 * Destroy the parser `r': release its parse state and then the
 * context structure itself.  `r' must not be used afterwards.
 */
void
roff_free(struct roff *r)
{

	roff_free1(r);
	free(r);
}
314:
315:
316: struct roff *
1.11 schwarze 317: roff_alloc(struct regset *regs, void *data, const mandocmsg msg)
1.1 schwarze 318: {
319: struct roff *r;
320:
1.11 schwarze 321: r = mandoc_calloc(1, sizeof(struct roff));
1.6 schwarze 322: r->regs = regs;
1.1 schwarze 323: r->msg = msg;
324: r->data = data;
1.2 schwarze 325: r->rstackpos = -1;
1.3 schwarze 326:
327: roff_hash_init();
1.1 schwarze 328: return(r);
329: }
330:
331:
/*
 * Pre-filter a line for string references (`\*x', `\*(xx' or
 * `\*[name]'), starting at offset `pos'.  The first reference to a
 * known string is replaced by its value in a newly allocated buffer,
 * which is stored through `bufp' and `szp' in place of the old one.
 *
 * Returns 0 if a replacement was made (the caller has to parse the
 * line again) and 1 if the line was left alone, which includes the
 * case of a reference that runs into the end of the line.
 */
static int
roff_res(struct roff *r, char **bufp, size_t *szp, int pos)
{
	const char	*esc, *name, *scan, *val;
	char		*nbuf;
	size_t		 nbufsz;
	int		 len, maxl;

	scan = &(*bufp)[pos];

	while ((esc = strstr(scan, "\\*")) != NULL) {
		scan = esc + 2;

		/* maxl is the fixed name length; 0 means `until ]'. */
		if (*scan == '(') {
			scan++;
			maxl = 2;
		} else if (*scan == '[') {
			scan++;
			maxl = 0;
		} else
			maxl = 1;

		name = scan;

		for (len = 0; maxl == 0 || len < maxl; len++, scan++) {
			if (*scan == '\0')
				return 1; /* Error. */
			if (maxl == 0 && *scan == ']')
				break;
		}

		val = roff_getstrn(r, name, (size_t)len);

		if (val == NULL) {
			/* Unknown string: resume after the name. */
			if (maxl == 0)
				scan++;	/* step over the `]' */
			continue;
		}

		/* Splice: text before the escape, value, text after. */

		nbufsz = *szp + strlen(val) + 1;
		nbuf = mandoc_malloc(nbufsz);
		*nbuf = '\0';

		strlcat(nbuf, *bufp, (size_t)(esc - *bufp + 1));
		strlcat(nbuf, val, nbufsz);
		strlcat(nbuf, scan + (maxl ? 0 : 1), nbufsz);

		free(*bufp);

		*bufp = nbuf;
		*szp = nbufsz;
		return 0;
	}

	return 1;
}
396:
397:
1.1 schwarze 398: enum rofferr
1.6 schwarze 399: roff_parseln(struct roff *r, int ln, char **bufp,
400: size_t *szp, int pos, int *offs)
1.1 schwarze 401: {
402: enum rofft t;
403: int ppos;
404:
1.2 schwarze 405: /*
1.8 schwarze 406: * Run the reserved-word filter only if we have some reserved
407: * words to fill in.
408: */
409:
1.9 schwarze 410: if (r->first_string && ! roff_res(r, bufp, szp, pos))
1.16 schwarze 411: return(ROFF_REPARSE);
1.8 schwarze 412:
413: /*
1.2 schwarze 414: * First, if a scope is open and we're not a macro, pass the
415: * text through the macro's filter. If a scope isn't open and
416: * we're not a macro, just let it through.
417: */
418:
419: if (r->last && ! ROFF_CTL((*bufp)[pos])) {
420: t = r->last->tok;
421: assert(roffs[t].text);
422: return((*roffs[t].text)
1.8 schwarze 423: (r, t, bufp, szp,
424: ln, pos, pos, offs));
1.2 schwarze 425: } else if ( ! ROFF_CTL((*bufp)[pos]))
426: return(ROFF_CONT);
427:
428: /*
429: * If a scope is open, go to the child handler for that macro,
430: * as it may want to preprocess before doing anything with it.
431: */
432:
433: if (r->last) {
1.1 schwarze 434: t = r->last->tok;
435: assert(roffs[t].sub);
1.2 schwarze 436: return((*roffs[t].sub)
1.8 schwarze 437: (r, t, bufp, szp,
438: ln, pos, pos, offs));
1.2 schwarze 439: }
440:
441: /*
442: * Lastly, as we've no scope open, try to look up and execute
443: * the new macro. If no macro is found, simply return and let
444: * the compilers handle it.
445: */
446:
447: ppos = pos;
1.16 schwarze 448: if (ROFF_MAX == (t = roff_parse(r, *bufp, &pos)))
1.1 schwarze 449: return(ROFF_CONT);
450:
1.2 schwarze 451: assert(roffs[t].proc);
452: return((*roffs[t].proc)
1.8 schwarze 453: (r, t, bufp, szp,
454: ln, ppos, pos, offs));
1.2 schwarze 455: }
456:
1.1 schwarze 457:
1.2 schwarze 458: int
459: roff_endparse(struct roff *r)
460: {
1.1 schwarze 461:
1.2 schwarze 462: if (NULL == r->last)
463: return(1);
464: return((*r->msg)(MANDOCERR_SCOPEEXIT, r->data, r->last->line,
465: r->last->col, NULL));
1.1 schwarze 466: }
467:
468:
469: /*
470: * Parse a roff node's type from the input buffer. This must be in the
471: * form of ".foo xxx" in the usual way.
472: */
473: static enum rofft
1.16 schwarze 474: roff_parse(struct roff *r, const char *buf, int *pos)
1.1 schwarze 475: {
1.16 schwarze 476: const char *mac;
477: size_t maclen;
1.1 schwarze 478: enum rofft t;
479:
1.2 schwarze 480: assert(ROFF_CTL(buf[*pos]));
481: (*pos)++;
1.1 schwarze 482:
1.16 schwarze 483: while (' ' == buf[*pos] || '\t' == buf[*pos])
1.1 schwarze 484: (*pos)++;
485:
486: if ('\0' == buf[*pos])
487: return(ROFF_MAX);
488:
1.16 schwarze 489: mac = buf + *pos;
490: maclen = strcspn(mac, " \\\t\0");
1.1 schwarze 491:
1.16 schwarze 492: t = (r->current_string = roff_getstrn(r, mac, maclen))
493: ? ROFF_USERDEF : roff_hash_find(mac, maclen);
1.1 schwarze 494:
1.16 schwarze 495: *pos += maclen;
1.1 schwarze 496: while (buf[*pos] && ' ' == buf[*pos])
497: (*pos)++;
498:
499: return(t);
500: }
501:
502:
/*
 * Convert the decimal number in `buf' to an unsigned int.  The whole
 * string must be consumed by strtol(3) and the value must lie in the
 * range 0..INT_MAX.  Returns 1 and stores the value in *res on
 * success; returns 0 and leaves *res alone otherwise.
 */
static int
roff_parse_nat(const char *buf, unsigned int *res)
{
	char	*end;
	long	 num;

	errno = 0;
	num = strtol(buf, &end, 10);

	/* Empty input or trailing garbage. */
	if (*buf == '\0' || *end != '\0')
		return 0;

	/* Out of range for long. */
	if (errno == ERANGE && (num == LONG_MAX || num == LONG_MIN))
		return 0;

	/* Out of range for a non-negative int. */
	if (num < 0 || num > INT_MAX)
		return 0;

	*res = (unsigned int)num;
	return 1;
}
521:
522:
1.1 schwarze 523: /* ARGSUSED */
524: static enum rofferr
1.2 schwarze 525: roff_cblock(ROFF_ARGS)
1.1 schwarze 526: {
527:
1.2 schwarze 528: /*
529: * A block-close `..' should only be invoked as a child of an
530: * ignore macro, otherwise raise a warning and just ignore it.
531: */
532:
533: if (NULL == r->last) {
534: if ( ! (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL))
535: return(ROFF_ERR);
536: return(ROFF_IGN);
537: }
1.1 schwarze 538:
1.2 schwarze 539: switch (r->last->tok) {
540: case (ROFF_am):
541: /* FALLTHROUGH */
542: case (ROFF_ami):
543: /* FALLTHROUGH */
544: case (ROFF_am1):
545: /* FALLTHROUGH */
546: case (ROFF_de):
547: /* FALLTHROUGH */
548: case (ROFF_dei):
549: /* FALLTHROUGH */
550: case (ROFF_ig):
551: break;
552: default:
553: if ( ! (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL))
554: return(ROFF_ERR);
1.1 schwarze 555: return(ROFF_IGN);
1.2 schwarze 556: }
557:
558: if ((*bufp)[pos])
559: if ( ! (*r->msg)(MANDOCERR_ARGSLOST, r->data, ln, pos, NULL))
560: return(ROFF_ERR);
561:
562: roffnode_pop(r);
563: roffnode_cleanscope(r);
564: return(ROFF_IGN);
565:
566: }
1.1 schwarze 567:
568:
1.2 schwarze 569: static void
570: roffnode_cleanscope(struct roff *r)
571: {
1.1 schwarze 572:
1.2 schwarze 573: while (r->last) {
574: if (--r->last->endspan < 0)
575: break;
576: roffnode_pop(r);
577: }
578: }
1.1 schwarze 579:
580:
1.2 schwarze 581: /* ARGSUSED */
582: static enum rofferr
583: roff_ccond(ROFF_ARGS)
584: {
1.1 schwarze 585:
1.2 schwarze 586: if (NULL == r->last) {
587: if ( ! (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL))
588: return(ROFF_ERR);
1.1 schwarze 589: return(ROFF_IGN);
1.2 schwarze 590: }
1.1 schwarze 591:
1.2 schwarze 592: switch (r->last->tok) {
593: case (ROFF_el):
594: /* FALLTHROUGH */
595: case (ROFF_ie):
596: /* FALLTHROUGH */
597: case (ROFF_if):
598: break;
599: default:
600: if ( ! (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL))
601: return(ROFF_ERR);
602: return(ROFF_IGN);
603: }
1.1 schwarze 604:
1.2 schwarze 605: if (r->last->endspan > -1) {
606: if ( ! (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL))
607: return(ROFF_ERR);
1.1 schwarze 608: return(ROFF_IGN);
1.2 schwarze 609: }
610:
611: if ((*bufp)[pos])
612: if ( ! (*r->msg)(MANDOCERR_ARGSLOST, r->data, ln, pos, NULL))
613: return(ROFF_ERR);
1.1 schwarze 614:
1.2 schwarze 615: roffnode_pop(r);
616: roffnode_cleanscope(r);
1.1 schwarze 617: return(ROFF_IGN);
618: }
619:
620:
621: /* ARGSUSED */
622: static enum rofferr
1.2 schwarze 623: roff_block(ROFF_ARGS)
1.1 schwarze 624: {
1.2 schwarze 625: int sv;
626: size_t sz;
1.16 schwarze 627: char *name;
628:
629: name = NULL;
1.2 schwarze 630:
1.16 schwarze 631: if (ROFF_ig != tok) {
632: if ('\0' == (*bufp)[pos]) {
633: (*r->msg)(MANDOCERR_NOARGS, r->data, ln, ppos, NULL);
634: return(ROFF_IGN);
635: }
1.18 schwarze 636: if (ROFF_de1 == tok)
637: tok = ROFF_de;
1.16 schwarze 638: if (ROFF_de == tok)
639: name = *bufp + pos;
1.21 ! schwarze 640: else
! 641: (*r->msg)(MANDOCERR_REQUEST, r->data, ln, ppos,
! 642: roffs[tok].name);
1.2 schwarze 643: while ((*bufp)[pos] && ' ' != (*bufp)[pos])
644: pos++;
645: while (' ' == (*bufp)[pos])
1.16 schwarze 646: (*bufp)[pos++] = '\0';
1.2 schwarze 647: }
648:
1.16 schwarze 649: roffnode_push(r, tok, name, ln, ppos);
650:
651: /*
652: * At the beginning of a `de' macro, clear the existing string
653: * with the same name, if there is one. New content will be
654: * added from roff_block_text() in multiline mode.
655: */
656: if (ROFF_de == tok)
1.19 schwarze 657: roff_setstr(r, name, "", 0);
1.2 schwarze 658:
659: if ('\0' == (*bufp)[pos])
660: return(ROFF_IGN);
1.1 schwarze 661:
1.2 schwarze 662: sv = pos;
663: while ((*bufp)[pos] && ' ' != (*bufp)[pos] &&
664: '\t' != (*bufp)[pos])
665: pos++;
666:
667: /*
668: * Note: groff does NOT like escape characters in the input.
669: * Instead of detecting this, we're just going to let it fly and
670: * to hell with it.
671: */
672:
673: assert(pos > sv);
674: sz = (size_t)(pos - sv);
675:
676: if (1 == sz && '.' == (*bufp)[sv])
677: return(ROFF_IGN);
678:
1.11 schwarze 679: r->last->end = mandoc_malloc(sz + 1);
1.2 schwarze 680:
681: memcpy(r->last->end, *bufp + sv, sz);
682: r->last->end[(int)sz] = '\0';
683:
684: if ((*bufp)[pos])
685: if ( ! (*r->msg)(MANDOCERR_ARGSLOST, r->data, ln, pos, NULL))
686: return(ROFF_ERR);
1.1 schwarze 687:
688: return(ROFF_IGN);
689: }
690:
691:
692: /* ARGSUSED */
693: static enum rofferr
1.2 schwarze 694: roff_block_sub(ROFF_ARGS)
1.1 schwarze 695: {
1.2 schwarze 696: enum rofft t;
697: int i, j;
698:
699: /*
700: * First check whether a custom macro exists at this level. If
701: * it does, then check against it. This is some of groff's
702: * stranger behaviours. If we encountered a custom end-scope
703: * tag and that tag also happens to be a "real" macro, then we
704: * need to try interpreting it again as a real macro. If it's
705: * not, then return ignore. Else continue.
706: */
707:
708: if (r->last->end) {
709: i = pos + 1;
710: while (' ' == (*bufp)[i] || '\t' == (*bufp)[i])
711: i++;
712:
713: for (j = 0; r->last->end[j]; j++, i++)
714: if ((*bufp)[i] != r->last->end[j])
715: break;
1.1 schwarze 716:
1.2 schwarze 717: if ('\0' == r->last->end[j] &&
718: ('\0' == (*bufp)[i] ||
719: ' ' == (*bufp)[i] ||
720: '\t' == (*bufp)[i])) {
721: roffnode_pop(r);
722: roffnode_cleanscope(r);
1.1 schwarze 723:
1.16 schwarze 724: if (ROFF_MAX != roff_parse(r, *bufp, &pos))
1.2 schwarze 725: return(ROFF_RERUN);
726: return(ROFF_IGN);
727: }
1.1 schwarze 728: }
729:
1.2 schwarze 730: /*
731: * If we have no custom end-query or lookup failed, then try
732: * pulling it out of the hashtable.
733: */
1.1 schwarze 734:
1.2 schwarze 735: ppos = pos;
1.16 schwarze 736: t = roff_parse(r, *bufp, &pos);
1.1 schwarze 737:
1.16 schwarze 738: /*
739: * Macros other than block-end are only significant
740: * in `de' blocks; elsewhere, simply throw them away.
741: */
742: if (ROFF_cblock != t) {
743: if (ROFF_de == tok)
744: roff_setstr(r, r->last->name, *bufp + ppos, 1);
1.1 schwarze 745: return(ROFF_IGN);
1.16 schwarze 746: }
1.1 schwarze 747:
1.2 schwarze 748: assert(roffs[t].proc);
1.6 schwarze 749: return((*roffs[t].proc)(r, t, bufp, szp,
750: ln, ppos, pos, offs));
1.2 schwarze 751: }
752:
753:
754: /* ARGSUSED */
755: static enum rofferr
756: roff_block_text(ROFF_ARGS)
757: {
758:
1.16 schwarze 759: if (ROFF_de == tok)
760: roff_setstr(r, r->last->name, *bufp + pos, 1);
761:
1.2 schwarze 762: return(ROFF_IGN);
763: }
764:
765:
766: /* ARGSUSED */
767: static enum rofferr
768: roff_cond_sub(ROFF_ARGS)
769: {
770: enum rofft t;
771: enum roffrule rr;
772:
773: ppos = pos;
774: rr = r->last->rule;
775:
1.5 schwarze 776: /*
777: * Clean out scope. If we've closed ourselves, then don't
778: * continue.
779: */
780:
781: roffnode_cleanscope(r);
782:
1.16 schwarze 783: if (ROFF_MAX == (t = roff_parse(r, *bufp, &pos))) {
1.12 schwarze 784: if ('\\' == (*bufp)[pos] && '}' == (*bufp)[pos + 1])
785: return(roff_ccond
786: (r, ROFF_ccond, bufp, szp,
787: ln, pos, pos + 2, offs));
1.2 schwarze 788: return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);
1.12 schwarze 789: }
1.2 schwarze 790:
791: /*
792: * A denied conditional must evaluate its children if and only
793: * if they're either structurally required (such as loops and
794: * conditionals) or a closing macro.
795: */
796: if (ROFFRULE_DENY == rr)
797: if ( ! (ROFFMAC_STRUCT & roffs[t].flags))
798: if (ROFF_ccond != t)
799: return(ROFF_IGN);
800:
801: assert(roffs[t].proc);
1.6 schwarze 802: return((*roffs[t].proc)(r, t, bufp, szp,
803: ln, ppos, pos, offs));
1.2 schwarze 804: }
805:
806:
807: /* ARGSUSED */
808: static enum rofferr
809: roff_cond_text(ROFF_ARGS)
810: {
811: char *ep, *st;
812: enum roffrule rr;
813:
814: rr = r->last->rule;
1.1 schwarze 815:
816: /*
1.2 schwarze 817: * We display the value of the text if out current evaluation
818: * scope permits us to do so.
1.1 schwarze 819: */
1.13 schwarze 820:
821: /* FIXME: use roff_ccond? */
1.1 schwarze 822:
1.2 schwarze 823: st = &(*bufp)[pos];
824: if (NULL == (ep = strstr(st, "\\}"))) {
825: roffnode_cleanscope(r);
826: return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);
827: }
828:
1.4 schwarze 829: if (ep == st || (ep > st && '\\' != *(ep - 1)))
1.2 schwarze 830: roffnode_pop(r);
831:
832: roffnode_cleanscope(r);
833: return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);
834: }
835:
836:
1.5 schwarze 837: static enum roffrule
838: roff_evalcond(const char *v, int *pos)
839: {
840:
841: switch (v[*pos]) {
842: case ('n'):
843: (*pos)++;
844: return(ROFFRULE_ALLOW);
845: case ('e'):
846: /* FALLTHROUGH */
847: case ('o'):
848: /* FALLTHROUGH */
849: case ('t'):
850: (*pos)++;
851: return(ROFFRULE_DENY);
852: default:
853: break;
854: }
855:
856: while (v[*pos] && ' ' != v[*pos])
857: (*pos)++;
858: return(ROFFRULE_DENY);
859: }
860:
861:
1.2 schwarze 862: /* ARGSUSED */
863: static enum rofferr
1.21 ! schwarze 864: roff_line_ignore(ROFF_ARGS)
1.6 schwarze 865: {
866:
1.21 ! schwarze 867: return(ROFF_IGN);
! 868: }
! 869:
! 870:
! 871: /* ARGSUSED */
! 872: static enum rofferr
! 873: roff_line_error(ROFF_ARGS)
! 874: {
! 875:
! 876: (*r->msg)(MANDOCERR_REQUEST, r->data, ln, ppos, roffs[tok].name);
1.6 schwarze 877: return(ROFF_IGN);
878: }
879:
880:
881: /* ARGSUSED */
882: static enum rofferr
1.2 schwarze 883: roff_cond(ROFF_ARGS)
884: {
885: int sv;
1.5 schwarze 886: enum roffrule rule;
1.2 schwarze 887:
888: /* Stack overflow! */
889:
890: if (ROFF_ie == tok && r->rstackpos == RSTACK_MAX - 1) {
1.1 schwarze 891: (*r->msg)(MANDOCERR_MEM, r->data, ln, ppos, NULL);
892: return(ROFF_ERR);
893: }
894:
1.5 schwarze 895: /* First, evaluate the conditional. */
1.2 schwarze 896:
1.5 schwarze 897: if (ROFF_el == tok) {
898: /*
899: * An `.el' will get the value of the current rstack
900: * entry set in prior `ie' calls or defaults to DENY.
901: */
902: if (r->rstackpos < 0)
903: rule = ROFFRULE_DENY;
904: else
905: rule = r->rstack[r->rstackpos];
906: } else
907: rule = roff_evalcond(*bufp, &pos);
1.2 schwarze 908:
909: sv = pos;
1.5 schwarze 910:
1.2 schwarze 911: while (' ' == (*bufp)[pos])
912: pos++;
913:
914: /*
915: * Roff is weird. If we have just white-space after the
916: * conditional, it's considered the BODY and we exit without
917: * really doing anything. Warn about this. It's probably
918: * wrong.
919: */
1.5 schwarze 920:
1.2 schwarze 921: if ('\0' == (*bufp)[pos] && sv != pos) {
1.5 schwarze 922: if ((*r->msg)(MANDOCERR_NOARGS, r->data, ln, ppos, NULL))
923: return(ROFF_IGN);
924: return(ROFF_ERR);
1.2 schwarze 925: }
926:
1.16 schwarze 927: roffnode_push(r, tok, NULL, ln, ppos);
1.2 schwarze 928:
1.5 schwarze 929: r->last->rule = rule;
1.2 schwarze 930:
931: if (ROFF_ie == tok) {
932: /*
933: * An if-else will put the NEGATION of the current
934: * evaluated conditional into the stack.
935: */
936: r->rstackpos++;
937: if (ROFFRULE_DENY == r->last->rule)
938: r->rstack[r->rstackpos] = ROFFRULE_ALLOW;
939: else
940: r->rstack[r->rstackpos] = ROFFRULE_DENY;
941: }
1.5 schwarze 942:
943: /* If the parent has false as its rule, then so do we. */
944:
1.2 schwarze 945: if (r->last->parent && ROFFRULE_DENY == r->last->parent->rule)
946: r->last->rule = ROFFRULE_DENY;
1.5 schwarze 947:
948: /*
949: * Determine scope. If we're invoked with "\{" trailing the
950: * conditional, then we're in a multiline scope. Else our scope
951: * expires on the next line.
952: */
1.2 schwarze 953:
954: r->last->endspan = 1;
955:
956: if ('\\' == (*bufp)[pos] && '{' == (*bufp)[pos + 1]) {
957: r->last->endspan = -1;
958: pos += 2;
959: }
960:
961: /*
962: * If there are no arguments on the line, the next-line scope is
963: * assumed.
964: */
965:
966: if ('\0' == (*bufp)[pos])
967: return(ROFF_IGN);
968:
969: /* Otherwise re-run the roff parser after recalculating. */
1.1 schwarze 970:
1.2 schwarze 971: *offs = pos;
972: return(ROFF_RERUN);
1.1 schwarze 973: }
974:
975:
1.2 schwarze 976: /* ARGSUSED */
977: static enum rofferr
1.7 schwarze 978: roff_ds(ROFF_ARGS)
979: {
1.10 schwarze 980: char *name, *string;
981:
982: /*
983: * A symbol is named by the first word following the macro
984: * invocation up to a space. Its value is anything after the
985: * name's trailing whitespace and optional double-quote. Thus,
986: *
987: * [.ds foo "bar " ]
988: *
989: * will have `bar " ' as its value.
990: */
1.7 schwarze 991:
992: name = *bufp + pos;
993: if ('\0' == *name)
994: return(ROFF_IGN);
995:
996: string = name;
1.10 schwarze 997: /* Read until end of name. */
1.7 schwarze 998: while (*string && ' ' != *string)
999: string++;
1.10 schwarze 1000:
1001: /* Nil-terminate name. */
1.7 schwarze 1002: if (*string)
1.10 schwarze 1003: *(string++) = '\0';
1004:
1005: /* Read past spaces. */
1006: while (*string && ' ' == *string)
1007: string++;
1008:
1009: /* Read passed initial double-quote. */
1.7 schwarze 1010: if (*string && '"' == *string)
1011: string++;
1012:
1.10 schwarze 1013: /* The rest is the value. */
1.16 schwarze 1014: roff_setstr(r, name, string, 0);
1.7 schwarze 1015: return(ROFF_IGN);
1016: }
1017:
1018:
1019: /* ARGSUSED */
1020: static enum rofferr
1.6 schwarze 1021: roff_nr(ROFF_ARGS)
1.1 schwarze 1022: {
1.6 schwarze 1023: const char *key, *val;
1024: struct reg *rg;
1025:
1026: key = &(*bufp)[pos];
1027: rg = r->regs->regs;
1028:
1029: /* Parse register request. */
1030: while ((*bufp)[pos] && ' ' != (*bufp)[pos])
1031: pos++;
1032:
1033: /*
1034: * Set our nil terminator. Because this line is going to be
1035: * ignored anyway, we can munge it as we please.
1036: */
1037: if ((*bufp)[pos])
1038: (*bufp)[pos++] = '\0';
1039:
1040: /* Skip whitespace to register token. */
1041: while ((*bufp)[pos] && ' ' == (*bufp)[pos])
1042: pos++;
1043:
1044: val = &(*bufp)[pos];
1045:
1046: /* Process register token. */
1047:
1048: if (0 == strcmp(key, "nS")) {
1049: rg[(int)REG_nS].set = 1;
1050: if ( ! roff_parse_nat(val, &rg[(int)REG_nS].v.u))
1051: rg[(int)REG_nS].v.u = 0;
1052: }
1.1 schwarze 1053:
1.2 schwarze 1054: return(ROFF_IGN);
1.14 schwarze 1055: }
1056:
1057:
1058: /* ARGSUSED */
1059: static enum rofferr
1060: roff_so(ROFF_ARGS)
1061: {
1062: char *name;
1.15 schwarze 1063:
1064: (*r->msg)(MANDOCERR_SO, r->data, ln, ppos, NULL);
1.14 schwarze 1065:
1066: name = *bufp + pos;
1067: if ('/' == *name || strstr(name, "../") || strstr(name, "/..")) {
1068: (*r->msg)(MANDOCERR_SOPATH, r->data, ln, pos, NULL);
1069: return(ROFF_ERR);
1070: }
1071:
1072: *offs = pos;
1073: return(ROFF_SO);
1.7 schwarze 1074: }
1075:
1076:
1.16 schwarze 1077: /* ARGSUSED */
1078: static enum rofferr
1079: roff_userdef(ROFF_ARGS)
1.12 schwarze 1080: {
1.16 schwarze 1081: const char *arg[9];
1082: char *cp, *n1, *n2;
1.17 schwarze 1083: int i, quoted, pairs;
1.12 schwarze 1084:
1.16 schwarze 1085: /*
1086: * Collect pointers to macro argument strings
1087: * and null-terminate them.
1088: */
1089: cp = *bufp + pos;
1090: for (i = 0; i < 9; i++) {
1.17 schwarze 1091: /* Quoting can only start with a new word. */
1092: if ('"' == *cp) {
1093: quoted = 1;
1094: cp++;
1095: } else
1096: quoted = 0;
1.16 schwarze 1097: arg[i] = cp;
1.17 schwarze 1098: for (pairs = 0; '\0' != *cp; cp++) {
1099: /* Unquoted arguments end at blanks. */
1100: if (0 == quoted) {
1101: if (' ' == *cp)
1102: break;
1103: continue;
1104: }
1105: /* After pairs of quotes, move left. */
1106: if (pairs)
1107: cp[-pairs] = cp[0];
1108: /* Pairs of quotes do not end words, ... */
1109: if ('"' == cp[0] && '"' == cp[1]) {
1110: pairs++;
1111: cp++;
1112: continue;
1113: }
1114: /* ... but solitary quotes do. */
1115: if ('"' != *cp)
1116: continue;
1117: if (pairs)
1118: cp[-pairs] = '\0';
1119: *cp = ' ';
1120: break;
1121: }
1122: /* Last argument; the remaining ones are empty strings. */
1.16 schwarze 1123: if ('\0' == *cp)
1124: continue;
1.17 schwarze 1125: /* Null-terminate argument and move to the next one. */
1.16 schwarze 1126: *cp++ = '\0';
1127: while (' ' == *cp)
1128: cp++;
1129: }
1130:
1131: /*
1132: * Expand macro arguments.
1.12 schwarze 1133: */
1.16 schwarze 1134: *szp = 0;
1135: n1 = cp = mandoc_strdup(r->current_string);
1136: while (NULL != (cp = strstr(cp, "\\$"))) {
1137: i = cp[2] - '1';
1138: if (0 > i || 8 < i) {
1139: /* Not an argument invocation. */
1140: cp += 2;
1141: continue;
1142: }
1143:
1144: *szp = strlen(n1) - 3 + strlen(arg[i]) + 1;
1145: n2 = mandoc_malloc(*szp);
1146:
1147: strlcpy(n2, n1, (size_t)(cp - n1 + 1));
1148: strlcat(n2, arg[i], *szp);
1149: strlcat(n2, cp + 3, *szp);
1150:
1151: cp = n2 + (cp - n1);
1152: free(n1);
1153: n1 = n2;
1.12 schwarze 1154: }
1155:
1.16 schwarze 1156: /*
1157: * Replace the macro invocation
1158: * by the expanded macro.
1159: */
1160: free(*bufp);
1161: *bufp = n1;
1162: if (0 == *szp)
1163: *szp = strlen(*bufp) + 1;
1164:
1.19 schwarze 1165: return(*szp > 1 && '\n' == (*bufp)[(int)*szp - 2] ?
1.16 schwarze 1166: ROFF_REPARSE : ROFF_APPEND);
1.12 schwarze 1167: }
1168:
1.16 schwarze 1169: /*
1170: * Store *string into the user-defined string called *name.
1171: * In multiline mode, append to an existing entry and append '\n';
1172: * else replace the existing entry, if there is one.
1173: * To clear an existing entry, call with (*r, *name, NULL, 0).
1174: */
1.8 schwarze 1175: static void
1.16 schwarze 1176: roff_setstr(struct roff *r, const char *name, const char *string,
1177: int multiline)
1.7 schwarze 1178: {
1179: struct roffstr *n;
1.16 schwarze 1180: char *c;
1181: size_t oldch, newch;
1.7 schwarze 1182:
1.16 schwarze 1183: /* Search for an existing string with the same name. */
1.8 schwarze 1184: n = r->first_string;
1.7 schwarze 1185: while (n && strcmp(name, n->name))
1186: n = n->next;
1.8 schwarze 1187:
1188: if (NULL == n) {
1.16 schwarze 1189: /* Create a new string table entry. */
1.8 schwarze 1190: n = mandoc_malloc(sizeof(struct roffstr));
1.16 schwarze 1191: n->name = mandoc_strdup(name);
1192: n->string = NULL;
1.8 schwarze 1193: n->next = r->first_string;
1194: r->first_string = n;
1.16 schwarze 1195: } else if (0 == multiline) {
1196: /* In multiline mode, append; else replace. */
1.7 schwarze 1197: free(n->string);
1.16 schwarze 1198: n->string = NULL;
1199: }
1200:
1201: if (NULL == string)
1202: return;
1203:
1204: /*
1205: * One additional byte for the '\n' in multiline mode,
1206: * and one for the terminating '\0'.
1207: */
1208: newch = strlen(string) + (multiline ? 2 : 1);
1209: if (NULL == n->string) {
1210: n->string = mandoc_malloc(newch);
1211: *n->string = '\0';
1212: oldch = 0;
1213: } else {
1214: oldch = strlen(n->string);
1215: n->string = mandoc_realloc(n->string, oldch + newch);
1216: }
1217:
1218: /* Skip existing content in the destination buffer. */
1219: c = n->string + oldch;
1220:
1221: /* Append new content to the destination buffer. */
1222: while (*string) {
1223: /*
1224: * Rudimentary roff copy mode:
1225: * Handle escaped backslashes.
1226: */
1227: if ('\\' == *string && '\\' == *(string + 1))
1228: string++;
1229: *c++ = *string++;
1230: }
1.8 schwarze 1231:
1.16 schwarze 1232: /* Append terminating bytes. */
1233: if (multiline)
1234: *c++ = '\n';
1235: *c = '\0';
1.7 schwarze 1236: }
1237:
1238:
1.8 schwarze 1239: static const char *
1240: roff_getstrn(const struct roff *r, const char *name, size_t len)
1.7 schwarze 1241: {
1.8 schwarze 1242: const struct roffstr *n;
1.7 schwarze 1243:
1.8 schwarze 1244: n = r->first_string;
1.10 schwarze 1245: while (n && (strncmp(name, n->name, len) || '\0' != n->name[(int)len]))
1.7 schwarze 1246: n = n->next;
1.8 schwarze 1247:
1248: return(n ? n->string : NULL);
1.7 schwarze 1249: }
1250:
1.8 schwarze 1251:
1252: static void
1253: roff_freestr(struct roff *r)
1.7 schwarze 1254: {
1255: struct roffstr *n, *nn;
1256:
1.8 schwarze 1257: for (n = r->first_string; n; n = nn) {
1.7 schwarze 1258: free(n->name);
1259: free(n->string);
1260: nn = n->next;
1261: free(n);
1262: }
1.8 schwarze 1263:
1264: r->first_string = NULL;
1.1 schwarze 1265: }