Annotation of src/usr.bin/mandoc/roff.c, Revision 1.17
1.17 ! schwarze 1: /* $Id: roff.c,v 1.16 2010/11/25 22:23:31 schwarze Exp $ */
1.1 schwarze 2: /*
3: * Copyright (c) 2010 Kristaps Dzonsons <kristaps@bsd.lv>
1.8 schwarze 4: * Copyright (c) 2010 Ingo Schwarze <schwarze@openbsd.org>
1.1 schwarze 5: *
6: * Permission to use, copy, modify, and distribute this software for any
7: * purpose with or without fee is hereby granted, provided that the above
8: * copyright notice and this permission notice appear in all copies.
9: *
1.16 schwarze 10: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
1.1 schwarze 11: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
1.16 schwarze 12: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
1.1 schwarze 13: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17: */
18: #ifdef HAVE_CONFIG_H
19: #include "config.h"
20: #endif
21:
22: #include <assert.h>
1.6 schwarze 23: #include <errno.h>
1.3 schwarze 24: #include <ctype.h>
1.6 schwarze 25: #include <limits.h>
1.1 schwarze 26: #include <stdlib.h>
27: #include <string.h>
1.2 schwarze 28: #include <stdio.h>
1.1 schwarze 29:
30: #include "mandoc.h"
31: #include "roff.h"
1.8 schwarze 32: #include "libmandoc.h"
1.1 schwarze 33:
1.2 schwarze 34: #define RSTACK_MAX 128
35:
36: #define ROFF_CTL(c) \
37: ('.' == (c) || '\'' == (c))
38:
1.1 schwarze 39: enum rofft {
1.2 schwarze 40: ROFF_am,
41: ROFF_ami,
42: ROFF_am1,
1.1 schwarze 43: ROFF_de,
44: ROFF_dei,
1.2 schwarze 45: ROFF_de1,
46: ROFF_ds,
47: ROFF_el,
48: ROFF_ie,
49: ROFF_if,
1.1 schwarze 50: ROFF_ig,
1.14 schwarze 51: ROFF_nr,
1.2 schwarze 52: ROFF_rm,
1.14 schwarze 53: ROFF_so,
1.2 schwarze 54: ROFF_tr,
55: ROFF_cblock,
1.13 schwarze 56: ROFF_ccond, /* FIXME: remove this. */
1.16 schwarze 57: ROFF_USERDEF,
1.1 schwarze 58: ROFF_MAX
59: };
60:
1.2 schwarze 61: enum roffrule {
62: ROFFRULE_ALLOW,
63: ROFFRULE_DENY
64: };
65:
1.8 schwarze 66:
67: struct roffstr {
68: char *name; /* key of symbol */
69: char *string; /* current value */
70: struct roffstr *next; /* next in list */
71: };
72:
1.1 schwarze 73: struct roff {
74: struct roffnode *last; /* leaf of stack */
75: mandocmsg msg; /* err/warn/fatal messages */
76: void *data; /* privdata for messages */
1.2 schwarze 77: enum roffrule rstack[RSTACK_MAX]; /* stack of !`ie' rules */
78: int rstackpos; /* position in rstack */
1.6 schwarze 79: struct regset *regs; /* read/writable registers */
1.16 schwarze 80: struct roffstr *first_string; /* user-defined strings & macros */
81: const char *current_string; /* value of last called user macro */
1.1 schwarze 82: };
83:
84: struct roffnode {
85: enum rofft tok; /* type of node */
86: struct roffnode *parent; /* up one in stack */
87: int line; /* parse line */
88: int col; /* parse col */
1.16 schwarze 89: char *name; /* node name, e.g. macro name */
1.2 schwarze 90: char *end; /* end-rules: custom token */
91: int endspan; /* end-rules: next-line or infty */
92: enum roffrule rule; /* current evaluation rule */
1.1 schwarze 93: };
94:
/*
 * Common parameter list of all request handlers:
 *   r     parse context
 *   tok   token of the request being handled
 *   bufp  input buffer, may be modified or replaced
 *   szp   size of the input buffer
 *   ln    input line number
 *   ppos  position in the buffer where the request started
 *   pos   current position in the buffer
 *   offs  out: offset at which the caller resumes in the buffer
 */
#define	ROFF_ARGS	 struct roff *r, \
			 enum rofft tok, \
			 char **bufp, \
			 size_t *szp, \
			 int ln, \
			 int ppos, \
			 int pos, \
			 int *offs

/* Signature shared by all request handlers. */
typedef	enum rofferr (*roffproc)(ROFF_ARGS);

/* Description of one request; see the roffs[] table. */
struct	roffmac {
	const char	*name;	/* request name as written in the input */
	roffproc	 proc;	/* called when the request itself is seen */
	roffproc	 text;	/* called for text lines inside its scope */
	roffproc	 sub;	/* called for control lines inside its scope */
	int		 flags;
#define	ROFFMAC_STRUCT	(1 << 0) /* always interpret */
	struct roffmac	*next;	/* next entry in the same hash bucket */
};
115:
1.2 schwarze 116: static enum rofferr roff_block(ROFF_ARGS);
117: static enum rofferr roff_block_text(ROFF_ARGS);
118: static enum rofferr roff_block_sub(ROFF_ARGS);
119: static enum rofferr roff_cblock(ROFF_ARGS);
120: static enum rofferr roff_ccond(ROFF_ARGS);
121: static enum rofferr roff_cond(ROFF_ARGS);
122: static enum rofferr roff_cond_text(ROFF_ARGS);
123: static enum rofferr roff_cond_sub(ROFF_ARGS);
1.7 schwarze 124: static enum rofferr roff_ds(ROFF_ARGS);
1.8 schwarze 125: static enum roffrule roff_evalcond(const char *, int *);
126: static void roff_freestr(struct roff *);
127: static const char *roff_getstrn(const struct roff *,
128: const char *, size_t);
1.6 schwarze 129: static enum rofferr roff_line(ROFF_ARGS);
130: static enum rofferr roff_nr(ROFF_ARGS);
1.9 schwarze 131: static int roff_res(struct roff *,
132: char **, size_t *, int);
1.8 schwarze 133: static void roff_setstr(struct roff *,
1.16 schwarze 134: const char *, const char *, int);
1.14 schwarze 135: static enum rofferr roff_so(ROFF_ARGS);
1.16 schwarze 136: static enum rofferr roff_userdef(ROFF_ARGS);
1.1 schwarze 137:
1.3 schwarze 138: /* See roff_hash_find() */
139:
140: #define ASCII_HI 126
141: #define ASCII_LO 33
142: #define HASHWIDTH (ASCII_HI - ASCII_LO + 1)
143:
144: static struct roffmac *hash[HASHWIDTH];
145:
146: static struct roffmac roffs[ROFF_MAX] = {
147: { "am", roff_block, roff_block_text, roff_block_sub, 0, NULL },
148: { "ami", roff_block, roff_block_text, roff_block_sub, 0, NULL },
149: { "am1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
150: { "de", roff_block, roff_block_text, roff_block_sub, 0, NULL },
151: { "dei", roff_block, roff_block_text, roff_block_sub, 0, NULL },
152: { "de1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
1.7 schwarze 153: { "ds", roff_ds, NULL, NULL, 0, NULL },
1.3 schwarze 154: { "el", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
155: { "ie", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
156: { "if", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
157: { "ig", roff_block, roff_block_text, roff_block_sub, 0, NULL },
1.14 schwarze 158: { "nr", roff_nr, NULL, NULL, 0, NULL },
1.3 schwarze 159: { "rm", roff_line, NULL, NULL, 0, NULL },
1.14 schwarze 160: { "so", roff_so, NULL, NULL, 0, NULL },
1.3 schwarze 161: { "tr", roff_line, NULL, NULL, 0, NULL },
162: { ".", roff_cblock, NULL, NULL, 0, NULL },
163: { "\\}", roff_ccond, NULL, NULL, 0, NULL },
1.16 schwarze 164: { NULL, roff_userdef, NULL, NULL, 0, NULL },
1.1 schwarze 165: };
166:
167: static void roff_free1(struct roff *);
1.16 schwarze 168: static enum rofft roff_hash_find(const char *, size_t);
1.3 schwarze 169: static void roff_hash_init(void);
1.2 schwarze 170: static void roffnode_cleanscope(struct roff *);
1.16 schwarze 171: static void roffnode_push(struct roff *, enum rofft,
172: const char *, int, int);
1.1 schwarze 173: static void roffnode_pop(struct roff *);
1.16 schwarze 174: static enum rofft roff_parse(struct roff *, const char *, int *);
1.6 schwarze 175: static int roff_parse_nat(const char *, unsigned int *);
1.1 schwarze 176:
1.3 schwarze 177: /* See roff_hash_find() */
178: #define ROFF_HASH(p) (p[0] - ASCII_LO)
179:
180: static void
181: roff_hash_init(void)
182: {
183: struct roffmac *n;
184: int buc, i;
185:
1.16 schwarze 186: for (i = 0; i < (int)ROFF_USERDEF; i++) {
1.3 schwarze 187: assert(roffs[i].name[0] >= ASCII_LO);
188: assert(roffs[i].name[0] <= ASCII_HI);
189:
190: buc = ROFF_HASH(roffs[i].name);
191:
192: if (NULL != (n = hash[buc])) {
193: for ( ; n->next; n = n->next)
194: /* Do nothing. */ ;
195: n->next = &roffs[i];
196: } else
197: hash[buc] = &roffs[i];
198: }
199: }
200:
1.1 schwarze 201:
202: /*
203: * Look up a roff token by its name. Returns ROFF_MAX if no macro by
204: * the nil-terminated string name could be found.
205: */
206: static enum rofft
1.16 schwarze 207: roff_hash_find(const char *p, size_t s)
1.1 schwarze 208: {
1.3 schwarze 209: int buc;
210: struct roffmac *n;
1.1 schwarze 211:
1.3 schwarze 212: /*
213: * libroff has an extremely simple hashtable, for the time
214: * being, which simply keys on the first character, which must
215: * be printable, then walks a chain. It works well enough until
216: * optimised.
217: */
218:
219: if (p[0] < ASCII_LO || p[0] > ASCII_HI)
220: return(ROFF_MAX);
221:
222: buc = ROFF_HASH(p);
223:
224: if (NULL == (n = hash[buc]))
225: return(ROFF_MAX);
226: for ( ; n; n = n->next)
1.16 schwarze 227: if (0 == strncmp(n->name, p, s) && '\0' == n->name[(int)s])
1.3 schwarze 228: return((enum rofft)(n - roffs));
1.1 schwarze 229:
230: return(ROFF_MAX);
231: }
232:
233:
234: /*
235: * Pop the current node off of the stack of roff instructions currently
236: * pending.
237: */
238: static void
239: roffnode_pop(struct roff *r)
240: {
241: struct roffnode *p;
242:
1.2 schwarze 243: assert(r->last);
244: p = r->last;
245:
246: if (ROFF_el == p->tok)
247: if (r->rstackpos > -1)
248: r->rstackpos--;
249:
250: r->last = r->last->parent;
1.16 schwarze 251: free(p->name);
252: free(p->end);
1.1 schwarze 253: free(p);
254: }
255:
256:
257: /*
258: * Push a roff node onto the instruction stack. This must later be
259: * removed with roffnode_pop().
260: */
1.11 schwarze 261: static void
1.16 schwarze 262: roffnode_push(struct roff *r, enum rofft tok, const char *name,
263: int line, int col)
1.1 schwarze 264: {
265: struct roffnode *p;
266:
1.11 schwarze 267: p = mandoc_calloc(1, sizeof(struct roffnode));
1.1 schwarze 268: p->tok = tok;
1.16 schwarze 269: if (name)
270: p->name = mandoc_strdup(name);
1.1 schwarze 271: p->parent = r->last;
272: p->line = line;
273: p->col = col;
1.2 schwarze 274: p->rule = p->parent ? p->parent->rule : ROFFRULE_DENY;
1.1 schwarze 275:
276: r->last = p;
277: }
278:
279:
280: static void
281: roff_free1(struct roff *r)
282: {
283:
284: while (r->last)
285: roffnode_pop(r);
1.8 schwarze 286: roff_freestr(r);
1.1 schwarze 287: }
288:
289:
290: void
291: roff_reset(struct roff *r)
292: {
293:
294: roff_free1(r);
295: }
296:
297:
/*
 * Destroy a parser obtained from roff_alloc(), including all
 * state it still holds.
 */
void
roff_free(struct roff *r)
{

	roff_free1(r);	/* scopes and strings first */
	free(r);
}
305:
306:
307: struct roff *
1.11 schwarze 308: roff_alloc(struct regset *regs, void *data, const mandocmsg msg)
1.1 schwarze 309: {
310: struct roff *r;
311:
1.11 schwarze 312: r = mandoc_calloc(1, sizeof(struct roff));
1.6 schwarze 313: r->regs = regs;
1.1 schwarze 314: r->msg = msg;
315: r->data = data;
1.2 schwarze 316: r->rstackpos = -1;
1.3 schwarze 317:
318: roff_hash_init();
1.1 schwarze 319: return(r);
320: }
321:
322:
/*
 * Expand the first reference to a defined string in the line,
 * starting the search at pos.  References take the forms `\*x',
 * `\*(xx' and `\*[xxx]'.  On expansion, *bufp is replaced by a
 * newly allocated line, *szp is updated and 0 is returned.
 * Returns 1 if nothing was expanded.
 */
static int
roff_res(struct roff *r, char **bufp, size_t *szp, int pos)
{
	const char	*cp, *esc, *name, *val, *tail;
	char		*nbuf;
	size_t		 nsz;
	int		 len, maxl;

	cp = *bufp + pos;

	/* LINTED */
	while (NULL != (esc = strstr(cp, "\\*"))) {
		cp = esc + 2;

		/* A maxl of 0 means: the name runs up to `]'. */
		if ('(' == *cp) {
			cp++;
			maxl = 2;
		} else if ('[' == *cp) {
			cp++;
			maxl = 0;
		} else
			maxl = 1;

		name = cp;

		for (len = 0; 0 == maxl || len < maxl; len++, cp++) {
			if ('\0' == *cp)
				return(1); /* Error. */
			if (0 == maxl && ']' == *cp)
				break;
		}

		val = roff_getstrn(r, name, (size_t)len);

		if (NULL == val) {
			/* Unknown name: go on behind it, and behind `]'. */
			if (0 == maxl)
				cp++;
			continue;
		}

		/* Text following the reference; skip a closing `]'. */
		tail = maxl ? cp : cp + 1;

		nsz = *szp + strlen(val) + 1;
		nbuf = mandoc_malloc(nsz);

		*nbuf = '\0';

		/* Everything before the escape, the value, the rest. */
		strlcat(nbuf, *bufp, (size_t)(esc - *bufp + 1));
		strlcat(nbuf, val, nsz);
		strlcat(nbuf, tail, nsz);

		free(*bufp);

		*bufp = nbuf;
		*szp = nsz;
		return(0);
	}

	return(1);
}
387:
388:
1.1 schwarze 389: enum rofferr
1.6 schwarze 390: roff_parseln(struct roff *r, int ln, char **bufp,
391: size_t *szp, int pos, int *offs)
1.1 schwarze 392: {
393: enum rofft t;
394: int ppos;
395:
1.2 schwarze 396: /*
1.8 schwarze 397: * Run the reserved-word filter only if we have some reserved
398: * words to fill in.
399: */
400:
1.9 schwarze 401: if (r->first_string && ! roff_res(r, bufp, szp, pos))
1.16 schwarze 402: return(ROFF_REPARSE);
1.8 schwarze 403:
404: /*
1.2 schwarze 405: * First, if a scope is open and we're not a macro, pass the
406: * text through the macro's filter. If a scope isn't open and
407: * we're not a macro, just let it through.
408: */
409:
410: if (r->last && ! ROFF_CTL((*bufp)[pos])) {
411: t = r->last->tok;
412: assert(roffs[t].text);
413: return((*roffs[t].text)
1.8 schwarze 414: (r, t, bufp, szp,
415: ln, pos, pos, offs));
1.2 schwarze 416: } else if ( ! ROFF_CTL((*bufp)[pos]))
417: return(ROFF_CONT);
418:
419: /*
420: * If a scope is open, go to the child handler for that macro,
421: * as it may want to preprocess before doing anything with it.
422: */
423:
424: if (r->last) {
1.1 schwarze 425: t = r->last->tok;
426: assert(roffs[t].sub);
1.2 schwarze 427: return((*roffs[t].sub)
1.8 schwarze 428: (r, t, bufp, szp,
429: ln, pos, pos, offs));
1.2 schwarze 430: }
431:
432: /*
433: * Lastly, as we've no scope open, try to look up and execute
434: * the new macro. If no macro is found, simply return and let
435: * the compilers handle it.
436: */
437:
438: ppos = pos;
1.16 schwarze 439: if (ROFF_MAX == (t = roff_parse(r, *bufp, &pos)))
1.1 schwarze 440: return(ROFF_CONT);
441:
1.2 schwarze 442: assert(roffs[t].proc);
443: return((*roffs[t].proc)
1.8 schwarze 444: (r, t, bufp, szp,
445: ln, ppos, pos, offs));
1.2 schwarze 446: }
447:
1.1 schwarze 448:
1.2 schwarze 449: int
450: roff_endparse(struct roff *r)
451: {
1.1 schwarze 452:
1.2 schwarze 453: if (NULL == r->last)
454: return(1);
455: return((*r->msg)(MANDOCERR_SCOPEEXIT, r->data, r->last->line,
456: r->last->col, NULL));
1.1 schwarze 457: }
458:
459:
460: /*
461: * Parse a roff node's type from the input buffer. This must be in the
462: * form of ".foo xxx" in the usual way.
463: */
464: static enum rofft
1.16 schwarze 465: roff_parse(struct roff *r, const char *buf, int *pos)
1.1 schwarze 466: {
1.16 schwarze 467: const char *mac;
468: size_t maclen;
1.1 schwarze 469: enum rofft t;
470:
1.2 schwarze 471: assert(ROFF_CTL(buf[*pos]));
472: (*pos)++;
1.1 schwarze 473:
1.16 schwarze 474: while (' ' == buf[*pos] || '\t' == buf[*pos])
1.1 schwarze 475: (*pos)++;
476:
477: if ('\0' == buf[*pos])
478: return(ROFF_MAX);
479:
1.16 schwarze 480: mac = buf + *pos;
481: maclen = strcspn(mac, " \\\t\0");
1.1 schwarze 482:
1.16 schwarze 483: t = (r->current_string = roff_getstrn(r, mac, maclen))
484: ? ROFF_USERDEF : roff_hash_find(mac, maclen);
1.1 schwarze 485:
1.16 schwarze 486: *pos += maclen;
1.1 schwarze 487: while (buf[*pos] && ' ' == buf[*pos])
488: (*pos)++;
489:
490: return(t);
491: }
492:
493:
/*
 * Convert the whole string buf to a non-negative decimal number
 * no larger than INT_MAX.  Returns 1 and stores the number in *res
 * on success.  Returns 0 and leaves *res alone if buf is empty,
 * has trailing garbage, or is out of range.
 */
static int
roff_parse_nat(const char *buf, unsigned int *res)
{
	char	*end;
	long	 num;

	errno = 0;
	num = strtol(buf, &end, 10);

	/* The string must be consumed completely. */
	if ('\0' == buf[0] || '\0' != *end)
		return(0);

	/* Out of range for a long. */
	if (ERANGE == errno && (LONG_MAX == num || LONG_MIN == num))
		return(0);

	/* Out of range for a natural number fitting an int. */
	if (num < 0 || num > INT_MAX)
		return(0);

	*res = (unsigned int)num;
	return(1);
}
512:
513:
1.1 schwarze 514: /* ARGSUSED */
515: static enum rofferr
1.2 schwarze 516: roff_cblock(ROFF_ARGS)
1.1 schwarze 517: {
518:
1.2 schwarze 519: /*
520: * A block-close `..' should only be invoked as a child of an
521: * ignore macro, otherwise raise a warning and just ignore it.
522: */
523:
524: if (NULL == r->last) {
525: if ( ! (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL))
526: return(ROFF_ERR);
527: return(ROFF_IGN);
528: }
1.1 schwarze 529:
1.2 schwarze 530: switch (r->last->tok) {
531: case (ROFF_am):
532: /* FALLTHROUGH */
533: case (ROFF_ami):
534: /* FALLTHROUGH */
535: case (ROFF_am1):
536: /* FALLTHROUGH */
537: case (ROFF_de):
538: /* FALLTHROUGH */
539: case (ROFF_dei):
540: /* FALLTHROUGH */
541: case (ROFF_de1):
542: /* FALLTHROUGH */
543: case (ROFF_ig):
544: break;
545: default:
546: if ( ! (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL))
547: return(ROFF_ERR);
1.1 schwarze 548: return(ROFF_IGN);
1.2 schwarze 549: }
550:
551: if ((*bufp)[pos])
552: if ( ! (*r->msg)(MANDOCERR_ARGSLOST, r->data, ln, pos, NULL))
553: return(ROFF_ERR);
554:
555: roffnode_pop(r);
556: roffnode_cleanscope(r);
557: return(ROFF_IGN);
558:
559: }
1.1 schwarze 560:
561:
1.2 schwarze 562: static void
563: roffnode_cleanscope(struct roff *r)
564: {
1.1 schwarze 565:
1.2 schwarze 566: while (r->last) {
567: if (--r->last->endspan < 0)
568: break;
569: roffnode_pop(r);
570: }
571: }
1.1 schwarze 572:
573:
1.2 schwarze 574: /* ARGSUSED */
575: static enum rofferr
576: roff_ccond(ROFF_ARGS)
577: {
1.1 schwarze 578:
1.2 schwarze 579: if (NULL == r->last) {
580: if ( ! (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL))
581: return(ROFF_ERR);
1.1 schwarze 582: return(ROFF_IGN);
1.2 schwarze 583: }
1.1 schwarze 584:
1.2 schwarze 585: switch (r->last->tok) {
586: case (ROFF_el):
587: /* FALLTHROUGH */
588: case (ROFF_ie):
589: /* FALLTHROUGH */
590: case (ROFF_if):
591: break;
592: default:
593: if ( ! (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL))
594: return(ROFF_ERR);
595: return(ROFF_IGN);
596: }
1.1 schwarze 597:
1.2 schwarze 598: if (r->last->endspan > -1) {
599: if ( ! (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL))
600: return(ROFF_ERR);
1.1 schwarze 601: return(ROFF_IGN);
1.2 schwarze 602: }
603:
604: if ((*bufp)[pos])
605: if ( ! (*r->msg)(MANDOCERR_ARGSLOST, r->data, ln, pos, NULL))
606: return(ROFF_ERR);
1.1 schwarze 607:
1.2 schwarze 608: roffnode_pop(r);
609: roffnode_cleanscope(r);
1.1 schwarze 610: return(ROFF_IGN);
611: }
612:
613:
614: /* ARGSUSED */
615: static enum rofferr
1.2 schwarze 616: roff_block(ROFF_ARGS)
1.1 schwarze 617: {
1.2 schwarze 618: int sv;
619: size_t sz;
1.16 schwarze 620: char *name;
621:
622: name = NULL;
1.2 schwarze 623:
1.16 schwarze 624: if (ROFF_ig != tok) {
625: if ('\0' == (*bufp)[pos]) {
626: (*r->msg)(MANDOCERR_NOARGS, r->data, ln, ppos, NULL);
627: return(ROFF_IGN);
628: }
629: if (ROFF_de == tok)
630: name = *bufp + pos;
1.2 schwarze 631: while ((*bufp)[pos] && ' ' != (*bufp)[pos])
632: pos++;
633: while (' ' == (*bufp)[pos])
1.16 schwarze 634: (*bufp)[pos++] = '\0';
1.2 schwarze 635: }
636:
1.16 schwarze 637: roffnode_push(r, tok, name, ln, ppos);
638:
639: /*
640: * At the beginning of a `de' macro, clear the existing string
641: * with the same name, if there is one. New content will be
642: * added from roff_block_text() in multiline mode.
643: */
644: if (ROFF_de == tok)
645: roff_setstr(r, name, NULL, 0);
1.2 schwarze 646:
647: if ('\0' == (*bufp)[pos])
648: return(ROFF_IGN);
1.1 schwarze 649:
1.2 schwarze 650: sv = pos;
651: while ((*bufp)[pos] && ' ' != (*bufp)[pos] &&
652: '\t' != (*bufp)[pos])
653: pos++;
654:
655: /*
656: * Note: groff does NOT like escape characters in the input.
657: * Instead of detecting this, we're just going to let it fly and
658: * to hell with it.
659: */
660:
661: assert(pos > sv);
662: sz = (size_t)(pos - sv);
663:
664: if (1 == sz && '.' == (*bufp)[sv])
665: return(ROFF_IGN);
666:
1.11 schwarze 667: r->last->end = mandoc_malloc(sz + 1);
1.2 schwarze 668:
669: memcpy(r->last->end, *bufp + sv, sz);
670: r->last->end[(int)sz] = '\0';
671:
672: if ((*bufp)[pos])
673: if ( ! (*r->msg)(MANDOCERR_ARGSLOST, r->data, ln, pos, NULL))
674: return(ROFF_ERR);
1.1 schwarze 675:
676: return(ROFF_IGN);
677: }
678:
679:
680: /* ARGSUSED */
681: static enum rofferr
1.2 schwarze 682: roff_block_sub(ROFF_ARGS)
1.1 schwarze 683: {
1.2 schwarze 684: enum rofft t;
685: int i, j;
686:
687: /*
688: * First check whether a custom macro exists at this level. If
689: * it does, then check against it. This is some of groff's
690: * stranger behaviours. If we encountered a custom end-scope
691: * tag and that tag also happens to be a "real" macro, then we
692: * need to try interpreting it again as a real macro. If it's
693: * not, then return ignore. Else continue.
694: */
695:
696: if (r->last->end) {
697: i = pos + 1;
698: while (' ' == (*bufp)[i] || '\t' == (*bufp)[i])
699: i++;
700:
701: for (j = 0; r->last->end[j]; j++, i++)
702: if ((*bufp)[i] != r->last->end[j])
703: break;
1.1 schwarze 704:
1.2 schwarze 705: if ('\0' == r->last->end[j] &&
706: ('\0' == (*bufp)[i] ||
707: ' ' == (*bufp)[i] ||
708: '\t' == (*bufp)[i])) {
709: roffnode_pop(r);
710: roffnode_cleanscope(r);
1.1 schwarze 711:
1.16 schwarze 712: if (ROFF_MAX != roff_parse(r, *bufp, &pos))
1.2 schwarze 713: return(ROFF_RERUN);
714: return(ROFF_IGN);
715: }
1.1 schwarze 716: }
717:
1.2 schwarze 718: /*
719: * If we have no custom end-query or lookup failed, then try
720: * pulling it out of the hashtable.
721: */
1.1 schwarze 722:
1.2 schwarze 723: ppos = pos;
1.16 schwarze 724: t = roff_parse(r, *bufp, &pos);
1.1 schwarze 725:
1.16 schwarze 726: /*
727: * Macros other than block-end are only significant
728: * in `de' blocks; elsewhere, simply throw them away.
729: */
730: if (ROFF_cblock != t) {
731: if (ROFF_de == tok)
732: roff_setstr(r, r->last->name, *bufp + ppos, 1);
1.1 schwarze 733: return(ROFF_IGN);
1.16 schwarze 734: }
1.1 schwarze 735:
1.2 schwarze 736: assert(roffs[t].proc);
1.6 schwarze 737: return((*roffs[t].proc)(r, t, bufp, szp,
738: ln, ppos, pos, offs));
1.2 schwarze 739: }
740:
741:
742: /* ARGSUSED */
743: static enum rofferr
744: roff_block_text(ROFF_ARGS)
745: {
746:
1.16 schwarze 747: if (ROFF_de == tok)
748: roff_setstr(r, r->last->name, *bufp + pos, 1);
749:
1.2 schwarze 750: return(ROFF_IGN);
751: }
752:
753:
754: /* ARGSUSED */
755: static enum rofferr
756: roff_cond_sub(ROFF_ARGS)
757: {
758: enum rofft t;
759: enum roffrule rr;
760:
761: ppos = pos;
762: rr = r->last->rule;
763:
1.5 schwarze 764: /*
765: * Clean out scope. If we've closed ourselves, then don't
766: * continue.
767: */
768:
769: roffnode_cleanscope(r);
770:
1.16 schwarze 771: if (ROFF_MAX == (t = roff_parse(r, *bufp, &pos))) {
1.12 schwarze 772: if ('\\' == (*bufp)[pos] && '}' == (*bufp)[pos + 1])
773: return(roff_ccond
774: (r, ROFF_ccond, bufp, szp,
775: ln, pos, pos + 2, offs));
1.2 schwarze 776: return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);
1.12 schwarze 777: }
1.2 schwarze 778:
779: /*
780: * A denied conditional must evaluate its children if and only
781: * if they're either structurally required (such as loops and
782: * conditionals) or a closing macro.
783: */
784: if (ROFFRULE_DENY == rr)
785: if ( ! (ROFFMAC_STRUCT & roffs[t].flags))
786: if (ROFF_ccond != t)
787: return(ROFF_IGN);
788:
789: assert(roffs[t].proc);
1.6 schwarze 790: return((*roffs[t].proc)(r, t, bufp, szp,
791: ln, ppos, pos, offs));
1.2 schwarze 792: }
793:
794:
795: /* ARGSUSED */
796: static enum rofferr
797: roff_cond_text(ROFF_ARGS)
798: {
799: char *ep, *st;
800: enum roffrule rr;
801:
802: rr = r->last->rule;
1.1 schwarze 803:
804: /*
1.2 schwarze 805: * We display the value of the text if out current evaluation
806: * scope permits us to do so.
1.1 schwarze 807: */
1.13 schwarze 808:
809: /* FIXME: use roff_ccond? */
1.1 schwarze 810:
1.2 schwarze 811: st = &(*bufp)[pos];
812: if (NULL == (ep = strstr(st, "\\}"))) {
813: roffnode_cleanscope(r);
814: return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);
815: }
816:
1.4 schwarze 817: if (ep == st || (ep > st && '\\' != *(ep - 1)))
1.2 schwarze 818: roffnode_pop(r);
819:
820: roffnode_cleanscope(r);
821: return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);
822: }
823:
824:
1.5 schwarze 825: static enum roffrule
826: roff_evalcond(const char *v, int *pos)
827: {
828:
829: switch (v[*pos]) {
830: case ('n'):
831: (*pos)++;
832: return(ROFFRULE_ALLOW);
833: case ('e'):
834: /* FALLTHROUGH */
835: case ('o'):
836: /* FALLTHROUGH */
837: case ('t'):
838: (*pos)++;
839: return(ROFFRULE_DENY);
840: default:
841: break;
842: }
843:
844: while (v[*pos] && ' ' != v[*pos])
845: (*pos)++;
846: return(ROFFRULE_DENY);
847: }
848:
849:
1.2 schwarze 850: /* ARGSUSED */
851: static enum rofferr
1.6 schwarze 852: roff_line(ROFF_ARGS)
853: {
854:
855: return(ROFF_IGN);
856: }
857:
858:
859: /* ARGSUSED */
860: static enum rofferr
1.2 schwarze 861: roff_cond(ROFF_ARGS)
862: {
863: int sv;
1.5 schwarze 864: enum roffrule rule;
1.2 schwarze 865:
866: /* Stack overflow! */
867:
868: if (ROFF_ie == tok && r->rstackpos == RSTACK_MAX - 1) {
1.1 schwarze 869: (*r->msg)(MANDOCERR_MEM, r->data, ln, ppos, NULL);
870: return(ROFF_ERR);
871: }
872:
1.5 schwarze 873: /* First, evaluate the conditional. */
1.2 schwarze 874:
1.5 schwarze 875: if (ROFF_el == tok) {
876: /*
877: * An `.el' will get the value of the current rstack
878: * entry set in prior `ie' calls or defaults to DENY.
879: */
880: if (r->rstackpos < 0)
881: rule = ROFFRULE_DENY;
882: else
883: rule = r->rstack[r->rstackpos];
884: } else
885: rule = roff_evalcond(*bufp, &pos);
1.2 schwarze 886:
887: sv = pos;
1.5 schwarze 888:
1.2 schwarze 889: while (' ' == (*bufp)[pos])
890: pos++;
891:
892: /*
893: * Roff is weird. If we have just white-space after the
894: * conditional, it's considered the BODY and we exit without
895: * really doing anything. Warn about this. It's probably
896: * wrong.
897: */
1.5 schwarze 898:
1.2 schwarze 899: if ('\0' == (*bufp)[pos] && sv != pos) {
1.5 schwarze 900: if ((*r->msg)(MANDOCERR_NOARGS, r->data, ln, ppos, NULL))
901: return(ROFF_IGN);
902: return(ROFF_ERR);
1.2 schwarze 903: }
904:
1.16 schwarze 905: roffnode_push(r, tok, NULL, ln, ppos);
1.2 schwarze 906:
1.5 schwarze 907: r->last->rule = rule;
1.2 schwarze 908:
909: if (ROFF_ie == tok) {
910: /*
911: * An if-else will put the NEGATION of the current
912: * evaluated conditional into the stack.
913: */
914: r->rstackpos++;
915: if (ROFFRULE_DENY == r->last->rule)
916: r->rstack[r->rstackpos] = ROFFRULE_ALLOW;
917: else
918: r->rstack[r->rstackpos] = ROFFRULE_DENY;
919: }
1.5 schwarze 920:
921: /* If the parent has false as its rule, then so do we. */
922:
1.2 schwarze 923: if (r->last->parent && ROFFRULE_DENY == r->last->parent->rule)
924: r->last->rule = ROFFRULE_DENY;
1.5 schwarze 925:
926: /*
927: * Determine scope. If we're invoked with "\{" trailing the
928: * conditional, then we're in a multiline scope. Else our scope
929: * expires on the next line.
930: */
1.2 schwarze 931:
932: r->last->endspan = 1;
933:
934: if ('\\' == (*bufp)[pos] && '{' == (*bufp)[pos + 1]) {
935: r->last->endspan = -1;
936: pos += 2;
937: }
938:
939: /*
940: * If there are no arguments on the line, the next-line scope is
941: * assumed.
942: */
943:
944: if ('\0' == (*bufp)[pos])
945: return(ROFF_IGN);
946:
947: /* Otherwise re-run the roff parser after recalculating. */
1.1 schwarze 948:
1.2 schwarze 949: *offs = pos;
950: return(ROFF_RERUN);
1.1 schwarze 951: }
952:
953:
1.2 schwarze 954: /* ARGSUSED */
955: static enum rofferr
1.7 schwarze 956: roff_ds(ROFF_ARGS)
957: {
1.10 schwarze 958: char *name, *string;
959:
960: /*
961: * A symbol is named by the first word following the macro
962: * invocation up to a space. Its value is anything after the
963: * name's trailing whitespace and optional double-quote. Thus,
964: *
965: * [.ds foo "bar " ]
966: *
967: * will have `bar " ' as its value.
968: */
1.7 schwarze 969:
970: name = *bufp + pos;
971: if ('\0' == *name)
972: return(ROFF_IGN);
973:
974: string = name;
1.10 schwarze 975: /* Read until end of name. */
1.7 schwarze 976: while (*string && ' ' != *string)
977: string++;
1.10 schwarze 978:
979: /* Nil-terminate name. */
1.7 schwarze 980: if (*string)
1.10 schwarze 981: *(string++) = '\0';
982:
983: /* Read past spaces. */
984: while (*string && ' ' == *string)
985: string++;
986:
987: /* Read passed initial double-quote. */
1.7 schwarze 988: if (*string && '"' == *string)
989: string++;
990:
1.10 schwarze 991: /* The rest is the value. */
1.16 schwarze 992: roff_setstr(r, name, string, 0);
1.7 schwarze 993: return(ROFF_IGN);
994: }
995:
996:
997: /* ARGSUSED */
998: static enum rofferr
1.6 schwarze 999: roff_nr(ROFF_ARGS)
1.1 schwarze 1000: {
1.6 schwarze 1001: const char *key, *val;
1002: struct reg *rg;
1003:
1004: key = &(*bufp)[pos];
1005: rg = r->regs->regs;
1006:
1007: /* Parse register request. */
1008: while ((*bufp)[pos] && ' ' != (*bufp)[pos])
1009: pos++;
1010:
1011: /*
1012: * Set our nil terminator. Because this line is going to be
1013: * ignored anyway, we can munge it as we please.
1014: */
1015: if ((*bufp)[pos])
1016: (*bufp)[pos++] = '\0';
1017:
1018: /* Skip whitespace to register token. */
1019: while ((*bufp)[pos] && ' ' == (*bufp)[pos])
1020: pos++;
1021:
1022: val = &(*bufp)[pos];
1023:
1024: /* Process register token. */
1025:
1026: if (0 == strcmp(key, "nS")) {
1027: rg[(int)REG_nS].set = 1;
1028: if ( ! roff_parse_nat(val, &rg[(int)REG_nS].v.u))
1029: rg[(int)REG_nS].v.u = 0;
1030: }
1.1 schwarze 1031:
1.2 schwarze 1032: return(ROFF_IGN);
1.14 schwarze 1033: }
1034:
1035:
1036: /* ARGSUSED */
1037: static enum rofferr
1038: roff_so(ROFF_ARGS)
1039: {
1040: char *name;
1.15 schwarze 1041:
1042: (*r->msg)(MANDOCERR_SO, r->data, ln, ppos, NULL);
1.14 schwarze 1043:
1044: name = *bufp + pos;
1045: if ('/' == *name || strstr(name, "../") || strstr(name, "/..")) {
1046: (*r->msg)(MANDOCERR_SOPATH, r->data, ln, pos, NULL);
1047: return(ROFF_ERR);
1048: }
1049:
1050: *offs = pos;
1051: return(ROFF_SO);
1.7 schwarze 1052: }
1053:
1054:
1.16 schwarze 1055: /* ARGSUSED */
1056: static enum rofferr
1057: roff_userdef(ROFF_ARGS)
1.12 schwarze 1058: {
1.16 schwarze 1059: const char *arg[9];
1060: char *cp, *n1, *n2;
1.17 ! schwarze 1061: int i, quoted, pairs;
1.12 schwarze 1062:
1.16 schwarze 1063: /*
1064: * Collect pointers to macro argument strings
1065: * and null-terminate them.
1066: */
1067: cp = *bufp + pos;
1068: for (i = 0; i < 9; i++) {
1.17 ! schwarze 1069: /* Quoting can only start with a new word. */
! 1070: if ('"' == *cp) {
! 1071: quoted = 1;
! 1072: cp++;
! 1073: } else
! 1074: quoted = 0;
1.16 schwarze 1075: arg[i] = cp;
1.17 ! schwarze 1076: for (pairs = 0; '\0' != *cp; cp++) {
! 1077: /* Unquoted arguments end at blanks. */
! 1078: if (0 == quoted) {
! 1079: if (' ' == *cp)
! 1080: break;
! 1081: continue;
! 1082: }
! 1083: /* After pairs of quotes, move left. */
! 1084: if (pairs)
! 1085: cp[-pairs] = cp[0];
! 1086: /* Pairs of quotes do not end words, ... */
! 1087: if ('"' == cp[0] && '"' == cp[1]) {
! 1088: pairs++;
! 1089: cp++;
! 1090: continue;
! 1091: }
! 1092: /* ... but solitary quotes do. */
! 1093: if ('"' != *cp)
! 1094: continue;
! 1095: if (pairs)
! 1096: cp[-pairs] = '\0';
! 1097: *cp = ' ';
! 1098: break;
! 1099: }
! 1100: /* Last argument; the remaining ones are empty strings. */
1.16 schwarze 1101: if ('\0' == *cp)
1102: continue;
1.17 ! schwarze 1103: /* Null-terminate argument and move to the next one. */
1.16 schwarze 1104: *cp++ = '\0';
1105: while (' ' == *cp)
1106: cp++;
1107: }
1108:
1109: /*
1110: * Expand macro arguments.
1.12 schwarze 1111: */
1.16 schwarze 1112: *szp = 0;
1113: n1 = cp = mandoc_strdup(r->current_string);
1114: while (NULL != (cp = strstr(cp, "\\$"))) {
1115: i = cp[2] - '1';
1116: if (0 > i || 8 < i) {
1117: /* Not an argument invocation. */
1118: cp += 2;
1119: continue;
1120: }
1121:
1122: *szp = strlen(n1) - 3 + strlen(arg[i]) + 1;
1123: n2 = mandoc_malloc(*szp);
1124:
1125: strlcpy(n2, n1, (size_t)(cp - n1 + 1));
1126: strlcat(n2, arg[i], *szp);
1127: strlcat(n2, cp + 3, *szp);
1128:
1129: cp = n2 + (cp - n1);
1130: free(n1);
1131: n1 = n2;
1.12 schwarze 1132: }
1133:
1.16 schwarze 1134: /*
1135: * Replace the macro invocation
1136: * by the expanded macro.
1137: */
1138: free(*bufp);
1139: *bufp = n1;
1140: if (0 == *szp)
1141: *szp = strlen(*bufp) + 1;
1142:
1143: return(*szp && '\n' == (*bufp)[(int)*szp - 2] ?
1144: ROFF_REPARSE : ROFF_APPEND);
1.12 schwarze 1145: }
1146:
1.16 schwarze 1147: /*
1148: * Store *string into the user-defined string called *name.
1149: * In multiline mode, append to an existing entry and append '\n';
1150: * else replace the existing entry, if there is one.
1151: * To clear an existing entry, call with (*r, *name, NULL, 0).
1152: */
1.8 schwarze 1153: static void
1.16 schwarze 1154: roff_setstr(struct roff *r, const char *name, const char *string,
1155: int multiline)
1.7 schwarze 1156: {
1157: struct roffstr *n;
1.16 schwarze 1158: char *c;
1159: size_t oldch, newch;
1.7 schwarze 1160:
1.16 schwarze 1161: /* Search for an existing string with the same name. */
1.8 schwarze 1162: n = r->first_string;
1.7 schwarze 1163: while (n && strcmp(name, n->name))
1164: n = n->next;
1.8 schwarze 1165:
1166: if (NULL == n) {
1.16 schwarze 1167: /* Create a new string table entry. */
1.8 schwarze 1168: n = mandoc_malloc(sizeof(struct roffstr));
1.16 schwarze 1169: n->name = mandoc_strdup(name);
1170: n->string = NULL;
1.8 schwarze 1171: n->next = r->first_string;
1172: r->first_string = n;
1.16 schwarze 1173: } else if (0 == multiline) {
1174: /* In multiline mode, append; else replace. */
1.7 schwarze 1175: free(n->string);
1.16 schwarze 1176: n->string = NULL;
1177: }
1178:
1179: if (NULL == string)
1180: return;
1181:
1182: /*
1183: * One additional byte for the '\n' in multiline mode,
1184: * and one for the terminating '\0'.
1185: */
1186: newch = strlen(string) + (multiline ? 2 : 1);
1187: if (NULL == n->string) {
1188: n->string = mandoc_malloc(newch);
1189: *n->string = '\0';
1190: oldch = 0;
1191: } else {
1192: oldch = strlen(n->string);
1193: n->string = mandoc_realloc(n->string, oldch + newch);
1194: }
1195:
1196: /* Skip existing content in the destination buffer. */
1197: c = n->string + oldch;
1198:
1199: /* Append new content to the destination buffer. */
1200: while (*string) {
1201: /*
1202: * Rudimentary roff copy mode:
1203: * Handle escaped backslashes.
1204: */
1205: if ('\\' == *string && '\\' == *(string + 1))
1206: string++;
1207: *c++ = *string++;
1208: }
1.8 schwarze 1209:
1.16 schwarze 1210: /* Append terminating bytes. */
1211: if (multiline)
1212: *c++ = '\n';
1213: *c = '\0';
1.7 schwarze 1214: }
1215:
1216:
1.8 schwarze 1217: static const char *
1218: roff_getstrn(const struct roff *r, const char *name, size_t len)
1.7 schwarze 1219: {
1.8 schwarze 1220: const struct roffstr *n;
1.7 schwarze 1221:
1.8 schwarze 1222: n = r->first_string;
1.10 schwarze 1223: while (n && (strncmp(name, n->name, len) || '\0' != n->name[(int)len]))
1.7 schwarze 1224: n = n->next;
1.8 schwarze 1225:
1226: return(n ? n->string : NULL);
1.7 schwarze 1227: }
1228:
1.8 schwarze 1229:
1230: static void
1231: roff_freestr(struct roff *r)
1.7 schwarze 1232: {
1233: struct roffstr *n, *nn;
1234:
1.8 schwarze 1235: for (n = r->first_string; n; n = nn) {
1.7 schwarze 1236: free(n->name);
1237: free(n->string);
1238: nn = n->next;
1239: free(n);
1240: }
1.8 schwarze 1241:
1242: r->first_string = NULL;
1.1 schwarze 1243: }