Annotation of src/usr.bin/mandoc/roff.c, Revision 1.43
1.43 ! schwarze 1: /* $Id: roff.c,v 1.42 2011/09/18 15:54:48 schwarze Exp $ */
1.1 schwarze 2: /*
1.27 schwarze 3: * Copyright (c) 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
1.25 schwarze 4: * Copyright (c) 2010, 2011 Ingo Schwarze <schwarze@openbsd.org>
1.1 schwarze 5: *
6: * Permission to use, copy, modify, and distribute this software for any
7: * purpose with or without fee is hereby granted, provided that the above
8: * copyright notice and this permission notice appear in all copies.
9: *
1.16 schwarze 10: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
1.1 schwarze 11: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
1.16 schwarze 12: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
1.1 schwarze 13: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17: */
18: #include <assert.h>
1.3 schwarze 19: #include <ctype.h>
1.1 schwarze 20: #include <stdlib.h>
21: #include <string.h>
22:
23: #include "mandoc.h"
1.27 schwarze 24: #include "libroff.h"
1.8 schwarze 25: #include "libmandoc.h"
1.1 schwarze 26:
/* Depth limit for the stack of nested if-else conditional rules. */
#define	RSTACK_MAX	128

/*
 * Upper bound on the number of string expansions performed on a
 * single input line; used to break out of infinite expansion loops.
 */
#define	EXPAND_LIMIT	1000
! 32:
/*
 * Identifiers for the roff requests handled in this file.  The values
 * are used as indices into the roffs[] dispatch table, so the order of
 * the enumerators has to match the order of that table.
 */
enum rofft {
	ROFF_ad = 0,
	ROFF_am,
	ROFF_ami,
	ROFF_am1,
	ROFF_de,
	ROFF_dei,
	ROFF_de1,
	ROFF_ds,
	ROFF_el,
	ROFF_hy,
	ROFF_ie,
	ROFF_if,
	ROFF_ig,
	ROFF_it,
	ROFF_ne,
	ROFF_nh,
	ROFF_nr,
	ROFF_ns,
	ROFF_ps,
	ROFF_rm,
	ROFF_so,
	ROFF_ta,
	ROFF_tr,
	ROFF_TS,
	ROFF_TE,
	ROFF_T_,	/* spelled "T&" in the input */
	ROFF_EQ,
	ROFF_EN,
	ROFF_cblock,	/* the block terminator "." */
	ROFF_ccond,	/* the conditional terminator, backslash + '}' */
	ROFF_USERDEF,	/* a user-defined macro; has no fixed name */
	ROFF_MAX	/* number of entries; also means "no such request" */
};
67:
/*
 * Outcome of evaluating a conditional: lines governed by a denied
 * rule are ignored, lines governed by an allowed rule are passed on.
 */
enum roffrule {
	ROFFRULE_ALLOW = 0,
	ROFFRULE_DENY
};
72:
/*
 * One register.  While "set" is zero the register is considered to
 * hold its per-register default rather than the value stored in "u".
 * For now every register is treated as an unsigned integer.
 */
struct reg {
	/* Non-zero once a value has been assigned. */
	int		 set;
	/* The assigned value. */
	unsigned int	 u;
};
82:
/*
 * A minimal string buffer: a pointer plus its cached length.
 */
struct roffstr {
	/* NUL-terminated character data. */
	char		*p;
	/* Cached strlen(p). */
	size_t		 sz;
};
90:
91: /*
92: * A key-value roffstr pair as part of a singly-linked list.
93: */
94: struct roffkv {
95: struct roffstr key;
96: struct roffstr val;
97: struct roffkv *next; /* next in list */
1.8 schwarze 98: };
99:
1.1 schwarze 100: struct roff {
1.35 schwarze 101: struct mparse *parse; /* parse point */
1.1 schwarze 102: struct roffnode *last; /* leaf of stack */
1.2 schwarze 103: enum roffrule rstack[RSTACK_MAX]; /* stack of !`ie' rules */
104: int rstackpos; /* position in rstack */
1.41 schwarze 105: struct reg regs[REG__MAX];
1.42 schwarze 106: struct roffkv *strtab; /* user-defined strings & macros */
107: struct roffkv *xmbtab; /* multi-byte trans table (`tr') */
108: struct roffstr *xtab; /* single-byte trans table (`tr') */
1.16 schwarze 109: const char *current_string; /* value of last called user macro */
1.27 schwarze 110: struct tbl_node *first_tbl; /* first table parsed */
111: struct tbl_node *last_tbl; /* last table parsed */
112: struct tbl_node *tbl; /* current table being parsed */
1.32 schwarze 113: struct eqn_node *last_eqn; /* last equation parsed */
114: struct eqn_node *first_eqn; /* first equation parsed */
115: struct eqn_node *eqn; /* current equation being parsed */
1.1 schwarze 116: };
117:
118: struct roffnode {
119: enum rofft tok; /* type of node */
120: struct roffnode *parent; /* up one in stack */
121: int line; /* parse line */
122: int col; /* parse col */
1.16 schwarze 123: char *name; /* node name, e.g. macro name */
1.2 schwarze 124: char *end; /* end-rules: custom token */
125: int endspan; /* end-rules: next-line or infty */
126: enum roffrule rule; /* current evaluation rule */
1.1 schwarze 127: };
128:
/*
 * Parameter list shared by every request handler:
 *   r     parse context
 *   tok   token of the macro being handled
 *   bufp  input buffer
 *   szp   size of the input buffer
 *   ln    parse line
 *   ppos  original position in the buffer
 *   pos   current position in the buffer
 *   offs  reset offset of the buffer data
 */
#define	ROFF_ARGS \
	struct roff *r, enum rofft tok, char **bufp, size_t *szp, \
	int ln, int ppos, int pos, int *offs

/* Signature of a request handler. */
typedef	enum rofferr (*roffproc)(ROFF_ARGS);
139:
140: struct roffmac {
141: const char *name; /* macro name */
1.2 schwarze 142: roffproc proc; /* process new macro */
143: roffproc text; /* process as child text of macro */
144: roffproc sub; /* process as child of macro */
145: int flags;
146: #define ROFFMAC_STRUCT (1 << 0) /* always interpret */
1.3 schwarze 147: struct roffmac *next;
1.1 schwarze 148: };
149:
/*
 * A predefined string: the name used in the input and the symbol
 * that replaces it.
 */
struct predef {
	const char	*name;
	const char	*str;
};

/* Produces one initializer row of struct predef, comma included. */
#define	PREDEF(name_, str_) \
	{ (name_), (str_) },
157:
/* Request-name hash table. */
static	enum rofft	 roffhash_find(const char *, size_t);
static	void		 roffhash_init(void);

/* Scope stack. */
static	void		 roffnode_cleanscope(struct roff *);
static	void		 roffnode_pop(struct roff *);
static	void		 roffnode_push(struct roff *, enum rofft,
				const char *, int, int);

/* Request handlers; all share the ROFF_ARGS signature. */
static	enum rofferr	 roff_block(ROFF_ARGS);
static	enum rofferr	 roff_block_text(ROFF_ARGS);
static	enum rofferr	 roff_block_sub(ROFF_ARGS);
static	enum rofferr	 roff_cblock(ROFF_ARGS);
static	enum rofferr	 roff_ccond(ROFF_ARGS);
static	enum rofferr	 roff_cond(ROFF_ARGS);
static	enum rofferr	 roff_cond_text(ROFF_ARGS);
static	enum rofferr	 roff_cond_sub(ROFF_ARGS);
static	enum rofferr	 roff_ds(ROFF_ARGS);
static	enum rofferr	 roff_line_ignore(ROFF_ARGS);
static	enum rofferr	 roff_nr(ROFF_ARGS);
static	enum rofferr	 roff_rm(ROFF_ARGS);
static	enum rofferr	 roff_so(ROFF_ARGS);
static	enum rofferr	 roff_tr(ROFF_ARGS);
static	enum rofferr	 roff_TE(ROFF_ARGS);
static	enum rofferr	 roff_TS(ROFF_ARGS);
static	enum rofferr	 roff_EQ(ROFF_ARGS);
static	enum rofferr	 roff_EN(ROFF_ARGS);
static	enum rofferr	 roff_T_(ROFF_ARGS);
static	enum rofferr	 roff_userdef(ROFF_ARGS);

/* Remaining file-local helpers. */
static	enum roffrule	 roff_evalcond(const char *, int *);
static	void		 roff_free1(struct roff *);
static	void		 roff_freestr(struct roffkv *);
static	char		*roff_getname(struct roff *, char **, int, int);
static	const char	*roff_getstrn(const struct roff *,
				const char *, size_t);
static	void		 roff_openeqn(struct roff *, const char *,
				int, int, const char *);
static	enum rofft	 roff_parse(struct roff *, const char *, int *);
static	enum rofferr	 roff_parsetext(char *);
static	void		 roff_res(struct roff *,
				char **, size_t *, int, int);
static	void		 roff_setstr(struct roff *,
				const char *, const char *, int);
static	void		 roff_setstrn(struct roffkv **, const char *,
				size_t, const char *, size_t, int);
1.1 schwarze 200:
/*
 * Bounds of the first-character range accepted for request names and
 * the resulting number of hash buckets; see roffhash_find().
 */
#define	ASCII_HI	126
#define	ASCII_LO	33
#define	HASHWIDTH	(ASCII_HI - ASCII_LO + 1)

/* One chain of requests per possible first character. */
static	struct roffmac	*hash[HASHWIDTH];
208:
209: static struct roffmac roffs[ROFF_MAX] = {
1.21 schwarze 210: { "ad", roff_line_ignore, NULL, NULL, 0, NULL },
1.3 schwarze 211: { "am", roff_block, roff_block_text, roff_block_sub, 0, NULL },
212: { "ami", roff_block, roff_block_text, roff_block_sub, 0, NULL },
213: { "am1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
214: { "de", roff_block, roff_block_text, roff_block_sub, 0, NULL },
215: { "dei", roff_block, roff_block_text, roff_block_sub, 0, NULL },
216: { "de1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
1.7 schwarze 217: { "ds", roff_ds, NULL, NULL, 0, NULL },
1.3 schwarze 218: { "el", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
1.21 schwarze 219: { "hy", roff_line_ignore, NULL, NULL, 0, NULL },
1.3 schwarze 220: { "ie", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
221: { "if", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
222: { "ig", roff_block, roff_block_text, roff_block_sub, 0, NULL },
1.30 schwarze 223: { "it", roff_line_ignore, NULL, NULL, 0, NULL },
1.21 schwarze 224: { "ne", roff_line_ignore, NULL, NULL, 0, NULL },
225: { "nh", roff_line_ignore, NULL, NULL, 0, NULL },
1.14 schwarze 226: { "nr", roff_nr, NULL, NULL, 0, NULL },
1.31 schwarze 227: { "ns", roff_line_ignore, NULL, NULL, 0, NULL },
228: { "ps", roff_line_ignore, NULL, NULL, 0, NULL },
1.29 schwarze 229: { "rm", roff_rm, NULL, NULL, 0, NULL },
1.14 schwarze 230: { "so", roff_so, NULL, NULL, 0, NULL },
1.31 schwarze 231: { "ta", roff_line_ignore, NULL, NULL, 0, NULL },
1.42 schwarze 232: { "tr", roff_tr, NULL, NULL, 0, NULL },
1.27 schwarze 233: { "TS", roff_TS, NULL, NULL, 0, NULL },
234: { "TE", roff_TE, NULL, NULL, 0, NULL },
235: { "T&", roff_T_, NULL, NULL, 0, NULL },
1.32 schwarze 236: { "EQ", roff_EQ, NULL, NULL, 0, NULL },
237: { "EN", roff_EN, NULL, NULL, 0, NULL },
1.3 schwarze 238: { ".", roff_cblock, NULL, NULL, 0, NULL },
239: { "\\}", roff_ccond, NULL, NULL, 0, NULL },
1.16 schwarze 240: { NULL, roff_userdef, NULL, NULL, 0, NULL },
1.1 schwarze 241: };
242:
1.37 schwarze 243: /* Array of injected predefined strings. */
244: #define PREDEFS_MAX 38
245: static const struct predef predefs[PREDEFS_MAX] = {
246: #include "predefs.in"
247: };
248:
/*
 * Map a request name to its hash bucket: the first character, offset
 * by the lowest accepted character.  The argument is parenthesized so
 * that the macro stays correct for arbitrary pointer expressions.
 * See roffhash_find().
 */
#define	ROFF_HASH(p)	((p)[0] - ASCII_LO)
251:
252: static void
1.42 schwarze 253: roffhash_init(void)
1.3 schwarze 254: {
255: struct roffmac *n;
256: int buc, i;
257:
1.16 schwarze 258: for (i = 0; i < (int)ROFF_USERDEF; i++) {
1.3 schwarze 259: assert(roffs[i].name[0] >= ASCII_LO);
260: assert(roffs[i].name[0] <= ASCII_HI);
261:
262: buc = ROFF_HASH(roffs[i].name);
263:
264: if (NULL != (n = hash[buc])) {
265: for ( ; n->next; n = n->next)
266: /* Do nothing. */ ;
267: n->next = &roffs[i];
268: } else
269: hash[buc] = &roffs[i];
270: }
271: }
272:
1.1 schwarze 273: /*
274: * Look up a roff token by its name. Returns ROFF_MAX if no macro by
275: * the nil-terminated string name could be found.
276: */
277: static enum rofft
1.42 schwarze 278: roffhash_find(const char *p, size_t s)
1.1 schwarze 279: {
1.3 schwarze 280: int buc;
281: struct roffmac *n;
1.1 schwarze 282:
1.3 schwarze 283: /*
284: * libroff has an extremely simple hashtable, for the time
285: * being, which simply keys on the first character, which must
286: * be printable, then walks a chain. It works well enough until
287: * optimised.
288: */
289:
290: if (p[0] < ASCII_LO || p[0] > ASCII_HI)
291: return(ROFF_MAX);
292:
293: buc = ROFF_HASH(p);
294:
295: if (NULL == (n = hash[buc]))
296: return(ROFF_MAX);
297: for ( ; n; n = n->next)
1.16 schwarze 298: if (0 == strncmp(n->name, p, s) && '\0' == n->name[(int)s])
1.3 schwarze 299: return((enum rofft)(n - roffs));
1.1 schwarze 300:
301: return(ROFF_MAX);
302: }
303:
304:
305: /*
306: * Pop the current node off of the stack of roff instructions currently
307: * pending.
308: */
309: static void
310: roffnode_pop(struct roff *r)
311: {
312: struct roffnode *p;
313:
1.2 schwarze 314: assert(r->last);
315: p = r->last;
316:
317: r->last = r->last->parent;
1.16 schwarze 318: free(p->name);
319: free(p->end);
1.1 schwarze 320: free(p);
321: }
322:
323:
324: /*
325: * Push a roff node onto the instruction stack. This must later be
326: * removed with roffnode_pop().
327: */
1.11 schwarze 328: static void
1.16 schwarze 329: roffnode_push(struct roff *r, enum rofft tok, const char *name,
330: int line, int col)
1.1 schwarze 331: {
332: struct roffnode *p;
333:
1.11 schwarze 334: p = mandoc_calloc(1, sizeof(struct roffnode));
1.1 schwarze 335: p->tok = tok;
1.16 schwarze 336: if (name)
337: p->name = mandoc_strdup(name);
1.1 schwarze 338: p->parent = r->last;
339: p->line = line;
340: p->col = col;
1.2 schwarze 341: p->rule = p->parent ? p->parent->rule : ROFFRULE_DENY;
1.1 schwarze 342:
343: r->last = p;
344: }
345:
346:
347: static void
348: roff_free1(struct roff *r)
349: {
1.27 schwarze 350: struct tbl_node *t;
1.32 schwarze 351: struct eqn_node *e;
1.42 schwarze 352: int i;
1.27 schwarze 353:
1.32 schwarze 354: while (NULL != (t = r->first_tbl)) {
1.27 schwarze 355: r->first_tbl = t->next;
356: tbl_free(t);
357: }
358:
359: r->first_tbl = r->last_tbl = r->tbl = NULL;
1.1 schwarze 360:
1.32 schwarze 361: while (NULL != (e = r->first_eqn)) {
362: r->first_eqn = e->next;
363: eqn_free(e);
364: }
365:
366: r->first_eqn = r->last_eqn = r->eqn = NULL;
367:
1.1 schwarze 368: while (r->last)
369: roffnode_pop(r);
1.27 schwarze 370:
1.42 schwarze 371: roff_freestr(r->strtab);
372: roff_freestr(r->xmbtab);
373:
374: r->strtab = r->xmbtab = NULL;
375:
376: if (r->xtab)
377: for (i = 0; i < 128; i++)
378: free(r->xtab[i].p);
379:
380: free(r->xtab);
381: r->xtab = NULL;
1.1 schwarze 382: }
383:
384: void
385: roff_reset(struct roff *r)
386: {
1.38 schwarze 387: int i;
1.1 schwarze 388:
389: roff_free1(r);
1.38 schwarze 390:
1.41 schwarze 391: memset(&r->regs, 0, sizeof(struct reg) * REG__MAX);
392:
1.38 schwarze 393: for (i = 0; i < PREDEFS_MAX; i++)
394: roff_setstr(r, predefs[i].name, predefs[i].str, 0);
1.1 schwarze 395: }
396:
397:
/*
 * Destroy a parser: release everything it owns, then the parser
 * structure itself.
 */
void
roff_free(struct roff *r)
{
	roff_free1(r);
	free(r);
}
405:
406:
407: struct roff *
1.41 schwarze 408: roff_alloc(struct mparse *parse)
1.1 schwarze 409: {
410: struct roff *r;
1.37 schwarze 411: int i;
1.1 schwarze 412:
1.11 schwarze 413: r = mandoc_calloc(1, sizeof(struct roff));
1.35 schwarze 414: r->parse = parse;
1.2 schwarze 415: r->rstackpos = -1;
1.3 schwarze 416:
1.42 schwarze 417: roffhash_init();
1.37 schwarze 418:
419: for (i = 0; i < PREDEFS_MAX; i++)
420: roff_setstr(r, predefs[i].name, predefs[i].str, 0);
421:
1.1 schwarze 422: return(r);
423: }
424:
1.8 schwarze 425: /*
426: * Pre-filter each and every line for reserved words (one beginning with
427: * `\*', e.g., `\*(ab'). These must be handled before the actual line
428: * is processed.
1.42 schwarze 429: * This also checks the syntax of regular escapes.
1.8 schwarze 430: */
1.42 schwarze 431: static void
1.37 schwarze 432: roff_res(struct roff *r, char **bufp, size_t *szp, int ln, int pos)
1.8 schwarze 433: {
1.42 schwarze 434: enum mandoc_esc esc;
1.23 schwarze 435: const char *stesc; /* start of an escape sequence ('\\') */
436: const char *stnam; /* start of the name, after "[(*" */
437: const char *cp; /* end of the name, e.g. before ']' */
438: const char *res; /* the string to be substituted */
1.43 ! schwarze 439: int i, maxl, expand_count;
1.8 schwarze 440: size_t nsz;
441: char *n;
442:
1.43 ! schwarze 443: expand_count = 0;
! 444:
1.42 schwarze 445: again:
1.24 schwarze 446: cp = *bufp + pos;
447: while (NULL != (cp = strchr(cp, '\\'))) {
448: stesc = cp++;
1.23 schwarze 449:
450: /*
451: * The second character must be an asterisk.
452: * If it isn't, skip it anyway: It is escaped,
453: * so it can't start another escape sequence.
454: */
455:
1.24 schwarze 456: if ('\0' == *cp)
1.42 schwarze 457: return;
458:
459: if ('*' != *cp) {
460: res = cp;
461: esc = mandoc_escape(&cp, NULL, NULL);
462: if (ESCAPE_ERROR != esc)
463: continue;
464: cp = res;
465: mandoc_msg
466: (MANDOCERR_BADESCAPE, r->parse,
467: ln, (int)(stesc - *bufp), NULL);
468: return;
469: }
470:
471: cp++;
1.23 schwarze 472:
473: /*
474: * The third character decides the length
475: * of the name of the string.
476: * Save a pointer to the name.
477: */
478:
1.24 schwarze 479: switch (*cp) {
480: case ('\0'):
1.42 schwarze 481: return;
1.8 schwarze 482: case ('('):
483: cp++;
484: maxl = 2;
485: break;
486: case ('['):
487: cp++;
488: maxl = 0;
489: break;
490: default:
491: maxl = 1;
492: break;
493: }
1.23 schwarze 494: stnam = cp;
1.8 schwarze 495:
1.23 schwarze 496: /* Advance to the end of the name. */
1.8 schwarze 497:
498: for (i = 0; 0 == maxl || i < maxl; i++, cp++) {
1.42 schwarze 499: if ('\0' == *cp) {
500: mandoc_msg
501: (MANDOCERR_BADESCAPE,
502: r->parse, ln,
503: (int)(stesc - *bufp), NULL);
504: return;
505: }
1.8 schwarze 506: if (0 == maxl && ']' == *cp)
507: break;
508: }
509:
1.23 schwarze 510: /*
511: * Retrieve the replacement string; if it is
512: * undefined, resume searching for escapes.
513: */
514:
515: res = roff_getstrn(r, stnam, (size_t)i);
1.8 schwarze 516:
517: if (NULL == res) {
1.42 schwarze 518: mandoc_msg
519: (MANDOCERR_BADESCAPE, r->parse,
520: ln, (int)(stesc - *bufp), NULL);
1.37 schwarze 521: res = "";
1.8 schwarze 522: }
523:
1.23 schwarze 524: /* Replace the escape sequence by the string. */
525:
1.42 schwarze 526: pos = stesc - *bufp;
527:
1.8 schwarze 528: nsz = *szp + strlen(res) + 1;
529: n = mandoc_malloc(nsz);
530:
1.23 schwarze 531: strlcpy(n, *bufp, (size_t)(stesc - *bufp + 1));
1.8 schwarze 532: strlcat(n, res, nsz);
533: strlcat(n, cp + (maxl ? 0 : 1), nsz);
534:
535: free(*bufp);
536:
537: *bufp = n;
538: *szp = nsz;
1.43 ! schwarze 539:
! 540: if (EXPAND_LIMIT >= ++expand_count)
! 541: goto again;
! 542:
! 543: /* Just leave the string unexpanded. */
! 544: mandoc_msg(MANDOCERR_ROFFLOOP, r->parse, ln, pos, NULL);
! 545: return;
1.42 schwarze 546: }
547: }
548:
549: /*
550: * Process text streams: convert all breakable hyphens into ASCII_HYPH.
551: */
552: static enum rofferr
553: roff_parsetext(char *p)
554: {
555: char l, r;
556: size_t sz;
557: const char *start;
558: enum mandoc_esc esc;
559:
560: start = p;
561:
562: while ('\0' != *p) {
563: sz = strcspn(p, "-\\");
564: p += sz;
565:
566: if ('\0' == *p)
567: break;
568:
569: if ('\\' == *p) {
570: /* Skip over escapes. */
571: p++;
572: esc = mandoc_escape
573: ((const char **)&p, NULL, NULL);
574: if (ESCAPE_ERROR == esc)
575: break;
576: continue;
577: } else if (p == start) {
578: p++;
579: continue;
580: }
581:
582: l = *(p - 1);
583: r = *(p + 1);
584: if ('\\' != l &&
585: '\t' != r && '\t' != l &&
586: ' ' != r && ' ' != l &&
587: '-' != r && '-' != l &&
588: ! isdigit((unsigned char)l) &&
589: ! isdigit((unsigned char)r))
590: *p = ASCII_HYPH;
591: p++;
1.8 schwarze 592: }
593:
1.42 schwarze 594: return(ROFF_CONT);
1.8 schwarze 595: }
596:
1.1 schwarze 597: enum rofferr
1.6 schwarze 598: roff_parseln(struct roff *r, int ln, char **bufp,
599: size_t *szp, int pos, int *offs)
1.1 schwarze 600: {
601: enum rofft t;
1.27 schwarze 602: enum rofferr e;
1.35 schwarze 603: int ppos, ctl;
1.1 schwarze 604:
1.2 schwarze 605: /*
1.8 schwarze 606: * Run the reserved-word filter only if we have some reserved
607: * words to fill in.
608: */
609:
1.42 schwarze 610: roff_res(r, bufp, szp, ln, pos);
1.8 schwarze 611:
1.35 schwarze 612: ppos = pos;
613: ctl = mandoc_getcontrol(*bufp, &pos);
614:
1.8 schwarze 615: /*
1.2 schwarze 616: * First, if a scope is open and we're not a macro, pass the
617: * text through the macro's filter. If a scope isn't open and
618: * we're not a macro, just let it through.
1.32 schwarze 619: * Finally, if there's an equation scope open, divert it into it
620: * no matter our state.
1.2 schwarze 621: */
622:
1.35 schwarze 623: if (r->last && ! ctl) {
1.2 schwarze 624: t = r->last->tok;
625: assert(roffs[t].text);
1.27 schwarze 626: e = (*roffs[t].text)
627: (r, t, bufp, szp, ln, pos, pos, offs);
628: assert(ROFF_IGN == e || ROFF_CONT == e);
1.32 schwarze 629: if (ROFF_CONT != e)
630: return(e);
631: if (r->eqn)
1.41 schwarze 632: return(eqn_read(&r->eqn, ln, *bufp, pos, offs));
1.32 schwarze 633: if (r->tbl)
1.35 schwarze 634: return(tbl_read(r->tbl, ln, *bufp, pos));
1.42 schwarze 635: return(roff_parsetext(*bufp + pos));
1.35 schwarze 636: } else if ( ! ctl) {
1.32 schwarze 637: if (r->eqn)
1.41 schwarze 638: return(eqn_read(&r->eqn, ln, *bufp, pos, offs));
1.27 schwarze 639: if (r->tbl)
1.35 schwarze 640: return(tbl_read(r->tbl, ln, *bufp, pos));
1.42 schwarze 641: return(roff_parsetext(*bufp + pos));
1.32 schwarze 642: } else if (r->eqn)
1.41 schwarze 643: return(eqn_read(&r->eqn, ln, *bufp, ppos, offs));
1.2 schwarze 644:
645: /*
646: * If a scope is open, go to the child handler for that macro,
647: * as it may want to preprocess before doing anything with it.
1.32 schwarze 648: * Don't do so if an equation is open.
1.2 schwarze 649: */
650:
651: if (r->last) {
1.1 schwarze 652: t = r->last->tok;
653: assert(roffs[t].sub);
1.2 schwarze 654: return((*roffs[t].sub)
1.8 schwarze 655: (r, t, bufp, szp,
1.35 schwarze 656: ln, ppos, pos, offs));
1.2 schwarze 657: }
658:
659: /*
660: * Lastly, as we've no scope open, try to look up and execute
661: * the new macro. If no macro is found, simply return and let
662: * the compilers handle it.
663: */
664:
1.16 schwarze 665: if (ROFF_MAX == (t = roff_parse(r, *bufp, &pos)))
1.1 schwarze 666: return(ROFF_CONT);
667:
1.2 schwarze 668: assert(roffs[t].proc);
669: return((*roffs[t].proc)
1.8 schwarze 670: (r, t, bufp, szp,
671: ln, ppos, pos, offs));
1.2 schwarze 672: }
673:
1.1 schwarze 674:
1.27 schwarze 675: void
1.2 schwarze 676: roff_endparse(struct roff *r)
677: {
1.1 schwarze 678:
1.27 schwarze 679: if (r->last)
1.35 schwarze 680: mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
1.27 schwarze 681: r->last->line, r->last->col, NULL);
682:
1.32 schwarze 683: if (r->eqn) {
1.35 schwarze 684: mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
1.41 schwarze 685: r->eqn->eqn.ln, r->eqn->eqn.pos, NULL);
686: eqn_end(&r->eqn);
1.32 schwarze 687: }
688:
1.27 schwarze 689: if (r->tbl) {
1.35 schwarze 690: mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
1.27 schwarze 691: r->tbl->line, r->tbl->pos, NULL);
1.41 schwarze 692: tbl_end(&r->tbl);
1.27 schwarze 693: }
1.1 schwarze 694: }
695:
696: /*
697: * Parse a roff node's type from the input buffer. This must be in the
698: * form of ".foo xxx" in the usual way.
699: */
700: static enum rofft
1.16 schwarze 701: roff_parse(struct roff *r, const char *buf, int *pos)
1.1 schwarze 702: {
1.16 schwarze 703: const char *mac;
704: size_t maclen;
1.1 schwarze 705: enum rofft t;
706:
1.39 schwarze 707: if ('\0' == buf[*pos] || '"' == buf[*pos] ||
708: '\t' == buf[*pos] || ' ' == buf[*pos])
1.1 schwarze 709: return(ROFF_MAX);
710:
1.39 schwarze 711: /*
712: * We stop the macro parse at an escape, tab, space, or nil.
713: * However, `\}' is also a valid macro, so make sure we don't
714: * clobber it by seeing the `\' as the end of token.
715: */
716:
1.16 schwarze 717: mac = buf + *pos;
1.39 schwarze 718: maclen = strcspn(mac + 1, " \\\t\0") + 1;
1.1 schwarze 719:
1.16 schwarze 720: t = (r->current_string = roff_getstrn(r, mac, maclen))
1.42 schwarze 721: ? ROFF_USERDEF : roffhash_find(mac, maclen);
1.1 schwarze 722:
1.34 schwarze 723: *pos += (int)maclen;
1.35 schwarze 724:
1.1 schwarze 725: while (buf[*pos] && ' ' == buf[*pos])
726: (*pos)++;
727:
728: return(t);
729: }
730:
731: /* ARGSUSED */
732: static enum rofferr
1.2 schwarze 733: roff_cblock(ROFF_ARGS)
1.1 schwarze 734: {
735:
1.2 schwarze 736: /*
737: * A block-close `..' should only be invoked as a child of an
738: * ignore macro, otherwise raise a warning and just ignore it.
739: */
740:
741: if (NULL == r->last) {
1.35 schwarze 742: mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1.2 schwarze 743: return(ROFF_IGN);
744: }
1.1 schwarze 745:
1.2 schwarze 746: switch (r->last->tok) {
747: case (ROFF_am):
748: /* FALLTHROUGH */
749: case (ROFF_ami):
750: /* FALLTHROUGH */
751: case (ROFF_am1):
752: /* FALLTHROUGH */
753: case (ROFF_de):
1.23 schwarze 754: /* ROFF_de1 is remapped to ROFF_de in roff_block(). */
1.2 schwarze 755: /* FALLTHROUGH */
756: case (ROFF_dei):
757: /* FALLTHROUGH */
758: case (ROFF_ig):
759: break;
760: default:
1.35 schwarze 761: mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1.1 schwarze 762: return(ROFF_IGN);
1.2 schwarze 763: }
764:
765: if ((*bufp)[pos])
1.35 schwarze 766: mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL);
1.2 schwarze 767:
768: roffnode_pop(r);
769: roffnode_cleanscope(r);
770: return(ROFF_IGN);
771:
772: }
1.1 schwarze 773:
774:
1.2 schwarze 775: static void
776: roffnode_cleanscope(struct roff *r)
777: {
1.1 schwarze 778:
1.2 schwarze 779: while (r->last) {
780: if (--r->last->endspan < 0)
781: break;
782: roffnode_pop(r);
783: }
784: }
1.1 schwarze 785:
786:
1.2 schwarze 787: /* ARGSUSED */
788: static enum rofferr
789: roff_ccond(ROFF_ARGS)
790: {
1.1 schwarze 791:
1.2 schwarze 792: if (NULL == r->last) {
1.35 schwarze 793: mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1.1 schwarze 794: return(ROFF_IGN);
1.2 schwarze 795: }
1.1 schwarze 796:
1.2 schwarze 797: switch (r->last->tok) {
798: case (ROFF_el):
799: /* FALLTHROUGH */
800: case (ROFF_ie):
801: /* FALLTHROUGH */
802: case (ROFF_if):
803: break;
804: default:
1.35 schwarze 805: mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1.2 schwarze 806: return(ROFF_IGN);
807: }
1.1 schwarze 808:
1.2 schwarze 809: if (r->last->endspan > -1) {
1.35 schwarze 810: mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1.1 schwarze 811: return(ROFF_IGN);
1.2 schwarze 812: }
813:
814: if ((*bufp)[pos])
1.35 schwarze 815: mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL);
1.1 schwarze 816:
1.2 schwarze 817: roffnode_pop(r);
818: roffnode_cleanscope(r);
1.1 schwarze 819: return(ROFF_IGN);
820: }
821:
822:
823: /* ARGSUSED */
824: static enum rofferr
1.2 schwarze 825: roff_block(ROFF_ARGS)
1.1 schwarze 826: {
1.2 schwarze 827: int sv;
828: size_t sz;
1.16 schwarze 829: char *name;
830:
831: name = NULL;
1.2 schwarze 832:
1.16 schwarze 833: if (ROFF_ig != tok) {
834: if ('\0' == (*bufp)[pos]) {
1.35 schwarze 835: mandoc_msg(MANDOCERR_NOARGS, r->parse, ln, ppos, NULL);
1.16 schwarze 836: return(ROFF_IGN);
837: }
1.22 schwarze 838:
839: /*
840: * Re-write `de1', since we don't really care about
841: * groff's strange compatibility mode, into `de'.
842: */
843:
1.18 schwarze 844: if (ROFF_de1 == tok)
845: tok = ROFF_de;
1.16 schwarze 846: if (ROFF_de == tok)
847: name = *bufp + pos;
1.21 schwarze 848: else
1.35 schwarze 849: mandoc_msg(MANDOCERR_REQUEST, r->parse, ln, ppos,
1.21 schwarze 850: roffs[tok].name);
1.22 schwarze 851:
1.33 schwarze 852: while ((*bufp)[pos] && ! isspace((unsigned char)(*bufp)[pos]))
1.2 schwarze 853: pos++;
1.22 schwarze 854:
1.33 schwarze 855: while (isspace((unsigned char)(*bufp)[pos]))
1.16 schwarze 856: (*bufp)[pos++] = '\0';
1.2 schwarze 857: }
858:
1.16 schwarze 859: roffnode_push(r, tok, name, ln, ppos);
860:
861: /*
862: * At the beginning of a `de' macro, clear the existing string
863: * with the same name, if there is one. New content will be
864: * added from roff_block_text() in multiline mode.
865: */
1.22 schwarze 866:
1.16 schwarze 867: if (ROFF_de == tok)
1.19 schwarze 868: roff_setstr(r, name, "", 0);
1.2 schwarze 869:
870: if ('\0' == (*bufp)[pos])
871: return(ROFF_IGN);
1.1 schwarze 872:
1.22 schwarze 873: /* If present, process the custom end-of-line marker. */
874:
1.2 schwarze 875: sv = pos;
1.33 schwarze 876: while ((*bufp)[pos] && ! isspace((unsigned char)(*bufp)[pos]))
1.2 schwarze 877: pos++;
878:
879: /*
880: * Note: groff does NOT like escape characters in the input.
881: * Instead of detecting this, we're just going to let it fly and
882: * to hell with it.
883: */
884:
885: assert(pos > sv);
886: sz = (size_t)(pos - sv);
887:
888: if (1 == sz && '.' == (*bufp)[sv])
889: return(ROFF_IGN);
890:
1.11 schwarze 891: r->last->end = mandoc_malloc(sz + 1);
1.2 schwarze 892:
893: memcpy(r->last->end, *bufp + sv, sz);
894: r->last->end[(int)sz] = '\0';
895:
896: if ((*bufp)[pos])
1.35 schwarze 897: mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL);
1.1 schwarze 898:
899: return(ROFF_IGN);
900: }
901:
902:
903: /* ARGSUSED */
904: static enum rofferr
1.2 schwarze 905: roff_block_sub(ROFF_ARGS)
1.1 schwarze 906: {
1.2 schwarze 907: enum rofft t;
908: int i, j;
909:
910: /*
911: * First check whether a custom macro exists at this level. If
912: * it does, then check against it. This is some of groff's
913: * stranger behaviours. If we encountered a custom end-scope
914: * tag and that tag also happens to be a "real" macro, then we
915: * need to try interpreting it again as a real macro. If it's
916: * not, then return ignore. Else continue.
917: */
918:
919: if (r->last->end) {
1.35 schwarze 920: for (i = pos, j = 0; r->last->end[j]; j++, i++)
1.2 schwarze 921: if ((*bufp)[i] != r->last->end[j])
922: break;
1.1 schwarze 923:
1.2 schwarze 924: if ('\0' == r->last->end[j] &&
925: ('\0' == (*bufp)[i] ||
926: ' ' == (*bufp)[i] ||
927: '\t' == (*bufp)[i])) {
928: roffnode_pop(r);
929: roffnode_cleanscope(r);
1.1 schwarze 930:
1.35 schwarze 931: while (' ' == (*bufp)[i] || '\t' == (*bufp)[i])
932: i++;
933:
934: pos = i;
1.16 schwarze 935: if (ROFF_MAX != roff_parse(r, *bufp, &pos))
1.2 schwarze 936: return(ROFF_RERUN);
937: return(ROFF_IGN);
938: }
1.1 schwarze 939: }
940:
1.2 schwarze 941: /*
942: * If we have no custom end-query or lookup failed, then try
943: * pulling it out of the hashtable.
944: */
1.1 schwarze 945:
1.36 schwarze 946: t = roff_parse(r, *bufp, &pos);
1.1 schwarze 947:
1.16 schwarze 948: /*
949: * Macros other than block-end are only significant
950: * in `de' blocks; elsewhere, simply throw them away.
951: */
952: if (ROFF_cblock != t) {
953: if (ROFF_de == tok)
954: roff_setstr(r, r->last->name, *bufp + ppos, 1);
1.1 schwarze 955: return(ROFF_IGN);
1.16 schwarze 956: }
1.1 schwarze 957:
1.2 schwarze 958: assert(roffs[t].proc);
1.6 schwarze 959: return((*roffs[t].proc)(r, t, bufp, szp,
960: ln, ppos, pos, offs));
1.2 schwarze 961: }
962:
963:
964: /* ARGSUSED */
965: static enum rofferr
966: roff_block_text(ROFF_ARGS)
967: {
968:
1.16 schwarze 969: if (ROFF_de == tok)
970: roff_setstr(r, r->last->name, *bufp + pos, 1);
971:
1.2 schwarze 972: return(ROFF_IGN);
973: }
974:
975:
976: /* ARGSUSED */
977: static enum rofferr
978: roff_cond_sub(ROFF_ARGS)
979: {
980: enum rofft t;
981: enum roffrule rr;
1.37 schwarze 982: char *ep;
1.2 schwarze 983:
984: rr = r->last->rule;
1.37 schwarze 985: roffnode_cleanscope(r);
1.2 schwarze 986:
1.37 schwarze 987: /*
988: * If the macro is unknown, first check if it contains a closing
989: * delimiter `\}'. If it does, close out our scope and return
990: * the currently-scoped rule (ignore or continue). Else, drop
991: * into the currently-scoped rule.
1.5 schwarze 992: */
993:
1.16 schwarze 994: if (ROFF_MAX == (t = roff_parse(r, *bufp, &pos))) {
1.37 schwarze 995: ep = &(*bufp)[pos];
996: for ( ; NULL != (ep = strchr(ep, '\\')); ep++) {
997: ep++;
998: if ('}' != *ep)
999: continue;
1.39 schwarze 1000:
1001: /*
1002: * Make the \} go away.
1003: * This is a little haphazard, as it's not quite
1004: * clear how nroff does this.
1005: * If we're at the end of line, then just chop
1006: * off the \} and resize the buffer.
1007: * If we aren't, then conver it to spaces.
1008: */
1009:
1010: if ('\0' == *(ep + 1)) {
1011: *--ep = '\0';
1012: *szp -= 2;
1013: } else
1014: *(ep - 1) = *ep = ' ';
1015:
1.37 schwarze 1016: roff_ccond(r, ROFF_ccond, bufp, szp,
1017: ln, pos, pos + 2, offs);
1018: break;
1019: }
1.2 schwarze 1020: return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);
1.12 schwarze 1021: }
1.2 schwarze 1022:
1023: /*
1024: * A denied conditional must evaluate its children if and only
1025: * if they're either structurally required (such as loops and
1026: * conditionals) or a closing macro.
1027: */
1.37 schwarze 1028:
1.2 schwarze 1029: if (ROFFRULE_DENY == rr)
1030: if ( ! (ROFFMAC_STRUCT & roffs[t].flags))
1031: if (ROFF_ccond != t)
1032: return(ROFF_IGN);
1033:
1034: assert(roffs[t].proc);
1.6 schwarze 1035: return((*roffs[t].proc)(r, t, bufp, szp,
1036: ln, ppos, pos, offs));
1.2 schwarze 1037: }
1038:
1039: /* ARGSUSED */
1040: static enum rofferr
1041: roff_cond_text(ROFF_ARGS)
1042: {
1.37 schwarze 1043: char *ep;
1.2 schwarze 1044: enum roffrule rr;
1045:
1046: rr = r->last->rule;
1.37 schwarze 1047: roffnode_cleanscope(r);
1.1 schwarze 1048:
1.37 schwarze 1049: ep = &(*bufp)[pos];
1050: for ( ; NULL != (ep = strchr(ep, '\\')); ep++) {
1051: ep++;
1052: if ('}' != *ep)
1053: continue;
1054: *ep = '&';
1055: roff_ccond(r, ROFF_ccond, bufp, szp,
1056: ln, pos, pos + 2, offs);
1.2 schwarze 1057: }
1058: return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);
1059: }
1060:
1.5 schwarze 1061: static enum roffrule
1062: roff_evalcond(const char *v, int *pos)
1063: {
1064:
1065: switch (v[*pos]) {
1066: case ('n'):
1067: (*pos)++;
1068: return(ROFFRULE_ALLOW);
1069: case ('e'):
1070: /* FALLTHROUGH */
1071: case ('o'):
1072: /* FALLTHROUGH */
1073: case ('t'):
1074: (*pos)++;
1075: return(ROFFRULE_DENY);
1076: default:
1077: break;
1078: }
1079:
1080: while (v[*pos] && ' ' != v[*pos])
1081: (*pos)++;
1082: return(ROFFRULE_DENY);
1083: }
1084:
1.2 schwarze 1085: /* ARGSUSED */
1086: static enum rofferr
1.21 schwarze 1087: roff_line_ignore(ROFF_ARGS)
1.6 schwarze 1088: {
1.30 schwarze 1089:
1090: if (ROFF_it == tok)
1.35 schwarze 1091: mandoc_msg(MANDOCERR_REQUEST, r->parse, ln, ppos, "it");
1.6 schwarze 1092:
1.21 schwarze 1093: return(ROFF_IGN);
1094: }
1095:
1096: /* ARGSUSED */
1097: static enum rofferr
1.2 schwarze 1098: roff_cond(ROFF_ARGS)
1099: {
1100: int sv;
1.5 schwarze 1101: enum roffrule rule;
1.2 schwarze 1102:
1.35 schwarze 1103: /*
1104: * An `.el' has no conditional body: it will consume the value
1105: * of the current rstack entry set in prior `ie' calls or
1106: * defaults to DENY.
1107: *
1108: * If we're not an `el', however, then evaluate the conditional.
1109: */
1.1 schwarze 1110:
1.35 schwarze 1111: rule = ROFF_el == tok ?
1112: (r->rstackpos < 0 ?
1113: ROFFRULE_DENY : r->rstack[r->rstackpos--]) :
1114: roff_evalcond(*bufp, &pos);
1.2 schwarze 1115:
1116: sv = pos;
1117: while (' ' == (*bufp)[pos])
1118: pos++;
1119:
1120: /*
1121: * Roff is weird. If we have just white-space after the
1122: * conditional, it's considered the BODY and we exit without
1123: * really doing anything. Warn about this. It's probably
1124: * wrong.
1125: */
1.5 schwarze 1126:
1.2 schwarze 1127: if ('\0' == (*bufp)[pos] && sv != pos) {
1.35 schwarze 1128: mandoc_msg(MANDOCERR_NOARGS, r->parse, ln, ppos, NULL);
1.22 schwarze 1129: return(ROFF_IGN);
1.2 schwarze 1130: }
1131:
1.16 schwarze 1132: roffnode_push(r, tok, NULL, ln, ppos);
1.2 schwarze 1133:
1.5 schwarze 1134: r->last->rule = rule;
1.2 schwarze 1135:
1.35 schwarze 1136: /*
1137: * An if-else will put the NEGATION of the current evaluated
1138: * conditional into the stack of rules.
1139: */
1140:
1.2 schwarze 1141: if (ROFF_ie == tok) {
1.35 schwarze 1142: if (r->rstackpos == RSTACK_MAX - 1) {
1143: mandoc_msg(MANDOCERR_MEM,
1144: r->parse, ln, ppos, NULL);
1145: return(ROFF_ERR);
1146: }
1147: r->rstack[++r->rstackpos] =
1148: ROFFRULE_DENY == r->last->rule ?
1149: ROFFRULE_ALLOW : ROFFRULE_DENY;
1.2 schwarze 1150: }
1.5 schwarze 1151:
1152: /* If the parent has false as its rule, then so do we. */
1153:
1.2 schwarze 1154: if (r->last->parent && ROFFRULE_DENY == r->last->parent->rule)
1155: r->last->rule = ROFFRULE_DENY;
1.5 schwarze 1156:
1157: /*
1158: * Determine scope. If we're invoked with "\{" trailing the
1159: * conditional, then we're in a multiline scope. Else our scope
1160: * expires on the next line.
1161: */
1.2 schwarze 1162:
1163: r->last->endspan = 1;
1164:
1165: if ('\\' == (*bufp)[pos] && '{' == (*bufp)[pos + 1]) {
1166: r->last->endspan = -1;
1167: pos += 2;
1168: }
1169:
1170: /*
1171: * If there are no arguments on the line, the next-line scope is
1172: * assumed.
1173: */
1174:
1175: if ('\0' == (*bufp)[pos])
1176: return(ROFF_IGN);
1177:
1178: /* Otherwise re-run the roff parser after recalculating. */
1.1 schwarze 1179:
1.2 schwarze 1180: *offs = pos;
1181: return(ROFF_RERUN);
1.1 schwarze 1182: }
1183:
1184:
1.2 schwarze 1185: /* ARGSUSED */
1186: static enum rofferr
1.7 schwarze 1187: roff_ds(ROFF_ARGS)
1188: {
1.10 schwarze 1189: char *name, *string;
1190:
1191: /*
1192: * A symbol is named by the first word following the macro
1193: * invocation up to a space. Its value is anything after the
1194: * name's trailing whitespace and optional double-quote. Thus,
1195: *
1196: * [.ds foo "bar " ]
1197: *
1198: * will have `bar " ' as its value.
1199: */
1.7 schwarze 1200:
1.28 schwarze 1201: string = *bufp + pos;
1202: name = roff_getname(r, &string, ln, pos);
1.7 schwarze 1203: if ('\0' == *name)
1204: return(ROFF_IGN);
1205:
1.28 schwarze 1206: /* Read past initial double-quote. */
1207: if ('"' == *string)
1.7 schwarze 1208: string++;
1209:
1.10 schwarze 1210: /* The rest is the value. */
1.16 schwarze 1211: roff_setstr(r, name, string, 0);
1.7 schwarze 1212: return(ROFF_IGN);
1213: }
1214:
1.41 schwarze 1215: int
1216: roff_regisset(const struct roff *r, enum regs reg)
1217: {
1218:
1219: return(r->regs[(int)reg].set);
1220: }
1221:
1222: unsigned int
1223: roff_regget(const struct roff *r, enum regs reg)
1224: {
1225:
1226: return(r->regs[(int)reg].u);
1227: }
1228:
1229: void
1230: roff_regunset(struct roff *r, enum regs reg)
1231: {
1232:
1233: r->regs[(int)reg].set = 0;
1234: }
1.7 schwarze 1235:
1236: /* ARGSUSED */
1237: static enum rofferr
1.6 schwarze 1238: roff_nr(ROFF_ARGS)
1.1 schwarze 1239: {
1.28 schwarze 1240: const char *key;
1241: char *val;
1.37 schwarze 1242: int iv;
1.6 schwarze 1243:
1.28 schwarze 1244: val = *bufp + pos;
1245: key = roff_getname(r, &val, ln, pos);
1.6 schwarze 1246:
1247: if (0 == strcmp(key, "nS")) {
1.41 schwarze 1248: r->regs[(int)REG_nS].set = 1;
1249: if ((iv = mandoc_strntoi(val, strlen(val), 10)) >= 0)
1250: r->regs[(int)REG_nS].u = (unsigned)iv;
1.37 schwarze 1251: else
1.41 schwarze 1252: r->regs[(int)REG_nS].u = 0u;
1.6 schwarze 1253: }
1.1 schwarze 1254:
1.29 schwarze 1255: return(ROFF_IGN);
1256: }
1257:
1258: /* ARGSUSED */
1259: static enum rofferr
1260: roff_rm(ROFF_ARGS)
1261: {
1262: const char *name;
1263: char *cp;
1264:
1265: cp = *bufp + pos;
1266: while ('\0' != *cp) {
1.34 schwarze 1267: name = roff_getname(r, &cp, ln, (int)(cp - *bufp));
1.29 schwarze 1268: if ('\0' != *name)
1269: roff_setstr(r, name, NULL, 0);
1270: }
1.2 schwarze 1271: return(ROFF_IGN);
1.14 schwarze 1272: }
1273:
1274: /* ARGSUSED */
1275: static enum rofferr
1.27 schwarze 1276: roff_TE(ROFF_ARGS)
1277: {
1278:
1279: if (NULL == r->tbl)
1.35 schwarze 1280: mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1.27 schwarze 1281: else
1.41 schwarze 1282: tbl_end(&r->tbl);
1.27 schwarze 1283:
1284: return(ROFF_IGN);
1285: }
1286:
1287: /* ARGSUSED */
1288: static enum rofferr
1289: roff_T_(ROFF_ARGS)
1290: {
1291:
1292: if (NULL == r->tbl)
1.35 schwarze 1293: mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1.27 schwarze 1294: else
1295: tbl_restart(ppos, ln, r->tbl);
1296:
1297: return(ROFF_IGN);
1298: }
1299:
#if 0
/*
 * Currently compiled out.
 * Returns 1 if an equation is open and eqn_end() yields ROFF_EQN
 * for it, 0 otherwise.
 */
static int
roff_closeeqn(struct roff *r)
{

	if (r->eqn == NULL)
		return(0);

	return(eqn_end(&r->eqn) == ROFF_EQN ? 1 : 0);
}
#endif
1308:
1309: static void
1310: roff_openeqn(struct roff *r, const char *name, int line,
1311: int offs, const char *buf)
1.32 schwarze 1312: {
1.41 schwarze 1313: struct eqn_node *e;
1314: int poff;
1.32 schwarze 1315:
1316: assert(NULL == r->eqn);
1.41 schwarze 1317: e = eqn_alloc(name, offs, line, r->parse);
1.32 schwarze 1318:
1319: if (r->last_eqn)
1320: r->last_eqn->next = e;
1321: else
1322: r->first_eqn = r->last_eqn = e;
1323:
1324: r->eqn = r->last_eqn = e;
1.41 schwarze 1325:
1326: if (buf) {
1327: poff = 0;
1328: eqn_read(&r->eqn, line, buf, offs, &poff);
1329: }
1330: }
1331:
1332: /* ARGSUSED */
1333: static enum rofferr
1334: roff_EQ(ROFF_ARGS)
1335: {
1336:
1337: roff_openeqn(r, *bufp + pos, ln, ppos, NULL);
1.32 schwarze 1338: return(ROFF_IGN);
1339: }
1340:
1341: /* ARGSUSED */
1342: static enum rofferr
1343: roff_EN(ROFF_ARGS)
1344: {
1345:
1.35 schwarze 1346: mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1.32 schwarze 1347: return(ROFF_IGN);
1348: }
1349:
1350: /* ARGSUSED */
1351: static enum rofferr
1.27 schwarze 1352: roff_TS(ROFF_ARGS)
1353: {
1354: struct tbl_node *t;
1355:
1356: if (r->tbl) {
1.35 schwarze 1357: mandoc_msg(MANDOCERR_SCOPEBROKEN, r->parse, ln, ppos, NULL);
1.41 schwarze 1358: tbl_end(&r->tbl);
1.27 schwarze 1359: }
1360:
1.35 schwarze 1361: t = tbl_alloc(ppos, ln, r->parse);
1.27 schwarze 1362:
1363: if (r->last_tbl)
1364: r->last_tbl->next = t;
1365: else
1366: r->first_tbl = r->last_tbl = t;
1367:
1368: r->tbl = r->last_tbl = t;
1369: return(ROFF_IGN);
1370: }
1371:
1372: /* ARGSUSED */
1373: static enum rofferr
1.42 schwarze 1374: roff_tr(ROFF_ARGS)
1375: {
1376: const char *p, *first, *second;
1377: size_t fsz, ssz;
1378: enum mandoc_esc esc;
1379:
1380: p = *bufp + pos;
1381:
1382: if ('\0' == *p) {
1383: mandoc_msg(MANDOCERR_ARGCOUNT, r->parse, ln, ppos, NULL);
1384: return(ROFF_IGN);
1385: }
1386:
1387: while ('\0' != *p) {
1388: fsz = ssz = 1;
1389:
1390: first = p++;
1391: if ('\\' == *first) {
1392: esc = mandoc_escape(&p, NULL, NULL);
1393: if (ESCAPE_ERROR == esc) {
1394: mandoc_msg
1395: (MANDOCERR_BADESCAPE, r->parse,
1396: ln, (int)(p - *bufp), NULL);
1397: return(ROFF_IGN);
1398: }
1399: fsz = (size_t)(p - first);
1400: }
1401:
1402: second = p++;
1403: if ('\\' == *second) {
1404: esc = mandoc_escape(&p, NULL, NULL);
1405: if (ESCAPE_ERROR == esc) {
1406: mandoc_msg
1407: (MANDOCERR_BADESCAPE, r->parse,
1408: ln, (int)(p - *bufp), NULL);
1409: return(ROFF_IGN);
1410: }
1411: ssz = (size_t)(p - second);
1412: } else if ('\0' == *second) {
1413: mandoc_msg(MANDOCERR_ARGCOUNT, r->parse,
1414: ln, (int)(p - *bufp), NULL);
1415: second = " ";
1416: p--;
1417: }
1418:
1419: if (fsz > 1) {
1420: roff_setstrn(&r->xmbtab, first,
1421: fsz, second, ssz, 0);
1422: continue;
1423: }
1424:
1425: if (NULL == r->xtab)
1426: r->xtab = mandoc_calloc
1427: (128, sizeof(struct roffstr));
1428:
1429: free(r->xtab[(int)*first].p);
1430: r->xtab[(int)*first].p = mandoc_strndup(second, ssz);
1431: r->xtab[(int)*first].sz = ssz;
1432: }
1433:
1434: return(ROFF_IGN);
1435: }
1436:
1437: /* ARGSUSED */
1438: static enum rofferr
1.14 schwarze 1439: roff_so(ROFF_ARGS)
1440: {
1441: char *name;
1.15 schwarze 1442:
1.35 schwarze 1443: mandoc_msg(MANDOCERR_SO, r->parse, ln, ppos, NULL);
1.14 schwarze 1444:
1.22 schwarze 1445: /*
1446: * Handle `so'. Be EXTREMELY careful, as we shouldn't be
1447: * opening anything that's not in our cwd or anything beneath
1448: * it. Thus, explicitly disallow traversing up the file-system
1449: * or using absolute paths.
1450: */
1451:
1.14 schwarze 1452: name = *bufp + pos;
1453: if ('/' == *name || strstr(name, "../") || strstr(name, "/..")) {
1.35 schwarze 1454: mandoc_msg(MANDOCERR_SOPATH, r->parse, ln, pos, NULL);
1.14 schwarze 1455: return(ROFF_ERR);
1456: }
1457:
1458: *offs = pos;
1459: return(ROFF_SO);
1.7 schwarze 1460: }
1461:
1.16 schwarze 1462: /* ARGSUSED */
1463: static enum rofferr
1464: roff_userdef(ROFF_ARGS)
1.12 schwarze 1465: {
1.16 schwarze 1466: const char *arg[9];
1467: char *cp, *n1, *n2;
1.25 schwarze 1468: int i;
1.12 schwarze 1469:
1.16 schwarze 1470: /*
1471: * Collect pointers to macro argument strings
1472: * and null-terminate them.
1473: */
1474: cp = *bufp + pos;
1.25 schwarze 1475: for (i = 0; i < 9; i++)
1.26 schwarze 1476: arg[i] = '\0' == *cp ? "" :
1.35 schwarze 1477: mandoc_getarg(r->parse, &cp, ln, &pos);
1.16 schwarze 1478:
1479: /*
1480: * Expand macro arguments.
1.12 schwarze 1481: */
1.16 schwarze 1482: *szp = 0;
1483: n1 = cp = mandoc_strdup(r->current_string);
1484: while (NULL != (cp = strstr(cp, "\\$"))) {
1485: i = cp[2] - '1';
1486: if (0 > i || 8 < i) {
1487: /* Not an argument invocation. */
1488: cp += 2;
1489: continue;
1490: }
1491:
1492: *szp = strlen(n1) - 3 + strlen(arg[i]) + 1;
1493: n2 = mandoc_malloc(*szp);
1494:
1495: strlcpy(n2, n1, (size_t)(cp - n1 + 1));
1496: strlcat(n2, arg[i], *szp);
1497: strlcat(n2, cp + 3, *szp);
1498:
1499: cp = n2 + (cp - n1);
1500: free(n1);
1501: n1 = n2;
1.12 schwarze 1502: }
1503:
1.16 schwarze 1504: /*
1505: * Replace the macro invocation
1506: * by the expanded macro.
1507: */
1508: free(*bufp);
1509: *bufp = n1;
1510: if (0 == *szp)
1511: *szp = strlen(*bufp) + 1;
1512:
1.19 schwarze 1513: return(*szp > 1 && '\n' == (*bufp)[(int)*szp - 2] ?
1.16 schwarze 1514: ROFF_REPARSE : ROFF_APPEND);
1.12 schwarze 1515: }
1.28 schwarze 1516:
1517: static char *
1518: roff_getname(struct roff *r, char **cpp, int ln, int pos)
1519: {
1520: char *name, *cp;
1521:
1522: name = *cpp;
1523: if ('\0' == *name)
1524: return(name);
1525:
1526: /* Read until end of name. */
1527: for (cp = name; '\0' != *cp && ' ' != *cp; cp++) {
1528: if ('\\' != *cp)
1529: continue;
1530: cp++;
1531: if ('\\' == *cp)
1532: continue;
1.35 schwarze 1533: mandoc_msg(MANDOCERR_NAMESC, r->parse, ln, pos, NULL);
1.28 schwarze 1534: *cp = '\0';
1535: name = cp;
1536: }
1537:
1538: /* Nil-terminate name. */
1539: if ('\0' != *cp)
1540: *(cp++) = '\0';
1541:
1542: /* Read past spaces. */
1543: while (' ' == *cp)
1544: cp++;
1545:
1546: *cpp = cp;
1547: return(name);
1548: }
1549:
1.16 schwarze 1550: /*
1551: * Store *string into the user-defined string called *name.
1552: * In multiline mode, append to an existing entry and append '\n';
1553: * else replace the existing entry, if there is one.
1554: * To clear an existing entry, call with (*r, *name, NULL, 0).
1555: */
1.8 schwarze 1556: static void
1.16 schwarze 1557: roff_setstr(struct roff *r, const char *name, const char *string,
1558: int multiline)
1.7 schwarze 1559: {
1.42 schwarze 1560:
1561: roff_setstrn(&r->strtab, name, strlen(name), string,
1562: string ? strlen(string) : 0, multiline);
1563: }
1564:
1565: static void
1566: roff_setstrn(struct roffkv **r, const char *name, size_t namesz,
1567: const char *string, size_t stringsz, int multiline)
1568: {
1569: struct roffkv *n;
1570: char *c;
1571: int i;
1572: size_t oldch, newch;
1.7 schwarze 1573:
1.16 schwarze 1574: /* Search for an existing string with the same name. */
1.42 schwarze 1575: n = *r;
1576:
1577: while (n && strcmp(name, n->key.p))
1.7 schwarze 1578: n = n->next;
1.8 schwarze 1579:
1580: if (NULL == n) {
1.16 schwarze 1581: /* Create a new string table entry. */
1.42 schwarze 1582: n = mandoc_malloc(sizeof(struct roffkv));
1583: n->key.p = mandoc_strndup(name, namesz);
1584: n->key.sz = namesz;
1585: n->val.p = NULL;
1586: n->val.sz = 0;
1587: n->next = *r;
1588: *r = n;
1.16 schwarze 1589: } else if (0 == multiline) {
1590: /* In multiline mode, append; else replace. */
1.42 schwarze 1591: free(n->val.p);
1592: n->val.p = NULL;
1593: n->val.sz = 0;
1.16 schwarze 1594: }
1595:
1596: if (NULL == string)
1597: return;
1598:
1599: /*
1600: * One additional byte for the '\n' in multiline mode,
1601: * and one for the terminating '\0'.
1602: */
1.42 schwarze 1603: newch = stringsz + (multiline ? 2u : 1u);
1604:
1605: if (NULL == n->val.p) {
1606: n->val.p = mandoc_malloc(newch);
1607: *n->val.p = '\0';
1.16 schwarze 1608: oldch = 0;
1609: } else {
1.42 schwarze 1610: oldch = n->val.sz;
1611: n->val.p = mandoc_realloc(n->val.p, oldch + newch);
1.16 schwarze 1612: }
1613:
1614: /* Skip existing content in the destination buffer. */
1.42 schwarze 1615: c = n->val.p + (int)oldch;
1.16 schwarze 1616:
1617: /* Append new content to the destination buffer. */
1.42 schwarze 1618: i = 0;
1619: while (i < (int)stringsz) {
1.16 schwarze 1620: /*
1621: * Rudimentary roff copy mode:
1622: * Handle escaped backslashes.
1623: */
1.42 schwarze 1624: if ('\\' == string[i] && '\\' == string[i + 1])
1625: i++;
1626: *c++ = string[i++];
1.16 schwarze 1627: }
1.8 schwarze 1628:
1.16 schwarze 1629: /* Append terminating bytes. */
1630: if (multiline)
1631: *c++ = '\n';
1.42 schwarze 1632:
1.16 schwarze 1633: *c = '\0';
1.42 schwarze 1634: n->val.sz = (int)(c - n->val.p);
1.7 schwarze 1635: }
1636:
1.8 schwarze 1637: static const char *
1638: roff_getstrn(const struct roff *r, const char *name, size_t len)
1.7 schwarze 1639: {
1.42 schwarze 1640: const struct roffkv *n;
1.7 schwarze 1641:
1.42 schwarze 1642: for (n = r->strtab; n; n = n->next)
1643: if (0 == strncmp(name, n->key.p, len) &&
1644: '\0' == n->key.p[(int)len])
1645: return(n->val.p);
1.8 schwarze 1646:
1.42 schwarze 1647: return(NULL);
1.7 schwarze 1648: }
1649:
1.8 schwarze 1650: static void
1.42 schwarze 1651: roff_freestr(struct roffkv *r)
1.7 schwarze 1652: {
1.42 schwarze 1653: struct roffkv *n, *nn;
1.7 schwarze 1654:
1.42 schwarze 1655: for (n = r; n; n = nn) {
1656: free(n->key.p);
1657: free(n->val.p);
1.7 schwarze 1658: nn = n->next;
1659: free(n);
1660: }
1.27 schwarze 1661: }
1662:
1663: const struct tbl_span *
1664: roff_span(const struct roff *r)
1665: {
1666:
1667: return(r->tbl ? tbl_span(r->tbl) : NULL);
1.32 schwarze 1668: }
1669:
1670: const struct eqn *
1671: roff_eqn(const struct roff *r)
1672: {
1673:
1674: return(r->last_eqn ? &r->last_eqn->eqn : NULL);
1.42 schwarze 1675: }
1676:
1677: /*
1678: * Duplicate an input string, making the appropriate character
1679: * conversations (as stipulated by `tr') along the way.
1680: * Returns a heap-allocated string with all the replacements made.
1681: */
1682: char *
1683: roff_strdup(const struct roff *r, const char *p)
1684: {
1685: const struct roffkv *cp;
1686: char *res;
1687: const char *pp;
1688: size_t ssz, sz;
1689: enum mandoc_esc esc;
1690:
1691: if (NULL == r->xmbtab && NULL == r->xtab)
1692: return(mandoc_strdup(p));
1693: else if ('\0' == *p)
1694: return(mandoc_strdup(""));
1695:
1696: /*
1697: * Step through each character looking for term matches
1698: * (remember that a `tr' can be invoked with an escape, which is
1699: * a glyph but the escape is multi-character).
1700: * We only do this if the character hash has been initialised
1701: * and the string is >0 length.
1702: */
1703:
1704: res = NULL;
1705: ssz = 0;
1706:
1707: while ('\0' != *p) {
1708: if ('\\' != *p && r->xtab && r->xtab[(int)*p].p) {
1709: sz = r->xtab[(int)*p].sz;
1710: res = mandoc_realloc(res, ssz + sz + 1);
1711: memcpy(res + ssz, r->xtab[(int)*p].p, sz);
1712: ssz += sz;
1713: p++;
1714: continue;
1715: } else if ('\\' != *p) {
1716: res = mandoc_realloc(res, ssz + 2);
1717: res[ssz++] = *p++;
1718: continue;
1719: }
1720:
1721: /* Search for term matches. */
1722: for (cp = r->xmbtab; cp; cp = cp->next)
1723: if (0 == strncmp(p, cp->key.p, cp->key.sz))
1724: break;
1725:
1726: if (NULL != cp) {
1727: /*
1728: * A match has been found.
1729: * Append the match to the array and move
1730: * forward by its keysize.
1731: */
1732: res = mandoc_realloc
1733: (res, ssz + cp->val.sz + 1);
1734: memcpy(res + ssz, cp->val.p, cp->val.sz);
1735: ssz += cp->val.sz;
1736: p += (int)cp->key.sz;
1737: continue;
1738: }
1739:
1740: /*
1741: * Handle escapes carefully: we need to copy
1742: * over just the escape itself, or else we might
1743: * do replacements within the escape itself.
1744: * Make sure to pass along the bogus string.
1745: */
1746: pp = p++;
1747: esc = mandoc_escape(&p, NULL, NULL);
1748: if (ESCAPE_ERROR == esc) {
1749: sz = strlen(pp);
1750: res = mandoc_realloc(res, ssz + sz + 1);
1751: memcpy(res + ssz, pp, sz);
1752: break;
1753: }
1754: /*
1755: * We bail out on bad escapes.
1756: * No need to warn: we already did so when
1757: * roff_res() was called.
1758: */
1759: sz = (int)(p - pp);
1760: res = mandoc_realloc(res, ssz + sz + 1);
1761: memcpy(res + ssz, pp, sz);
1762: ssz += sz;
1763: }
1764:
1765: res[(int)ssz] = '\0';
1766: return(res);
1.1 schwarze 1767: }