Annotation of src/usr.bin/mandoc/mdoc.c, Revision 1.6
1.6 ! schwarze 1: /* $Id: mdoc.c,v 1.5 2009/06/15 18:41:13 schwarze Exp $ */
1.1 kristaps 2: /*
1.3 schwarze 3: * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
1.1 kristaps 4: *
5: * Permission to use, copy, modify, and distribute this software for any
1.3 schwarze 6: * purpose with or without fee is hereby granted, provided that the above
7: * copyright notice and this permission notice appear in all copies.
1.1 kristaps 8: *
1.3 schwarze 9: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
1.1 kristaps 16: */
17: #include <assert.h>
18: #include <ctype.h>
19: #include <stdarg.h>
20: #include <stdio.h>
21: #include <stdlib.h>
22: #include <string.h>
23:
24: #include "libmdoc.h"
25:
/*
 * Internal parse-error codes.  Each one is mapped to a fixed message
 * string by perr().
 */
enum	merr {
	ENOCALL,	/* macro is not callable */
	EBODYPROL,	/* body macro used in the prologue */
	EPROLBODY,	/* prologue macro used in the body */
	ESPACE,		/* whitespace after the control character */
	ETEXTPROL,	/* free-form text in the prologue */
	ENOBLANK,	/* blank line in a non-literal context */
	EMALLOC		/* memory exhausted */
};
35:
36: const char *const __mdoc_macronames[MDOC_MAX] = {
37: "\\\"", "Dd", "Dt", "Os",
38: "Sh", "Ss", "Pp", "D1",
39: "Dl", "Bd", "Ed", "Bl",
40: "El", "It", "Ad", "An",
41: "Ar", "Cd", "Cm", "Dv",
42: "Er", "Ev", "Ex", "Fa",
43: "Fd", "Fl", "Fn", "Ft",
44: "Ic", "In", "Li", "Nd",
45: "Nm", "Op", "Ot", "Pa",
46: "Rv", "St", "Va", "Vt",
47: /* LINTED */
48: "Xr", "\%A", "\%B", "\%D",
49: /* LINTED */
50: "\%I", "\%J", "\%N", "\%O",
51: /* LINTED */
52: "\%P", "\%R", "\%T", "\%V",
53: "Ac", "Ao", "Aq", "At",
54: "Bc", "Bf", "Bo", "Bq",
55: "Bsx", "Bx", "Db", "Dc",
56: "Do", "Dq", "Ec", "Ef",
57: "Em", "Eo", "Fx", "Ms",
58: "No", "Ns", "Nx", "Ox",
59: "Pc", "Pf", "Po", "Pq",
60: "Qc", "Ql", "Qo", "Qq",
61: "Re", "Rs", "Sc", "So",
62: "Sq", "Sm", "Sx", "Sy",
63: "Tn", "Ux", "Xc", "Xo",
64: "Fo", "Fc", "Oo", "Oc",
65: "Bk", "Ek", "Bt", "Hf",
66: "Fr", "Ud", "Lb", "Ap",
67: "Lp", "Lk", "Mt", "Brq",
68: /* LINTED */
69: "Bro", "Brc", "\%C", "Es",
70: /* LINTED */
71: "En", "Dx", "\%Q"
72: };
73:
74: const char *const __mdoc_argnames[MDOC_ARG_MAX] = {
75: "split", "nosplit", "ragged",
76: "unfilled", "literal", "file",
77: "offset", "bullet", "dash",
78: "hyphen", "item", "enum",
79: "tag", "diag", "hang",
80: "ohang", "inset", "column",
81: "width", "compact", "std",
82: "filled", "words", "emphasis",
83: "symbolic", "nested"
84: };
85:
86: const char * const *mdoc_macronames = __mdoc_macronames;
87: const char * const *mdoc_argnames = __mdoc_argnames;
88:
/* Parser state set-up and tear-down. */
static	int		  mdoc_alloc1(struct mdoc *);
static	void		  mdoc_free1(struct mdoc *);

/* Node creation and linkage into the tree. */
static	struct mdoc_node *node_alloc(struct mdoc *, int, int,
				int, enum mdoc_type);
static	int		  node_append(struct mdoc *,
				struct mdoc_node *);

/* Per-line parsers and their diagnostics. */
static	int		  parsemacro(struct mdoc *, int, char *);
static	int		  parsetext(struct mdoc *, int, char *);
static	int		  macrowarn(struct mdoc *, int, const char *);
static	int		  perr(struct mdoc *, int, int, enum merr);

/* Report error `t' at the position of the last parsed node. */
#define	verr(m, t)	  perr((m), (m)->last->line, (m)->last->pos, (t))
101:
102: /*
103: * Get the first (root) node of the parse tree.
104: */
105: const struct mdoc_node *
106: mdoc_node(const struct mdoc *m)
107: {
108:
109: return(MDOC_HALT & m->flags ? NULL : m->first);
110: }
111:
112:
113: const struct mdoc_meta *
114: mdoc_meta(const struct mdoc *m)
115: {
116:
117: return(MDOC_HALT & m->flags ? NULL : &m->meta);
118: }
119:
120:
121: static void
122: mdoc_free1(struct mdoc *mdoc)
123: {
124:
125: if (mdoc->first)
126: mdoc_node_freelist(mdoc->first);
127: if (mdoc->meta.title)
128: free(mdoc->meta.title);
129: if (mdoc->meta.os)
130: free(mdoc->meta.os);
131: if (mdoc->meta.name)
132: free(mdoc->meta.name);
133: if (mdoc->meta.arch)
134: free(mdoc->meta.arch);
135: if (mdoc->meta.vol)
136: free(mdoc->meta.vol);
137: }
138:
139:
140: static int
141: mdoc_alloc1(struct mdoc *mdoc)
142: {
143:
144: bzero(&mdoc->meta, sizeof(struct mdoc_meta));
145: mdoc->flags = 0;
146: mdoc->lastnamed = mdoc->lastsec = 0;
147: mdoc->last = calloc(1, sizeof(struct mdoc_node));
148: if (NULL == mdoc->last)
149: return(0);
150:
151: mdoc->first = mdoc->last;
152: mdoc->last->type = MDOC_ROOT;
153: mdoc->next = MDOC_NEXT_CHILD;
154: return(1);
155: }
156:
157:
/*
 * Prepare the parser for another parse: drop the resources of the
 * previous one via mdoc_free1(), then re-initialise the state via
 * mdoc_alloc1().  Returns the result of mdoc_alloc1().
 */
int
mdoc_reset(struct mdoc *mdoc)
{
	int		 ok;

	mdoc_free1(mdoc);
	ok = mdoc_alloc1(mdoc);
	return ok;
}
170:
171:
172: /*
173: * Completely free up all resources.
174: */
175: void
176: mdoc_free(struct mdoc *mdoc)
177: {
178:
179: mdoc_free1(mdoc);
180: if (mdoc->htab)
181: mdoc_hash_free(mdoc->htab);
182: free(mdoc);
183: }
184:
185:
186: struct mdoc *
187: mdoc_alloc(void *data, int pflags, const struct mdoc_cb *cb)
188: {
189: struct mdoc *p;
190:
191: if (NULL == (p = calloc(1, sizeof(struct mdoc))))
192: return(NULL);
193: if (cb)
194: (void)memcpy(&p->cb, cb, sizeof(struct mdoc_cb));
195:
196: p->data = data;
197: p->pflags = pflags;
198:
199: if (NULL == (p->htab = mdoc_hash_alloc())) {
200: free(p);
201: return(NULL);
202: } else if (mdoc_alloc1(p))
203: return(p);
204:
205: free(p);
206: return(NULL);
207: }
208:
209:
210: /*
211: * Climb back up the parse tree, validating open scopes. Mostly calls
212: * through to macro_end in macro.c.
213: */
214: int
215: mdoc_endparse(struct mdoc *m)
216: {
217:
218: if (MDOC_HALT & m->flags)
219: return(0);
220: else if (mdoc_macroend(m))
221: return(1);
222: m->flags |= MDOC_HALT;
223: return(0);
224: }
225:
226:
227: /*
228: * Main parse routine. Parses a single line -- really just hands off to
229: * the macro or text parser.
230: */
231: int
232: mdoc_parseln(struct mdoc *m, int ln, char *buf)
233: {
234:
235: /* If in error-mode, then we parse no more. */
236:
237: if (MDOC_HALT & m->flags)
238: return(0);
239:
240: return('.' == *buf ? parsemacro(m, ln, buf) :
241: parsetext(m, ln, buf));
242: }
243:
244:
245: int
1.4 schwarze 246: mdoc_verr(struct mdoc *mdoc, int ln, int pos,
247: const char *fmt, ...)
1.1 kristaps 248: {
249: char buf[256];
250: va_list ap;
251:
252: if (NULL == mdoc->cb.mdoc_err)
253: return(0);
254:
255: va_start(ap, fmt);
256: (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
257: va_end(ap);
258: return((*mdoc->cb.mdoc_err)(mdoc->data, ln, pos, buf));
259: }
260:
261:
262: int
263: mdoc_vwarn(struct mdoc *mdoc, int ln, int pos,
264: enum mdoc_warn type, const char *fmt, ...)
265: {
266: char buf[256];
267: va_list ap;
268:
269: if (NULL == mdoc->cb.mdoc_warn)
270: return(0);
271:
272: va_start(ap, fmt);
273: (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
274: va_end(ap);
275: return((*mdoc->cb.mdoc_warn)(mdoc->data, ln, pos, type, buf));
1.5 schwarze 276: }
277:
278:
279: int
280: mdoc_nerr(struct mdoc *mdoc, const struct mdoc_node *node, const char *fmt, ...)
281: {
282: char buf[256];
283: va_list ap;
284:
285: if (NULL == mdoc->cb.mdoc_err)
286: return(0);
287:
288: va_start(ap, fmt);
289: (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
290: va_end(ap);
291: return((*mdoc->cb.mdoc_err)(mdoc->data, node->line, node->pos, buf));
292: }
293:
294:
295: int
296: mdoc_warn(struct mdoc *mdoc, enum mdoc_warn type, const char *fmt, ...)
297: {
298: char buf[256];
299: va_list ap;
300:
301: if (NULL == mdoc->cb.mdoc_warn)
302: return(0);
303:
304: va_start(ap, fmt);
305: (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
306: va_end(ap);
307: return((*mdoc->cb.mdoc_warn)(mdoc->data, mdoc->last->line,
308: mdoc->last->pos, type, buf));
309: }
310:
311:
312: int
313: mdoc_err(struct mdoc *mdoc, const char *fmt, ...)
314: {
315: char buf[256];
316: va_list ap;
317:
318: if (NULL == mdoc->cb.mdoc_err)
319: return(0);
320:
321: va_start(ap, fmt);
322: (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
323: va_end(ap);
324: return((*mdoc->cb.mdoc_err)(mdoc->data, mdoc->last->line,
325: mdoc->last->pos, buf));
326: }
327:
328:
329: int
330: mdoc_pwarn(struct mdoc *mdoc, int line, int pos, enum mdoc_warn type,
331: const char *fmt, ...)
332: {
333: char buf[256];
334: va_list ap;
335:
336: if (NULL == mdoc->cb.mdoc_warn)
337: return(0);
338:
339: va_start(ap, fmt);
340: (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
341: va_end(ap);
342: return((*mdoc->cb.mdoc_warn)(mdoc->data, line, pos, type, buf));
343: }
344:
345: int
346: mdoc_perr(struct mdoc *mdoc, int line, int pos, const char *fmt, ...)
347: {
348: char buf[256];
349: va_list ap;
350:
351: if (NULL == mdoc->cb.mdoc_err)
352: return(0);
353:
354: va_start(ap, fmt);
355: (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
356: va_end(ap);
357: return((*mdoc->cb.mdoc_err)(mdoc->data, line, pos, buf));
1.2 miod 358: }
359:
360:
361: int
1.1 kristaps 362: mdoc_macro(struct mdoc *m, int tok,
363: int ln, int pp, int *pos, char *buf)
364: {
365:
366: /* FIXME - these should happen during validation. */
367:
368: if (MDOC_PROLOGUE & mdoc_macros[tok].flags &&
369: SEC_PROLOGUE != m->lastnamed)
370: return(perr(m, ln, pp, EPROLBODY));
371:
372: if ( ! (MDOC_PROLOGUE & mdoc_macros[tok].flags) &&
373: SEC_PROLOGUE == m->lastnamed)
374: return(perr(m, ln, pp, EBODYPROL));
375:
376: if (1 != pp && ! (MDOC_CALLABLE & mdoc_macros[tok].flags))
377: return(perr(m, ln, pp, ENOCALL));
378:
379: return((*mdoc_macros[tok].fp)(m, tok, ln, pp, pos, buf));
380: }
381:
382:
383: static int
384: perr(struct mdoc *m, int line, int pos, enum merr type)
385: {
386: char *p;
387:
388: p = NULL;
389: switch (type) {
390: case (ENOCALL):
391: p = "not callable";
392: break;
393: case (EPROLBODY):
394: p = "macro disallowed in document body";
395: break;
396: case (EBODYPROL):
397: p = "macro disallowed in document prologue";
398: break;
399: case (EMALLOC):
400: p = "memory exhausted";
401: break;
402: case (ETEXTPROL):
403: p = "text disallowed in document prologue";
404: break;
405: case (ENOBLANK):
406: p = "blank lines disallowed in non-literal contexts";
407: break;
408: case (ESPACE):
409: p = "whitespace disallowed after delimiter";
410: break;
411: }
412: assert(p);
413: return(mdoc_perr(m, line, pos, p));
414: }
415:
416:
417: static int
418: node_append(struct mdoc *mdoc, struct mdoc_node *p)
419: {
420:
421: assert(mdoc->last);
422: assert(mdoc->first);
423: assert(MDOC_ROOT != p->type);
424:
425: switch (mdoc->next) {
426: case (MDOC_NEXT_SIBLING):
427: mdoc->last->next = p;
428: p->prev = mdoc->last;
429: p->parent = mdoc->last->parent;
430: break;
431: case (MDOC_NEXT_CHILD):
432: mdoc->last->child = p;
433: p->parent = mdoc->last;
434: break;
435: default:
436: abort();
437: /* NOTREACHED */
438: }
439:
440: if ( ! mdoc_valid_pre(mdoc, p))
441: return(0);
442: if ( ! mdoc_action_pre(mdoc, p))
443: return(0);
444:
445: switch (p->type) {
446: case (MDOC_HEAD):
447: assert(MDOC_BLOCK == p->parent->type);
448: p->parent->head = p;
449: break;
450: case (MDOC_TAIL):
451: assert(MDOC_BLOCK == p->parent->type);
452: p->parent->tail = p;
453: break;
454: case (MDOC_BODY):
455: assert(MDOC_BLOCK == p->parent->type);
456: p->parent->body = p;
457: break;
458: default:
459: break;
460: }
461:
462: mdoc->last = p;
463:
464: switch (p->type) {
465: case (MDOC_TEXT):
466: if ( ! mdoc_valid_post(mdoc))
467: return(0);
468: if ( ! mdoc_action_post(mdoc))
469: return(0);
470: break;
471: default:
472: break;
473: }
474:
475: return(1);
476: }
477:
478:
479: static struct mdoc_node *
480: node_alloc(struct mdoc *mdoc, int line,
481: int pos, int tok, enum mdoc_type type)
482: {
483: struct mdoc_node *p;
484:
485: if (NULL == (p = calloc(1, sizeof(struct mdoc_node)))) {
486: (void)verr(mdoc, EMALLOC);
487: return(NULL);
488: }
489:
490: p->sec = mdoc->lastsec;
491: p->line = line;
492: p->pos = pos;
493: p->tok = tok;
494: if (MDOC_TEXT != (p->type = type))
495: assert(p->tok >= 0);
496:
497: return(p);
498: }
499:
500:
501: int
502: mdoc_tail_alloc(struct mdoc *mdoc, int line, int pos, int tok)
503: {
504: struct mdoc_node *p;
505:
506: p = node_alloc(mdoc, line, pos, tok, MDOC_TAIL);
507: if (NULL == p)
508: return(0);
509: return(node_append(mdoc, p));
510: }
511:
512:
513: int
514: mdoc_head_alloc(struct mdoc *mdoc, int line, int pos, int tok)
515: {
516: struct mdoc_node *p;
517:
518: assert(mdoc->first);
519: assert(mdoc->last);
520:
521: p = node_alloc(mdoc, line, pos, tok, MDOC_HEAD);
522: if (NULL == p)
523: return(0);
524: return(node_append(mdoc, p));
525: }
526:
527:
528: int
529: mdoc_body_alloc(struct mdoc *mdoc, int line, int pos, int tok)
530: {
531: struct mdoc_node *p;
532:
533: p = node_alloc(mdoc, line, pos, tok, MDOC_BODY);
534: if (NULL == p)
535: return(0);
536: return(node_append(mdoc, p));
537: }
538:
539:
540: int
541: mdoc_block_alloc(struct mdoc *mdoc, int line, int pos,
542: int tok, struct mdoc_arg *args)
543: {
544: struct mdoc_node *p;
545:
546: p = node_alloc(mdoc, line, pos, tok, MDOC_BLOCK);
547: if (NULL == p)
548: return(0);
1.4 schwarze 549: p->args = args;
550: if (p->args)
1.1 kristaps 551: (args->refcnt)++;
552: return(node_append(mdoc, p));
553: }
554:
555:
556: int
557: mdoc_elem_alloc(struct mdoc *mdoc, int line, int pos,
558: int tok, struct mdoc_arg *args)
559: {
560: struct mdoc_node *p;
561:
562: p = node_alloc(mdoc, line, pos, tok, MDOC_ELEM);
563: if (NULL == p)
564: return(0);
1.4 schwarze 565: p->args = args;
566: if (p->args)
1.1 kristaps 567: (args->refcnt)++;
568: return(node_append(mdoc, p));
569: }
570:
571:
572: int
573: mdoc_word_alloc(struct mdoc *mdoc,
574: int line, int pos, const char *word)
575: {
576: struct mdoc_node *p;
577:
578: p = node_alloc(mdoc, line, pos, -1, MDOC_TEXT);
579: if (NULL == p)
580: return(0);
581: if (NULL == (p->string = strdup(word))) {
582: (void)verr(mdoc, EMALLOC);
583: return(0);
584: }
585: return(node_append(mdoc, p));
586: }
587:
588:
589: void
590: mdoc_node_free(struct mdoc_node *p)
591: {
592:
593: if (p->string)
594: free(p->string);
595: if (p->args)
596: mdoc_argv_free(p->args);
597: free(p);
598: }
599:
600:
601: void
602: mdoc_node_freelist(struct mdoc_node *p)
603: {
604:
605: if (p->child)
606: mdoc_node_freelist(p->child);
607: if (p->next)
608: mdoc_node_freelist(p->next);
609:
610: mdoc_node_free(p);
611: }
612:
613:
614: /*
615: * Parse free-form text, that is, a line that does not begin with the
616: * control character.
617: */
618: static int
619: parsetext(struct mdoc *m, int line, char *buf)
620: {
621:
622: if (SEC_PROLOGUE == m->lastnamed)
623: return(perr(m, line, 0, ETEXTPROL));
624:
625: if (0 == buf[0] && ! (MDOC_LITERAL & m->flags))
626: return(perr(m, line, 0, ENOBLANK));
627:
628: if ( ! mdoc_word_alloc(m, line, 0, buf))
629: return(0);
630:
631: m->next = MDOC_NEXT_SIBLING;
632: return(1);
633: }
634:
635:
636: static int
637: macrowarn(struct mdoc *m, int ln, const char *buf)
638: {
639: if ( ! (MDOC_IGN_MACRO & m->pflags))
640: return(mdoc_perr(m, ln, 1,
641: "unknown macro: %s%s",
642: buf, strlen(buf) > 3 ? "..." : ""));
643: return(mdoc_pwarn(m, ln, 1, WARN_SYNTAX,
644: "unknown macro: %s%s",
645: buf, strlen(buf) > 3 ? "..." : ""));
646: }
647:
648:
649:
650: /*
651: * Parse a macro line, that is, a line beginning with the control
652: * character.
653: */
654: int
655: parsemacro(struct mdoc *m, int ln, char *buf)
656: {
657: int i, c;
658: char mac[5];
659:
660: /* Comments and empties are quickly ignored. */
661:
662: if (0 == buf[1])
663: return(1);
664:
665: if (' ' == buf[1]) {
666: i = 2;
667: while (buf[i] && ' ' == buf[i])
668: i++;
669: if (0 == buf[i])
670: return(1);
671: return(perr(m, ln, 1, ESPACE));
672: }
673:
674: if (buf[1] && '\\' == buf[1])
675: if (buf[2] && '\"' == buf[2])
676: return(1);
677:
678: /* Copy the first word into a nil-terminated buffer. */
679:
680: for (i = 1; i < 5; i++) {
681: if (0 == (mac[i - 1] = buf[i]))
682: break;
683: else if (' ' == buf[i])
684: break;
685: }
686:
687: mac[i - 1] = 0;
688:
689: if (i == 5 || i <= 2) {
690: if ( ! macrowarn(m, ln, mac))
691: goto err;
692: return(1);
693: }
694:
695: if (MDOC_MAX == (c = mdoc_hash_find(m->htab, mac))) {
696: if ( ! macrowarn(m, ln, mac))
697: goto err;
698: return(1);
699: }
700:
701: /* The macro is sane. Jump to the next word. */
702:
703: while (buf[i] && ' ' == buf[i])
704: i++;
705:
706: /* Begin recursive parse sequence. */
707:
708: if ( ! mdoc_macro(m, c, ln, 1, &i, buf))
709: goto err;
710:
711: return(1);
712:
713: err: /* Error out. */
714:
715: m->flags |= MDOC_HALT;
716: return(0);
717: }