Annotation of src/usr.bin/mandoc/mdoc.c, Revision 1.3
1.3 ! schwarze 1: /* $Id: mdoc.c,v 1.77 2009/06/12 12:52:51 kristaps Exp $ */
1.1 kristaps 2: /*
1.3 ! schwarze 3: * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
1.1 kristaps 4: *
5: * Permission to use, copy, modify, and distribute this software for any
1.3 ! schwarze 6: * purpose with or without fee is hereby granted, provided that the above
! 7: * copyright notice and this permission notice appear in all copies.
1.1 kristaps 8: *
1.3 ! schwarze 9: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
! 10: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
! 11: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
! 12: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
! 13: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
! 14: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
! 15: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
1.1 kristaps 16: */
17: #include <assert.h>
18: #include <ctype.h>
19: #include <stdarg.h>
20: #include <stdio.h>
21: #include <stdlib.h>
22: #include <string.h>
23:
24: #include "libmdoc.h"
25:
/*
 * Parser-level error classes.  Each one is mapped to a fixed message
 * by perr().
 */
enum	merr {
	ENOCALL,	/* macro is not callable */
	EBODYPROL,	/* body macro used in the prologue */
	EPROLBODY,	/* prologue macro used in the body */
	ESPACE,		/* whitespace after the control character */
	ETEXTPROL,	/* free-form text in the prologue */
	ENOBLANK,	/* blank line outside a literal context */
	EMALLOC		/* memory exhausted */
};
35:
36: const char *const __mdoc_macronames[MDOC_MAX] = {
37: "\\\"", "Dd", "Dt", "Os",
38: "Sh", "Ss", "Pp", "D1",
39: "Dl", "Bd", "Ed", "Bl",
40: "El", "It", "Ad", "An",
41: "Ar", "Cd", "Cm", "Dv",
42: "Er", "Ev", "Ex", "Fa",
43: "Fd", "Fl", "Fn", "Ft",
44: "Ic", "In", "Li", "Nd",
45: "Nm", "Op", "Ot", "Pa",
46: "Rv", "St", "Va", "Vt",
47: /* LINTED */
48: "Xr", "\%A", "\%B", "\%D",
49: /* LINTED */
50: "\%I", "\%J", "\%N", "\%O",
51: /* LINTED */
52: "\%P", "\%R", "\%T", "\%V",
53: "Ac", "Ao", "Aq", "At",
54: "Bc", "Bf", "Bo", "Bq",
55: "Bsx", "Bx", "Db", "Dc",
56: "Do", "Dq", "Ec", "Ef",
57: "Em", "Eo", "Fx", "Ms",
58: "No", "Ns", "Nx", "Ox",
59: "Pc", "Pf", "Po", "Pq",
60: "Qc", "Ql", "Qo", "Qq",
61: "Re", "Rs", "Sc", "So",
62: "Sq", "Sm", "Sx", "Sy",
63: "Tn", "Ux", "Xc", "Xo",
64: "Fo", "Fc", "Oo", "Oc",
65: "Bk", "Ek", "Bt", "Hf",
66: "Fr", "Ud", "Lb", "Ap",
67: "Lp", "Lk", "Mt", "Brq",
68: /* LINTED */
69: "Bro", "Brc", "\%C", "Es",
70: /* LINTED */
71: "En", "Dx", "\%Q"
72: };
73:
74: const char *const __mdoc_argnames[MDOC_ARG_MAX] = {
75: "split", "nosplit", "ragged",
76: "unfilled", "literal", "file",
77: "offset", "bullet", "dash",
78: "hyphen", "item", "enum",
79: "tag", "diag", "hang",
80: "ohang", "inset", "column",
81: "width", "compact", "std",
82: "filled", "words", "emphasis",
83: "symbolic", "nested"
84: };
85:
86: const char * const *mdoc_macronames = __mdoc_macronames;
87: const char * const *mdoc_argnames = __mdoc_argnames;
88:
/* File-local helpers. */
static	void		  mdoc_free1(struct mdoc *mdoc);
static	int		  mdoc_alloc1(struct mdoc *mdoc);
static	struct mdoc_node *node_alloc(struct mdoc *mdoc, int line,
				int pos, int tok, enum mdoc_type type);
static	int		  node_append(struct mdoc *mdoc,
				struct mdoc_node *p);
static	int		  parsetext(struct mdoc *m, int line, char *buf);
static	int		  parsemacro(struct mdoc *m, int ln, char *buf);
static	int		  macrowarn(struct mdoc *m, int ln,
				const char *buf);
static	int		  perr(struct mdoc *m, int line, int pos,
				enum merr type);

/* Raise error `t' at the line/column of the most recently added node. */
#define	verr(m, t) \
	perr((m), (m)->last->line, (m)->last->pos, (t))
101:
102: /*
103: * Get the first (root) node of the parse tree.
104: */
105: const struct mdoc_node *
106: mdoc_node(const struct mdoc *m)
107: {
108:
109: return(MDOC_HALT & m->flags ? NULL : m->first);
110: }
111:
112:
113: const struct mdoc_meta *
114: mdoc_meta(const struct mdoc *m)
115: {
116:
117: return(MDOC_HALT & m->flags ? NULL : &m->meta);
118: }
119:
120:
121: static void
122: mdoc_free1(struct mdoc *mdoc)
123: {
124:
125: if (mdoc->first)
126: mdoc_node_freelist(mdoc->first);
127: if (mdoc->meta.title)
128: free(mdoc->meta.title);
129: if (mdoc->meta.os)
130: free(mdoc->meta.os);
131: if (mdoc->meta.name)
132: free(mdoc->meta.name);
133: if (mdoc->meta.arch)
134: free(mdoc->meta.arch);
135: if (mdoc->meta.vol)
136: free(mdoc->meta.vol);
137: }
138:
139:
140: static int
141: mdoc_alloc1(struct mdoc *mdoc)
142: {
143:
144: bzero(&mdoc->meta, sizeof(struct mdoc_meta));
145: mdoc->flags = 0;
146: mdoc->lastnamed = mdoc->lastsec = 0;
147: mdoc->last = calloc(1, sizeof(struct mdoc_node));
148: if (NULL == mdoc->last)
149: return(0);
150:
151: mdoc->first = mdoc->last;
152: mdoc->last->type = MDOC_ROOT;
153: mdoc->next = MDOC_NEXT_CHILD;
154: return(1);
155: }
156:
157:
/*
 * Discard the results of the previous parse (node tree and meta-data)
 * and set up a new root node so the parser can be reused.  Returns the
 * result of the re-initialisation: 1 on success, 0 on failure.
 */
int
mdoc_reset(struct mdoc *mdoc)
{
	int		 rc;

	mdoc_free1(mdoc);
	rc = mdoc_alloc1(mdoc);
	return(rc);
}
170:
171:
172: /*
173: * Completely free up all resources.
174: */
175: void
176: mdoc_free(struct mdoc *mdoc)
177: {
178:
179: mdoc_free1(mdoc);
180: if (mdoc->htab)
181: mdoc_hash_free(mdoc->htab);
182: free(mdoc);
183: }
184:
185:
186: struct mdoc *
187: mdoc_alloc(void *data, int pflags, const struct mdoc_cb *cb)
188: {
189: struct mdoc *p;
190:
191: if (NULL == (p = calloc(1, sizeof(struct mdoc))))
192: return(NULL);
193: if (cb)
194: (void)memcpy(&p->cb, cb, sizeof(struct mdoc_cb));
195:
196: p->data = data;
197: p->pflags = pflags;
198:
199: if (NULL == (p->htab = mdoc_hash_alloc())) {
200: free(p);
201: return(NULL);
202: } else if (mdoc_alloc1(p))
203: return(p);
204:
205: free(p);
206: return(NULL);
207: }
208:
209:
210: /*
211: * Climb back up the parse tree, validating open scopes. Mostly calls
212: * through to macro_end in macro.c.
213: */
214: int
215: mdoc_endparse(struct mdoc *m)
216: {
217:
218: if (MDOC_HALT & m->flags)
219: return(0);
220: else if (mdoc_macroend(m))
221: return(1);
222: m->flags |= MDOC_HALT;
223: return(0);
224: }
225:
226:
227: /*
228: * Main parse routine. Parses a single line -- really just hands off to
229: * the macro or text parser.
230: */
231: int
232: mdoc_parseln(struct mdoc *m, int ln, char *buf)
233: {
234:
235: /* If in error-mode, then we parse no more. */
236:
237: if (MDOC_HALT & m->flags)
238: return(0);
239:
240: return('.' == *buf ? parsemacro(m, ln, buf) :
241: parsetext(m, ln, buf));
242: }
243:
244:
245: void
246: mdoc_vmsg(struct mdoc *mdoc, int ln, int pos, const char *fmt, ...)
247: {
248: char buf[256];
249: va_list ap;
250:
251: if (NULL == mdoc->cb.mdoc_msg)
252: return;
253:
254: va_start(ap, fmt);
255: (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
256: va_end(ap);
257: (*mdoc->cb.mdoc_msg)(mdoc->data, ln, pos, buf);
258: }
259:
260:
261: int
1.2 miod 262: mdoc_verr(struct mdoc *mdoc, int ln, int pos, const char *fmt, ...)
1.1 kristaps 263: {
264: char buf[256];
265: va_list ap;
266:
267: if (NULL == mdoc->cb.mdoc_err)
268: return(0);
269:
270: va_start(ap, fmt);
271: (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
272: va_end(ap);
273: return((*mdoc->cb.mdoc_err)(mdoc->data, ln, pos, buf));
274: }
275:
276:
277: int
278: mdoc_vwarn(struct mdoc *mdoc, int ln, int pos,
279: enum mdoc_warn type, const char *fmt, ...)
280: {
281: char buf[256];
282: va_list ap;
283:
284: if (NULL == mdoc->cb.mdoc_warn)
285: return(0);
286:
287: va_start(ap, fmt);
288: (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
289: va_end(ap);
290: return((*mdoc->cb.mdoc_warn)(mdoc->data, ln, pos, type, buf));
1.2 miod 291: }
292:
293:
294: int
295: mdoc_nwarn(struct mdoc *mdoc, const struct mdoc_node *node, enum mdoc_warn type,
296: const char *fmt, ...)
297: {
298: char buf[256];
299: va_list ap;
300:
301: if (NULL == mdoc->cb.mdoc_warn)
302: return(0);
303:
304: va_start(ap, fmt);
305: (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
306: va_end(ap);
307: return((*mdoc->cb.mdoc_warn)(mdoc->data, node->line, node->pos, type,
308: buf));
309: }
310:
311: int
312: mdoc_nerr(struct mdoc *mdoc, const struct mdoc_node *node, const char *fmt, ...)
313: {
314: char buf[256];
315: va_list ap;
316:
317: if (NULL == mdoc->cb.mdoc_err)
318: return(0);
319:
320: va_start(ap, fmt);
321: (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
322: va_end(ap);
323: return((*mdoc->cb.mdoc_err)(mdoc->data, node->line, node->pos, buf));
324: }
325:
326:
327: int
328: mdoc_warn(struct mdoc *mdoc, enum mdoc_warn type, const char *fmt, ...)
329: {
330: char buf[256];
331: va_list ap;
332:
333: if (NULL == mdoc->cb.mdoc_warn)
334: return(0);
335:
336: va_start(ap, fmt);
337: (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
338: va_end(ap);
339: return((*mdoc->cb.mdoc_warn)(mdoc->data, mdoc->last->line,
340: mdoc->last->pos, type, buf));
341: }
342:
343:
344: int
345: mdoc_err(struct mdoc *mdoc, const char *fmt, ...)
346: {
347: char buf[256];
348: va_list ap;
349:
350: if (NULL == mdoc->cb.mdoc_err)
351: return(0);
352:
353: va_start(ap, fmt);
354: (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
355: va_end(ap);
356: return((*mdoc->cb.mdoc_err)(mdoc->data, mdoc->last->line,
357: mdoc->last->pos, buf));
358: }
359:
360:
361: void
362: mdoc_msg(struct mdoc *mdoc, const char *fmt, ...)
363: {
364: char buf[256];
365: va_list ap;
366:
367: if (NULL == mdoc->cb.mdoc_msg)
368: return;
369:
370: va_start(ap, fmt);
371: (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
372: va_end(ap);
373: (*mdoc->cb.mdoc_msg)(mdoc->data, mdoc->last->line, mdoc->last->pos,
374: buf);
375: }
376:
377:
378: void
379: mdoc_pmsg(struct mdoc *mdoc, int line, int pos, const char *fmt, ...)
380: {
381: char buf[256];
382: va_list ap;
383:
384: if (NULL == mdoc->cb.mdoc_msg)
385: return;
386:
387: va_start(ap, fmt);
388: (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
389: va_end(ap);
390: (*mdoc->cb.mdoc_msg)(mdoc->data, line, pos, buf);
391: }
392:
393:
394: int
395: mdoc_pwarn(struct mdoc *mdoc, int line, int pos, enum mdoc_warn type,
396: const char *fmt, ...)
397: {
398: char buf[256];
399: va_list ap;
400:
401: if (NULL == mdoc->cb.mdoc_warn)
402: return(0);
403:
404: va_start(ap, fmt);
405: (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
406: va_end(ap);
407: return((*mdoc->cb.mdoc_warn)(mdoc->data, line, pos, type, buf));
408: }
409:
410: int
411: mdoc_perr(struct mdoc *mdoc, int line, int pos, const char *fmt, ...)
412: {
413: char buf[256];
414: va_list ap;
415:
416: if (NULL == mdoc->cb.mdoc_err)
417: return(0);
418:
419: va_start(ap, fmt);
420: (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
421: va_end(ap);
422: return((*mdoc->cb.mdoc_err)(mdoc->data, line, pos, buf));
1.1 kristaps 423: }
424:
425:
426: int
427: mdoc_macro(struct mdoc *m, int tok,
428: int ln, int pp, int *pos, char *buf)
429: {
430:
431: /* FIXME - these should happen during validation. */
432:
433: if (MDOC_PROLOGUE & mdoc_macros[tok].flags &&
434: SEC_PROLOGUE != m->lastnamed)
435: return(perr(m, ln, pp, EPROLBODY));
436:
437: if ( ! (MDOC_PROLOGUE & mdoc_macros[tok].flags) &&
438: SEC_PROLOGUE == m->lastnamed)
439: return(perr(m, ln, pp, EBODYPROL));
440:
441: if (1 != pp && ! (MDOC_CALLABLE & mdoc_macros[tok].flags))
442: return(perr(m, ln, pp, ENOCALL));
443:
444: return((*mdoc_macros[tok].fp)(m, tok, ln, pp, pos, buf));
445: }
446:
447:
448: static int
449: perr(struct mdoc *m, int line, int pos, enum merr type)
450: {
451: char *p;
452:
453: p = NULL;
454: switch (type) {
455: case (ENOCALL):
456: p = "not callable";
457: break;
458: case (EPROLBODY):
459: p = "macro disallowed in document body";
460: break;
461: case (EBODYPROL):
462: p = "macro disallowed in document prologue";
463: break;
464: case (EMALLOC):
465: p = "memory exhausted";
466: break;
467: case (ETEXTPROL):
468: p = "text disallowed in document prologue";
469: break;
470: case (ENOBLANK):
471: p = "blank lines disallowed in non-literal contexts";
472: break;
473: case (ESPACE):
474: p = "whitespace disallowed after delimiter";
475: break;
476: }
477: assert(p);
478: return(mdoc_perr(m, line, pos, p));
479: }
480:
481:
482: static int
483: node_append(struct mdoc *mdoc, struct mdoc_node *p)
484: {
485:
486: assert(mdoc->last);
487: assert(mdoc->first);
488: assert(MDOC_ROOT != p->type);
489:
490: switch (mdoc->next) {
491: case (MDOC_NEXT_SIBLING):
492: mdoc->last->next = p;
493: p->prev = mdoc->last;
494: p->parent = mdoc->last->parent;
495: break;
496: case (MDOC_NEXT_CHILD):
497: mdoc->last->child = p;
498: p->parent = mdoc->last;
499: break;
500: default:
501: abort();
502: /* NOTREACHED */
503: }
504:
505: if ( ! mdoc_valid_pre(mdoc, p))
506: return(0);
507: if ( ! mdoc_action_pre(mdoc, p))
508: return(0);
509:
510: switch (p->type) {
511: case (MDOC_HEAD):
512: assert(MDOC_BLOCK == p->parent->type);
513: p->parent->head = p;
514: break;
515: case (MDOC_TAIL):
516: assert(MDOC_BLOCK == p->parent->type);
517: p->parent->tail = p;
518: break;
519: case (MDOC_BODY):
520: assert(MDOC_BLOCK == p->parent->type);
521: p->parent->body = p;
522: break;
523: default:
524: break;
525: }
526:
527: mdoc->last = p;
528:
529: switch (p->type) {
530: case (MDOC_TEXT):
531: if ( ! mdoc_valid_post(mdoc))
532: return(0);
533: if ( ! mdoc_action_post(mdoc))
534: return(0);
535: break;
536: default:
537: break;
538: }
539:
540: return(1);
541: }
542:
543:
544: static struct mdoc_node *
545: node_alloc(struct mdoc *mdoc, int line,
546: int pos, int tok, enum mdoc_type type)
547: {
548: struct mdoc_node *p;
549:
550: if (NULL == (p = calloc(1, sizeof(struct mdoc_node)))) {
551: (void)verr(mdoc, EMALLOC);
552: return(NULL);
553: }
554:
555: p->sec = mdoc->lastsec;
556: p->line = line;
557: p->pos = pos;
558: p->tok = tok;
559: if (MDOC_TEXT != (p->type = type))
560: assert(p->tok >= 0);
561:
562: return(p);
563: }
564:
565:
566: int
567: mdoc_tail_alloc(struct mdoc *mdoc, int line, int pos, int tok)
568: {
569: struct mdoc_node *p;
570:
571: p = node_alloc(mdoc, line, pos, tok, MDOC_TAIL);
572: if (NULL == p)
573: return(0);
574: return(node_append(mdoc, p));
575: }
576:
577:
578: int
579: mdoc_head_alloc(struct mdoc *mdoc, int line, int pos, int tok)
580: {
581: struct mdoc_node *p;
582:
583: assert(mdoc->first);
584: assert(mdoc->last);
585:
586: p = node_alloc(mdoc, line, pos, tok, MDOC_HEAD);
587: if (NULL == p)
588: return(0);
589: return(node_append(mdoc, p));
590: }
591:
592:
593: int
594: mdoc_body_alloc(struct mdoc *mdoc, int line, int pos, int tok)
595: {
596: struct mdoc_node *p;
597:
598: p = node_alloc(mdoc, line, pos, tok, MDOC_BODY);
599: if (NULL == p)
600: return(0);
601: return(node_append(mdoc, p));
602: }
603:
604:
605: int
606: mdoc_block_alloc(struct mdoc *mdoc, int line, int pos,
607: int tok, struct mdoc_arg *args)
608: {
609: struct mdoc_node *p;
610:
611: p = node_alloc(mdoc, line, pos, tok, MDOC_BLOCK);
612: if (NULL == p)
613: return(0);
614: if ((p->args = args))
615: (args->refcnt)++;
616: return(node_append(mdoc, p));
617: }
618:
619:
620: int
621: mdoc_elem_alloc(struct mdoc *mdoc, int line, int pos,
622: int tok, struct mdoc_arg *args)
623: {
624: struct mdoc_node *p;
625:
626: p = node_alloc(mdoc, line, pos, tok, MDOC_ELEM);
627: if (NULL == p)
628: return(0);
629: if ((p->args = args))
630: (args->refcnt)++;
631: return(node_append(mdoc, p));
632: }
633:
634:
635: int
636: mdoc_word_alloc(struct mdoc *mdoc,
637: int line, int pos, const char *word)
638: {
639: struct mdoc_node *p;
640:
641: p = node_alloc(mdoc, line, pos, -1, MDOC_TEXT);
642: if (NULL == p)
643: return(0);
644: if (NULL == (p->string = strdup(word))) {
645: (void)verr(mdoc, EMALLOC);
646: return(0);
647: }
648: return(node_append(mdoc, p));
649: }
650:
651:
652: void
653: mdoc_node_free(struct mdoc_node *p)
654: {
655:
656: if (p->string)
657: free(p->string);
658: if (p->args)
659: mdoc_argv_free(p->args);
660: free(p);
661: }
662:
663:
664: void
665: mdoc_node_freelist(struct mdoc_node *p)
666: {
667:
668: if (p->child)
669: mdoc_node_freelist(p->child);
670: if (p->next)
671: mdoc_node_freelist(p->next);
672:
673: mdoc_node_free(p);
674: }
675:
676:
677: /*
678: * Parse free-form text, that is, a line that does not begin with the
679: * control character.
680: */
681: static int
682: parsetext(struct mdoc *m, int line, char *buf)
683: {
684:
685: if (SEC_PROLOGUE == m->lastnamed)
686: return(perr(m, line, 0, ETEXTPROL));
687:
688: if (0 == buf[0] && ! (MDOC_LITERAL & m->flags))
689: return(perr(m, line, 0, ENOBLANK));
690:
691: if ( ! mdoc_word_alloc(m, line, 0, buf))
692: return(0);
693:
694: m->next = MDOC_NEXT_SIBLING;
695: return(1);
696: }
697:
698:
699: static int
700: macrowarn(struct mdoc *m, int ln, const char *buf)
701: {
702: if ( ! (MDOC_IGN_MACRO & m->pflags))
703: return(mdoc_perr(m, ln, 1,
704: "unknown macro: %s%s",
705: buf, strlen(buf) > 3 ? "..." : ""));
706: return(mdoc_pwarn(m, ln, 1, WARN_SYNTAX,
707: "unknown macro: %s%s",
708: buf, strlen(buf) > 3 ? "..." : ""));
709: }
710:
711:
712:
713: /*
714: * Parse a macro line, that is, a line beginning with the control
715: * character.
716: */
717: int
718: parsemacro(struct mdoc *m, int ln, char *buf)
719: {
720: int i, c;
721: char mac[5];
722:
723: /* Comments and empties are quickly ignored. */
724:
725: if (0 == buf[1])
726: return(1);
727:
728: if (' ' == buf[1]) {
729: i = 2;
730: while (buf[i] && ' ' == buf[i])
731: i++;
732: if (0 == buf[i])
733: return(1);
734: return(perr(m, ln, 1, ESPACE));
735: }
736:
737: if (buf[1] && '\\' == buf[1])
738: if (buf[2] && '\"' == buf[2])
739: return(1);
740:
741: /* Copy the first word into a nil-terminated buffer. */
742:
743: for (i = 1; i < 5; i++) {
744: if (0 == (mac[i - 1] = buf[i]))
745: break;
746: else if (' ' == buf[i])
747: break;
748: }
749:
750: mac[i - 1] = 0;
751:
752: if (i == 5 || i <= 2) {
753: if ( ! macrowarn(m, ln, mac))
754: goto err;
755: return(1);
756: }
757:
758: if (MDOC_MAX == (c = mdoc_hash_find(m->htab, mac))) {
759: if ( ! macrowarn(m, ln, mac))
760: goto err;
761: return(1);
762: }
763:
764: /* The macro is sane. Jump to the next word. */
765:
766: while (buf[i] && ' ' == buf[i])
767: i++;
768:
769: /* Begin recursive parse sequence. */
770:
771: if ( ! mdoc_macro(m, c, ln, 1, &i, buf))
772: goto err;
773:
774: return(1);
775:
776: err: /* Error out. */
777:
778: m->flags |= MDOC_HALT;
779: return(0);
780: }