Annotation of src/usr.bin/mandoc/mdoc.c, Revision 1.4
1.4 ! schwarze 1: /* $Id: mdoc.c,v 1.3 2009/06/14 23:00:57 schwarze Exp $ */
1.1 kristaps 2: /*
1.3 schwarze 3: * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
1.1 kristaps 4: *
5: * Permission to use, copy, modify, and distribute this software for any
1.3 schwarze 6: * purpose with or without fee is hereby granted, provided that the above
7: * copyright notice and this permission notice appear in all copies.
1.1 kristaps 8: *
1.3 schwarze 9: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
1.1 kristaps 16: */
17: #include <assert.h>
18: #include <ctype.h>
19: #include <stdarg.h>
20: #include <stdio.h>
21: #include <stdlib.h>
22: #include <string.h>
23:
24: #include "libmdoc.h"
25:
/*
 * Parser error conditions.  Each value is mapped to a human-readable
 * message by perr().
 */
enum	merr {
	ENOCALL,	/* macro is not callable */
	EBODYPROL,	/* body macro used in the prologue */
	EPROLBODY,	/* prologue macro used in the body */
	ESPACE,		/* whitespace after the control character */
	ETEXTPROL,	/* free-form text in the prologue */
	ENOBLANK,	/* blank line in a non-literal context */
	EMALLOC		/* memory exhausted */
};
35:
36: const char *const __mdoc_macronames[MDOC_MAX] = {
37: "\\\"", "Dd", "Dt", "Os",
38: "Sh", "Ss", "Pp", "D1",
39: "Dl", "Bd", "Ed", "Bl",
40: "El", "It", "Ad", "An",
41: "Ar", "Cd", "Cm", "Dv",
42: "Er", "Ev", "Ex", "Fa",
43: "Fd", "Fl", "Fn", "Ft",
44: "Ic", "In", "Li", "Nd",
45: "Nm", "Op", "Ot", "Pa",
46: "Rv", "St", "Va", "Vt",
47: /* LINTED */
48: "Xr", "\%A", "\%B", "\%D",
49: /* LINTED */
50: "\%I", "\%J", "\%N", "\%O",
51: /* LINTED */
52: "\%P", "\%R", "\%T", "\%V",
53: "Ac", "Ao", "Aq", "At",
54: "Bc", "Bf", "Bo", "Bq",
55: "Bsx", "Bx", "Db", "Dc",
56: "Do", "Dq", "Ec", "Ef",
57: "Em", "Eo", "Fx", "Ms",
58: "No", "Ns", "Nx", "Ox",
59: "Pc", "Pf", "Po", "Pq",
60: "Qc", "Ql", "Qo", "Qq",
61: "Re", "Rs", "Sc", "So",
62: "Sq", "Sm", "Sx", "Sy",
63: "Tn", "Ux", "Xc", "Xo",
64: "Fo", "Fc", "Oo", "Oc",
65: "Bk", "Ek", "Bt", "Hf",
66: "Fr", "Ud", "Lb", "Ap",
67: "Lp", "Lk", "Mt", "Brq",
68: /* LINTED */
69: "Bro", "Brc", "\%C", "Es",
70: /* LINTED */
71: "En", "Dx", "\%Q"
72: };
73:
74: const char *const __mdoc_argnames[MDOC_ARG_MAX] = {
75: "split", "nosplit", "ragged",
76: "unfilled", "literal", "file",
77: "offset", "bullet", "dash",
78: "hyphen", "item", "enum",
79: "tag", "diag", "hang",
80: "ohang", "inset", "column",
81: "width", "compact", "std",
82: "filled", "words", "emphasis",
83: "symbolic", "nested"
84: };
85:
86: const char * const *mdoc_macronames = __mdoc_macronames;
87: const char * const *mdoc_argnames = __mdoc_argnames;
88:
/* File-local helpers. */
static	void		  mdoc_free1(struct mdoc *mdoc);
static	int		  mdoc_alloc1(struct mdoc *mdoc);
static	struct mdoc_node *node_alloc(struct mdoc *mdoc, int line,
				int pos, int tok, enum mdoc_type type);
static	int		  node_append(struct mdoc *mdoc,
				struct mdoc_node *p);
static	int		  parsetext(struct mdoc *m, int line, char *buf);
static	int		  parsemacro(struct mdoc *m, int ln, char *buf);
static	int		  macrowarn(struct mdoc *m, int ln,
				const char *buf);
static	int		  perr(struct mdoc *m, int line, int pos,
				enum merr type);
99:
/*
 * Report error `etype' at the line and column recorded in the parser's
 * current `last' node.
 */
#define	verr(parser, etype) \
	perr((parser), (parser)->last->line, (parser)->last->pos, (etype))
101:
102: /*
103: * Get the first (root) node of the parse tree.
104: */
105: const struct mdoc_node *
106: mdoc_node(const struct mdoc *m)
107: {
108:
109: return(MDOC_HALT & m->flags ? NULL : m->first);
110: }
111:
112:
113: const struct mdoc_meta *
114: mdoc_meta(const struct mdoc *m)
115: {
116:
117: return(MDOC_HALT & m->flags ? NULL : &m->meta);
118: }
119:
120:
121: static void
122: mdoc_free1(struct mdoc *mdoc)
123: {
124:
125: if (mdoc->first)
126: mdoc_node_freelist(mdoc->first);
127: if (mdoc->meta.title)
128: free(mdoc->meta.title);
129: if (mdoc->meta.os)
130: free(mdoc->meta.os);
131: if (mdoc->meta.name)
132: free(mdoc->meta.name);
133: if (mdoc->meta.arch)
134: free(mdoc->meta.arch);
135: if (mdoc->meta.vol)
136: free(mdoc->meta.vol);
137: }
138:
139:
140: static int
141: mdoc_alloc1(struct mdoc *mdoc)
142: {
143:
144: bzero(&mdoc->meta, sizeof(struct mdoc_meta));
145: mdoc->flags = 0;
146: mdoc->lastnamed = mdoc->lastsec = 0;
147: mdoc->last = calloc(1, sizeof(struct mdoc_node));
148: if (NULL == mdoc->last)
149: return(0);
150:
151: mdoc->first = mdoc->last;
152: mdoc->last->type = MDOC_ROOT;
153: mdoc->next = MDOC_NEXT_CHILD;
154: return(1);
155: }
156:
157:
/*
 * Discard the results of the previous parse (node tree and meta-data)
 * and set the parser up with a new root node so it can be used again.
 * Returns the result of the re-initialisation: 1 on success, 0 on
 * allocation failure.
 */
int
mdoc_reset(struct mdoc *mdoc)
{
	int		 rc;

	mdoc_free1(mdoc);
	rc = mdoc_alloc1(mdoc);
	return(rc);
}
170:
171:
172: /*
173: * Completely free up all resources.
174: */
175: void
176: mdoc_free(struct mdoc *mdoc)
177: {
178:
179: mdoc_free1(mdoc);
180: if (mdoc->htab)
181: mdoc_hash_free(mdoc->htab);
182: free(mdoc);
183: }
184:
185:
186: struct mdoc *
187: mdoc_alloc(void *data, int pflags, const struct mdoc_cb *cb)
188: {
189: struct mdoc *p;
190:
191: if (NULL == (p = calloc(1, sizeof(struct mdoc))))
192: return(NULL);
193: if (cb)
194: (void)memcpy(&p->cb, cb, sizeof(struct mdoc_cb));
195:
196: p->data = data;
197: p->pflags = pflags;
198:
199: if (NULL == (p->htab = mdoc_hash_alloc())) {
200: free(p);
201: return(NULL);
202: } else if (mdoc_alloc1(p))
203: return(p);
204:
205: free(p);
206: return(NULL);
207: }
208:
209:
210: /*
211: * Climb back up the parse tree, validating open scopes. Mostly calls
212: * through to macro_end in macro.c.
213: */
214: int
215: mdoc_endparse(struct mdoc *m)
216: {
217:
218: if (MDOC_HALT & m->flags)
219: return(0);
220: else if (mdoc_macroend(m))
221: return(1);
222: m->flags |= MDOC_HALT;
223: return(0);
224: }
225:
226:
227: /*
228: * Main parse routine. Parses a single line -- really just hands off to
229: * the macro or text parser.
230: */
231: int
232: mdoc_parseln(struct mdoc *m, int ln, char *buf)
233: {
234:
235: /* If in error-mode, then we parse no more. */
236:
237: if (MDOC_HALT & m->flags)
238: return(0);
239:
240: return('.' == *buf ? parsemacro(m, ln, buf) :
241: parsetext(m, ln, buf));
242: }
243:
244:
245: void
246: mdoc_vmsg(struct mdoc *mdoc, int ln, int pos, const char *fmt, ...)
247: {
248: char buf[256];
249: va_list ap;
250:
251: if (NULL == mdoc->cb.mdoc_msg)
252: return;
253:
254: va_start(ap, fmt);
255: (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
256: va_end(ap);
257: (*mdoc->cb.mdoc_msg)(mdoc->data, ln, pos, buf);
258: }
259:
260:
261: int
1.4 ! schwarze 262: mdoc_verr(struct mdoc *mdoc, int ln, int pos,
! 263: const char *fmt, ...)
1.1 kristaps 264: {
265: char buf[256];
266: va_list ap;
267:
268: if (NULL == mdoc->cb.mdoc_err)
269: return(0);
270:
271: va_start(ap, fmt);
272: (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
273: va_end(ap);
274: return((*mdoc->cb.mdoc_err)(mdoc->data, ln, pos, buf));
275: }
276:
277:
278: int
279: mdoc_vwarn(struct mdoc *mdoc, int ln, int pos,
280: enum mdoc_warn type, const char *fmt, ...)
281: {
282: char buf[256];
283: va_list ap;
284:
285: if (NULL == mdoc->cb.mdoc_warn)
286: return(0);
287:
288: va_start(ap, fmt);
289: (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
290: va_end(ap);
291: return((*mdoc->cb.mdoc_warn)(mdoc->data, ln, pos, type, buf));
1.2 miod 292: }
293:
294:
295: int
1.1 kristaps 296: mdoc_macro(struct mdoc *m, int tok,
297: int ln, int pp, int *pos, char *buf)
298: {
299:
300: /* FIXME - these should happen during validation. */
301:
302: if (MDOC_PROLOGUE & mdoc_macros[tok].flags &&
303: SEC_PROLOGUE != m->lastnamed)
304: return(perr(m, ln, pp, EPROLBODY));
305:
306: if ( ! (MDOC_PROLOGUE & mdoc_macros[tok].flags) &&
307: SEC_PROLOGUE == m->lastnamed)
308: return(perr(m, ln, pp, EBODYPROL));
309:
310: if (1 != pp && ! (MDOC_CALLABLE & mdoc_macros[tok].flags))
311: return(perr(m, ln, pp, ENOCALL));
312:
313: return((*mdoc_macros[tok].fp)(m, tok, ln, pp, pos, buf));
314: }
315:
316:
317: static int
318: perr(struct mdoc *m, int line, int pos, enum merr type)
319: {
320: char *p;
321:
322: p = NULL;
323: switch (type) {
324: case (ENOCALL):
325: p = "not callable";
326: break;
327: case (EPROLBODY):
328: p = "macro disallowed in document body";
329: break;
330: case (EBODYPROL):
331: p = "macro disallowed in document prologue";
332: break;
333: case (EMALLOC):
334: p = "memory exhausted";
335: break;
336: case (ETEXTPROL):
337: p = "text disallowed in document prologue";
338: break;
339: case (ENOBLANK):
340: p = "blank lines disallowed in non-literal contexts";
341: break;
342: case (ESPACE):
343: p = "whitespace disallowed after delimiter";
344: break;
345: }
346: assert(p);
347: return(mdoc_perr(m, line, pos, p));
348: }
349:
350:
351: static int
352: node_append(struct mdoc *mdoc, struct mdoc_node *p)
353: {
354:
355: assert(mdoc->last);
356: assert(mdoc->first);
357: assert(MDOC_ROOT != p->type);
358:
359: switch (mdoc->next) {
360: case (MDOC_NEXT_SIBLING):
361: mdoc->last->next = p;
362: p->prev = mdoc->last;
363: p->parent = mdoc->last->parent;
364: break;
365: case (MDOC_NEXT_CHILD):
366: mdoc->last->child = p;
367: p->parent = mdoc->last;
368: break;
369: default:
370: abort();
371: /* NOTREACHED */
372: }
373:
374: if ( ! mdoc_valid_pre(mdoc, p))
375: return(0);
376: if ( ! mdoc_action_pre(mdoc, p))
377: return(0);
378:
379: switch (p->type) {
380: case (MDOC_HEAD):
381: assert(MDOC_BLOCK == p->parent->type);
382: p->parent->head = p;
383: break;
384: case (MDOC_TAIL):
385: assert(MDOC_BLOCK == p->parent->type);
386: p->parent->tail = p;
387: break;
388: case (MDOC_BODY):
389: assert(MDOC_BLOCK == p->parent->type);
390: p->parent->body = p;
391: break;
392: default:
393: break;
394: }
395:
396: mdoc->last = p;
397:
398: switch (p->type) {
399: case (MDOC_TEXT):
400: if ( ! mdoc_valid_post(mdoc))
401: return(0);
402: if ( ! mdoc_action_post(mdoc))
403: return(0);
404: break;
405: default:
406: break;
407: }
408:
409: return(1);
410: }
411:
412:
413: static struct mdoc_node *
414: node_alloc(struct mdoc *mdoc, int line,
415: int pos, int tok, enum mdoc_type type)
416: {
417: struct mdoc_node *p;
418:
419: if (NULL == (p = calloc(1, sizeof(struct mdoc_node)))) {
420: (void)verr(mdoc, EMALLOC);
421: return(NULL);
422: }
423:
424: p->sec = mdoc->lastsec;
425: p->line = line;
426: p->pos = pos;
427: p->tok = tok;
428: if (MDOC_TEXT != (p->type = type))
429: assert(p->tok >= 0);
430:
431: return(p);
432: }
433:
434:
435: int
436: mdoc_tail_alloc(struct mdoc *mdoc, int line, int pos, int tok)
437: {
438: struct mdoc_node *p;
439:
440: p = node_alloc(mdoc, line, pos, tok, MDOC_TAIL);
441: if (NULL == p)
442: return(0);
443: return(node_append(mdoc, p));
444: }
445:
446:
447: int
448: mdoc_head_alloc(struct mdoc *mdoc, int line, int pos, int tok)
449: {
450: struct mdoc_node *p;
451:
452: assert(mdoc->first);
453: assert(mdoc->last);
454:
455: p = node_alloc(mdoc, line, pos, tok, MDOC_HEAD);
456: if (NULL == p)
457: return(0);
458: return(node_append(mdoc, p));
459: }
460:
461:
462: int
463: mdoc_body_alloc(struct mdoc *mdoc, int line, int pos, int tok)
464: {
465: struct mdoc_node *p;
466:
467: p = node_alloc(mdoc, line, pos, tok, MDOC_BODY);
468: if (NULL == p)
469: return(0);
470: return(node_append(mdoc, p));
471: }
472:
473:
474: int
475: mdoc_block_alloc(struct mdoc *mdoc, int line, int pos,
476: int tok, struct mdoc_arg *args)
477: {
478: struct mdoc_node *p;
479:
480: p = node_alloc(mdoc, line, pos, tok, MDOC_BLOCK);
481: if (NULL == p)
482: return(0);
1.4 ! schwarze 483: p->args = args;
! 484: if (p->args)
1.1 kristaps 485: (args->refcnt)++;
486: return(node_append(mdoc, p));
487: }
488:
489:
490: int
491: mdoc_elem_alloc(struct mdoc *mdoc, int line, int pos,
492: int tok, struct mdoc_arg *args)
493: {
494: struct mdoc_node *p;
495:
496: p = node_alloc(mdoc, line, pos, tok, MDOC_ELEM);
497: if (NULL == p)
498: return(0);
1.4 ! schwarze 499: p->args = args;
! 500: if (p->args)
1.1 kristaps 501: (args->refcnt)++;
502: return(node_append(mdoc, p));
503: }
504:
505:
506: int
507: mdoc_word_alloc(struct mdoc *mdoc,
508: int line, int pos, const char *word)
509: {
510: struct mdoc_node *p;
511:
512: p = node_alloc(mdoc, line, pos, -1, MDOC_TEXT);
513: if (NULL == p)
514: return(0);
515: if (NULL == (p->string = strdup(word))) {
516: (void)verr(mdoc, EMALLOC);
517: return(0);
518: }
519: return(node_append(mdoc, p));
520: }
521:
522:
523: void
524: mdoc_node_free(struct mdoc_node *p)
525: {
526:
527: if (p->string)
528: free(p->string);
529: if (p->args)
530: mdoc_argv_free(p->args);
531: free(p);
532: }
533:
534:
535: void
536: mdoc_node_freelist(struct mdoc_node *p)
537: {
538:
539: if (p->child)
540: mdoc_node_freelist(p->child);
541: if (p->next)
542: mdoc_node_freelist(p->next);
543:
544: mdoc_node_free(p);
545: }
546:
547:
548: /*
549: * Parse free-form text, that is, a line that does not begin with the
550: * control character.
551: */
552: static int
553: parsetext(struct mdoc *m, int line, char *buf)
554: {
555:
556: if (SEC_PROLOGUE == m->lastnamed)
557: return(perr(m, line, 0, ETEXTPROL));
558:
559: if (0 == buf[0] && ! (MDOC_LITERAL & m->flags))
560: return(perr(m, line, 0, ENOBLANK));
561:
562: if ( ! mdoc_word_alloc(m, line, 0, buf))
563: return(0);
564:
565: m->next = MDOC_NEXT_SIBLING;
566: return(1);
567: }
568:
569:
570: static int
571: macrowarn(struct mdoc *m, int ln, const char *buf)
572: {
573: if ( ! (MDOC_IGN_MACRO & m->pflags))
574: return(mdoc_perr(m, ln, 1,
575: "unknown macro: %s%s",
576: buf, strlen(buf) > 3 ? "..." : ""));
577: return(mdoc_pwarn(m, ln, 1, WARN_SYNTAX,
578: "unknown macro: %s%s",
579: buf, strlen(buf) > 3 ? "..." : ""));
580: }
581:
582:
583:
584: /*
585: * Parse a macro line, that is, a line beginning with the control
586: * character.
587: */
588: int
589: parsemacro(struct mdoc *m, int ln, char *buf)
590: {
591: int i, c;
592: char mac[5];
593:
594: /* Comments and empties are quickly ignored. */
595:
596: if (0 == buf[1])
597: return(1);
598:
599: if (' ' == buf[1]) {
600: i = 2;
601: while (buf[i] && ' ' == buf[i])
602: i++;
603: if (0 == buf[i])
604: return(1);
605: return(perr(m, ln, 1, ESPACE));
606: }
607:
608: if (buf[1] && '\\' == buf[1])
609: if (buf[2] && '\"' == buf[2])
610: return(1);
611:
612: /* Copy the first word into a nil-terminated buffer. */
613:
614: for (i = 1; i < 5; i++) {
615: if (0 == (mac[i - 1] = buf[i]))
616: break;
617: else if (' ' == buf[i])
618: break;
619: }
620:
621: mac[i - 1] = 0;
622:
623: if (i == 5 || i <= 2) {
624: if ( ! macrowarn(m, ln, mac))
625: goto err;
626: return(1);
627: }
628:
629: if (MDOC_MAX == (c = mdoc_hash_find(m->htab, mac))) {
630: if ( ! macrowarn(m, ln, mac))
631: goto err;
632: return(1);
633: }
634:
635: /* The macro is sane. Jump to the next word. */
636:
637: while (buf[i] && ' ' == buf[i])
638: i++;
639:
640: /* Begin recursive parse sequence. */
641:
642: if ( ! mdoc_macro(m, c, ln, 1, &i, buf))
643: goto err;
644:
645: return(1);
646:
647: err: /* Error out. */
648:
649: m->flags |= MDOC_HALT;
650: return(0);
651: }