Annotation of src/usr.bin/mandoc/mdoc.c, Revision 1.1
1.1 ! kristaps 1: /* $Id: mdoc.c,v 1.74 2009/04/02 06:51:44 kristaps Exp $ */
! 2: /*
! 3: * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@openbsd.org>
! 4: *
! 5: * Permission to use, copy, modify, and distribute this software for any
! 6: * purpose with or without fee is hereby granted, provided that the
! 7: * above copyright notice and this permission notice appear in all
! 8: * copies.
! 9: *
! 10: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
! 11: * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
! 12: * WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
! 13: * AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
! 14: * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
! 15: * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
! 16: * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
! 17: * PERFORMANCE OF THIS SOFTWARE.
! 18: */
! 19: #include <assert.h>
! 20: #include <ctype.h>
! 21: #include <stdarg.h>
! 22: #include <stdio.h>
! 23: #include <stdlib.h>
! 24: #include <string.h>
! 25:
! 26: #include "libmdoc.h"
! 27:
/*
 * Classes of parse-time error reported through perr(), which maps each
 * one onto a fixed diagnostic string.
 */
enum	merr {
	ENOCALL = 0,	/* macro invoked where it is not callable */
	EBODYPROL,	/* non-prologue macro used in the prologue */
	EPROLBODY,	/* prologue macro used in the document body */
	ESPACE,		/* whitespace after the control character */
	ETEXTPROL,	/* free-form text in the prologue */
	ENOBLANK,	/* blank line outside a literal context */
	EMALLOC		/* memory allocation failed */
};
! 37:
! 38: const char *const __mdoc_macronames[MDOC_MAX] = {
! 39: "\\\"", "Dd", "Dt", "Os",
! 40: "Sh", "Ss", "Pp", "D1",
! 41: "Dl", "Bd", "Ed", "Bl",
! 42: "El", "It", "Ad", "An",
! 43: "Ar", "Cd", "Cm", "Dv",
! 44: "Er", "Ev", "Ex", "Fa",
! 45: "Fd", "Fl", "Fn", "Ft",
! 46: "Ic", "In", "Li", "Nd",
! 47: "Nm", "Op", "Ot", "Pa",
! 48: "Rv", "St", "Va", "Vt",
! 49: /* LINTED */
! 50: "Xr", "\%A", "\%B", "\%D",
! 51: /* LINTED */
! 52: "\%I", "\%J", "\%N", "\%O",
! 53: /* LINTED */
! 54: "\%P", "\%R", "\%T", "\%V",
! 55: "Ac", "Ao", "Aq", "At",
! 56: "Bc", "Bf", "Bo", "Bq",
! 57: "Bsx", "Bx", "Db", "Dc",
! 58: "Do", "Dq", "Ec", "Ef",
! 59: "Em", "Eo", "Fx", "Ms",
! 60: "No", "Ns", "Nx", "Ox",
! 61: "Pc", "Pf", "Po", "Pq",
! 62: "Qc", "Ql", "Qo", "Qq",
! 63: "Re", "Rs", "Sc", "So",
! 64: "Sq", "Sm", "Sx", "Sy",
! 65: "Tn", "Ux", "Xc", "Xo",
! 66: "Fo", "Fc", "Oo", "Oc",
! 67: "Bk", "Ek", "Bt", "Hf",
! 68: "Fr", "Ud", "Lb", "Ap",
! 69: "Lp", "Lk", "Mt", "Brq",
! 70: /* LINTED */
! 71: "Bro", "Brc", "\%C", "Es",
! 72: /* LINTED */
! 73: "En", "Dx", "\%Q"
! 74: };
! 75:
! 76: const char *const __mdoc_argnames[MDOC_ARG_MAX] = {
! 77: "split", "nosplit", "ragged",
! 78: "unfilled", "literal", "file",
! 79: "offset", "bullet", "dash",
! 80: "hyphen", "item", "enum",
! 81: "tag", "diag", "hang",
! 82: "ohang", "inset", "column",
! 83: "width", "compact", "std",
! 84: "filled", "words", "emphasis",
! 85: "symbolic", "nested"
! 86: };
! 87:
! 88: const char * const *mdoc_macronames = __mdoc_macronames;
! 89: const char * const *mdoc_argnames = __mdoc_argnames;
! 90:
/* Forward declarations of the file-local helpers. */
static	int		  mdoc_alloc1(struct mdoc *mdoc);
static	void		  mdoc_free1(struct mdoc *mdoc);
static	struct mdoc_node *node_alloc(struct mdoc *mdoc, int line,
				int pos, int tok, enum mdoc_type type);
static	int		  node_append(struct mdoc *mdoc,
				struct mdoc_node *p);
static	int		  parsemacro(struct mdoc *m, int ln, char *buf);
static	int		  parsetext(struct mdoc *m, int line, char *buf);
static	int		  macrowarn(struct mdoc *m, int ln,
				const char *buf);
static	int		  perr(struct mdoc *m, int line, int pos,
				enum merr type);

/* Report error `t' at the line and position of the last parsed node. */
#define	verr(m, t) \
	perr((m), (m)->last->line, (m)->last->pos, (t))
! 103:
! 104: /*
! 105: * Get the first (root) node of the parse tree.
! 106: */
! 107: const struct mdoc_node *
! 108: mdoc_node(const struct mdoc *m)
! 109: {
! 110:
! 111: return(MDOC_HALT & m->flags ? NULL : m->first);
! 112: }
! 113:
! 114:
! 115: const struct mdoc_meta *
! 116: mdoc_meta(const struct mdoc *m)
! 117: {
! 118:
! 119: return(MDOC_HALT & m->flags ? NULL : &m->meta);
! 120: }
! 121:
! 122:
! 123: static void
! 124: mdoc_free1(struct mdoc *mdoc)
! 125: {
! 126:
! 127: if (mdoc->first)
! 128: mdoc_node_freelist(mdoc->first);
! 129: if (mdoc->meta.title)
! 130: free(mdoc->meta.title);
! 131: if (mdoc->meta.os)
! 132: free(mdoc->meta.os);
! 133: if (mdoc->meta.name)
! 134: free(mdoc->meta.name);
! 135: if (mdoc->meta.arch)
! 136: free(mdoc->meta.arch);
! 137: if (mdoc->meta.vol)
! 138: free(mdoc->meta.vol);
! 139: }
! 140:
! 141:
! 142: static int
! 143: mdoc_alloc1(struct mdoc *mdoc)
! 144: {
! 145:
! 146: bzero(&mdoc->meta, sizeof(struct mdoc_meta));
! 147: mdoc->flags = 0;
! 148: mdoc->lastnamed = mdoc->lastsec = 0;
! 149: mdoc->last = calloc(1, sizeof(struct mdoc_node));
! 150: if (NULL == mdoc->last)
! 151: return(0);
! 152:
! 153: mdoc->first = mdoc->last;
! 154: mdoc->last->type = MDOC_ROOT;
! 155: mdoc->next = MDOC_NEXT_CHILD;
! 156: return(1);
! 157: }
! 158:
! 159:
/*
 * Prepare the parser for another document: drop the node tree and
 * meta-data of the previous parse, then allocate a new root node.
 * Returns the result of the re-allocation (0 on failure).
 */
int
mdoc_reset(struct mdoc *mdoc)
{
	int		 ok;

	mdoc_free1(mdoc);
	ok = mdoc_alloc1(mdoc);
	return ok;
}
! 172:
! 173:
! 174: /*
! 175: * Completely free up all resources.
! 176: */
! 177: void
! 178: mdoc_free(struct mdoc *mdoc)
! 179: {
! 180:
! 181: mdoc_free1(mdoc);
! 182: if (mdoc->htab)
! 183: mdoc_hash_free(mdoc->htab);
! 184: free(mdoc);
! 185: }
! 186:
! 187:
! 188: struct mdoc *
! 189: mdoc_alloc(void *data, int pflags, const struct mdoc_cb *cb)
! 190: {
! 191: struct mdoc *p;
! 192:
! 193: if (NULL == (p = calloc(1, sizeof(struct mdoc))))
! 194: return(NULL);
! 195: if (cb)
! 196: (void)memcpy(&p->cb, cb, sizeof(struct mdoc_cb));
! 197:
! 198: p->data = data;
! 199: p->pflags = pflags;
! 200:
! 201: if (NULL == (p->htab = mdoc_hash_alloc())) {
! 202: free(p);
! 203: return(NULL);
! 204: } else if (mdoc_alloc1(p))
! 205: return(p);
! 206:
! 207: free(p);
! 208: return(NULL);
! 209: }
! 210:
! 211:
! 212: /*
! 213: * Climb back up the parse tree, validating open scopes. Mostly calls
! 214: * through to macro_end in macro.c.
! 215: */
! 216: int
! 217: mdoc_endparse(struct mdoc *m)
! 218: {
! 219:
! 220: if (MDOC_HALT & m->flags)
! 221: return(0);
! 222: else if (mdoc_macroend(m))
! 223: return(1);
! 224: m->flags |= MDOC_HALT;
! 225: return(0);
! 226: }
! 227:
! 228:
! 229: /*
! 230: * Main parse routine. Parses a single line -- really just hands off to
! 231: * the macro or text parser.
! 232: */
! 233: int
! 234: mdoc_parseln(struct mdoc *m, int ln, char *buf)
! 235: {
! 236:
! 237: /* If in error-mode, then we parse no more. */
! 238:
! 239: if (MDOC_HALT & m->flags)
! 240: return(0);
! 241:
! 242: return('.' == *buf ? parsemacro(m, ln, buf) :
! 243: parsetext(m, ln, buf));
! 244: }
! 245:
! 246:
! 247: void
! 248: mdoc_vmsg(struct mdoc *mdoc, int ln, int pos, const char *fmt, ...)
! 249: {
! 250: char buf[256];
! 251: va_list ap;
! 252:
! 253: if (NULL == mdoc->cb.mdoc_msg)
! 254: return;
! 255:
! 256: va_start(ap, fmt);
! 257: (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
! 258: va_end(ap);
! 259: (*mdoc->cb.mdoc_msg)(mdoc->data, ln, pos, buf);
! 260: }
! 261:
! 262:
! 263: int
! 264: mdoc_verr(struct mdoc *mdoc, int ln, int pos,
! 265: const char *fmt, ...)
! 266: {
! 267: char buf[256];
! 268: va_list ap;
! 269:
! 270: if (NULL == mdoc->cb.mdoc_err)
! 271: return(0);
! 272:
! 273: va_start(ap, fmt);
! 274: (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
! 275: va_end(ap);
! 276: return((*mdoc->cb.mdoc_err)(mdoc->data, ln, pos, buf));
! 277: }
! 278:
! 279:
! 280: int
! 281: mdoc_vwarn(struct mdoc *mdoc, int ln, int pos,
! 282: enum mdoc_warn type, const char *fmt, ...)
! 283: {
! 284: char buf[256];
! 285: va_list ap;
! 286:
! 287: if (NULL == mdoc->cb.mdoc_warn)
! 288: return(0);
! 289:
! 290: va_start(ap, fmt);
! 291: (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
! 292: va_end(ap);
! 293: return((*mdoc->cb.mdoc_warn)(mdoc->data, ln, pos, type, buf));
! 294: }
! 295:
! 296:
! 297: int
! 298: mdoc_macro(struct mdoc *m, int tok,
! 299: int ln, int pp, int *pos, char *buf)
! 300: {
! 301:
! 302: /* FIXME - these should happen during validation. */
! 303:
! 304: if (MDOC_PROLOGUE & mdoc_macros[tok].flags &&
! 305: SEC_PROLOGUE != m->lastnamed)
! 306: return(perr(m, ln, pp, EPROLBODY));
! 307:
! 308: if ( ! (MDOC_PROLOGUE & mdoc_macros[tok].flags) &&
! 309: SEC_PROLOGUE == m->lastnamed)
! 310: return(perr(m, ln, pp, EBODYPROL));
! 311:
! 312: if (1 != pp && ! (MDOC_CALLABLE & mdoc_macros[tok].flags))
! 313: return(perr(m, ln, pp, ENOCALL));
! 314:
! 315: return((*mdoc_macros[tok].fp)(m, tok, ln, pp, pos, buf));
! 316: }
! 317:
! 318:
! 319: static int
! 320: perr(struct mdoc *m, int line, int pos, enum merr type)
! 321: {
! 322: char *p;
! 323:
! 324: p = NULL;
! 325: switch (type) {
! 326: case (ENOCALL):
! 327: p = "not callable";
! 328: break;
! 329: case (EPROLBODY):
! 330: p = "macro disallowed in document body";
! 331: break;
! 332: case (EBODYPROL):
! 333: p = "macro disallowed in document prologue";
! 334: break;
! 335: case (EMALLOC):
! 336: p = "memory exhausted";
! 337: break;
! 338: case (ETEXTPROL):
! 339: p = "text disallowed in document prologue";
! 340: break;
! 341: case (ENOBLANK):
! 342: p = "blank lines disallowed in non-literal contexts";
! 343: break;
! 344: case (ESPACE):
! 345: p = "whitespace disallowed after delimiter";
! 346: break;
! 347: }
! 348: assert(p);
! 349: return(mdoc_perr(m, line, pos, p));
! 350: }
! 351:
! 352:
! 353: static int
! 354: node_append(struct mdoc *mdoc, struct mdoc_node *p)
! 355: {
! 356:
! 357: assert(mdoc->last);
! 358: assert(mdoc->first);
! 359: assert(MDOC_ROOT != p->type);
! 360:
! 361: switch (mdoc->next) {
! 362: case (MDOC_NEXT_SIBLING):
! 363: mdoc->last->next = p;
! 364: p->prev = mdoc->last;
! 365: p->parent = mdoc->last->parent;
! 366: break;
! 367: case (MDOC_NEXT_CHILD):
! 368: mdoc->last->child = p;
! 369: p->parent = mdoc->last;
! 370: break;
! 371: default:
! 372: abort();
! 373: /* NOTREACHED */
! 374: }
! 375:
! 376: if ( ! mdoc_valid_pre(mdoc, p))
! 377: return(0);
! 378: if ( ! mdoc_action_pre(mdoc, p))
! 379: return(0);
! 380:
! 381: switch (p->type) {
! 382: case (MDOC_HEAD):
! 383: assert(MDOC_BLOCK == p->parent->type);
! 384: p->parent->head = p;
! 385: break;
! 386: case (MDOC_TAIL):
! 387: assert(MDOC_BLOCK == p->parent->type);
! 388: p->parent->tail = p;
! 389: break;
! 390: case (MDOC_BODY):
! 391: assert(MDOC_BLOCK == p->parent->type);
! 392: p->parent->body = p;
! 393: break;
! 394: default:
! 395: break;
! 396: }
! 397:
! 398: mdoc->last = p;
! 399:
! 400: switch (p->type) {
! 401: case (MDOC_TEXT):
! 402: if ( ! mdoc_valid_post(mdoc))
! 403: return(0);
! 404: if ( ! mdoc_action_post(mdoc))
! 405: return(0);
! 406: break;
! 407: default:
! 408: break;
! 409: }
! 410:
! 411: return(1);
! 412: }
! 413:
! 414:
! 415: static struct mdoc_node *
! 416: node_alloc(struct mdoc *mdoc, int line,
! 417: int pos, int tok, enum mdoc_type type)
! 418: {
! 419: struct mdoc_node *p;
! 420:
! 421: if (NULL == (p = calloc(1, sizeof(struct mdoc_node)))) {
! 422: (void)verr(mdoc, EMALLOC);
! 423: return(NULL);
! 424: }
! 425:
! 426: p->sec = mdoc->lastsec;
! 427: p->line = line;
! 428: p->pos = pos;
! 429: p->tok = tok;
! 430: if (MDOC_TEXT != (p->type = type))
! 431: assert(p->tok >= 0);
! 432:
! 433: return(p);
! 434: }
! 435:
! 436:
! 437: int
! 438: mdoc_tail_alloc(struct mdoc *mdoc, int line, int pos, int tok)
! 439: {
! 440: struct mdoc_node *p;
! 441:
! 442: p = node_alloc(mdoc, line, pos, tok, MDOC_TAIL);
! 443: if (NULL == p)
! 444: return(0);
! 445: return(node_append(mdoc, p));
! 446: }
! 447:
! 448:
! 449: int
! 450: mdoc_head_alloc(struct mdoc *mdoc, int line, int pos, int tok)
! 451: {
! 452: struct mdoc_node *p;
! 453:
! 454: assert(mdoc->first);
! 455: assert(mdoc->last);
! 456:
! 457: p = node_alloc(mdoc, line, pos, tok, MDOC_HEAD);
! 458: if (NULL == p)
! 459: return(0);
! 460: return(node_append(mdoc, p));
! 461: }
! 462:
! 463:
! 464: int
! 465: mdoc_body_alloc(struct mdoc *mdoc, int line, int pos, int tok)
! 466: {
! 467: struct mdoc_node *p;
! 468:
! 469: p = node_alloc(mdoc, line, pos, tok, MDOC_BODY);
! 470: if (NULL == p)
! 471: return(0);
! 472: return(node_append(mdoc, p));
! 473: }
! 474:
! 475:
! 476: int
! 477: mdoc_block_alloc(struct mdoc *mdoc, int line, int pos,
! 478: int tok, struct mdoc_arg *args)
! 479: {
! 480: struct mdoc_node *p;
! 481:
! 482: p = node_alloc(mdoc, line, pos, tok, MDOC_BLOCK);
! 483: if (NULL == p)
! 484: return(0);
! 485: if ((p->args = args))
! 486: (args->refcnt)++;
! 487: return(node_append(mdoc, p));
! 488: }
! 489:
! 490:
! 491: int
! 492: mdoc_elem_alloc(struct mdoc *mdoc, int line, int pos,
! 493: int tok, struct mdoc_arg *args)
! 494: {
! 495: struct mdoc_node *p;
! 496:
! 497: p = node_alloc(mdoc, line, pos, tok, MDOC_ELEM);
! 498: if (NULL == p)
! 499: return(0);
! 500: if ((p->args = args))
! 501: (args->refcnt)++;
! 502: return(node_append(mdoc, p));
! 503: }
! 504:
! 505:
! 506: int
! 507: mdoc_word_alloc(struct mdoc *mdoc,
! 508: int line, int pos, const char *word)
! 509: {
! 510: struct mdoc_node *p;
! 511:
! 512: p = node_alloc(mdoc, line, pos, -1, MDOC_TEXT);
! 513: if (NULL == p)
! 514: return(0);
! 515: if (NULL == (p->string = strdup(word))) {
! 516: (void)verr(mdoc, EMALLOC);
! 517: return(0);
! 518: }
! 519: return(node_append(mdoc, p));
! 520: }
! 521:
! 522:
! 523: void
! 524: mdoc_node_free(struct mdoc_node *p)
! 525: {
! 526:
! 527: if (p->string)
! 528: free(p->string);
! 529: if (p->args)
! 530: mdoc_argv_free(p->args);
! 531: free(p);
! 532: }
! 533:
! 534:
! 535: void
! 536: mdoc_node_freelist(struct mdoc_node *p)
! 537: {
! 538:
! 539: if (p->child)
! 540: mdoc_node_freelist(p->child);
! 541: if (p->next)
! 542: mdoc_node_freelist(p->next);
! 543:
! 544: mdoc_node_free(p);
! 545: }
! 546:
! 547:
! 548: /*
! 549: * Parse free-form text, that is, a line that does not begin with the
! 550: * control character.
! 551: */
! 552: static int
! 553: parsetext(struct mdoc *m, int line, char *buf)
! 554: {
! 555:
! 556: if (SEC_PROLOGUE == m->lastnamed)
! 557: return(perr(m, line, 0, ETEXTPROL));
! 558:
! 559: if (0 == buf[0] && ! (MDOC_LITERAL & m->flags))
! 560: return(perr(m, line, 0, ENOBLANK));
! 561:
! 562: if ( ! mdoc_word_alloc(m, line, 0, buf))
! 563: return(0);
! 564:
! 565: m->next = MDOC_NEXT_SIBLING;
! 566: return(1);
! 567: }
! 568:
! 569:
! 570: static int
! 571: macrowarn(struct mdoc *m, int ln, const char *buf)
! 572: {
! 573: if ( ! (MDOC_IGN_MACRO & m->pflags))
! 574: return(mdoc_perr(m, ln, 1,
! 575: "unknown macro: %s%s",
! 576: buf, strlen(buf) > 3 ? "..." : ""));
! 577: return(mdoc_pwarn(m, ln, 1, WARN_SYNTAX,
! 578: "unknown macro: %s%s",
! 579: buf, strlen(buf) > 3 ? "..." : ""));
! 580: }
! 581:
! 582:
! 583:
! 584: /*
! 585: * Parse a macro line, that is, a line beginning with the control
! 586: * character.
! 587: */
! 588: int
! 589: parsemacro(struct mdoc *m, int ln, char *buf)
! 590: {
! 591: int i, c;
! 592: char mac[5];
! 593:
! 594: /* Comments and empties are quickly ignored. */
! 595:
! 596: if (0 == buf[1])
! 597: return(1);
! 598:
! 599: if (' ' == buf[1]) {
! 600: i = 2;
! 601: while (buf[i] && ' ' == buf[i])
! 602: i++;
! 603: if (0 == buf[i])
! 604: return(1);
! 605: return(perr(m, ln, 1, ESPACE));
! 606: }
! 607:
! 608: if (buf[1] && '\\' == buf[1])
! 609: if (buf[2] && '\"' == buf[2])
! 610: return(1);
! 611:
! 612: /* Copy the first word into a nil-terminated buffer. */
! 613:
! 614: for (i = 1; i < 5; i++) {
! 615: if (0 == (mac[i - 1] = buf[i]))
! 616: break;
! 617: else if (' ' == buf[i])
! 618: break;
! 619: }
! 620:
! 621: mac[i - 1] = 0;
! 622:
! 623: if (i == 5 || i <= 2) {
! 624: if ( ! macrowarn(m, ln, mac))
! 625: goto err;
! 626: return(1);
! 627: }
! 628:
! 629: if (MDOC_MAX == (c = mdoc_hash_find(m->htab, mac))) {
! 630: if ( ! macrowarn(m, ln, mac))
! 631: goto err;
! 632: return(1);
! 633: }
! 634:
! 635: /* The macro is sane. Jump to the next word. */
! 636:
! 637: while (buf[i] && ' ' == buf[i])
! 638: i++;
! 639:
! 640: /* Begin recursive parse sequence. */
! 641:
! 642: if ( ! mdoc_macro(m, c, ln, 1, &i, buf))
! 643: goto err;
! 644:
! 645: return(1);
! 646:
! 647: err: /* Error out. */
! 648:
! 649: m->flags |= MDOC_HALT;
! 650: return(0);
! 651: }