Annotation of src/usr.bin/mandoc/html.c, Revision 1.6
1.6 ! schwarze 1: /* $Id: html.c,v 1.5 2009/12/24 02:08:14 schwarze Exp $ */
1.1 schwarze 2: /*
3: * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
4: *
5: * Permission to use, copy, modify, and distribute this software for any
6: * purpose with or without fee is hereby granted, provided that the above
7: * copyright notice and this permission notice appear in all copies.
8: *
9: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16: */
17: #include <sys/types.h>
18:
19: #include <assert.h>
1.3 schwarze 20: #include <ctype.h>
1.4 schwarze 21: #include <stdarg.h>
1.1 schwarze 22: #include <stdio.h>
23: #include <stdint.h>
24: #include <stdlib.h>
25: #include <string.h>
26: #include <unistd.h>
27:
28: #include "out.h"
29: #include "chars.h"
30: #include "html.h"
31: #include "main.h"
32:
33: #define UNCONST(a) ((void *)(uintptr_t)(const void *)(a))
34:
/*
 * Static description of one element: the name printed between the
 * angle brackets and a bitmask of the HTML_* flags defined below.
 */
struct htmldata {
	const char	*name;	/* element name as written to the output */
	int		 flags;	/* bitwise OR of the HTML_* values below */
#define HTML_CLRLINE	0x01	/* newline after the closing tag, no blank before the opening tag */
#define HTML_NOSTACK	0x02	/* never pushed onto the stack of open tags */
#define HTML_AUTOCLOSE	0x04	/* Tag has auto-closure. */
};
42:
43: static const struct htmldata htmltags[TAG_MAX] = {
44: {"html", HTML_CLRLINE}, /* TAG_HTML */
45: {"head", HTML_CLRLINE}, /* TAG_HEAD */
46: {"body", HTML_CLRLINE}, /* TAG_BODY */
1.6 ! schwarze 47: {"meta", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_META */
1.1 schwarze 48: {"title", HTML_CLRLINE}, /* TAG_TITLE */
49: {"div", HTML_CLRLINE}, /* TAG_DIV */
50: {"h1", 0}, /* TAG_H1 */
51: {"h2", 0}, /* TAG_H2 */
52: {"span", 0}, /* TAG_SPAN */
53: {"link", HTML_CLRLINE | HTML_NOSTACK}, /* TAG_LINK */
1.6 ! schwarze 54: {"br", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_BR */
1.1 schwarze 55: {"a", 0}, /* TAG_A */
56: {"table", HTML_CLRLINE}, /* TAG_TABLE */
1.6 ! schwarze 57: {"col", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_COL */
1.1 schwarze 58: {"tr", HTML_CLRLINE}, /* TAG_TR */
59: {"td", HTML_CLRLINE}, /* TAG_TD */
60: {"li", HTML_CLRLINE}, /* TAG_LI */
61: {"ul", HTML_CLRLINE}, /* TAG_UL */
62: {"ol", HTML_CLRLINE}, /* TAG_OL */
63: };
64:
1.5 schwarze 65: static const char *const htmlfonts[HTMLFONT_MAX] = {
66: "roman",
67: "bold",
68: "italic"
69: };
70:
71: static const char *const htmlattrs[ATTR_MAX] = {
1.1 schwarze 72: "http-equiv",
73: "content",
74: "name",
75: "rel",
76: "href",
77: "type",
78: "media",
79: "class",
80: "style",
81: "width",
82: "valign",
83: "target",
84: "id",
1.3 schwarze 85: "summary",
1.1 schwarze 86: };
87:
1.5 schwarze 88:
/* Forward declarations of the file-local helpers defined below. */
static void	 print_spec(struct html *h, const char *p, size_t len);
static void	 print_res(struct html *h, const char *p, size_t len);
static void	 print_ctag(struct html *h, enum htmltag tag);
static void	 print_doctype(struct html *h);
static void	 print_xmltype(struct html *h);
static int	 print_encode(struct html *h, const char *p, int norecurse);
static void	 print_metaf(struct html *h, enum roffdeco deco);
static void	 print_attr(struct html *h, const char *key, const char *val);
static void	*ml_alloc(char *outopts, enum htmltype type);
1.5 schwarze 99:
100:
1.6 ! schwarze 101: static void *
! 102: ml_alloc(char *outopts, enum htmltype type)
1.1 schwarze 103: {
104: struct html *h;
105: const char *toks[4];
106: char *v;
107:
108: toks[0] = "style";
109: toks[1] = "man";
110: toks[2] = "includes";
111: toks[3] = NULL;
112:
1.3 schwarze 113: h = calloc(1, sizeof(struct html));
114: if (NULL == h) {
115: perror(NULL);
116: exit(EXIT_FAILURE);
117: }
1.1 schwarze 118:
1.6 ! schwarze 119: h->type = type;
1.2 schwarze 120: h->tags.head = NULL;
121: h->ords.head = NULL;
1.3 schwarze 122: h->symtab = chars_init(CHARS_HTML);
1.1 schwarze 123:
124: while (outopts && *outopts)
125: switch (getsubopt(&outopts, UNCONST(toks), &v)) {
126: case (0):
127: h->style = v;
128: break;
129: case (1):
130: h->base_man = v;
131: break;
132: case (2):
133: h->base_includes = v;
134: break;
135: default:
136: break;
137: }
138:
139: return(h);
140: }
141:
1.6 ! schwarze 142: void *
! 143: html_alloc(char *outopts)
! 144: {
! 145:
! 146: return(ml_alloc(outopts, HTML_HTML_4_01_STRICT));
! 147: }
! 148:
! 149:
! 150: void *
! 151: xhtml_alloc(char *outopts)
! 152: {
! 153:
! 154: return(ml_alloc(outopts, HTML_XHTML_1_0_STRICT));
! 155: }
! 156:
1.1 schwarze 157:
158: void
159: html_free(void *p)
160: {
161: struct tag *tag;
162: struct ord *ord;
163: struct html *h;
164:
165: h = (struct html *)p;
166:
1.2 schwarze 167: while ((ord = h->ords.head) != NULL) {
168: h->ords.head = ord->next;
1.1 schwarze 169: free(ord);
170: }
171:
1.2 schwarze 172: while ((tag = h->tags.head) != NULL) {
173: h->tags.head = tag->next;
1.1 schwarze 174: free(tag);
175: }
176:
177: if (h->symtab)
178: chars_free(h->symtab);
179:
180: free(h);
181: }
182:
183:
184: void
185: print_gen_head(struct html *h)
186: {
187: struct htmlpair tag[4];
188:
189: tag[0].key = ATTR_HTTPEQUIV;
190: tag[0].val = "Content-Type";
191: tag[1].key = ATTR_CONTENT;
192: tag[1].val = "text/html; charset=utf-8";
193: print_otag(h, TAG_META, 2, tag);
194:
195: tag[0].key = ATTR_NAME;
196: tag[0].val = "resource-type";
197: tag[1].key = ATTR_CONTENT;
198: tag[1].val = "document";
199: print_otag(h, TAG_META, 2, tag);
200:
201: if (h->style) {
202: tag[0].key = ATTR_REL;
203: tag[0].val = "stylesheet";
204: tag[1].key = ATTR_HREF;
205: tag[1].val = h->style;
206: tag[2].key = ATTR_TYPE;
207: tag[2].val = "text/css";
208: tag[3].key = ATTR_MEDIA;
209: tag[3].val = "all";
210: print_otag(h, TAG_LINK, 4, tag);
211: }
212: }
213:
214:
215: static void
1.5 schwarze 216: print_spec(struct html *h, const char *p, size_t len)
1.1 schwarze 217: {
218: const char *rhs;
219: size_t sz;
220:
1.5 schwarze 221: rhs = chars_a2ascii(h->symtab, p, len, &sz);
1.1 schwarze 222:
223: if (NULL == rhs)
224: return;
1.4 schwarze 225: fwrite(rhs, 1, sz, stdout);
1.1 schwarze 226: }
227:
228:
229: static void
1.5 schwarze 230: print_res(struct html *h, const char *p, size_t len)
1.1 schwarze 231: {
232: const char *rhs;
233: size_t sz;
234:
1.5 schwarze 235: rhs = chars_a2res(h->symtab, p, len, &sz);
1.1 schwarze 236:
237: if (NULL == rhs)
238: return;
1.4 schwarze 239: fwrite(rhs, 1, sz, stdout);
1.1 schwarze 240: }
241:
242:
1.5 schwarze 243: struct tag *
244: print_ofont(struct html *h, enum htmlfont font)
1.1 schwarze 245: {
1.5 schwarze 246: struct htmlpair tag;
1.1 schwarze 247:
1.5 schwarze 248: h->metal = h->metac;
249: h->metac = font;
1.1 schwarze 250:
1.5 schwarze 251: /* FIXME: DECO_ROMAN should just close out preexisting. */
1.1 schwarze 252:
1.5 schwarze 253: if (h->metaf && h->tags.head == h->metaf)
254: print_tagq(h, h->metaf);
1.1 schwarze 255:
1.5 schwarze 256: PAIR_CLASS_INIT(&tag, htmlfonts[font]);
257: h->metaf = print_otag(h, TAG_SPAN, 1, &tag);
258: return(h->metaf);
259: }
1.1 schwarze 260:
261:
1.5 schwarze 262: static void
263: print_metaf(struct html *h, enum roffdeco deco)
264: {
265: enum htmlfont font;
1.1 schwarze 266:
1.5 schwarze 267: switch (deco) {
268: case (DECO_PREVIOUS):
269: font = h->metal;
270: break;
271: case (DECO_ITALIC):
272: font = HTMLFONT_ITALIC;
273: break;
274: case (DECO_BOLD):
275: font = HTMLFONT_BOLD;
276: break;
277: case (DECO_ROMAN):
278: font = HTMLFONT_NONE;
279: break;
280: default:
281: abort();
282: /* NOTREACHED */
1.1 schwarze 283: }
284:
1.5 schwarze 285: (void)print_ofont(h, font);
1.1 schwarze 286: }
287:
288:
1.5 schwarze 289: static int
290: print_encode(struct html *h, const char *p, int norecurse)
1.1 schwarze 291: {
1.4 schwarze 292: size_t sz;
1.5 schwarze 293: int len, nospace;
294: const char *seq;
295: enum roffdeco deco;
296:
297: nospace = 0;
1.1 schwarze 298:
299: for (; *p; p++) {
1.4 schwarze 300: sz = strcspn(p, "\\<>&");
301:
302: fwrite(p, 1, sz, stdout);
303: p += /* LINTED */
304: sz;
305:
1.5 schwarze 306: if ('<' == *p) {
307: printf("<");
308: continue;
309: } else if ('>' == *p) {
310: printf(">");
311: continue;
312: } else if ('&' == *p) {
313: printf("&");
1.1 schwarze 314: continue;
1.4 schwarze 315: } else if ('\0' == *p)
316: break;
317:
1.5 schwarze 318: seq = ++p;
319: len = a2roffdeco(&deco, &seq, &sz);
320:
321: switch (deco) {
322: case (DECO_RESERVED):
323: print_res(h, seq, sz);
324: break;
325: case (DECO_SPECIAL):
326: print_spec(h, seq, sz);
327: break;
328: case (DECO_PREVIOUS):
329: /* FALLTHROUGH */
330: case (DECO_BOLD):
331: /* FALLTHROUGH */
332: case (DECO_ITALIC):
333: /* FALLTHROUGH */
334: case (DECO_ROMAN):
335: if (norecurse)
336: break;
337: print_metaf(h, deco);
338: break;
339: default:
340: break;
341: }
342:
343: p += len - 1;
344:
345: if (DECO_NOSPACE == deco && '\0' == *(p + 1))
346: nospace = 1;
1.1 schwarze 347: }
1.5 schwarze 348:
349: return(nospace);
1.1 schwarze 350: }
351:
352:
/*
 * Write one attribute as ` key="val"`.  The key is written as is;
 * the value goes through print_encode() with font escapes disabled.
 */
static void
print_attr(struct html *h, const char *key, const char *val)
{
	putchar(' ');
	fputs(key, stdout);
	fputs("=\"", stdout);
	(void)print_encode(h, val, 1);
	putchar('"');
}
! 360:
! 361:
1.1 schwarze 362: struct tag *
363: print_otag(struct html *h, enum htmltag tag,
364: int sz, const struct htmlpair *p)
365: {
366: int i;
367: struct tag *t;
368:
1.6 ! schwarze 369: /* Push this tags onto the stack of open scopes. */
! 370:
1.1 schwarze 371: if ( ! (HTML_NOSTACK & htmltags[tag].flags)) {
1.3 schwarze 372: t = malloc(sizeof(struct tag));
373: if (NULL == t) {
374: perror(NULL);
375: exit(EXIT_FAILURE);
376: }
1.1 schwarze 377: t->tag = tag;
1.2 schwarze 378: t->next = h->tags.head;
379: h->tags.head = t;
1.1 schwarze 380: } else
381: t = NULL;
382:
383: if ( ! (HTML_NOSPACE & h->flags))
384: if ( ! (HTML_CLRLINE & htmltags[tag].flags))
1.4 schwarze 385: putchar(' ');
1.1 schwarze 386:
1.6 ! schwarze 387: /* Print out the tag name and attributes. */
! 388:
1.1 schwarze 389: printf("<%s", htmltags[tag].name);
1.6 ! schwarze 390: for (i = 0; i < sz; i++)
! 391: print_attr(h, htmlattrs[p[i].key], p[i].val);
! 392:
! 393: /* Add non-overridable attributes. */
! 394:
! 395: if (TAG_HTML == tag && HTML_XHTML_1_0_STRICT == h->type) {
! 396: print_attr(h, "xmlns", "http://www.w3.org/1999/xhtml");
! 397: print_attr(h, "xml:lang", "en");
! 398: print_attr(h, "lang", "en");
1.1 schwarze 399: }
1.6 ! schwarze 400:
! 401: /* Accomodate for XML "well-formed" singleton escaping. */
! 402:
! 403: if (HTML_AUTOCLOSE & htmltags[tag].flags)
! 404: switch (h->type) {
! 405: case (HTML_XHTML_1_0_STRICT):
! 406: putchar('/');
! 407: break;
! 408: default:
! 409: break;
! 410: }
! 411:
1.4 schwarze 412: putchar('>');
1.1 schwarze 413:
414: h->flags |= HTML_NOSPACE;
415: return(t);
416: }
417:
418:
419: static void
420: print_ctag(struct html *h, enum htmltag tag)
421: {
422:
423: printf("</%s>", htmltags[tag].name);
1.3 schwarze 424: if (HTML_CLRLINE & htmltags[tag].flags) {
1.1 schwarze 425: h->flags |= HTML_NOSPACE;
1.4 schwarze 426: putchar('\n');
1.5 schwarze 427: }
1.1 schwarze 428: }
429:
430:
/*
 * Print the declarations that precede the root element: first the
 * XML declaration (if the dialect has one), then the DOCTYPE.
 */
void
print_gen_decls(struct html *h)
{
	print_xmltype(h);	/* may print nothing */
	print_doctype(h);
}
! 438:
! 439:
! 440: static void
! 441: print_xmltype(struct html *h)
! 442: {
! 443: const char *decl;
! 444:
! 445: switch (h->type) {
! 446: case (HTML_XHTML_1_0_STRICT):
! 447: decl = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";
! 448: break;
! 449: default:
! 450: decl = NULL;
! 451: break;
! 452: }
! 453:
! 454: if (NULL == decl)
! 455: return;
! 456:
! 457: printf("%s\n", decl);
! 458: }
! 459:
! 460:
! 461: static void
! 462: print_doctype(struct html *h)
1.1 schwarze 463: {
1.6 ! schwarze 464: const char *doctype;
! 465: const char *dtd;
! 466: const char *name;
! 467:
! 468: switch (h->type) {
! 469: case (HTML_HTML_4_01_STRICT):
! 470: name = "HTML";
! 471: doctype = "-//W3C//DTD HTML 4.01//EN";
! 472: dtd = "http://www.w3.org/TR/html4/strict.dtd";
! 473: break;
! 474: default:
! 475: name = "html";
! 476: doctype = "-//W3C//DTD XHTML 1.0 Strict//EN";
! 477: dtd = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd";
! 478: break;
! 479: }
! 480:
! 481: printf("<!DOCTYPE %s PUBLIC \"%s\" \"%s\">\n",
! 482: name, doctype, dtd);
1.1 schwarze 483: }
484:
485:
486: void
487: print_text(struct html *h, const char *p)
488: {
489:
490: if (*p && 0 == *(p + 1))
491: switch (*p) {
492: case('.'):
493: /* FALLTHROUGH */
494: case(','):
495: /* FALLTHROUGH */
496: case(';'):
497: /* FALLTHROUGH */
498: case(':'):
499: /* FALLTHROUGH */
500: case('?'):
501: /* FALLTHROUGH */
502: case('!'):
503: /* FALLTHROUGH */
504: case(')'):
505: /* FALLTHROUGH */
506: case(']'):
507: /* FALLTHROUGH */
508: case('}'):
509: if ( ! (HTML_IGNDELIM & h->flags))
510: h->flags |= HTML_NOSPACE;
511: break;
512: default:
513: break;
514: }
515:
516: if ( ! (h->flags & HTML_NOSPACE))
1.4 schwarze 517: putchar(' ');
1.1 schwarze 518:
1.5 schwarze 519: assert(p);
520: if ( ! print_encode(h, p, 0))
521: h->flags &= ~HTML_NOSPACE;
1.1 schwarze 522:
523: if (*p && 0 == *(p + 1))
524: switch (*p) {
525: case('('):
526: /* FALLTHROUGH */
527: case('['):
528: /* FALLTHROUGH */
529: case('{'):
530: h->flags |= HTML_NOSPACE;
531: break;
532: default:
533: break;
534: }
535: }
536:
537:
538: void
539: print_tagq(struct html *h, const struct tag *until)
540: {
541: struct tag *tag;
542:
1.2 schwarze 543: while ((tag = h->tags.head) != NULL) {
1.5 schwarze 544: if (tag == h->metaf)
545: h->metaf = NULL;
1.1 schwarze 546: print_ctag(h, tag->tag);
1.2 schwarze 547: h->tags.head = tag->next;
1.1 schwarze 548: free(tag);
549: if (until && tag == until)
550: return;
551: }
552: }
553:
554:
555: void
556: print_stagq(struct html *h, const struct tag *suntil)
557: {
558: struct tag *tag;
559:
1.2 schwarze 560: while ((tag = h->tags.head) != NULL) {
1.1 schwarze 561: if (suntil && tag == suntil)
562: return;
1.5 schwarze 563: if (tag == h->metaf)
564: h->metaf = NULL;
1.1 schwarze 565: print_ctag(h, tag->tag);
1.2 schwarze 566: h->tags.head = tag->next;
1.1 schwarze 567: free(tag);
568: }
569: }
570:
571:
572: void
573: bufinit(struct html *h)
574: {
575:
576: h->buf[0] = '\0';
577: h->buflen = 0;
578: }
579:
580:
/* Append one declaration of the form "key:val;" to the scratch buffer. */
void
bufcat_style(struct html *h, const char *key, const char *val)
{
	bufcat(h, key);
	bufcat(h, ":");
	bufcat(h, val);
	bufcat(h, ";");
}
590:
591:
/* Append the whole NUL-terminated string p to the scratch buffer. */
void
bufcat(struct html *h, const char *p)
{
	size_t	 len;

	len = strlen(p);
	bufncat(h, p, len);
}
598:
599:
600: void
601: buffmt(struct html *h, const char *fmt, ...)
602: {
603: va_list ap;
604:
605: va_start(ap, fmt);
606: (void)vsnprintf(h->buf + (int)h->buflen,
607: BUFSIZ - h->buflen - 1, fmt, ap);
608: va_end(ap);
609: h->buflen = strlen(h->buf);
610: }
611:
612:
613: void
614: bufncat(struct html *h, const char *p, size_t sz)
615: {
616:
617: if (h->buflen + sz > BUFSIZ - 1)
618: sz = BUFSIZ - 1 - h->buflen;
619:
620: (void)strncat(h->buf, p, sz);
621: h->buflen += sz;
622: }
623:
624:
625: void
626: buffmt_includes(struct html *h, const char *name)
627: {
628: const char *p, *pp;
629:
630: pp = h->base_includes;
631:
632: while (NULL != (p = strchr(pp, '%'))) {
633: bufncat(h, pp, (size_t)(p - pp));
634: switch (*(p + 1)) {
635: case('I'):
636: bufcat(h, name);
637: break;
638: default:
639: bufncat(h, p, 2);
640: break;
641: }
642: pp = p + 2;
643: }
644: if (pp)
645: bufcat(h, pp);
646: }
647:
648:
649: void
650: buffmt_man(struct html *h,
651: const char *name, const char *sec)
652: {
653: const char *p, *pp;
654:
655: pp = h->base_man;
656:
657: /* LINTED */
658: while (NULL != (p = strchr(pp, '%'))) {
659: bufncat(h, pp, (size_t)(p - pp));
660: switch (*(p + 1)) {
661: case('S'):
662: bufcat(h, sec ? sec : "1");
663: break;
664: case('N'):
665: buffmt(h, name);
666: break;
667: default:
668: bufncat(h, p, 2);
669: break;
670: }
671: pp = p + 2;
672: }
673: if (pp)
674: bufcat(h, pp);
675: }
676:
677:
678: void
679: bufcat_su(struct html *h, const char *p, const struct roffsu *su)
680: {
681: double v;
682: const char *u;
683:
684: v = su->scale;
685:
686: switch (su->unit) {
687: case (SCALE_CM):
688: u = "cm";
689: break;
690: case (SCALE_IN):
691: u = "in";
692: break;
693: case (SCALE_PC):
694: u = "pc";
695: break;
696: case (SCALE_PT):
697: u = "pt";
698: break;
699: case (SCALE_EM):
700: u = "em";
701: break;
702: case (SCALE_MM):
703: if (0 == (v /= 100))
704: v = 1;
705: u = "em";
706: break;
707: case (SCALE_EN):
708: u = "ex";
709: break;
710: case (SCALE_BU):
711: u = "ex";
712: break;
713: case (SCALE_VS):
714: u = "em";
715: break;
716: default:
717: u = "ex";
718: break;
719: }
720:
721: if (su->pt)
722: buffmt(h, "%s: %f%s;", p, v, u);
723: else
724: /* LINTED */
725: buffmt(h, "%s: %d%s;", p, (int)v, u);
726: }
727:
1.3 schwarze 728:
729: void
730: html_idcat(char *dst, const char *src, int sz)
731: {
732: int ssz;
733:
734: assert(sz);
735:
736: /* Cf. <http://www.w3.org/TR/html4/types.html#h-6.2>. */
737:
738: for ( ; *dst != '\0' && sz; dst++, sz--)
739: /* Jump to end. */ ;
740:
741: assert(sz > 2);
742:
743: /* We can't start with a number (bah). */
744:
745: *dst++ = 'x';
746: *dst = '\0';
747: sz--;
748:
749: for ( ; *src != '\0' && sz > 1; src++) {
750: ssz = snprintf(dst, (size_t)sz, "%.2x", *src);
751: sz -= ssz;
752: dst += ssz;
753: }
754: }