Annotation of src/usr.bin/mandoc/html.c, Revision 1.7
1.7 ! schwarze 1: /* $Id: html.c,v 1.6 2010/02/18 02:11:25 schwarze Exp $ */
1.1 schwarze 2: /*
3: * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
4: *
5: * Permission to use, copy, modify, and distribute this software for any
6: * purpose with or without fee is hereby granted, provided that the above
7: * copyright notice and this permission notice appear in all copies.
8: *
9: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16: */
17: #include <sys/types.h>
18:
19: #include <assert.h>
1.3 schwarze 20: #include <ctype.h>
1.4 schwarze 21: #include <stdarg.h>
1.1 schwarze 22: #include <stdio.h>
23: #include <stdint.h>
24: #include <stdlib.h>
25: #include <string.h>
26: #include <unistd.h>
27:
28: #include "out.h"
29: #include "chars.h"
30: #include "html.h"
31: #include "main.h"
32:
/* Drop the const qualifier from a pointer by round-tripping via uintptr_t. */
#define	UNCONST(ptr)	((void *)(uintptr_t)(const void *)(ptr))
34:
/*
 * Bits stored in htmldata.flags.
 * HTML_CLRLINE:   no leading space before the opening tag, and a
 *                 newline is written after the closing tag.
 * HTML_NOSTACK:   the tag is not pushed onto the stack of open tags.
 * HTML_AUTOCLOSE: Tag has auto-closure.
 */
#define	HTML_CLRLINE	 (1 << 0)
#define	HTML_NOSTACK	 (1 << 1)
#define	HTML_AUTOCLOSE	 (1 << 2)

/* Static description of one element type. */
struct	htmldata {
	const char	 *name;		/* element name as printed in markup */
	int		  flags;	/* bitwise OR of the bits above */
};
42:
43: static const struct htmldata htmltags[TAG_MAX] = {
44: {"html", HTML_CLRLINE}, /* TAG_HTML */
45: {"head", HTML_CLRLINE}, /* TAG_HEAD */
46: {"body", HTML_CLRLINE}, /* TAG_BODY */
1.6 schwarze 47: {"meta", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_META */
1.1 schwarze 48: {"title", HTML_CLRLINE}, /* TAG_TITLE */
49: {"div", HTML_CLRLINE}, /* TAG_DIV */
50: {"h1", 0}, /* TAG_H1 */
51: {"h2", 0}, /* TAG_H2 */
52: {"span", 0}, /* TAG_SPAN */
53: {"link", HTML_CLRLINE | HTML_NOSTACK}, /* TAG_LINK */
1.6 schwarze 54: {"br", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_BR */
1.1 schwarze 55: {"a", 0}, /* TAG_A */
56: {"table", HTML_CLRLINE}, /* TAG_TABLE */
1.6 schwarze 57: {"col", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_COL */
1.1 schwarze 58: {"tr", HTML_CLRLINE}, /* TAG_TR */
59: {"td", HTML_CLRLINE}, /* TAG_TD */
60: {"li", HTML_CLRLINE}, /* TAG_LI */
61: {"ul", HTML_CLRLINE}, /* TAG_UL */
62: {"ol", HTML_CLRLINE}, /* TAG_OL */
63: };
64:
1.5 schwarze 65: static const char *const htmlfonts[HTMLFONT_MAX] = {
66: "roman",
67: "bold",
68: "italic"
69: };
70:
71: static const char *const htmlattrs[ATTR_MAX] = {
1.1 schwarze 72: "http-equiv",
73: "content",
74: "name",
75: "rel",
76: "href",
77: "type",
78: "media",
79: "class",
80: "style",
81: "width",
82: "valign",
83: "target",
84: "id",
1.3 schwarze 85: "summary",
1.1 schwarze 86: };
87:
/* Forward declarations of the file-local helpers. */

static	void	 *ml_alloc(char *outopts, enum htmltype type);

static	void	  print_attr(struct html *h,
			const char *key, const char *val);
static	void	  print_ctag(struct html *h, enum htmltag tag);
static	void	  print_doctype(struct html *h);
static	int	  print_encode(struct html *h,
			const char *p, int norecurse);
static	void	  print_metaf(struct html *h, enum roffdeco deco);
static	void	  print_res(struct html *h, const char *p, size_t len);
static	void	  print_spec(struct html *h, const char *p, size_t len);
static	void	  print_xmltype(struct html *h);
1.5 schwarze 98:
99:
1.6 schwarze 100: static void *
101: ml_alloc(char *outopts, enum htmltype type)
1.1 schwarze 102: {
103: struct html *h;
104: const char *toks[4];
105: char *v;
106:
107: toks[0] = "style";
108: toks[1] = "man";
109: toks[2] = "includes";
110: toks[3] = NULL;
111:
1.3 schwarze 112: h = calloc(1, sizeof(struct html));
113: if (NULL == h) {
114: perror(NULL);
115: exit(EXIT_FAILURE);
116: }
1.1 schwarze 117:
1.6 schwarze 118: h->type = type;
1.2 schwarze 119: h->tags.head = NULL;
120: h->ords.head = NULL;
1.3 schwarze 121: h->symtab = chars_init(CHARS_HTML);
1.1 schwarze 122:
123: while (outopts && *outopts)
124: switch (getsubopt(&outopts, UNCONST(toks), &v)) {
125: case (0):
126: h->style = v;
127: break;
128: case (1):
129: h->base_man = v;
130: break;
131: case (2):
132: h->base_includes = v;
133: break;
134: default:
135: break;
136: }
137:
138: return(h);
139: }
140:
1.6 schwarze 141: void *
142: html_alloc(char *outopts)
143: {
144:
145: return(ml_alloc(outopts, HTML_HTML_4_01_STRICT));
146: }
147:
148:
149: void *
150: xhtml_alloc(char *outopts)
151: {
152:
153: return(ml_alloc(outopts, HTML_XHTML_1_0_STRICT));
154: }
155:
1.1 schwarze 156:
157: void
158: html_free(void *p)
159: {
160: struct tag *tag;
161: struct ord *ord;
162: struct html *h;
163:
164: h = (struct html *)p;
165:
1.2 schwarze 166: while ((ord = h->ords.head) != NULL) {
167: h->ords.head = ord->next;
1.1 schwarze 168: free(ord);
169: }
170:
1.2 schwarze 171: while ((tag = h->tags.head) != NULL) {
172: h->tags.head = tag->next;
1.1 schwarze 173: free(tag);
174: }
175:
176: if (h->symtab)
177: chars_free(h->symtab);
178:
179: free(h);
180: }
181:
182:
183: void
184: print_gen_head(struct html *h)
185: {
186: struct htmlpair tag[4];
187:
188: tag[0].key = ATTR_HTTPEQUIV;
189: tag[0].val = "Content-Type";
190: tag[1].key = ATTR_CONTENT;
191: tag[1].val = "text/html; charset=utf-8";
192: print_otag(h, TAG_META, 2, tag);
193:
194: tag[0].key = ATTR_NAME;
195: tag[0].val = "resource-type";
196: tag[1].key = ATTR_CONTENT;
197: tag[1].val = "document";
198: print_otag(h, TAG_META, 2, tag);
199:
200: if (h->style) {
201: tag[0].key = ATTR_REL;
202: tag[0].val = "stylesheet";
203: tag[1].key = ATTR_HREF;
204: tag[1].val = h->style;
205: tag[2].key = ATTR_TYPE;
206: tag[2].val = "text/css";
207: tag[3].key = ATTR_MEDIA;
208: tag[3].val = "all";
209: print_otag(h, TAG_LINK, 4, tag);
210: }
211: }
212:
213:
214: static void
1.5 schwarze 215: print_spec(struct html *h, const char *p, size_t len)
1.1 schwarze 216: {
217: const char *rhs;
218: size_t sz;
219:
1.5 schwarze 220: rhs = chars_a2ascii(h->symtab, p, len, &sz);
1.1 schwarze 221:
222: if (NULL == rhs)
223: return;
1.4 schwarze 224: fwrite(rhs, 1, sz, stdout);
1.1 schwarze 225: }
226:
227:
228: static void
1.5 schwarze 229: print_res(struct html *h, const char *p, size_t len)
1.1 schwarze 230: {
231: const char *rhs;
232: size_t sz;
233:
1.5 schwarze 234: rhs = chars_a2res(h->symtab, p, len, &sz);
1.1 schwarze 235:
236: if (NULL == rhs)
237: return;
1.4 schwarze 238: fwrite(rhs, 1, sz, stdout);
1.1 schwarze 239: }
240:
241:
1.5 schwarze 242: struct tag *
243: print_ofont(struct html *h, enum htmlfont font)
1.1 schwarze 244: {
1.5 schwarze 245: struct htmlpair tag;
1.1 schwarze 246:
1.5 schwarze 247: h->metal = h->metac;
248: h->metac = font;
1.1 schwarze 249:
1.5 schwarze 250: /* FIXME: DECO_ROMAN should just close out preexisting. */
1.1 schwarze 251:
1.5 schwarze 252: if (h->metaf && h->tags.head == h->metaf)
253: print_tagq(h, h->metaf);
1.1 schwarze 254:
1.5 schwarze 255: PAIR_CLASS_INIT(&tag, htmlfonts[font]);
256: h->metaf = print_otag(h, TAG_SPAN, 1, &tag);
257: return(h->metaf);
258: }
1.1 schwarze 259:
260:
1.5 schwarze 261: static void
262: print_metaf(struct html *h, enum roffdeco deco)
263: {
264: enum htmlfont font;
1.1 schwarze 265:
1.5 schwarze 266: switch (deco) {
267: case (DECO_PREVIOUS):
268: font = h->metal;
269: break;
270: case (DECO_ITALIC):
271: font = HTMLFONT_ITALIC;
272: break;
273: case (DECO_BOLD):
274: font = HTMLFONT_BOLD;
275: break;
276: case (DECO_ROMAN):
277: font = HTMLFONT_NONE;
278: break;
279: default:
280: abort();
281: /* NOTREACHED */
1.1 schwarze 282: }
283:
1.5 schwarze 284: (void)print_ofont(h, font);
1.1 schwarze 285: }
286:
287:
1.5 schwarze 288: static int
289: print_encode(struct html *h, const char *p, int norecurse)
1.1 schwarze 290: {
1.4 schwarze 291: size_t sz;
1.5 schwarze 292: int len, nospace;
293: const char *seq;
294: enum roffdeco deco;
295:
296: nospace = 0;
1.1 schwarze 297:
298: for (; *p; p++) {
1.4 schwarze 299: sz = strcspn(p, "\\<>&");
300:
301: fwrite(p, 1, sz, stdout);
302: p += /* LINTED */
303: sz;
304:
1.5 schwarze 305: if ('<' == *p) {
306: printf("<");
307: continue;
308: } else if ('>' == *p) {
309: printf(">");
310: continue;
311: } else if ('&' == *p) {
312: printf("&");
1.1 schwarze 313: continue;
1.4 schwarze 314: } else if ('\0' == *p)
315: break;
316:
1.5 schwarze 317: seq = ++p;
318: len = a2roffdeco(&deco, &seq, &sz);
319:
320: switch (deco) {
321: case (DECO_RESERVED):
322: print_res(h, seq, sz);
323: break;
324: case (DECO_SPECIAL):
325: print_spec(h, seq, sz);
326: break;
327: case (DECO_PREVIOUS):
328: /* FALLTHROUGH */
329: case (DECO_BOLD):
330: /* FALLTHROUGH */
331: case (DECO_ITALIC):
332: /* FALLTHROUGH */
333: case (DECO_ROMAN):
334: if (norecurse)
335: break;
336: print_metaf(h, deco);
337: break;
338: default:
339: break;
340: }
341:
342: p += len - 1;
343:
344: if (DECO_NOSPACE == deco && '\0' == *(p + 1))
345: nospace = 1;
1.1 schwarze 346: }
1.5 schwarze 347:
348: return(nospace);
1.1 schwarze 349: }
350:
351:
/*
 * Write one attribute as ` key="val"`, passing the value through
 * print_encode() with font changes suppressed.
 */
static void
print_attr(struct html *h, const char *key, const char *val)
{

	putchar(' ');
	fputs(key, stdout);
	fputs("=\"", stdout);
	(void)print_encode(h, val, 1);
	putchar('"');
}
359:
360:
1.1 schwarze 361: struct tag *
362: print_otag(struct html *h, enum htmltag tag,
363: int sz, const struct htmlpair *p)
364: {
365: int i;
366: struct tag *t;
367:
1.6 schwarze 368: /* Push this tags onto the stack of open scopes. */
369:
1.1 schwarze 370: if ( ! (HTML_NOSTACK & htmltags[tag].flags)) {
1.3 schwarze 371: t = malloc(sizeof(struct tag));
372: if (NULL == t) {
373: perror(NULL);
374: exit(EXIT_FAILURE);
375: }
1.1 schwarze 376: t->tag = tag;
1.2 schwarze 377: t->next = h->tags.head;
378: h->tags.head = t;
1.1 schwarze 379: } else
380: t = NULL;
381:
382: if ( ! (HTML_NOSPACE & h->flags))
383: if ( ! (HTML_CLRLINE & htmltags[tag].flags))
1.4 schwarze 384: putchar(' ');
1.1 schwarze 385:
1.6 schwarze 386: /* Print out the tag name and attributes. */
387:
1.1 schwarze 388: printf("<%s", htmltags[tag].name);
1.6 schwarze 389: for (i = 0; i < sz; i++)
390: print_attr(h, htmlattrs[p[i].key], p[i].val);
391:
392: /* Add non-overridable attributes. */
393:
394: if (TAG_HTML == tag && HTML_XHTML_1_0_STRICT == h->type) {
395: print_attr(h, "xmlns", "http://www.w3.org/1999/xhtml");
396: print_attr(h, "xml:lang", "en");
397: print_attr(h, "lang", "en");
1.1 schwarze 398: }
1.6 schwarze 399:
400: /* Accomodate for XML "well-formed" singleton escaping. */
401:
402: if (HTML_AUTOCLOSE & htmltags[tag].flags)
403: switch (h->type) {
404: case (HTML_XHTML_1_0_STRICT):
405: putchar('/');
406: break;
407: default:
408: break;
409: }
410:
1.4 schwarze 411: putchar('>');
1.1 schwarze 412:
413: h->flags |= HTML_NOSPACE;
414: return(t);
415: }
416:
417:
418: static void
419: print_ctag(struct html *h, enum htmltag tag)
420: {
421:
422: printf("</%s>", htmltags[tag].name);
1.3 schwarze 423: if (HTML_CLRLINE & htmltags[tag].flags) {
1.1 schwarze 424: h->flags |= HTML_NOSPACE;
1.4 schwarze 425: putchar('\n');
1.5 schwarze 426: }
1.1 schwarze 427: }
428:
429:
/*
 * Emit the document prologue: the XML declaration (where the output
 * type calls for one) followed by the DOCTYPE declaration.
 */
void
print_gen_decls(struct html *h)
{

	print_xmltype(h);	/* must come first in the output */
	print_doctype(h);
}
437:
438:
439: static void
440: print_xmltype(struct html *h)
441: {
442: const char *decl;
443:
444: switch (h->type) {
445: case (HTML_XHTML_1_0_STRICT):
446: decl = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";
447: break;
448: default:
449: decl = NULL;
450: break;
451: }
452:
453: if (NULL == decl)
454: return;
455:
456: printf("%s\n", decl);
457: }
458:
459:
460: static void
461: print_doctype(struct html *h)
1.1 schwarze 462: {
1.6 schwarze 463: const char *doctype;
464: const char *dtd;
465: const char *name;
466:
467: switch (h->type) {
468: case (HTML_HTML_4_01_STRICT):
469: name = "HTML";
470: doctype = "-//W3C//DTD HTML 4.01//EN";
471: dtd = "http://www.w3.org/TR/html4/strict.dtd";
472: break;
473: default:
474: name = "html";
475: doctype = "-//W3C//DTD XHTML 1.0 Strict//EN";
476: dtd = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd";
477: break;
478: }
479:
480: printf("<!DOCTYPE %s PUBLIC \"%s\" \"%s\">\n",
481: name, doctype, dtd);
1.1 schwarze 482: }
483:
484:
485: void
486: print_text(struct html *h, const char *p)
487: {
488:
489: if (*p && 0 == *(p + 1))
490: switch (*p) {
491: case('.'):
492: /* FALLTHROUGH */
493: case(','):
494: /* FALLTHROUGH */
495: case(';'):
496: /* FALLTHROUGH */
497: case(':'):
498: /* FALLTHROUGH */
499: case('?'):
500: /* FALLTHROUGH */
501: case('!'):
502: /* FALLTHROUGH */
503: case(')'):
504: /* FALLTHROUGH */
505: case(']'):
506: if ( ! (HTML_IGNDELIM & h->flags))
507: h->flags |= HTML_NOSPACE;
508: break;
509: default:
510: break;
511: }
512:
513: if ( ! (h->flags & HTML_NOSPACE))
1.4 schwarze 514: putchar(' ');
1.1 schwarze 515:
1.5 schwarze 516: assert(p);
517: if ( ! print_encode(h, p, 0))
518: h->flags &= ~HTML_NOSPACE;
1.1 schwarze 519:
520: if (*p && 0 == *(p + 1))
521: switch (*p) {
522: case('('):
523: /* FALLTHROUGH */
524: case('['):
525: h->flags |= HTML_NOSPACE;
526: break;
527: default:
528: break;
529: }
530: }
531:
532:
533: void
534: print_tagq(struct html *h, const struct tag *until)
535: {
536: struct tag *tag;
537:
1.2 schwarze 538: while ((tag = h->tags.head) != NULL) {
1.5 schwarze 539: if (tag == h->metaf)
540: h->metaf = NULL;
1.1 schwarze 541: print_ctag(h, tag->tag);
1.2 schwarze 542: h->tags.head = tag->next;
1.1 schwarze 543: free(tag);
544: if (until && tag == until)
545: return;
546: }
547: }
548:
549:
550: void
551: print_stagq(struct html *h, const struct tag *suntil)
552: {
553: struct tag *tag;
554:
1.2 schwarze 555: while ((tag = h->tags.head) != NULL) {
1.1 schwarze 556: if (suntil && tag == suntil)
557: return;
1.5 schwarze 558: if (tag == h->metaf)
559: h->metaf = NULL;
1.1 schwarze 560: print_ctag(h, tag->tag);
1.2 schwarze 561: h->tags.head = tag->next;
1.1 schwarze 562: free(tag);
563: }
564: }
565:
566:
567: void
568: bufinit(struct html *h)
569: {
570:
571: h->buf[0] = '\0';
572: h->buflen = 0;
573: }
574:
575:
/* Append one declaration of the form "key:val;" to the scratch buffer. */
void
bufcat_style(struct html *h, const char *key, const char *val)
{

	bufcat(h, key);
	bufcat(h, ":");
	bufcat(h, val);
	bufcat(h, ";");
}
585:
586:
/* Append the whole NUL-terminated string "p" to the scratch buffer. */
void
bufcat(struct html *h, const char *p)
{
	size_t		 len;

	len = strlen(p);
	bufncat(h, p, len);
}
593:
594:
595: void
596: buffmt(struct html *h, const char *fmt, ...)
597: {
598: va_list ap;
599:
600: va_start(ap, fmt);
601: (void)vsnprintf(h->buf + (int)h->buflen,
602: BUFSIZ - h->buflen - 1, fmt, ap);
603: va_end(ap);
604: h->buflen = strlen(h->buf);
605: }
606:
607:
608: void
609: bufncat(struct html *h, const char *p, size_t sz)
610: {
611:
612: if (h->buflen + sz > BUFSIZ - 1)
613: sz = BUFSIZ - 1 - h->buflen;
614:
615: (void)strncat(h->buf, p, sz);
616: h->buflen += sz;
617: }
618:
619:
620: void
621: buffmt_includes(struct html *h, const char *name)
622: {
623: const char *p, *pp;
624:
625: pp = h->base_includes;
626:
627: while (NULL != (p = strchr(pp, '%'))) {
628: bufncat(h, pp, (size_t)(p - pp));
629: switch (*(p + 1)) {
630: case('I'):
631: bufcat(h, name);
632: break;
633: default:
634: bufncat(h, p, 2);
635: break;
636: }
637: pp = p + 2;
638: }
639: if (pp)
640: bufcat(h, pp);
641: }
642:
643:
644: void
645: buffmt_man(struct html *h,
646: const char *name, const char *sec)
647: {
648: const char *p, *pp;
649:
650: pp = h->base_man;
651:
652: /* LINTED */
653: while (NULL != (p = strchr(pp, '%'))) {
654: bufncat(h, pp, (size_t)(p - pp));
655: switch (*(p + 1)) {
656: case('S'):
657: bufcat(h, sec ? sec : "1");
658: break;
659: case('N'):
660: buffmt(h, name);
661: break;
662: default:
663: bufncat(h, p, 2);
664: break;
665: }
666: pp = p + 2;
667: }
668: if (pp)
669: bufcat(h, pp);
670: }
671:
672:
673: void
674: bufcat_su(struct html *h, const char *p, const struct roffsu *su)
675: {
676: double v;
677: const char *u;
678:
679: v = su->scale;
680:
681: switch (su->unit) {
682: case (SCALE_CM):
683: u = "cm";
684: break;
685: case (SCALE_IN):
686: u = "in";
687: break;
688: case (SCALE_PC):
689: u = "pc";
690: break;
691: case (SCALE_PT):
692: u = "pt";
693: break;
694: case (SCALE_EM):
695: u = "em";
696: break;
697: case (SCALE_MM):
698: if (0 == (v /= 100))
699: v = 1;
700: u = "em";
701: break;
702: case (SCALE_EN):
703: u = "ex";
704: break;
705: case (SCALE_BU):
706: u = "ex";
707: break;
708: case (SCALE_VS):
709: u = "em";
710: break;
711: default:
712: u = "ex";
713: break;
714: }
715:
716: if (su->pt)
717: buffmt(h, "%s: %f%s;", p, v, u);
718: else
719: /* LINTED */
720: buffmt(h, "%s: %d%s;", p, (int)v, u);
721: }
722:
1.3 schwarze 723:
/*
 * Append an identifier derived from "src" to the NUL-terminated
 * string in "dst", whose total capacity is "sz" bytes: the letter 'x'
 * followed by two lowercase hex digits for each byte of "src".
 * Encoding stops at the last whole byte that still fits, so the
 * result is always NUL-terminated and never ends in half a hex pair.
 * At least three bytes must remain free in "dst" (asserted).
 */
void
html_idcat(char *dst, const char *src, int sz)
{
	int		 ssz;

	assert(sz);

	/* Cf. <http://www.w3.org/TR/html4/types.html#h-6.2>. */

	for ( ; *dst != '\0' && sz; dst++, sz--)
		/* Jump to end. */ ;

	assert(sz > 2);

	/* We can't start with a number (bah). */

	*dst++ = 'x';
	*dst = '\0';
	sz--;

	/*
	 * Each byte needs two digits plus the terminator.  Convert via
	 * unsigned char so that bytes with the high bit set print as
	 * two digits instead of a sign-extended "ffffffXX".
	 */
	for ( ; *src != '\0' && sz > 2; src++) {
		ssz = snprintf(dst, (size_t)sz, "%.2x",
		    (unsigned int)(unsigned char)*src);
		sz -= ssz;
		dst += ssz;
	}
}