Annotation of src/usr.bin/mandoc/html.c, Revision 1.5
1.5 ! schwarze 1: /* $Id: html.c,v 1.4 2009/12/23 22:30:17 schwarze Exp $ */
1.1 schwarze 2: /*
3: * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
4: *
5: * Permission to use, copy, modify, and distribute this software for any
6: * purpose with or without fee is hereby granted, provided that the above
7: * copyright notice and this permission notice appear in all copies.
8: *
9: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16: */
17: #include <sys/types.h>
18:
19: #include <assert.h>
1.3 schwarze 20: #include <ctype.h>
1.4 schwarze 21: #include <stdarg.h>
1.1 schwarze 22: #include <stdio.h>
23: #include <stdint.h>
24: #include <stdlib.h>
25: #include <string.h>
26: #include <unistd.h>
27:
28: #include "out.h"
29: #include "chars.h"
30: #include "html.h"
31: #include "main.h"
32:
/*
 * Drop the const qualifier from a pointer by round-tripping it through
 * uintptr_t.  Used where an interface such as getsubopt() wants a
 * non-const pointer to data that is never actually modified.
 */
#define	UNCONST(a)	((void *)(uintptr_t)(const void *)(a))

/*
 * Public identifier and DTD location written into the DOCTYPE
 * declaration by print_gen_doctype().
 */
#define	DOCTYPE		"-//W3C//DTD HTML 4.01//EN"
#define	DTD		"http://www.w3.org/TR/html4/strict.dtd"
37:
/*
 * Per-element flags stored in struct htmldata.
 *
 * HTML_CLRLINE: no separating space is printed before the opening tag,
 *               and the closing tag is followed by a newline.
 * HTML_NOSTACK: the element is not pushed onto the open-tag stack
 *               (print_otag() returns NULL for it).
 */
#define	HTML_CLRLINE	(1 << 0)
#define	HTML_NOSTACK	(1 << 1)

/* Static description of one HTML element: its name and HTML_* flags. */
struct	htmldata {
	const char	 *name;		/* element name as printed */
	int		  flags;	/* bitwise OR of HTML_* above */
};
44:
45: static const struct htmldata htmltags[TAG_MAX] = {
46: {"html", HTML_CLRLINE}, /* TAG_HTML */
47: {"head", HTML_CLRLINE}, /* TAG_HEAD */
48: {"body", HTML_CLRLINE}, /* TAG_BODY */
49: {"meta", HTML_CLRLINE | HTML_NOSTACK}, /* TAG_META */
50: {"title", HTML_CLRLINE}, /* TAG_TITLE */
51: {"div", HTML_CLRLINE}, /* TAG_DIV */
52: {"h1", 0}, /* TAG_H1 */
53: {"h2", 0}, /* TAG_H2 */
54: {"p", HTML_CLRLINE}, /* TAG_P */
55: {"span", 0}, /* TAG_SPAN */
56: {"link", HTML_CLRLINE | HTML_NOSTACK}, /* TAG_LINK */
57: {"br", HTML_CLRLINE | HTML_NOSTACK}, /* TAG_LINK */
58: {"a", 0}, /* TAG_A */
59: {"table", HTML_CLRLINE}, /* TAG_TABLE */
60: {"col", HTML_CLRLINE | HTML_NOSTACK}, /* TAG_COL */
61: {"tr", HTML_CLRLINE}, /* TAG_TR */
62: {"td", HTML_CLRLINE}, /* TAG_TD */
63: {"li", HTML_CLRLINE}, /* TAG_LI */
64: {"ul", HTML_CLRLINE}, /* TAG_UL */
65: {"ol", HTML_CLRLINE}, /* TAG_OL */
66: {"base", HTML_CLRLINE | HTML_NOSTACK}, /* TAG_BASE */
67: };
68:
1.5 ! schwarze 69: static const char *const htmlfonts[HTMLFONT_MAX] = {
! 70: "roman",
! 71: "bold",
! 72: "italic"
! 73: };
! 74:
! 75: static const char *const htmlattrs[ATTR_MAX] = {
1.1 schwarze 76: "http-equiv",
77: "content",
78: "name",
79: "rel",
80: "href",
81: "type",
82: "media",
83: "class",
84: "style",
85: "width",
86: "valign",
87: "target",
88: "id",
1.3 schwarze 89: "summary",
1.1 schwarze 90: };
91:
1.5 ! schwarze 92:
/* Forward declarations of the file-local output helpers. */
static	int		  print_encode(struct html *, const char *, int);
static	void		  print_ctag(struct html *, enum htmltag);
static	void		  print_metaf(struct html *, enum roffdeco);
static	void		  print_res(struct html *, const char *, size_t);
static	void		  print_spec(struct html *, const char *, size_t);
! 98:
! 99:
1.1 schwarze 100: void *
101: html_alloc(char *outopts)
102: {
103: struct html *h;
104: const char *toks[4];
105: char *v;
106:
107: toks[0] = "style";
108: toks[1] = "man";
109: toks[2] = "includes";
110: toks[3] = NULL;
111:
1.3 schwarze 112: h = calloc(1, sizeof(struct html));
113: if (NULL == h) {
114: perror(NULL);
115: exit(EXIT_FAILURE);
116: }
1.1 schwarze 117:
1.2 schwarze 118: h->tags.head = NULL;
119: h->ords.head = NULL;
1.3 schwarze 120: h->symtab = chars_init(CHARS_HTML);
1.1 schwarze 121:
122: while (outopts && *outopts)
123: switch (getsubopt(&outopts, UNCONST(toks), &v)) {
124: case (0):
125: h->style = v;
126: break;
127: case (1):
128: h->base_man = v;
129: break;
130: case (2):
131: h->base_includes = v;
132: break;
133: default:
134: break;
135: }
136:
137: return(h);
138: }
139:
140:
141: void
142: html_free(void *p)
143: {
144: struct tag *tag;
145: struct ord *ord;
146: struct html *h;
147:
148: h = (struct html *)p;
149:
1.2 schwarze 150: while ((ord = h->ords.head) != NULL) {
151: h->ords.head = ord->next;
1.1 schwarze 152: free(ord);
153: }
154:
1.2 schwarze 155: while ((tag = h->tags.head) != NULL) {
156: h->tags.head = tag->next;
1.1 schwarze 157: free(tag);
158: }
159:
160: if (h->symtab)
161: chars_free(h->symtab);
162:
163: free(h);
164: }
165:
166:
167: void
168: print_gen_head(struct html *h)
169: {
170: struct htmlpair tag[4];
171:
172: tag[0].key = ATTR_HTTPEQUIV;
173: tag[0].val = "Content-Type";
174: tag[1].key = ATTR_CONTENT;
175: tag[1].val = "text/html; charset=utf-8";
176: print_otag(h, TAG_META, 2, tag);
177:
178: tag[0].key = ATTR_NAME;
179: tag[0].val = "resource-type";
180: tag[1].key = ATTR_CONTENT;
181: tag[1].val = "document";
182: print_otag(h, TAG_META, 2, tag);
183:
184: if (h->style) {
185: tag[0].key = ATTR_REL;
186: tag[0].val = "stylesheet";
187: tag[1].key = ATTR_HREF;
188: tag[1].val = h->style;
189: tag[2].key = ATTR_TYPE;
190: tag[2].val = "text/css";
191: tag[3].key = ATTR_MEDIA;
192: tag[3].val = "all";
193: print_otag(h, TAG_LINK, 4, tag);
194: }
195: }
196:
197:
198: static void
1.5 ! schwarze 199: print_spec(struct html *h, const char *p, size_t len)
1.1 schwarze 200: {
201: const char *rhs;
202: size_t sz;
203:
1.5 ! schwarze 204: rhs = chars_a2ascii(h->symtab, p, len, &sz);
1.1 schwarze 205:
206: if (NULL == rhs)
207: return;
1.4 schwarze 208: fwrite(rhs, 1, sz, stdout);
1.1 schwarze 209: }
210:
211:
212: static void
1.5 ! schwarze 213: print_res(struct html *h, const char *p, size_t len)
1.1 schwarze 214: {
215: const char *rhs;
216: size_t sz;
217:
1.5 ! schwarze 218: rhs = chars_a2res(h->symtab, p, len, &sz);
1.1 schwarze 219:
220: if (NULL == rhs)
221: return;
1.4 schwarze 222: fwrite(rhs, 1, sz, stdout);
1.1 schwarze 223: }
224:
225:
1.5 ! schwarze 226: struct tag *
! 227: print_ofont(struct html *h, enum htmlfont font)
1.1 schwarze 228: {
1.5 ! schwarze 229: struct htmlpair tag;
1.1 schwarze 230:
1.5 ! schwarze 231: h->metal = h->metac;
! 232: h->metac = font;
1.1 schwarze 233:
1.5 ! schwarze 234: /* FIXME: DECO_ROMAN should just close out preexisting. */
1.1 schwarze 235:
1.5 ! schwarze 236: if (h->metaf && h->tags.head == h->metaf)
! 237: print_tagq(h, h->metaf);
1.1 schwarze 238:
1.5 ! schwarze 239: PAIR_CLASS_INIT(&tag, htmlfonts[font]);
! 240: h->metaf = print_otag(h, TAG_SPAN, 1, &tag);
! 241: return(h->metaf);
! 242: }
1.1 schwarze 243:
244:
1.5 ! schwarze 245: static void
! 246: print_metaf(struct html *h, enum roffdeco deco)
! 247: {
! 248: enum htmlfont font;
1.1 schwarze 249:
1.5 ! schwarze 250: switch (deco) {
! 251: case (DECO_PREVIOUS):
! 252: font = h->metal;
! 253: break;
! 254: case (DECO_ITALIC):
! 255: font = HTMLFONT_ITALIC;
! 256: break;
! 257: case (DECO_BOLD):
! 258: font = HTMLFONT_BOLD;
! 259: break;
! 260: case (DECO_ROMAN):
! 261: font = HTMLFONT_NONE;
! 262: break;
! 263: default:
! 264: abort();
! 265: /* NOTREACHED */
1.1 schwarze 266: }
267:
1.5 ! schwarze 268: (void)print_ofont(h, font);
1.1 schwarze 269: }
270:
271:
1.5 ! schwarze 272: static int
! 273: print_encode(struct html *h, const char *p, int norecurse)
1.1 schwarze 274: {
1.4 schwarze 275: size_t sz;
1.5 ! schwarze 276: int len, nospace;
! 277: const char *seq;
! 278: enum roffdeco deco;
! 279:
! 280: nospace = 0;
1.1 schwarze 281:
282: for (; *p; p++) {
1.4 schwarze 283: sz = strcspn(p, "\\<>&");
284:
285: fwrite(p, 1, sz, stdout);
286: p += /* LINTED */
287: sz;
288:
1.5 ! schwarze 289: if ('<' == *p) {
! 290: printf("<");
! 291: continue;
! 292: } else if ('>' == *p) {
! 293: printf(">");
! 294: continue;
! 295: } else if ('&' == *p) {
! 296: printf("&");
1.1 schwarze 297: continue;
1.4 schwarze 298: } else if ('\0' == *p)
299: break;
300:
1.5 ! schwarze 301: seq = ++p;
! 302: len = a2roffdeco(&deco, &seq, &sz);
! 303:
! 304: switch (deco) {
! 305: case (DECO_RESERVED):
! 306: print_res(h, seq, sz);
! 307: break;
! 308: case (DECO_SPECIAL):
! 309: print_spec(h, seq, sz);
! 310: break;
! 311: case (DECO_PREVIOUS):
! 312: /* FALLTHROUGH */
! 313: case (DECO_BOLD):
! 314: /* FALLTHROUGH */
! 315: case (DECO_ITALIC):
! 316: /* FALLTHROUGH */
! 317: case (DECO_ROMAN):
! 318: if (norecurse)
! 319: break;
! 320: print_metaf(h, deco);
! 321: break;
! 322: default:
! 323: break;
! 324: }
! 325:
! 326: p += len - 1;
! 327:
! 328: if (DECO_NOSPACE == deco && '\0' == *(p + 1))
! 329: nospace = 1;
1.1 schwarze 330: }
1.5 ! schwarze 331:
! 332: return(nospace);
1.1 schwarze 333: }
334:
335:
336: struct tag *
337: print_otag(struct html *h, enum htmltag tag,
338: int sz, const struct htmlpair *p)
339: {
340: int i;
341: struct tag *t;
342:
343: if ( ! (HTML_NOSTACK & htmltags[tag].flags)) {
1.3 schwarze 344: t = malloc(sizeof(struct tag));
345: if (NULL == t) {
346: perror(NULL);
347: exit(EXIT_FAILURE);
348: }
1.1 schwarze 349: t->tag = tag;
1.2 schwarze 350: t->next = h->tags.head;
351: h->tags.head = t;
1.1 schwarze 352: } else
353: t = NULL;
354:
355: if ( ! (HTML_NOSPACE & h->flags))
356: if ( ! (HTML_CLRLINE & htmltags[tag].flags))
1.4 schwarze 357: putchar(' ');
1.1 schwarze 358:
359: printf("<%s", htmltags[tag].name);
360: for (i = 0; i < sz; i++) {
361: printf(" %s=\"", htmlattrs[p[i].key]);
362: assert(p->val);
1.5 ! schwarze 363: (void)print_encode(h, p[i].val, 1);
1.4 schwarze 364: putchar('\"');
1.1 schwarze 365: }
1.4 schwarze 366: putchar('>');
1.1 schwarze 367:
368: h->flags |= HTML_NOSPACE;
369: return(t);
370: }
371:
372:
373: static void
374: print_ctag(struct html *h, enum htmltag tag)
375: {
376:
377: printf("</%s>", htmltags[tag].name);
1.3 schwarze 378: if (HTML_CLRLINE & htmltags[tag].flags) {
1.1 schwarze 379: h->flags |= HTML_NOSPACE;
1.4 schwarze 380: putchar('\n');
1.5 ! schwarze 381: }
1.1 schwarze 382: }
383:
384:
385: /* ARGSUSED */
386: void
387: print_gen_doctype(struct html *h)
388: {
389:
390: printf("<!DOCTYPE HTML PUBLIC \"%s\" \"%s\">", DOCTYPE, DTD);
391: }
392:
393:
394: void
395: print_text(struct html *h, const char *p)
396: {
397:
398: if (*p && 0 == *(p + 1))
399: switch (*p) {
400: case('.'):
401: /* FALLTHROUGH */
402: case(','):
403: /* FALLTHROUGH */
404: case(';'):
405: /* FALLTHROUGH */
406: case(':'):
407: /* FALLTHROUGH */
408: case('?'):
409: /* FALLTHROUGH */
410: case('!'):
411: /* FALLTHROUGH */
412: case(')'):
413: /* FALLTHROUGH */
414: case(']'):
415: /* FALLTHROUGH */
416: case('}'):
417: if ( ! (HTML_IGNDELIM & h->flags))
418: h->flags |= HTML_NOSPACE;
419: break;
420: default:
421: break;
422: }
423:
424: if ( ! (h->flags & HTML_NOSPACE))
1.4 schwarze 425: putchar(' ');
1.1 schwarze 426:
1.5 ! schwarze 427: assert(p);
! 428: if ( ! print_encode(h, p, 0))
! 429: h->flags &= ~HTML_NOSPACE;
1.1 schwarze 430:
431: if (*p && 0 == *(p + 1))
432: switch (*p) {
433: case('('):
434: /* FALLTHROUGH */
435: case('['):
436: /* FALLTHROUGH */
437: case('{'):
438: h->flags |= HTML_NOSPACE;
439: break;
440: default:
441: break;
442: }
443: }
444:
445:
446: void
447: print_tagq(struct html *h, const struct tag *until)
448: {
449: struct tag *tag;
450:
1.2 schwarze 451: while ((tag = h->tags.head) != NULL) {
1.5 ! schwarze 452: if (tag == h->metaf)
! 453: h->metaf = NULL;
1.1 schwarze 454: print_ctag(h, tag->tag);
1.2 schwarze 455: h->tags.head = tag->next;
1.1 schwarze 456: free(tag);
457: if (until && tag == until)
458: return;
459: }
460: }
461:
462:
463: void
464: print_stagq(struct html *h, const struct tag *suntil)
465: {
466: struct tag *tag;
467:
1.2 schwarze 468: while ((tag = h->tags.head) != NULL) {
1.1 schwarze 469: if (suntil && tag == suntil)
470: return;
1.5 ! schwarze 471: if (tag == h->metaf)
! 472: h->metaf = NULL;
1.1 schwarze 473: print_ctag(h, tag->tag);
1.2 schwarze 474: h->tags.head = tag->next;
1.1 schwarze 475: free(tag);
476: }
477: }
478:
479:
480: void
481: bufinit(struct html *h)
482: {
483:
484: h->buf[0] = '\0';
485: h->buflen = 0;
486: }
487:
488:
/* Append "key:val;" to the scratch buffer. */
void
bufcat_style(struct html *h, const char *key, const char *val)
{

	bufcat(h, key);
	bufcat(h, ":");
	bufcat(h, val);
	bufcat(h, ";");
}
498:
499:
/*
 * Append the whole NUL-terminated string p to the scratch buffer;
 * bufncat() clips whatever does not fit.
 */
void
bufcat(struct html *h, const char *p)
{
	size_t		 len;

	len = strlen(p);
	bufncat(h, p, len);
}
506:
507:
508: void
509: buffmt(struct html *h, const char *fmt, ...)
510: {
511: va_list ap;
512:
513: va_start(ap, fmt);
514: (void)vsnprintf(h->buf + (int)h->buflen,
515: BUFSIZ - h->buflen - 1, fmt, ap);
516: va_end(ap);
517: h->buflen = strlen(h->buf);
518: }
519:
520:
521: void
522: bufncat(struct html *h, const char *p, size_t sz)
523: {
524:
525: if (h->buflen + sz > BUFSIZ - 1)
526: sz = BUFSIZ - 1 - h->buflen;
527:
528: (void)strncat(h->buf, p, sz);
529: h->buflen += sz;
530: }
531:
532:
533: void
534: buffmt_includes(struct html *h, const char *name)
535: {
536: const char *p, *pp;
537:
538: pp = h->base_includes;
539:
540: while (NULL != (p = strchr(pp, '%'))) {
541: bufncat(h, pp, (size_t)(p - pp));
542: switch (*(p + 1)) {
543: case('I'):
544: bufcat(h, name);
545: break;
546: default:
547: bufncat(h, p, 2);
548: break;
549: }
550: pp = p + 2;
551: }
552: if (pp)
553: bufcat(h, pp);
554: }
555:
556:
557: void
558: buffmt_man(struct html *h,
559: const char *name, const char *sec)
560: {
561: const char *p, *pp;
562:
563: pp = h->base_man;
564:
565: /* LINTED */
566: while (NULL != (p = strchr(pp, '%'))) {
567: bufncat(h, pp, (size_t)(p - pp));
568: switch (*(p + 1)) {
569: case('S'):
570: bufcat(h, sec ? sec : "1");
571: break;
572: case('N'):
573: buffmt(h, name);
574: break;
575: default:
576: bufncat(h, p, 2);
577: break;
578: }
579: pp = p + 2;
580: }
581: if (pp)
582: bufcat(h, pp);
583: }
584:
585:
586: void
587: bufcat_su(struct html *h, const char *p, const struct roffsu *su)
588: {
589: double v;
590: const char *u;
591:
592: v = su->scale;
593:
594: switch (su->unit) {
595: case (SCALE_CM):
596: u = "cm";
597: break;
598: case (SCALE_IN):
599: u = "in";
600: break;
601: case (SCALE_PC):
602: u = "pc";
603: break;
604: case (SCALE_PT):
605: u = "pt";
606: break;
607: case (SCALE_EM):
608: u = "em";
609: break;
610: case (SCALE_MM):
611: if (0 == (v /= 100))
612: v = 1;
613: u = "em";
614: break;
615: case (SCALE_EN):
616: u = "ex";
617: break;
618: case (SCALE_BU):
619: u = "ex";
620: break;
621: case (SCALE_VS):
622: u = "em";
623: break;
624: default:
625: u = "ex";
626: break;
627: }
628:
629: if (su->pt)
630: buffmt(h, "%s: %f%s;", p, v, u);
631: else
632: /* LINTED */
633: buffmt(h, "%s: %d%s;", p, (int)v, u);
634: }
635:
1.3 schwarze 636:
/*
 * Append an HTML-safe identifier fragment for src to the string in
 * dst, a buffer of sz bytes in total.
 *
 * The fragment is the letter 'x' followed by two lower-case hex digits
 * for every byte of src.  Encoding stops at the last byte that fits
 * completely, so the result never ends in half a byte; dst is always
 * NUL-terminated.  At least three bytes must be free after the current
 * contents of dst.
 */
void
html_idcat(char *dst, const char *src, int sz)
{
	int		 ssz;

	assert(sz);

	/* Cf. <http://www.w3.org/TR/html4/types.html#h-6.2>. */

	for ( ; *dst != '\0' && sz; dst++, sz--)
		/* Jump to end. */ ;

	assert(sz > 2);

	/* We can't start with a number (bah). */

	*dst++ = 'x';
	*dst = '\0';
	sz--;

	/* Each byte needs room for two digits plus the terminator. */
	for ( ; *src != '\0' && sz > 2; src++) {
		/*
		 * Go through unsigned char so that bytes >= 0x80 are
		 * not sign-extended into eight hex digits.
		 */
		ssz = snprintf(dst, (size_t)sz, "%.2x",
		    (unsigned int)(unsigned char)*src);
		sz -= ssz;
		dst += ssz;
	}
}
662: }