Annotation of src/usr.bin/mandoc/html.c, Revision 1.4
1.4 ! schwarze 1: /* $Id: html.c,v 1.3 2009/12/22 23:58:00 schwarze Exp $ */
1.1 schwarze 2: /*
3: * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
4: *
5: * Permission to use, copy, modify, and distribute this software for any
6: * purpose with or without fee is hereby granted, provided that the above
7: * copyright notice and this permission notice appear in all copies.
8: *
9: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16: */
17: #include <sys/types.h>
18:
19: #include <assert.h>
1.3 schwarze 20: #include <ctype.h>
1.4 ! schwarze 21: #include <stdarg.h>
1.1 schwarze 22: #include <stdio.h>
23: #include <stdint.h>
24: #include <stdlib.h>
25: #include <string.h>
26: #include <unistd.h>
27:
28: #include "out.h"
29: #include "chars.h"
30: #include "html.h"
31: #include "main.h"
32:
/*
 * Cast away the const qualifier of a pointer by passing it through
 * uintptr_t; the result is a plain void pointer.
 */
#define	UNCONST(a)	((void *)(uintptr_t)(const void *)(a))

/* Public and system identifiers printed by print_gen_doctype(). */
#define	DOCTYPE		"-//W3C//DTD HTML 4.01//EN"
#define	DTD		"http://www.w3.org/TR/html4/strict.dtd"
37:
/*
 * Flag bits for htmldata.flags.
 * HTML_CLRLINE: print_otag() prints no leading space before the tag and
 * print_ctag() ends the line after the closing tag.
 * HTML_NOSTACK: print_otag() does not push the tag onto the open-tag
 * stack, so no closing tag is ever printed for it.
 */
#define	HTML_CLRLINE	 (1 << 0)
#define	HTML_NOSTACK	 (1 << 1)

/* Static description of one HTML element. */
struct htmldata {
	const char	 *name;		/* element name as printed in the tag */
	int		  flags;	/* bitwise OR of the HTML_* bits above */
};
44:
45: static const struct htmldata htmltags[TAG_MAX] = {
46: {"html", HTML_CLRLINE}, /* TAG_HTML */
47: {"head", HTML_CLRLINE}, /* TAG_HEAD */
48: {"body", HTML_CLRLINE}, /* TAG_BODY */
49: {"meta", HTML_CLRLINE | HTML_NOSTACK}, /* TAG_META */
50: {"title", HTML_CLRLINE}, /* TAG_TITLE */
51: {"div", HTML_CLRLINE}, /* TAG_DIV */
52: {"h1", 0}, /* TAG_H1 */
53: {"h2", 0}, /* TAG_H2 */
54: {"p", HTML_CLRLINE}, /* TAG_P */
55: {"span", 0}, /* TAG_SPAN */
56: {"link", HTML_CLRLINE | HTML_NOSTACK}, /* TAG_LINK */
57: {"br", HTML_CLRLINE | HTML_NOSTACK}, /* TAG_LINK */
58: {"a", 0}, /* TAG_A */
59: {"table", HTML_CLRLINE}, /* TAG_TABLE */
60: {"col", HTML_CLRLINE | HTML_NOSTACK}, /* TAG_COL */
61: {"tr", HTML_CLRLINE}, /* TAG_TR */
62: {"td", HTML_CLRLINE}, /* TAG_TD */
63: {"li", HTML_CLRLINE}, /* TAG_LI */
64: {"ul", HTML_CLRLINE}, /* TAG_UL */
65: {"ol", HTML_CLRLINE}, /* TAG_OL */
66: {"base", HTML_CLRLINE | HTML_NOSTACK}, /* TAG_BASE */
67: };
68:
69: static const char *const htmlattrs[ATTR_MAX] = {
70: "http-equiv",
71: "content",
72: "name",
73: "rel",
74: "href",
75: "type",
76: "media",
77: "class",
78: "style",
79: "width",
80: "valign",
81: "target",
82: "id",
1.3 schwarze 83: "summary",
1.1 schwarze 84: };
85:
86: void *
87: html_alloc(char *outopts)
88: {
89: struct html *h;
90: const char *toks[4];
91: char *v;
92:
93: toks[0] = "style";
94: toks[1] = "man";
95: toks[2] = "includes";
96: toks[3] = NULL;
97:
1.3 schwarze 98: h = calloc(1, sizeof(struct html));
99: if (NULL == h) {
100: perror(NULL);
101: exit(EXIT_FAILURE);
102: }
1.1 schwarze 103:
1.2 schwarze 104: h->tags.head = NULL;
105: h->ords.head = NULL;
1.3 schwarze 106: h->symtab = chars_init(CHARS_HTML);
1.1 schwarze 107:
108: while (outopts && *outopts)
109: switch (getsubopt(&outopts, UNCONST(toks), &v)) {
110: case (0):
111: h->style = v;
112: break;
113: case (1):
114: h->base_man = v;
115: break;
116: case (2):
117: h->base_includes = v;
118: break;
119: default:
120: break;
121: }
122:
123: return(h);
124: }
125:
126:
127: void
128: html_free(void *p)
129: {
130: struct tag *tag;
131: struct ord *ord;
132: struct html *h;
133:
134: h = (struct html *)p;
135:
1.2 schwarze 136: while ((ord = h->ords.head) != NULL) {
137: h->ords.head = ord->next;
1.1 schwarze 138: free(ord);
139: }
140:
1.2 schwarze 141: while ((tag = h->tags.head) != NULL) {
142: h->tags.head = tag->next;
1.1 schwarze 143: free(tag);
144: }
145:
146: if (h->symtab)
147: chars_free(h->symtab);
148:
149: free(h);
150: }
151:
152:
153: void
154: print_gen_head(struct html *h)
155: {
156: struct htmlpair tag[4];
157:
158: tag[0].key = ATTR_HTTPEQUIV;
159: tag[0].val = "Content-Type";
160: tag[1].key = ATTR_CONTENT;
161: tag[1].val = "text/html; charset=utf-8";
162: print_otag(h, TAG_META, 2, tag);
163:
164: tag[0].key = ATTR_NAME;
165: tag[0].val = "resource-type";
166: tag[1].key = ATTR_CONTENT;
167: tag[1].val = "document";
168: print_otag(h, TAG_META, 2, tag);
169:
170: if (h->style) {
171: tag[0].key = ATTR_REL;
172: tag[0].val = "stylesheet";
173: tag[1].key = ATTR_HREF;
174: tag[1].val = h->style;
175: tag[2].key = ATTR_TYPE;
176: tag[2].val = "text/css";
177: tag[3].key = ATTR_MEDIA;
178: tag[3].val = "all";
179: print_otag(h, TAG_LINK, 4, tag);
180: }
181: }
182:
183:
184: static void
185: print_spec(struct html *h, const char *p, int len)
186: {
187: const char *rhs;
188: size_t sz;
189:
190: rhs = chars_a2ascii(h->symtab, p, (size_t)len, &sz);
191:
192: if (NULL == rhs)
193: return;
1.4 ! schwarze 194: fwrite(rhs, 1, sz, stdout);
1.1 schwarze 195: }
196:
197:
198: static void
199: print_res(struct html *h, const char *p, int len)
200: {
201: const char *rhs;
202: size_t sz;
203:
204: rhs = chars_a2res(h->symtab, p, (size_t)len, &sz);
205:
206: if (NULL == rhs)
207: return;
1.4 ! schwarze 208: fwrite(rhs, 1, sz, stdout);
1.1 schwarze 209: }
210:
211:
/*
 * Handle one backslash escape sequence.
 *
 * On entry *p points at the backslash.  On return *p points at the last
 * byte consumed by the sequence, or at the terminating NUL if the
 * sequence was cut short by the end of the string.
 *
 * Recognised forms:
 *   \(xx  \[name]  \c    looked up and printed through print_spec()
 *   \*(xx \*[name] \*c   looked up and printed through print_res()
 *   \fc                  font request; consumed, nothing printed
 */
static void
print_escape(struct html *h, const char **p)
{
	const char	*cp;
	int		 namelen, predef;

	cp = *p + 1;		/* step over the backslash */
	predef = 0;		/* set for \*[name] */

	if (*cp == '\0') {
		*p = cp;
		return;
	}

	switch (*cp) {
	case '(':
		cp++;
		if (cp[0] == '\0' || cp[1] == '\0') {
			*p = (cp[0] == '\0') ? cp : cp + 1;
			return;
		}
		print_spec(h, cp, 2);
		*p = cp + 1;
		return;
	case '*':
		cp++;
		if (*cp == '\0') {
			*p = cp;
			return;
		}
		if (*cp == '(') {
			cp++;
			if (cp[0] == '\0' || cp[1] == '\0') {
				*p = (cp[0] == '\0') ? cp : cp + 1;
				return;
			}
			print_res(h, cp, 2);
			*p = cp + 1;
			return;
		}
		if (*cp != '[') {
			print_res(h, cp, 1);
			*p = cp;
			return;
		}
		predef = 1;
		break;
	case 'f':
		/*
		 * TODO: font changes (B, I, P, R) are not implemented;
		 * the byte after the 'f' is consumed and dropped.  If
		 * the string ends here, that byte is the NUL.
		 */
		*p = cp + 1;
		return;
	case '[':
		break;
	default:
		print_spec(h, cp, 1);
		*p = cp;
		return;
	}

	/* Bracketed name: cp is on the '[', find the matching ']'. */

	cp++;
	namelen = 0;
	while (*cp != '\0' && *cp != ']') {
		cp++;
		namelen++;
	}

	if (*cp == '\0') {
		*p = cp;
		return;
	}

	if (predef)
		print_res(h, cp - namelen, namelen);
	else
		print_spec(h, cp - namelen, namelen);

	*p = cp;
}
310:
311:
/*
 * Write the string p to stdout, HTML-encoded.
 *
 * Runs of ordinary bytes are written verbatim.  Backslash sequences are
 * handed to print_escape().  The characters '<', '>', '&' and '"' are
 * replaced by the corresponding character entity references so that
 * the output is safe both as element content and inside a
 * double-quoted attribute value (print_otag() uses this function for
 * attribute values).
 */
static void
print_encode(struct html *h, const char *p)
{
	size_t		 sz;

	while ('\0' != *p) {
		sz = strcspn(p, "\\<>&\"");

		fwrite(p, 1, sz, stdout);
		p += /* LINTED */
			sz;

		switch (*p) {
		case ('\0'):
			return;
		case ('\\'):
			print_escape(h, &p);
			/*
			 * A truncated escape leaves p on the terminating
			 * NUL; stepping past it would read out of bounds.
			 */
			if ('\0' == *p)
				return;
			break;
		case ('<'):
			fputs("&lt;", stdout);
			break;
		case ('>'):
			fputs("&gt;", stdout);
			break;
		case ('&'):
			fputs("&amp;", stdout);
			break;
		case ('"'):
			fputs("&quot;", stdout);
			break;
		default:
			break;
		}

		/* Step over the byte that was just handled. */
		p++;
	}
}
338:
339:
340: struct tag *
341: print_otag(struct html *h, enum htmltag tag,
342: int sz, const struct htmlpair *p)
343: {
344: int i;
345: struct tag *t;
346:
347: if ( ! (HTML_NOSTACK & htmltags[tag].flags)) {
1.3 schwarze 348: t = malloc(sizeof(struct tag));
349: if (NULL == t) {
350: perror(NULL);
351: exit(EXIT_FAILURE);
352: }
1.1 schwarze 353: t->tag = tag;
1.2 schwarze 354: t->next = h->tags.head;
355: h->tags.head = t;
1.1 schwarze 356: } else
357: t = NULL;
358:
359: if ( ! (HTML_NOSPACE & h->flags))
360: if ( ! (HTML_CLRLINE & htmltags[tag].flags))
1.4 ! schwarze 361: putchar(' ');
1.1 schwarze 362:
363: printf("<%s", htmltags[tag].name);
364: for (i = 0; i < sz; i++) {
365: printf(" %s=\"", htmlattrs[p[i].key]);
366: assert(p->val);
367: print_encode(h, p[i].val);
1.4 ! schwarze 368: putchar('\"');
1.1 schwarze 369: }
1.4 ! schwarze 370: putchar('>');
1.1 schwarze 371:
372: h->flags |= HTML_NOSPACE;
373: if (HTML_CLRLINE & htmltags[tag].flags)
374: h->flags |= HTML_NEWLINE;
375: else
376: h->flags &= ~HTML_NEWLINE;
377:
378: return(t);
379: }
380:
381:
382: /* ARGSUSED */
383: static void
384: print_ctag(struct html *h, enum htmltag tag)
385: {
386:
387: printf("</%s>", htmltags[tag].name);
1.3 schwarze 388: if (HTML_CLRLINE & htmltags[tag].flags) {
1.1 schwarze 389: h->flags |= HTML_NOSPACE;
390: h->flags |= HTML_NEWLINE;
1.4 ! schwarze 391: putchar('\n');
1.3 schwarze 392: } else
1.1 schwarze 393: h->flags &= ~HTML_NEWLINE;
394: }
395:
396:
397: /* ARGSUSED */
398: void
399: print_gen_doctype(struct html *h)
400: {
401:
402: printf("<!DOCTYPE HTML PUBLIC \"%s\" \"%s\">", DOCTYPE, DTD);
403: }
404:
405:
406: void
407: print_text(struct html *h, const char *p)
408: {
409:
410: if (*p && 0 == *(p + 1))
411: switch (*p) {
412: case('.'):
413: /* FALLTHROUGH */
414: case(','):
415: /* FALLTHROUGH */
416: case(';'):
417: /* FALLTHROUGH */
418: case(':'):
419: /* FALLTHROUGH */
420: case('?'):
421: /* FALLTHROUGH */
422: case('!'):
423: /* FALLTHROUGH */
424: case(')'):
425: /* FALLTHROUGH */
426: case(']'):
427: /* FALLTHROUGH */
428: case('}'):
429: if ( ! (HTML_IGNDELIM & h->flags))
430: h->flags |= HTML_NOSPACE;
431: break;
432: default:
433: break;
434: }
435:
436: if ( ! (h->flags & HTML_NOSPACE))
1.4 ! schwarze 437: putchar(' ');
1.1 schwarze 438:
439: h->flags &= ~HTML_NOSPACE;
440: h->flags &= ~HTML_NEWLINE;
441:
442: if (p)
443: print_encode(h, p);
444:
445: if (*p && 0 == *(p + 1))
446: switch (*p) {
447: case('('):
448: /* FALLTHROUGH */
449: case('['):
450: /* FALLTHROUGH */
451: case('{'):
452: h->flags |= HTML_NOSPACE;
453: break;
454: default:
455: break;
456: }
457: }
458:
459:
460: void
461: print_tagq(struct html *h, const struct tag *until)
462: {
463: struct tag *tag;
464:
1.2 schwarze 465: while ((tag = h->tags.head) != NULL) {
1.1 schwarze 466: print_ctag(h, tag->tag);
1.2 schwarze 467: h->tags.head = tag->next;
1.1 schwarze 468: free(tag);
469: if (until && tag == until)
470: return;
471: }
472: }
473:
474:
475: void
476: print_stagq(struct html *h, const struct tag *suntil)
477: {
478: struct tag *tag;
479:
1.2 schwarze 480: while ((tag = h->tags.head) != NULL) {
1.1 schwarze 481: if (suntil && tag == suntil)
482: return;
483: print_ctag(h, tag->tag);
1.2 schwarze 484: h->tags.head = tag->next;
1.1 schwarze 485: free(tag);
486: }
487: }
488:
489:
490: void
491: bufinit(struct html *h)
492: {
493:
494: h->buf[0] = '\0';
495: h->buflen = 0;
496: }
497:
498:
/* Append "key:val;" to the scratch buffer in h. */
void
bufcat_style(struct html *h, const char *key, const char *val)
{

	bufcat(h, key);
	bufcat(h, ":");
	bufcat(h, val);
	bufcat(h, ";");
}
508:
509:
/* Append the whole NUL-terminated string p to the scratch buffer. */
void
bufcat(struct html *h, const char *p)
{
	size_t		 len;

	len = strlen(p);
	bufncat(h, p, len);
}
516:
517:
518: void
519: buffmt(struct html *h, const char *fmt, ...)
520: {
521: va_list ap;
522:
523: va_start(ap, fmt);
524: (void)vsnprintf(h->buf + (int)h->buflen,
525: BUFSIZ - h->buflen - 1, fmt, ap);
526: va_end(ap);
527: h->buflen = strlen(h->buf);
528: }
529:
530:
531: void
532: bufncat(struct html *h, const char *p, size_t sz)
533: {
534:
535: if (h->buflen + sz > BUFSIZ - 1)
536: sz = BUFSIZ - 1 - h->buflen;
537:
538: (void)strncat(h->buf, p, sz);
539: h->buflen += sz;
540: }
541:
542:
543: void
544: buffmt_includes(struct html *h, const char *name)
545: {
546: const char *p, *pp;
547:
548: pp = h->base_includes;
549:
550: while (NULL != (p = strchr(pp, '%'))) {
551: bufncat(h, pp, (size_t)(p - pp));
552: switch (*(p + 1)) {
553: case('I'):
554: bufcat(h, name);
555: break;
556: default:
557: bufncat(h, p, 2);
558: break;
559: }
560: pp = p + 2;
561: }
562: if (pp)
563: bufcat(h, pp);
564: }
565:
566:
567: void
568: buffmt_man(struct html *h,
569: const char *name, const char *sec)
570: {
571: const char *p, *pp;
572:
573: pp = h->base_man;
574:
575: /* LINTED */
576: while (NULL != (p = strchr(pp, '%'))) {
577: bufncat(h, pp, (size_t)(p - pp));
578: switch (*(p + 1)) {
579: case('S'):
580: bufcat(h, sec ? sec : "1");
581: break;
582: case('N'):
583: buffmt(h, name);
584: break;
585: default:
586: bufncat(h, p, 2);
587: break;
588: }
589: pp = p + 2;
590: }
591: if (pp)
592: bufcat(h, pp);
593: }
594:
595:
596: void
597: bufcat_su(struct html *h, const char *p, const struct roffsu *su)
598: {
599: double v;
600: const char *u;
601:
602: v = su->scale;
603:
604: switch (su->unit) {
605: case (SCALE_CM):
606: u = "cm";
607: break;
608: case (SCALE_IN):
609: u = "in";
610: break;
611: case (SCALE_PC):
612: u = "pc";
613: break;
614: case (SCALE_PT):
615: u = "pt";
616: break;
617: case (SCALE_EM):
618: u = "em";
619: break;
620: case (SCALE_MM):
621: if (0 == (v /= 100))
622: v = 1;
623: u = "em";
624: break;
625: case (SCALE_EN):
626: u = "ex";
627: break;
628: case (SCALE_BU):
629: u = "ex";
630: break;
631: case (SCALE_VS):
632: u = "em";
633: break;
634: default:
635: u = "ex";
636: break;
637: }
638:
639: if (su->pt)
640: buffmt(h, "%s: %f%s;", p, v, u);
641: else
642: /* LINTED */
643: buffmt(h, "%s: %d%s;", p, (int)v, u);
644: }
645:
1.3 schwarze 646:
/*
 * Append an identifier derived from src to the NUL-terminated string
 * already in dst, where sz is the total size of the dst buffer.
 *
 * The appended text is the letter 'x' followed by two lower-case hex
 * digits for every byte of src.  Encoding stops, on a byte boundary,
 * as soon as the next byte would no longer fit together with the
 * terminator; dst is always left NUL-terminated.
 *
 * Asserts that sz is non-zero and that more than two bytes of dst
 * remain free after the existing contents.
 */
void
html_idcat(char *dst, const char *src, int sz)
{
	static const char	 hex[] = "0123456789abcdef";
	unsigned char		 c;

	assert(sz);

	/* Cf. <http://www.w3.org/TR/html4/types.html#h-6.2>. */

	for ( ; *dst != '\0' && sz; dst++, sz--)
		/* Jump to end. */ ;

	assert(sz > 2);

	/* We can't start with a number (bah). */

	*dst++ = 'x';
	*dst = '\0';
	sz--;

	/*
	 * Two digits plus the terminator must fit.  Each byte is read
	 * as unsigned char so that values above 0x7f yield exactly two
	 * digits even where plain char is signed.
	 */
	for ( ; *src != '\0' && sz > 2; src++) {
		c = (unsigned char)*src;
		*dst++ = hex[c >> 4];
		*dst++ = hex[c & 0x0f];
		*dst = '\0';
		sz -= 2;
	}
}
672: }