Annotation of src/usr.bin/mandoc/html.c, Revision 1.134
1.134 ! schwarze 1: /* $OpenBSD: html.c,v 1.133 2020/01/19 17:59:01 schwarze Exp $ */
1.1 schwarze 2: /*
1.42 schwarze 3: * Copyright (c) 2008-2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
1.134 ! schwarze 4: * Copyright (c) 2011-2015, 2017-2020 Ingo Schwarze <schwarze@openbsd.org>
1.1 schwarze 5: *
6: * Permission to use, copy, modify, and distribute this software for any
7: * purpose with or without fee is hereby granted, provided that the above
8: * copyright notice and this permission notice appear in all copies.
9: *
1.56 schwarze 10: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
1.1 schwarze 11: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
1.56 schwarze 12: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
1.1 schwarze 13: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17: */
18: #include <sys/types.h>
1.110 schwarze 19: #include <sys/stat.h>
1.1 schwarze 20:
21: #include <assert.h>
1.3 schwarze 22: #include <ctype.h>
1.4 schwarze 23: #include <stdarg.h>
1.99 schwarze 24: #include <stddef.h>
1.1 schwarze 25: #include <stdio.h>
26: #include <stdint.h>
27: #include <stdlib.h>
28: #include <string.h>
29: #include <unistd.h>
30:
1.80 schwarze 31: #include "mandoc_aux.h"
1.99 schwarze 32: #include "mandoc_ohash.h"
1.9 schwarze 33: #include "mandoc.h"
1.80 schwarze 34: #include "roff.h"
1.1 schwarze 35: #include "out.h"
36: #include "html.h"
1.56 schwarze 37: #include "manconf.h"
1.1 schwarze 38: #include "main.h"
39:
/*
 * Formatting flags describing how an HTML element is written.
 * They are combined by bitwise OR in the flags member of struct htmldata.
 */
#define	HTML_INPHRASE	 (1 << 0)  /* Can appear in phrasing context. */
#define	HTML_TOPHRASE	 (1 << 1)  /* Establishes phrasing context. */
#define	HTML_NOSTACK	 (1 << 2)  /* Does not have an end tag. */
#define	HTML_NLBEFORE	 (1 << 3)  /* Output line break before opening. */
#define	HTML_NLBEGIN	 (1 << 4)  /* Output line break after opening. */
#define	HTML_NLEND	 (1 << 5)  /* Output line break before closing. */
#define	HTML_NLAFTER	 (1 << 6)  /* Output line break after closing. */
#define	HTML_INDENT	 (1 << 7)  /* Indent content by two spaces. */
#define	HTML_NOINDENT	 (1 << 8)  /* Exception: never indent content. */

/* Frequently used combinations of the line break flags. */
#define	HTML_NLAROUND	 (HTML_NLBEFORE | HTML_NLAFTER)
#define	HTML_NLINSIDE	 (HTML_NLBEGIN | HTML_NLEND)
#define	HTML_NLALL	 (HTML_NLAROUND | HTML_NLINSIDE)

/* Static description of one HTML element type. */
struct	htmldata {
	const char	 *name;   /* Element name written into the tags. */
	int		  flags;  /* Bitwise OR of the HTML_* flags above. */
};
56:
57: static const struct htmldata htmltags[TAG_MAX] = {
1.66 schwarze 58: {"html", HTML_NLALL},
59: {"head", HTML_NLALL | HTML_INDENT},
1.127 schwarze 60: {"meta", HTML_NOSTACK | HTML_NLALL},
61: {"link", HTML_NOSTACK | HTML_NLALL},
62: {"style", HTML_NLALL | HTML_INDENT},
63: {"title", HTML_NLAROUND},
1.66 schwarze 64: {"body", HTML_NLALL},
65: {"div", HTML_NLAROUND},
1.123 schwarze 66: {"section", HTML_NLALL},
1.66 schwarze 67: {"table", HTML_NLALL | HTML_INDENT},
68: {"tr", HTML_NLALL | HTML_INDENT},
69: {"td", HTML_NLAROUND},
70: {"li", HTML_NLAROUND | HTML_INDENT},
71: {"ul", HTML_NLALL | HTML_INDENT},
72: {"ol", HTML_NLALL | HTML_INDENT},
73: {"dl", HTML_NLALL | HTML_INDENT},
74: {"dt", HTML_NLAROUND},
75: {"dd", HTML_NLAROUND | HTML_INDENT},
1.127 schwarze 76: {"h1", HTML_TOPHRASE | HTML_NLAROUND},
77: {"h2", HTML_TOPHRASE | HTML_NLAROUND},
78: {"p", HTML_TOPHRASE | HTML_NLAROUND | HTML_INDENT},
79: {"pre", HTML_TOPHRASE | HTML_NLALL | HTML_NOINDENT},
80: {"a", HTML_INPHRASE | HTML_TOPHRASE},
81: {"b", HTML_INPHRASE | HTML_TOPHRASE},
82: {"cite", HTML_INPHRASE | HTML_TOPHRASE},
83: {"code", HTML_INPHRASE | HTML_TOPHRASE},
84: {"i", HTML_INPHRASE | HTML_TOPHRASE},
85: {"small", HTML_INPHRASE | HTML_TOPHRASE},
86: {"span", HTML_INPHRASE | HTML_TOPHRASE},
87: {"var", HTML_INPHRASE | HTML_TOPHRASE},
88: {"br", HTML_INPHRASE | HTML_NOSTACK | HTML_NLALL},
1.134 ! schwarze 89: {"mark", HTML_INPHRASE },
1.127 schwarze 90: {"math", HTML_INPHRASE | HTML_NLALL | HTML_INDENT},
1.66 schwarze 91: {"mrow", 0},
92: {"mi", 0},
1.85 schwarze 93: {"mn", 0},
1.66 schwarze 94: {"mo", 0},
95: {"msup", 0},
96: {"msub", 0},
97: {"msubsup", 0},
98: {"mfrac", 0},
99: {"msqrt", 0},
100: {"mfenced", 0},
101: {"mtable", 0},
102: {"mtr", 0},
103: {"mtd", 0},
104: {"munderover", 0},
105: {"munder", 0},
106: {"mover", 0},
1.5 schwarze 107: };
108:
1.99 schwarze 109: /* Avoid duplicate HTML id= attributes. */
110: static struct ohash id_unique;
111:
1.124 schwarze 112: static void html_reset_internal(struct html *);
1.67 schwarze 113: static void print_byte(struct html *, char);
114: static void print_endword(struct html *);
115: static void print_indent(struct html *);
116: static void print_word(struct html *, const char *);
117:
1.54 schwarze 118: static void print_ctag(struct html *, struct tag *);
1.67 schwarze 119: static int print_escape(struct html *, char);
1.65 schwarze 120: static int print_encode(struct html *, const char *, const char *, int);
121: static void print_href(struct html *, const char *, const char *, int);
1.125 schwarze 122: static void print_metaf(struct html *);
1.5 schwarze 123:
1.35 schwarze 124:
1.50 schwarze 125: void *
1.61 schwarze 126: html_alloc(const struct manoutput *outopts)
1.1 schwarze 127: {
128: struct html *h;
129:
1.24 schwarze 130: h = mandoc_calloc(1, sizeof(struct html));
1.1 schwarze 131:
1.74 schwarze 132: h->tag = NULL;
1.56 schwarze 133: h->style = outopts->style;
1.110 schwarze 134: if ((h->base_man1 = outopts->man) == NULL)
135: h->base_man2 = NULL;
136: else if ((h->base_man2 = strchr(h->base_man1, ';')) != NULL)
137: *h->base_man2++ = '\0';
1.56 schwarze 138: h->base_includes = outopts->includes;
139: if (outopts->fragment)
140: h->oflags |= HTML_FRAGMENT;
1.111 schwarze 141: if (outopts->toc)
142: h->oflags |= HTML_TOC;
1.1 schwarze 143:
1.99 schwarze 144: mandoc_ohash_init(&id_unique, 4, 0);
145:
1.58 schwarze 146: return h;
1.1 schwarze 147: }
148:
1.124 schwarze 149: static void
150: html_reset_internal(struct html *h)
1.1 schwarze 151: {
152: struct tag *tag;
1.99 schwarze 153: char *cp;
154: unsigned int slot;
1.1 schwarze 155:
1.74 schwarze 156: while ((tag = h->tag) != NULL) {
157: h->tag = tag->next;
1.1 schwarze 158: free(tag);
159: }
1.99 schwarze 160: cp = ohash_first(&id_unique, &slot);
161: while (cp != NULL) {
162: free(cp);
163: cp = ohash_next(&id_unique, &slot);
164: }
165: ohash_delete(&id_unique);
1.124 schwarze 166: }
167:
168: void
169: html_reset(void *p)
170: {
171: html_reset_internal(p);
172: mandoc_ohash_init(&id_unique, 4, 0);
173: }
174:
/*
 * Release the tag stack, the recorded ids, and the state itself.
 */
void
html_free(void *p)
{
	struct html	*h;

	h = p;
	html_reset_internal(h);
	free(h);
}
181:
182: void
183: print_gen_head(struct html *h)
184: {
1.42 schwarze 185: struct tag *t;
186:
1.64 schwarze 187: print_otag(h, TAG_META, "?", "charset", "utf-8");
1.92 schwarze 188: if (h->style != NULL) {
189: print_otag(h, TAG_LINK, "?h??", "rel", "stylesheet",
190: h->style, "type", "text/css", "media", "all");
191: return;
192: }
1.1 schwarze 193:
1.42 schwarze 194: /*
1.92 schwarze 195: * Print a minimal embedded style sheet.
1.42 schwarze 196: */
1.66 schwarze 197:
1.64 schwarze 198: t = print_otag(h, TAG_STYLE, "");
1.66 schwarze 199: print_text(h, "table.head, table.foot { width: 100%; }");
1.67 schwarze 200: print_endline(h);
1.66 schwarze 201: print_text(h, "td.head-rtitle, td.foot-os { text-align: right; }");
1.67 schwarze 202: print_endline(h);
1.66 schwarze 203: print_text(h, "td.head-vol { text-align: center; }");
1.67 schwarze 204: print_endline(h);
1.126 schwarze 205: print_text(h, ".Nd, .Bf, .Op { display: inline; }");
1.95 schwarze 206: print_endline(h);
1.126 schwarze 207: print_text(h, ".Pa, .Ad { font-style: italic; }");
1.96 schwarze 208: print_endline(h);
1.126 schwarze 209: print_text(h, ".Ms { font-weight: bold; }");
1.98 schwarze 210: print_endline(h);
1.126 schwarze 211: print_text(h, ".Bl-diag ");
1.94 schwarze 212: print_byte(h, '>');
213: print_text(h, " dt { font-weight: bold; }");
1.93 schwarze 214: print_endline(h);
1.126 schwarze 215: print_text(h, "code.Nm, .Fl, .Cm, .Ic, code.In, .Fd, .Fn, .Cd "
216: "{ font-weight: bold; font-family: inherit; }");
1.42 schwarze 217: print_tagq(h, t);
1.1 schwarze 218: }
219:
1.125 schwarze 220: int
221: html_setfont(struct html *h, enum mandoc_esc font)
1.5 schwarze 222: {
1.125 schwarze 223: switch (font) {
1.35 schwarze 224: case ESCAPE_FONTPREV:
1.5 schwarze 225: font = h->metal;
226: break;
1.35 schwarze 227: case ESCAPE_FONTITALIC:
228: case ESCAPE_FONTBOLD:
229: case ESCAPE_FONTBI:
1.112 schwarze 230: case ESCAPE_FONTCW:
1.125 schwarze 231: case ESCAPE_FONTROMAN:
1.112 schwarze 232: break;
1.35 schwarze 233: case ESCAPE_FONT:
1.125 schwarze 234: font = ESCAPE_FONTROMAN;
1.5 schwarze 235: break;
236: default:
1.125 schwarze 237: return 0;
1.1 schwarze 238: }
1.125 schwarze 239: h->metal = h->metac;
240: h->metac = font;
241: return 1;
242: }
1.1 schwarze 243:
1.125 schwarze 244: static void
245: print_metaf(struct html *h)
246: {
1.20 schwarze 247: if (h->metaf) {
248: print_tagq(h, h->metaf);
249: h->metaf = NULL;
250: }
1.125 schwarze 251: switch (h->metac) {
252: case ESCAPE_FONTITALIC:
1.64 schwarze 253: h->metaf = print_otag(h, TAG_I, "");
1.31 schwarze 254: break;
1.125 schwarze 255: case ESCAPE_FONTBOLD:
1.64 schwarze 256: h->metaf = print_otag(h, TAG_B, "");
1.31 schwarze 257: break;
1.125 schwarze 258: case ESCAPE_FONTBI:
1.64 schwarze 259: h->metaf = print_otag(h, TAG_B, "");
260: print_otag(h, TAG_I, "");
1.31 schwarze 261: break;
1.125 schwarze 262: case ESCAPE_FONTCW:
1.112 schwarze 263: h->metaf = print_otag(h, TAG_SPAN, "c", "Li");
264: break;
1.31 schwarze 265: default:
266: break;
267: }
1.118 schwarze 268: }
269:
1.119 schwarze 270: void
271: html_close_paragraph(struct html *h)
272: {
1.129 schwarze 273: struct tag *this, *next;
1.130 schwarze 274: int flags;
1.119 schwarze 275:
1.129 schwarze 276: this = h->tag;
277: for (;;) {
278: next = this->next;
1.130 schwarze 279: flags = htmltags[this->tag].flags;
280: if (flags & (HTML_INPHRASE | HTML_TOPHRASE))
1.129 schwarze 281: print_ctag(h, this);
1.130 schwarze 282: if ((flags & HTML_INPHRASE) == 0)
1.119 schwarze 283: break;
1.129 schwarze 284: this = next;
1.119 schwarze 285: }
286: }
287:
1.118 schwarze 288: /*
289: * ROFF_nf switches to no-fill mode, ROFF_fi to fill mode.
290: * TOKEN_NONE does not switch. The old mode is returned.
291: */
292: enum roff_tok
293: html_fillmode(struct html *h, enum roff_tok want)
294: {
295: struct tag *t;
296: enum roff_tok had;
297:
298: for (t = h->tag; t != NULL; t = t->next)
299: if (t->tag == TAG_PRE)
300: break;
301:
302: had = t == NULL ? ROFF_fi : ROFF_nf;
303:
304: if (want != had) {
305: switch (want) {
306: case ROFF_fi:
307: print_tagq(h, t);
308: break;
309: case ROFF_nf:
1.119 schwarze 310: html_close_paragraph(h);
1.118 schwarze 311: print_otag(h, TAG_PRE, "");
312: break;
313: case TOKEN_NONE:
314: break;
315: default:
316: abort();
317: }
318: }
319: return had;
1.80 schwarze 320: }
321:
322: char *
1.99 schwarze 323: html_make_id(const struct roff_node *n, int unique)
1.80 schwarze 324: {
325: const struct roff_node *nch;
1.99 schwarze 326: char *buf, *bufs, *cp;
327: unsigned int slot;
328: int suffix;
1.80 schwarze 329:
330: for (nch = n->child; nch != NULL; nch = nch->next)
331: if (nch->type != ROFFT_TEXT)
332: return NULL;
333:
334: buf = NULL;
335: deroff(&buf, n);
1.90 schwarze 336: if (buf == NULL)
337: return NULL;
1.80 schwarze 338:
1.100 schwarze 339: /*
340: * In ID attributes, only use ASCII characters that are
341: * permitted in URL-fragment strings according to the
342: * explicit list at:
343: * https://url.spec.whatwg.org/#url-fragment-string
344: */
1.80 schwarze 345:
346: for (cp = buf; *cp != '\0'; cp++)
1.100 schwarze 347: if (isalnum((unsigned char)*cp) == 0 &&
348: strchr("!$&'()*+,-./:;=?@_~", *cp) == NULL)
1.80 schwarze 349: *cp = '_';
350:
1.99 schwarze 351: if (unique == 0)
352: return buf;
353:
354: /* Avoid duplicate HTML id= attributes. */
355:
356: bufs = NULL;
357: suffix = 1;
358: slot = ohash_qlookup(&id_unique, buf);
359: cp = ohash_find(&id_unique, slot);
360: if (cp != NULL) {
361: while (cp != NULL) {
362: free(bufs);
363: if (++suffix > 127) {
364: free(buf);
365: return NULL;
366: }
367: mandoc_asprintf(&bufs, "%s_%d", buf, suffix);
368: slot = ohash_qlookup(&id_unique, bufs);
369: cp = ohash_find(&id_unique, slot);
370: }
371: free(buf);
372: buf = bufs;
373: }
374: ohash_insert(&id_unique, slot, buf);
1.80 schwarze 375: return buf;
1.1 schwarze 376: }
377:
1.5 schwarze 378: static int
1.67 schwarze 379: print_escape(struct html *h, char c)
1.38 schwarze 380: {
381:
382: switch (c) {
383: case '<':
1.67 schwarze 384: print_word(h, "<");
1.38 schwarze 385: break;
386: case '>':
1.67 schwarze 387: print_word(h, ">");
1.38 schwarze 388: break;
389: case '&':
1.67 schwarze 390: print_word(h, "&");
1.38 schwarze 391: break;
392: case '"':
1.67 schwarze 393: print_word(h, """);
1.38 schwarze 394: break;
395: case ASCII_NBRSP:
1.67 schwarze 396: print_word(h, " ");
1.38 schwarze 397: break;
398: case ASCII_HYPH:
1.67 schwarze 399: print_byte(h, '-');
1.59 schwarze 400: break;
1.38 schwarze 401: case ASCII_BREAK:
402: break;
403: default:
1.58 schwarze 404: return 0;
1.38 schwarze 405: }
1.58 schwarze 406: return 1;
1.38 schwarze 407: }
408:
409: static int
1.65 schwarze 410: print_encode(struct html *h, const char *p, const char *pend, int norecurse)
1.1 schwarze 411: {
1.67 schwarze 412: char numbuf[16];
1.84 schwarze 413: const char *seq;
1.4 schwarze 414: size_t sz;
1.84 schwarze 415: int c, len, breakline, nospace;
1.26 schwarze 416: enum mandoc_esc esc;
1.84 schwarze 417: static const char rejs[10] = { ' ', '\\', '<', '>', '&', '"',
1.33 schwarze 418: ASCII_NBRSP, ASCII_HYPH, ASCII_BREAK, '\0' };
1.5 schwarze 419:
1.65 schwarze 420: if (pend == NULL)
421: pend = strchr(p, '\0');
422:
1.84 schwarze 423: breakline = 0;
1.5 schwarze 424: nospace = 0;
1.1 schwarze 425:
1.65 schwarze 426: while (p < pend) {
1.30 schwarze 427: if (HTML_SKIPCHAR & h->flags && '\\' != *p) {
428: h->flags &= ~HTML_SKIPCHAR;
429: p++;
430: continue;
431: }
432:
1.67 schwarze 433: for (sz = strcspn(p, rejs); sz-- && p < pend; p++)
1.84 schwarze 434: print_byte(h, *p);
435:
436: if (breakline &&
437: (p >= pend || *p == ' ' || *p == ASCII_NBRSP)) {
1.115 schwarze 438: print_otag(h, TAG_BR, "");
1.84 schwarze 439: breakline = 0;
440: while (p < pend && (*p == ' ' || *p == ASCII_NBRSP))
441: p++;
442: continue;
443: }
1.4 schwarze 444:
1.65 schwarze 445: if (p >= pend)
1.26 schwarze 446: break;
447:
1.84 schwarze 448: if (*p == ' ') {
449: print_endword(h);
450: p++;
451: continue;
452: }
453:
1.67 schwarze 454: if (print_escape(h, *p++))
1.33 schwarze 455: continue;
1.4 schwarze 456:
1.26 schwarze 457: esc = mandoc_escape(&p, &seq, &len);
458: switch (esc) {
1.35 schwarze 459: case ESCAPE_FONT:
460: case ESCAPE_FONTPREV:
461: case ESCAPE_FONTBOLD:
462: case ESCAPE_FONTITALIC:
463: case ESCAPE_FONTBI:
1.112 schwarze 464: case ESCAPE_FONTCW:
1.35 schwarze 465: case ESCAPE_FONTROMAN:
1.113 schwarze 466: if (0 == norecurse) {
467: h->flags |= HTML_NOSPACE;
1.125 schwarze 468: if (html_setfont(h, esc))
469: print_metaf(h);
1.113 schwarze 470: h->flags &= ~HTML_NOSPACE;
471: }
1.30 schwarze 472: continue;
1.35 schwarze 473: case ESCAPE_SKIPCHAR:
1.30 schwarze 474: h->flags |= HTML_SKIPCHAR;
475: continue;
1.116 schwarze 476: case ESCAPE_ERROR:
477: continue;
1.30 schwarze 478: default:
479: break;
480: }
481:
482: if (h->flags & HTML_SKIPCHAR) {
483: h->flags &= ~HTML_SKIPCHAR;
484: continue;
485: }
486:
487: switch (esc) {
1.35 schwarze 488: case ESCAPE_UNICODE:
1.38 schwarze 489: /* Skip past "u" header. */
1.26 schwarze 490: c = mchars_num2uc(seq + 1, len - 1);
491: break;
1.35 schwarze 492: case ESCAPE_NUMBERED:
1.26 schwarze 493: c = mchars_num2char(seq, len);
1.51 schwarze 494: if (c < 0)
495: continue;
1.26 schwarze 496: break;
1.35 schwarze 497: case ESCAPE_SPECIAL:
1.61 schwarze 498: c = mchars_spec2cp(seq, len);
1.51 schwarze 499: if (c <= 0)
500: continue;
1.116 schwarze 501: break;
502: case ESCAPE_UNDEF:
503: c = *seq;
1.26 schwarze 504: break;
1.109 schwarze 505: case ESCAPE_DEVICE:
506: print_word(h, "html");
507: continue;
1.84 schwarze 508: case ESCAPE_BREAK:
509: breakline = 1;
510: continue;
1.35 schwarze 511: case ESCAPE_NOSPACE:
1.26 schwarze 512: if ('\0' == *p)
513: nospace = 1;
1.49 schwarze 514: continue;
1.55 schwarze 515: case ESCAPE_OVERSTRIKE:
516: if (len == 0)
517: continue;
518: c = seq[len - 1];
519: break;
1.5 schwarze 520: default:
1.49 schwarze 521: continue;
1.5 schwarze 522: }
1.51 schwarze 523: if ((c < 0x20 && c != 0x09) ||
524: (c > 0x7E && c < 0xA0))
1.49 schwarze 525: c = 0xFFFD;
1.67 schwarze 526: if (c > 0x7E) {
1.86 bentley 527: (void)snprintf(numbuf, sizeof(numbuf), "&#x%.4X;", c);
1.67 schwarze 528: print_word(h, numbuf);
529: } else if (print_escape(h, c) == 0)
530: print_byte(h, c);
1.1 schwarze 531: }
1.5 schwarze 532:
1.58 schwarze 533: return nospace;
1.1 schwarze 534: }
535:
1.6 schwarze 536: static void
1.65 schwarze 537: print_href(struct html *h, const char *name, const char *sec, int man)
1.6 schwarze 538: {
1.110 schwarze 539: struct stat sb;
1.65 schwarze 540: const char *p, *pp;
1.110 schwarze 541: char *filename;
542:
543: if (man) {
544: pp = h->base_man1;
545: if (h->base_man2 != NULL) {
546: mandoc_asprintf(&filename, "%s.%s", name, sec);
547: if (stat(filename, &sb) == -1)
548: pp = h->base_man2;
549: free(filename);
550: }
551: } else
552: pp = h->base_includes;
1.65 schwarze 553:
554: while ((p = strchr(pp, '%')) != NULL) {
555: print_encode(h, pp, p, 1);
556: if (man && p[1] == 'S') {
557: if (sec == NULL)
1.67 schwarze 558: print_byte(h, '1');
1.65 schwarze 559: else
560: print_encode(h, sec, NULL, 1);
561: } else if ((man && p[1] == 'N') ||
562: (man == 0 && p[1] == 'I'))
563: print_encode(h, name, NULL, 1);
564: else
565: print_encode(h, p, p + 2, 1);
566: pp = p + 2;
567: }
568: if (*pp != '\0')
569: print_encode(h, pp, NULL, 1);
1.6 schwarze 570: }
571:
1.1 schwarze 572: struct tag *
1.64 schwarze 573: print_otag(struct html *h, enum htmltag tag, const char *fmt, ...)
1.1 schwarze 574: {
1.64 schwarze 575: va_list ap;
1.1 schwarze 576: struct tag *t;
1.65 schwarze 577: const char *attr;
1.73 schwarze 578: char *arg1, *arg2;
1.114 schwarze 579: int style_written, tflags;
1.66 schwarze 580:
581: tflags = htmltags[tag].flags;
1.1 schwarze 582:
1.127 schwarze 583: /* Flow content is not allowed in phrasing context. */
584:
585: if ((tflags & HTML_INPHRASE) == 0) {
586: for (t = h->tag; t != NULL; t = t->next) {
587: if (t->closed)
588: continue;
589: assert((htmltags[t->tag].flags & HTML_TOPHRASE) == 0);
590: break;
591: }
1.131 schwarze 592:
593: /*
594: * Always wrap phrasing elements in a paragraph
595: * unless already contained in some flow container;
596: * never put them directly into a section.
597: */
598:
599: } else if (tflags & HTML_TOPHRASE && h->tag->tag == TAG_SECTION)
600: print_otag(h, TAG_P, "c", "Pp");
1.127 schwarze 601:
1.74 schwarze 602: /* Push this tag onto the stack of open scopes. */
1.6 schwarze 603:
1.66 schwarze 604: if ((tflags & HTML_NOSTACK) == 0) {
1.24 schwarze 605: t = mandoc_malloc(sizeof(struct tag));
1.1 schwarze 606: t->tag = tag;
1.74 schwarze 607: t->next = h->tag;
1.122 schwarze 608: t->refcnt = 0;
609: t->closed = 0;
1.74 schwarze 610: h->tag = t;
1.1 schwarze 611: } else
612: t = NULL;
613:
1.66 schwarze 614: if (tflags & HTML_NLBEFORE)
1.67 schwarze 615: print_endline(h);
616: if (h->col == 0)
617: print_indent(h);
1.66 schwarze 618: else if ((h->flags & HTML_NOSPACE) == 0) {
619: if (h->flags & HTML_KEEP)
1.86 bentley 620: print_word(h, " ");
1.66 schwarze 621: else {
622: if (h->flags & HTML_PREKEEP)
623: h->flags |= HTML_KEEP;
1.67 schwarze 624: print_endword(h);
1.12 schwarze 625: }
1.66 schwarze 626: }
1.1 schwarze 627:
1.13 schwarze 628: if ( ! (h->flags & HTML_NONOSPACE))
629: h->flags &= ~HTML_NOSPACE;
1.14 schwarze 630: else
631: h->flags |= HTML_NOSPACE;
1.13 schwarze 632:
1.6 schwarze 633: /* Print out the tag name and attributes. */
634:
1.67 schwarze 635: print_byte(h, '<');
636: print_word(h, htmltags[tag].name);
1.64 schwarze 637:
638: va_start(ap, fmt);
639:
1.114 schwarze 640: while (*fmt != '\0' && *fmt != 's') {
1.73 schwarze 641:
1.108 schwarze 642: /* Parse attributes and arguments. */
1.73 schwarze 643:
644: arg1 = va_arg(ap, char *);
1.108 schwarze 645: arg2 = NULL;
1.64 schwarze 646: switch (*fmt++) {
647: case 'c':
1.65 schwarze 648: attr = "class";
1.64 schwarze 649: break;
650: case 'h':
1.65 schwarze 651: attr = "href";
1.64 schwarze 652: break;
653: case 'i':
1.65 schwarze 654: attr = "id";
1.64 schwarze 655: break;
656: case '?':
1.73 schwarze 657: attr = arg1;
658: arg1 = va_arg(ap, char *);
1.64 schwarze 659: break;
660: default:
661: abort();
662: }
1.73 schwarze 663: if (*fmt == 'M')
664: arg2 = va_arg(ap, char *);
665: if (arg1 == NULL)
666: continue;
667:
1.108 schwarze 668: /* Print the attributes. */
1.73 schwarze 669:
1.67 schwarze 670: print_byte(h, ' ');
671: print_word(h, attr);
672: print_byte(h, '=');
673: print_byte(h, '"');
1.65 schwarze 674: switch (*fmt) {
1.78 schwarze 675: case 'I':
676: print_href(h, arg1, NULL, 0);
677: fmt++;
678: break;
1.65 schwarze 679: case 'M':
1.73 schwarze 680: print_href(h, arg1, arg2, 1);
1.65 schwarze 681: fmt++;
682: break;
1.78 schwarze 683: case 'R':
684: print_byte(h, '#');
685: print_encode(h, arg1, NULL, 1);
1.65 schwarze 686: fmt++;
1.78 schwarze 687: break;
1.65 schwarze 688: default:
1.114 schwarze 689: print_encode(h, arg1, NULL, 1);
1.65 schwarze 690: break;
691: }
1.67 schwarze 692: print_byte(h, '"');
1.64 schwarze 693: }
1.114 schwarze 694:
695: style_written = 0;
696: while (*fmt++ == 's') {
697: arg1 = va_arg(ap, char *);
698: arg2 = va_arg(ap, char *);
699: if (arg2 == NULL)
700: continue;
701: print_byte(h, ' ');
702: if (style_written == 0) {
703: print_word(h, "style=\"");
704: style_written = 1;
705: }
706: print_word(h, arg1);
707: print_byte(h, ':');
708: print_byte(h, ' ');
709: print_word(h, arg2);
710: print_byte(h, ';');
711: }
712: if (style_written)
713: print_byte(h, '"');
714:
1.64 schwarze 715: va_end(ap);
1.6 schwarze 716:
1.42 schwarze 717: /* Accommodate for "well-formed" singleton escaping. */
1.6 schwarze 718:
1.127 schwarze 719: if (htmltags[tag].flags & HTML_NOSTACK)
1.67 schwarze 720: print_byte(h, '/');
1.6 schwarze 721:
1.67 schwarze 722: print_byte(h, '>');
1.1 schwarze 723:
1.66 schwarze 724: if (tflags & HTML_NLBEGIN)
1.67 schwarze 725: print_endline(h);
1.66 schwarze 726: else
727: h->flags |= HTML_NOSPACE;
1.18 schwarze 728:
1.66 schwarze 729: if (tflags & HTML_INDENT)
730: h->indent++;
731: if (tflags & HTML_NOINDENT)
732: h->noindent++;
1.18 schwarze 733:
1.58 schwarze 734: return t;
1.1 schwarze 735: }
736:
737: static void
1.54 schwarze 738: print_ctag(struct html *h, struct tag *tag)
1.1 schwarze 739: {
1.66 schwarze 740: int tflags;
1.35 schwarze 741:
1.122 schwarze 742: if (tag->closed == 0) {
743: tag->closed = 1;
744: if (tag == h->metaf)
745: h->metaf = NULL;
746: if (tag == h->tblt)
747: h->tblt = NULL;
748:
749: tflags = htmltags[tag->tag].flags;
750: if (tflags & HTML_INDENT)
751: h->indent--;
752: if (tflags & HTML_NOINDENT)
753: h->noindent--;
754: if (tflags & HTML_NLEND)
755: print_endline(h);
756: print_indent(h);
757: print_byte(h, '<');
758: print_byte(h, '/');
759: print_word(h, htmltags[tag->tag].name);
760: print_byte(h, '>');
761: if (tflags & HTML_NLAFTER)
762: print_endline(h);
763: }
764: if (tag->refcnt == 0) {
765: h->tag = tag->next;
766: free(tag);
767: }
1.1 schwarze 768: }
769:
/*
 * Write the document type declaration on a line of its own.
 */
void
print_gen_decls(struct html *h)
{
	static const char	 doctype[] = "<!DOCTYPE html>";

	print_word(h, doctype);
	print_endline(h);
}
776:
777: void
778: print_gen_comment(struct html *h, struct roff_node *n)
779: {
780: int wantblank;
781:
782: print_word(h, "<!-- This is an automatically generated file."
783: " Do not edit.");
784: h->indent = 1;
785: wantblank = 0;
786: while (n != NULL && n->type == ROFFT_COMMENT) {
787: if (strstr(n->string, "-->") == NULL &&
788: (wantblank || *n->string != '\0')) {
789: print_endline(h);
790: print_indent(h);
791: print_word(h, n->string);
792: wantblank = *n->string != '\0';
793: }
794: n = n->next;
795: }
796: if (wantblank)
797: print_endline(h);
798: print_word(h, " -->");
799: print_endline(h);
800: h->indent = 0;
1.1 schwarze 801: }
802:
803: void
1.12 schwarze 804: print_text(struct html *h, const char *word)
1.1 schwarze 805: {
1.131 schwarze 806: /*
807: * Always wrap text in a paragraph unless already contained in
808: * some flow container; never put it directly into a section.
809: */
810:
811: if (h->tag->tag == TAG_SECTION)
812: print_otag(h, TAG_P, "c", "Pp");
813:
814: /* Output whitespace before this text? */
815:
1.67 schwarze 816: if (h->col && (h->flags & HTML_NOSPACE) == 0) {
1.12 schwarze 817: if ( ! (HTML_KEEP & h->flags)) {
818: if (HTML_PREKEEP & h->flags)
819: h->flags |= HTML_KEEP;
1.67 schwarze 820: print_endword(h);
1.12 schwarze 821: } else
1.86 bentley 822: print_word(h, " ");
1.12 schwarze 823: }
1.131 schwarze 824:
825: /*
826: * Print the text, optionally surrounded by HTML whitespace,
827: * optionally manually switching fonts before and after.
828: */
1.1 schwarze 829:
1.125 schwarze 830: assert(h->metaf == NULL);
831: print_metaf(h);
832: print_indent(h);
1.65 schwarze 833: if ( ! print_encode(h, word, NULL, 0)) {
1.13 schwarze 834: if ( ! (h->flags & HTML_NONOSPACE))
835: h->flags &= ~HTML_NOSPACE;
1.53 schwarze 836: h->flags &= ~HTML_NONEWLINE;
1.28 schwarze 837: } else
1.53 schwarze 838: h->flags |= HTML_NOSPACE | HTML_NONEWLINE;
1.20 schwarze 839:
1.125 schwarze 840: if (h->metaf != NULL) {
1.20 schwarze 841: print_tagq(h, h->metaf);
842: h->metaf = NULL;
843: }
1.17 schwarze 844:
845: h->flags &= ~HTML_IGNDELIM;
1.1 schwarze 846: }
847:
848: void
849: print_tagq(struct html *h, const struct tag *until)
850: {
1.122 schwarze 851: struct tag *this, *next;
1.1 schwarze 852:
1.122 schwarze 853: for (this = h->tag; this != NULL; this = next) {
854: next = this == until ? NULL : this->next;
855: print_ctag(h, this);
1.1 schwarze 856: }
857: }
858:
1.120 schwarze 859: /*
860: * Close out all open elements up to but excluding suntil.
861: * Note that a paragraph just inside stays open together with it
862: * because paragraphs include subsequent phrasing content.
863: */
1.1 schwarze 864: void
865: print_stagq(struct html *h, const struct tag *suntil)
866: {
1.122 schwarze 867: struct tag *this, *next;
1.1 schwarze 868:
1.122 schwarze 869: for (this = h->tag; this != NULL; this = next) {
870: next = this->next;
871: if (this == suntil || (next == suntil &&
872: (this->tag == TAG_P || this->tag == TAG_PRE)))
873: break;
874: print_ctag(h, this);
1.1 schwarze 875: }
1.42 schwarze 876: }
877:
1.67 schwarze 878:
879: /***********************************************************************
880: * Low level output functions.
881: * They implement line breaking using a short static buffer.
882: ***********************************************************************/
883:
884: /*
885: * Buffer one HTML output byte.
886: * If the buffer is full, flush and deactivate it and start a new line.
887: * If the buffer is inactive, print directly.
888: */
889: static void
890: print_byte(struct html *h, char c)
891: {
892: if ((h->flags & HTML_BUFFER) == 0) {
893: putchar(c);
894: h->col++;
895: return;
896: }
897:
898: if (h->col + h->bufcol < sizeof(h->buf)) {
899: h->buf[h->bufcol++] = c;
900: return;
901: }
902:
903: putchar('\n');
904: h->col = 0;
905: print_indent(h);
906: putchar(' ');
907: putchar(' ');
908: fwrite(h->buf, h->bufcol, 1, stdout);
909: putchar(c);
910: h->col = (h->indent + 1) * 2 + h->bufcol + 1;
911: h->bufcol = 0;
912: h->flags &= ~HTML_BUFFER;
913: }
914:
1.66 schwarze 915: /*
916: * If something was printed on the current output line, end it.
1.67 schwarze 917: * Not to be called right after print_indent().
1.66 schwarze 918: */
1.72 schwarze 919: void
1.67 schwarze 920: print_endline(struct html *h)
1.66 schwarze 921: {
1.67 schwarze 922: if (h->col == 0)
1.66 schwarze 923: return;
924:
1.67 schwarze 925: if (h->bufcol) {
926: putchar(' ');
927: fwrite(h->buf, h->bufcol, 1, stdout);
928: h->bufcol = 0;
929: }
1.66 schwarze 930: putchar('\n');
1.67 schwarze 931: h->col = 0;
932: h->flags |= HTML_NOSPACE;
933: h->flags &= ~HTML_BUFFER;
934: }
935:
936: /*
937: * Flush the HTML output buffer.
938: * If it is inactive, activate it.
939: */
940: static void
941: print_endword(struct html *h)
942: {
943: if (h->noindent) {
944: print_byte(h, ' ');
945: return;
946: }
947:
948: if ((h->flags & HTML_BUFFER) == 0) {
949: h->col++;
950: h->flags |= HTML_BUFFER;
951: } else if (h->bufcol) {
952: putchar(' ');
953: fwrite(h->buf, h->bufcol, 1, stdout);
954: h->col += h->bufcol + 1;
955: }
956: h->bufcol = 0;
1.66 schwarze 957: }
958:
959: /*
960: * If at the beginning of a new output line,
961: * perform indentation and mark the line as containing output.
962: * Make sure to really produce some output right afterwards,
963: * but do not use print_otag() for producing it.
964: */
965: static void
1.67 schwarze 966: print_indent(struct html *h)
1.66 schwarze 967: {
1.67 schwarze 968: size_t i;
1.66 schwarze 969:
1.132 schwarze 970: if (h->col || h->noindent)
1.66 schwarze 971: return;
972:
1.132 schwarze 973: h->col = h->indent * 2;
974: for (i = 0; i < h->col; i++)
975: putchar(' ');
1.67 schwarze 976: }
977:
/*
 * Pass a NUL-terminated string to print_byte() one byte at a time,
 * such that it is printed or buffered depending on the current
 * state of the HTML output buffer.  No escaping takes place.
 */
static void
print_word(struct html *h, const char *cp)
{
	for (; *cp != '\0'; cp++)
		print_byte(h, *cp);
}