Annotation of src/usr.bin/mandoc/html.c, Revision 1.138
1.138 ! schwarze 1: /* $OpenBSD: html.c,v 1.137 2020/04/07 22:45:37 schwarze Exp $ */
1.1 schwarze 2: /*
1.135 schwarze 3: * Copyright (c) 2011-2015, 2017-2020 Ingo Schwarze <schwarze@openbsd.org>
1.42 schwarze 4: * Copyright (c) 2008-2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
1.1 schwarze 5: *
6: * Permission to use, copy, modify, and distribute this software for any
7: * purpose with or without fee is hereby granted, provided that the above
8: * copyright notice and this permission notice appear in all copies.
9: *
1.56 schwarze 10: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
1.1 schwarze 11: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
1.56 schwarze 12: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
1.1 schwarze 13: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
1.135 schwarze 17: *
18: * Common functions for mandoc(1) HTML formatters.
19: * For use by individual formatters and by the main program.
1.1 schwarze 20: */
21: #include <sys/types.h>
1.110 schwarze 22: #include <sys/stat.h>
1.1 schwarze 23:
24: #include <assert.h>
1.3 schwarze 25: #include <ctype.h>
1.4 schwarze 26: #include <stdarg.h>
1.99 schwarze 27: #include <stddef.h>
1.1 schwarze 28: #include <stdio.h>
29: #include <stdint.h>
30: #include <stdlib.h>
31: #include <string.h>
32: #include <unistd.h>
33:
1.80 schwarze 34: #include "mandoc_aux.h"
1.99 schwarze 35: #include "mandoc_ohash.h"
1.9 schwarze 36: #include "mandoc.h"
1.80 schwarze 37: #include "roff.h"
1.1 schwarze 38: #include "out.h"
39: #include "html.h"
1.56 schwarze 40: #include "manconf.h"
1.1 schwarze 41: #include "main.h"
42:
/*
 * Static description of one HTML element type: the element name as it
 * is written into the output, plus a bit mask of HTML_* flags that
 * steer nesting checks and the formatting of the generated source.
 */
struct	htmldata {
	const char	 *name;		/* Element name used in tags. */
	int		  flags;	/* Bitwise OR of the flags below. */
#define	HTML_INPHRASE	 (1 << 0)  /* Can appear in phrasing context. */
#define	HTML_TOPHRASE	 (1 << 1)  /* Establishes phrasing context. */
#define	HTML_NOSTACK	 (1 << 2)  /* Does not have an end tag. */
#define	HTML_NLBEFORE	 (1 << 3)  /* Output line break before opening. */
#define	HTML_NLBEGIN	 (1 << 4)  /* Output line break after opening. */
#define	HTML_NLEND	 (1 << 5)  /* Output line break before closing. */
#define	HTML_NLAFTER	 (1 << 6)  /* Output line break after closing. */
/* Convenience combinations of the four line break flags. */
#define	HTML_NLAROUND	 (HTML_NLBEFORE | HTML_NLAFTER)
#define	HTML_NLINSIDE	 (HTML_NLBEGIN | HTML_NLEND)
#define	HTML_NLALL	 (HTML_NLAROUND | HTML_NLINSIDE)
#define	HTML_INDENT	 (1 << 7)  /* Indent content by two spaces. */
#define	HTML_NOINDENT	 (1 << 8)  /* Exception: never indent content. */
};
59:
/*
 * Per-element properties, looked up as htmltags[tag] with an
 * enum htmltag index.  The initializers are positional, so their
 * order presumably has to match the enum htmltag declaration,
 * which is not visible in this file - verify when adding entries.
 */
static	const struct htmldata htmltags[TAG_MAX] = {
	/* Document structure. */
	{"html",	HTML_NLALL},
	{"head",	HTML_NLALL | HTML_INDENT},
	{"meta",	HTML_NOSTACK | HTML_NLALL},
	{"link",	HTML_NOSTACK | HTML_NLALL},
	{"style",	HTML_NLALL | HTML_INDENT},
	{"title",	HTML_NLAROUND},
	{"body",	HTML_NLALL},
	/* Flow containers, tables and lists. */
	{"div",		HTML_NLAROUND},
	{"section",	HTML_NLALL},
	{"table",	HTML_NLALL | HTML_INDENT},
	{"tr",		HTML_NLALL | HTML_INDENT},
	{"td",		HTML_NLAROUND},
	{"li",		HTML_NLAROUND | HTML_INDENT},
	{"ul",		HTML_NLALL | HTML_INDENT},
	{"ol",		HTML_NLALL | HTML_INDENT},
	{"dl",		HTML_NLALL | HTML_INDENT},
	{"dt",		HTML_NLAROUND},
	{"dd",		HTML_NLAROUND | HTML_INDENT},
	/* Flow elements that establish phrasing context. */
	{"h1",		HTML_TOPHRASE | HTML_NLAROUND},
	{"h2",		HTML_TOPHRASE | HTML_NLAROUND},
	{"p",		HTML_TOPHRASE | HTML_NLAROUND | HTML_INDENT},
	{"pre",		HTML_TOPHRASE | HTML_NLALL | HTML_NOINDENT},
	/* Phrasing elements. */
	{"a",		HTML_INPHRASE | HTML_TOPHRASE},
	{"b",		HTML_INPHRASE | HTML_TOPHRASE},
	{"cite",	HTML_INPHRASE | HTML_TOPHRASE},
	{"code",	HTML_INPHRASE | HTML_TOPHRASE},
	{"i",		HTML_INPHRASE | HTML_TOPHRASE},
	{"small",	HTML_INPHRASE | HTML_TOPHRASE},
	{"span",	HTML_INPHRASE | HTML_TOPHRASE},
	{"var",		HTML_INPHRASE | HTML_TOPHRASE},
	{"br",		HTML_INPHRASE | HTML_NOSTACK | HTML_NLALL},
	{"mark",	HTML_INPHRASE },
	/* MathML; only the outer element carries formatting flags. */
	{"math",	HTML_INPHRASE | HTML_NLALL | HTML_INDENT},
	{"mrow",	0},
	{"mi",		0},
	{"mn",		0},
	{"mo",		0},
	{"msup",	0},
	{"msub",	0},
	{"msubsup",	0},
	{"mfrac",	0},
	{"msqrt",	0},
	{"mfenced",	0},
	{"mtable",	0},
	{"mtr",		0},
	{"mtd",		0},
	{"munderover",	0},
	{"munder",	0},
	{"mover",	0},
};
111:
/*
 * Avoid duplicate HTML id= attributes.
 * This table is file-global: it is initialized in html_alloc(),
 * emptied and re-initialized in html_reset(), and deleted in
 * html_free(), so it is shared by every struct html in the process.
 */
static	struct ohash	 id_unique;

/* Tag stack and id table cleanup shared by html_reset() and html_free(). */
static	void	 html_reset_internal(struct html *);

/* Low level output functions doing the line breaking. */
static	void	 print_byte(struct html *, char);
static	void	 print_endword(struct html *);
static	void	 print_indent(struct html *);
static	void	 print_word(struct html *, const char *);

/* Helpers for closing tags, escaping text, and writing attributes. */
static	void	 print_ctag(struct html *, struct tag *);
static	int	 print_escape(struct html *, char);
static	int	 print_encode(struct html *, const char *, const char *, int);
static	void	 print_href(struct html *, const char *, const char *, int);
static	void	 print_metaf(struct html *);
1.5 schwarze 126:
1.35 schwarze 127:
1.50 schwarze 128: void *
1.61 schwarze 129: html_alloc(const struct manoutput *outopts)
1.1 schwarze 130: {
131: struct html *h;
132:
1.24 schwarze 133: h = mandoc_calloc(1, sizeof(struct html));
1.1 schwarze 134:
1.74 schwarze 135: h->tag = NULL;
1.56 schwarze 136: h->style = outopts->style;
1.110 schwarze 137: if ((h->base_man1 = outopts->man) == NULL)
138: h->base_man2 = NULL;
139: else if ((h->base_man2 = strchr(h->base_man1, ';')) != NULL)
140: *h->base_man2++ = '\0';
1.56 schwarze 141: h->base_includes = outopts->includes;
142: if (outopts->fragment)
143: h->oflags |= HTML_FRAGMENT;
1.111 schwarze 144: if (outopts->toc)
145: h->oflags |= HTML_TOC;
1.1 schwarze 146:
1.99 schwarze 147: mandoc_ohash_init(&id_unique, 4, 0);
148:
1.58 schwarze 149: return h;
1.1 schwarze 150: }
151:
1.124 schwarze 152: static void
153: html_reset_internal(struct html *h)
1.1 schwarze 154: {
155: struct tag *tag;
1.99 schwarze 156: char *cp;
157: unsigned int slot;
1.1 schwarze 158:
1.74 schwarze 159: while ((tag = h->tag) != NULL) {
160: h->tag = tag->next;
1.1 schwarze 161: free(tag);
162: }
1.99 schwarze 163: cp = ohash_first(&id_unique, &slot);
164: while (cp != NULL) {
165: free(cp);
166: cp = ohash_next(&id_unique, &slot);
167: }
168: ohash_delete(&id_unique);
1.124 schwarze 169: }
170:
171: void
172: html_reset(void *p)
173: {
174: html_reset_internal(p);
175: mandoc_ohash_init(&id_unique, 4, 0);
176: }
177:
/*
 * Destroy a formatter state obtained from html_alloc(),
 * including the tag stack and the table of unique ids.
 */
void
html_free(void *p)
{
	struct html	*h = p;

	html_reset_internal(h);
	free(h);
}
184:
185: void
186: print_gen_head(struct html *h)
187: {
1.42 schwarze 188: struct tag *t;
189:
1.64 schwarze 190: print_otag(h, TAG_META, "?", "charset", "utf-8");
1.92 schwarze 191: if (h->style != NULL) {
192: print_otag(h, TAG_LINK, "?h??", "rel", "stylesheet",
193: h->style, "type", "text/css", "media", "all");
194: return;
195: }
1.1 schwarze 196:
1.42 schwarze 197: /*
1.92 schwarze 198: * Print a minimal embedded style sheet.
1.42 schwarze 199: */
1.66 schwarze 200:
1.64 schwarze 201: t = print_otag(h, TAG_STYLE, "");
1.66 schwarze 202: print_text(h, "table.head, table.foot { width: 100%; }");
1.67 schwarze 203: print_endline(h);
1.66 schwarze 204: print_text(h, "td.head-rtitle, td.foot-os { text-align: right; }");
1.67 schwarze 205: print_endline(h);
1.66 schwarze 206: print_text(h, "td.head-vol { text-align: center; }");
1.67 schwarze 207: print_endline(h);
1.126 schwarze 208: print_text(h, ".Nd, .Bf, .Op { display: inline; }");
1.95 schwarze 209: print_endline(h);
1.126 schwarze 210: print_text(h, ".Pa, .Ad { font-style: italic; }");
1.96 schwarze 211: print_endline(h);
1.126 schwarze 212: print_text(h, ".Ms { font-weight: bold; }");
1.98 schwarze 213: print_endline(h);
1.126 schwarze 214: print_text(h, ".Bl-diag ");
1.94 schwarze 215: print_byte(h, '>');
216: print_text(h, " dt { font-weight: bold; }");
1.93 schwarze 217: print_endline(h);
1.126 schwarze 218: print_text(h, "code.Nm, .Fl, .Cm, .Ic, code.In, .Fd, .Fn, .Cd "
219: "{ font-weight: bold; font-family: inherit; }");
1.42 schwarze 220: print_tagq(h, t);
1.1 schwarze 221: }
222:
1.125 schwarze 223: int
224: html_setfont(struct html *h, enum mandoc_esc font)
1.5 schwarze 225: {
1.125 schwarze 226: switch (font) {
1.35 schwarze 227: case ESCAPE_FONTPREV:
1.5 schwarze 228: font = h->metal;
229: break;
1.35 schwarze 230: case ESCAPE_FONTITALIC:
231: case ESCAPE_FONTBOLD:
232: case ESCAPE_FONTBI:
1.112 schwarze 233: case ESCAPE_FONTCW:
1.125 schwarze 234: case ESCAPE_FONTROMAN:
1.112 schwarze 235: break;
1.35 schwarze 236: case ESCAPE_FONT:
1.125 schwarze 237: font = ESCAPE_FONTROMAN;
1.5 schwarze 238: break;
239: default:
1.125 schwarze 240: return 0;
1.1 schwarze 241: }
1.125 schwarze 242: h->metal = h->metac;
243: h->metac = font;
244: return 1;
245: }
1.1 schwarze 246:
1.125 schwarze 247: static void
248: print_metaf(struct html *h)
249: {
1.20 schwarze 250: if (h->metaf) {
251: print_tagq(h, h->metaf);
252: h->metaf = NULL;
253: }
1.125 schwarze 254: switch (h->metac) {
255: case ESCAPE_FONTITALIC:
1.64 schwarze 256: h->metaf = print_otag(h, TAG_I, "");
1.31 schwarze 257: break;
1.125 schwarze 258: case ESCAPE_FONTBOLD:
1.64 schwarze 259: h->metaf = print_otag(h, TAG_B, "");
1.31 schwarze 260: break;
1.125 schwarze 261: case ESCAPE_FONTBI:
1.64 schwarze 262: h->metaf = print_otag(h, TAG_B, "");
263: print_otag(h, TAG_I, "");
1.31 schwarze 264: break;
1.125 schwarze 265: case ESCAPE_FONTCW:
1.112 schwarze 266: h->metaf = print_otag(h, TAG_SPAN, "c", "Li");
267: break;
1.31 schwarze 268: default:
269: break;
270: }
1.118 schwarze 271: }
272:
1.119 schwarze 273: void
274: html_close_paragraph(struct html *h)
275: {
1.129 schwarze 276: struct tag *this, *next;
1.130 schwarze 277: int flags;
1.119 schwarze 278:
1.129 schwarze 279: this = h->tag;
280: for (;;) {
281: next = this->next;
1.130 schwarze 282: flags = htmltags[this->tag].flags;
283: if (flags & (HTML_INPHRASE | HTML_TOPHRASE))
1.129 schwarze 284: print_ctag(h, this);
1.130 schwarze 285: if ((flags & HTML_INPHRASE) == 0)
1.119 schwarze 286: break;
1.129 schwarze 287: this = next;
1.119 schwarze 288: }
289: }
290:
1.118 schwarze 291: /*
292: * ROFF_nf switches to no-fill mode, ROFF_fi to fill mode.
293: * TOKEN_NONE does not switch. The old mode is returned.
294: */
295: enum roff_tok
296: html_fillmode(struct html *h, enum roff_tok want)
297: {
298: struct tag *t;
299: enum roff_tok had;
300:
301: for (t = h->tag; t != NULL; t = t->next)
302: if (t->tag == TAG_PRE)
303: break;
304:
305: had = t == NULL ? ROFF_fi : ROFF_nf;
306:
307: if (want != had) {
308: switch (want) {
309: case ROFF_fi:
310: print_tagq(h, t);
311: break;
312: case ROFF_nf:
1.119 schwarze 313: html_close_paragraph(h);
1.118 schwarze 314: print_otag(h, TAG_PRE, "");
315: break;
316: case TOKEN_NONE:
317: break;
318: default:
319: abort();
320: }
321: }
322: return had;
1.80 schwarze 323: }
324:
1.135 schwarze 325: /*
326: * Allocate a string to be used for the "id=" attribute of an HTML
327: * element and/or as a segment identifier for a URI in an <a> element.
328: * The function may fail and return NULL if the node lacks text data
329: * to create the attribute from.
330: * If the "unique" argument is 0, the caller is responsible for
331: * free(3)ing the returned string after using it.
332: * If the "unique" argument is non-zero, the "id_unique" ohash table
333: * is used for de-duplication and owns the returned string, so the
334: * caller must not free(3) it. In this case, it will be freed
335: * automatically by html_reset() or html_free().
336: */
1.80 schwarze 337: char *
1.99 schwarze 338: html_make_id(const struct roff_node *n, int unique)
1.80 schwarze 339: {
340: const struct roff_node *nch;
1.99 schwarze 341: char *buf, *bufs, *cp;
342: unsigned int slot;
343: int suffix;
1.80 schwarze 344:
1.138 ! schwarze 345: if (n->tag != NULL)
! 346: buf = mandoc_strdup(n->tag);
1.135 schwarze 347: else {
348: switch (n->tok) {
349: case MDOC_Sh:
350: case MDOC_Ss:
351: case MDOC_Sx:
352: case MAN_SH:
353: case MAN_SS:
354: for (nch = n->child; nch != NULL; nch = nch->next)
355: if (nch->type != ROFFT_TEXT)
356: return NULL;
357: buf = NULL;
358: deroff(&buf, n);
359: if (buf == NULL)
360: return NULL;
361: break;
362: default:
1.136 schwarze 363: if (n->child == NULL || n->child->type != ROFFT_TEXT)
1.135 schwarze 364: return NULL;
365: buf = mandoc_strdup(n->child->string);
366: break;
367: }
368: }
1.80 schwarze 369:
1.100 schwarze 370: /*
371: * In ID attributes, only use ASCII characters that are
372: * permitted in URL-fragment strings according to the
373: * explicit list at:
374: * https://url.spec.whatwg.org/#url-fragment-string
375: */
1.80 schwarze 376:
377: for (cp = buf; *cp != '\0'; cp++)
1.100 schwarze 378: if (isalnum((unsigned char)*cp) == 0 &&
379: strchr("!$&'()*+,-./:;=?@_~", *cp) == NULL)
1.80 schwarze 380: *cp = '_';
381:
1.99 schwarze 382: if (unique == 0)
383: return buf;
384:
385: /* Avoid duplicate HTML id= attributes. */
386:
387: bufs = NULL;
388: suffix = 1;
389: slot = ohash_qlookup(&id_unique, buf);
390: cp = ohash_find(&id_unique, slot);
391: if (cp != NULL) {
392: while (cp != NULL) {
393: free(bufs);
394: if (++suffix > 127) {
395: free(buf);
396: return NULL;
397: }
398: mandoc_asprintf(&bufs, "%s_%d", buf, suffix);
399: slot = ohash_qlookup(&id_unique, bufs);
400: cp = ohash_find(&id_unique, slot);
401: }
402: free(buf);
403: buf = bufs;
404: }
405: ohash_insert(&id_unique, slot, buf);
1.80 schwarze 406: return buf;
1.1 schwarze 407: }
408:
1.5 schwarze 409: static int
1.67 schwarze 410: print_escape(struct html *h, char c)
1.38 schwarze 411: {
412:
413: switch (c) {
414: case '<':
1.67 schwarze 415: print_word(h, "<");
1.38 schwarze 416: break;
417: case '>':
1.67 schwarze 418: print_word(h, ">");
1.38 schwarze 419: break;
420: case '&':
1.67 schwarze 421: print_word(h, "&");
1.38 schwarze 422: break;
423: case '"':
1.67 schwarze 424: print_word(h, """);
1.38 schwarze 425: break;
426: case ASCII_NBRSP:
1.67 schwarze 427: print_word(h, " ");
1.38 schwarze 428: break;
429: case ASCII_HYPH:
1.67 schwarze 430: print_byte(h, '-');
1.59 schwarze 431: break;
1.38 schwarze 432: case ASCII_BREAK:
433: break;
434: default:
1.58 schwarze 435: return 0;
1.38 schwarze 436: }
1.58 schwarze 437: return 1;
1.38 schwarze 438: }
439:
/*
 * Print the text from p up to but not including pend, or up to the
 * terminating NUL if pend is NULL, translating it for HTML output:
 * ordinary bytes are copied, blanks end the current output word,
 * characters special in HTML are replaced by print_escape(),
 * and roff escape sequences are interpreted.
 * If norecurse is non-zero, font escapes are consumed without
 * changing the font, such that no tags are opened; this is what
 * callers printing attribute values use.
 * Returns 1 if a no-space escape was found directly before a NUL
 * byte, or 0 otherwise.
 */
static int
print_encode(struct html *h, const char *p, const char *pend, int norecurse)
{
	char		 numbuf[16];
	const char	*seq;
	size_t		 sz;
	int		 c, len, breakline, nospace;
	enum mandoc_esc	 esc;
	/* Bytes that cannot simply be copied to the output. */
	static const char rejs[10] = { ' ', '\\', '<', '>', '&', '"',
		ASCII_NBRSP, ASCII_HYPH, ASCII_BREAK, '\0' };

	if (pend == NULL)
		pend = strchr(p, '\0');

	breakline = 0;
	nospace = 0;

	while (p < pend) {
		/* A pending skip request swallows one ordinary character. */
		if (HTML_SKIPCHAR & h->flags && '\\' != *p) {
			h->flags &= ~HTML_SKIPCHAR;
			p++;
			continue;
		}

		/* Copy the run of bytes that need no special treatment. */
		for (sz = strcspn(p, rejs); sz-- && p < pend; p++)
			print_byte(h, *p);

		/*
		 * A break escape takes effect at the end of the
		 * current word; the blanks following it are dropped.
		 */
		if (breakline &&
		    (p >= pend || *p == ' ' || *p == ASCII_NBRSP)) {
			print_otag(h, TAG_BR, "");
			breakline = 0;
			while (p < pend && (*p == ' ' || *p == ASCII_NBRSP))
				p++;
			continue;
		}

		if (p >= pend)
			break;

		if (*p == ' ') {
			print_endword(h);
			p++;
			continue;
		}

		if (print_escape(h, *p++))
			continue;

		/*
		 * Of the bytes listed in rejs, only the backslash is
		 * handled neither above nor by print_escape(), so p
		 * now points to the body of an escape sequence.
		 */
		esc = mandoc_escape(&p, &seq, &len);
		switch (esc) {
		case ESCAPE_FONT:
		case ESCAPE_FONTPREV:
		case ESCAPE_FONTBOLD:
		case ESCAPE_FONTITALIC:
		case ESCAPE_FONTBI:
		case ESCAPE_FONTCW:
		case ESCAPE_FONTROMAN:
			if (0 == norecurse) {
				/* No whitespace before the font tags. */
				h->flags |= HTML_NOSPACE;
				if (html_setfont(h, esc))
					print_metaf(h);
				h->flags &= ~HTML_NOSPACE;
			}
			continue;
		case ESCAPE_SKIPCHAR:
			h->flags |= HTML_SKIPCHAR;
			continue;
		case ESCAPE_ERROR:
			continue;
		default:
			break;
		}

		/* A pending skip request swallows one character escape. */
		if (h->flags & HTML_SKIPCHAR) {
			h->flags &= ~HTML_SKIPCHAR;
			continue;
		}

		/* Translate the escape sequence to a character code. */
		switch (esc) {
		case ESCAPE_UNICODE:
			/* Skip past "u" header. */
			c = mchars_num2uc(seq + 1, len - 1);
			break;
		case ESCAPE_NUMBERED:
			c = mchars_num2char(seq, len);
			if (c < 0)
				continue;
			break;
		case ESCAPE_SPECIAL:
			c = mchars_spec2cp(seq, len);
			if (c <= 0)
				continue;
			break;
		case ESCAPE_UNDEF:
			c = *seq;
			break;
		case ESCAPE_DEVICE:
			print_word(h, "html");
			continue;
		case ESCAPE_BREAK:
			breakline = 1;
			continue;
		case ESCAPE_NOSPACE:
			if ('\0' == *p)
				nospace = 1;
			continue;
		case ESCAPE_OVERSTRIKE:
			if (len == 0)
				continue;
			/* Only the last character is printed. */
			c = seq[len - 1];
			break;
		default:
			continue;
		}
		/*
		 * Control characters other than the tab are
		 * replaced with U+FFFD, codes above ASCII are written
		 * as hexadecimal numeric character references.
		 */
		if ((c < 0x20 && c != 0x09) ||
		    (c > 0x7E && c < 0xA0))
			c = 0xFFFD;
		if (c > 0x7E) {
			(void)snprintf(numbuf, sizeof(numbuf), "&#x%.4X;", c);
			print_word(h, numbuf);
		} else if (print_escape(h, c) == 0)
			print_byte(h, c);
	}

	return nospace;
}
566:
1.6 schwarze 567: static void
1.65 schwarze 568: print_href(struct html *h, const char *name, const char *sec, int man)
1.6 schwarze 569: {
1.110 schwarze 570: struct stat sb;
1.65 schwarze 571: const char *p, *pp;
1.110 schwarze 572: char *filename;
573:
574: if (man) {
575: pp = h->base_man1;
576: if (h->base_man2 != NULL) {
577: mandoc_asprintf(&filename, "%s.%s", name, sec);
578: if (stat(filename, &sb) == -1)
579: pp = h->base_man2;
580: free(filename);
581: }
582: } else
583: pp = h->base_includes;
1.65 schwarze 584:
585: while ((p = strchr(pp, '%')) != NULL) {
586: print_encode(h, pp, p, 1);
587: if (man && p[1] == 'S') {
588: if (sec == NULL)
1.67 schwarze 589: print_byte(h, '1');
1.65 schwarze 590: else
591: print_encode(h, sec, NULL, 1);
592: } else if ((man && p[1] == 'N') ||
593: (man == 0 && p[1] == 'I'))
594: print_encode(h, name, NULL, 1);
595: else
596: print_encode(h, p, p + 2, 1);
597: pp = p + 2;
598: }
599: if (*pp != '\0')
600: print_encode(h, pp, NULL, 1);
1.6 schwarze 601: }
602:
1.1 schwarze 603: struct tag *
1.64 schwarze 604: print_otag(struct html *h, enum htmltag tag, const char *fmt, ...)
1.1 schwarze 605: {
1.64 schwarze 606: va_list ap;
1.1 schwarze 607: struct tag *t;
1.65 schwarze 608: const char *attr;
1.73 schwarze 609: char *arg1, *arg2;
1.114 schwarze 610: int style_written, tflags;
1.66 schwarze 611:
612: tflags = htmltags[tag].flags;
1.1 schwarze 613:
1.127 schwarze 614: /* Flow content is not allowed in phrasing context. */
615:
616: if ((tflags & HTML_INPHRASE) == 0) {
617: for (t = h->tag; t != NULL; t = t->next) {
618: if (t->closed)
619: continue;
620: assert((htmltags[t->tag].flags & HTML_TOPHRASE) == 0);
621: break;
622: }
1.131 schwarze 623:
624: /*
625: * Always wrap phrasing elements in a paragraph
626: * unless already contained in some flow container;
627: * never put them directly into a section.
628: */
629:
630: } else if (tflags & HTML_TOPHRASE && h->tag->tag == TAG_SECTION)
631: print_otag(h, TAG_P, "c", "Pp");
1.127 schwarze 632:
1.74 schwarze 633: /* Push this tag onto the stack of open scopes. */
1.6 schwarze 634:
1.66 schwarze 635: if ((tflags & HTML_NOSTACK) == 0) {
1.24 schwarze 636: t = mandoc_malloc(sizeof(struct tag));
1.1 schwarze 637: t->tag = tag;
1.74 schwarze 638: t->next = h->tag;
1.122 schwarze 639: t->refcnt = 0;
640: t->closed = 0;
1.74 schwarze 641: h->tag = t;
1.1 schwarze 642: } else
643: t = NULL;
644:
1.66 schwarze 645: if (tflags & HTML_NLBEFORE)
1.67 schwarze 646: print_endline(h);
647: if (h->col == 0)
648: print_indent(h);
1.66 schwarze 649: else if ((h->flags & HTML_NOSPACE) == 0) {
650: if (h->flags & HTML_KEEP)
1.86 bentley 651: print_word(h, " ");
1.66 schwarze 652: else {
653: if (h->flags & HTML_PREKEEP)
654: h->flags |= HTML_KEEP;
1.67 schwarze 655: print_endword(h);
1.12 schwarze 656: }
1.66 schwarze 657: }
1.1 schwarze 658:
1.13 schwarze 659: if ( ! (h->flags & HTML_NONOSPACE))
660: h->flags &= ~HTML_NOSPACE;
1.14 schwarze 661: else
662: h->flags |= HTML_NOSPACE;
1.13 schwarze 663:
1.6 schwarze 664: /* Print out the tag name and attributes. */
665:
1.67 schwarze 666: print_byte(h, '<');
667: print_word(h, htmltags[tag].name);
1.64 schwarze 668:
669: va_start(ap, fmt);
670:
1.114 schwarze 671: while (*fmt != '\0' && *fmt != 's') {
1.73 schwarze 672:
1.108 schwarze 673: /* Parse attributes and arguments. */
1.73 schwarze 674:
675: arg1 = va_arg(ap, char *);
1.108 schwarze 676: arg2 = NULL;
1.64 schwarze 677: switch (*fmt++) {
678: case 'c':
1.65 schwarze 679: attr = "class";
1.64 schwarze 680: break;
681: case 'h':
1.65 schwarze 682: attr = "href";
1.64 schwarze 683: break;
684: case 'i':
1.65 schwarze 685: attr = "id";
1.64 schwarze 686: break;
687: case '?':
1.73 schwarze 688: attr = arg1;
689: arg1 = va_arg(ap, char *);
1.64 schwarze 690: break;
691: default:
692: abort();
693: }
1.73 schwarze 694: if (*fmt == 'M')
695: arg2 = va_arg(ap, char *);
696: if (arg1 == NULL)
697: continue;
698:
1.108 schwarze 699: /* Print the attributes. */
1.73 schwarze 700:
1.67 schwarze 701: print_byte(h, ' ');
702: print_word(h, attr);
703: print_byte(h, '=');
704: print_byte(h, '"');
1.65 schwarze 705: switch (*fmt) {
1.78 schwarze 706: case 'I':
707: print_href(h, arg1, NULL, 0);
708: fmt++;
709: break;
1.65 schwarze 710: case 'M':
1.73 schwarze 711: print_href(h, arg1, arg2, 1);
1.65 schwarze 712: fmt++;
713: break;
1.78 schwarze 714: case 'R':
715: print_byte(h, '#');
716: print_encode(h, arg1, NULL, 1);
1.65 schwarze 717: fmt++;
1.78 schwarze 718: break;
1.65 schwarze 719: default:
1.114 schwarze 720: print_encode(h, arg1, NULL, 1);
1.65 schwarze 721: break;
722: }
1.67 schwarze 723: print_byte(h, '"');
1.64 schwarze 724: }
1.114 schwarze 725:
726: style_written = 0;
727: while (*fmt++ == 's') {
728: arg1 = va_arg(ap, char *);
729: arg2 = va_arg(ap, char *);
730: if (arg2 == NULL)
731: continue;
732: print_byte(h, ' ');
733: if (style_written == 0) {
734: print_word(h, "style=\"");
735: style_written = 1;
736: }
737: print_word(h, arg1);
738: print_byte(h, ':');
739: print_byte(h, ' ');
740: print_word(h, arg2);
741: print_byte(h, ';');
742: }
743: if (style_written)
744: print_byte(h, '"');
745:
1.64 schwarze 746: va_end(ap);
1.6 schwarze 747:
1.42 schwarze 748: /* Accommodate for "well-formed" singleton escaping. */
1.6 schwarze 749:
1.127 schwarze 750: if (htmltags[tag].flags & HTML_NOSTACK)
1.67 schwarze 751: print_byte(h, '/');
1.6 schwarze 752:
1.67 schwarze 753: print_byte(h, '>');
1.1 schwarze 754:
1.66 schwarze 755: if (tflags & HTML_NLBEGIN)
1.67 schwarze 756: print_endline(h);
1.66 schwarze 757: else
758: h->flags |= HTML_NOSPACE;
1.18 schwarze 759:
1.66 schwarze 760: if (tflags & HTML_INDENT)
761: h->indent++;
762: if (tflags & HTML_NOINDENT)
763: h->noindent++;
1.18 schwarze 764:
1.58 schwarze 765: return t;
1.135 schwarze 766: }
767:
768: /*
769: * Print an element with an optional "id=" attribute.
1.136 schwarze 770: * If the element has phrasing content and an "id=" attribute,
771: * also add a permalink: outside if it can be in phrasing context,
772: * inside otherwise.
1.135 schwarze 773: */
774: struct tag *
775: print_otag_id(struct html *h, enum htmltag elemtype, const char *cattr,
776: struct roff_node *n)
777: {
1.136 schwarze 778: struct roff_node *nch;
1.135 schwarze 779: struct tag *ret, *t;
1.137 schwarze 780: char *id, *href;
1.135 schwarze 781:
782: ret = NULL;
1.137 schwarze 783: id = href = NULL;
1.135 schwarze 784: if (n->flags & NODE_ID)
785: id = html_make_id(n, 1);
1.137 schwarze 786: if (n->flags & NODE_HREF)
787: href = id == NULL ? html_make_id(n, 0) : id;
788: if (href != NULL && htmltags[elemtype].flags & HTML_INPHRASE)
789: ret = print_otag(h, TAG_A, "chR", "permalink", href);
1.135 schwarze 790: t = print_otag(h, elemtype, "ci", cattr, id);
791: if (ret == NULL) {
792: ret = t;
1.137 schwarze 793: if (href != NULL && (nch = n->child) != NULL) {
1.136 schwarze 794: /* man(7) is safe, it tags phrasing content only. */
795: if (n->tok > MDOC_MAX ||
796: htmltags[elemtype].flags & HTML_TOPHRASE)
797: nch = NULL;
798: else /* For mdoc(7), beware of nested blocks. */
799: while (nch != NULL && nch->type == ROFFT_TEXT)
800: nch = nch->next;
801: if (nch == NULL)
1.137 schwarze 802: print_otag(h, TAG_A, "chR", "permalink", href);
1.136 schwarze 803: }
1.135 schwarze 804: }
1.137 schwarze 805: if (id == NULL)
806: free(href);
1.135 schwarze 807: return ret;
1.1 schwarze 808: }
809:
810: static void
1.54 schwarze 811: print_ctag(struct html *h, struct tag *tag)
1.1 schwarze 812: {
1.66 schwarze 813: int tflags;
1.35 schwarze 814:
1.122 schwarze 815: if (tag->closed == 0) {
816: tag->closed = 1;
817: if (tag == h->metaf)
818: h->metaf = NULL;
819: if (tag == h->tblt)
820: h->tblt = NULL;
821:
822: tflags = htmltags[tag->tag].flags;
823: if (tflags & HTML_INDENT)
824: h->indent--;
825: if (tflags & HTML_NOINDENT)
826: h->noindent--;
827: if (tflags & HTML_NLEND)
828: print_endline(h);
829: print_indent(h);
830: print_byte(h, '<');
831: print_byte(h, '/');
832: print_word(h, htmltags[tag->tag].name);
833: print_byte(h, '>');
834: if (tflags & HTML_NLAFTER)
835: print_endline(h);
836: }
837: if (tag->refcnt == 0) {
838: h->tag = tag->next;
839: free(tag);
840: }
1.1 schwarze 841: }
842:
/*
 * Print the document type declaration on a line of its own.
 */
void
print_gen_decls(struct html *h)
{
	static const char	 doctype[] = "<!DOCTYPE html>";

	print_word(h, doctype);
	print_endline(h);
}
849:
850: void
851: print_gen_comment(struct html *h, struct roff_node *n)
852: {
853: int wantblank;
854:
855: print_word(h, "<!-- This is an automatically generated file."
856: " Do not edit.");
857: h->indent = 1;
858: wantblank = 0;
859: while (n != NULL && n->type == ROFFT_COMMENT) {
860: if (strstr(n->string, "-->") == NULL &&
861: (wantblank || *n->string != '\0')) {
862: print_endline(h);
863: print_indent(h);
864: print_word(h, n->string);
865: wantblank = *n->string != '\0';
866: }
867: n = n->next;
868: }
869: if (wantblank)
870: print_endline(h);
871: print_word(h, " -->");
872: print_endline(h);
873: h->indent = 0;
1.1 schwarze 874: }
875:
876: void
1.12 schwarze 877: print_text(struct html *h, const char *word)
1.1 schwarze 878: {
1.131 schwarze 879: /*
880: * Always wrap text in a paragraph unless already contained in
881: * some flow container; never put it directly into a section.
882: */
883:
884: if (h->tag->tag == TAG_SECTION)
885: print_otag(h, TAG_P, "c", "Pp");
886:
887: /* Output whitespace before this text? */
888:
1.67 schwarze 889: if (h->col && (h->flags & HTML_NOSPACE) == 0) {
1.12 schwarze 890: if ( ! (HTML_KEEP & h->flags)) {
891: if (HTML_PREKEEP & h->flags)
892: h->flags |= HTML_KEEP;
1.67 schwarze 893: print_endword(h);
1.12 schwarze 894: } else
1.86 bentley 895: print_word(h, " ");
1.12 schwarze 896: }
1.131 schwarze 897:
898: /*
899: * Print the text, optionally surrounded by HTML whitespace,
900: * optionally manually switching fonts before and after.
901: */
1.1 schwarze 902:
1.125 schwarze 903: assert(h->metaf == NULL);
904: print_metaf(h);
905: print_indent(h);
1.65 schwarze 906: if ( ! print_encode(h, word, NULL, 0)) {
1.13 schwarze 907: if ( ! (h->flags & HTML_NONOSPACE))
908: h->flags &= ~HTML_NOSPACE;
1.53 schwarze 909: h->flags &= ~HTML_NONEWLINE;
1.28 schwarze 910: } else
1.53 schwarze 911: h->flags |= HTML_NOSPACE | HTML_NONEWLINE;
1.20 schwarze 912:
1.125 schwarze 913: if (h->metaf != NULL) {
1.20 schwarze 914: print_tagq(h, h->metaf);
915: h->metaf = NULL;
916: }
1.17 schwarze 917:
918: h->flags &= ~HTML_IGNDELIM;
1.1 schwarze 919: }
920:
921: void
922: print_tagq(struct html *h, const struct tag *until)
923: {
1.122 schwarze 924: struct tag *this, *next;
1.1 schwarze 925:
1.122 schwarze 926: for (this = h->tag; this != NULL; this = next) {
927: next = this == until ? NULL : this->next;
928: print_ctag(h, this);
1.1 schwarze 929: }
930: }
931:
1.120 schwarze 932: /*
933: * Close out all open elements up to but excluding suntil.
934: * Note that a paragraph just inside stays open together with it
935: * because paragraphs include subsequent phrasing content.
936: */
1.1 schwarze 937: void
938: print_stagq(struct html *h, const struct tag *suntil)
939: {
1.122 schwarze 940: struct tag *this, *next;
1.1 schwarze 941:
1.122 schwarze 942: for (this = h->tag; this != NULL; this = next) {
943: next = this->next;
944: if (this == suntil || (next == suntil &&
945: (this->tag == TAG_P || this->tag == TAG_PRE)))
946: break;
947: print_ctag(h, this);
1.1 schwarze 948: }
1.42 schwarze 949: }
950:
1.67 schwarze 951:
952: /***********************************************************************
953: * Low level output functions.
954: * They implement line breaking using a short static buffer.
955: ***********************************************************************/
956:
957: /*
958: * Buffer one HTML output byte.
959: * If the buffer is full, flush and deactivate it and start a new line.
960: * If the buffer is inactive, print directly.
961: */
962: static void
963: print_byte(struct html *h, char c)
964: {
965: if ((h->flags & HTML_BUFFER) == 0) {
966: putchar(c);
967: h->col++;
968: return;
969: }
970:
971: if (h->col + h->bufcol < sizeof(h->buf)) {
972: h->buf[h->bufcol++] = c;
973: return;
974: }
975:
976: putchar('\n');
977: h->col = 0;
978: print_indent(h);
979: putchar(' ');
980: putchar(' ');
981: fwrite(h->buf, h->bufcol, 1, stdout);
982: putchar(c);
983: h->col = (h->indent + 1) * 2 + h->bufcol + 1;
984: h->bufcol = 0;
985: h->flags &= ~HTML_BUFFER;
986: }
987:
1.66 schwarze 988: /*
989: * If something was printed on the current output line, end it.
1.67 schwarze 990: * Not to be called right after print_indent().
1.66 schwarze 991: */
1.72 schwarze 992: void
1.67 schwarze 993: print_endline(struct html *h)
1.66 schwarze 994: {
1.67 schwarze 995: if (h->col == 0)
1.66 schwarze 996: return;
997:
1.67 schwarze 998: if (h->bufcol) {
999: putchar(' ');
1000: fwrite(h->buf, h->bufcol, 1, stdout);
1001: h->bufcol = 0;
1002: }
1.66 schwarze 1003: putchar('\n');
1.67 schwarze 1004: h->col = 0;
1005: h->flags |= HTML_NOSPACE;
1006: h->flags &= ~HTML_BUFFER;
1007: }
1008:
1009: /*
1010: * Flush the HTML output buffer.
1011: * If it is inactive, activate it.
1012: */
1013: static void
1014: print_endword(struct html *h)
1015: {
1016: if (h->noindent) {
1017: print_byte(h, ' ');
1018: return;
1019: }
1020:
1021: if ((h->flags & HTML_BUFFER) == 0) {
1022: h->col++;
1023: h->flags |= HTML_BUFFER;
1024: } else if (h->bufcol) {
1025: putchar(' ');
1026: fwrite(h->buf, h->bufcol, 1, stdout);
1027: h->col += h->bufcol + 1;
1028: }
1029: h->bufcol = 0;
1.66 schwarze 1030: }
1031:
1032: /*
1033: * If at the beginning of a new output line,
1034: * perform indentation and mark the line as containing output.
1035: * Make sure to really produce some output right afterwards,
1036: * but do not use print_otag() for producing it.
1037: */
1038: static void
1.67 schwarze 1039: print_indent(struct html *h)
1.66 schwarze 1040: {
1.67 schwarze 1041: size_t i;
1.66 schwarze 1042:
1.132 schwarze 1043: if (h->col || h->noindent)
1.66 schwarze 1044: return;
1045:
1.132 schwarze 1046: h->col = h->indent * 2;
1047: for (i = 0; i < h->col; i++)
1048: putchar(' ');
1.67 schwarze 1049: }
1050:
/*
 * Pass a NUL-terminated string to print_byte() one byte at a time,
 * such that it is printed or buffered depending on the current
 * state of the HTML output buffer.  No escaping takes place.
 */
static void
print_word(struct html *h, const char *cp)
{
	for (; *cp != '\0'; cp++)
		print_byte(h, *cp);
}