Annotation of src/usr.bin/mandoc/html.c, Revision 1.145
1.145 ! schwarze 1: /* $OpenBSD: html.c,v 1.144 2021/05/22 05:49:32 anton Exp $ */
1.1 schwarze 2: /*
1.42 schwarze 3: * Copyright (c) 2008-2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
1.145 ! schwarze 4: * Copyright (c) 2011-2015, 2017-2021 Ingo Schwarze <schwarze@openbsd.org>
1.1 schwarze 5: *
6: * Permission to use, copy, modify, and distribute this software for any
7: * purpose with or without fee is hereby granted, provided that the above
8: * copyright notice and this permission notice appear in all copies.
9: *
1.56 schwarze 10: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
1.1 schwarze 11: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
1.56 schwarze 12: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
1.1 schwarze 13: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
1.135 schwarze 17: *
18: * Common functions for mandoc(1) HTML formatters.
19: * For use by individual formatters and by the main program.
1.1 schwarze 20: */
21: #include <sys/types.h>
1.110 schwarze 22: #include <sys/stat.h>
1.1 schwarze 23:
24: #include <assert.h>
1.3 schwarze 25: #include <ctype.h>
1.4 schwarze 26: #include <stdarg.h>
1.99 schwarze 27: #include <stddef.h>
1.1 schwarze 28: #include <stdio.h>
29: #include <stdint.h>
30: #include <stdlib.h>
31: #include <string.h>
32: #include <unistd.h>
33:
1.80 schwarze 34: #include "mandoc_aux.h"
1.99 schwarze 35: #include "mandoc_ohash.h"
1.9 schwarze 36: #include "mandoc.h"
1.80 schwarze 37: #include "roff.h"
1.1 schwarze 38: #include "out.h"
39: #include "html.h"
1.56 schwarze 40: #include "manconf.h"
1.1 schwarze 41: #include "main.h"
42:
/*
 * Static description of one HTML element type:
 * its name as written in the output and a set of formatting flags.
 */
struct htmldata {
	const char	 *name;		/* Element name, e.g. "div". */
	int		  flags;	/* Bitwise OR of HTML_* below. */
/* Nesting rules. */
#define	HTML_INPHRASE	 (1 << 0)  /* Can appear in phrasing context. */
#define	HTML_TOPHRASE	 (1 << 1)  /* Establishes phrasing context. */
#define	HTML_NOSTACK	 (1 << 2)  /* Does not have an end tag. */
/* Line breaks in the generated HTML source. */
#define	HTML_NLBEFORE	 (1 << 3)  /* Output line break before opening. */
#define	HTML_NLBEGIN	 (1 << 4)  /* Output line break after opening. */
#define	HTML_NLEND	 (1 << 5)  /* Output line break before closing. */
#define	HTML_NLAFTER	 (1 << 6)  /* Output line break after closing. */
#define	HTML_NLAROUND	 (HTML_NLBEFORE | HTML_NLAFTER)
#define	HTML_NLINSIDE	 (HTML_NLBEGIN | HTML_NLEND)
#define	HTML_NLALL	 (HTML_NLAROUND | HTML_NLINSIDE)
/* Indentation of the generated HTML source. */
#define	HTML_INDENT	 (1 << 7)  /* Indent content by two spaces. */
#define	HTML_NOINDENT	 (1 << 8)  /* Exception: never indent content. */
};
59:
60: static const struct htmldata htmltags[TAG_MAX] = {
1.66 schwarze 61: {"html", HTML_NLALL},
62: {"head", HTML_NLALL | HTML_INDENT},
1.127 schwarze 63: {"meta", HTML_NOSTACK | HTML_NLALL},
64: {"link", HTML_NOSTACK | HTML_NLALL},
65: {"style", HTML_NLALL | HTML_INDENT},
66: {"title", HTML_NLAROUND},
1.66 schwarze 67: {"body", HTML_NLALL},
68: {"div", HTML_NLAROUND},
1.123 schwarze 69: {"section", HTML_NLALL},
1.66 schwarze 70: {"table", HTML_NLALL | HTML_INDENT},
71: {"tr", HTML_NLALL | HTML_INDENT},
72: {"td", HTML_NLAROUND},
73: {"li", HTML_NLAROUND | HTML_INDENT},
74: {"ul", HTML_NLALL | HTML_INDENT},
75: {"ol", HTML_NLALL | HTML_INDENT},
76: {"dl", HTML_NLALL | HTML_INDENT},
77: {"dt", HTML_NLAROUND},
78: {"dd", HTML_NLAROUND | HTML_INDENT},
1.127 schwarze 79: {"h1", HTML_TOPHRASE | HTML_NLAROUND},
80: {"h2", HTML_TOPHRASE | HTML_NLAROUND},
81: {"p", HTML_TOPHRASE | HTML_NLAROUND | HTML_INDENT},
1.142 schwarze 82: {"pre", HTML_TOPHRASE | HTML_NLAROUND | HTML_NOINDENT},
1.127 schwarze 83: {"a", HTML_INPHRASE | HTML_TOPHRASE},
84: {"b", HTML_INPHRASE | HTML_TOPHRASE},
85: {"cite", HTML_INPHRASE | HTML_TOPHRASE},
86: {"code", HTML_INPHRASE | HTML_TOPHRASE},
87: {"i", HTML_INPHRASE | HTML_TOPHRASE},
88: {"small", HTML_INPHRASE | HTML_TOPHRASE},
89: {"span", HTML_INPHRASE | HTML_TOPHRASE},
90: {"var", HTML_INPHRASE | HTML_TOPHRASE},
91: {"br", HTML_INPHRASE | HTML_NOSTACK | HTML_NLALL},
1.134 schwarze 92: {"mark", HTML_INPHRASE },
1.127 schwarze 93: {"math", HTML_INPHRASE | HTML_NLALL | HTML_INDENT},
1.66 schwarze 94: {"mrow", 0},
95: {"mi", 0},
1.85 schwarze 96: {"mn", 0},
1.66 schwarze 97: {"mo", 0},
98: {"msup", 0},
99: {"msub", 0},
100: {"msubsup", 0},
101: {"mfrac", 0},
102: {"msqrt", 0},
103: {"mfenced", 0},
104: {"mtable", 0},
105: {"mtr", 0},
106: {"mtd", 0},
107: {"munderover", 0},
108: {"munder", 0},
109: {"mover", 0},
1.5 schwarze 110: };
111:
1.99 schwarze 112: /* Avoid duplicate HTML id= attributes. */
1.140 schwarze 113:
114: struct id_entry {
115: int ord; /* Ordinal number of the latest occurrence. */
116: char id[]; /* The id= attribute without any ordinal suffix. */
117: };
1.99 schwarze 118: static struct ohash id_unique;
119:
/* State management. */
static	void	 html_reset_internal(struct html *);

/* Low level output with line breaking. */
static	void	 print_byte(struct html *, char);
static	void	 print_endword(struct html *);
static	void	 print_indent(struct html *);
static	void	 print_word(struct html *, const char *);

/* Elements, attributes, and text encoding. */
static	void	 print_ctag(struct html *, struct tag *);
static	int	 print_escape(struct html *, char);
static	int	 print_encode(struct html *, const char *, const char *, int);
static	void	 print_href(struct html *, const char *, const char *, int);
static	void	 print_metaf(struct html *);
1.5 schwarze 131:
1.35 schwarze 132:
1.50 schwarze 133: void *
1.61 schwarze 134: html_alloc(const struct manoutput *outopts)
1.1 schwarze 135: {
136: struct html *h;
137:
1.24 schwarze 138: h = mandoc_calloc(1, sizeof(struct html));
1.1 schwarze 139:
1.74 schwarze 140: h->tag = NULL;
1.143 schwarze 141: h->metac = h->metal = ESCAPE_FONTROMAN;
1.56 schwarze 142: h->style = outopts->style;
1.110 schwarze 143: if ((h->base_man1 = outopts->man) == NULL)
144: h->base_man2 = NULL;
145: else if ((h->base_man2 = strchr(h->base_man1, ';')) != NULL)
146: *h->base_man2++ = '\0';
1.56 schwarze 147: h->base_includes = outopts->includes;
148: if (outopts->fragment)
149: h->oflags |= HTML_FRAGMENT;
1.111 schwarze 150: if (outopts->toc)
151: h->oflags |= HTML_TOC;
1.1 schwarze 152:
1.140 schwarze 153: mandoc_ohash_init(&id_unique, 4, offsetof(struct id_entry, id));
1.99 schwarze 154:
1.58 schwarze 155: return h;
1.1 schwarze 156: }
157:
1.124 schwarze 158: static void
159: html_reset_internal(struct html *h)
1.1 schwarze 160: {
161: struct tag *tag;
1.140 schwarze 162: struct id_entry *entry;
1.99 schwarze 163: unsigned int slot;
1.1 schwarze 164:
1.74 schwarze 165: while ((tag = h->tag) != NULL) {
166: h->tag = tag->next;
1.1 schwarze 167: free(tag);
168: }
1.140 schwarze 169: entry = ohash_first(&id_unique, &slot);
170: while (entry != NULL) {
171: free(entry);
172: entry = ohash_next(&id_unique, &slot);
1.99 schwarze 173: }
174: ohash_delete(&id_unique);
1.124 schwarze 175: }
176:
177: void
178: html_reset(void *p)
179: {
180: html_reset_internal(p);
1.140 schwarze 181: mandoc_ohash_init(&id_unique, 4, offsetof(struct id_entry, id));
1.124 schwarze 182: }
183:
/*
 * Discard all per-document state and release the formatter itself.
 */
void
html_free(void *p)
{
	struct html	*h;

	h = p;
	html_reset_internal(h);
	free(h);
}
190:
191: void
192: print_gen_head(struct html *h)
193: {
1.42 schwarze 194: struct tag *t;
195:
1.64 schwarze 196: print_otag(h, TAG_META, "?", "charset", "utf-8");
1.144 anton 197: print_otag(h, TAG_META, "??", "name", "viewport",
198: "content", "width=device-width, initial-scale=1.0");
1.92 schwarze 199: if (h->style != NULL) {
200: print_otag(h, TAG_LINK, "?h??", "rel", "stylesheet",
201: h->style, "type", "text/css", "media", "all");
202: return;
203: }
1.1 schwarze 204:
1.42 schwarze 205: /*
1.92 schwarze 206: * Print a minimal embedded style sheet.
1.42 schwarze 207: */
1.66 schwarze 208:
1.64 schwarze 209: t = print_otag(h, TAG_STYLE, "");
1.66 schwarze 210: print_text(h, "table.head, table.foot { width: 100%; }");
1.67 schwarze 211: print_endline(h);
1.66 schwarze 212: print_text(h, "td.head-rtitle, td.foot-os { text-align: right; }");
1.67 schwarze 213: print_endline(h);
1.66 schwarze 214: print_text(h, "td.head-vol { text-align: center; }");
1.67 schwarze 215: print_endline(h);
1.126 schwarze 216: print_text(h, ".Nd, .Bf, .Op { display: inline; }");
1.95 schwarze 217: print_endline(h);
1.126 schwarze 218: print_text(h, ".Pa, .Ad { font-style: italic; }");
1.96 schwarze 219: print_endline(h);
1.126 schwarze 220: print_text(h, ".Ms { font-weight: bold; }");
1.98 schwarze 221: print_endline(h);
1.126 schwarze 222: print_text(h, ".Bl-diag ");
1.94 schwarze 223: print_byte(h, '>');
224: print_text(h, " dt { font-weight: bold; }");
1.93 schwarze 225: print_endline(h);
1.126 schwarze 226: print_text(h, "code.Nm, .Fl, .Cm, .Ic, code.In, .Fd, .Fn, .Cd "
227: "{ font-weight: bold; font-family: inherit; }");
1.42 schwarze 228: print_tagq(h, t);
1.1 schwarze 229: }
230:
1.125 schwarze 231: int
232: html_setfont(struct html *h, enum mandoc_esc font)
1.5 schwarze 233: {
1.125 schwarze 234: switch (font) {
1.35 schwarze 235: case ESCAPE_FONTPREV:
1.5 schwarze 236: font = h->metal;
237: break;
1.35 schwarze 238: case ESCAPE_FONTITALIC:
239: case ESCAPE_FONTBOLD:
240: case ESCAPE_FONTBI:
1.125 schwarze 241: case ESCAPE_FONTROMAN:
1.145 ! schwarze 242: case ESCAPE_FONTCR:
! 243: case ESCAPE_FONTCB:
! 244: case ESCAPE_FONTCI:
1.112 schwarze 245: break;
1.35 schwarze 246: case ESCAPE_FONT:
1.125 schwarze 247: font = ESCAPE_FONTROMAN;
1.5 schwarze 248: break;
249: default:
1.125 schwarze 250: return 0;
1.1 schwarze 251: }
1.125 schwarze 252: h->metal = h->metac;
253: h->metac = font;
254: return 1;
255: }
1.1 schwarze 256:
1.125 schwarze 257: static void
258: print_metaf(struct html *h)
259: {
1.20 schwarze 260: if (h->metaf) {
261: print_tagq(h, h->metaf);
262: h->metaf = NULL;
263: }
1.125 schwarze 264: switch (h->metac) {
265: case ESCAPE_FONTITALIC:
1.64 schwarze 266: h->metaf = print_otag(h, TAG_I, "");
1.31 schwarze 267: break;
1.125 schwarze 268: case ESCAPE_FONTBOLD:
1.64 schwarze 269: h->metaf = print_otag(h, TAG_B, "");
1.31 schwarze 270: break;
1.125 schwarze 271: case ESCAPE_FONTBI:
1.64 schwarze 272: h->metaf = print_otag(h, TAG_B, "");
273: print_otag(h, TAG_I, "");
1.31 schwarze 274: break;
1.145 ! schwarze 275: case ESCAPE_FONTCR:
1.112 schwarze 276: h->metaf = print_otag(h, TAG_SPAN, "c", "Li");
277: break;
1.145 ! schwarze 278: case ESCAPE_FONTCB:
! 279: h->metaf = print_otag(h, TAG_SPAN, "c", "Li");
! 280: print_otag(h, TAG_B, "");
! 281: break;
! 282: case ESCAPE_FONTCI:
! 283: h->metaf = print_otag(h, TAG_SPAN, "c", "Li");
! 284: print_otag(h, TAG_I, "");
! 285: break;
1.31 schwarze 286: default:
287: break;
288: }
1.118 schwarze 289: }
290:
1.119 schwarze 291: void
292: html_close_paragraph(struct html *h)
293: {
1.129 schwarze 294: struct tag *this, *next;
1.130 schwarze 295: int flags;
1.119 schwarze 296:
1.129 schwarze 297: this = h->tag;
298: for (;;) {
299: next = this->next;
1.130 schwarze 300: flags = htmltags[this->tag].flags;
301: if (flags & (HTML_INPHRASE | HTML_TOPHRASE))
1.129 schwarze 302: print_ctag(h, this);
1.130 schwarze 303: if ((flags & HTML_INPHRASE) == 0)
1.119 schwarze 304: break;
1.129 schwarze 305: this = next;
1.119 schwarze 306: }
307: }
308:
1.118 schwarze 309: /*
310: * ROFF_nf switches to no-fill mode, ROFF_fi to fill mode.
311: * TOKEN_NONE does not switch. The old mode is returned.
312: */
313: enum roff_tok
314: html_fillmode(struct html *h, enum roff_tok want)
315: {
316: struct tag *t;
317: enum roff_tok had;
318:
319: for (t = h->tag; t != NULL; t = t->next)
320: if (t->tag == TAG_PRE)
321: break;
322:
323: had = t == NULL ? ROFF_fi : ROFF_nf;
324:
325: if (want != had) {
326: switch (want) {
327: case ROFF_fi:
328: print_tagq(h, t);
329: break;
330: case ROFF_nf:
1.119 schwarze 331: html_close_paragraph(h);
1.118 schwarze 332: print_otag(h, TAG_PRE, "");
333: break;
334: case TOKEN_NONE:
335: break;
336: default:
337: abort();
338: }
339: }
340: return had;
1.80 schwarze 341: }
342:
1.135 schwarze 343: /*
344: * Allocate a string to be used for the "id=" attribute of an HTML
345: * element and/or as a segment identifier for a URI in an <a> element.
346: * The function may fail and return NULL if the node lacks text data
347: * to create the attribute from.
1.140 schwarze 348: * The caller is responsible for free(3)ing the returned string.
349: *
1.135 schwarze 350: * If the "unique" argument is non-zero, the "id_unique" ohash table
1.140 schwarze 351: * is used for de-duplication. If the "unique" argument is 1,
352: * it is the first time the function is called for this tag and
353: * location, so if an ordinal suffix is needed, it is incremented.
354: * If the "unique" argument is 2, it is the second time the function
355: * is called for this tag and location, so the ordinal suffix
356: * remains unchanged.
1.135 schwarze 357: */
1.80 schwarze 358: char *
1.99 schwarze 359: html_make_id(const struct roff_node *n, int unique)
1.80 schwarze 360: {
361: const struct roff_node *nch;
1.140 schwarze 362: struct id_entry *entry;
363: char *buf, *cp;
364: size_t len;
1.99 schwarze 365: unsigned int slot;
1.80 schwarze 366:
1.138 schwarze 367: if (n->tag != NULL)
368: buf = mandoc_strdup(n->tag);
1.135 schwarze 369: else {
370: switch (n->tok) {
371: case MDOC_Sh:
372: case MDOC_Ss:
373: case MDOC_Sx:
374: case MAN_SH:
375: case MAN_SS:
376: for (nch = n->child; nch != NULL; nch = nch->next)
377: if (nch->type != ROFFT_TEXT)
378: return NULL;
379: buf = NULL;
380: deroff(&buf, n);
381: if (buf == NULL)
382: return NULL;
383: break;
384: default:
1.136 schwarze 385: if (n->child == NULL || n->child->type != ROFFT_TEXT)
1.135 schwarze 386: return NULL;
387: buf = mandoc_strdup(n->child->string);
388: break;
389: }
390: }
1.80 schwarze 391:
1.100 schwarze 392: /*
393: * In ID attributes, only use ASCII characters that are
394: * permitted in URL-fragment strings according to the
395: * explicit list at:
396: * https://url.spec.whatwg.org/#url-fragment-string
1.141 schwarze 397: * In addition, reserve '~' for ordinal suffixes.
1.100 schwarze 398: */
1.80 schwarze 399:
400: for (cp = buf; *cp != '\0'; cp++)
1.100 schwarze 401: if (isalnum((unsigned char)*cp) == 0 &&
1.141 schwarze 402: strchr("!$&'()*+,-./:;=?@_", *cp) == NULL)
1.80 schwarze 403: *cp = '_';
404:
1.99 schwarze 405: if (unique == 0)
406: return buf;
407:
408: /* Avoid duplicate HTML id= attributes. */
409:
410: slot = ohash_qlookup(&id_unique, buf);
1.140 schwarze 411: if ((entry = ohash_find(&id_unique, slot)) == NULL) {
412: len = strlen(buf) + 1;
413: entry = mandoc_malloc(sizeof(*entry) + len);
414: entry->ord = 1;
415: memcpy(entry->id, buf, len);
416: ohash_insert(&id_unique, slot, entry);
417: } else if (unique == 1)
418: entry->ord++;
419:
420: if (entry->ord > 1) {
421: cp = buf;
1.141 schwarze 422: mandoc_asprintf(&buf, "%s~%d", cp, entry->ord);
1.140 schwarze 423: free(cp);
1.99 schwarze 424: }
1.80 schwarze 425: return buf;
1.1 schwarze 426: }
427:
1.5 schwarze 428: static int
1.67 schwarze 429: print_escape(struct html *h, char c)
1.38 schwarze 430: {
431:
432: switch (c) {
433: case '<':
1.67 schwarze 434: print_word(h, "<");
1.38 schwarze 435: break;
436: case '>':
1.67 schwarze 437: print_word(h, ">");
1.38 schwarze 438: break;
439: case '&':
1.67 schwarze 440: print_word(h, "&");
1.38 schwarze 441: break;
442: case '"':
1.67 schwarze 443: print_word(h, """);
1.38 schwarze 444: break;
445: case ASCII_NBRSP:
1.67 schwarze 446: print_word(h, " ");
1.38 schwarze 447: break;
448: case ASCII_HYPH:
1.67 schwarze 449: print_byte(h, '-');
1.59 schwarze 450: break;
1.38 schwarze 451: case ASCII_BREAK:
452: break;
453: default:
1.58 schwarze 454: return 0;
1.38 schwarze 455: }
1.58 schwarze 456: return 1;
1.38 schwarze 457: }
458:
459: static int
1.65 schwarze 460: print_encode(struct html *h, const char *p, const char *pend, int norecurse)
1.1 schwarze 461: {
1.67 schwarze 462: char numbuf[16];
1.84 schwarze 463: const char *seq;
1.4 schwarze 464: size_t sz;
1.84 schwarze 465: int c, len, breakline, nospace;
1.26 schwarze 466: enum mandoc_esc esc;
1.84 schwarze 467: static const char rejs[10] = { ' ', '\\', '<', '>', '&', '"',
1.33 schwarze 468: ASCII_NBRSP, ASCII_HYPH, ASCII_BREAK, '\0' };
1.5 schwarze 469:
1.65 schwarze 470: if (pend == NULL)
471: pend = strchr(p, '\0');
472:
1.84 schwarze 473: breakline = 0;
1.5 schwarze 474: nospace = 0;
1.1 schwarze 475:
1.65 schwarze 476: while (p < pend) {
1.30 schwarze 477: if (HTML_SKIPCHAR & h->flags && '\\' != *p) {
478: h->flags &= ~HTML_SKIPCHAR;
479: p++;
480: continue;
481: }
482:
1.67 schwarze 483: for (sz = strcspn(p, rejs); sz-- && p < pend; p++)
1.84 schwarze 484: print_byte(h, *p);
485:
486: if (breakline &&
487: (p >= pend || *p == ' ' || *p == ASCII_NBRSP)) {
1.115 schwarze 488: print_otag(h, TAG_BR, "");
1.84 schwarze 489: breakline = 0;
490: while (p < pend && (*p == ' ' || *p == ASCII_NBRSP))
491: p++;
492: continue;
493: }
1.4 schwarze 494:
1.65 schwarze 495: if (p >= pend)
1.26 schwarze 496: break;
497:
1.84 schwarze 498: if (*p == ' ') {
499: print_endword(h);
500: p++;
501: continue;
502: }
503:
1.67 schwarze 504: if (print_escape(h, *p++))
1.33 schwarze 505: continue;
1.4 schwarze 506:
1.26 schwarze 507: esc = mandoc_escape(&p, &seq, &len);
508: switch (esc) {
1.35 schwarze 509: case ESCAPE_FONT:
510: case ESCAPE_FONTPREV:
511: case ESCAPE_FONTBOLD:
512: case ESCAPE_FONTITALIC:
513: case ESCAPE_FONTBI:
514: case ESCAPE_FONTROMAN:
1.145 ! schwarze 515: case ESCAPE_FONTCR:
! 516: case ESCAPE_FONTCB:
! 517: case ESCAPE_FONTCI:
1.113 schwarze 518: if (0 == norecurse) {
519: h->flags |= HTML_NOSPACE;
1.125 schwarze 520: if (html_setfont(h, esc))
521: print_metaf(h);
1.113 schwarze 522: h->flags &= ~HTML_NOSPACE;
523: }
1.30 schwarze 524: continue;
1.35 schwarze 525: case ESCAPE_SKIPCHAR:
1.30 schwarze 526: h->flags |= HTML_SKIPCHAR;
527: continue;
1.116 schwarze 528: case ESCAPE_ERROR:
529: continue;
1.30 schwarze 530: default:
531: break;
532: }
533:
534: if (h->flags & HTML_SKIPCHAR) {
535: h->flags &= ~HTML_SKIPCHAR;
536: continue;
537: }
538:
539: switch (esc) {
1.35 schwarze 540: case ESCAPE_UNICODE:
1.38 schwarze 541: /* Skip past "u" header. */
1.26 schwarze 542: c = mchars_num2uc(seq + 1, len - 1);
543: break;
1.35 schwarze 544: case ESCAPE_NUMBERED:
1.26 schwarze 545: c = mchars_num2char(seq, len);
1.51 schwarze 546: if (c < 0)
547: continue;
1.26 schwarze 548: break;
1.35 schwarze 549: case ESCAPE_SPECIAL:
1.61 schwarze 550: c = mchars_spec2cp(seq, len);
1.51 schwarze 551: if (c <= 0)
552: continue;
1.116 schwarze 553: break;
554: case ESCAPE_UNDEF:
555: c = *seq;
1.26 schwarze 556: break;
1.109 schwarze 557: case ESCAPE_DEVICE:
558: print_word(h, "html");
559: continue;
1.84 schwarze 560: case ESCAPE_BREAK:
561: breakline = 1;
562: continue;
1.35 schwarze 563: case ESCAPE_NOSPACE:
1.26 schwarze 564: if ('\0' == *p)
565: nospace = 1;
1.49 schwarze 566: continue;
1.55 schwarze 567: case ESCAPE_OVERSTRIKE:
568: if (len == 0)
569: continue;
570: c = seq[len - 1];
571: break;
1.5 schwarze 572: default:
1.49 schwarze 573: continue;
1.5 schwarze 574: }
1.51 schwarze 575: if ((c < 0x20 && c != 0x09) ||
576: (c > 0x7E && c < 0xA0))
1.49 schwarze 577: c = 0xFFFD;
1.67 schwarze 578: if (c > 0x7E) {
1.86 bentley 579: (void)snprintf(numbuf, sizeof(numbuf), "&#x%.4X;", c);
1.67 schwarze 580: print_word(h, numbuf);
581: } else if (print_escape(h, c) == 0)
582: print_byte(h, c);
1.1 schwarze 583: }
1.5 schwarze 584:
1.58 schwarze 585: return nospace;
1.1 schwarze 586: }
587:
1.6 schwarze 588: static void
1.65 schwarze 589: print_href(struct html *h, const char *name, const char *sec, int man)
1.6 schwarze 590: {
1.110 schwarze 591: struct stat sb;
1.65 schwarze 592: const char *p, *pp;
1.110 schwarze 593: char *filename;
594:
595: if (man) {
596: pp = h->base_man1;
597: if (h->base_man2 != NULL) {
598: mandoc_asprintf(&filename, "%s.%s", name, sec);
599: if (stat(filename, &sb) == -1)
600: pp = h->base_man2;
601: free(filename);
602: }
603: } else
604: pp = h->base_includes;
1.65 schwarze 605:
606: while ((p = strchr(pp, '%')) != NULL) {
607: print_encode(h, pp, p, 1);
608: if (man && p[1] == 'S') {
609: if (sec == NULL)
1.67 schwarze 610: print_byte(h, '1');
1.65 schwarze 611: else
612: print_encode(h, sec, NULL, 1);
613: } else if ((man && p[1] == 'N') ||
614: (man == 0 && p[1] == 'I'))
615: print_encode(h, name, NULL, 1);
616: else
617: print_encode(h, p, p + 2, 1);
618: pp = p + 2;
619: }
620: if (*pp != '\0')
621: print_encode(h, pp, NULL, 1);
1.6 schwarze 622: }
623:
1.1 schwarze 624: struct tag *
1.64 schwarze 625: print_otag(struct html *h, enum htmltag tag, const char *fmt, ...)
1.1 schwarze 626: {
1.64 schwarze 627: va_list ap;
1.1 schwarze 628: struct tag *t;
1.65 schwarze 629: const char *attr;
1.73 schwarze 630: char *arg1, *arg2;
1.114 schwarze 631: int style_written, tflags;
1.66 schwarze 632:
633: tflags = htmltags[tag].flags;
1.1 schwarze 634:
1.127 schwarze 635: /* Flow content is not allowed in phrasing context. */
636:
637: if ((tflags & HTML_INPHRASE) == 0) {
638: for (t = h->tag; t != NULL; t = t->next) {
639: if (t->closed)
640: continue;
641: assert((htmltags[t->tag].flags & HTML_TOPHRASE) == 0);
642: break;
643: }
1.131 schwarze 644:
645: /*
646: * Always wrap phrasing elements in a paragraph
647: * unless already contained in some flow container;
648: * never put them directly into a section.
649: */
650:
651: } else if (tflags & HTML_TOPHRASE && h->tag->tag == TAG_SECTION)
652: print_otag(h, TAG_P, "c", "Pp");
1.127 schwarze 653:
1.74 schwarze 654: /* Push this tag onto the stack of open scopes. */
1.6 schwarze 655:
1.66 schwarze 656: if ((tflags & HTML_NOSTACK) == 0) {
1.24 schwarze 657: t = mandoc_malloc(sizeof(struct tag));
1.1 schwarze 658: t->tag = tag;
1.74 schwarze 659: t->next = h->tag;
1.122 schwarze 660: t->refcnt = 0;
661: t->closed = 0;
1.74 schwarze 662: h->tag = t;
1.1 schwarze 663: } else
664: t = NULL;
665:
1.66 schwarze 666: if (tflags & HTML_NLBEFORE)
1.67 schwarze 667: print_endline(h);
668: if (h->col == 0)
669: print_indent(h);
1.66 schwarze 670: else if ((h->flags & HTML_NOSPACE) == 0) {
671: if (h->flags & HTML_KEEP)
1.86 bentley 672: print_word(h, " ");
1.66 schwarze 673: else {
674: if (h->flags & HTML_PREKEEP)
675: h->flags |= HTML_KEEP;
1.67 schwarze 676: print_endword(h);
1.12 schwarze 677: }
1.66 schwarze 678: }
1.1 schwarze 679:
1.13 schwarze 680: if ( ! (h->flags & HTML_NONOSPACE))
681: h->flags &= ~HTML_NOSPACE;
1.14 schwarze 682: else
683: h->flags |= HTML_NOSPACE;
1.13 schwarze 684:
1.6 schwarze 685: /* Print out the tag name and attributes. */
686:
1.67 schwarze 687: print_byte(h, '<');
688: print_word(h, htmltags[tag].name);
1.64 schwarze 689:
690: va_start(ap, fmt);
691:
1.114 schwarze 692: while (*fmt != '\0' && *fmt != 's') {
1.73 schwarze 693:
1.108 schwarze 694: /* Parse attributes and arguments. */
1.73 schwarze 695:
696: arg1 = va_arg(ap, char *);
1.108 schwarze 697: arg2 = NULL;
1.64 schwarze 698: switch (*fmt++) {
699: case 'c':
1.65 schwarze 700: attr = "class";
1.64 schwarze 701: break;
702: case 'h':
1.65 schwarze 703: attr = "href";
1.64 schwarze 704: break;
705: case 'i':
1.65 schwarze 706: attr = "id";
1.64 schwarze 707: break;
708: case '?':
1.73 schwarze 709: attr = arg1;
710: arg1 = va_arg(ap, char *);
1.64 schwarze 711: break;
712: default:
713: abort();
714: }
1.73 schwarze 715: if (*fmt == 'M')
716: arg2 = va_arg(ap, char *);
717: if (arg1 == NULL)
718: continue;
719:
1.108 schwarze 720: /* Print the attributes. */
1.73 schwarze 721:
1.67 schwarze 722: print_byte(h, ' ');
723: print_word(h, attr);
724: print_byte(h, '=');
725: print_byte(h, '"');
1.65 schwarze 726: switch (*fmt) {
1.78 schwarze 727: case 'I':
728: print_href(h, arg1, NULL, 0);
729: fmt++;
730: break;
1.65 schwarze 731: case 'M':
1.73 schwarze 732: print_href(h, arg1, arg2, 1);
1.65 schwarze 733: fmt++;
734: break;
1.78 schwarze 735: case 'R':
736: print_byte(h, '#');
737: print_encode(h, arg1, NULL, 1);
1.65 schwarze 738: fmt++;
1.78 schwarze 739: break;
1.65 schwarze 740: default:
1.114 schwarze 741: print_encode(h, arg1, NULL, 1);
1.65 schwarze 742: break;
743: }
1.67 schwarze 744: print_byte(h, '"');
1.64 schwarze 745: }
1.114 schwarze 746:
747: style_written = 0;
748: while (*fmt++ == 's') {
749: arg1 = va_arg(ap, char *);
750: arg2 = va_arg(ap, char *);
751: if (arg2 == NULL)
752: continue;
753: print_byte(h, ' ');
754: if (style_written == 0) {
755: print_word(h, "style=\"");
756: style_written = 1;
757: }
758: print_word(h, arg1);
759: print_byte(h, ':');
760: print_byte(h, ' ');
761: print_word(h, arg2);
762: print_byte(h, ';');
763: }
764: if (style_written)
765: print_byte(h, '"');
766:
1.64 schwarze 767: va_end(ap);
1.6 schwarze 768:
1.42 schwarze 769: /* Accommodate for "well-formed" singleton escaping. */
1.6 schwarze 770:
1.127 schwarze 771: if (htmltags[tag].flags & HTML_NOSTACK)
1.67 schwarze 772: print_byte(h, '/');
1.6 schwarze 773:
1.67 schwarze 774: print_byte(h, '>');
1.1 schwarze 775:
1.66 schwarze 776: if (tflags & HTML_NLBEGIN)
1.67 schwarze 777: print_endline(h);
1.66 schwarze 778: else
779: h->flags |= HTML_NOSPACE;
1.18 schwarze 780:
1.66 schwarze 781: if (tflags & HTML_INDENT)
782: h->indent++;
783: if (tflags & HTML_NOINDENT)
784: h->noindent++;
1.18 schwarze 785:
1.58 schwarze 786: return t;
1.135 schwarze 787: }
788:
789: /*
790: * Print an element with an optional "id=" attribute.
1.136 schwarze 791: * If the element has phrasing content and an "id=" attribute,
792: * also add a permalink: outside if it can be in phrasing context,
793: * inside otherwise.
1.135 schwarze 794: */
795: struct tag *
796: print_otag_id(struct html *h, enum htmltag elemtype, const char *cattr,
797: struct roff_node *n)
798: {
1.136 schwarze 799: struct roff_node *nch;
1.135 schwarze 800: struct tag *ret, *t;
1.137 schwarze 801: char *id, *href;
1.135 schwarze 802:
803: ret = NULL;
1.137 schwarze 804: id = href = NULL;
1.135 schwarze 805: if (n->flags & NODE_ID)
806: id = html_make_id(n, 1);
1.137 schwarze 807: if (n->flags & NODE_HREF)
1.140 schwarze 808: href = id == NULL ? html_make_id(n, 2) : id;
1.137 schwarze 809: if (href != NULL && htmltags[elemtype].flags & HTML_INPHRASE)
810: ret = print_otag(h, TAG_A, "chR", "permalink", href);
1.135 schwarze 811: t = print_otag(h, elemtype, "ci", cattr, id);
812: if (ret == NULL) {
813: ret = t;
1.137 schwarze 814: if (href != NULL && (nch = n->child) != NULL) {
1.136 schwarze 815: /* man(7) is safe, it tags phrasing content only. */
816: if (n->tok > MDOC_MAX ||
817: htmltags[elemtype].flags & HTML_TOPHRASE)
818: nch = NULL;
819: else /* For mdoc(7), beware of nested blocks. */
820: while (nch != NULL && nch->type == ROFFT_TEXT)
821: nch = nch->next;
822: if (nch == NULL)
1.137 schwarze 823: print_otag(h, TAG_A, "chR", "permalink", href);
1.136 schwarze 824: }
1.135 schwarze 825: }
1.140 schwarze 826: free(id);
1.137 schwarze 827: if (id == NULL)
828: free(href);
1.135 schwarze 829: return ret;
1.1 schwarze 830: }
831:
832: static void
1.54 schwarze 833: print_ctag(struct html *h, struct tag *tag)
1.1 schwarze 834: {
1.66 schwarze 835: int tflags;
1.35 schwarze 836:
1.122 schwarze 837: if (tag->closed == 0) {
838: tag->closed = 1;
839: if (tag == h->metaf)
840: h->metaf = NULL;
841: if (tag == h->tblt)
842: h->tblt = NULL;
843:
844: tflags = htmltags[tag->tag].flags;
845: if (tflags & HTML_INDENT)
846: h->indent--;
847: if (tflags & HTML_NOINDENT)
848: h->noindent--;
849: if (tflags & HTML_NLEND)
850: print_endline(h);
851: print_indent(h);
852: print_byte(h, '<');
853: print_byte(h, '/');
854: print_word(h, htmltags[tag->tag].name);
855: print_byte(h, '>');
856: if (tflags & HTML_NLAFTER)
857: print_endline(h);
858: }
859: if (tag->refcnt == 0) {
860: h->tag = tag->next;
861: free(tag);
862: }
1.1 schwarze 863: }
864:
/*
 * Print the document type declaration on a line of its own.
 */
void
print_gen_decls(struct html *h)
{
	static const char	 doctype[] = "<!DOCTYPE html>";

	print_word(h, doctype);
	print_endline(h);
}
871:
872: void
873: print_gen_comment(struct html *h, struct roff_node *n)
874: {
875: int wantblank;
876:
877: print_word(h, "<!-- This is an automatically generated file."
878: " Do not edit.");
879: h->indent = 1;
880: wantblank = 0;
881: while (n != NULL && n->type == ROFFT_COMMENT) {
882: if (strstr(n->string, "-->") == NULL &&
883: (wantblank || *n->string != '\0')) {
884: print_endline(h);
885: print_indent(h);
886: print_word(h, n->string);
887: wantblank = *n->string != '\0';
888: }
889: n = n->next;
890: }
891: if (wantblank)
892: print_endline(h);
893: print_word(h, " -->");
894: print_endline(h);
895: h->indent = 0;
1.1 schwarze 896: }
897:
/*
 * Print text that is not associated with any syntax tree node,
 * and hence without a permalink.
 */
void
print_text(struct html *h, const char *word)
{
	struct roff_node	*no_node = NULL;

	print_tagged_text(h, word, no_node);
}
903:
904: void
905: print_tagged_text(struct html *h, const char *word, struct roff_node *n)
906: {
907: struct tag *t;
908: char *href;
909:
1.131 schwarze 910: /*
911: * Always wrap text in a paragraph unless already contained in
912: * some flow container; never put it directly into a section.
913: */
914:
915: if (h->tag->tag == TAG_SECTION)
916: print_otag(h, TAG_P, "c", "Pp");
917:
918: /* Output whitespace before this text? */
919:
1.67 schwarze 920: if (h->col && (h->flags & HTML_NOSPACE) == 0) {
1.12 schwarze 921: if ( ! (HTML_KEEP & h->flags)) {
922: if (HTML_PREKEEP & h->flags)
923: h->flags |= HTML_KEEP;
1.67 schwarze 924: print_endword(h);
1.12 schwarze 925: } else
1.86 bentley 926: print_word(h, " ");
1.12 schwarze 927: }
1.131 schwarze 928:
929: /*
1.139 schwarze 930: * Optionally switch fonts, optionally write a permalink, then
931: * print the text, optionally surrounded by HTML whitespace.
1.131 schwarze 932: */
1.1 schwarze 933:
1.125 schwarze 934: assert(h->metaf == NULL);
935: print_metaf(h);
936: print_indent(h);
1.139 schwarze 937:
1.140 schwarze 938: if (n != NULL && (href = html_make_id(n, 2)) != NULL) {
1.139 schwarze 939: t = print_otag(h, TAG_A, "chR", "permalink", href);
940: free(href);
941: } else
942: t = NULL;
943:
1.65 schwarze 944: if ( ! print_encode(h, word, NULL, 0)) {
1.13 schwarze 945: if ( ! (h->flags & HTML_NONOSPACE))
946: h->flags &= ~HTML_NOSPACE;
1.53 schwarze 947: h->flags &= ~HTML_NONEWLINE;
1.28 schwarze 948: } else
1.53 schwarze 949: h->flags |= HTML_NOSPACE | HTML_NONEWLINE;
1.20 schwarze 950:
1.125 schwarze 951: if (h->metaf != NULL) {
1.20 schwarze 952: print_tagq(h, h->metaf);
953: h->metaf = NULL;
1.139 schwarze 954: } else if (t != NULL)
955: print_tagq(h, t);
1.17 schwarze 956:
957: h->flags &= ~HTML_IGNDELIM;
1.1 schwarze 958: }
959:
960: void
961: print_tagq(struct html *h, const struct tag *until)
962: {
1.122 schwarze 963: struct tag *this, *next;
1.1 schwarze 964:
1.122 schwarze 965: for (this = h->tag; this != NULL; this = next) {
966: next = this == until ? NULL : this->next;
967: print_ctag(h, this);
1.1 schwarze 968: }
969: }
970:
1.120 schwarze 971: /*
972: * Close out all open elements up to but excluding suntil.
973: * Note that a paragraph just inside stays open together with it
974: * because paragraphs include subsequent phrasing content.
975: */
1.1 schwarze 976: void
977: print_stagq(struct html *h, const struct tag *suntil)
978: {
1.122 schwarze 979: struct tag *this, *next;
1.1 schwarze 980:
1.122 schwarze 981: for (this = h->tag; this != NULL; this = next) {
982: next = this->next;
983: if (this == suntil || (next == suntil &&
984: (this->tag == TAG_P || this->tag == TAG_PRE)))
985: break;
986: print_ctag(h, this);
1.1 schwarze 987: }
1.42 schwarze 988: }
989:
1.67 schwarze 990:
991: /***********************************************************************
992: * Low level output functions.
993: * They implement line breaking using a short static buffer.
994: ***********************************************************************/
995:
996: /*
997: * Buffer one HTML output byte.
998: * If the buffer is full, flush and deactivate it and start a new line.
999: * If the buffer is inactive, print directly.
1000: */
1001: static void
1002: print_byte(struct html *h, char c)
1003: {
1004: if ((h->flags & HTML_BUFFER) == 0) {
1005: putchar(c);
1006: h->col++;
1007: return;
1008: }
1009:
1010: if (h->col + h->bufcol < sizeof(h->buf)) {
1011: h->buf[h->bufcol++] = c;
1012: return;
1013: }
1014:
1015: putchar('\n');
1016: h->col = 0;
1017: print_indent(h);
1018: putchar(' ');
1019: putchar(' ');
1020: fwrite(h->buf, h->bufcol, 1, stdout);
1021: putchar(c);
1022: h->col = (h->indent + 1) * 2 + h->bufcol + 1;
1023: h->bufcol = 0;
1024: h->flags &= ~HTML_BUFFER;
1025: }
1026:
1.66 schwarze 1027: /*
1028: * If something was printed on the current output line, end it.
1.67 schwarze 1029: * Not to be called right after print_indent().
1.66 schwarze 1030: */
1.72 schwarze 1031: void
1.67 schwarze 1032: print_endline(struct html *h)
1.66 schwarze 1033: {
1.67 schwarze 1034: if (h->col == 0)
1.66 schwarze 1035: return;
1036:
1.67 schwarze 1037: if (h->bufcol) {
1038: putchar(' ');
1039: fwrite(h->buf, h->bufcol, 1, stdout);
1040: h->bufcol = 0;
1041: }
1.66 schwarze 1042: putchar('\n');
1.67 schwarze 1043: h->col = 0;
1044: h->flags |= HTML_NOSPACE;
1045: h->flags &= ~HTML_BUFFER;
1046: }
1047:
1048: /*
1049: * Flush the HTML output buffer.
1050: * If it is inactive, activate it.
1051: */
1052: static void
1053: print_endword(struct html *h)
1054: {
1055: if (h->noindent) {
1056: print_byte(h, ' ');
1057: return;
1058: }
1059:
1060: if ((h->flags & HTML_BUFFER) == 0) {
1061: h->col++;
1062: h->flags |= HTML_BUFFER;
1063: } else if (h->bufcol) {
1064: putchar(' ');
1065: fwrite(h->buf, h->bufcol, 1, stdout);
1066: h->col += h->bufcol + 1;
1067: }
1068: h->bufcol = 0;
1.66 schwarze 1069: }
1070:
1071: /*
1072: * If at the beginning of a new output line,
1073: * perform indentation and mark the line as containing output.
1074: * Make sure to really produce some output right afterwards,
1075: * but do not use print_otag() for producing it.
1076: */
1077: static void
1.67 schwarze 1078: print_indent(struct html *h)
1.66 schwarze 1079: {
1.67 schwarze 1080: size_t i;
1.66 schwarze 1081:
1.132 schwarze 1082: if (h->col || h->noindent)
1.66 schwarze 1083: return;
1084:
1.132 schwarze 1085: h->col = h->indent * 2;
1086: for (i = 0; i < h->col; i++)
1087: putchar(' ');
1.67 schwarze 1088: }
1089:
/*
 * Print or buffer a string byte by byte
 * depending on the current HTML output buffer state.
 * No encoding takes place.
 */
static void
print_word(struct html *h, const char *cp)
{
	for (; *cp != '\0'; cp++)
		print_byte(h, *cp);
}