Annotation of src/usr.bin/mandoc/html.c, Revision 1.140
1.140 ! schwarze 1: /* $OpenBSD: html.c,v 1.139 2020/04/18 20:28:46 schwarze Exp $ */
1.1 schwarze 2: /*
1.135 schwarze 3: * Copyright (c) 2011-2015, 2017-2020 Ingo Schwarze <schwarze@openbsd.org>
1.42 schwarze 4: * Copyright (c) 2008-2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
1.1 schwarze 5: *
6: * Permission to use, copy, modify, and distribute this software for any
7: * purpose with or without fee is hereby granted, provided that the above
8: * copyright notice and this permission notice appear in all copies.
9: *
1.56 schwarze 10: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
1.1 schwarze 11: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
1.56 schwarze 12: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
1.1 schwarze 13: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
1.135 schwarze 17: *
18: * Common functions for mandoc(1) HTML formatters.
19: * For use by individual formatters and by the main program.
1.1 schwarze 20: */
21: #include <sys/types.h>
1.110 schwarze 22: #include <sys/stat.h>
1.1 schwarze 23:
24: #include <assert.h>
1.3 schwarze 25: #include <ctype.h>
1.4 schwarze 26: #include <stdarg.h>
1.99 schwarze 27: #include <stddef.h>
1.1 schwarze 28: #include <stdio.h>
29: #include <stdint.h>
30: #include <stdlib.h>
31: #include <string.h>
32: #include <unistd.h>
33:
1.80 schwarze 34: #include "mandoc_aux.h"
1.99 schwarze 35: #include "mandoc_ohash.h"
1.9 schwarze 36: #include "mandoc.h"
1.80 schwarze 37: #include "roff.h"
1.1 schwarze 38: #include "out.h"
39: #include "html.h"
1.56 schwarze 40: #include "manconf.h"
1.1 schwarze 41: #include "main.h"
42:
/*
 * Formatting properties of HTML element types,
 * to be combined into the flags member of struct htmldata.
 */
#define	HTML_INPHRASE	 0x001	/* Can appear in phrasing context. */
#define	HTML_TOPHRASE	 0x002	/* Establishes phrasing context. */
#define	HTML_NOSTACK	 0x004	/* Does not have an end tag. */
#define	HTML_NLBEFORE	 0x008	/* Output line break before opening. */
#define	HTML_NLBEGIN	 0x010	/* Output line break after opening. */
#define	HTML_NLEND	 0x020	/* Output line break before closing. */
#define	HTML_NLAFTER	 0x040	/* Output line break after closing. */
#define	HTML_INDENT	 0x080	/* Indent content by two spaces. */
#define	HTML_NOINDENT	 0x100	/* Exception: never indent content. */

/* Frequently needed combinations of the line break properties. */
#define	HTML_NLAROUND	 (HTML_NLBEFORE | HTML_NLAFTER)
#define	HTML_NLINSIDE	 (HTML_NLBEGIN | HTML_NLEND)
#define	HTML_NLALL	 (HTML_NLAROUND | HTML_NLINSIDE)

/* Description of one HTML element type. */
struct htmldata {
	const char	 *name;		/* Element name as written to output. */
	int		  flags;	/* Bitwise OR of the HTML_* properties. */
};
59:
60: static const struct htmldata htmltags[TAG_MAX] = {
1.66 schwarze 61: {"html", HTML_NLALL},
62: {"head", HTML_NLALL | HTML_INDENT},
1.127 schwarze 63: {"meta", HTML_NOSTACK | HTML_NLALL},
64: {"link", HTML_NOSTACK | HTML_NLALL},
65: {"style", HTML_NLALL | HTML_INDENT},
66: {"title", HTML_NLAROUND},
1.66 schwarze 67: {"body", HTML_NLALL},
68: {"div", HTML_NLAROUND},
1.123 schwarze 69: {"section", HTML_NLALL},
1.66 schwarze 70: {"table", HTML_NLALL | HTML_INDENT},
71: {"tr", HTML_NLALL | HTML_INDENT},
72: {"td", HTML_NLAROUND},
73: {"li", HTML_NLAROUND | HTML_INDENT},
74: {"ul", HTML_NLALL | HTML_INDENT},
75: {"ol", HTML_NLALL | HTML_INDENT},
76: {"dl", HTML_NLALL | HTML_INDENT},
77: {"dt", HTML_NLAROUND},
78: {"dd", HTML_NLAROUND | HTML_INDENT},
1.127 schwarze 79: {"h1", HTML_TOPHRASE | HTML_NLAROUND},
80: {"h2", HTML_TOPHRASE | HTML_NLAROUND},
81: {"p", HTML_TOPHRASE | HTML_NLAROUND | HTML_INDENT},
82: {"pre", HTML_TOPHRASE | HTML_NLALL | HTML_NOINDENT},
83: {"a", HTML_INPHRASE | HTML_TOPHRASE},
84: {"b", HTML_INPHRASE | HTML_TOPHRASE},
85: {"cite", HTML_INPHRASE | HTML_TOPHRASE},
86: {"code", HTML_INPHRASE | HTML_TOPHRASE},
87: {"i", HTML_INPHRASE | HTML_TOPHRASE},
88: {"small", HTML_INPHRASE | HTML_TOPHRASE},
89: {"span", HTML_INPHRASE | HTML_TOPHRASE},
90: {"var", HTML_INPHRASE | HTML_TOPHRASE},
91: {"br", HTML_INPHRASE | HTML_NOSTACK | HTML_NLALL},
1.134 schwarze 92: {"mark", HTML_INPHRASE },
1.127 schwarze 93: {"math", HTML_INPHRASE | HTML_NLALL | HTML_INDENT},
1.66 schwarze 94: {"mrow", 0},
95: {"mi", 0},
1.85 schwarze 96: {"mn", 0},
1.66 schwarze 97: {"mo", 0},
98: {"msup", 0},
99: {"msub", 0},
100: {"msubsup", 0},
101: {"mfrac", 0},
102: {"msqrt", 0},
103: {"mfenced", 0},
104: {"mtable", 0},
105: {"mtr", 0},
106: {"mtd", 0},
107: {"munderover", 0},
108: {"munder", 0},
109: {"mover", 0},
1.5 schwarze 110: };
111:
1.99 schwarze 112: /* Avoid duplicate HTML id= attributes. */
1.140 ! schwarze 113:
! 114: struct id_entry {
! 115: int ord; /* Ordinal number of the latest occurrence. */
! 116: char id[]; /* The id= attribute without any ordinal suffix. */
! 117: };
1.99 schwarze 118: static struct ohash id_unique;
119:
/* State management. */
static	void	 html_reset_internal(struct html *);

/* Elements, attributes, and encoded text. */
static	void	 print_ctag(struct html *, struct tag *);
static	int	 print_encode(struct html *, const char *, const char *, int);
static	int	 print_escape(struct html *, char);
static	void	 print_href(struct html *, const char *, const char *, int);
static	void	 print_metaf(struct html *);

/* Low level output. */
static	void	 print_byte(struct html *, char);
static	void	 print_endword(struct html *);
static	void	 print_indent(struct html *);
static	void	 print_word(struct html *, const char *);
1.5 schwarze 131:
1.35 schwarze 132:
1.50 schwarze 133: void *
1.61 schwarze 134: html_alloc(const struct manoutput *outopts)
1.1 schwarze 135: {
136: struct html *h;
137:
1.24 schwarze 138: h = mandoc_calloc(1, sizeof(struct html));
1.1 schwarze 139:
1.74 schwarze 140: h->tag = NULL;
1.56 schwarze 141: h->style = outopts->style;
1.110 schwarze 142: if ((h->base_man1 = outopts->man) == NULL)
143: h->base_man2 = NULL;
144: else if ((h->base_man2 = strchr(h->base_man1, ';')) != NULL)
145: *h->base_man2++ = '\0';
1.56 schwarze 146: h->base_includes = outopts->includes;
147: if (outopts->fragment)
148: h->oflags |= HTML_FRAGMENT;
1.111 schwarze 149: if (outopts->toc)
150: h->oflags |= HTML_TOC;
1.1 schwarze 151:
1.140 ! schwarze 152: mandoc_ohash_init(&id_unique, 4, offsetof(struct id_entry, id));
1.99 schwarze 153:
1.58 schwarze 154: return h;
1.1 schwarze 155: }
156:
1.124 schwarze 157: static void
158: html_reset_internal(struct html *h)
1.1 schwarze 159: {
160: struct tag *tag;
1.140 ! schwarze 161: struct id_entry *entry;
1.99 schwarze 162: unsigned int slot;
1.1 schwarze 163:
1.74 schwarze 164: while ((tag = h->tag) != NULL) {
165: h->tag = tag->next;
1.1 schwarze 166: free(tag);
167: }
1.140 ! schwarze 168: entry = ohash_first(&id_unique, &slot);
! 169: while (entry != NULL) {
! 170: free(entry);
! 171: entry = ohash_next(&id_unique, &slot);
1.99 schwarze 172: }
173: ohash_delete(&id_unique);
1.124 schwarze 174: }
175:
176: void
177: html_reset(void *p)
178: {
179: html_reset_internal(p);
1.140 ! schwarze 180: mandoc_ohash_init(&id_unique, 4, offsetof(struct id_entry, id));
1.124 schwarze 181: }
182:
/*
 * Discard all per-document state, then free the state object itself.
 */
void
html_free(void *p)
{
	struct html	*h = p;

	html_reset_internal(h);
	free(h);
}
189:
190: void
191: print_gen_head(struct html *h)
192: {
1.42 schwarze 193: struct tag *t;
194:
1.64 schwarze 195: print_otag(h, TAG_META, "?", "charset", "utf-8");
1.92 schwarze 196: if (h->style != NULL) {
197: print_otag(h, TAG_LINK, "?h??", "rel", "stylesheet",
198: h->style, "type", "text/css", "media", "all");
199: return;
200: }
1.1 schwarze 201:
1.42 schwarze 202: /*
1.92 schwarze 203: * Print a minimal embedded style sheet.
1.42 schwarze 204: */
1.66 schwarze 205:
1.64 schwarze 206: t = print_otag(h, TAG_STYLE, "");
1.66 schwarze 207: print_text(h, "table.head, table.foot { width: 100%; }");
1.67 schwarze 208: print_endline(h);
1.66 schwarze 209: print_text(h, "td.head-rtitle, td.foot-os { text-align: right; }");
1.67 schwarze 210: print_endline(h);
1.66 schwarze 211: print_text(h, "td.head-vol { text-align: center; }");
1.67 schwarze 212: print_endline(h);
1.126 schwarze 213: print_text(h, ".Nd, .Bf, .Op { display: inline; }");
1.95 schwarze 214: print_endline(h);
1.126 schwarze 215: print_text(h, ".Pa, .Ad { font-style: italic; }");
1.96 schwarze 216: print_endline(h);
1.126 schwarze 217: print_text(h, ".Ms { font-weight: bold; }");
1.98 schwarze 218: print_endline(h);
1.126 schwarze 219: print_text(h, ".Bl-diag ");
1.94 schwarze 220: print_byte(h, '>');
221: print_text(h, " dt { font-weight: bold; }");
1.93 schwarze 222: print_endline(h);
1.126 schwarze 223: print_text(h, "code.Nm, .Fl, .Cm, .Ic, code.In, .Fd, .Fn, .Cd "
224: "{ font-weight: bold; font-family: inherit; }");
1.42 schwarze 225: print_tagq(h, t);
1.1 schwarze 226: }
227:
1.125 schwarze 228: int
229: html_setfont(struct html *h, enum mandoc_esc font)
1.5 schwarze 230: {
1.125 schwarze 231: switch (font) {
1.35 schwarze 232: case ESCAPE_FONTPREV:
1.5 schwarze 233: font = h->metal;
234: break;
1.35 schwarze 235: case ESCAPE_FONTITALIC:
236: case ESCAPE_FONTBOLD:
237: case ESCAPE_FONTBI:
1.112 schwarze 238: case ESCAPE_FONTCW:
1.125 schwarze 239: case ESCAPE_FONTROMAN:
1.112 schwarze 240: break;
1.35 schwarze 241: case ESCAPE_FONT:
1.125 schwarze 242: font = ESCAPE_FONTROMAN;
1.5 schwarze 243: break;
244: default:
1.125 schwarze 245: return 0;
1.1 schwarze 246: }
1.125 schwarze 247: h->metal = h->metac;
248: h->metac = font;
249: return 1;
250: }
1.1 schwarze 251:
1.125 schwarze 252: static void
253: print_metaf(struct html *h)
254: {
1.20 schwarze 255: if (h->metaf) {
256: print_tagq(h, h->metaf);
257: h->metaf = NULL;
258: }
1.125 schwarze 259: switch (h->metac) {
260: case ESCAPE_FONTITALIC:
1.64 schwarze 261: h->metaf = print_otag(h, TAG_I, "");
1.31 schwarze 262: break;
1.125 schwarze 263: case ESCAPE_FONTBOLD:
1.64 schwarze 264: h->metaf = print_otag(h, TAG_B, "");
1.31 schwarze 265: break;
1.125 schwarze 266: case ESCAPE_FONTBI:
1.64 schwarze 267: h->metaf = print_otag(h, TAG_B, "");
268: print_otag(h, TAG_I, "");
1.31 schwarze 269: break;
1.125 schwarze 270: case ESCAPE_FONTCW:
1.112 schwarze 271: h->metaf = print_otag(h, TAG_SPAN, "c", "Li");
272: break;
1.31 schwarze 273: default:
274: break;
275: }
1.118 schwarze 276: }
277:
1.119 schwarze 278: void
279: html_close_paragraph(struct html *h)
280: {
1.129 schwarze 281: struct tag *this, *next;
1.130 schwarze 282: int flags;
1.119 schwarze 283:
1.129 schwarze 284: this = h->tag;
285: for (;;) {
286: next = this->next;
1.130 schwarze 287: flags = htmltags[this->tag].flags;
288: if (flags & (HTML_INPHRASE | HTML_TOPHRASE))
1.129 schwarze 289: print_ctag(h, this);
1.130 schwarze 290: if ((flags & HTML_INPHRASE) == 0)
1.119 schwarze 291: break;
1.129 schwarze 292: this = next;
1.119 schwarze 293: }
294: }
295:
1.118 schwarze 296: /*
297: * ROFF_nf switches to no-fill mode, ROFF_fi to fill mode.
298: * TOKEN_NONE does not switch. The old mode is returned.
299: */
300: enum roff_tok
301: html_fillmode(struct html *h, enum roff_tok want)
302: {
303: struct tag *t;
304: enum roff_tok had;
305:
306: for (t = h->tag; t != NULL; t = t->next)
307: if (t->tag == TAG_PRE)
308: break;
309:
310: had = t == NULL ? ROFF_fi : ROFF_nf;
311:
312: if (want != had) {
313: switch (want) {
314: case ROFF_fi:
315: print_tagq(h, t);
316: break;
317: case ROFF_nf:
1.119 schwarze 318: html_close_paragraph(h);
1.118 schwarze 319: print_otag(h, TAG_PRE, "");
320: break;
321: case TOKEN_NONE:
322: break;
323: default:
324: abort();
325: }
326: }
327: return had;
1.80 schwarze 328: }
329:
1.135 schwarze 330: /*
331: * Allocate a string to be used for the "id=" attribute of an HTML
332: * element and/or as a segment identifier for a URI in an <a> element.
333: * The function may fail and return NULL if the node lacks text data
334: * to create the attribute from.
1.140 ! schwarze 335: * The caller is responsible for free(3)ing the returned string.
! 336: *
1.135 schwarze 337: * If the "unique" argument is non-zero, the "id_unique" ohash table
1.140 ! schwarze 338: * is used for de-duplication. If the "unique" argument is 1,
! 339: * it is the first time the function is called for this tag and
! 340: * location, so if an ordinal suffix is needed, it is incremented.
! 341: * If the "unique" argument is 2, it is the second time the function
! 342: * is called for this tag and location, so the ordinal suffix
! 343: * remains unchanged.
1.135 schwarze 344: */
1.80 schwarze 345: char *
1.99 schwarze 346: html_make_id(const struct roff_node *n, int unique)
1.80 schwarze 347: {
348: const struct roff_node *nch;
1.140 ! schwarze 349: struct id_entry *entry;
! 350: char *buf, *cp;
! 351: size_t len;
1.99 schwarze 352: unsigned int slot;
1.80 schwarze 353:
1.138 schwarze 354: if (n->tag != NULL)
355: buf = mandoc_strdup(n->tag);
1.135 schwarze 356: else {
357: switch (n->tok) {
358: case MDOC_Sh:
359: case MDOC_Ss:
360: case MDOC_Sx:
361: case MAN_SH:
362: case MAN_SS:
363: for (nch = n->child; nch != NULL; nch = nch->next)
364: if (nch->type != ROFFT_TEXT)
365: return NULL;
366: buf = NULL;
367: deroff(&buf, n);
368: if (buf == NULL)
369: return NULL;
370: break;
371: default:
1.136 schwarze 372: if (n->child == NULL || n->child->type != ROFFT_TEXT)
1.135 schwarze 373: return NULL;
374: buf = mandoc_strdup(n->child->string);
375: break;
376: }
377: }
1.80 schwarze 378:
1.100 schwarze 379: /*
380: * In ID attributes, only use ASCII characters that are
381: * permitted in URL-fragment strings according to the
382: * explicit list at:
383: * https://url.spec.whatwg.org/#url-fragment-string
384: */
1.80 schwarze 385:
386: for (cp = buf; *cp != '\0'; cp++)
1.100 schwarze 387: if (isalnum((unsigned char)*cp) == 0 &&
388: strchr("!$&'()*+,-./:;=?@_~", *cp) == NULL)
1.80 schwarze 389: *cp = '_';
390:
1.99 schwarze 391: if (unique == 0)
392: return buf;
393:
394: /* Avoid duplicate HTML id= attributes. */
395:
396: slot = ohash_qlookup(&id_unique, buf);
1.140 ! schwarze 397: if ((entry = ohash_find(&id_unique, slot)) == NULL) {
! 398: len = strlen(buf) + 1;
! 399: entry = mandoc_malloc(sizeof(*entry) + len);
! 400: entry->ord = 1;
! 401: memcpy(entry->id, buf, len);
! 402: ohash_insert(&id_unique, slot, entry);
! 403: } else if (unique == 1)
! 404: entry->ord++;
! 405:
! 406: if (entry->ord > 1) {
! 407: cp = buf;
! 408: mandoc_asprintf(&buf, "%s_%d", cp, entry->ord);
! 409: free(cp);
1.99 schwarze 410: }
1.80 schwarze 411: return buf;
1.1 schwarze 412: }
413:
1.5 schwarze 414: static int
1.67 schwarze 415: print_escape(struct html *h, char c)
1.38 schwarze 416: {
417:
418: switch (c) {
419: case '<':
1.67 schwarze 420: print_word(h, "<");
1.38 schwarze 421: break;
422: case '>':
1.67 schwarze 423: print_word(h, ">");
1.38 schwarze 424: break;
425: case '&':
1.67 schwarze 426: print_word(h, "&");
1.38 schwarze 427: break;
428: case '"':
1.67 schwarze 429: print_word(h, """);
1.38 schwarze 430: break;
431: case ASCII_NBRSP:
1.67 schwarze 432: print_word(h, " ");
1.38 schwarze 433: break;
434: case ASCII_HYPH:
1.67 schwarze 435: print_byte(h, '-');
1.59 schwarze 436: break;
1.38 schwarze 437: case ASCII_BREAK:
438: break;
439: default:
1.58 schwarze 440: return 0;
1.38 schwarze 441: }
1.58 schwarze 442: return 1;
1.38 schwarze 443: }
444:
445: static int
1.65 schwarze 446: print_encode(struct html *h, const char *p, const char *pend, int norecurse)
1.1 schwarze 447: {
1.67 schwarze 448: char numbuf[16];
1.84 schwarze 449: const char *seq;
1.4 schwarze 450: size_t sz;
1.84 schwarze 451: int c, len, breakline, nospace;
1.26 schwarze 452: enum mandoc_esc esc;
1.84 schwarze 453: static const char rejs[10] = { ' ', '\\', '<', '>', '&', '"',
1.33 schwarze 454: ASCII_NBRSP, ASCII_HYPH, ASCII_BREAK, '\0' };
1.5 schwarze 455:
1.65 schwarze 456: if (pend == NULL)
457: pend = strchr(p, '\0');
458:
1.84 schwarze 459: breakline = 0;
1.5 schwarze 460: nospace = 0;
1.1 schwarze 461:
1.65 schwarze 462: while (p < pend) {
1.30 schwarze 463: if (HTML_SKIPCHAR & h->flags && '\\' != *p) {
464: h->flags &= ~HTML_SKIPCHAR;
465: p++;
466: continue;
467: }
468:
1.67 schwarze 469: for (sz = strcspn(p, rejs); sz-- && p < pend; p++)
1.84 schwarze 470: print_byte(h, *p);
471:
472: if (breakline &&
473: (p >= pend || *p == ' ' || *p == ASCII_NBRSP)) {
1.115 schwarze 474: print_otag(h, TAG_BR, "");
1.84 schwarze 475: breakline = 0;
476: while (p < pend && (*p == ' ' || *p == ASCII_NBRSP))
477: p++;
478: continue;
479: }
1.4 schwarze 480:
1.65 schwarze 481: if (p >= pend)
1.26 schwarze 482: break;
483:
1.84 schwarze 484: if (*p == ' ') {
485: print_endword(h);
486: p++;
487: continue;
488: }
489:
1.67 schwarze 490: if (print_escape(h, *p++))
1.33 schwarze 491: continue;
1.4 schwarze 492:
1.26 schwarze 493: esc = mandoc_escape(&p, &seq, &len);
494: switch (esc) {
1.35 schwarze 495: case ESCAPE_FONT:
496: case ESCAPE_FONTPREV:
497: case ESCAPE_FONTBOLD:
498: case ESCAPE_FONTITALIC:
499: case ESCAPE_FONTBI:
1.112 schwarze 500: case ESCAPE_FONTCW:
1.35 schwarze 501: case ESCAPE_FONTROMAN:
1.113 schwarze 502: if (0 == norecurse) {
503: h->flags |= HTML_NOSPACE;
1.125 schwarze 504: if (html_setfont(h, esc))
505: print_metaf(h);
1.113 schwarze 506: h->flags &= ~HTML_NOSPACE;
507: }
1.30 schwarze 508: continue;
1.35 schwarze 509: case ESCAPE_SKIPCHAR:
1.30 schwarze 510: h->flags |= HTML_SKIPCHAR;
511: continue;
1.116 schwarze 512: case ESCAPE_ERROR:
513: continue;
1.30 schwarze 514: default:
515: break;
516: }
517:
518: if (h->flags & HTML_SKIPCHAR) {
519: h->flags &= ~HTML_SKIPCHAR;
520: continue;
521: }
522:
523: switch (esc) {
1.35 schwarze 524: case ESCAPE_UNICODE:
1.38 schwarze 525: /* Skip past "u" header. */
1.26 schwarze 526: c = mchars_num2uc(seq + 1, len - 1);
527: break;
1.35 schwarze 528: case ESCAPE_NUMBERED:
1.26 schwarze 529: c = mchars_num2char(seq, len);
1.51 schwarze 530: if (c < 0)
531: continue;
1.26 schwarze 532: break;
1.35 schwarze 533: case ESCAPE_SPECIAL:
1.61 schwarze 534: c = mchars_spec2cp(seq, len);
1.51 schwarze 535: if (c <= 0)
536: continue;
1.116 schwarze 537: break;
538: case ESCAPE_UNDEF:
539: c = *seq;
1.26 schwarze 540: break;
1.109 schwarze 541: case ESCAPE_DEVICE:
542: print_word(h, "html");
543: continue;
1.84 schwarze 544: case ESCAPE_BREAK:
545: breakline = 1;
546: continue;
1.35 schwarze 547: case ESCAPE_NOSPACE:
1.26 schwarze 548: if ('\0' == *p)
549: nospace = 1;
1.49 schwarze 550: continue;
1.55 schwarze 551: case ESCAPE_OVERSTRIKE:
552: if (len == 0)
553: continue;
554: c = seq[len - 1];
555: break;
1.5 schwarze 556: default:
1.49 schwarze 557: continue;
1.5 schwarze 558: }
1.51 schwarze 559: if ((c < 0x20 && c != 0x09) ||
560: (c > 0x7E && c < 0xA0))
1.49 schwarze 561: c = 0xFFFD;
1.67 schwarze 562: if (c > 0x7E) {
1.86 bentley 563: (void)snprintf(numbuf, sizeof(numbuf), "&#x%.4X;", c);
1.67 schwarze 564: print_word(h, numbuf);
565: } else if (print_escape(h, c) == 0)
566: print_byte(h, c);
1.1 schwarze 567: }
1.5 schwarze 568:
1.58 schwarze 569: return nospace;
1.1 schwarze 570: }
571:
1.6 schwarze 572: static void
1.65 schwarze 573: print_href(struct html *h, const char *name, const char *sec, int man)
1.6 schwarze 574: {
1.110 schwarze 575: struct stat sb;
1.65 schwarze 576: const char *p, *pp;
1.110 schwarze 577: char *filename;
578:
579: if (man) {
580: pp = h->base_man1;
581: if (h->base_man2 != NULL) {
582: mandoc_asprintf(&filename, "%s.%s", name, sec);
583: if (stat(filename, &sb) == -1)
584: pp = h->base_man2;
585: free(filename);
586: }
587: } else
588: pp = h->base_includes;
1.65 schwarze 589:
590: while ((p = strchr(pp, '%')) != NULL) {
591: print_encode(h, pp, p, 1);
592: if (man && p[1] == 'S') {
593: if (sec == NULL)
1.67 schwarze 594: print_byte(h, '1');
1.65 schwarze 595: else
596: print_encode(h, sec, NULL, 1);
597: } else if ((man && p[1] == 'N') ||
598: (man == 0 && p[1] == 'I'))
599: print_encode(h, name, NULL, 1);
600: else
601: print_encode(h, p, p + 2, 1);
602: pp = p + 2;
603: }
604: if (*pp != '\0')
605: print_encode(h, pp, NULL, 1);
1.6 schwarze 606: }
607:
1.1 schwarze 608: struct tag *
1.64 schwarze 609: print_otag(struct html *h, enum htmltag tag, const char *fmt, ...)
1.1 schwarze 610: {
1.64 schwarze 611: va_list ap;
1.1 schwarze 612: struct tag *t;
1.65 schwarze 613: const char *attr;
1.73 schwarze 614: char *arg1, *arg2;
1.114 schwarze 615: int style_written, tflags;
1.66 schwarze 616:
617: tflags = htmltags[tag].flags;
1.1 schwarze 618:
1.127 schwarze 619: /* Flow content is not allowed in phrasing context. */
620:
621: if ((tflags & HTML_INPHRASE) == 0) {
622: for (t = h->tag; t != NULL; t = t->next) {
623: if (t->closed)
624: continue;
625: assert((htmltags[t->tag].flags & HTML_TOPHRASE) == 0);
626: break;
627: }
1.131 schwarze 628:
629: /*
630: * Always wrap phrasing elements in a paragraph
631: * unless already contained in some flow container;
632: * never put them directly into a section.
633: */
634:
635: } else if (tflags & HTML_TOPHRASE && h->tag->tag == TAG_SECTION)
636: print_otag(h, TAG_P, "c", "Pp");
1.127 schwarze 637:
1.74 schwarze 638: /* Push this tag onto the stack of open scopes. */
1.6 schwarze 639:
1.66 schwarze 640: if ((tflags & HTML_NOSTACK) == 0) {
1.24 schwarze 641: t = mandoc_malloc(sizeof(struct tag));
1.1 schwarze 642: t->tag = tag;
1.74 schwarze 643: t->next = h->tag;
1.122 schwarze 644: t->refcnt = 0;
645: t->closed = 0;
1.74 schwarze 646: h->tag = t;
1.1 schwarze 647: } else
648: t = NULL;
649:
1.66 schwarze 650: if (tflags & HTML_NLBEFORE)
1.67 schwarze 651: print_endline(h);
652: if (h->col == 0)
653: print_indent(h);
1.66 schwarze 654: else if ((h->flags & HTML_NOSPACE) == 0) {
655: if (h->flags & HTML_KEEP)
1.86 bentley 656: print_word(h, " ");
1.66 schwarze 657: else {
658: if (h->flags & HTML_PREKEEP)
659: h->flags |= HTML_KEEP;
1.67 schwarze 660: print_endword(h);
1.12 schwarze 661: }
1.66 schwarze 662: }
1.1 schwarze 663:
1.13 schwarze 664: if ( ! (h->flags & HTML_NONOSPACE))
665: h->flags &= ~HTML_NOSPACE;
1.14 schwarze 666: else
667: h->flags |= HTML_NOSPACE;
1.13 schwarze 668:
1.6 schwarze 669: /* Print out the tag name and attributes. */
670:
1.67 schwarze 671: print_byte(h, '<');
672: print_word(h, htmltags[tag].name);
1.64 schwarze 673:
674: va_start(ap, fmt);
675:
1.114 schwarze 676: while (*fmt != '\0' && *fmt != 's') {
1.73 schwarze 677:
1.108 schwarze 678: /* Parse attributes and arguments. */
1.73 schwarze 679:
680: arg1 = va_arg(ap, char *);
1.108 schwarze 681: arg2 = NULL;
1.64 schwarze 682: switch (*fmt++) {
683: case 'c':
1.65 schwarze 684: attr = "class";
1.64 schwarze 685: break;
686: case 'h':
1.65 schwarze 687: attr = "href";
1.64 schwarze 688: break;
689: case 'i':
1.65 schwarze 690: attr = "id";
1.64 schwarze 691: break;
692: case '?':
1.73 schwarze 693: attr = arg1;
694: arg1 = va_arg(ap, char *);
1.64 schwarze 695: break;
696: default:
697: abort();
698: }
1.73 schwarze 699: if (*fmt == 'M')
700: arg2 = va_arg(ap, char *);
701: if (arg1 == NULL)
702: continue;
703:
1.108 schwarze 704: /* Print the attributes. */
1.73 schwarze 705:
1.67 schwarze 706: print_byte(h, ' ');
707: print_word(h, attr);
708: print_byte(h, '=');
709: print_byte(h, '"');
1.65 schwarze 710: switch (*fmt) {
1.78 schwarze 711: case 'I':
712: print_href(h, arg1, NULL, 0);
713: fmt++;
714: break;
1.65 schwarze 715: case 'M':
1.73 schwarze 716: print_href(h, arg1, arg2, 1);
1.65 schwarze 717: fmt++;
718: break;
1.78 schwarze 719: case 'R':
720: print_byte(h, '#');
721: print_encode(h, arg1, NULL, 1);
1.65 schwarze 722: fmt++;
1.78 schwarze 723: break;
1.65 schwarze 724: default:
1.114 schwarze 725: print_encode(h, arg1, NULL, 1);
1.65 schwarze 726: break;
727: }
1.67 schwarze 728: print_byte(h, '"');
1.64 schwarze 729: }
1.114 schwarze 730:
731: style_written = 0;
732: while (*fmt++ == 's') {
733: arg1 = va_arg(ap, char *);
734: arg2 = va_arg(ap, char *);
735: if (arg2 == NULL)
736: continue;
737: print_byte(h, ' ');
738: if (style_written == 0) {
739: print_word(h, "style=\"");
740: style_written = 1;
741: }
742: print_word(h, arg1);
743: print_byte(h, ':');
744: print_byte(h, ' ');
745: print_word(h, arg2);
746: print_byte(h, ';');
747: }
748: if (style_written)
749: print_byte(h, '"');
750:
1.64 schwarze 751: va_end(ap);
1.6 schwarze 752:
1.42 schwarze 753: /* Accommodate for "well-formed" singleton escaping. */
1.6 schwarze 754:
1.127 schwarze 755: if (htmltags[tag].flags & HTML_NOSTACK)
1.67 schwarze 756: print_byte(h, '/');
1.6 schwarze 757:
1.67 schwarze 758: print_byte(h, '>');
1.1 schwarze 759:
1.66 schwarze 760: if (tflags & HTML_NLBEGIN)
1.67 schwarze 761: print_endline(h);
1.66 schwarze 762: else
763: h->flags |= HTML_NOSPACE;
1.18 schwarze 764:
1.66 schwarze 765: if (tflags & HTML_INDENT)
766: h->indent++;
767: if (tflags & HTML_NOINDENT)
768: h->noindent++;
1.18 schwarze 769:
1.58 schwarze 770: return t;
1.135 schwarze 771: }
772:
773: /*
774: * Print an element with an optional "id=" attribute.
1.136 schwarze 775: * If the element has phrasing content and an "id=" attribute,
776: * also add a permalink: outside if it can be in phrasing context,
777: * inside otherwise.
1.135 schwarze 778: */
779: struct tag *
780: print_otag_id(struct html *h, enum htmltag elemtype, const char *cattr,
781: struct roff_node *n)
782: {
1.136 schwarze 783: struct roff_node *nch;
1.135 schwarze 784: struct tag *ret, *t;
1.137 schwarze 785: char *id, *href;
1.135 schwarze 786:
787: ret = NULL;
1.137 schwarze 788: id = href = NULL;
1.135 schwarze 789: if (n->flags & NODE_ID)
790: id = html_make_id(n, 1);
1.137 schwarze 791: if (n->flags & NODE_HREF)
1.140 ! schwarze 792: href = id == NULL ? html_make_id(n, 2) : id;
1.137 schwarze 793: if (href != NULL && htmltags[elemtype].flags & HTML_INPHRASE)
794: ret = print_otag(h, TAG_A, "chR", "permalink", href);
1.135 schwarze 795: t = print_otag(h, elemtype, "ci", cattr, id);
796: if (ret == NULL) {
797: ret = t;
1.137 schwarze 798: if (href != NULL && (nch = n->child) != NULL) {
1.136 schwarze 799: /* man(7) is safe, it tags phrasing content only. */
800: if (n->tok > MDOC_MAX ||
801: htmltags[elemtype].flags & HTML_TOPHRASE)
802: nch = NULL;
803: else /* For mdoc(7), beware of nested blocks. */
804: while (nch != NULL && nch->type == ROFFT_TEXT)
805: nch = nch->next;
806: if (nch == NULL)
1.137 schwarze 807: print_otag(h, TAG_A, "chR", "permalink", href);
1.136 schwarze 808: }
1.135 schwarze 809: }
1.140 ! schwarze 810: free(id);
1.137 schwarze 811: if (id == NULL)
812: free(href);
1.135 schwarze 813: return ret;
1.1 schwarze 814: }
815:
816: static void
1.54 schwarze 817: print_ctag(struct html *h, struct tag *tag)
1.1 schwarze 818: {
1.66 schwarze 819: int tflags;
1.35 schwarze 820:
1.122 schwarze 821: if (tag->closed == 0) {
822: tag->closed = 1;
823: if (tag == h->metaf)
824: h->metaf = NULL;
825: if (tag == h->tblt)
826: h->tblt = NULL;
827:
828: tflags = htmltags[tag->tag].flags;
829: if (tflags & HTML_INDENT)
830: h->indent--;
831: if (tflags & HTML_NOINDENT)
832: h->noindent--;
833: if (tflags & HTML_NLEND)
834: print_endline(h);
835: print_indent(h);
836: print_byte(h, '<');
837: print_byte(h, '/');
838: print_word(h, htmltags[tag->tag].name);
839: print_byte(h, '>');
840: if (tflags & HTML_NLAFTER)
841: print_endline(h);
842: }
843: if (tag->refcnt == 0) {
844: h->tag = tag->next;
845: free(tag);
846: }
1.1 schwarze 847: }
848:
/*
 * Print the document type declaration on a line of its own.
 */
void
print_gen_decls(struct html *h)
{
	static const char doctype[] = "<!DOCTYPE html>";

	print_word(h, doctype);
	print_endline(h);
}
855:
856: void
857: print_gen_comment(struct html *h, struct roff_node *n)
858: {
859: int wantblank;
860:
861: print_word(h, "<!-- This is an automatically generated file."
862: " Do not edit.");
863: h->indent = 1;
864: wantblank = 0;
865: while (n != NULL && n->type == ROFFT_COMMENT) {
866: if (strstr(n->string, "-->") == NULL &&
867: (wantblank || *n->string != '\0')) {
868: print_endline(h);
869: print_indent(h);
870: print_word(h, n->string);
871: wantblank = *n->string != '\0';
872: }
873: n = n->next;
874: }
875: if (wantblank)
876: print_endline(h);
877: print_word(h, " -->");
878: print_endline(h);
879: h->indent = 0;
1.1 schwarze 880: }
881:
882: void
1.12 schwarze 883: print_text(struct html *h, const char *word)
1.1 schwarze 884: {
1.139 schwarze 885: print_tagged_text(h, word, NULL);
886: }
887:
888: void
889: print_tagged_text(struct html *h, const char *word, struct roff_node *n)
890: {
891: struct tag *t;
892: char *href;
893:
1.131 schwarze 894: /*
895: * Always wrap text in a paragraph unless already contained in
896: * some flow container; never put it directly into a section.
897: */
898:
899: if (h->tag->tag == TAG_SECTION)
900: print_otag(h, TAG_P, "c", "Pp");
901:
902: /* Output whitespace before this text? */
903:
1.67 schwarze 904: if (h->col && (h->flags & HTML_NOSPACE) == 0) {
1.12 schwarze 905: if ( ! (HTML_KEEP & h->flags)) {
906: if (HTML_PREKEEP & h->flags)
907: h->flags |= HTML_KEEP;
1.67 schwarze 908: print_endword(h);
1.12 schwarze 909: } else
1.86 bentley 910: print_word(h, " ");
1.12 schwarze 911: }
1.131 schwarze 912:
913: /*
1.139 schwarze 914: * Optionally switch fonts, optionally write a permalink, then
915: * print the text, optionally surrounded by HTML whitespace.
1.131 schwarze 916: */
1.1 schwarze 917:
1.125 schwarze 918: assert(h->metaf == NULL);
919: print_metaf(h);
920: print_indent(h);
1.139 schwarze 921:
1.140 ! schwarze 922: if (n != NULL && (href = html_make_id(n, 2)) != NULL) {
1.139 schwarze 923: t = print_otag(h, TAG_A, "chR", "permalink", href);
924: free(href);
925: } else
926: t = NULL;
927:
1.65 schwarze 928: if ( ! print_encode(h, word, NULL, 0)) {
1.13 schwarze 929: if ( ! (h->flags & HTML_NONOSPACE))
930: h->flags &= ~HTML_NOSPACE;
1.53 schwarze 931: h->flags &= ~HTML_NONEWLINE;
1.28 schwarze 932: } else
1.53 schwarze 933: h->flags |= HTML_NOSPACE | HTML_NONEWLINE;
1.20 schwarze 934:
1.125 schwarze 935: if (h->metaf != NULL) {
1.20 schwarze 936: print_tagq(h, h->metaf);
937: h->metaf = NULL;
1.139 schwarze 938: } else if (t != NULL)
939: print_tagq(h, t);
1.17 schwarze 940:
941: h->flags &= ~HTML_IGNDELIM;
1.1 schwarze 942: }
943:
944: void
945: print_tagq(struct html *h, const struct tag *until)
946: {
1.122 schwarze 947: struct tag *this, *next;
1.1 schwarze 948:
1.122 schwarze 949: for (this = h->tag; this != NULL; this = next) {
950: next = this == until ? NULL : this->next;
951: print_ctag(h, this);
1.1 schwarze 952: }
953: }
954:
1.120 schwarze 955: /*
956: * Close out all open elements up to but excluding suntil.
957: * Note that a paragraph just inside stays open together with it
958: * because paragraphs include subsequent phrasing content.
959: */
1.1 schwarze 960: void
961: print_stagq(struct html *h, const struct tag *suntil)
962: {
1.122 schwarze 963: struct tag *this, *next;
1.1 schwarze 964:
1.122 schwarze 965: for (this = h->tag; this != NULL; this = next) {
966: next = this->next;
967: if (this == suntil || (next == suntil &&
968: (this->tag == TAG_P || this->tag == TAG_PRE)))
969: break;
970: print_ctag(h, this);
1.1 schwarze 971: }
1.42 schwarze 972: }
973:
1.67 schwarze 974:
975: /***********************************************************************
976: * Low level output functions.
977: * They implement line breaking using a short static buffer.
978: ***********************************************************************/
979:
980: /*
981: * Buffer one HTML output byte.
982: * If the buffer is full, flush and deactivate it and start a new line.
983: * If the buffer is inactive, print directly.
984: */
985: static void
986: print_byte(struct html *h, char c)
987: {
988: if ((h->flags & HTML_BUFFER) == 0) {
989: putchar(c);
990: h->col++;
991: return;
992: }
993:
994: if (h->col + h->bufcol < sizeof(h->buf)) {
995: h->buf[h->bufcol++] = c;
996: return;
997: }
998:
999: putchar('\n');
1000: h->col = 0;
1001: print_indent(h);
1002: putchar(' ');
1003: putchar(' ');
1004: fwrite(h->buf, h->bufcol, 1, stdout);
1005: putchar(c);
1006: h->col = (h->indent + 1) * 2 + h->bufcol + 1;
1007: h->bufcol = 0;
1008: h->flags &= ~HTML_BUFFER;
1009: }
1010:
1.66 schwarze 1011: /*
1012: * If something was printed on the current output line, end it.
1.67 schwarze 1013: * Not to be called right after print_indent().
1.66 schwarze 1014: */
1.72 schwarze 1015: void
1.67 schwarze 1016: print_endline(struct html *h)
1.66 schwarze 1017: {
1.67 schwarze 1018: if (h->col == 0)
1.66 schwarze 1019: return;
1020:
1.67 schwarze 1021: if (h->bufcol) {
1022: putchar(' ');
1023: fwrite(h->buf, h->bufcol, 1, stdout);
1024: h->bufcol = 0;
1025: }
1.66 schwarze 1026: putchar('\n');
1.67 schwarze 1027: h->col = 0;
1028: h->flags |= HTML_NOSPACE;
1029: h->flags &= ~HTML_BUFFER;
1030: }
1031:
1032: /*
1033: * Flush the HTML output buffer.
1034: * If it is inactive, activate it.
1035: */
1036: static void
1037: print_endword(struct html *h)
1038: {
1039: if (h->noindent) {
1040: print_byte(h, ' ');
1041: return;
1042: }
1043:
1044: if ((h->flags & HTML_BUFFER) == 0) {
1045: h->col++;
1046: h->flags |= HTML_BUFFER;
1047: } else if (h->bufcol) {
1048: putchar(' ');
1049: fwrite(h->buf, h->bufcol, 1, stdout);
1050: h->col += h->bufcol + 1;
1051: }
1052: h->bufcol = 0;
1.66 schwarze 1053: }
1054:
1055: /*
1056: * If at the beginning of a new output line,
1057: * perform indentation and mark the line as containing output.
1058: * Make sure to really produce some output right afterwards,
1059: * but do not use print_otag() for producing it.
1060: */
1061: static void
1.67 schwarze 1062: print_indent(struct html *h)
1.66 schwarze 1063: {
1.67 schwarze 1064: size_t i;
1.66 schwarze 1065:
1.132 schwarze 1066: if (h->col || h->noindent)
1.66 schwarze 1067: return;
1068:
1.132 schwarze 1069: h->col = h->indent * 2;
1070: for (i = 0; i < h->col; i++)
1071: putchar(' ');
1.67 schwarze 1072: }
1073:
/*
 * Pass a NUL-terminated string to print_byte() one byte at a time,
 * such that it is printed or buffered depending on the current
 * state of the HTML output buffer.
 */
static void
print_word(struct html *h, const char *cp)
{
	for (; *cp != '\0'; cp++)
		print_byte(h, *cp);
}