Annotation of src/usr.bin/mandoc/html.c, Revision 1.127
1.127 ! schwarze 1: /* $OpenBSD: html.c,v 1.126 2019/08/02 17:04:55 schwarze Exp $ */
1.1 schwarze 2: /*
1.42 schwarze 3: * Copyright (c) 2008-2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
1.119 schwarze 4: * Copyright (c) 2011-2015, 2017-2019 Ingo Schwarze <schwarze@openbsd.org>
1.1 schwarze 5: *
6: * Permission to use, copy, modify, and distribute this software for any
7: * purpose with or without fee is hereby granted, provided that the above
8: * copyright notice and this permission notice appear in all copies.
9: *
1.56 schwarze 10: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
1.1 schwarze 11: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
1.56 schwarze 12: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
1.1 schwarze 13: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17: */
18: #include <sys/types.h>
1.110 schwarze 19: #include <sys/stat.h>
1.1 schwarze 20:
21: #include <assert.h>
1.3 schwarze 22: #include <ctype.h>
1.4 schwarze 23: #include <stdarg.h>
1.99 schwarze 24: #include <stddef.h>
1.1 schwarze 25: #include <stdio.h>
26: #include <stdint.h>
27: #include <stdlib.h>
28: #include <string.h>
29: #include <unistd.h>
30:
1.80 schwarze 31: #include "mandoc_aux.h"
1.99 schwarze 32: #include "mandoc_ohash.h"
1.9 schwarze 33: #include "mandoc.h"
1.80 schwarze 34: #include "roff.h"
1.1 schwarze 35: #include "out.h"
36: #include "html.h"
1.56 schwarze 37: #include "manconf.h"
1.1 schwarze 38: #include "main.h"
39:
/*
 * Static properties of one HTML element type:
 * the element name as written in tags, and a bit mask of
 * HTML_* flags controlling nesting checks and output layout.
 */
struct	htmldata {
	const char	 *name;
	int		  flags;
#define	HTML_INPHRASE	 0x001	/* Can appear in phrasing context. */
#define	HTML_TOPHRASE	 0x002	/* Establishes phrasing context. */
#define	HTML_NOSTACK	 0x004	/* Does not have an end tag. */
#define	HTML_NLBEFORE	 0x008	/* Output line break before opening. */
#define	HTML_NLBEGIN	 0x010	/* Output line break after opening. */
#define	HTML_NLEND	 0x020	/* Output line break before closing. */
#define	HTML_NLAFTER	 0x040	/* Output line break after closing. */
#define	HTML_INDENT	 0x080	/* Indent content by two spaces. */
#define	HTML_NOINDENT	 0x100	/* Exception: never indent content. */
/* Convenience combinations of the line break flags. */
#define	HTML_NLAROUND	 (HTML_NLBEFORE | HTML_NLAFTER)
#define	HTML_NLINSIDE	 (HTML_NLBEGIN | HTML_NLEND)
#define	HTML_NLALL	 (HTML_NLAROUND | HTML_NLINSIDE)
};
56:
57: static const struct htmldata htmltags[TAG_MAX] = {
1.66 schwarze 58: {"html", HTML_NLALL},
59: {"head", HTML_NLALL | HTML_INDENT},
1.127 ! schwarze 60: {"meta", HTML_NOSTACK | HTML_NLALL},
! 61: {"link", HTML_NOSTACK | HTML_NLALL},
! 62: {"style", HTML_NLALL | HTML_INDENT},
! 63: {"title", HTML_NLAROUND},
1.66 schwarze 64: {"body", HTML_NLALL},
65: {"div", HTML_NLAROUND},
1.95 schwarze 66: {"div", 0},
1.123 schwarze 67: {"section", HTML_NLALL},
1.66 schwarze 68: {"table", HTML_NLALL | HTML_INDENT},
69: {"tr", HTML_NLALL | HTML_INDENT},
70: {"td", HTML_NLAROUND},
71: {"li", HTML_NLAROUND | HTML_INDENT},
72: {"ul", HTML_NLALL | HTML_INDENT},
73: {"ol", HTML_NLALL | HTML_INDENT},
74: {"dl", HTML_NLALL | HTML_INDENT},
75: {"dt", HTML_NLAROUND},
76: {"dd", HTML_NLAROUND | HTML_INDENT},
1.127 ! schwarze 77: {"h1", HTML_TOPHRASE | HTML_NLAROUND},
! 78: {"h2", HTML_TOPHRASE | HTML_NLAROUND},
! 79: {"p", HTML_TOPHRASE | HTML_NLAROUND | HTML_INDENT},
! 80: {"pre", HTML_TOPHRASE | HTML_NLALL | HTML_NOINDENT},
! 81: {"a", HTML_INPHRASE | HTML_TOPHRASE},
! 82: {"b", HTML_INPHRASE | HTML_TOPHRASE},
! 83: {"cite", HTML_INPHRASE | HTML_TOPHRASE},
! 84: {"code", HTML_INPHRASE | HTML_TOPHRASE},
! 85: {"i", HTML_INPHRASE | HTML_TOPHRASE},
! 86: {"small", HTML_INPHRASE | HTML_TOPHRASE},
! 87: {"span", HTML_INPHRASE | HTML_TOPHRASE},
! 88: {"var", HTML_INPHRASE | HTML_TOPHRASE},
! 89: {"br", HTML_INPHRASE | HTML_NOSTACK | HTML_NLALL},
! 90: {"math", HTML_INPHRASE | HTML_NLALL | HTML_INDENT},
1.66 schwarze 91: {"mrow", 0},
92: {"mi", 0},
1.85 schwarze 93: {"mn", 0},
1.66 schwarze 94: {"mo", 0},
95: {"msup", 0},
96: {"msub", 0},
97: {"msubsup", 0},
98: {"mfrac", 0},
99: {"msqrt", 0},
100: {"mfenced", 0},
101: {"mtable", 0},
102: {"mtr", 0},
103: {"mtd", 0},
104: {"munderover", 0},
105: {"munder", 0},
106: {"mover", 0},
1.5 schwarze 107: };
108:
1.99 schwarze 109: /* Avoid duplicate HTML id= attributes. */
110: static struct ohash id_unique;
111:
1.124 schwarze 112: static void html_reset_internal(struct html *);
1.67 schwarze 113: static void print_byte(struct html *, char);
114: static void print_endword(struct html *);
115: static void print_indent(struct html *);
116: static void print_word(struct html *, const char *);
117:
1.54 schwarze 118: static void print_ctag(struct html *, struct tag *);
1.67 schwarze 119: static int print_escape(struct html *, char);
1.65 schwarze 120: static int print_encode(struct html *, const char *, const char *, int);
121: static void print_href(struct html *, const char *, const char *, int);
1.125 schwarze 122: static void print_metaf(struct html *);
1.5 schwarze 123:
1.35 schwarze 124:
1.50 schwarze 125: void *
1.61 schwarze 126: html_alloc(const struct manoutput *outopts)
1.1 schwarze 127: {
128: struct html *h;
129:
1.24 schwarze 130: h = mandoc_calloc(1, sizeof(struct html));
1.1 schwarze 131:
1.74 schwarze 132: h->tag = NULL;
1.56 schwarze 133: h->style = outopts->style;
1.110 schwarze 134: if ((h->base_man1 = outopts->man) == NULL)
135: h->base_man2 = NULL;
136: else if ((h->base_man2 = strchr(h->base_man1, ';')) != NULL)
137: *h->base_man2++ = '\0';
1.56 schwarze 138: h->base_includes = outopts->includes;
139: if (outopts->fragment)
140: h->oflags |= HTML_FRAGMENT;
1.111 schwarze 141: if (outopts->toc)
142: h->oflags |= HTML_TOC;
1.1 schwarze 143:
1.99 schwarze 144: mandoc_ohash_init(&id_unique, 4, 0);
145:
1.58 schwarze 146: return h;
1.1 schwarze 147: }
148:
1.124 schwarze 149: static void
150: html_reset_internal(struct html *h)
1.1 schwarze 151: {
152: struct tag *tag;
1.99 schwarze 153: char *cp;
154: unsigned int slot;
1.1 schwarze 155:
1.74 schwarze 156: while ((tag = h->tag) != NULL) {
157: h->tag = tag->next;
1.1 schwarze 158: free(tag);
159: }
1.99 schwarze 160: cp = ohash_first(&id_unique, &slot);
161: while (cp != NULL) {
162: free(cp);
163: cp = ohash_next(&id_unique, &slot);
164: }
165: ohash_delete(&id_unique);
1.124 schwarze 166: }
167:
168: void
169: html_reset(void *p)
170: {
171: html_reset_internal(p);
172: mandoc_ohash_init(&id_unique, 4, 0);
173: }
174:
/*
 * Discard per-document state and free the formatter itself.
 */
void
html_free(void *p)
{
	struct html	*h = p;

	html_reset_internal(h);
	free(h);
}
181:
182: void
183: print_gen_head(struct html *h)
184: {
1.42 schwarze 185: struct tag *t;
186:
1.64 schwarze 187: print_otag(h, TAG_META, "?", "charset", "utf-8");
1.92 schwarze 188: if (h->style != NULL) {
189: print_otag(h, TAG_LINK, "?h??", "rel", "stylesheet",
190: h->style, "type", "text/css", "media", "all");
191: return;
192: }
1.1 schwarze 193:
1.42 schwarze 194: /*
1.92 schwarze 195: * Print a minimal embedded style sheet.
1.42 schwarze 196: */
1.66 schwarze 197:
1.64 schwarze 198: t = print_otag(h, TAG_STYLE, "");
1.66 schwarze 199: print_text(h, "table.head, table.foot { width: 100%; }");
1.67 schwarze 200: print_endline(h);
1.66 schwarze 201: print_text(h, "td.head-rtitle, td.foot-os { text-align: right; }");
1.67 schwarze 202: print_endline(h);
1.66 schwarze 203: print_text(h, "td.head-vol { text-align: center; }");
1.67 schwarze 204: print_endline(h);
1.126 schwarze 205: print_text(h, ".Nd, .Bf, .Op { display: inline; }");
1.95 schwarze 206: print_endline(h);
1.126 schwarze 207: print_text(h, ".Pa, .Ad { font-style: italic; }");
1.96 schwarze 208: print_endline(h);
1.126 schwarze 209: print_text(h, ".Ms { font-weight: bold; }");
1.98 schwarze 210: print_endline(h);
1.126 schwarze 211: print_text(h, ".Bl-diag ");
1.94 schwarze 212: print_byte(h, '>');
213: print_text(h, " dt { font-weight: bold; }");
1.93 schwarze 214: print_endline(h);
1.126 schwarze 215: print_text(h, "code.Nm, .Fl, .Cm, .Ic, code.In, .Fd, .Fn, .Cd "
216: "{ font-weight: bold; font-family: inherit; }");
1.42 schwarze 217: print_tagq(h, t);
1.1 schwarze 218: }
219:
1.125 schwarze 220: int
221: html_setfont(struct html *h, enum mandoc_esc font)
1.5 schwarze 222: {
1.125 schwarze 223: switch (font) {
1.35 schwarze 224: case ESCAPE_FONTPREV:
1.5 schwarze 225: font = h->metal;
226: break;
1.35 schwarze 227: case ESCAPE_FONTITALIC:
228: case ESCAPE_FONTBOLD:
229: case ESCAPE_FONTBI:
1.112 schwarze 230: case ESCAPE_FONTCW:
1.125 schwarze 231: case ESCAPE_FONTROMAN:
1.112 schwarze 232: break;
1.35 schwarze 233: case ESCAPE_FONT:
1.125 schwarze 234: font = ESCAPE_FONTROMAN;
1.5 schwarze 235: break;
236: default:
1.125 schwarze 237: return 0;
1.1 schwarze 238: }
1.125 schwarze 239: h->metal = h->metac;
240: h->metac = font;
241: return 1;
242: }
1.1 schwarze 243:
1.125 schwarze 244: static void
245: print_metaf(struct html *h)
246: {
1.20 schwarze 247: if (h->metaf) {
248: print_tagq(h, h->metaf);
249: h->metaf = NULL;
250: }
1.125 schwarze 251: switch (h->metac) {
252: case ESCAPE_FONTITALIC:
1.64 schwarze 253: h->metaf = print_otag(h, TAG_I, "");
1.31 schwarze 254: break;
1.125 schwarze 255: case ESCAPE_FONTBOLD:
1.64 schwarze 256: h->metaf = print_otag(h, TAG_B, "");
1.31 schwarze 257: break;
1.125 schwarze 258: case ESCAPE_FONTBI:
1.64 schwarze 259: h->metaf = print_otag(h, TAG_B, "");
260: print_otag(h, TAG_I, "");
1.31 schwarze 261: break;
1.125 schwarze 262: case ESCAPE_FONTCW:
1.112 schwarze 263: h->metaf = print_otag(h, TAG_SPAN, "c", "Li");
264: break;
1.31 schwarze 265: default:
266: break;
267: }
1.118 schwarze 268: }
269:
1.119 schwarze 270: void
271: html_close_paragraph(struct html *h)
272: {
273: struct tag *t;
274:
1.122 schwarze 275: for (t = h->tag; t != NULL && t->closed == 0; t = t->next) {
276: switch(t->tag) {
277: case TAG_P:
278: case TAG_PRE:
1.119 schwarze 279: print_tagq(h, t);
280: break;
1.122 schwarze 281: case TAG_A:
282: print_tagq(h, t);
283: continue;
284: default:
285: continue;
1.119 schwarze 286: }
1.122 schwarze 287: break;
1.119 schwarze 288: }
289: }
290:
1.118 schwarze 291: /*
292: * ROFF_nf switches to no-fill mode, ROFF_fi to fill mode.
293: * TOKEN_NONE does not switch. The old mode is returned.
294: */
295: enum roff_tok
296: html_fillmode(struct html *h, enum roff_tok want)
297: {
298: struct tag *t;
299: enum roff_tok had;
300:
301: for (t = h->tag; t != NULL; t = t->next)
302: if (t->tag == TAG_PRE)
303: break;
304:
305: had = t == NULL ? ROFF_fi : ROFF_nf;
306:
307: if (want != had) {
308: switch (want) {
309: case ROFF_fi:
310: print_tagq(h, t);
311: break;
312: case ROFF_nf:
1.119 schwarze 313: html_close_paragraph(h);
1.118 schwarze 314: print_otag(h, TAG_PRE, "");
315: break;
316: case TOKEN_NONE:
317: break;
318: default:
319: abort();
320: }
321: }
322: return had;
1.80 schwarze 323: }
324:
325: char *
1.99 schwarze 326: html_make_id(const struct roff_node *n, int unique)
1.80 schwarze 327: {
328: const struct roff_node *nch;
1.99 schwarze 329: char *buf, *bufs, *cp;
330: unsigned int slot;
331: int suffix;
1.80 schwarze 332:
333: for (nch = n->child; nch != NULL; nch = nch->next)
334: if (nch->type != ROFFT_TEXT)
335: return NULL;
336:
337: buf = NULL;
338: deroff(&buf, n);
1.90 schwarze 339: if (buf == NULL)
340: return NULL;
1.80 schwarze 341:
1.100 schwarze 342: /*
343: * In ID attributes, only use ASCII characters that are
344: * permitted in URL-fragment strings according to the
345: * explicit list at:
346: * https://url.spec.whatwg.org/#url-fragment-string
347: */
1.80 schwarze 348:
349: for (cp = buf; *cp != '\0'; cp++)
1.100 schwarze 350: if (isalnum((unsigned char)*cp) == 0 &&
351: strchr("!$&'()*+,-./:;=?@_~", *cp) == NULL)
1.80 schwarze 352: *cp = '_';
353:
1.99 schwarze 354: if (unique == 0)
355: return buf;
356:
357: /* Avoid duplicate HTML id= attributes. */
358:
359: bufs = NULL;
360: suffix = 1;
361: slot = ohash_qlookup(&id_unique, buf);
362: cp = ohash_find(&id_unique, slot);
363: if (cp != NULL) {
364: while (cp != NULL) {
365: free(bufs);
366: if (++suffix > 127) {
367: free(buf);
368: return NULL;
369: }
370: mandoc_asprintf(&bufs, "%s_%d", buf, suffix);
371: slot = ohash_qlookup(&id_unique, bufs);
372: cp = ohash_find(&id_unique, slot);
373: }
374: free(buf);
375: buf = bufs;
376: }
377: ohash_insert(&id_unique, slot, buf);
1.80 schwarze 378: return buf;
1.1 schwarze 379: }
380:
1.5 schwarze 381: static int
1.67 schwarze 382: print_escape(struct html *h, char c)
1.38 schwarze 383: {
384:
385: switch (c) {
386: case '<':
1.67 schwarze 387: print_word(h, "<");
1.38 schwarze 388: break;
389: case '>':
1.67 schwarze 390: print_word(h, ">");
1.38 schwarze 391: break;
392: case '&':
1.67 schwarze 393: print_word(h, "&");
1.38 schwarze 394: break;
395: case '"':
1.67 schwarze 396: print_word(h, """);
1.38 schwarze 397: break;
398: case ASCII_NBRSP:
1.67 schwarze 399: print_word(h, " ");
1.38 schwarze 400: break;
401: case ASCII_HYPH:
1.67 schwarze 402: print_byte(h, '-');
1.59 schwarze 403: break;
1.38 schwarze 404: case ASCII_BREAK:
405: break;
406: default:
1.58 schwarze 407: return 0;
1.38 schwarze 408: }
1.58 schwarze 409: return 1;
1.38 schwarze 410: }
411:
412: static int
1.65 schwarze 413: print_encode(struct html *h, const char *p, const char *pend, int norecurse)
1.1 schwarze 414: {
1.67 schwarze 415: char numbuf[16];
1.84 schwarze 416: const char *seq;
1.4 schwarze 417: size_t sz;
1.84 schwarze 418: int c, len, breakline, nospace;
1.26 schwarze 419: enum mandoc_esc esc;
1.84 schwarze 420: static const char rejs[10] = { ' ', '\\', '<', '>', '&', '"',
1.33 schwarze 421: ASCII_NBRSP, ASCII_HYPH, ASCII_BREAK, '\0' };
1.5 schwarze 422:
1.65 schwarze 423: if (pend == NULL)
424: pend = strchr(p, '\0');
425:
1.84 schwarze 426: breakline = 0;
1.5 schwarze 427: nospace = 0;
1.1 schwarze 428:
1.65 schwarze 429: while (p < pend) {
1.30 schwarze 430: if (HTML_SKIPCHAR & h->flags && '\\' != *p) {
431: h->flags &= ~HTML_SKIPCHAR;
432: p++;
433: continue;
434: }
435:
1.67 schwarze 436: for (sz = strcspn(p, rejs); sz-- && p < pend; p++)
1.84 schwarze 437: print_byte(h, *p);
438:
439: if (breakline &&
440: (p >= pend || *p == ' ' || *p == ASCII_NBRSP)) {
1.115 schwarze 441: print_otag(h, TAG_BR, "");
1.84 schwarze 442: breakline = 0;
443: while (p < pend && (*p == ' ' || *p == ASCII_NBRSP))
444: p++;
445: continue;
446: }
1.4 schwarze 447:
1.65 schwarze 448: if (p >= pend)
1.26 schwarze 449: break;
450:
1.84 schwarze 451: if (*p == ' ') {
452: print_endword(h);
453: p++;
454: continue;
455: }
456:
1.67 schwarze 457: if (print_escape(h, *p++))
1.33 schwarze 458: continue;
1.4 schwarze 459:
1.26 schwarze 460: esc = mandoc_escape(&p, &seq, &len);
461: switch (esc) {
1.35 schwarze 462: case ESCAPE_FONT:
463: case ESCAPE_FONTPREV:
464: case ESCAPE_FONTBOLD:
465: case ESCAPE_FONTITALIC:
466: case ESCAPE_FONTBI:
1.112 schwarze 467: case ESCAPE_FONTCW:
1.35 schwarze 468: case ESCAPE_FONTROMAN:
1.113 schwarze 469: if (0 == norecurse) {
470: h->flags |= HTML_NOSPACE;
1.125 schwarze 471: if (html_setfont(h, esc))
472: print_metaf(h);
1.113 schwarze 473: h->flags &= ~HTML_NOSPACE;
474: }
1.30 schwarze 475: continue;
1.35 schwarze 476: case ESCAPE_SKIPCHAR:
1.30 schwarze 477: h->flags |= HTML_SKIPCHAR;
478: continue;
1.116 schwarze 479: case ESCAPE_ERROR:
480: continue;
1.30 schwarze 481: default:
482: break;
483: }
484:
485: if (h->flags & HTML_SKIPCHAR) {
486: h->flags &= ~HTML_SKIPCHAR;
487: continue;
488: }
489:
490: switch (esc) {
1.35 schwarze 491: case ESCAPE_UNICODE:
1.38 schwarze 492: /* Skip past "u" header. */
1.26 schwarze 493: c = mchars_num2uc(seq + 1, len - 1);
494: break;
1.35 schwarze 495: case ESCAPE_NUMBERED:
1.26 schwarze 496: c = mchars_num2char(seq, len);
1.51 schwarze 497: if (c < 0)
498: continue;
1.26 schwarze 499: break;
1.35 schwarze 500: case ESCAPE_SPECIAL:
1.61 schwarze 501: c = mchars_spec2cp(seq, len);
1.51 schwarze 502: if (c <= 0)
503: continue;
1.116 schwarze 504: break;
505: case ESCAPE_UNDEF:
506: c = *seq;
1.26 schwarze 507: break;
1.109 schwarze 508: case ESCAPE_DEVICE:
509: print_word(h, "html");
510: continue;
1.84 schwarze 511: case ESCAPE_BREAK:
512: breakline = 1;
513: continue;
1.35 schwarze 514: case ESCAPE_NOSPACE:
1.26 schwarze 515: if ('\0' == *p)
516: nospace = 1;
1.49 schwarze 517: continue;
1.55 schwarze 518: case ESCAPE_OVERSTRIKE:
519: if (len == 0)
520: continue;
521: c = seq[len - 1];
522: break;
1.5 schwarze 523: default:
1.49 schwarze 524: continue;
1.5 schwarze 525: }
1.51 schwarze 526: if ((c < 0x20 && c != 0x09) ||
527: (c > 0x7E && c < 0xA0))
1.49 schwarze 528: c = 0xFFFD;
1.67 schwarze 529: if (c > 0x7E) {
1.86 bentley 530: (void)snprintf(numbuf, sizeof(numbuf), "&#x%.4X;", c);
1.67 schwarze 531: print_word(h, numbuf);
532: } else if (print_escape(h, c) == 0)
533: print_byte(h, c);
1.1 schwarze 534: }
1.5 schwarze 535:
1.58 schwarze 536: return nospace;
1.1 schwarze 537: }
538:
1.6 schwarze 539: static void
1.65 schwarze 540: print_href(struct html *h, const char *name, const char *sec, int man)
1.6 schwarze 541: {
1.110 schwarze 542: struct stat sb;
1.65 schwarze 543: const char *p, *pp;
1.110 schwarze 544: char *filename;
545:
546: if (man) {
547: pp = h->base_man1;
548: if (h->base_man2 != NULL) {
549: mandoc_asprintf(&filename, "%s.%s", name, sec);
550: if (stat(filename, &sb) == -1)
551: pp = h->base_man2;
552: free(filename);
553: }
554: } else
555: pp = h->base_includes;
1.65 schwarze 556:
557: while ((p = strchr(pp, '%')) != NULL) {
558: print_encode(h, pp, p, 1);
559: if (man && p[1] == 'S') {
560: if (sec == NULL)
1.67 schwarze 561: print_byte(h, '1');
1.65 schwarze 562: else
563: print_encode(h, sec, NULL, 1);
564: } else if ((man && p[1] == 'N') ||
565: (man == 0 && p[1] == 'I'))
566: print_encode(h, name, NULL, 1);
567: else
568: print_encode(h, p, p + 2, 1);
569: pp = p + 2;
570: }
571: if (*pp != '\0')
572: print_encode(h, pp, NULL, 1);
1.6 schwarze 573: }
574:
1.1 schwarze 575: struct tag *
1.64 schwarze 576: print_otag(struct html *h, enum htmltag tag, const char *fmt, ...)
1.1 schwarze 577: {
1.64 schwarze 578: va_list ap;
1.1 schwarze 579: struct tag *t;
1.65 schwarze 580: const char *attr;
1.73 schwarze 581: char *arg1, *arg2;
1.114 schwarze 582: int style_written, tflags;
1.66 schwarze 583:
584: tflags = htmltags[tag].flags;
1.1 schwarze 585:
1.127 ! schwarze 586: /* Flow content is not allowed in phrasing context. */
! 587:
! 588: if ((tflags & HTML_INPHRASE) == 0) {
! 589: for (t = h->tag; t != NULL; t = t->next) {
! 590: if (t->closed)
! 591: continue;
! 592: assert((htmltags[t->tag].flags & HTML_TOPHRASE) == 0);
! 593: break;
! 594: }
! 595: }
! 596:
1.74 schwarze 597: /* Push this tag onto the stack of open scopes. */
1.6 schwarze 598:
1.66 schwarze 599: if ((tflags & HTML_NOSTACK) == 0) {
1.24 schwarze 600: t = mandoc_malloc(sizeof(struct tag));
1.1 schwarze 601: t->tag = tag;
1.74 schwarze 602: t->next = h->tag;
1.122 schwarze 603: t->refcnt = 0;
604: t->closed = 0;
1.74 schwarze 605: h->tag = t;
1.1 schwarze 606: } else
607: t = NULL;
608:
1.66 schwarze 609: if (tflags & HTML_NLBEFORE)
1.67 schwarze 610: print_endline(h);
611: if (h->col == 0)
612: print_indent(h);
1.66 schwarze 613: else if ((h->flags & HTML_NOSPACE) == 0) {
614: if (h->flags & HTML_KEEP)
1.86 bentley 615: print_word(h, " ");
1.66 schwarze 616: else {
617: if (h->flags & HTML_PREKEEP)
618: h->flags |= HTML_KEEP;
1.67 schwarze 619: print_endword(h);
1.12 schwarze 620: }
1.66 schwarze 621: }
1.1 schwarze 622:
1.13 schwarze 623: if ( ! (h->flags & HTML_NONOSPACE))
624: h->flags &= ~HTML_NOSPACE;
1.14 schwarze 625: else
626: h->flags |= HTML_NOSPACE;
1.13 schwarze 627:
1.6 schwarze 628: /* Print out the tag name and attributes. */
629:
1.67 schwarze 630: print_byte(h, '<');
631: print_word(h, htmltags[tag].name);
1.64 schwarze 632:
633: va_start(ap, fmt);
634:
1.114 schwarze 635: while (*fmt != '\0' && *fmt != 's') {
1.73 schwarze 636:
1.108 schwarze 637: /* Parse attributes and arguments. */
1.73 schwarze 638:
639: arg1 = va_arg(ap, char *);
1.108 schwarze 640: arg2 = NULL;
1.64 schwarze 641: switch (*fmt++) {
642: case 'c':
1.65 schwarze 643: attr = "class";
1.64 schwarze 644: break;
645: case 'h':
1.65 schwarze 646: attr = "href";
1.64 schwarze 647: break;
648: case 'i':
1.65 schwarze 649: attr = "id";
1.64 schwarze 650: break;
651: case '?':
1.73 schwarze 652: attr = arg1;
653: arg1 = va_arg(ap, char *);
1.64 schwarze 654: break;
655: default:
656: abort();
657: }
1.73 schwarze 658: if (*fmt == 'M')
659: arg2 = va_arg(ap, char *);
660: if (arg1 == NULL)
661: continue;
662:
1.108 schwarze 663: /* Print the attributes. */
1.73 schwarze 664:
1.67 schwarze 665: print_byte(h, ' ');
666: print_word(h, attr);
667: print_byte(h, '=');
668: print_byte(h, '"');
1.65 schwarze 669: switch (*fmt) {
1.78 schwarze 670: case 'I':
671: print_href(h, arg1, NULL, 0);
672: fmt++;
673: break;
1.65 schwarze 674: case 'M':
1.73 schwarze 675: print_href(h, arg1, arg2, 1);
1.65 schwarze 676: fmt++;
677: break;
1.78 schwarze 678: case 'R':
679: print_byte(h, '#');
680: print_encode(h, arg1, NULL, 1);
1.65 schwarze 681: fmt++;
1.78 schwarze 682: break;
1.65 schwarze 683: default:
1.114 schwarze 684: print_encode(h, arg1, NULL, 1);
1.65 schwarze 685: break;
686: }
1.67 schwarze 687: print_byte(h, '"');
1.64 schwarze 688: }
1.114 schwarze 689:
690: style_written = 0;
691: while (*fmt++ == 's') {
692: arg1 = va_arg(ap, char *);
693: arg2 = va_arg(ap, char *);
694: if (arg2 == NULL)
695: continue;
696: print_byte(h, ' ');
697: if (style_written == 0) {
698: print_word(h, "style=\"");
699: style_written = 1;
700: }
701: print_word(h, arg1);
702: print_byte(h, ':');
703: print_byte(h, ' ');
704: print_word(h, arg2);
705: print_byte(h, ';');
706: }
707: if (style_written)
708: print_byte(h, '"');
709:
1.64 schwarze 710: va_end(ap);
1.6 schwarze 711:
1.42 schwarze 712: /* Accommodate for "well-formed" singleton escaping. */
1.6 schwarze 713:
1.127 ! schwarze 714: if (htmltags[tag].flags & HTML_NOSTACK)
1.67 schwarze 715: print_byte(h, '/');
1.6 schwarze 716:
1.67 schwarze 717: print_byte(h, '>');
1.1 schwarze 718:
1.66 schwarze 719: if (tflags & HTML_NLBEGIN)
1.67 schwarze 720: print_endline(h);
1.66 schwarze 721: else
722: h->flags |= HTML_NOSPACE;
1.18 schwarze 723:
1.66 schwarze 724: if (tflags & HTML_INDENT)
725: h->indent++;
726: if (tflags & HTML_NOINDENT)
727: h->noindent++;
1.18 schwarze 728:
1.58 schwarze 729: return t;
1.1 schwarze 730: }
731:
732: static void
1.54 schwarze 733: print_ctag(struct html *h, struct tag *tag)
1.1 schwarze 734: {
1.66 schwarze 735: int tflags;
1.35 schwarze 736:
1.122 schwarze 737: if (tag->closed == 0) {
738: tag->closed = 1;
739: if (tag == h->metaf)
740: h->metaf = NULL;
741: if (tag == h->tblt)
742: h->tblt = NULL;
743:
744: tflags = htmltags[tag->tag].flags;
745: if (tflags & HTML_INDENT)
746: h->indent--;
747: if (tflags & HTML_NOINDENT)
748: h->noindent--;
749: if (tflags & HTML_NLEND)
750: print_endline(h);
751: print_indent(h);
752: print_byte(h, '<');
753: print_byte(h, '/');
754: print_word(h, htmltags[tag->tag].name);
755: print_byte(h, '>');
756: if (tflags & HTML_NLAFTER)
757: print_endline(h);
758: }
759: if (tag->refcnt == 0) {
760: h->tag = tag->next;
761: free(tag);
762: }
1.1 schwarze 763: }
764:
/*
 * Print the document type declaration on a line of its own.
 */
void
print_gen_decls(struct html *h)
{
	static const char	 doctype[] = "<!DOCTYPE html>";

	print_word(h, doctype);
	print_endline(h);
}
771:
772: void
773: print_gen_comment(struct html *h, struct roff_node *n)
774: {
775: int wantblank;
776:
777: print_word(h, "<!-- This is an automatically generated file."
778: " Do not edit.");
779: h->indent = 1;
780: wantblank = 0;
781: while (n != NULL && n->type == ROFFT_COMMENT) {
782: if (strstr(n->string, "-->") == NULL &&
783: (wantblank || *n->string != '\0')) {
784: print_endline(h);
785: print_indent(h);
786: print_word(h, n->string);
787: wantblank = *n->string != '\0';
788: }
789: n = n->next;
790: }
791: if (wantblank)
792: print_endline(h);
793: print_word(h, " -->");
794: print_endline(h);
795: h->indent = 0;
1.1 schwarze 796: }
797:
798: void
1.12 schwarze 799: print_text(struct html *h, const char *word)
1.1 schwarze 800: {
1.67 schwarze 801: if (h->col && (h->flags & HTML_NOSPACE) == 0) {
1.12 schwarze 802: if ( ! (HTML_KEEP & h->flags)) {
803: if (HTML_PREKEEP & h->flags)
804: h->flags |= HTML_KEEP;
1.67 schwarze 805: print_endword(h);
1.12 schwarze 806: } else
1.86 bentley 807: print_word(h, " ");
1.12 schwarze 808: }
1.1 schwarze 809:
1.125 schwarze 810: assert(h->metaf == NULL);
811: print_metaf(h);
812: print_indent(h);
1.65 schwarze 813: if ( ! print_encode(h, word, NULL, 0)) {
1.13 schwarze 814: if ( ! (h->flags & HTML_NONOSPACE))
815: h->flags &= ~HTML_NOSPACE;
1.53 schwarze 816: h->flags &= ~HTML_NONEWLINE;
1.28 schwarze 817: } else
1.53 schwarze 818: h->flags |= HTML_NOSPACE | HTML_NONEWLINE;
1.20 schwarze 819:
1.125 schwarze 820: if (h->metaf != NULL) {
1.20 schwarze 821: print_tagq(h, h->metaf);
822: h->metaf = NULL;
823: }
1.17 schwarze 824:
825: h->flags &= ~HTML_IGNDELIM;
1.1 schwarze 826: }
827:
828: void
829: print_tagq(struct html *h, const struct tag *until)
830: {
1.122 schwarze 831: struct tag *this, *next;
1.1 schwarze 832:
1.122 schwarze 833: for (this = h->tag; this != NULL; this = next) {
834: next = this == until ? NULL : this->next;
835: print_ctag(h, this);
1.1 schwarze 836: }
837: }
838:
1.120 schwarze 839: /*
840: * Close out all open elements up to but excluding suntil.
841: * Note that a paragraph just inside stays open together with it
842: * because paragraphs include subsequent phrasing content.
843: */
1.1 schwarze 844: void
845: print_stagq(struct html *h, const struct tag *suntil)
846: {
1.122 schwarze 847: struct tag *this, *next;
1.1 schwarze 848:
1.122 schwarze 849: for (this = h->tag; this != NULL; this = next) {
850: next = this->next;
851: if (this == suntil || (next == suntil &&
852: (this->tag == TAG_P || this->tag == TAG_PRE)))
853: break;
854: print_ctag(h, this);
1.1 schwarze 855: }
1.42 schwarze 856: }
857:
1.67 schwarze 858:
859: /***********************************************************************
860: * Low level output functions.
861: * They implement line breaking using a short static buffer.
862: ***********************************************************************/
863:
864: /*
865: * Buffer one HTML output byte.
866: * If the buffer is full, flush and deactivate it and start a new line.
867: * If the buffer is inactive, print directly.
868: */
869: static void
870: print_byte(struct html *h, char c)
871: {
872: if ((h->flags & HTML_BUFFER) == 0) {
873: putchar(c);
874: h->col++;
875: return;
876: }
877:
878: if (h->col + h->bufcol < sizeof(h->buf)) {
879: h->buf[h->bufcol++] = c;
880: return;
881: }
882:
883: putchar('\n');
884: h->col = 0;
885: print_indent(h);
886: putchar(' ');
887: putchar(' ');
888: fwrite(h->buf, h->bufcol, 1, stdout);
889: putchar(c);
890: h->col = (h->indent + 1) * 2 + h->bufcol + 1;
891: h->bufcol = 0;
892: h->flags &= ~HTML_BUFFER;
893: }
894:
1.66 schwarze 895: /*
896: * If something was printed on the current output line, end it.
1.67 schwarze 897: * Not to be called right after print_indent().
1.66 schwarze 898: */
1.72 schwarze 899: void
1.67 schwarze 900: print_endline(struct html *h)
1.66 schwarze 901: {
1.67 schwarze 902: if (h->col == 0)
1.66 schwarze 903: return;
904:
1.67 schwarze 905: if (h->bufcol) {
906: putchar(' ');
907: fwrite(h->buf, h->bufcol, 1, stdout);
908: h->bufcol = 0;
909: }
1.66 schwarze 910: putchar('\n');
1.67 schwarze 911: h->col = 0;
912: h->flags |= HTML_NOSPACE;
913: h->flags &= ~HTML_BUFFER;
914: }
915:
916: /*
917: * Flush the HTML output buffer.
918: * If it is inactive, activate it.
919: */
920: static void
921: print_endword(struct html *h)
922: {
923: if (h->noindent) {
924: print_byte(h, ' ');
925: return;
926: }
927:
928: if ((h->flags & HTML_BUFFER) == 0) {
929: h->col++;
930: h->flags |= HTML_BUFFER;
931: } else if (h->bufcol) {
932: putchar(' ');
933: fwrite(h->buf, h->bufcol, 1, stdout);
934: h->col += h->bufcol + 1;
935: }
936: h->bufcol = 0;
1.66 schwarze 937: }
938:
939: /*
940: * If at the beginning of a new output line,
941: * perform indentation and mark the line as containing output.
942: * Make sure to really produce some output right afterwards,
943: * but do not use print_otag() for producing it.
944: */
945: static void
1.67 schwarze 946: print_indent(struct html *h)
1.66 schwarze 947: {
1.67 schwarze 948: size_t i;
1.66 schwarze 949:
1.67 schwarze 950: if (h->col)
1.66 schwarze 951: return;
952:
1.67 schwarze 953: if (h->noindent == 0) {
954: h->col = h->indent * 2;
955: for (i = 0; i < h->col; i++)
1.66 schwarze 956: putchar(' ');
1.67 schwarze 957: }
958: h->flags &= ~HTML_NOSPACE;
959: }
960:
/*
 * Pass a NUL-terminated string to print_byte() one byte at a time,
 * such that it is printed or buffered depending on the current
 * state of the HTML output buffer.  No escaping is performed.
 */
static void
print_word(struct html *h, const char *cp)
{
	for (; *cp != '\0'; cp++)
		print_byte(h, *cp);
}
1.3 schwarze 970: }