Annotation of src/usr.bin/mandoc/html.c, Revision 1.125
1.125 ! schwarze 1: /* $OpenBSD: html.c,v 1.124 2019/03/03 13:01:47 schwarze Exp $ */
1.1 schwarze 2: /*
1.42 schwarze 3: * Copyright (c) 2008-2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
1.119 schwarze 4: * Copyright (c) 2011-2015, 2017-2019 Ingo Schwarze <schwarze@openbsd.org>
1.1 schwarze 5: *
6: * Permission to use, copy, modify, and distribute this software for any
7: * purpose with or without fee is hereby granted, provided that the above
8: * copyright notice and this permission notice appear in all copies.
9: *
1.56 schwarze 10: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
1.1 schwarze 11: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
1.56 schwarze 12: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
1.1 schwarze 13: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17: */
18: #include <sys/types.h>
1.110 schwarze 19: #include <sys/stat.h>
1.1 schwarze 20:
21: #include <assert.h>
1.3 schwarze 22: #include <ctype.h>
1.4 schwarze 23: #include <stdarg.h>
1.99 schwarze 24: #include <stddef.h>
1.1 schwarze 25: #include <stdio.h>
26: #include <stdint.h>
27: #include <stdlib.h>
28: #include <string.h>
29: #include <unistd.h>
30:
1.80 schwarze 31: #include "mandoc_aux.h"
1.99 schwarze 32: #include "mandoc_ohash.h"
1.9 schwarze 33: #include "mandoc.h"
1.80 schwarze 34: #include "roff.h"
1.1 schwarze 35: #include "out.h"
36: #include "html.h"
1.56 schwarze 37: #include "manconf.h"
1.1 schwarze 38: #include "main.h"
39:
/*
 * Formatting flags for HTML elements, stored in htmldata.flags.
 */
#define	HTML_NOSTACK	 (1 << 0)  /* not pushed onto the open-tag stack */
#define	HTML_AUTOCLOSE	 (1 << 1)  /* a '/' is printed before the '>' */
#define	HTML_NLBEFORE	 (1 << 2)  /* end the line before the start tag */
#define	HTML_NLBEGIN	 (1 << 3)  /* end the line after the start tag */
#define	HTML_NLEND	 (1 << 4)  /* end the line before the end tag */
#define	HTML_NLAFTER	 (1 << 5)  /* end the line after the end tag */
#define	HTML_NLAROUND	 (HTML_NLBEFORE | HTML_NLAFTER)
#define	HTML_NLINSIDE	 (HTML_NLBEGIN | HTML_NLEND)
#define	HTML_NLALL	 (HTML_NLAROUND | HTML_NLINSIDE)
#define	HTML_INDENT	 (1 << 6)  /* content increases h->indent */
#define	HTML_NOINDENT	 (1 << 7)  /* content increases h->noindent */

/*
 * Static description of one HTML element:
 * the name written to the output and its formatting flags.
 */
struct htmldata {
	const char	 *name;
	int		  flags;
};
55:
56: static const struct htmldata htmltags[TAG_MAX] = {
1.66 schwarze 57: {"html", HTML_NLALL},
58: {"head", HTML_NLALL | HTML_INDENT},
59: {"body", HTML_NLALL},
60: {"meta", HTML_NOSTACK | HTML_AUTOCLOSE | HTML_NLALL},
61: {"title", HTML_NLAROUND},
62: {"div", HTML_NLAROUND},
1.95 schwarze 63: {"div", 0},
1.123 schwarze 64: {"section", HTML_NLALL},
1.66 schwarze 65: {"h1", HTML_NLAROUND},
66: {"h2", HTML_NLAROUND},
67: {"span", 0},
68: {"link", HTML_NOSTACK | HTML_AUTOCLOSE | HTML_NLALL},
69: {"br", HTML_NOSTACK | HTML_AUTOCLOSE | HTML_NLALL},
70: {"a", 0},
71: {"table", HTML_NLALL | HTML_INDENT},
72: {"tr", HTML_NLALL | HTML_INDENT},
73: {"td", HTML_NLAROUND},
74: {"li", HTML_NLAROUND | HTML_INDENT},
75: {"ul", HTML_NLALL | HTML_INDENT},
76: {"ol", HTML_NLALL | HTML_INDENT},
77: {"dl", HTML_NLALL | HTML_INDENT},
78: {"dt", HTML_NLAROUND},
79: {"dd", HTML_NLAROUND | HTML_INDENT},
1.119 schwarze 80: {"p", HTML_NLAROUND | HTML_INDENT},
1.66 schwarze 81: {"pre", HTML_NLALL | HTML_NOINDENT},
1.77 schwarze 82: {"var", 0},
1.76 schwarze 83: {"cite", 0},
1.66 schwarze 84: {"b", 0},
85: {"i", 0},
86: {"code", 0},
87: {"small", 0},
88: {"style", HTML_NLALL | HTML_INDENT},
89: {"math", HTML_NLALL | HTML_INDENT},
90: {"mrow", 0},
91: {"mi", 0},
1.85 schwarze 92: {"mn", 0},
1.66 schwarze 93: {"mo", 0},
94: {"msup", 0},
95: {"msub", 0},
96: {"msubsup", 0},
97: {"mfrac", 0},
98: {"msqrt", 0},
99: {"mfenced", 0},
100: {"mtable", 0},
101: {"mtr", 0},
102: {"mtd", 0},
103: {"munderover", 0},
104: {"munder", 0},
105: {"mover", 0},
1.5 schwarze 106: };
107:
1.99 schwarze 108: /* Avoid duplicate HTML id= attributes. */
109: static struct ohash id_unique;
110:
1.124 schwarze 111: static void html_reset_internal(struct html *);
1.67 schwarze 112: static void print_byte(struct html *, char);
113: static void print_endword(struct html *);
114: static void print_indent(struct html *);
115: static void print_word(struct html *, const char *);
116:
1.54 schwarze 117: static void print_ctag(struct html *, struct tag *);
1.67 schwarze 118: static int print_escape(struct html *, char);
1.65 schwarze 119: static int print_encode(struct html *, const char *, const char *, int);
120: static void print_href(struct html *, const char *, const char *, int);
1.125 ! schwarze 121: static void print_metaf(struct html *);
1.5 schwarze 122:
1.35 schwarze 123:
1.50 schwarze 124: void *
1.61 schwarze 125: html_alloc(const struct manoutput *outopts)
1.1 schwarze 126: {
127: struct html *h;
128:
1.24 schwarze 129: h = mandoc_calloc(1, sizeof(struct html));
1.1 schwarze 130:
1.74 schwarze 131: h->tag = NULL;
1.56 schwarze 132: h->style = outopts->style;
1.110 schwarze 133: if ((h->base_man1 = outopts->man) == NULL)
134: h->base_man2 = NULL;
135: else if ((h->base_man2 = strchr(h->base_man1, ';')) != NULL)
136: *h->base_man2++ = '\0';
1.56 schwarze 137: h->base_includes = outopts->includes;
138: if (outopts->fragment)
139: h->oflags |= HTML_FRAGMENT;
1.111 schwarze 140: if (outopts->toc)
141: h->oflags |= HTML_TOC;
1.1 schwarze 142:
1.99 schwarze 143: mandoc_ohash_init(&id_unique, 4, 0);
144:
1.58 schwarze 145: return h;
1.1 schwarze 146: }
147:
1.124 schwarze 148: static void
149: html_reset_internal(struct html *h)
1.1 schwarze 150: {
151: struct tag *tag;
1.99 schwarze 152: char *cp;
153: unsigned int slot;
1.1 schwarze 154:
1.74 schwarze 155: while ((tag = h->tag) != NULL) {
156: h->tag = tag->next;
1.1 schwarze 157: free(tag);
158: }
1.99 schwarze 159: cp = ohash_first(&id_unique, &slot);
160: while (cp != NULL) {
161: free(cp);
162: cp = ohash_next(&id_unique, &slot);
163: }
164: ohash_delete(&id_unique);
1.124 schwarze 165: }
166:
167: void
168: html_reset(void *p)
169: {
170: html_reset_internal(p);
171: mandoc_ohash_init(&id_unique, 4, 0);
172: }
173:
/*
 * Discard per-document state and free the formatter object itself.
 */
void
html_free(void *p)
{
	struct html	*h;

	h = p;
	html_reset_internal(h);
	free(h);
}
180:
181: void
182: print_gen_head(struct html *h)
183: {
1.42 schwarze 184: struct tag *t;
185:
1.64 schwarze 186: print_otag(h, TAG_META, "?", "charset", "utf-8");
1.92 schwarze 187: if (h->style != NULL) {
188: print_otag(h, TAG_LINK, "?h??", "rel", "stylesheet",
189: h->style, "type", "text/css", "media", "all");
190: return;
191: }
1.1 schwarze 192:
1.42 schwarze 193: /*
1.92 schwarze 194: * Print a minimal embedded style sheet.
1.42 schwarze 195: */
1.66 schwarze 196:
1.64 schwarze 197: t = print_otag(h, TAG_STYLE, "");
1.66 schwarze 198: print_text(h, "table.head, table.foot { width: 100%; }");
1.67 schwarze 199: print_endline(h);
1.66 schwarze 200: print_text(h, "td.head-rtitle, td.foot-os { text-align: right; }");
1.67 schwarze 201: print_endline(h);
1.66 schwarze 202: print_text(h, "td.head-vol { text-align: center; }");
1.67 schwarze 203: print_endline(h);
1.68 schwarze 204: print_text(h, "div.Pp { margin: 1ex 0ex; }");
1.95 schwarze 205: print_endline(h);
206: print_text(h, "div.Nd, div.Bf, div.Op { display: inline; }");
1.96 schwarze 207: print_endline(h);
1.97 schwarze 208: print_text(h, "span.Pa, span.Ad { font-style: italic; }");
1.98 schwarze 209: print_endline(h);
210: print_text(h, "span.Ms { font-weight: bold; }");
1.94 schwarze 211: print_endline(h);
212: print_text(h, "dl.Bl-diag ");
213: print_byte(h, '>');
214: print_text(h, " dt { font-weight: bold; }");
1.93 schwarze 215: print_endline(h);
216: print_text(h, "code.Nm, code.Fl, code.Cm, code.Ic, "
217: "code.In, code.Fd, code.Fn,");
218: print_endline(h);
219: print_text(h, "code.Cd { font-weight: bold; "
220: "font-family: inherit; }");
1.42 schwarze 221: print_tagq(h, t);
1.1 schwarze 222: }
223:
1.125 ! schwarze 224: int
! 225: html_setfont(struct html *h, enum mandoc_esc font)
1.5 schwarze 226: {
1.125 ! schwarze 227: switch (font) {
1.35 schwarze 228: case ESCAPE_FONTPREV:
1.5 schwarze 229: font = h->metal;
230: break;
1.35 schwarze 231: case ESCAPE_FONTITALIC:
232: case ESCAPE_FONTBOLD:
233: case ESCAPE_FONTBI:
1.112 schwarze 234: case ESCAPE_FONTCW:
1.125 ! schwarze 235: case ESCAPE_FONTROMAN:
1.112 schwarze 236: break;
1.35 schwarze 237: case ESCAPE_FONT:
1.125 ! schwarze 238: font = ESCAPE_FONTROMAN;
1.5 schwarze 239: break;
240: default:
1.125 ! schwarze 241: return 0;
1.1 schwarze 242: }
1.125 ! schwarze 243: h->metal = h->metac;
! 244: h->metac = font;
! 245: return 1;
! 246: }
1.1 schwarze 247:
1.125 ! schwarze 248: static void
! 249: print_metaf(struct html *h)
! 250: {
1.20 schwarze 251: if (h->metaf) {
252: print_tagq(h, h->metaf);
253: h->metaf = NULL;
254: }
1.125 ! schwarze 255: switch (h->metac) {
! 256: case ESCAPE_FONTITALIC:
1.64 schwarze 257: h->metaf = print_otag(h, TAG_I, "");
1.31 schwarze 258: break;
1.125 ! schwarze 259: case ESCAPE_FONTBOLD:
1.64 schwarze 260: h->metaf = print_otag(h, TAG_B, "");
1.31 schwarze 261: break;
1.125 ! schwarze 262: case ESCAPE_FONTBI:
1.64 schwarze 263: h->metaf = print_otag(h, TAG_B, "");
264: print_otag(h, TAG_I, "");
1.31 schwarze 265: break;
1.125 ! schwarze 266: case ESCAPE_FONTCW:
1.112 schwarze 267: h->metaf = print_otag(h, TAG_SPAN, "c", "Li");
268: break;
1.31 schwarze 269: default:
270: break;
271: }
1.118 schwarze 272: }
273:
1.119 schwarze 274: void
275: html_close_paragraph(struct html *h)
276: {
277: struct tag *t;
278:
1.122 schwarze 279: for (t = h->tag; t != NULL && t->closed == 0; t = t->next) {
280: switch(t->tag) {
281: case TAG_P:
282: case TAG_PRE:
1.119 schwarze 283: print_tagq(h, t);
284: break;
1.122 schwarze 285: case TAG_A:
286: print_tagq(h, t);
287: continue;
288: default:
289: continue;
1.119 schwarze 290: }
1.122 schwarze 291: break;
1.119 schwarze 292: }
293: }
294:
1.118 schwarze 295: /*
296: * ROFF_nf switches to no-fill mode, ROFF_fi to fill mode.
297: * TOKEN_NONE does not switch. The old mode is returned.
298: */
299: enum roff_tok
300: html_fillmode(struct html *h, enum roff_tok want)
301: {
302: struct tag *t;
303: enum roff_tok had;
304:
305: for (t = h->tag; t != NULL; t = t->next)
306: if (t->tag == TAG_PRE)
307: break;
308:
309: had = t == NULL ? ROFF_fi : ROFF_nf;
310:
311: if (want != had) {
312: switch (want) {
313: case ROFF_fi:
314: print_tagq(h, t);
315: break;
316: case ROFF_nf:
1.119 schwarze 317: html_close_paragraph(h);
1.118 schwarze 318: print_otag(h, TAG_PRE, "");
319: break;
320: case TOKEN_NONE:
321: break;
322: default:
323: abort();
324: }
325: }
326: return had;
1.80 schwarze 327: }
328:
329: char *
1.99 schwarze 330: html_make_id(const struct roff_node *n, int unique)
1.80 schwarze 331: {
332: const struct roff_node *nch;
1.99 schwarze 333: char *buf, *bufs, *cp;
334: unsigned int slot;
335: int suffix;
1.80 schwarze 336:
337: for (nch = n->child; nch != NULL; nch = nch->next)
338: if (nch->type != ROFFT_TEXT)
339: return NULL;
340:
341: buf = NULL;
342: deroff(&buf, n);
1.90 schwarze 343: if (buf == NULL)
344: return NULL;
1.80 schwarze 345:
1.100 schwarze 346: /*
347: * In ID attributes, only use ASCII characters that are
348: * permitted in URL-fragment strings according to the
349: * explicit list at:
350: * https://url.spec.whatwg.org/#url-fragment-string
351: */
1.80 schwarze 352:
353: for (cp = buf; *cp != '\0'; cp++)
1.100 schwarze 354: if (isalnum((unsigned char)*cp) == 0 &&
355: strchr("!$&'()*+,-./:;=?@_~", *cp) == NULL)
1.80 schwarze 356: *cp = '_';
357:
1.99 schwarze 358: if (unique == 0)
359: return buf;
360:
361: /* Avoid duplicate HTML id= attributes. */
362:
363: bufs = NULL;
364: suffix = 1;
365: slot = ohash_qlookup(&id_unique, buf);
366: cp = ohash_find(&id_unique, slot);
367: if (cp != NULL) {
368: while (cp != NULL) {
369: free(bufs);
370: if (++suffix > 127) {
371: free(buf);
372: return NULL;
373: }
374: mandoc_asprintf(&bufs, "%s_%d", buf, suffix);
375: slot = ohash_qlookup(&id_unique, bufs);
376: cp = ohash_find(&id_unique, slot);
377: }
378: free(buf);
379: buf = bufs;
380: }
381: ohash_insert(&id_unique, slot, buf);
1.80 schwarze 382: return buf;
1.1 schwarze 383: }
384:
1.5 schwarze 385: static int
1.67 schwarze 386: print_escape(struct html *h, char c)
1.38 schwarze 387: {
388:
389: switch (c) {
390: case '<':
1.67 schwarze 391: print_word(h, "<");
1.38 schwarze 392: break;
393: case '>':
1.67 schwarze 394: print_word(h, ">");
1.38 schwarze 395: break;
396: case '&':
1.67 schwarze 397: print_word(h, "&");
1.38 schwarze 398: break;
399: case '"':
1.67 schwarze 400: print_word(h, """);
1.38 schwarze 401: break;
402: case ASCII_NBRSP:
1.67 schwarze 403: print_word(h, " ");
1.38 schwarze 404: break;
405: case ASCII_HYPH:
1.67 schwarze 406: print_byte(h, '-');
1.59 schwarze 407: break;
1.38 schwarze 408: case ASCII_BREAK:
409: break;
410: default:
1.58 schwarze 411: return 0;
1.38 schwarze 412: }
1.58 schwarze 413: return 1;
1.38 schwarze 414: }
415:
416: static int
1.65 schwarze 417: print_encode(struct html *h, const char *p, const char *pend, int norecurse)
1.1 schwarze 418: {
1.67 schwarze 419: char numbuf[16];
1.84 schwarze 420: const char *seq;
1.4 schwarze 421: size_t sz;
1.84 schwarze 422: int c, len, breakline, nospace;
1.26 schwarze 423: enum mandoc_esc esc;
1.84 schwarze 424: static const char rejs[10] = { ' ', '\\', '<', '>', '&', '"',
1.33 schwarze 425: ASCII_NBRSP, ASCII_HYPH, ASCII_BREAK, '\0' };
1.5 schwarze 426:
1.65 schwarze 427: if (pend == NULL)
428: pend = strchr(p, '\0');
429:
1.84 schwarze 430: breakline = 0;
1.5 schwarze 431: nospace = 0;
1.1 schwarze 432:
1.65 schwarze 433: while (p < pend) {
1.30 schwarze 434: if (HTML_SKIPCHAR & h->flags && '\\' != *p) {
435: h->flags &= ~HTML_SKIPCHAR;
436: p++;
437: continue;
438: }
439:
1.67 schwarze 440: for (sz = strcspn(p, rejs); sz-- && p < pend; p++)
1.84 schwarze 441: print_byte(h, *p);
442:
443: if (breakline &&
444: (p >= pend || *p == ' ' || *p == ASCII_NBRSP)) {
1.115 schwarze 445: print_otag(h, TAG_BR, "");
1.84 schwarze 446: breakline = 0;
447: while (p < pend && (*p == ' ' || *p == ASCII_NBRSP))
448: p++;
449: continue;
450: }
1.4 schwarze 451:
1.65 schwarze 452: if (p >= pend)
1.26 schwarze 453: break;
454:
1.84 schwarze 455: if (*p == ' ') {
456: print_endword(h);
457: p++;
458: continue;
459: }
460:
1.67 schwarze 461: if (print_escape(h, *p++))
1.33 schwarze 462: continue;
1.4 schwarze 463:
1.26 schwarze 464: esc = mandoc_escape(&p, &seq, &len);
465: switch (esc) {
1.35 schwarze 466: case ESCAPE_FONT:
467: case ESCAPE_FONTPREV:
468: case ESCAPE_FONTBOLD:
469: case ESCAPE_FONTITALIC:
470: case ESCAPE_FONTBI:
1.112 schwarze 471: case ESCAPE_FONTCW:
1.35 schwarze 472: case ESCAPE_FONTROMAN:
1.113 schwarze 473: if (0 == norecurse) {
474: h->flags |= HTML_NOSPACE;
1.125 ! schwarze 475: if (html_setfont(h, esc))
! 476: print_metaf(h);
1.113 schwarze 477: h->flags &= ~HTML_NOSPACE;
478: }
1.30 schwarze 479: continue;
1.35 schwarze 480: case ESCAPE_SKIPCHAR:
1.30 schwarze 481: h->flags |= HTML_SKIPCHAR;
482: continue;
1.116 schwarze 483: case ESCAPE_ERROR:
484: continue;
1.30 schwarze 485: default:
486: break;
487: }
488:
489: if (h->flags & HTML_SKIPCHAR) {
490: h->flags &= ~HTML_SKIPCHAR;
491: continue;
492: }
493:
494: switch (esc) {
1.35 schwarze 495: case ESCAPE_UNICODE:
1.38 schwarze 496: /* Skip past "u" header. */
1.26 schwarze 497: c = mchars_num2uc(seq + 1, len - 1);
498: break;
1.35 schwarze 499: case ESCAPE_NUMBERED:
1.26 schwarze 500: c = mchars_num2char(seq, len);
1.51 schwarze 501: if (c < 0)
502: continue;
1.26 schwarze 503: break;
1.35 schwarze 504: case ESCAPE_SPECIAL:
1.61 schwarze 505: c = mchars_spec2cp(seq, len);
1.51 schwarze 506: if (c <= 0)
507: continue;
1.116 schwarze 508: break;
509: case ESCAPE_UNDEF:
510: c = *seq;
1.26 schwarze 511: break;
1.109 schwarze 512: case ESCAPE_DEVICE:
513: print_word(h, "html");
514: continue;
1.84 schwarze 515: case ESCAPE_BREAK:
516: breakline = 1;
517: continue;
1.35 schwarze 518: case ESCAPE_NOSPACE:
1.26 schwarze 519: if ('\0' == *p)
520: nospace = 1;
1.49 schwarze 521: continue;
1.55 schwarze 522: case ESCAPE_OVERSTRIKE:
523: if (len == 0)
524: continue;
525: c = seq[len - 1];
526: break;
1.5 schwarze 527: default:
1.49 schwarze 528: continue;
1.5 schwarze 529: }
1.51 schwarze 530: if ((c < 0x20 && c != 0x09) ||
531: (c > 0x7E && c < 0xA0))
1.49 schwarze 532: c = 0xFFFD;
1.67 schwarze 533: if (c > 0x7E) {
1.86 bentley 534: (void)snprintf(numbuf, sizeof(numbuf), "&#x%.4X;", c);
1.67 schwarze 535: print_word(h, numbuf);
536: } else if (print_escape(h, c) == 0)
537: print_byte(h, c);
1.1 schwarze 538: }
1.5 schwarze 539:
1.58 schwarze 540: return nospace;
1.1 schwarze 541: }
542:
1.6 schwarze 543: static void
1.65 schwarze 544: print_href(struct html *h, const char *name, const char *sec, int man)
1.6 schwarze 545: {
1.110 schwarze 546: struct stat sb;
1.65 schwarze 547: const char *p, *pp;
1.110 schwarze 548: char *filename;
549:
550: if (man) {
551: pp = h->base_man1;
552: if (h->base_man2 != NULL) {
553: mandoc_asprintf(&filename, "%s.%s", name, sec);
554: if (stat(filename, &sb) == -1)
555: pp = h->base_man2;
556: free(filename);
557: }
558: } else
559: pp = h->base_includes;
1.65 schwarze 560:
561: while ((p = strchr(pp, '%')) != NULL) {
562: print_encode(h, pp, p, 1);
563: if (man && p[1] == 'S') {
564: if (sec == NULL)
1.67 schwarze 565: print_byte(h, '1');
1.65 schwarze 566: else
567: print_encode(h, sec, NULL, 1);
568: } else if ((man && p[1] == 'N') ||
569: (man == 0 && p[1] == 'I'))
570: print_encode(h, name, NULL, 1);
571: else
572: print_encode(h, p, p + 2, 1);
573: pp = p + 2;
574: }
575: if (*pp != '\0')
576: print_encode(h, pp, NULL, 1);
1.6 schwarze 577: }
578:
1.1 schwarze 579: struct tag *
1.64 schwarze 580: print_otag(struct html *h, enum htmltag tag, const char *fmt, ...)
1.1 schwarze 581: {
1.64 schwarze 582: va_list ap;
1.1 schwarze 583: struct tag *t;
1.65 schwarze 584: const char *attr;
1.73 schwarze 585: char *arg1, *arg2;
1.114 schwarze 586: int style_written, tflags;
1.66 schwarze 587:
588: tflags = htmltags[tag].flags;
1.1 schwarze 589:
1.74 schwarze 590: /* Push this tag onto the stack of open scopes. */
1.6 schwarze 591:
1.66 schwarze 592: if ((tflags & HTML_NOSTACK) == 0) {
1.24 schwarze 593: t = mandoc_malloc(sizeof(struct tag));
1.1 schwarze 594: t->tag = tag;
1.74 schwarze 595: t->next = h->tag;
1.122 schwarze 596: t->refcnt = 0;
597: t->closed = 0;
1.74 schwarze 598: h->tag = t;
1.1 schwarze 599: } else
600: t = NULL;
601:
1.66 schwarze 602: if (tflags & HTML_NLBEFORE)
1.67 schwarze 603: print_endline(h);
604: if (h->col == 0)
605: print_indent(h);
1.66 schwarze 606: else if ((h->flags & HTML_NOSPACE) == 0) {
607: if (h->flags & HTML_KEEP)
1.86 bentley 608: print_word(h, " ");
1.66 schwarze 609: else {
610: if (h->flags & HTML_PREKEEP)
611: h->flags |= HTML_KEEP;
1.67 schwarze 612: print_endword(h);
1.12 schwarze 613: }
1.66 schwarze 614: }
1.1 schwarze 615:
1.13 schwarze 616: if ( ! (h->flags & HTML_NONOSPACE))
617: h->flags &= ~HTML_NOSPACE;
1.14 schwarze 618: else
619: h->flags |= HTML_NOSPACE;
1.13 schwarze 620:
1.6 schwarze 621: /* Print out the tag name and attributes. */
622:
1.67 schwarze 623: print_byte(h, '<');
624: print_word(h, htmltags[tag].name);
1.64 schwarze 625:
626: va_start(ap, fmt);
627:
1.114 schwarze 628: while (*fmt != '\0' && *fmt != 's') {
1.73 schwarze 629:
1.108 schwarze 630: /* Parse attributes and arguments. */
1.73 schwarze 631:
632: arg1 = va_arg(ap, char *);
1.108 schwarze 633: arg2 = NULL;
1.64 schwarze 634: switch (*fmt++) {
635: case 'c':
1.65 schwarze 636: attr = "class";
1.64 schwarze 637: break;
638: case 'h':
1.65 schwarze 639: attr = "href";
1.64 schwarze 640: break;
641: case 'i':
1.65 schwarze 642: attr = "id";
1.64 schwarze 643: break;
644: case '?':
1.73 schwarze 645: attr = arg1;
646: arg1 = va_arg(ap, char *);
1.64 schwarze 647: break;
648: default:
649: abort();
650: }
1.73 schwarze 651: if (*fmt == 'M')
652: arg2 = va_arg(ap, char *);
653: if (arg1 == NULL)
654: continue;
655:
1.108 schwarze 656: /* Print the attributes. */
1.73 schwarze 657:
1.67 schwarze 658: print_byte(h, ' ');
659: print_word(h, attr);
660: print_byte(h, '=');
661: print_byte(h, '"');
1.65 schwarze 662: switch (*fmt) {
1.78 schwarze 663: case 'I':
664: print_href(h, arg1, NULL, 0);
665: fmt++;
666: break;
1.65 schwarze 667: case 'M':
1.73 schwarze 668: print_href(h, arg1, arg2, 1);
1.65 schwarze 669: fmt++;
670: break;
1.78 schwarze 671: case 'R':
672: print_byte(h, '#');
673: print_encode(h, arg1, NULL, 1);
1.65 schwarze 674: fmt++;
1.78 schwarze 675: break;
1.65 schwarze 676: default:
1.114 schwarze 677: print_encode(h, arg1, NULL, 1);
1.65 schwarze 678: break;
679: }
1.67 schwarze 680: print_byte(h, '"');
1.64 schwarze 681: }
1.114 schwarze 682:
683: style_written = 0;
684: while (*fmt++ == 's') {
685: arg1 = va_arg(ap, char *);
686: arg2 = va_arg(ap, char *);
687: if (arg2 == NULL)
688: continue;
689: print_byte(h, ' ');
690: if (style_written == 0) {
691: print_word(h, "style=\"");
692: style_written = 1;
693: }
694: print_word(h, arg1);
695: print_byte(h, ':');
696: print_byte(h, ' ');
697: print_word(h, arg2);
698: print_byte(h, ';');
699: }
700: if (style_written)
701: print_byte(h, '"');
702:
1.64 schwarze 703: va_end(ap);
1.6 schwarze 704:
1.42 schwarze 705: /* Accommodate for "well-formed" singleton escaping. */
1.6 schwarze 706:
707: if (HTML_AUTOCLOSE & htmltags[tag].flags)
1.67 schwarze 708: print_byte(h, '/');
1.6 schwarze 709:
1.67 schwarze 710: print_byte(h, '>');
1.1 schwarze 711:
1.66 schwarze 712: if (tflags & HTML_NLBEGIN)
1.67 schwarze 713: print_endline(h);
1.66 schwarze 714: else
715: h->flags |= HTML_NOSPACE;
1.18 schwarze 716:
1.66 schwarze 717: if (tflags & HTML_INDENT)
718: h->indent++;
719: if (tflags & HTML_NOINDENT)
720: h->noindent++;
1.18 schwarze 721:
1.58 schwarze 722: return t;
1.1 schwarze 723: }
724:
725: static void
1.54 schwarze 726: print_ctag(struct html *h, struct tag *tag)
1.1 schwarze 727: {
1.66 schwarze 728: int tflags;
1.35 schwarze 729:
1.122 schwarze 730: if (tag->closed == 0) {
731: tag->closed = 1;
732: if (tag == h->metaf)
733: h->metaf = NULL;
734: if (tag == h->tblt)
735: h->tblt = NULL;
736:
737: tflags = htmltags[tag->tag].flags;
738: if (tflags & HTML_INDENT)
739: h->indent--;
740: if (tflags & HTML_NOINDENT)
741: h->noindent--;
742: if (tflags & HTML_NLEND)
743: print_endline(h);
744: print_indent(h);
745: print_byte(h, '<');
746: print_byte(h, '/');
747: print_word(h, htmltags[tag->tag].name);
748: print_byte(h, '>');
749: if (tflags & HTML_NLAFTER)
750: print_endline(h);
751: }
752: if (tag->refcnt == 0) {
753: h->tag = tag->next;
754: free(tag);
755: }
1.1 schwarze 756: }
757:
/*
 * Print the document type declaration on a line of its own.
 */
void
print_gen_decls(struct html *h)
{
	static const char doctype[] = "<!DOCTYPE html>";

	print_word(h, doctype);
	print_endline(h);
}
764:
765: void
766: print_gen_comment(struct html *h, struct roff_node *n)
767: {
768: int wantblank;
769:
770: print_word(h, "<!-- This is an automatically generated file."
771: " Do not edit.");
772: h->indent = 1;
773: wantblank = 0;
774: while (n != NULL && n->type == ROFFT_COMMENT) {
775: if (strstr(n->string, "-->") == NULL &&
776: (wantblank || *n->string != '\0')) {
777: print_endline(h);
778: print_indent(h);
779: print_word(h, n->string);
780: wantblank = *n->string != '\0';
781: }
782: n = n->next;
783: }
784: if (wantblank)
785: print_endline(h);
786: print_word(h, " -->");
787: print_endline(h);
788: h->indent = 0;
1.1 schwarze 789: }
790:
791: void
1.12 schwarze 792: print_text(struct html *h, const char *word)
1.1 schwarze 793: {
1.67 schwarze 794: if (h->col && (h->flags & HTML_NOSPACE) == 0) {
1.12 schwarze 795: if ( ! (HTML_KEEP & h->flags)) {
796: if (HTML_PREKEEP & h->flags)
797: h->flags |= HTML_KEEP;
1.67 schwarze 798: print_endword(h);
1.12 schwarze 799: } else
1.86 bentley 800: print_word(h, " ");
1.12 schwarze 801: }
1.1 schwarze 802:
1.125 ! schwarze 803: assert(h->metaf == NULL);
! 804: print_metaf(h);
! 805: print_indent(h);
1.65 schwarze 806: if ( ! print_encode(h, word, NULL, 0)) {
1.13 schwarze 807: if ( ! (h->flags & HTML_NONOSPACE))
808: h->flags &= ~HTML_NOSPACE;
1.53 schwarze 809: h->flags &= ~HTML_NONEWLINE;
1.28 schwarze 810: } else
1.53 schwarze 811: h->flags |= HTML_NOSPACE | HTML_NONEWLINE;
1.20 schwarze 812:
1.125 ! schwarze 813: if (h->metaf != NULL) {
1.20 schwarze 814: print_tagq(h, h->metaf);
815: h->metaf = NULL;
816: }
1.17 schwarze 817:
818: h->flags &= ~HTML_IGNDELIM;
1.1 schwarze 819: }
820:
821: void
822: print_tagq(struct html *h, const struct tag *until)
823: {
1.122 schwarze 824: struct tag *this, *next;
1.1 schwarze 825:
1.122 schwarze 826: for (this = h->tag; this != NULL; this = next) {
827: next = this == until ? NULL : this->next;
828: print_ctag(h, this);
1.1 schwarze 829: }
830: }
831:
1.120 schwarze 832: /*
833: * Close out all open elements up to but excluding suntil.
834: * Note that a paragraph just inside stays open together with it
835: * because paragraphs include subsequent phrasing content.
836: */
1.1 schwarze 837: void
838: print_stagq(struct html *h, const struct tag *suntil)
839: {
1.122 schwarze 840: struct tag *this, *next;
1.1 schwarze 841:
1.122 schwarze 842: for (this = h->tag; this != NULL; this = next) {
843: next = this->next;
844: if (this == suntil || (next == suntil &&
845: (this->tag == TAG_P || this->tag == TAG_PRE)))
846: break;
847: print_ctag(h, this);
1.1 schwarze 848: }
1.42 schwarze 849: }
850:
1.67 schwarze 851:
852: /***********************************************************************
853: * Low level output functions.
854: * They implement line breaking using a short static buffer.
855: ***********************************************************************/
856:
857: /*
858: * Buffer one HTML output byte.
859: * If the buffer is full, flush and deactivate it and start a new line.
860: * If the buffer is inactive, print directly.
861: */
862: static void
863: print_byte(struct html *h, char c)
864: {
865: if ((h->flags & HTML_BUFFER) == 0) {
866: putchar(c);
867: h->col++;
868: return;
869: }
870:
871: if (h->col + h->bufcol < sizeof(h->buf)) {
872: h->buf[h->bufcol++] = c;
873: return;
874: }
875:
876: putchar('\n');
877: h->col = 0;
878: print_indent(h);
879: putchar(' ');
880: putchar(' ');
881: fwrite(h->buf, h->bufcol, 1, stdout);
882: putchar(c);
883: h->col = (h->indent + 1) * 2 + h->bufcol + 1;
884: h->bufcol = 0;
885: h->flags &= ~HTML_BUFFER;
886: }
887:
1.66 schwarze 888: /*
889: * If something was printed on the current output line, end it.
1.67 schwarze 890: * Not to be called right after print_indent().
1.66 schwarze 891: */
1.72 schwarze 892: void
1.67 schwarze 893: print_endline(struct html *h)
1.66 schwarze 894: {
1.67 schwarze 895: if (h->col == 0)
1.66 schwarze 896: return;
897:
1.67 schwarze 898: if (h->bufcol) {
899: putchar(' ');
900: fwrite(h->buf, h->bufcol, 1, stdout);
901: h->bufcol = 0;
902: }
1.66 schwarze 903: putchar('\n');
1.67 schwarze 904: h->col = 0;
905: h->flags |= HTML_NOSPACE;
906: h->flags &= ~HTML_BUFFER;
907: }
908:
909: /*
910: * Flush the HTML output buffer.
911: * If it is inactive, activate it.
912: */
913: static void
914: print_endword(struct html *h)
915: {
916: if (h->noindent) {
917: print_byte(h, ' ');
918: return;
919: }
920:
921: if ((h->flags & HTML_BUFFER) == 0) {
922: h->col++;
923: h->flags |= HTML_BUFFER;
924: } else if (h->bufcol) {
925: putchar(' ');
926: fwrite(h->buf, h->bufcol, 1, stdout);
927: h->col += h->bufcol + 1;
928: }
929: h->bufcol = 0;
1.66 schwarze 930: }
931:
932: /*
933: * If at the beginning of a new output line,
934: * perform indentation and mark the line as containing output.
935: * Make sure to really produce some output right afterwards,
936: * but do not use print_otag() for producing it.
937: */
938: static void
1.67 schwarze 939: print_indent(struct html *h)
1.66 schwarze 940: {
1.67 schwarze 941: size_t i;
1.66 schwarze 942:
1.67 schwarze 943: if (h->col)
1.66 schwarze 944: return;
945:
1.67 schwarze 946: if (h->noindent == 0) {
947: h->col = h->indent * 2;
948: for (i = 0; i < h->col; i++)
1.66 schwarze 949: putchar(' ');
1.67 schwarze 950: }
951: h->flags &= ~HTML_NOSPACE;
952: }
953:
/*
 * Hand a NUL-terminated string to print_byte() one byte at a time,
 * such that it is printed or buffered depending on the current
 * state of the HTML output buffer.
 */
static void
print_word(struct html *h, const char *cp)
{
	for (; *cp != '\0'; cp++)
		print_byte(h, *cp);
}