File: [local] / src / usr.bin / mandoc / html.h (download)
Revision 1.63, Fri Jan 18 14:36:16 2019 UTC (5 years, 4 months ago) by schwarze
Branch: MAIN
Changes since 1.62: +3 -1 lines
The .UR and .MT blocks in man(7) are represented by <a> elements
which establish phrasing context, but they can contain paragraph
breaks (which is relevant for terminal formatting, so we can't just
change the structure of the syntax tree), which are respresented
by <p> elements and cannot occur inside <a>.
Fix this by prematurely closing the <a> element in the HTML formatter.
This menas that the clickable text in HTML output is shorter than
what is represented as the link text in terminal output, but in
HTML, it is frankly impossible to have the clickable area of a
hyperlink extend across a paragraph break. The difference in
presentation is not a major problem, and besides, paragraph breaks
inside .UR are rather poor style in the first place.
The implementation is quite tricky. Naively closing out the <a>
prematurely would result in accessing a stale pointer when later
reaching the physical end of the .UR block. So this commit separates
visual and structural closing of "struct tag" stack items. Visual
closing means that the HTML element is closed but the "struct tag"
remains on the stack, to avoid later access to a stale pointer and
to avoid closing the same HTML element a second time later.
This also needs reference counting of pointers to "struct tag" stack
items because often more than one child holds a pointer to the same
parent item, and only the outermost child can safely do the physical
closing.
In the whole corpus of nearly half a million manual pages on
man.openbsd.org, this problem occurs in exactly one page: the
groff(1) version 1.20.1 manual contained in DragonFly-3.8.2, which
contains a formatting error triggering the bug.
|
/* $OpenBSD: html.h,v 1.63 2019/01/18 14:36:16 schwarze Exp $ */
/*
* Copyright (c) 2008-2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
* Copyright (c) 2017, 2018, 2019 Ingo Schwarze <schwarze@openbsd.org>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
enum htmltag {
TAG_HTML,
TAG_HEAD,
TAG_BODY,
TAG_META,
TAG_TITLE,
TAG_DIV,
TAG_IDIV,
TAG_H1,
TAG_H2,
TAG_SPAN,
TAG_LINK,
TAG_BR,
TAG_A,
TAG_TABLE,
TAG_TR,
TAG_TD,
TAG_LI,
TAG_UL,
TAG_OL,
TAG_DL,
TAG_DT,
TAG_DD,
TAG_P,
TAG_PRE,
TAG_VAR,
TAG_CITE,
TAG_B,
TAG_I,
TAG_CODE,
TAG_SMALL,
TAG_STYLE,
TAG_MATH,
TAG_MROW,
TAG_MI,
TAG_MN,
TAG_MO,
TAG_MSUP,
TAG_MSUB,
TAG_MSUBSUP,
TAG_MFRAC,
TAG_MSQRT,
TAG_MFENCED,
TAG_MTABLE,
TAG_MTR,
TAG_MTD,
TAG_MUNDEROVER,
TAG_MUNDER,
TAG_MOVER,
TAG_MAX
};
enum htmlfont {
HTMLFONT_NONE = 0,
HTMLFONT_BOLD,
HTMLFONT_ITALIC,
HTMLFONT_BI,
HTMLFONT_CW,
HTMLFONT_MAX
};
struct tag {
struct tag *next;
int refcnt;
int closed;
enum htmltag tag;
};
struct html {
int flags;
#define HTML_NOSPACE (1 << 0) /* suppress next space */
#define HTML_IGNDELIM (1 << 1)
#define HTML_KEEP (1 << 2)
#define HTML_PREKEEP (1 << 3)
#define HTML_NONOSPACE (1 << 4) /* never add spaces */
#define HTML_SKIPCHAR (1 << 6) /* skip the next character */
#define HTML_NOSPLIT (1 << 7) /* do not break line before .An */
#define HTML_SPLIT (1 << 8) /* break line before .An */
#define HTML_NONEWLINE (1 << 9) /* No line break in nofill mode. */
#define HTML_BUFFER (1 << 10) /* Collect a word to see if it fits. */
#define HTML_TOCDONE (1 << 11) /* The TOC was already written. */
size_t indent; /* current output indentation level */
int noindent; /* indent disabled by <pre> */
size_t col; /* current output byte position */
size_t bufcol; /* current buf byte position */
char buf[80]; /* output buffer */
struct tag *tag; /* last open tag */
struct rofftbl tbl; /* current table */
struct tag *tblt; /* current open table scope */
char *base_man1; /* bases for manpage href */
char *base_man2;
char *base_includes; /* base for include href */
char *style; /* style-sheet URI */
struct tag *metaf; /* current open font scope */
enum htmlfont metal; /* last used font */
enum htmlfont metac; /* current font mode */
int oflags; /* output options */
#define HTML_FRAGMENT (1 << 0) /* don't emit HTML/HEAD/BODY */
#define HTML_TOC (1 << 1) /* emit a table of contents */
};
struct roff_node;
struct tbl_span;
struct eqn_box;
void roff_html_pre(struct html *, const struct roff_node *);
void print_gen_comment(struct html *, struct roff_node *);
void print_gen_decls(struct html *);
void print_gen_head(struct html *);
void print_metaf(struct html *, enum mandoc_esc);
struct tag *print_otag(struct html *, enum htmltag, const char *, ...);
void print_tagq(struct html *, const struct tag *);
void print_stagq(struct html *, const struct tag *);
void print_text(struct html *, const char *);
void print_tblclose(struct html *);
void print_tbl(struct html *, const struct tbl_span *);
void print_eqn(struct html *, const struct eqn_box *);
void print_endline(struct html *);
void html_close_paragraph(struct html *);
enum roff_tok html_fillmode(struct html *, enum roff_tok);
char *html_make_id(const struct roff_node *, int);