src/usr.bin/mandoc/html.c - annotate

Return to html.c CVS log
Up to [local] / src / usr.bin / mandoc
Annotation of src/usr.bin/mandoc/html.c, Revision 1.3

1.3     ! schwarze    1: /*     $Id: html.c,v 1.2 2009/10/27 21:40:07 schwarze Exp $ */
1.1       schwarze    2: /*
                      3:  * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
                      4:  *
                      5:  * Permission to use, copy, modify, and distribute this software for any
                      6:  * purpose with or without fee is hereby granted, provided that the above
                      7:  * copyright notice and this permission notice appear in all copies.
                      8:  *
                      9:  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
                     10:  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
                     11:  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
                     12:  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
                     13:  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
                     14:  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
                     15:  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
                     16:  */
                     17: #include <sys/types.h>
                     18:
                     19: #include <assert.h>
1.3     ! schwarze   20: #include <ctype.h>
1.1       schwarze   21: #include <stdio.h>
                     22: #include <stdarg.h>
                     23: #include <stdint.h>
                     24: #include <stdlib.h>
                     25: #include <string.h>
                     26: #include <unistd.h>
                     27:
                     28: #include "out.h"
                     29: #include "chars.h"
                     30: #include "html.h"
                     31: #include "main.h"
                     32:
                     /*
                      * UNCONST strips the const qualifier from a pointer by round-tripping
                      * it through uintptr_t; html_alloc() uses it to hand a const token
                      * table to getsubopt().
                      */
                     33: #define        UNCONST(a)      ((void *)(uintptr_t)(const void *)(a))
                     34:
                     /* Public identifier and DTD URL printed by print_gen_doctype(). */
                     35: #define        DOCTYPE         "-//W3C//DTD HTML 4.01//EN"
                     36: #define        DTD             "http://www.w3.org/TR/html4/strict.dtd"
                     37:
                     /*
                      * Static description of one HTML element: the name printed between
                      * the angle brackets and a set of HTML_* layout flags.
                      */
                     38: struct htmldata {
                     39:        const char       *name;
                     40:        int               flags;
                     /*
                      * HTML_CLRLINE: print_otag() prints no leading space for the tag and
                      * print_ctag() follows the closing tag with a newline.
                      * HTML_NOSTACK: print_otag() does not push the tag onto h->tags, so
                      * no closing tag is ever emitted for it.
                      */
                     41: #define        HTML_CLRLINE     (1 << 0)
                     42: #define        HTML_NOSTACK     (1 << 1)
                     43: };
                     44:
                     /*
                      * Element table indexed by enum htmltag (declared outside this file);
                      * the row order must match the enum order.
                      */
                     45: static const struct htmldata htmltags[TAG_MAX] = {
                     46:        {"html",        HTML_CLRLINE}, /* TAG_HTML */
                     47:        {"head",        HTML_CLRLINE}, /* TAG_HEAD */
                     48:        {"body",        HTML_CLRLINE}, /* TAG_BODY */
                     49:        {"meta",        HTML_CLRLINE | HTML_NOSTACK}, /* TAG_META */
                     50:        {"title",       HTML_CLRLINE}, /* TAG_TITLE */
                     51:        {"div",         HTML_CLRLINE}, /* TAG_DIV */
                     52:        {"h1",          0}, /* TAG_H1 */
                     53:        {"h2",          0}, /* TAG_H2 */
                     54:        {"p",           HTML_CLRLINE}, /* TAG_P */
                     55:        {"span",        0}, /* TAG_SPAN */
                     56:        {"link",        HTML_CLRLINE | HTML_NOSTACK}, /* TAG_LINK */
                     57:        {"br",          HTML_CLRLINE | HTML_NOSTACK}, /* presumably TAG_BR (was mislabelled TAG_LINK) - confirm against the enum */
                     58:        {"a",           0}, /* TAG_A */
                     59:        {"table",       HTML_CLRLINE}, /* TAG_TABLE */
                     60:        {"col",         HTML_CLRLINE | HTML_NOSTACK}, /* TAG_COL */
                     61:        {"tr",          HTML_CLRLINE}, /* TAG_TR */
                     62:        {"td",          HTML_CLRLINE}, /* TAG_TD */
                     63:        {"li",          HTML_CLRLINE}, /* TAG_LI */
                     64:        {"ul",          HTML_CLRLINE}, /* TAG_UL */
                     65:        {"ol",          HTML_CLRLINE}, /* TAG_OL */
                     66:        {"base",        HTML_CLRLINE | HTML_NOSTACK}, /* TAG_BASE */
                     67: };
                     68:
                     /*
                      * Attribute names indexed by the key member of struct htmlpair;
                      * print_otag() prints htmlattrs[p[i].key] for each pair.  The order
                      * must match the ATTR_* enumeration (declared outside this file).
                      */
                     69: static const char       *const htmlattrs[ATTR_MAX] = {
                     70:        "http-equiv",
                     71:        "content",
                     72:        "name",
                     73:        "rel",
                     74:        "href",
                     75:        "type",
                     76:        "media",
                     77:        "class",
                     78:        "style",
                     79:        "width",
                     80:        "valign",
                     81:        "target",
                     82:        "id",
1.3     ! schwarze   83:        "summary",
1.1       schwarze   84: };
                     85:
                     86: void *
                     87: html_alloc(char *outopts)
                     88: {
                     89:        struct html     *h;
                     90:        const char      *toks[4];
                     91:        char            *v;
                     92:
                     93:        toks[0] = "style";
                     94:        toks[1] = "man";
                     95:        toks[2] = "includes";
                     96:        toks[3] = NULL;
                     97:
1.3     ! schwarze   98:        h = calloc(1, sizeof(struct html));
        !            99:        if (NULL == h) {
        !           100:                perror(NULL);
        !           101:                exit(EXIT_FAILURE);
        !           102:        }
1.1       schwarze  103:
1.2       schwarze  104:        h->tags.head = NULL;
                    105:        h->ords.head = NULL;
1.3     ! schwarze  106:        h->symtab = chars_init(CHARS_HTML);
1.1       schwarze  107:
                    108:        while (outopts && *outopts)
                    109:                switch (getsubopt(&outopts, UNCONST(toks), &v)) {
                    110:                case (0):
                    111:                        h->style = v;
                    112:                        break;
                    113:                case (1):
                    114:                        h->base_man = v;
                    115:                        break;
                    116:                case (2):
                    117:                        h->base_includes = v;
                    118:                        break;
                    119:                default:
                    120:                        break;
                    121:                }
                    122:
                    123:        return(h);
                    124: }
                    125:
                    126:
                    127: void
                    128: html_free(void *p)
                    129: {
                    130:        struct tag      *tag;
                    131:        struct ord      *ord;
                    132:        struct html     *h;
                    133:
                    134:        h = (struct html *)p;
                    135:
1.2       schwarze  136:        while ((ord = h->ords.head) != NULL) {
                    137:                h->ords.head = ord->next;
1.1       schwarze  138:                free(ord);
                    139:        }
                    140:
1.2       schwarze  141:        while ((tag = h->tags.head) != NULL) {
                    142:                h->tags.head = tag->next;
1.1       schwarze  143:                free(tag);
                    144:        }
                    145:
                    146:        if (h->symtab)
                    147:                chars_free(h->symtab);
                    148:
                    149:        free(h);
                    150: }
                    151:
                    152:
                    153: void
                    154: print_gen_head(struct html *h)
                    155: {
                    156:        struct htmlpair  tag[4];
                    157:
                    158:        tag[0].key = ATTR_HTTPEQUIV;
                    159:        tag[0].val = "Content-Type";
                    160:        tag[1].key = ATTR_CONTENT;
                    161:        tag[1].val = "text/html; charset=utf-8";
                    162:        print_otag(h, TAG_META, 2, tag);
                    163:
                    164:        tag[0].key = ATTR_NAME;
                    165:        tag[0].val = "resource-type";
                    166:        tag[1].key = ATTR_CONTENT;
                    167:        tag[1].val = "document";
                    168:        print_otag(h, TAG_META, 2, tag);
                    169:
                    170:        if (h->style) {
                    171:                tag[0].key = ATTR_REL;
                    172:                tag[0].val = "stylesheet";
                    173:                tag[1].key = ATTR_HREF;
                    174:                tag[1].val = h->style;
                    175:                tag[2].key = ATTR_TYPE;
                    176:                tag[2].val = "text/css";
                    177:                tag[3].key = ATTR_MEDIA;
                    178:                tag[3].val = "all";
                    179:                print_otag(h, TAG_LINK, 4, tag);
                    180:        }
                    181: }
                    182:
                    183:
                    184: static void
                    185: print_spec(struct html *h, const char *p, int len)
                    186: {
                    187:        const char      *rhs;
                    188:        int              i;
                    189:        size_t           sz;
                    190:
                    191:        rhs = chars_a2ascii(h->symtab, p, (size_t)len, &sz);
                    192:
                    193:        if (NULL == rhs)
                    194:                return;
                    195:        for (i = 0; i < (int)sz; i++)
                    196:                putchar(rhs[i]);
                    197: }
                    198:
                    199:
                    200: static void
                    201: print_res(struct html *h, const char *p, int len)
                    202: {
                    203:        const char      *rhs;
                    204:        int              i;
                    205:        size_t           sz;
                    206:
                    207:        rhs = chars_a2res(h->symtab, p, (size_t)len, &sz);
                    208:
                    209:        if (NULL == rhs)
                    210:                return;
                    211:        for (i = 0; i < (int)sz; i++)
                    212:                putchar(rhs[i]);
                    213: }
                    214:
                    215:
                    216: static void
                    217: print_escape(struct html *h, const char **p)
                    218: {
                    219:        int              j, type;
                    220:        const char      *wp;
                    221:
                    222:        wp = *p;
                    223:        type = 1;
                    224:
                    225:        if (0 == *(++wp)) {
                    226:                *p = wp;
                    227:                return;
                    228:        }
                    229:
                    230:        if ('(' == *wp) {
                    231:                wp++;
                    232:                if (0 == *wp || 0 == *(wp + 1)) {
                    233:                        *p = 0 == *wp ? wp : wp + 1;
                    234:                        return;
                    235:                }
                    236:
                    237:                print_spec(h, wp, 2);
                    238:                *p = ++wp;
                    239:                return;
                    240:
                    241:        } else if ('*' == *wp) {
                    242:                if (0 == *(++wp)) {
                    243:                        *p = wp;
                    244:                        return;
                    245:                }
                    246:
                    247:                switch (*wp) {
                    248:                case ('('):
                    249:                        wp++;
                    250:                        if (0 == *wp || 0 == *(wp + 1)) {
                    251:                                *p = 0 == *wp ? wp : wp + 1;
                    252:                                return;
                    253:                        }
                    254:
                    255:                        print_res(h, wp, 2);
                    256:                        *p = ++wp;
                    257:                        return;
                    258:                case ('['):
                    259:                        type = 0;
                    260:                        break;
                    261:                default:
                    262:                        print_res(h, wp, 1);
                    263:                        *p = wp;
                    264:                        return;
                    265:                }
                    266:
                    267:        } else if ('f' == *wp) {
                    268:                if (0 == *(++wp)) {
                    269:                        *p = wp;
                    270:                        return;
                    271:                }
                    272:
                    273:                switch (*wp) {
                    274:                case ('B'):
                    275:                        /* TODO */
                    276:                        break;
                    277:                case ('I'):
                    278:                        /* TODO */
                    279:                        break;
                    280:                case ('P'):
                    281:                        /* FALLTHROUGH */
                    282:                case ('R'):
                    283:                        /* TODO */
                    284:                        break;
                    285:                default:
                    286:                        break;
                    287:                }
                    288:
                    289:                *p = wp;
                    290:                return;
                    291:
                    292:        } else if ('[' != *wp) {
                    293:                print_spec(h, wp, 1);
                    294:                *p = wp;
                    295:                return;
                    296:        }
                    297:
                    298:        wp++;
                    299:        for (j = 0; *wp && ']' != *wp; wp++, j++)
                    300:                /* Loop... */ ;
                    301:
                    302:        if (0 == *wp) {
                    303:                *p = wp;
                    304:                return;
                    305:        }
                    306:
                    307:        if (type)
                    308:                print_spec(h, wp - j, j);
                    309:        else
                    310:                print_res(h, wp - j, j);
                    311:
                    312:        *p = wp;
                    313: }
                    314:
                    315:
                    316: static void
                    317: print_encode(struct html *h, const char *p)
                    318: {
                    319:
                    320:        for (; *p; p++) {
                    321:                if ('\\' == *p) {
                    322:                        print_escape(h, &p);
                    323:                        continue;
                    324:                }
                    325:                switch (*p) {
                    326:                case ('<'):
                    327:                        printf("&lt;");
                    328:                        break;
                    329:                case ('>'):
                    330:                        printf("&gt;");
                    331:                        break;
                    332:                case ('&'):
                    333:                        printf("&amp;");
                    334:                        break;
                    335:                default:
                    336:                        putchar(*p);
                    337:                        break;
                    338:                }
                    339:        }
                    340: }
                    341:
                    342:
                    343: struct tag *
                    344: print_otag(struct html *h, enum htmltag tag,
                    345:                int sz, const struct htmlpair *p)
                    346: {
                    347:        int              i;
                    348:        struct tag      *t;
                    349:
                    350:        if ( ! (HTML_NOSTACK & htmltags[tag].flags)) {
1.3     ! schwarze  351:                t = malloc(sizeof(struct tag));
        !           352:                if (NULL == t) {
        !           353:                        perror(NULL);
        !           354:                        exit(EXIT_FAILURE);
        !           355:                }
1.1       schwarze  356:                t->tag = tag;
1.2       schwarze  357:                t->next = h->tags.head;
                    358:                h->tags.head = t;
1.1       schwarze  359:        } else
                    360:                t = NULL;
                    361:
                    362:        if ( ! (HTML_NOSPACE & h->flags))
                    363:                if ( ! (HTML_CLRLINE & htmltags[tag].flags))
                    364:                        printf(" ");
                    365:
                    366:        printf("<%s", htmltags[tag].name);
                    367:        for (i = 0; i < sz; i++) {
                    368:                printf(" %s=\"", htmlattrs[p[i].key]);
                    369:                assert(p->val);
                    370:                print_encode(h, p[i].val);
                    371:                printf("\"");
                    372:        }
                    373:        printf(">");
                    374:
                    375:        h->flags |= HTML_NOSPACE;
                    376:        if (HTML_CLRLINE & htmltags[tag].flags)
                    377:                h->flags |= HTML_NEWLINE;
                    378:        else
                    379:                h->flags &= ~HTML_NEWLINE;
                    380:
                    381:        return(t);
                    382: }
                    383:
                    384:
                    385: /* ARGSUSED */
                    386: static void
                    387: print_ctag(struct html *h, enum htmltag tag)
                    388: {
                    389:
                    390:        printf("</%s>", htmltags[tag].name);
1.3     ! schwarze  391:        if (HTML_CLRLINE & htmltags[tag].flags) {
1.1       schwarze  392:                h->flags |= HTML_NOSPACE;
                    393:                h->flags |= HTML_NEWLINE;
1.3     ! schwarze  394:                printf("\n");
        !           395:        } else
1.1       schwarze  396:                h->flags &= ~HTML_NEWLINE;
                    397: }
                    398:
                    399:
                    400: /* ARGSUSED */
                    401: void
                    402: print_gen_doctype(struct html *h)
                    403: {
                    404:
                    405:        printf("<!DOCTYPE HTML PUBLIC \"%s\" \"%s\">", DOCTYPE, DTD);
                    406: }
                    407:
                    408:
                    409: void
                    410: print_text(struct html *h, const char *p)
                    411: {
                    412:
                    413:        if (*p && 0 == *(p + 1))
                    414:                switch (*p) {
                    415:                case('.'):
                    416:                        /* FALLTHROUGH */
                    417:                case(','):
                    418:                        /* FALLTHROUGH */
                    419:                case(';'):
                    420:                        /* FALLTHROUGH */
                    421:                case(':'):
                    422:                        /* FALLTHROUGH */
                    423:                case('?'):
                    424:                        /* FALLTHROUGH */
                    425:                case('!'):
                    426:                        /* FALLTHROUGH */
                    427:                case(')'):
                    428:                        /* FALLTHROUGH */
                    429:                case(']'):
                    430:                        /* FALLTHROUGH */
                    431:                case('}'):
                    432:                        if ( ! (HTML_IGNDELIM & h->flags))
                    433:                                h->flags |= HTML_NOSPACE;
                    434:                        break;
                    435:                default:
                    436:                        break;
                    437:                }
                    438:
                    439:        if ( ! (h->flags & HTML_NOSPACE))
                    440:                printf(" ");
                    441:
                    442:        h->flags &= ~HTML_NOSPACE;
                    443:        h->flags &= ~HTML_NEWLINE;
                    444:
                    445:        if (p)
                    446:                print_encode(h, p);
                    447:
                    448:        if (*p && 0 == *(p + 1))
                    449:                switch (*p) {
                    450:                case('('):
                    451:                        /* FALLTHROUGH */
                    452:                case('['):
                    453:                        /* FALLTHROUGH */
                    454:                case('{'):
                    455:                        h->flags |= HTML_NOSPACE;
                    456:                        break;
                    457:                default:
                    458:                        break;
                    459:                }
                    460: }
                    461:
                    462:
                    463: void
                    464: print_tagq(struct html *h, const struct tag *until)
                    465: {
                    466:        struct tag      *tag;
                    467:
1.2       schwarze  468:        while ((tag = h->tags.head) != NULL) {
1.1       schwarze  469:                print_ctag(h, tag->tag);
1.2       schwarze  470:                h->tags.head = tag->next;
1.1       schwarze  471:                free(tag);
                    472:                if (until && tag == until)
                    473:                        return;
                    474:        }
                    475: }
                    476:
                    477:
                    478: void
                    479: print_stagq(struct html *h, const struct tag *suntil)
                    480: {
                    481:        struct tag      *tag;
                    482:
1.2       schwarze  483:        while ((tag = h->tags.head) != NULL) {
1.1       schwarze  484:                if (suntil && tag == suntil)
                    485:                        return;
                    486:                print_ctag(h, tag->tag);
1.2       schwarze  487:                h->tags.head = tag->next;
1.1       schwarze  488:                free(tag);
                    489:        }
                    490: }
                    491:
                    492:
                    493: void
                    494: bufinit(struct html *h)
                    495: {
                    496:
                    497:        h->buf[0] = '\0';
                    498:        h->buflen = 0;
                    499: }
                    500:
                    501:
                    502: void
                    503: bufcat_style(struct html *h, const char *key, const char *val)
                    504: {
                    505:
                    506:        bufcat(h, key);
                    507:        bufncat(h, ":", 1);
                    508:        bufcat(h, val);
                    509:        bufncat(h, ";", 1);
                    510: }
                    511:
                    512:
                    513: void
                    514: bufcat(struct html *h, const char *p)
                    515: {
                    516:
                    517:        bufncat(h, p, strlen(p));
                    518: }
                    519:
                    520:
                    521: void
                    522: buffmt(struct html *h, const char *fmt, ...)
                    523: {
                    524:        va_list          ap;
                    525:
                    526:        va_start(ap, fmt);
                    527:        (void)vsnprintf(h->buf + (int)h->buflen,
                    528:                        BUFSIZ - h->buflen - 1, fmt, ap);
                    529:        va_end(ap);
                    530:        h->buflen = strlen(h->buf);
                    531: }
                    532:
                    533:
                    534: void
                    535: bufncat(struct html *h, const char *p, size_t sz)
                    536: {
                    537:
                    538:        if (h->buflen + sz > BUFSIZ - 1)
                    539:                sz = BUFSIZ - 1 - h->buflen;
                    540:
                    541:        (void)strncat(h->buf, p, sz);
                    542:        h->buflen += sz;
                    543: }
                    544:
                    545:
                    546: void
                    547: buffmt_includes(struct html *h, const char *name)
                    548: {
                    549:        const char      *p, *pp;
                    550:
                    551:        pp = h->base_includes;
                    552:
                    553:        while (NULL != (p = strchr(pp, '%'))) {
                    554:                bufncat(h, pp, (size_t)(p - pp));
                    555:                switch (*(p + 1)) {
                    556:                case('I'):
                    557:                        bufcat(h, name);
                    558:                        break;
                    559:                default:
                    560:                        bufncat(h, p, 2);
                    561:                        break;
                    562:                }
                    563:                pp = p + 2;
                    564:        }
                    565:        if (pp)
                    566:                bufcat(h, pp);
                    567: }
                    568:
                    569:
                    570: void
                    571: buffmt_man(struct html *h,
                    572:                const char *name, const char *sec)
                    573: {
                    574:        const char      *p, *pp;
                    575:
                    576:        pp = h->base_man;
                    577:
                    578:        /* LINTED */
                    579:        while (NULL != (p = strchr(pp, '%'))) {
                    580:                bufncat(h, pp, (size_t)(p - pp));
                    581:                switch (*(p + 1)) {
                    582:                case('S'):
                    583:                        bufcat(h, sec ? sec : "1");
                    584:                        break;
                    585:                case('N'):
                    586:                        buffmt(h, name);
                    587:                        break;
                    588:                default:
                    589:                        bufncat(h, p, 2);
                    590:                        break;
                    591:                }
                    592:                pp = p + 2;
                    593:        }
                    594:        if (pp)
                    595:                bufcat(h, pp);
                    596: }
                    597:
                    598:
                    599: void
                    600: bufcat_su(struct html *h, const char *p, const struct roffsu *su)
                    601: {
                    602:        double           v;
                    603:        const char      *u;
                    604:
                    605:        v = su->scale;
                    606:
                    607:        switch (su->unit) {
                    608:        case (SCALE_CM):
                    609:                u = "cm";
                    610:                break;
                    611:        case (SCALE_IN):
                    612:                u = "in";
                    613:                break;
                    614:        case (SCALE_PC):
                    615:                u = "pc";
                    616:                break;
                    617:        case (SCALE_PT):
                    618:                u = "pt";
                    619:                break;
                    620:        case (SCALE_EM):
                    621:                u = "em";
                    622:                break;
                    623:        case (SCALE_MM):
                    624:                if (0 == (v /= 100))
                    625:                        v = 1;
                    626:                u = "em";
                    627:                break;
                    628:        case (SCALE_EN):
                    629:                u = "ex";
                    630:                break;
                    631:        case (SCALE_BU):
                    632:                u = "ex";
                    633:                break;
                    634:        case (SCALE_VS):
                    635:                u = "em";
                    636:                break;
                    637:        default:
                    638:                u = "ex";
                    639:                break;
                    640:        }
                    641:
                    642:        if (su->pt)
                    643:                buffmt(h, "%s: %f%s;", p, v, u);
                    644:        else
                    645:                /* LINTED */
                    646:                buffmt(h, "%s: %d%s;", p, (int)v, u);
                    647: }
                    648:
1.3     ! schwarze  649:
        !           650: void
        !           651: html_idcat(char *dst, const char *src, int sz)
        !           652: {
        !           653:        int              ssz;
        !           654:
        !           655:        assert(sz);
        !           656:
        !           657:        /* Cf. <http://www.w3.org/TR/html4/types.html#h-6.2>. */
        !           658:
        !           659:        for ( ; *dst != '\0' && sz; dst++, sz--)
        !           660:                /* Jump to end. */ ;
        !           661:
        !           662:        assert(sz > 2);
        !           663:
        !           664:        /* We can't start with a number (bah). */
        !           665:
        !           666:        *dst++ = 'x';
        !           667:        *dst = '\0';
        !           668:        sz--;
        !           669:
        !           670:        for ( ; *src != '\0' && sz > 1; src++) {
        !           671:                ssz = snprintf(dst, (size_t)sz, "%.2x", *src);
        !           672:                sz -= ssz;
        !           673:                dst += ssz;
        !           674:        }
        !           675: }