[BACK]Return to tag.c CVS log [TXT][DIR] Up to [local] / src / usr.bin / mandoc

Annotation of src/usr.bin/mandoc/tag.c, Revision 1.38

1.38    ! schwarze    1: /* $OpenBSD: tag.c,v 1.37 2022/04/26 11:28:35 schwarze Exp $ */
1.1       schwarze    2: /*
1.38    ! schwarze    3:  * Copyright (c) 2015, 2016, 2018, 2019, 2020, 2022, 2023
1.37      schwarze    4:  *               Ingo Schwarze <schwarze@openbsd.org>
1.1       schwarze    5:  *
                      6:  * Permission to use, copy, modify, and distribute this software for any
                      7:  * purpose with or without fee is hereby granted, provided that the above
                      8:  * copyright notice and this permission notice appear in all copies.
                      9:  *
                     10:  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
                     11:  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
                     12:  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
                     13:  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
                     14:  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
                     15:  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
                     16:  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
1.28      schwarze   17:  *
                     18:  * Functions to tag syntax tree nodes.
                     19:  * For internal use by mandoc(1) validation modules only.
1.1       schwarze   20:  */
                     21: #include <sys/types.h>
                     22:
1.27      schwarze   23: #include <assert.h>
1.20      schwarze   24: #include <limits.h>
1.1       schwarze   25: #include <stddef.h>
1.32      schwarze   26: #include <stdint.h>
1.38    ! schwarze   27: #include <stdio.h>
1.1       schwarze   28: #include <stdlib.h>
                     29: #include <string.h>
                     30:
                     31: #include "mandoc_aux.h"
1.10      schwarze   32: #include "mandoc_ohash.h"
1.38    ! schwarze   33: #include "mandoc.h"
1.28      schwarze   34: #include "roff.h"
1.33      schwarze   35: #include "mdoc.h"
1.35      schwarze   36: #include "roff_int.h"
1.1       schwarze   37: #include "tag.h"
                     38:
                     /*
                      * One entry of the tag table.
                      * The string s is the key the ohash table looks entries up by.
                      */
                     struct tag_entry {
                     	struct roff_node	**nodes;	/* nodes carrying this tag */
                     	size_t			  maxnodes;	/* slots allocated in nodes */
                     	size_t			  nnodes;	/* slots used in nodes */
                     	int			  prio;		/* lower numbers win */
                     	char			  s[];		/* the tag string itself */
                     };
                     46:
1.36      schwarze   47: static void             tag_move_href(struct roff_man *,
                     48:                                struct roff_node *, const char *);
1.33      schwarze   49: static void             tag_move_id(struct roff_node *);
                     50:
1.1       schwarze   51: static struct ohash     tag_data;
                     52:
                     53:
                     54: /*
1.28      schwarze   55:  * Set up the ohash table to collect nodes
                     56:  * where various marked-up terms are documented.
1.1       schwarze   57:  */
1.28      schwarze   58: void
                     59: tag_alloc(void)
1.1       schwarze   60: {
1.28      schwarze   61:        mandoc_ohash_init(&tag_data, 4, offsetof(struct tag_entry, s));
                     62: }
1.1       schwarze   63:
1.28      schwarze   64: void
                     65: tag_free(void)
                     66: {
                     67:        struct tag_entry        *entry;
                     68:        unsigned int             slot;
1.12      schwarze   69:
1.29      schwarze   70:        if (tag_data.info.free == NULL)
                     71:                return;
1.28      schwarze   72:        entry = ohash_first(&tag_data, &slot);
                     73:        while (entry != NULL) {
                     74:                free(entry->nodes);
                     75:                free(entry);
                     76:                entry = ohash_next(&tag_data, &slot);
1.22      schwarze   77:        }
1.28      schwarze   78:        ohash_delete(&tag_data);
1.29      schwarze   79:        tag_data.info.free = NULL;
1.1       schwarze   80: }
                     81:
                     82: /*
1.28      schwarze   83:  * Set a node where a term is defined,
1.37      schwarze   84:  * unless the term is already defined at a lower priority.
1.1       schwarze   85:  */
                     86: void
1.28      schwarze   87: tag_put(const char *s, int prio, struct roff_node *n)
1.1       schwarze   88: {
                     89:        struct tag_entry        *entry;
1.34      schwarze   90:        struct roff_node        *nold;
1.38    ! schwarze   91:        const char              *se, *src;
        !            92:        char                    *cpy;
1.5       schwarze   93:        size_t                   len;
1.1       schwarze   94:        unsigned int             slot;
1.38    ! schwarze   95:        int                      changed;
1.1       schwarze   96:
1.27      schwarze   97:        assert(prio <= TAG_FALLBACK);
1.20      schwarze   98:
1.37      schwarze   99:        /*
                    100:         * If the node is already tagged, the existing tag is
                    101:         * explicit and we are now about to add an implicit tag.
                    102:         * Don't do that; just skip implicit tagging if the author
                    103:         * specified an explicit tag.
                    104:         */
                    105:
                    106:        if (n->flags & NODE_ID)
                    107:                return;
                    108:
                    109:        /* Determine the implicit tag. */
                    110:
1.38    ! schwarze  111:        changed = 1;
1.28      schwarze  112:        if (s == NULL) {
                    113:                if (n->child == NULL || n->child->type != ROFFT_TEXT)
                    114:                        return;
                    115:                s = n->child->string;
1.30      schwarze  116:                switch (s[0]) {
                    117:                case '-':
                    118:                        s++;
                    119:                        break;
                    120:                case '\\':
                    121:                        switch (s[1]) {
                    122:                        case '&':
                    123:                        case '-':
                    124:                        case 'e':
                    125:                                s += 2;
                    126:                                break;
                    127:                        default:
1.38    ! schwarze  128:                                return;
1.30      schwarze  129:                        }
                    130:                        break;
                    131:                default:
1.38    ! schwarze  132:                        changed = 0;
1.30      schwarze  133:                        break;
                    134:                }
1.28      schwarze  135:        }
1.20      schwarze  136:
                    137:        /*
1.38    ! schwarze  138:         * Translate \- and ASCII_HYPH to plain '-'.
1.24      schwarze  139:         * Skip whitespace and escapes and whatever follows,
1.20      schwarze  140:         * and if there is any, downgrade the priority.
                    141:         */
                    142:
1.38    ! schwarze  143:        cpy = mandoc_malloc(strlen(s) + 1);
        !           144:        for (src = s, len = 0; *src != '\0'; src++, len++) {
        !           145:                switch (*src) {
        !           146:                case '\t':
        !           147:                case ' ':
        !           148:                        changed = 1;
        !           149:                        break;
        !           150:                case ASCII_HYPH:
        !           151:                        cpy[len] = '-';
        !           152:                        changed = 1;
        !           153:                        continue;
        !           154:                case '\\':
        !           155:                        if (src[1] != '-')
        !           156:                                break;
        !           157:                        src++;
        !           158:                        changed = 1;
        !           159:                        /* FALLTHROUGH */
        !           160:                default:
        !           161:                        cpy[len] = *src;
        !           162:                        continue;
        !           163:                }
        !           164:                break;
        !           165:        }
1.20      schwarze  166:        if (len == 0)
1.38    ! schwarze  167:                goto out;
        !           168:        cpy[len] = '\0';
1.14      schwarze  169:
1.38    ! schwarze  170:        if (*src != '\0' && prio < TAG_WEAK)
1.27      schwarze  171:                prio = TAG_WEAK;
1.20      schwarze  172:
1.38    ! schwarze  173:        s = cpy;
        !           174:        se = cpy + len;
1.20      schwarze  175:        slot = ohash_qlookupi(&tag_data, s, &se);
1.1       schwarze  176:        entry = ohash_find(&tag_data, slot);
1.14      schwarze  177:
1.28      schwarze  178:        /* Build a new entry. */
                    179:
1.1       schwarze  180:        if (entry == NULL) {
1.20      schwarze  181:                entry = mandoc_malloc(sizeof(*entry) + len + 1);
1.38    ! schwarze  182:                memcpy(entry->s, s, len + 1);
1.28      schwarze  183:                entry->nodes = NULL;
                    184:                entry->maxnodes = entry->nnodes = 0;
1.1       schwarze  185:                ohash_insert(&tag_data, slot, entry);
1.28      schwarze  186:        }
1.14      schwarze  187:
1.28      schwarze  188:        /*
                    189:         * Lower priority numbers take precedence.
                    190:         * If a better entry is already present, ignore the new one.
                    191:         */
                    192:
                    193:        else if (entry->prio < prio)
1.38    ! schwarze  194:                goto out;
1.28      schwarze  195:
                    196:        /*
                    197:         * If the existing entry is worse, clear it.
                    198:         * In addition, a tag with priority TAG_FALLBACK
                    199:         * is only used if the tag occurs exactly once.
                    200:         */
1.14      schwarze  201:
1.28      schwarze  202:        else if (entry->prio > prio || prio == TAG_FALLBACK) {
1.34      schwarze  203:                while (entry->nnodes > 0) {
                    204:                        nold = entry->nodes[--entry->nnodes];
                    205:                        nold->flags &= ~NODE_ID;
                    206:                        free(nold->tag);
                    207:                        nold->tag = NULL;
                    208:                }
1.27      schwarze  209:                if (prio == TAG_FALLBACK) {
1.28      schwarze  210:                        entry->prio = TAG_DELETE;
1.38    ! schwarze  211:                        goto out;
1.16      schwarze  212:                }
1.14      schwarze  213:        }
                    214:
1.28      schwarze  215:        /* Remember the new node. */
1.14      schwarze  216:
1.28      schwarze  217:        if (entry->maxnodes == entry->nnodes) {
                    218:                entry->maxnodes += 4;
                    219:                entry->nodes = mandoc_reallocarray(entry->nodes,
                    220:                    entry->maxnodes, sizeof(*entry->nodes));
1.14      schwarze  221:        }
1.28      schwarze  222:        entry->nodes[entry->nnodes++] = n;
1.4       schwarze  223:        entry->prio = prio;
1.28      schwarze  224:        n->flags |= NODE_ID;
1.38    ! schwarze  225:        if (changed) {
1.34      schwarze  226:                assert(n->tag == NULL);
                    227:                n->tag = mandoc_strndup(s, len);
1.28      schwarze  228:        }
1.38    ! schwarze  229:
        !           230:  out:
        !           231:        free(cpy);
1.1       schwarze  232: }
                    233:
1.31      schwarze  234: int
                    235: tag_exists(const char *tag)
1.1       schwarze  236: {
1.31      schwarze  237:        return ohash_find(&tag_data, ohash_qlookup(&tag_data, tag)) != NULL;
1.33      schwarze  238: }
                    239:
                    240: /*
                    241:  * For in-line elements, move the link target
                    242:  * to the enclosing paragraph when appropriate.
                    243:  */
                    244: static void
                    245: tag_move_id(struct roff_node *n)
                    246: {
                    247:        struct roff_node *np;
                    248:
                    249:        np = n;
                    250:        for (;;) {
                    251:                if (np->prev != NULL)
                    252:                        np = np->prev;
                    253:                else if ((np = np->parent) == NULL)
                    254:                        return;
                    255:                switch (np->tok) {
                    256:                case MDOC_It:
                    257:                        switch (np->parent->parent->norm->Bl.type) {
                    258:                        case LIST_column:
                    259:                                /* Target the ROFFT_BLOCK = <tr>. */
                    260:                                np = np->parent;
                    261:                                break;
                    262:                        case LIST_diag:
                    263:                        case LIST_hang:
                    264:                        case LIST_inset:
                    265:                        case LIST_ohang:
                    266:                        case LIST_tag:
                    267:                                /* Target the ROFFT_HEAD = <dt>. */
                    268:                                np = np->parent->head;
                    269:                                break;
                    270:                        default:
                    271:                                /* Target the ROFF_BODY = <li>. */
                    272:                                break;
                    273:                        }
                    274:                        /* FALLTHROUGH */
                    275:                case MDOC_Pp:   /* Target the ROFFT_ELEM = <p>. */
1.34      schwarze  276:                        if (np->tag == NULL) {
                    277:                                np->tag = mandoc_strdup(n->tag == NULL ?
                    278:                                    n->child->string : n->tag);
1.33      schwarze  279:                                np->flags |= NODE_ID;
                    280:                                n->flags &= ~NODE_ID;
                    281:                        }
                    282:                        return;
                    283:                case MDOC_Sh:
                    284:                case MDOC_Ss:
                    285:                case MDOC_Bd:
                    286:                case MDOC_Bl:
                    287:                case MDOC_D1:
                    288:                case MDOC_Dl:
                    289:                case MDOC_Rs:
                    290:                        /* Do not move past major blocks. */
                    291:                        return;
                    292:                default:
                    293:                        /*
                    294:                         * Move past in-line content and partial
                    295:                         * blocks, for example .It Xo or .It Bq Er.
                    296:                         */
                    297:                        break;
                    298:                }
                    299:        }
                    300: }
                    301:
                    302: /*
1.36      schwarze  303:  * When a paragraph is tagged and starts with text,
                    304:  * move the permalink to the first few words.
                    305:  */
                    306: static void
                    307: tag_move_href(struct roff_man *man, struct roff_node *n, const char *tag)
                    308: {
                    309:        char    *cp;
                    310:
                    311:        if (n == NULL || n->type != ROFFT_TEXT ||
                    312:            *n->string == '\0' || *n->string == ' ')
                    313:                return;
                    314:
                    315:        cp = n->string;
                    316:        while (cp != NULL && cp - n->string < 5)
                    317:                cp = strchr(cp + 1, ' ');
                    318:
                    319:        /* If the first text node is longer, split it. */
                    320:
                    321:        if (cp != NULL && cp[1] != '\0') {
                    322:                man->last = n;
                    323:                man->next = ROFF_NEXT_SIBLING;
                    324:                roff_word_alloc(man, n->line,
                    325:                    n->pos + (cp - n->string), cp + 1);
                    326:                man->last->flags = n->flags & ~NODE_LINE;
                    327:                *cp = '\0';
                    328:        }
                    329:
                    330:        assert(n->tag == NULL);
                    331:        n->tag = mandoc_strdup(tag);
                    332:        n->flags |= NODE_HREF;
                    333: }
                    334:
                    335: /*
1.33      schwarze  336:  * When all tags have been set, decide where to put
                    337:  * the associated permalinks, and maybe move some tags
                    338:  * to the beginning of the respective paragraphs.
                    339:  */
                    340: void
1.35      schwarze  341: tag_postprocess(struct roff_man *man, struct roff_node *n)
1.33      schwarze  342: {
                    343:        if (n->flags & NODE_ID) {
                    344:                switch (n->tok) {
1.35      schwarze  345:                case MDOC_Pp:
1.36      schwarze  346:                        tag_move_href(man, n->next, n->tag);
1.35      schwarze  347:                        break;
1.33      schwarze  348:                case MDOC_Bd:
1.36      schwarze  349:                case MDOC_D1:
                    350:                case MDOC_Dl:
                    351:                        tag_move_href(man, n->child, n->tag);
                    352:                        break;
1.33      schwarze  353:                case MDOC_Bl:
                    354:                        /* XXX No permalink for now. */
                    355:                        break;
                    356:                default:
                    357:                        if (n->type == ROFFT_ELEM || n->tok == MDOC_Fo)
                    358:                                tag_move_id(n);
                    359:                        if (n->tok != MDOC_Tg)
                    360:                                n->flags |= NODE_HREF;
1.34      schwarze  361:                        else if ((n->flags & NODE_ID) == 0) {
1.33      schwarze  362:                                n->flags |= NODE_NOPRT;
1.34      schwarze  363:                                free(n->tag);
                    364:                                n->tag = NULL;
                    365:                        }
1.33      schwarze  366:                        break;
                    367:                }
                    368:        }
                    369:        for (n = n->child; n != NULL; n = n->next)
1.35      schwarze  370:                tag_postprocess(man, n);
1.1       schwarze  371: }