Annotation of src/usr.bin/mandoc/tag.c, Revision 1.38
1.38 ! schwarze 1: /* $OpenBSD: tag.c,v 1.37 2022/04/26 11:28:35 schwarze Exp $ */
1.1 schwarze 2: /*
1.38 ! schwarze 3: * Copyright (c) 2015, 2016, 2018, 2019, 2020, 2022, 2023
1.37 schwarze 4: * Ingo Schwarze <schwarze@openbsd.org>
1.1 schwarze 5: *
6: * Permission to use, copy, modify, and distribute this software for any
7: * purpose with or without fee is hereby granted, provided that the above
8: * copyright notice and this permission notice appear in all copies.
9: *
10: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
1.28 schwarze 17: *
18: * Functions to tag syntax tree nodes.
19: * For internal use by mandoc(1) validation modules only.
1.1 schwarze 20: */
21: #include <sys/types.h>
22:
1.27 schwarze 23: #include <assert.h>
1.20 schwarze 24: #include <limits.h>
1.1 schwarze 25: #include <stddef.h>
1.32 schwarze 26: #include <stdint.h>
1.38 ! schwarze 27: #include <stdio.h>
1.1 schwarze 28: #include <stdlib.h>
29: #include <string.h>
30:
31: #include "mandoc_aux.h"
1.10 schwarze 32: #include "mandoc_ohash.h"
1.38 ! schwarze 33: #include "mandoc.h"
1.28 schwarze 34: #include "roff.h"
1.33 schwarze 35: #include "mdoc.h"
1.35 schwarze 36: #include "roff_int.h"
1.1 schwarze 37: #include "tag.h"
38:
/*
 * One hash table entry, describing a single tag string
 * and the syntax tree nodes currently associated with it.
 */
struct tag_entry {
	struct roff_node	**nodes;	/* array of tagged nodes */
	size_t			  maxnodes;	/* slots allocated in nodes */
	size_t			  nnodes;	/* slots in use in nodes */
	int			  prio;		/* priority of the stored tag */
	char			  s[];		/* NUL-terminated tag string */
};
46:
1.36 schwarze 47: static void tag_move_href(struct roff_man *,
48: struct roff_node *, const char *);
1.33 schwarze 49: static void tag_move_id(struct roff_node *);
50:
1.1 schwarze 51: static struct ohash tag_data;
52:
53:
54: /*
1.28 schwarze 55: * Set up the ohash table to collect nodes
56: * where various marked-up terms are documented.
1.1 schwarze 57: */
1.28 schwarze 58: void
59: tag_alloc(void)
1.1 schwarze 60: {
1.28 schwarze 61: mandoc_ohash_init(&tag_data, 4, offsetof(struct tag_entry, s));
62: }
1.1 schwarze 63:
1.28 schwarze 64: void
65: tag_free(void)
66: {
67: struct tag_entry *entry;
68: unsigned int slot;
1.12 schwarze 69:
1.29 schwarze 70: if (tag_data.info.free == NULL)
71: return;
1.28 schwarze 72: entry = ohash_first(&tag_data, &slot);
73: while (entry != NULL) {
74: free(entry->nodes);
75: free(entry);
76: entry = ohash_next(&tag_data, &slot);
1.22 schwarze 77: }
1.28 schwarze 78: ohash_delete(&tag_data);
1.29 schwarze 79: tag_data.info.free = NULL;
1.1 schwarze 80: }
81:
82: /*
1.28 schwarze 83: * Set a node where a term is defined,
1.37 schwarze 84: * unless the term is already defined at a lower priority.
1.1 schwarze 85: */
86: void
1.28 schwarze 87: tag_put(const char *s, int prio, struct roff_node *n)
1.1 schwarze 88: {
89: struct tag_entry *entry;
1.34 schwarze 90: struct roff_node *nold;
1.38 ! schwarze 91: const char *se, *src;
! 92: char *cpy;
1.5 schwarze 93: size_t len;
1.1 schwarze 94: unsigned int slot;
1.38 ! schwarze 95: int changed;
1.1 schwarze 96:
1.27 schwarze 97: assert(prio <= TAG_FALLBACK);
1.20 schwarze 98:
1.37 schwarze 99: /*
100: * If the node is already tagged, the existing tag is
101: * explicit and we are now about to add an implicit tag.
102: * Don't do that; just skip implicit tagging if the author
103: * specified an explicit tag.
104: */
105:
106: if (n->flags & NODE_ID)
107: return;
108:
109: /* Determine the implicit tag. */
110:
1.38 ! schwarze 111: changed = 1;
1.28 schwarze 112: if (s == NULL) {
113: if (n->child == NULL || n->child->type != ROFFT_TEXT)
114: return;
115: s = n->child->string;
1.30 schwarze 116: switch (s[0]) {
117: case '-':
118: s++;
119: break;
120: case '\\':
121: switch (s[1]) {
122: case '&':
123: case '-':
124: case 'e':
125: s += 2;
126: break;
127: default:
1.38 ! schwarze 128: return;
1.30 schwarze 129: }
130: break;
131: default:
1.38 ! schwarze 132: changed = 0;
1.30 schwarze 133: break;
134: }
1.28 schwarze 135: }
1.20 schwarze 136:
137: /*
1.38 ! schwarze 138: * Translate \- and ASCII_HYPH to plain '-'.
1.24 schwarze 139: * Skip whitespace and escapes and whatever follows,
1.20 schwarze 140: * and if there is any, downgrade the priority.
141: */
142:
1.38 ! schwarze 143: cpy = mandoc_malloc(strlen(s) + 1);
! 144: for (src = s, len = 0; *src != '\0'; src++, len++) {
! 145: switch (*src) {
! 146: case '\t':
! 147: case ' ':
! 148: changed = 1;
! 149: break;
! 150: case ASCII_HYPH:
! 151: cpy[len] = '-';
! 152: changed = 1;
! 153: continue;
! 154: case '\\':
! 155: if (src[1] != '-')
! 156: break;
! 157: src++;
! 158: changed = 1;
! 159: /* FALLTHROUGH */
! 160: default:
! 161: cpy[len] = *src;
! 162: continue;
! 163: }
! 164: break;
! 165: }
1.20 schwarze 166: if (len == 0)
1.38 ! schwarze 167: goto out;
! 168: cpy[len] = '\0';
1.14 schwarze 169:
1.38 ! schwarze 170: if (*src != '\0' && prio < TAG_WEAK)
1.27 schwarze 171: prio = TAG_WEAK;
1.20 schwarze 172:
1.38 ! schwarze 173: s = cpy;
! 174: se = cpy + len;
1.20 schwarze 175: slot = ohash_qlookupi(&tag_data, s, &se);
1.1 schwarze 176: entry = ohash_find(&tag_data, slot);
1.14 schwarze 177:
1.28 schwarze 178: /* Build a new entry. */
179:
1.1 schwarze 180: if (entry == NULL) {
1.20 schwarze 181: entry = mandoc_malloc(sizeof(*entry) + len + 1);
1.38 ! schwarze 182: memcpy(entry->s, s, len + 1);
1.28 schwarze 183: entry->nodes = NULL;
184: entry->maxnodes = entry->nnodes = 0;
1.1 schwarze 185: ohash_insert(&tag_data, slot, entry);
1.28 schwarze 186: }
1.14 schwarze 187:
1.28 schwarze 188: /*
189: * Lower priority numbers take precedence.
190: * If a better entry is already present, ignore the new one.
191: */
192:
193: else if (entry->prio < prio)
1.38 ! schwarze 194: goto out;
1.28 schwarze 195:
196: /*
197: * If the existing entry is worse, clear it.
198: * In addition, a tag with priority TAG_FALLBACK
199: * is only used if the tag occurs exactly once.
200: */
1.14 schwarze 201:
1.28 schwarze 202: else if (entry->prio > prio || prio == TAG_FALLBACK) {
1.34 schwarze 203: while (entry->nnodes > 0) {
204: nold = entry->nodes[--entry->nnodes];
205: nold->flags &= ~NODE_ID;
206: free(nold->tag);
207: nold->tag = NULL;
208: }
1.27 schwarze 209: if (prio == TAG_FALLBACK) {
1.28 schwarze 210: entry->prio = TAG_DELETE;
1.38 ! schwarze 211: goto out;
1.16 schwarze 212: }
1.14 schwarze 213: }
214:
1.28 schwarze 215: /* Remember the new node. */
1.14 schwarze 216:
1.28 schwarze 217: if (entry->maxnodes == entry->nnodes) {
218: entry->maxnodes += 4;
219: entry->nodes = mandoc_reallocarray(entry->nodes,
220: entry->maxnodes, sizeof(*entry->nodes));
1.14 schwarze 221: }
1.28 schwarze 222: entry->nodes[entry->nnodes++] = n;
1.4 schwarze 223: entry->prio = prio;
1.28 schwarze 224: n->flags |= NODE_ID;
1.38 ! schwarze 225: if (changed) {
1.34 schwarze 226: assert(n->tag == NULL);
227: n->tag = mandoc_strndup(s, len);
1.28 schwarze 228: }
1.38 ! schwarze 229:
! 230: out:
! 231: free(cpy);
1.1 schwarze 232: }
233:
1.31 schwarze 234: int
235: tag_exists(const char *tag)
1.1 schwarze 236: {
1.31 schwarze 237: return ohash_find(&tag_data, ohash_qlookup(&tag_data, tag)) != NULL;
1.33 schwarze 238: }
239:
240: /*
241: * For in-line elements, move the link target
242: * to the enclosing paragraph when appropriate.
243: */
244: static void
245: tag_move_id(struct roff_node *n)
246: {
247: struct roff_node *np;
248:
249: np = n;
250: for (;;) {
251: if (np->prev != NULL)
252: np = np->prev;
253: else if ((np = np->parent) == NULL)
254: return;
255: switch (np->tok) {
256: case MDOC_It:
257: switch (np->parent->parent->norm->Bl.type) {
258: case LIST_column:
259: /* Target the ROFFT_BLOCK = <tr>. */
260: np = np->parent;
261: break;
262: case LIST_diag:
263: case LIST_hang:
264: case LIST_inset:
265: case LIST_ohang:
266: case LIST_tag:
267: /* Target the ROFFT_HEAD = <dt>. */
268: np = np->parent->head;
269: break;
270: default:
271: /* Target the ROFF_BODY = <li>. */
272: break;
273: }
274: /* FALLTHROUGH */
275: case MDOC_Pp: /* Target the ROFFT_ELEM = <p>. */
1.34 schwarze 276: if (np->tag == NULL) {
277: np->tag = mandoc_strdup(n->tag == NULL ?
278: n->child->string : n->tag);
1.33 schwarze 279: np->flags |= NODE_ID;
280: n->flags &= ~NODE_ID;
281: }
282: return;
283: case MDOC_Sh:
284: case MDOC_Ss:
285: case MDOC_Bd:
286: case MDOC_Bl:
287: case MDOC_D1:
288: case MDOC_Dl:
289: case MDOC_Rs:
290: /* Do not move past major blocks. */
291: return;
292: default:
293: /*
294: * Move past in-line content and partial
295: * blocks, for example .It Xo or .It Bq Er.
296: */
297: break;
298: }
299: }
300: }
301:
302: /*
1.36 schwarze 303: * When a paragraph is tagged and starts with text,
304: * move the permalink to the first few words.
305: */
306: static void
307: tag_move_href(struct roff_man *man, struct roff_node *n, const char *tag)
308: {
309: char *cp;
310:
311: if (n == NULL || n->type != ROFFT_TEXT ||
312: *n->string == '\0' || *n->string == ' ')
313: return;
314:
315: cp = n->string;
316: while (cp != NULL && cp - n->string < 5)
317: cp = strchr(cp + 1, ' ');
318:
319: /* If the first text node is longer, split it. */
320:
321: if (cp != NULL && cp[1] != '\0') {
322: man->last = n;
323: man->next = ROFF_NEXT_SIBLING;
324: roff_word_alloc(man, n->line,
325: n->pos + (cp - n->string), cp + 1);
326: man->last->flags = n->flags & ~NODE_LINE;
327: *cp = '\0';
328: }
329:
330: assert(n->tag == NULL);
331: n->tag = mandoc_strdup(tag);
332: n->flags |= NODE_HREF;
333: }
334:
335: /*
1.33 schwarze 336: * When all tags have been set, decide where to put
337: * the associated permalinks, and maybe move some tags
338: * to the beginning of the respective paragraphs.
339: */
340: void
1.35 schwarze 341: tag_postprocess(struct roff_man *man, struct roff_node *n)
1.33 schwarze 342: {
343: if (n->flags & NODE_ID) {
344: switch (n->tok) {
1.35 schwarze 345: case MDOC_Pp:
1.36 schwarze 346: tag_move_href(man, n->next, n->tag);
1.35 schwarze 347: break;
1.33 schwarze 348: case MDOC_Bd:
1.36 schwarze 349: case MDOC_D1:
350: case MDOC_Dl:
351: tag_move_href(man, n->child, n->tag);
352: break;
1.33 schwarze 353: case MDOC_Bl:
354: /* XXX No permalink for now. */
355: break;
356: default:
357: if (n->type == ROFFT_ELEM || n->tok == MDOC_Fo)
358: tag_move_id(n);
359: if (n->tok != MDOC_Tg)
360: n->flags |= NODE_HREF;
1.34 schwarze 361: else if ((n->flags & NODE_ID) == 0) {
1.33 schwarze 362: n->flags |= NODE_NOPRT;
1.34 schwarze 363: free(n->tag);
364: n->tag = NULL;
365: }
1.33 schwarze 366: break;
367: }
368: }
369: for (n = n->child; n != NULL; n = n->next)
1.35 schwarze 370: tag_postprocess(man, n);
1.1 schwarze 371: }