Annotation of src/usr.bin/mandoc/tag.c, Revision 1.36
1.36 ! schwarze 1: /* $OpenBSD: tag.c,v 1.35 2020/04/18 20:28:46 schwarze Exp $ */
1.1 schwarze 2: /*
1.26 schwarze 3: * Copyright (c) 2015,2016,2018,2019,2020 Ingo Schwarze <schwarze@openbsd.org>
1.1 schwarze 4: *
5: * Permission to use, copy, modify, and distribute this software for any
6: * purpose with or without fee is hereby granted, provided that the above
7: * copyright notice and this permission notice appear in all copies.
8: *
9: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
1.28 schwarze 16: *
17: * Functions to tag syntax tree nodes.
18: * For internal use by mandoc(1) validation modules only.
1.1 schwarze 19: */
20: #include <sys/types.h>
21:
1.27 schwarze 22: #include <assert.h>
1.20 schwarze 23: #include <limits.h>
1.1 schwarze 24: #include <stddef.h>
1.32 schwarze 25: #include <stdint.h>
1.1 schwarze 26: #include <stdlib.h>
27: #include <string.h>
28:
29: #include "mandoc_aux.h"
1.10 schwarze 30: #include "mandoc_ohash.h"
1.28 schwarze 31: #include "roff.h"
1.33 schwarze 32: #include "mdoc.h"
1.35 schwarze 33: #include "roff_int.h"
1.1 schwarze 34: #include "tag.h"
35:
/*
 * One entry of the tag hash table: a tag name together with
 * the syntax tree nodes currently recorded for that name.
 */
struct tag_entry {
	struct roff_node **nodes;	/* Array of recorded nodes. */
	size_t		  maxnodes;	/* Slots allocated in nodes[]. */
	size_t		  nnodes;	/* Slots of nodes[] in use. */
	int		  prio;		/* Priority; lower numbers win. */
	char		  s[];		/* NUL-terminated tag name. */
};
43:
1.36 ! schwarze 44: static void tag_move_href(struct roff_man *,
! 45: struct roff_node *, const char *);
1.33 schwarze 46: static void tag_move_id(struct roff_node *);
47:
1.1 schwarze 48: static struct ohash tag_data;
49:
50:
51: /*
1.28 schwarze 52: * Set up the ohash table to collect nodes
53: * where various marked-up terms are documented.
1.1 schwarze 54: */
1.28 schwarze 55: void
56: tag_alloc(void)
1.1 schwarze 57: {
1.28 schwarze 58: mandoc_ohash_init(&tag_data, 4, offsetof(struct tag_entry, s));
59: }
1.1 schwarze 60:
1.28 schwarze 61: void
62: tag_free(void)
63: {
64: struct tag_entry *entry;
65: unsigned int slot;
1.12 schwarze 66:
1.29 schwarze 67: if (tag_data.info.free == NULL)
68: return;
1.28 schwarze 69: entry = ohash_first(&tag_data, &slot);
70: while (entry != NULL) {
71: free(entry->nodes);
72: free(entry);
73: entry = ohash_next(&tag_data, &slot);
1.22 schwarze 74: }
1.28 schwarze 75: ohash_delete(&tag_data);
1.29 schwarze 76: tag_data.info.free = NULL;
1.1 schwarze 77: }
78:
79: /*
1.28 schwarze 80: * Set a node where a term is defined,
1.20 schwarze 81: * unless it is already defined at a lower priority.
1.1 schwarze 82: */
83: void
1.28 schwarze 84: tag_put(const char *s, int prio, struct roff_node *n)
1.1 schwarze 85: {
86: struct tag_entry *entry;
1.34 schwarze 87: struct roff_node *nold;
1.20 schwarze 88: const char *se;
1.5 schwarze 89: size_t len;
1.1 schwarze 90: unsigned int slot;
91:
1.27 schwarze 92: assert(prio <= TAG_FALLBACK);
1.20 schwarze 93:
1.28 schwarze 94: if (s == NULL) {
95: if (n->child == NULL || n->child->type != ROFFT_TEXT)
96: return;
97: s = n->child->string;
1.30 schwarze 98: switch (s[0]) {
99: case '-':
100: s++;
101: break;
102: case '\\':
103: switch (s[1]) {
104: case '&':
105: case '-':
106: case 'e':
107: s += 2;
108: break;
109: default:
110: break;
111: }
112: break;
113: default:
114: break;
115: }
1.28 schwarze 116: }
1.20 schwarze 117:
118: /*
1.24 schwarze 119: * Skip whitespace and escapes and whatever follows,
1.20 schwarze 120: * and if there is any, downgrade the priority.
121: */
122:
1.24 schwarze 123: len = strcspn(s, " \t\\");
1.20 schwarze 124: if (len == 0)
1.1 schwarze 125: return;
1.14 schwarze 126:
1.20 schwarze 127: se = s + len;
1.27 schwarze 128: if (*se != '\0' && prio < TAG_WEAK)
129: prio = TAG_WEAK;
1.20 schwarze 130:
131: slot = ohash_qlookupi(&tag_data, s, &se);
1.1 schwarze 132: entry = ohash_find(&tag_data, slot);
1.14 schwarze 133:
1.28 schwarze 134: /* Build a new entry. */
135:
1.1 schwarze 136: if (entry == NULL) {
1.20 schwarze 137: entry = mandoc_malloc(sizeof(*entry) + len + 1);
1.1 schwarze 138: memcpy(entry->s, s, len);
1.20 schwarze 139: entry->s[len] = '\0';
1.28 schwarze 140: entry->nodes = NULL;
141: entry->maxnodes = entry->nnodes = 0;
1.1 schwarze 142: ohash_insert(&tag_data, slot, entry);
1.28 schwarze 143: }
1.14 schwarze 144:
1.28 schwarze 145: /*
146: * Lower priority numbers take precedence.
147: * If a better entry is already present, ignore the new one.
148: */
149:
150: else if (entry->prio < prio)
151: return;
152:
153: /*
154: * If the existing entry is worse, clear it.
155: * In addition, a tag with priority TAG_FALLBACK
156: * is only used if the tag occurs exactly once.
157: */
1.14 schwarze 158:
1.28 schwarze 159: else if (entry->prio > prio || prio == TAG_FALLBACK) {
1.34 schwarze 160: while (entry->nnodes > 0) {
161: nold = entry->nodes[--entry->nnodes];
162: nold->flags &= ~NODE_ID;
163: free(nold->tag);
164: nold->tag = NULL;
165: }
1.27 schwarze 166: if (prio == TAG_FALLBACK) {
1.28 schwarze 167: entry->prio = TAG_DELETE;
1.16 schwarze 168: return;
169: }
1.14 schwarze 170: }
171:
1.28 schwarze 172: /* Remember the new node. */
1.14 schwarze 173:
1.28 schwarze 174: if (entry->maxnodes == entry->nnodes) {
175: entry->maxnodes += 4;
176: entry->nodes = mandoc_reallocarray(entry->nodes,
177: entry->maxnodes, sizeof(*entry->nodes));
1.14 schwarze 178: }
1.28 schwarze 179: entry->nodes[entry->nnodes++] = n;
1.4 schwarze 180: entry->prio = prio;
1.28 schwarze 181: n->flags |= NODE_ID;
182: if (n->child == NULL || n->child->string != s || *se != '\0') {
1.34 schwarze 183: assert(n->tag == NULL);
184: n->tag = mandoc_strndup(s, len);
1.28 schwarze 185: }
1.1 schwarze 186: }
187:
1.31 schwarze 188: int
189: tag_exists(const char *tag)
1.1 schwarze 190: {
1.31 schwarze 191: return ohash_find(&tag_data, ohash_qlookup(&tag_data, tag)) != NULL;
1.33 schwarze 192: }
193:
194: /*
195: * For in-line elements, move the link target
196: * to the enclosing paragraph when appropriate.
197: */
198: static void
199: tag_move_id(struct roff_node *n)
200: {
201: struct roff_node *np;
202:
203: np = n;
204: for (;;) {
205: if (np->prev != NULL)
206: np = np->prev;
207: else if ((np = np->parent) == NULL)
208: return;
209: switch (np->tok) {
210: case MDOC_It:
211: switch (np->parent->parent->norm->Bl.type) {
212: case LIST_column:
213: /* Target the ROFFT_BLOCK = <tr>. */
214: np = np->parent;
215: break;
216: case LIST_diag:
217: case LIST_hang:
218: case LIST_inset:
219: case LIST_ohang:
220: case LIST_tag:
221: /* Target the ROFFT_HEAD = <dt>. */
222: np = np->parent->head;
223: break;
224: default:
225: /* Target the ROFF_BODY = <li>. */
226: break;
227: }
228: /* FALLTHROUGH */
229: case MDOC_Pp: /* Target the ROFFT_ELEM = <p>. */
1.34 schwarze 230: if (np->tag == NULL) {
231: np->tag = mandoc_strdup(n->tag == NULL ?
232: n->child->string : n->tag);
1.33 schwarze 233: np->flags |= NODE_ID;
234: n->flags &= ~NODE_ID;
235: }
236: return;
237: case MDOC_Sh:
238: case MDOC_Ss:
239: case MDOC_Bd:
240: case MDOC_Bl:
241: case MDOC_D1:
242: case MDOC_Dl:
243: case MDOC_Rs:
244: /* Do not move past major blocks. */
245: return;
246: default:
247: /*
248: * Move past in-line content and partial
249: * blocks, for example .It Xo or .It Bq Er.
250: */
251: break;
252: }
253: }
254: }
255:
256: /*
1.36 ! schwarze 257: * When a paragraph is tagged and starts with text,
! 258: * move the permalink to the first few words.
! 259: */
! 260: static void
! 261: tag_move_href(struct roff_man *man, struct roff_node *n, const char *tag)
! 262: {
! 263: char *cp;
! 264:
! 265: if (n == NULL || n->type != ROFFT_TEXT ||
! 266: *n->string == '\0' || *n->string == ' ')
! 267: return;
! 268:
! 269: cp = n->string;
! 270: while (cp != NULL && cp - n->string < 5)
! 271: cp = strchr(cp + 1, ' ');
! 272:
! 273: /* If the first text node is longer, split it. */
! 274:
! 275: if (cp != NULL && cp[1] != '\0') {
! 276: man->last = n;
! 277: man->next = ROFF_NEXT_SIBLING;
! 278: roff_word_alloc(man, n->line,
! 279: n->pos + (cp - n->string), cp + 1);
! 280: man->last->flags = n->flags & ~NODE_LINE;
! 281: *cp = '\0';
! 282: }
! 283:
! 284: assert(n->tag == NULL);
! 285: n->tag = mandoc_strdup(tag);
! 286: n->flags |= NODE_HREF;
! 287: }
! 288:
! 289: /*
1.33 schwarze 290: * When all tags have been set, decide where to put
291: * the associated permalinks, and maybe move some tags
292: * to the beginning of the respective paragraphs.
293: */
294: void
1.35 schwarze 295: tag_postprocess(struct roff_man *man, struct roff_node *n)
1.33 schwarze 296: {
297: if (n->flags & NODE_ID) {
298: switch (n->tok) {
1.35 schwarze 299: case MDOC_Pp:
1.36 ! schwarze 300: tag_move_href(man, n->next, n->tag);
1.35 schwarze 301: break;
1.33 schwarze 302: case MDOC_Bd:
1.36 ! schwarze 303: case MDOC_D1:
! 304: case MDOC_Dl:
! 305: tag_move_href(man, n->child, n->tag);
! 306: break;
1.33 schwarze 307: case MDOC_Bl:
308: /* XXX No permalink for now. */
309: break;
310: default:
311: if (n->type == ROFFT_ELEM || n->tok == MDOC_Fo)
312: tag_move_id(n);
313: if (n->tok != MDOC_Tg)
314: n->flags |= NODE_HREF;
1.34 schwarze 315: else if ((n->flags & NODE_ID) == 0) {
1.33 schwarze 316: n->flags |= NODE_NOPRT;
1.34 schwarze 317: free(n->tag);
318: n->tag = NULL;
319: }
1.33 schwarze 320: break;
321: }
322: }
323: for (n = n->child; n != NULL; n = n->next)
1.35 schwarze 324: tag_postprocess(man, n);
1.1 schwarze 325: }