Annotation of src/usr.bin/mandoc/dba.c, Revision 1.7
1.7 ! schwarze 1: /* $OpenBSD: dba.c,v 1.6 2017/01/15 15:28:51 schwarze Exp $ */
1.1 schwarze 2: /*
1.6 schwarze 3: * Copyright (c) 2016, 2017 Ingo Schwarze <schwarze@openbsd.org>
1.1 schwarze 4: *
5: * Permission to use, copy, modify, and distribute this software for any
6: * purpose with or without fee is hereby granted, provided that the above
7: * copyright notice and this permission notice appear in all copies.
8: *
9: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16: *
17: * Allocation-based version of the mandoc database, for read-write access.
18: * The interface is defined in "dba.h".
19: */
20: #include <sys/types.h>
1.2 schwarze 21: #include <endian.h>
1.1 schwarze 22: #include <errno.h>
1.6 schwarze 23: #include <stddef.h>
1.1 schwarze 24: #include <stdint.h>
25: #include <stdlib.h>
26: #include <string.h>
27: #include <unistd.h>
28:
29: #include "mandoc_aux.h"
1.6 schwarze 30: #include "mandoc_ohash.h"
1.1 schwarze 31: #include "mansearch.h"
32: #include "dba_write.h"
33: #include "dba_array.h"
34: #include "dba.h"
35:
1.6 schwarze 36: struct macro_entry {
37: struct dba_array *pages;
38: char value[];
39: };
40:
1.1 schwarze 41: static void *prepend(const char *, char);
42: static void dba_pages_write(struct dba_array *);
43: static int compare_names(const void *, const void *);
44: static int compare_strings(const void *, const void *);
1.6 schwarze 45:
46: static struct macro_entry
47: *get_macro_entry(struct ohash *, const char *, int32_t);
1.1 schwarze 48: static void dba_macros_write(struct dba_array *);
1.6 schwarze 49: static void dba_macro_write(struct ohash *);
50: static int compare_entries(const void *, const void *);
1.1 schwarze 51:
52:
53: /*** top-level functions **********************************************/
54:
55: struct dba *
56: dba_new(int32_t npages)
57: {
58: struct dba *dba;
1.6 schwarze 59: struct ohash *macro;
1.1 schwarze 60: int32_t im;
61:
62: dba = mandoc_malloc(sizeof(*dba));
63: dba->pages = dba_array_new(npages, DBA_GROW);
64: dba->macros = dba_array_new(MACRO_MAX, 0);
1.6 schwarze 65: for (im = 0; im < MACRO_MAX; im++) {
66: macro = mandoc_malloc(sizeof(*macro));
67: mandoc_ohash_init(macro, 4,
68: offsetof(struct macro_entry, value));
69: dba_array_set(dba->macros, im, macro);
70: }
1.1 schwarze 71: return dba;
72: }
73:
74: void
75: dba_free(struct dba *dba)
76: {
1.6 schwarze 77: struct dba_array *page;
78: struct ohash *macro;
79: struct macro_entry *entry;
80: unsigned int slot;
1.1 schwarze 81:
82: dba_array_FOREACH(dba->macros, macro) {
1.6 schwarze 83: for (entry = ohash_first(macro, &slot); entry != NULL;
84: entry = ohash_next(macro, &slot)) {
85: dba_array_free(entry->pages);
86: free(entry);
1.1 schwarze 87: }
1.6 schwarze 88: ohash_delete(macro);
89: free(macro);
1.1 schwarze 90: }
91: dba_array_free(dba->macros);
92:
93: dba_array_undel(dba->pages);
94: dba_array_FOREACH(dba->pages, page) {
95: dba_array_free(dba_array_get(page, DBP_NAME));
96: dba_array_free(dba_array_get(page, DBP_SECT));
97: dba_array_free(dba_array_get(page, DBP_ARCH));
98: free(dba_array_get(page, DBP_DESC));
99: dba_array_free(dba_array_get(page, DBP_FILE));
100: dba_array_free(page);
101: }
102: dba_array_free(dba->pages);
103:
104: free(dba);
105: }
106:
107: /*
108: * Write the complete mandoc database to disk; the format is:
109: * - One integer each for magic and version.
110: * - One pointer each to the macros table and to the final magic.
111: * - The pages table.
112: * - The macros table.
113: * - And at the very end, the magic integer again.
114: */
115: int
116: dba_write(const char *fname, struct dba *dba)
117: {
118: int save_errno;
119: int32_t pos_end, pos_macros, pos_macros_ptr;
120:
121: if (dba_open(fname) == -1)
122: return -1;
123: dba_int_write(MANDOCDB_MAGIC);
124: dba_int_write(MANDOCDB_VERSION);
125: pos_macros_ptr = dba_skip(1, 2);
126: dba_pages_write(dba->pages);
127: pos_macros = dba_tell();
128: dba_macros_write(dba->macros);
129: pos_end = dba_tell();
130: dba_int_write(MANDOCDB_MAGIC);
131: dba_seek(pos_macros_ptr);
132: dba_int_write(pos_macros);
133: dba_int_write(pos_end);
134: if (dba_close() == -1) {
135: save_errno = errno;
136: unlink(fname);
137: errno = save_errno;
138: return -1;
139: }
140: return 0;
141: }
142:
143:
144: /*** functions for handling pages *************************************/
145:
146: /*
147: * Create a new page and append it to the pages table.
148: */
149: struct dba_array *
1.5 schwarze 150: dba_page_new(struct dba_array *pages, const char *arch,
151: const char *desc, const char *file, enum form form)
1.1 schwarze 152: {
153: struct dba_array *page, *entry;
154:
155: page = dba_array_new(DBP_MAX, 0);
156: entry = dba_array_new(1, DBA_STR | DBA_GROW);
157: dba_array_add(page, entry);
158: entry = dba_array_new(1, DBA_STR | DBA_GROW);
159: dba_array_add(page, entry);
160: if (arch != NULL && *arch != '\0') {
161: entry = dba_array_new(1, DBA_STR | DBA_GROW);
162: dba_array_add(entry, (void *)arch);
163: } else
164: entry = NULL;
165: dba_array_add(page, entry);
166: dba_array_add(page, mandoc_strdup(desc));
167: entry = dba_array_new(1, DBA_STR | DBA_GROW);
168: dba_array_add(entry, prepend(file, form));
169: dba_array_add(page, entry);
170: dba_array_add(pages, page);
171: return page;
172: }
173:
174: /*
175: * Add a section, architecture, or file name to an existing page.
176: * Passing the NULL pointer for the architecture makes the page MI.
177: * In that case, any earlier or later architectures are ignored.
178: */
179: void
180: dba_page_add(struct dba_array *page, int32_t ie, const char *str)
181: {
182: struct dba_array *entries;
183: char *entry;
184:
185: entries = dba_array_get(page, ie);
186: if (ie == DBP_ARCH) {
187: if (entries == NULL)
188: return;
1.3 schwarze 189: if (str == NULL || *str == '\0') {
1.1 schwarze 190: dba_array_free(entries);
191: dba_array_set(page, DBP_ARCH, NULL);
192: return;
193: }
194: }
195: if (*str == '\0')
196: return;
197: dba_array_FOREACH(entries, entry) {
198: if (ie == DBP_FILE && *entry < ' ')
199: entry++;
200: if (strcmp(entry, str) == 0)
201: return;
202: }
203: dba_array_add(entries, (void *)str);
204: }
205:
206: /*
207: * Add an additional name to an existing page.
208: */
209: void
210: dba_page_alias(struct dba_array *page, const char *name, uint64_t mask)
211: {
212: struct dba_array *entries;
213: char *entry;
214: char maskbyte;
215:
216: if (*name == '\0')
217: return;
218: maskbyte = mask & NAME_MASK;
219: entries = dba_array_get(page, DBP_NAME);
220: dba_array_FOREACH(entries, entry) {
221: if (strcmp(entry + 1, name) == 0) {
222: *entry |= maskbyte;
223: return;
224: }
225: }
226: dba_array_add(entries, prepend(name, maskbyte));
227: }
228:
229: /*
230: * Return a pointer to a temporary copy of instr with inbyte prepended.
231: */
232: static void *
233: prepend(const char *instr, char inbyte)
234: {
235: static char *outstr = NULL;
236: static size_t outlen = 0;
237: size_t newlen;
238:
239: newlen = strlen(instr) + 1;
240: if (newlen > outlen) {
241: outstr = mandoc_realloc(outstr, newlen + 1);
242: outlen = newlen;
243: }
244: *outstr = inbyte;
245: memcpy(outstr + 1, instr, newlen);
246: return outstr;
247: }
248:
249: /*
250: * Write the pages table to disk; the format is:
251: * - One integer containing the number of pages.
252: * - For each page, five pointers to the names, sections,
253: * architectures, description, and file names of the page.
254: * MI pages write 0 instead of the architecture pointer.
255: * - One list each for names, sections, architectures, descriptions and
256: * file names. The description for each page ends with a NUL byte.
257: * For all the other lists, each string ends with a NUL byte,
258: * and the last string for a page ends with two NUL bytes.
259: * - To assure alignment of following integers,
260: * the end is padded with NUL bytes up to a multiple of four bytes.
261: */
262: static void
263: dba_pages_write(struct dba_array *pages)
264: {
265: struct dba_array *page, *entry;
266: int32_t pos_pages, pos_end;
267:
268: pos_pages = dba_array_writelen(pages, 5);
269: dba_array_FOREACH(pages, page) {
270: dba_array_setpos(page, DBP_NAME, dba_tell());
271: entry = dba_array_get(page, DBP_NAME);
272: dba_array_sort(entry, compare_names);
273: dba_array_writelst(entry);
274: }
275: dba_array_FOREACH(pages, page) {
276: dba_array_setpos(page, DBP_SECT, dba_tell());
277: entry = dba_array_get(page, DBP_SECT);
278: dba_array_sort(entry, compare_strings);
279: dba_array_writelst(entry);
280: }
281: dba_array_FOREACH(pages, page) {
282: if ((entry = dba_array_get(page, DBP_ARCH)) != NULL) {
283: dba_array_setpos(page, DBP_ARCH, dba_tell());
284: dba_array_sort(entry, compare_strings);
285: dba_array_writelst(entry);
286: } else
287: dba_array_setpos(page, DBP_ARCH, 0);
288: }
289: dba_array_FOREACH(pages, page) {
290: dba_array_setpos(page, DBP_DESC, dba_tell());
291: dba_str_write(dba_array_get(page, DBP_DESC));
292: }
293: dba_array_FOREACH(pages, page) {
294: dba_array_setpos(page, DBP_FILE, dba_tell());
295: dba_array_writelst(dba_array_get(page, DBP_FILE));
296: }
297: pos_end = dba_align();
298: dba_seek(pos_pages);
299: dba_array_FOREACH(pages, page)
300: dba_array_writepos(page);
301: dba_seek(pos_end);
302: }
303:
304: static int
305: compare_names(const void *vp1, const void *vp2)
306: {
307: const char *cp1, *cp2;
308: int diff;
309:
1.7 ! schwarze 310: cp1 = *(const char * const *)vp1;
! 311: cp2 = *(const char * const *)vp2;
1.1 schwarze 312: return (diff = *cp2 - *cp1) ? diff :
313: strcasecmp(cp1 + 1, cp2 + 1);
314: }
315:
316: static int
317: compare_strings(const void *vp1, const void *vp2)
318: {
319: const char *cp1, *cp2;
320:
1.7 ! schwarze 321: cp1 = *(const char * const *)vp1;
! 322: cp2 = *(const char * const *)vp2;
1.1 schwarze 323: return strcmp(cp1, cp2);
324: }
325:
326: /*** functions for handling macros ************************************/
327:
328: /*
1.6 schwarze 329: * In the hash table for a single macro, look up an entry by
330: * the macro value or add an empty one if it doesn't exist yet.
331: */
332: static struct macro_entry *
333: get_macro_entry(struct ohash *macro, const char *value, int32_t np)
334: {
335: struct macro_entry *entry;
336: size_t len;
337: unsigned int slot;
338:
339: slot = ohash_qlookup(macro, value);
340: if ((entry = ohash_find(macro, slot)) == NULL) {
341: len = strlen(value) + 1;
342: entry = mandoc_malloc(sizeof(*entry) + len);
343: memcpy(&entry->value, value, len);
344: entry->pages = dba_array_new(np, DBA_GROW);
345: ohash_insert(macro, slot, entry);
346: }
347: return entry;
348: }
349:
350: /*
351: * In addition to get_macro_entry(), add multiple page references,
352: * converting them from the on-disk format (byte offsets in the file)
353: * to page pointers in memory.
1.1 schwarze 354: */
355: void
356: dba_macro_new(struct dba *dba, int32_t im, const char *value,
357: const int32_t *pp)
358: {
1.6 schwarze 359: struct macro_entry *entry;
1.1 schwarze 360: const int32_t *ip;
361: int32_t np;
362:
363: np = 0;
364: for (ip = pp; *ip; ip++)
365: np++;
1.6 schwarze 366:
367: entry = get_macro_entry(dba_array_get(dba->macros, im), value, np);
1.1 schwarze 368: for (ip = pp; *ip; ip++)
1.6 schwarze 369: dba_array_add(entry->pages, dba_array_get(dba->pages,
1.1 schwarze 370: be32toh(*ip) / 5 / sizeof(*ip) - 1));
371: }
372:
373: /*
1.6 schwarze 374: * In addition to get_macro_entry(), add one page reference,
375: * directly taking the in-memory page pointer as an argument.
1.1 schwarze 376: */
377: void
378: dba_macro_add(struct dba_array *macros, int32_t im, const char *value,
379: struct dba_array *page)
380: {
1.6 schwarze 381: struct macro_entry *entry;
1.1 schwarze 382:
383: if (*value == '\0')
384: return;
1.6 schwarze 385: entry = get_macro_entry(dba_array_get(macros, im), value, 1);
386: dba_array_add(entry->pages, page);
1.1 schwarze 387: }
388:
389: /*
390: * Write the macros table to disk; the format is:
391: * - The number of macro tables (actually, MACRO_MAX).
392: * - That number of pointers to the individual macro tables.
393: * - The individual macro tables.
394: */
395: static void
396: dba_macros_write(struct dba_array *macros)
397: {
1.6 schwarze 398: struct ohash *macro;
1.1 schwarze 399: int32_t im, pos_macros, pos_end;
400:
401: pos_macros = dba_array_writelen(macros, 1);
402: im = 0;
403: dba_array_FOREACH(macros, macro) {
404: dba_array_setpos(macros, im++, dba_tell());
405: dba_macro_write(macro);
406: }
407: pos_end = dba_tell();
408: dba_seek(pos_macros);
409: dba_array_writepos(macros);
410: dba_seek(pos_end);
411: }
412:
413: /*
414: * Write one individual macro table to disk; the format is:
415: * - The number of entries in the table.
416: * - For each entry, two pointers, the first one to the value
417: * and the second one to the list of pages.
418: * - A list of values, each ending in a NUL byte.
419: * - To assure alignment of following integers,
420: * padding with NUL bytes up to a multiple of four bytes.
421: * - A list of pointers to pages, each list ending in a 0 integer.
422: */
423: static void
1.6 schwarze 424: dba_macro_write(struct ohash *macro)
1.1 schwarze 425: {
1.6 schwarze 426: struct macro_entry **entries, *entry;
427: struct dba_array *page;
428: int32_t *kpos, *dpos;
429: unsigned int ie, ne, slot;
430: int use;
431: int32_t addr, pos_macro, pos_end;
432:
433: /* Temporary storage for filtering and sorting. */
434:
435: ne = ohash_entries(macro);
436: entries = mandoc_reallocarray(NULL, ne, sizeof(*entries));
437: kpos = mandoc_reallocarray(NULL, ne, sizeof(*kpos));
438: dpos = mandoc_reallocarray(NULL, ne, sizeof(*dpos));
439:
440: /* Build a list of non-empty entries and sort it. */
441:
442: ne = 0;
443: for (entry = ohash_first(macro, &slot); entry != NULL;
444: entry = ohash_next(macro, &slot)) {
445: use = 0;
446: dba_array_FOREACH(entry->pages, page)
1.1 schwarze 447: if (dba_array_getpos(page))
1.6 schwarze 448: use = 1;
449: if (use)
450: entries[ne++] = entry;
451: }
452: qsort(entries, ne, sizeof(*entries), compare_entries);
453:
454: /* Number of entries, and space for the pointer pairs. */
455:
456: dba_int_write(ne);
457: pos_macro = dba_skip(2, ne);
458:
459: /* String table. */
460:
461: for (ie = 0; ie < ne; ie++) {
462: kpos[ie] = dba_tell();
463: dba_str_write(entries[ie]->value);
1.1 schwarze 464: }
465: dba_align();
1.6 schwarze 466:
467: /* Pages table. */
468:
469: for (ie = 0; ie < ne; ie++) {
470: dpos[ie] = dba_tell();
471: dba_array_FOREACH(entries[ie]->pages, page)
1.1 schwarze 472: if ((addr = dba_array_getpos(page)))
473: dba_int_write(addr);
474: dba_int_write(0);
475: }
476: pos_end = dba_tell();
1.6 schwarze 477:
478: /* Fill in the pointer pairs. */
479:
1.1 schwarze 480: dba_seek(pos_macro);
1.6 schwarze 481: for (ie = 0; ie < ne; ie++) {
482: dba_int_write(kpos[ie]);
483: dba_int_write(dpos[ie]);
484: }
1.1 schwarze 485: dba_seek(pos_end);
1.6 schwarze 486:
487: free(entries);
488: free(kpos);
489: free(dpos);
490: }
491:
492: static int
493: compare_entries(const void *vp1, const void *vp2)
494: {
495: const struct macro_entry *ep1, *ep2;
496:
1.7 ! schwarze 497: ep1 = *(const struct macro_entry * const *)vp1;
! 498: ep2 = *(const struct macro_entry * const *)vp2;
1.6 schwarze 499: return strcmp(ep1->value, ep2->value);
1.1 schwarze 500: }