Annotation of src/usr.bin/mandoc/mandocdb.c, Revision 1.75
1.75 ! schwarze 1: /* $Id: mandocdb.c,v 1.74 2014/03/19 22:20:36 schwarze Exp $ */
1.1 schwarze 2: /*
1.47 schwarze 3: * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
1.52 schwarze 4: * Copyright (c) 2011, 2012, 2013, 2014 Ingo Schwarze <schwarze@openbsd.org>
1.1 schwarze 5: *
6: * Permission to use, copy, modify, and distribute this software for any
7: * purpose with or without fee is hereby granted, provided that the above
8: * copyright notice and this permission notice appear in all copies.
9: *
10: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17: */
1.47 schwarze 18: #include <sys/stat.h>
1.72 schwarze 19: #include <sys/wait.h>
1.1 schwarze 20:
21: #include <assert.h>
1.33 schwarze 22: #include <ctype.h>
1.34 schwarze 23: #include <errno.h>
1.1 schwarze 24: #include <fcntl.h>
1.47 schwarze 25: #include <fts.h>
1.1 schwarze 26: #include <getopt.h>
1.44 schwarze 27: #include <limits.h>
1.47 schwarze 28: #include <stddef.h>
1.1 schwarze 29: #include <stdio.h>
30: #include <stdint.h>
31: #include <stdlib.h>
32: #include <string.h>
1.14 schwarze 33: #include <unistd.h>
1.1 schwarze 34:
1.47 schwarze 35: #include <ohash.h>
36: #include <sqlite3.h>
37:
38: #include "mdoc.h"
1.1 schwarze 39: #include "man.h"
40: #include "mandoc.h"
1.10 schwarze 41: #include "manpath.h"
1.47 schwarze 42: #include "mansearch.h"
1.1 schwarze 43:
1.68 schwarze 44: extern int mansearch_keymax;
45: extern const char *const mansearch_keynames[];
46:
/*
 * Convenience wrappers around the SQLite calls used in this file.
 * Each one reports a failure on stderr via sqlite3_errmsg() on the
 * file-scope handle "db" and then lets execution continue.
 * The SQL_BIND_* macros post-increment the index argument (_i),
 * which therefore has to be a modifiable lvalue.
 * Every expansion is wrapped in do { } while (0) so that it forms
 * exactly one statement: an "else" following a macro call can no
 * longer pair with the "if" hidden inside the macro.
 */
#define	SQL_EXEC(_v) do { \
	if (SQLITE_OK != sqlite3_exec(db, (_v), NULL, NULL, NULL)) \
		fprintf(stderr, "%s\n", sqlite3_errmsg(db)); \
} while (/*CONSTCOND*/0)
#define	SQL_BIND_TEXT(_s, _i, _v) do { \
	if (SQLITE_OK != sqlite3_bind_text \
	    ((_s), (_i)++, (_v), -1, SQLITE_STATIC)) \
		fprintf(stderr, "%s\n", sqlite3_errmsg(db)); \
} while (/*CONSTCOND*/0)
#define	SQL_BIND_INT(_s, _i, _v) do { \
	if (SQLITE_OK != sqlite3_bind_int \
	    ((_s), (_i)++, (_v))) \
		fprintf(stderr, "%s\n", sqlite3_errmsg(db)); \
} while (/*CONSTCOND*/0)
#define	SQL_BIND_INT64(_s, _i, _v) do { \
	if (SQLITE_OK != sqlite3_bind_int64 \
	    ((_s), (_i)++, (_v))) \
		fprintf(stderr, "%s\n", sqlite3_errmsg(db)); \
} while (/*CONSTCOND*/0)
#define	SQL_STEP(_s) do { \
	if (SQLITE_DONE != sqlite3_step((_s))) \
		fprintf(stderr, "%s\n", sqlite3_errmsg(db)); \
} while (/*CONSTCOND*/0)
1.1 schwarze 65:
/*
 * Operating mode, selected from the command line in mandocdb():
 * -C sets OP_CONFFILE, -d sets OP_UPDATE, -u sets OP_DELETE and
 * -t sets OP_TEST; without any of these the mode stays OP_DEFAULT.
 */
1.47 schwarze 66: enum op {
67: OP_DEFAULT = 0, /* new dbs from dir list or default config */
68: OP_CONFFILE, /* new databases from custom config file */
69: OP_UPDATE, /* delete/add entries in existing database */
70: OP_DELETE, /* delete entries from existing database */
71: OP_TEST /* change no databases, report potential problems */
72: };
1.11 schwarze 73:
/*
 * Format of a manual page.  Recorded three times per file:
 * from the directory name and from the file name suffix (struct mlink)
 * and from the file content (struct mpage).
 */
1.47 schwarze 74: enum form {
75: FORM_NONE, /* format is unknown */
76: FORM_SRC, /* format is -man or -mdoc */
77: FORM_CAT /* format is cat */
78: };
1.28 schwarze 79:
/*
 * Entry of the "strings" hash table.
 * mpages_merge() configures that table with the flexible array
 * member "key" as the hash key, so "key" has to stay the last member.
 */
1.47 schwarze 80: struct str {
1.53 schwarze 81: char *rendered; /* key in UTF-8 or ASCII form */
1.47 schwarze 82: const struct mpage *mpage; /* if set, the owning parse */
83: uint64_t mask; /* bitmask in sequence */
1.53 schwarze 84: char key[]; /* may contain escape sequences */
1.28 schwarze 85: };
86:
/*
 * Identity of a file on disk: inode number plus device number.
 * mlink_add() hashes and looks up the whole structure as raw memory
 * (sizeof(struct inodev) bytes) to find the mpage of a file.
 */
1.47 schwarze 87: struct inodev {
88: ino_t st_ino;
89: dev_t st_dev;
90: };
1.28 schwarze 91:
/*
 * One distinct manual page, i.e. one inode/device pair.
 * Stored in the "mpages" hash table with "inodev" as the key.
 * All file names referring to the page hang off "mlinks".
 * The string members are released with free() in mpages_free().
 */
1.47 schwarze 92: struct mpage {
93: struct inodev inodev; /* used for hashing routine */
1.75 ! schwarze 94: int64_t recno; /* id in mpages SQL table */
1.47 schwarze 95: enum form form; /* format from file content */
96: char *sec; /* section from file content */
97: char *arch; /* architecture from file content */
98: char *title; /* title from file content */
99: char *desc; /* description from file content */
100: struct mlink *mlinks; /* singly linked list */
1.28 schwarze 101: };
102:
/*
 * One directory entry (file name) referring to a manual page.
 * Stored in the "mlinks" hash table with "file" as the key and
 * chained into the list of its mpage.
 * mlink_add() replaces dsec, arch, name and fsec by heap copies
 * (never NULL afterwards); mlink_free() releases them.
 */
1.47 schwarze 103: struct mlink {
104: char file[PATH_MAX]; /* filename rel. to manpath */
105: enum form dform; /* format from directory */
106: enum form fform; /* format from file name suffix */
107: char *dsec; /* section from directory */
108: char *arch; /* architecture from directory */
109: char *name; /* name from file name (not empty) */
110: char *fsec; /* section from file name suffix */
111: struct mlink *next; /* singly linked list */
1.75 ! schwarze 112: struct mpage *mpage; /* parent */
1.2 schwarze 113: };
114:
/*
 * Indices into the file-scope array stmts[], which has STMT__MAX
 * elements; STMT__MAX therefore has to remain the last enumerator.
 */
1.47 schwarze 115: enum stmt {
116: STMT_DELETE_PAGE = 0, /* delete mpage */
117: STMT_INSERT_PAGE, /* insert mpage */
118: STMT_INSERT_LINK, /* insert mlink */
119: STMT_INSERT_KEY, /* insert parsed key */
120: STMT__MAX
1.1 schwarze 121: };
122:
/*
 * Signature of a per-macro handler: it receives the page being
 * indexed and an mdoc syntax tree node and returns an int.
 */
1.47 schwarze 123: typedef int (*mdoc_fp)(struct mpage *, const struct mdoc_node *);
1.1 schwarze 124:
/*
 * Element type of the mdocs[] table below: an optional handler
 * function paired with a key type bitmask.
 */
1.19 schwarze 125: struct mdoc_handler {
1.47 schwarze 126: mdoc_fp fp; /* optional handler */
127: uint64_t mask; /* set unless handler returns 0 */
1.19 schwarze 128: };
129:
/*
 * Forward declarations of the file-local functions,
 * kept in alphabetical order.
 */
1.47 schwarze 130: static void dbclose(int);
1.75 ! schwarze 131: static void dbadd(struct mpage *, struct mchars *);
! 132: static void dbadd_mlink(const struct mlink *mlink);
1.47 schwarze 133: static int dbopen(int);
134: static void dbprune(void);
135: static void filescan(const char *);
136: static void *hash_alloc(size_t, void *);
137: static void hash_free(void *, size_t, void *);
138: static void *hash_halloc(size_t, void *);
139: static void mlink_add(struct mlink *, const struct stat *);
1.50 schwarze 140: static int mlink_check(struct mpage *, struct mlink *);
1.47 schwarze 141: static void mlink_free(struct mlink *);
142: static void mlinks_undupe(struct mpage *);
143: static void mpages_free(void);
1.58 schwarze 144: static void mpages_merge(struct mchars *, struct mparse *);
1.47 schwarze 145: static void parse_cat(struct mpage *);
146: static void parse_man(struct mpage *, const struct man_node *);
147: static void parse_mdoc(struct mpage *, const struct mdoc_node *);
148: static int parse_mdoc_body(struct mpage *, const struct mdoc_node *);
149: static int parse_mdoc_head(struct mpage *, const struct mdoc_node *);
150: static int parse_mdoc_Fd(struct mpage *, const struct mdoc_node *);
151: static int parse_mdoc_Fn(struct mpage *, const struct mdoc_node *);
152: static int parse_mdoc_Nd(struct mpage *, const struct mdoc_node *);
153: static int parse_mdoc_Nm(struct mpage *, const struct mdoc_node *);
154: static int parse_mdoc_Sh(struct mpage *, const struct mdoc_node *);
155: static int parse_mdoc_Xr(struct mpage *, const struct mdoc_node *);
1.69 schwarze 156: static void putkey(const struct mpage *, char *, uint64_t);
1.47 schwarze 157: static void putkeys(const struct mpage *,
158: const char *, size_t, uint64_t);
159: static void putmdockey(const struct mpage *,
160: const struct mdoc_node *, uint64_t);
1.53 schwarze 161: static void render_key(struct mchars *, struct str *);
1.47 schwarze 162: static void say(const char *, const char *, ...);
163: static int set_basedir(const char *);
164: static int treescan(void);
165: static size_t utf8(unsigned int, char [7]);
166:
/*
 * File-scope state.  The option flags are set while parsing the
 * command line in mandocdb(); the hash tables and the database
 * handle are shared by the scanning, merging and db*() functions.
 */
/* NOTE(review): presumably the name of a temporary file; not used in the part of the file reviewed here */
1.72 schwarze 167: static char tempfilename[32];
/* last path component of argv[0], set in mandocdb() */
1.47 schwarze 168: static char *progname;
1.59 schwarze 169: static int nodb; /* no database changes */
1.73 schwarze 170: static int mparse_options; /* parser option bits for mparse_alloc(); -Q adds MPARSE_QUICK */
1.47 schwarze 171: static int use_all; /* use all found files */
172: static int verb; /* print what we're doing */
173: static int warnings; /* warn about crap */
1.52 schwarze 174: static int write_utf8; /* write UTF-8 output; else ASCII */
1.47 schwarze 175: static int exitcode; /* to be returned by main */
176: static enum op op; /* operational mode */
177: static char basedir[PATH_MAX]; /* current base directory */
178: static struct ohash mpages; /* table of distinct manual pages */
179: static struct ohash mlinks; /* table of directory entries */
180: static struct ohash strings; /* table of all strings */
181: static sqlite3 *db = NULL; /* current database */
182: static sqlite3_stmt *stmts[STMT__MAX]; /* current statements */
183:
/*
 * Table with one entry per mdoc macro, MDOC_MAX entries in total.
 * The trailing comment on each line names the macro the entry is for.
 * Each entry pairs an optional handler with a key type bitmask
 * (see struct mdoc_handler); { NULL, 0 } entries have neither.
 * NOTE(review): presumably indexed by the macro token of a node;
 * the entries must then stay in the order of that enumeration.
 * NOTE(review): the Vt entry uses TYPE_Va, the same mask as the
 * Va entry - confirm that this is intended.
 */
1.19 schwarze 184: static const struct mdoc_handler mdocs[MDOC_MAX] = {
1.47 schwarze 185: { NULL, 0 }, /* Ap */
186: { NULL, 0 }, /* Dd */
187: { NULL, 0 }, /* Dt */
188: { NULL, 0 }, /* Os */
189: { parse_mdoc_Sh, TYPE_Sh }, /* Sh */
190: { parse_mdoc_head, TYPE_Ss }, /* Ss */
191: { NULL, 0 }, /* Pp */
192: { NULL, 0 }, /* D1 */
193: { NULL, 0 }, /* Dl */
194: { NULL, 0 }, /* Bd */
195: { NULL, 0 }, /* Ed */
196: { NULL, 0 }, /* Bl */
197: { NULL, 0 }, /* El */
198: { NULL, 0 }, /* It */
199: { NULL, 0 }, /* Ad */
200: { NULL, TYPE_An }, /* An */
201: { NULL, TYPE_Ar }, /* Ar */
202: { NULL, TYPE_Cd }, /* Cd */
203: { NULL, TYPE_Cm }, /* Cm */
204: { NULL, TYPE_Dv }, /* Dv */
205: { NULL, TYPE_Er }, /* Er */
206: { NULL, TYPE_Ev }, /* Ev */
207: { NULL, 0 }, /* Ex */
208: { NULL, TYPE_Fa }, /* Fa */
209: { parse_mdoc_Fd, 0 }, /* Fd */
210: { NULL, TYPE_Fl }, /* Fl */
211: { parse_mdoc_Fn, 0 }, /* Fn */
212: { NULL, TYPE_Ft }, /* Ft */
213: { NULL, TYPE_Ic }, /* Ic */
1.49 schwarze 214: { NULL, TYPE_In }, /* In */
1.47 schwarze 215: { NULL, TYPE_Li }, /* Li */
216: { parse_mdoc_Nd, TYPE_Nd }, /* Nd */
217: { parse_mdoc_Nm, TYPE_Nm }, /* Nm */
218: { NULL, 0 }, /* Op */
219: { NULL, 0 }, /* Ot */
220: { NULL, TYPE_Pa }, /* Pa */
221: { NULL, 0 }, /* Rv */
1.49 schwarze 222: { NULL, TYPE_St }, /* St */
1.47 schwarze 223: { NULL, TYPE_Va }, /* Va */
224: { parse_mdoc_body, TYPE_Va }, /* Vt */
225: { parse_mdoc_Xr, 0 }, /* Xr */
226: { NULL, 0 }, /* %A */
227: { NULL, 0 }, /* %B */
228: { NULL, 0 }, /* %D */
229: { NULL, 0 }, /* %I */
230: { NULL, 0 }, /* %J */
231: { NULL, 0 }, /* %N */
232: { NULL, 0 }, /* %O */
233: { NULL, 0 }, /* %P */
234: { NULL, 0 }, /* %R */
235: { NULL, 0 }, /* %T */
236: { NULL, 0 }, /* %V */
237: { NULL, 0 }, /* Ac */
238: { NULL, 0 }, /* Ao */
239: { NULL, 0 }, /* Aq */
240: { NULL, TYPE_At }, /* At */
241: { NULL, 0 }, /* Bc */
242: { NULL, 0 }, /* Bf */
243: { NULL, 0 }, /* Bo */
244: { NULL, 0 }, /* Bq */
245: { NULL, TYPE_Bsx }, /* Bsx */
246: { NULL, TYPE_Bx }, /* Bx */
247: { NULL, 0 }, /* Db */
248: { NULL, 0 }, /* Dc */
249: { NULL, 0 }, /* Do */
250: { NULL, 0 }, /* Dq */
251: { NULL, 0 }, /* Ec */
252: { NULL, 0 }, /* Ef */
253: { NULL, TYPE_Em }, /* Em */
254: { NULL, 0 }, /* Eo */
255: { NULL, TYPE_Fx }, /* Fx */
256: { NULL, TYPE_Ms }, /* Ms */
257: { NULL, 0 }, /* No */
258: { NULL, 0 }, /* Ns */
259: { NULL, TYPE_Nx }, /* Nx */
260: { NULL, TYPE_Ox }, /* Ox */
261: { NULL, 0 }, /* Pc */
262: { NULL, 0 }, /* Pf */
263: { NULL, 0 }, /* Po */
264: { NULL, 0 }, /* Pq */
265: { NULL, 0 }, /* Qc */
266: { NULL, 0 }, /* Ql */
267: { NULL, 0 }, /* Qo */
268: { NULL, 0 }, /* Qq */
269: { NULL, 0 }, /* Re */
270: { NULL, 0 }, /* Rs */
271: { NULL, 0 }, /* Sc */
272: { NULL, 0 }, /* So */
273: { NULL, 0 }, /* Sq */
274: { NULL, 0 }, /* Sm */
275: { NULL, 0 }, /* Sx */
276: { NULL, TYPE_Sy }, /* Sy */
277: { NULL, TYPE_Tn }, /* Tn */
278: { NULL, 0 }, /* Ux */
279: { NULL, 0 }, /* Xc */
280: { NULL, 0 }, /* Xo */
281: { parse_mdoc_head, 0 }, /* Fo */
282: { NULL, 0 }, /* Fc */
283: { NULL, 0 }, /* Oo */
284: { NULL, 0 }, /* Oc */
285: { NULL, 0 }, /* Bk */
286: { NULL, 0 }, /* Ek */
287: { NULL, 0 }, /* Bt */
288: { NULL, 0 }, /* Hf */
289: { NULL, 0 }, /* Fr */
290: { NULL, 0 }, /* Ud */
291: { NULL, TYPE_Lb }, /* Lb */
292: { NULL, 0 }, /* Lp */
293: { NULL, TYPE_Lk }, /* Lk */
294: { NULL, TYPE_Mt }, /* Mt */
295: { NULL, 0 }, /* Brq */
296: { NULL, 0 }, /* Bro */
297: { NULL, 0 }, /* Brc */
298: { NULL, 0 }, /* %C */
299: { NULL, 0 }, /* Es */
300: { NULL, 0 }, /* En */
301: { NULL, TYPE_Dx }, /* Dx */
302: { NULL, 0 }, /* %Q */
303: { NULL, 0 }, /* br */
304: { NULL, 0 }, /* sp */
305: { NULL, 0 }, /* %U */
306: { NULL, 0 }, /* Ta */
1.1 schwarze 307: };
308:
309: int
1.3 schwarze 310: mandocdb(int argc, char *argv[])
1.1 schwarze 311: {
1.47 schwarze 312: int ch, i;
313: size_t j, sz;
314: const char *path_arg;
315: struct mchars *mc;
316: struct manpaths dirs;
317: struct mparse *mp;
318: struct ohash_info mpages_info, mlinks_info;
319:
320: memset(stmts, 0, STMT__MAX * sizeof(sqlite3_stmt *));
321: memset(&dirs, 0, sizeof(struct manpaths));
322:
323: mpages_info.alloc = mlinks_info.alloc = hash_alloc;
324: mpages_info.halloc = mlinks_info.halloc = hash_halloc;
325: mpages_info.hfree = mlinks_info.hfree = hash_free;
326:
327: mpages_info.key_offset = offsetof(struct mpage, inodev);
328: mlinks_info.key_offset = offsetof(struct mlink, file);
1.1 schwarze 329:
330: progname = strrchr(argv[0], '/');
331: if (progname == NULL)
332: progname = argv[0];
333: else
334: ++progname;
335:
1.47 schwarze 336: /*
337: * We accept a few different invocations.
338: * The CHECKOP macro makes sure that invocation styles don't
339: * clobber each other.
340: */
341: #define CHECKOP(_op, _ch) do \
342: if (OP_DEFAULT != (_op)) { \
343: fprintf(stderr, "-%c: Conflicting option\n", (_ch)); \
344: goto usage; \
345: } while (/*CONSTCOND*/0)
1.10 schwarze 346:
1.47 schwarze 347: path_arg = NULL;
1.28 schwarze 348: op = OP_DEFAULT;
1.1 schwarze 349:
1.59 schwarze 350: while (-1 != (ch = getopt(argc, argv, "aC:d:nQT:tu:vW")))
1.1 schwarze 351: switch (ch) {
1.6 schwarze 352: case ('a'):
353: use_all = 1;
354: break;
1.25 schwarze 355: case ('C'):
1.47 schwarze 356: CHECKOP(op, ch);
357: path_arg = optarg;
1.28 schwarze 358: op = OP_CONFFILE;
1.25 schwarze 359: break;
1.1 schwarze 360: case ('d'):
1.47 schwarze 361: CHECKOP(op, ch);
362: path_arg = optarg;
1.2 schwarze 363: op = OP_UPDATE;
1.1 schwarze 364: break;
1.47 schwarze 365: case ('n'):
366: nodb = 1;
367: break;
1.59 schwarze 368: case ('Q'):
1.73 schwarze 369: mparse_options |= MPARSE_QUICK;
1.59 schwarze 370: break;
1.52 schwarze 371: case ('T'):
372: if (strcmp(optarg, "utf8")) {
373: fprintf(stderr, "-T%s: Unsupported "
374: "output format\n", optarg);
375: goto usage;
376: }
377: write_utf8 = 1;
378: break;
1.28 schwarze 379: case ('t'):
1.47 schwarze 380: CHECKOP(op, ch);
1.28 schwarze 381: dup2(STDOUT_FILENO, STDERR_FILENO);
382: op = OP_TEST;
1.47 schwarze 383: nodb = warnings = 1;
1.28 schwarze 384: break;
1.2 schwarze 385: case ('u'):
1.47 schwarze 386: CHECKOP(op, ch);
387: path_arg = optarg;
1.1 schwarze 388: op = OP_DELETE;
389: break;
390: case ('v'):
391: verb++;
392: break;
1.28 schwarze 393: case ('W'):
394: warnings = 1;
395: break;
1.1 schwarze 396: default:
1.28 schwarze 397: goto usage;
1.1 schwarze 398: }
399:
400: argc -= optind;
401: argv += optind;
402:
1.28 schwarze 403: if (OP_CONFFILE == op && argc > 0) {
1.47 schwarze 404: fprintf(stderr, "-C: Too many arguments\n");
1.28 schwarze 405: goto usage;
406: }
407:
1.47 schwarze 408: exitcode = (int)MANDOCLEVEL_OK;
1.73 schwarze 409: mp = mparse_alloc(mparse_options, MANDOCLEVEL_FATAL, NULL, NULL);
1.47 schwarze 410: mc = mchars_alloc();
1.2 schwarze 411:
1.47 schwarze 412: ohash_init(&mpages, 6, &mpages_info);
413: ohash_init(&mlinks, 6, &mlinks_info);
1.2 schwarze 414:
1.47 schwarze 415: if (OP_UPDATE == op || OP_DELETE == op || OP_TEST == op) {
416: /*
417: * Force processing all files.
418: */
419: use_all = 1;
1.2 schwarze 420:
1.47 schwarze 421: /*
422: * All of these deal with a specific directory.
423: * Jump into that directory then collect files specified
424: * on the command-line.
425: */
426: if (0 == set_basedir(path_arg))
427: goto out;
428: for (i = 0; i < argc; i++)
429: filescan(argv[i]);
430: if (0 == dbopen(1))
431: goto out;
432: if (OP_TEST != op)
433: dbprune();
434: if (OP_DELETE != op)
1.58 schwarze 435: mpages_merge(mc, mp);
1.47 schwarze 436: dbclose(1);
437: } else {
438: /*
439: * If we have arguments, use them as our manpaths.
440: * If we don't, grok from manpath(1) or however else
441: * manpath_parse() wants to do it.
442: */
443: if (argc > 0) {
444: dirs.paths = mandoc_calloc
445: (argc, sizeof(char *));
446: dirs.sz = (size_t)argc;
447: for (i = 0; i < argc; i++)
448: dirs.paths[i] = mandoc_strdup(argv[i]);
449: } else
450: manpath_parse(&dirs, path_arg, NULL, NULL);
1.2 schwarze 451:
1.47 schwarze 452: /*
453: * First scan the tree rooted at a base directory, then
454: * build a new database and finally move it into place.
455: * Ignore zero-length directories and strip trailing
456: * slashes.
457: */
458: for (j = 0; j < dirs.sz; j++) {
459: sz = strlen(dirs.paths[j]);
460: if (sz && '/' == dirs.paths[j][sz - 1])
461: dirs.paths[j][--sz] = '\0';
462: if (0 == sz)
463: continue;
1.2 schwarze 464:
1.47 schwarze 465: if (j) {
466: ohash_init(&mpages, 6, &mpages_info);
467: ohash_init(&mlinks, 6, &mlinks_info);
468: }
1.1 schwarze 469:
1.47 schwarze 470: if (0 == set_basedir(dirs.paths[j]))
471: goto out;
472: if (0 == treescan())
473: goto out;
474: if (0 == set_basedir(dirs.paths[j]))
475: goto out;
476: if (0 == dbopen(0))
477: goto out;
1.41 deraadt 478:
1.58 schwarze 479: mpages_merge(mc, mp);
1.47 schwarze 480: dbclose(0);
1.28 schwarze 481:
1.47 schwarze 482: if (j + 1 < dirs.sz) {
483: mpages_free();
484: ohash_delete(&mpages);
485: ohash_delete(&mlinks);
486: }
1.2 schwarze 487: }
1.47 schwarze 488: }
489: out:
490: set_basedir(NULL);
491: manpath_free(&dirs);
492: mchars_free(mc);
493: mparse_free(mp);
494: mpages_free();
495: ohash_delete(&mpages);
496: ohash_delete(&mlinks);
497: return(exitcode);
498: usage:
1.59 schwarze 499: fprintf(stderr, "usage: %s [-anQvW] [-C file] [-Tutf8]\n"
500: " %s [-anQvW] [-Tutf8] dir ...\n"
501: " %s [-nQvW] [-Tutf8] -d dir [file ...]\n"
1.47 schwarze 502: " %s [-nvW] -u dir [file ...]\n"
1.59 schwarze 503: " %s [-Q] -t file ...\n",
1.47 schwarze 504: progname, progname, progname,
505: progname, progname);
1.1 schwarze 506:
1.47 schwarze 507: return((int)MANDOCLEVEL_BADARG);
508: }
1.1 schwarze 509:
1.47 schwarze 510: /*
511: * Scan a directory tree rooted at "basedir" for manpages.
512: * We use fts(), scanning directory parts along the way for clues to our
513: * section and architecture.
514: *
515: * If use_all has been specified, grok all files.
516: * If not, sanitise paths to the following:
517: *
518: * [./]man*[/<arch>]/<name>.<section>
519: * or
520: * [./]cat<section>[/<arch>]/<name>.0
521: *
522: * TODO: accomodate for multi-language directories.
523: */
524: static int
525: treescan(void)
526: {
527: FTS *f;
528: FTSENT *ff;
529: struct mlink *mlink;
530: int dform;
1.51 schwarze 531: char *dsec, *arch, *fsec, *cp;
532: const char *path;
1.47 schwarze 533: const char *argv[2];
1.1 schwarze 534:
1.47 schwarze 535: argv[0] = ".";
536: argv[1] = (char *)NULL;
1.1 schwarze 537:
1.47 schwarze 538: /*
539: * Walk through all components under the directory, using the
540: * logical descent of files.
541: */
542: f = fts_open((char * const *)argv, FTS_LOGICAL, NULL);
543: if (NULL == f) {
544: exitcode = (int)MANDOCLEVEL_SYSERR;
545: say("", NULL);
546: return(0);
547: }
1.2 schwarze 548:
1.47 schwarze 549: dsec = arch = NULL;
550: dform = FORM_NONE;
1.2 schwarze 551:
1.47 schwarze 552: while (NULL != (ff = fts_read(f))) {
553: path = ff->fts_path + 2;
1.15 schwarze 554: /*
1.47 schwarze 555: * If we're a regular file, add an mlink by using the
556: * stored directory data and handling the filename.
1.15 schwarze 557: */
1.47 schwarze 558: if (FTS_F == ff->fts_info) {
559: if (0 == strcmp(path, MANDOC_DB))
560: continue;
561: if ( ! use_all && ff->fts_level < 2) {
562: if (warnings)
563: say(path, "Extraneous file");
564: continue;
565: } else if (NULL == (fsec =
566: strrchr(ff->fts_name, '.'))) {
567: if ( ! use_all) {
568: if (warnings)
569: say(path,
570: "No filename suffix");
571: continue;
572: }
573: } else if (0 == strcmp(++fsec, "html")) {
574: if (warnings)
575: say(path, "Skip html");
576: continue;
577: } else if (0 == strcmp(fsec, "gz")) {
578: if (warnings)
579: say(path, "Skip gz");
580: continue;
581: } else if (0 == strcmp(fsec, "ps")) {
582: if (warnings)
583: say(path, "Skip ps");
584: continue;
585: } else if (0 == strcmp(fsec, "pdf")) {
586: if (warnings)
587: say(path, "Skip pdf");
588: continue;
589: } else if ( ! use_all &&
590: ((FORM_SRC == dform && strcmp(fsec, dsec)) ||
591: (FORM_CAT == dform && strcmp(fsec, "0")))) {
592: if (warnings)
593: say(path, "Wrong filename suffix");
594: continue;
595: } else
596: fsec[-1] = '\0';
1.51 schwarze 597:
1.47 schwarze 598: mlink = mandoc_calloc(1, sizeof(struct mlink));
599: strlcpy(mlink->file, path, sizeof(mlink->file));
600: mlink->dform = dform;
1.51 schwarze 601: mlink->dsec = dsec;
602: mlink->arch = arch;
603: mlink->name = ff->fts_name;
604: mlink->fsec = fsec;
1.47 schwarze 605: mlink_add(mlink, ff->fts_statp);
606: continue;
607: } else if (FTS_D != ff->fts_info &&
608: FTS_DP != ff->fts_info) {
609: if (warnings)
610: say(path, "Not a regular file");
611: continue;
612: }
613:
614: switch (ff->fts_level) {
615: case (0):
616: /* Ignore the root directory. */
617: break;
618: case (1):
619: /*
620: * This might contain manX/ or catX/.
621: * Try to infer this from the name.
622: * If we're not in use_all, enforce it.
623: */
624: cp = ff->fts_name;
625: if (FTS_DP == ff->fts_info)
626: break;
1.15 schwarze 627:
1.47 schwarze 628: if (0 == strncmp(cp, "man", 3)) {
629: dform = FORM_SRC;
630: dsec = cp + 3;
631: } else if (0 == strncmp(cp, "cat", 3)) {
632: dform = FORM_CAT;
633: dsec = cp + 3;
1.51 schwarze 634: } else {
635: dform = FORM_NONE;
636: dsec = NULL;
1.26 schwarze 637: }
1.47 schwarze 638:
639: if (NULL != dsec || use_all)
640: break;
641:
642: if (warnings)
643: say(path, "Unknown directory part");
644: fts_set(f, ff, FTS_SKIP);
645: break;
646: case (2):
647: /*
648: * Possibly our architecture.
649: * If we're descending, keep tabs on it.
650: */
651: if (FTS_DP != ff->fts_info && NULL != dsec)
652: arch = ff->fts_name;
1.51 schwarze 653: else
654: arch = NULL;
1.47 schwarze 655: break;
656: default:
657: if (FTS_DP == ff->fts_info || use_all)
658: break;
659: if (warnings)
660: say(path, "Extraneous directory part");
661: fts_set(f, ff, FTS_SKIP);
662: break;
1.14 schwarze 663: }
1.47 schwarze 664: }
665:
666: fts_close(f);
667: return(1);
668: }
1.1 schwarze 669:
1.47 schwarze 670: /*
671: * Add a file to the mlinks table.
672: * Do not verify that it's a "valid" looking manpage (we'll do that
673: * later).
674: *
675: * Try to infer the manual section, architecture, and page name from the
676: * path, assuming it looks like
677: *
678: * [./]man*[/<arch>]/<name>.<section>
679: * or
680: * [./]cat<section>[/<arch>]/<name>.0
681: *
682: * See treescan() for the fts(3) version of this.
683: */
684: static void
685: filescan(const char *file)
686: {
687: char buf[PATH_MAX];
688: struct stat st;
689: struct mlink *mlink;
690: char *p, *start;
691:
692: assert(use_all);
693:
694: if (0 == strncmp(file, "./", 2))
695: file += 2;
696:
697: if (NULL == realpath(file, buf)) {
698: exitcode = (int)MANDOCLEVEL_BADARG;
699: say(file, NULL);
700: return;
1.63 schwarze 701: }
702:
703: if (strstr(buf, basedir) == buf)
704: start = buf + strlen(basedir) + 1;
705: else if (OP_TEST == op)
706: start = buf;
707: else {
1.47 schwarze 708: exitcode = (int)MANDOCLEVEL_BADARG;
709: say("", "%s: outside base directory", buf);
710: return;
1.63 schwarze 711: }
712:
713: if (-1 == stat(buf, &st)) {
1.47 schwarze 714: exitcode = (int)MANDOCLEVEL_BADARG;
715: say(file, NULL);
716: return;
717: } else if ( ! (S_IFREG & st.st_mode)) {
718: exitcode = (int)MANDOCLEVEL_BADARG;
719: say(file, "Not a regular file");
720: return;
1.1 schwarze 721: }
1.63 schwarze 722:
1.47 schwarze 723: mlink = mandoc_calloc(1, sizeof(struct mlink));
724: strlcpy(mlink->file, start, sizeof(mlink->file));
1.1 schwarze 725:
1.10 schwarze 726: /*
1.47 schwarze 727: * First try to guess our directory structure.
728: * If we find a separator, try to look for man* or cat*.
729: * If we find one of these and what's underneath is a directory,
730: * assume it's an architecture.
1.10 schwarze 731: */
1.47 schwarze 732: if (NULL != (p = strchr(start, '/'))) {
733: *p++ = '\0';
734: if (0 == strncmp(start, "man", 3)) {
735: mlink->dform = FORM_SRC;
1.51 schwarze 736: mlink->dsec = start + 3;
1.47 schwarze 737: } else if (0 == strncmp(start, "cat", 3)) {
738: mlink->dform = FORM_CAT;
1.51 schwarze 739: mlink->dsec = start + 3;
1.47 schwarze 740: }
1.10 schwarze 741:
1.47 schwarze 742: start = p;
743: if (NULL != mlink->dsec && NULL != (p = strchr(start, '/'))) {
744: *p++ = '\0';
1.51 schwarze 745: mlink->arch = start;
1.47 schwarze 746: start = p;
1.41 deraadt 747: }
1.47 schwarze 748: }
1.7 schwarze 749:
1.47 schwarze 750: /*
751: * Now check the file suffix.
752: * Suffix of `.0' indicates a catpage, `.1-9' is a manpage.
753: */
754: p = strrchr(start, '\0');
755: while (p-- > start && '/' != *p && '.' != *p)
756: /* Loop. */ ;
757:
758: if ('.' == *p) {
759: *p++ = '\0';
1.51 schwarze 760: mlink->fsec = p;
1.47 schwarze 761: }
1.41 deraadt 762:
1.47 schwarze 763: /*
764: * Now try to parse the name.
765: * Use the filename portion of the path.
766: */
767: mlink->name = start;
768: if (NULL != (p = strrchr(start, '/'))) {
769: mlink->name = p + 1;
770: *p = '\0';
771: }
772: mlink_add(mlink, &st);
773: }
1.2 schwarze 774:
1.47 schwarze 775: static void
776: mlink_add(struct mlink *mlink, const struct stat *st)
777: {
778: struct inodev inodev;
779: struct mpage *mpage;
780: unsigned int slot;
781:
782: assert(NULL != mlink->file);
783:
1.51 schwarze 784: mlink->dsec = mandoc_strdup(mlink->dsec ? mlink->dsec : "");
785: mlink->arch = mandoc_strdup(mlink->arch ? mlink->arch : "");
786: mlink->name = mandoc_strdup(mlink->name ? mlink->name : "");
787: mlink->fsec = mandoc_strdup(mlink->fsec ? mlink->fsec : "");
1.47 schwarze 788:
789: if ('0' == *mlink->fsec) {
790: free(mlink->fsec);
791: mlink->fsec = mandoc_strdup(mlink->dsec);
792: mlink->fform = FORM_CAT;
793: } else if ('1' <= *mlink->fsec && '9' >= *mlink->fsec)
794: mlink->fform = FORM_SRC;
795: else
796: mlink->fform = FORM_NONE;
1.1 schwarze 797:
1.47 schwarze 798: slot = ohash_qlookup(&mlinks, mlink->file);
799: assert(NULL == ohash_find(&mlinks, slot));
800: ohash_insert(&mlinks, slot, mlink);
801:
802: inodev.st_ino = st->st_ino;
803: inodev.st_dev = st->st_dev;
804: slot = ohash_lookup_memory(&mpages, (char *)&inodev,
805: sizeof(struct inodev), inodev.st_ino);
806: mpage = ohash_find(&mpages, slot);
807: if (NULL == mpage) {
808: mpage = mandoc_calloc(1, sizeof(struct mpage));
809: mpage->inodev.st_ino = inodev.st_ino;
810: mpage->inodev.st_dev = inodev.st_dev;
811: ohash_insert(&mpages, slot, mpage);
812: } else
813: mlink->next = mpage->mlinks;
814: mpage->mlinks = mlink;
1.75 ! schwarze 815: mlink->mpage = mpage;
1.47 schwarze 816: }
1.1 schwarze 817:
1.47 schwarze 818: static void
819: mlink_free(struct mlink *mlink)
820: {
1.1 schwarze 821:
1.47 schwarze 822: free(mlink->dsec);
823: free(mlink->arch);
824: free(mlink->name);
825: free(mlink->fsec);
826: free(mlink);
827: }
1.1 schwarze 828:
1.47 schwarze 829: static void
830: mpages_free(void)
831: {
832: struct mpage *mpage;
833: struct mlink *mlink;
834: unsigned int slot;
835:
836: mpage = ohash_first(&mpages, &slot);
837: while (NULL != mpage) {
838: while (NULL != (mlink = mpage->mlinks)) {
839: mpage->mlinks = mlink->next;
840: mlink_free(mlink);
841: }
842: free(mpage->sec);
843: free(mpage->arch);
844: free(mpage->title);
845: free(mpage->desc);
846: free(mpage);
847: mpage = ohash_next(&mpages, &slot);
848: }
849: }
1.1 schwarze 850:
1.47 schwarze 851: /*
852: * For each mlink to the mpage, check whether the path looks like
853: * it is formatted, and if it does, check whether a source manual
854: * exists by the same name, ignoring the suffix.
855: * If both conditions hold, drop the mlink.
856: */
857: static void
858: mlinks_undupe(struct mpage *mpage)
859: {
860: char buf[PATH_MAX];
861: struct mlink **prev;
862: struct mlink *mlink;
863: char *bufp;
864:
865: mpage->form = FORM_CAT;
866: prev = &mpage->mlinks;
867: while (NULL != (mlink = *prev)) {
868: if (FORM_CAT != mlink->dform) {
869: mpage->form = FORM_NONE;
870: goto nextlink;
871: }
872: if (strlcpy(buf, mlink->file, PATH_MAX) >= PATH_MAX) {
873: if (warnings)
874: say(mlink->file, "Filename too long");
875: goto nextlink;
1.26 schwarze 876: }
1.47 schwarze 877: bufp = strstr(buf, "cat");
878: assert(NULL != bufp);
879: memcpy(bufp, "man", 3);
880: if (NULL != (bufp = strrchr(buf, '.')))
881: *++bufp = '\0';
882: strlcat(buf, mlink->dsec, PATH_MAX);
883: if (NULL == ohash_find(&mlinks,
884: ohash_qlookup(&mlinks, buf)))
885: goto nextlink;
886: if (warnings)
887: say(mlink->file, "Man source exists: %s", buf);
888: if (use_all)
889: goto nextlink;
890: *prev = mlink->next;
891: mlink_free(mlink);
892: continue;
893: nextlink:
894: prev = &(*prev)->next;
1.1 schwarze 895: }
1.47 schwarze 896: }
1.1 schwarze 897:
1.50 schwarze 898: static int
899: mlink_check(struct mpage *mpage, struct mlink *mlink)
900: {
901: int match;
902:
903: match = 1;
904:
905: /*
906: * Check whether the manual section given in a file
907: * agrees with the directory where the file is located.
908: * Some manuals have suffixes like (3p) on their
909: * section number either inside the file or in the
910: * directory name, some are linked into more than one
911: * section, like encrypt(1) = makekey(8).
912: */
913:
914: if (FORM_SRC == mpage->form &&
915: strcasecmp(mpage->sec, mlink->dsec)) {
916: match = 0;
917: say(mlink->file, "Section \"%s\" manual in %s directory",
918: mpage->sec, mlink->dsec);
919: }
920:
921: /*
922: * Manual page directories exist for each kernel
923: * architecture as returned by machine(1).
924: * However, many manuals only depend on the
925: * application architecture as returned by arch(1).
926: * For example, some (2/ARM) manuals are shared
927: * across the "armish" and "zaurus" kernel
928: * architectures.
929: * A few manuals are even shared across completely
930: * different architectures, for example fdformat(1)
931: * on amd64, i386, sparc, and sparc64.
932: */
933:
934: if (strcasecmp(mpage->arch, mlink->arch)) {
935: match = 0;
936: say(mlink->file, "Architecture \"%s\" manual in "
937: "\"%s\" directory", mpage->arch, mlink->arch);
938: }
939:
940: if (strcasecmp(mpage->title, mlink->name))
941: match = 0;
942:
943: return(match);
944: }
945:
1.47 schwarze 946: /*
947: * Run through the pages in the global hash table "mpages"
948: * and add them to the database specified in "basedir".
949: *
950: * This handles the parsing scheme itself, using the cues of directory
951: * and filename to determine whether the file is parsable or not.
952: */
953: static void
1.58 schwarze 954: mpages_merge(struct mchars *mc, struct mparse *mp)
1.47 schwarze 955: {
1.70 schwarze 956: char any[] = "any";
1.58 schwarze 957: struct ohash_info str_info;
1.75 ! schwarze 958: struct mpage *mpage, *mpage_dest;
! 959: struct mlink *mlink, *mlink_dest;
1.47 schwarze 960: struct mdoc *mdoc;
961: struct man *man;
1.75 ! schwarze 962: char *sodest;
1.69 schwarze 963: char *cp;
1.47 schwarze 964: int match;
1.58 schwarze 965: unsigned int pslot;
1.47 schwarze 966: enum mandoclevel lvl;
967:
968: str_info.alloc = hash_alloc;
969: str_info.halloc = hash_halloc;
970: str_info.hfree = hash_free;
971: str_info.key_offset = offsetof(struct str, key);
972:
1.64 schwarze 973: if (0 == nodb)
974: SQL_EXEC("BEGIN TRANSACTION");
975:
1.47 schwarze 976: mpage = ohash_first(&mpages, &pslot);
977: while (NULL != mpage) {
978: mlinks_undupe(mpage);
979: if (NULL == mpage->mlinks) {
980: mpage = ohash_next(&mpages, &pslot);
981: continue;
982: }
1.1 schwarze 983:
1.47 schwarze 984: ohash_init(&strings, 6, &str_info);
985: mparse_reset(mp);
986: mdoc = NULL;
987: man = NULL;
1.11 schwarze 988:
989: /*
1.24 schwarze 990: * Try interpreting the file as mdoc(7) or man(7)
991: * source code, unless it is already known to be
992: * formatted. Fall back to formatted mode.
1.11 schwarze 993: */
1.47 schwarze 994: if (FORM_CAT != mpage->mlinks->dform ||
995: FORM_CAT != mpage->mlinks->fform) {
996: lvl = mparse_readfd(mp, -1, mpage->mlinks->file);
997: if (lvl < MANDOCLEVEL_FATAL)
1.75 ! schwarze 998: mparse_result(mp, &mdoc, &man, &sodest);
1.47 schwarze 999: }
1.11 schwarze 1000:
1.75 ! schwarze 1001: if (NULL != sodest) {
! 1002: mlink_dest = ohash_find(&mlinks,
! 1003: ohash_qlookup(&mlinks, sodest));
! 1004: if (NULL != mlink_dest) {
! 1005:
! 1006: /* The .so target exists. */
! 1007:
! 1008: mpage_dest = mlink_dest->mpage;
! 1009: mlink = mpage->mlinks;
! 1010: while (1) {
! 1011: mlink->mpage = mpage_dest;
! 1012:
! 1013: /*
! 1014: * If the target was already
! 1015: * processed, add the links
! 1016: * to the database now.
! 1017: * Otherwise, this will
! 1018: * happen when we come
! 1019: * to the target.
! 1020: */
! 1021:
! 1022: if (mpage_dest->recno)
! 1023: dbadd_mlink(mlink);
! 1024:
! 1025: if (NULL == mlink->next)
! 1026: break;
! 1027: mlink = mlink->next;
! 1028: }
! 1029:
! 1030: /* Move all links to the target. */
! 1031:
! 1032: mlink->next = mlink_dest->next;
! 1033: mlink_dest->next = mpage->mlinks;
! 1034: mpage->mlinks = NULL;
! 1035: }
! 1036: ohash_delete(&strings);
! 1037: mpage = ohash_next(&mpages, &pslot);
! 1038: continue;
! 1039: } else if (NULL != mdoc) {
1.47 schwarze 1040: mpage->form = FORM_SRC;
1041: mpage->sec =
1042: mandoc_strdup(mdoc_meta(mdoc)->msec);
1043: mpage->arch = mdoc_meta(mdoc)->arch;
1044: mpage->arch = mandoc_strdup(
1045: NULL == mpage->arch ? "" : mpage->arch);
1046: mpage->title =
1047: mandoc_strdup(mdoc_meta(mdoc)->title);
1.11 schwarze 1048: } else if (NULL != man) {
1.47 schwarze 1049: mpage->form = FORM_SRC;
1050: mpage->sec =
1051: mandoc_strdup(man_meta(man)->msec);
1052: mpage->arch =
1053: mandoc_strdup(mpage->mlinks->arch);
1054: mpage->title =
1055: mandoc_strdup(man_meta(man)->title);
1.11 schwarze 1056: } else {
1.47 schwarze 1057: mpage->form = FORM_CAT;
1058: mpage->sec =
1059: mandoc_strdup(mpage->mlinks->dsec);
1060: mpage->arch =
1061: mandoc_strdup(mpage->mlinks->arch);
1062: mpage->title =
1063: mandoc_strdup(mpage->mlinks->name);
1.1 schwarze 1064: }
1.54 schwarze 1065: putkey(mpage, mpage->sec, TYPE_sec);
1.55 schwarze 1066: putkey(mpage, '\0' == *mpage->arch ?
1.70 schwarze 1067: any : mpage->arch, TYPE_arch);
1.1 schwarze 1068:
1.54 schwarze 1069: for (mlink = mpage->mlinks; mlink; mlink = mlink->next) {
1070: if ('\0' != *mlink->dsec)
1071: putkey(mpage, mlink->dsec, TYPE_sec);
1072: if ('\0' != *mlink->fsec)
1073: putkey(mpage, mlink->fsec, TYPE_sec);
1.55 schwarze 1074: putkey(mpage, '\0' == *mlink->arch ?
1.70 schwarze 1075: any : mlink->arch, TYPE_arch);
1.50 schwarze 1076: putkey(mpage, mlink->name, TYPE_Nm);
1.54 schwarze 1077: }
1.41 deraadt 1078:
1.50 schwarze 1079: if (warnings && !use_all) {
1.47 schwarze 1080: match = 0;
1.50 schwarze 1081: for (mlink = mpage->mlinks; mlink;
1082: mlink = mlink->next)
1083: if (mlink_check(mpage, mlink))
1084: match = 1;
1085: } else
1086: match = 1;
1.6 schwarze 1087:
1.47 schwarze 1088: if (NULL != mdoc) {
1089: if (NULL != (cp = mdoc_meta(mdoc)->name))
1090: putkey(mpage, cp, TYPE_Nm);
1091: assert(NULL == mpage->desc);
1092: parse_mdoc(mpage, mdoc_node(mdoc));
1093: putkey(mpage, NULL != mpage->desc ?
1094: mpage->desc : mpage->mlinks->name, TYPE_Nd);
1095: } else if (NULL != man)
1096: parse_man(mpage, man_node(man));
1097: else
1098: parse_cat(mpage);
1.6 schwarze 1099:
1.62 schwarze 1100: dbadd(mpage, mc);
1.47 schwarze 1101: ohash_delete(&strings);
1102: mpage = ohash_next(&mpages, &pslot);
1103: }
1.64 schwarze 1104:
1105: if (0 == nodb)
1106: SQL_EXEC("END TRANSACTION");
1.47 schwarze 1107: }
1.6 schwarze 1108:
1.47 schwarze 1109: static void
1110: parse_cat(struct mpage *mpage)
1111: {
1112: FILE *stream;
1113: char *line, *p, *title;
1114: size_t len, plen, titlesz;
1.1 schwarze 1115:
1.47 schwarze 1116: if (NULL == (stream = fopen(mpage->mlinks->file, "r"))) {
1117: if (warnings)
1118: say(mpage->mlinks->file, NULL);
1119: return;
1120: }
1.1 schwarze 1121:
1.47 schwarze 1122: /* Skip to first blank line. */
1.1 schwarze 1123:
1.47 schwarze 1124: while (NULL != (line = fgetln(stream, &len)))
1125: if ('\n' == *line)
1126: break;
1.1 schwarze 1127:
1.47 schwarze 1128: /*
1129: * Assume the first line that is not indented
1130: * is the first section header. Skip to it.
1131: */
1.1 schwarze 1132:
1.47 schwarze 1133: while (NULL != (line = fgetln(stream, &len)))
1134: if ('\n' != *line && ' ' != *line)
1135: break;
1136:
1137: /*
1138: * Read up until the next section into a buffer.
1139: * Strip the leading and trailing newline from each read line,
1140: * appending a trailing space.
1141: * Ignore empty (whitespace-only) lines.
1142: */
1.28 schwarze 1143:
1.47 schwarze 1144: titlesz = 0;
1145: title = NULL;
1.38 schwarze 1146:
1.47 schwarze 1147: while (NULL != (line = fgetln(stream, &len))) {
1148: if (' ' != *line || '\n' != line[len - 1])
1149: break;
1150: while (len > 0 && isspace((unsigned char)*line)) {
1151: line++;
1152: len--;
1153: }
1154: if (1 == len)
1155: continue;
1156: title = mandoc_realloc(title, titlesz + len);
1157: memcpy(title + titlesz, line, len);
1158: titlesz += len;
1159: title[titlesz - 1] = ' ';
1160: }
1.28 schwarze 1161:
1.47 schwarze 1162: /*
1163: * If no page content can be found, or the input line
1164: * is already the next section header, or there is no
1165: * trailing newline, reuse the page title as the page
1166: * description.
1167: */
1.1 schwarze 1168:
1.47 schwarze 1169: if (NULL == title || '\0' == *title) {
1170: if (warnings)
1171: say(mpage->mlinks->file,
1172: "Cannot find NAME section");
1173: assert(NULL == mpage->desc);
1174: mpage->desc = mandoc_strdup(mpage->mlinks->name);
1175: putkey(mpage, mpage->mlinks->name, TYPE_Nd);
1176: fclose(stream);
1177: free(title);
1178: return;
1179: }
1.24 schwarze 1180:
1.47 schwarze 1181: title = mandoc_realloc(title, titlesz + 1);
1182: title[titlesz] = '\0';
1.24 schwarze 1183:
1.47 schwarze 1184: /*
1185: * Skip to the first dash.
1186: * Use the remaining line as the description (no more than 70
1187: * bytes).
1188: */
1.28 schwarze 1189:
1.47 schwarze 1190: if (NULL != (p = strstr(title, "- "))) {
1191: for (p += 2; ' ' == *p || '\b' == *p; p++)
1192: /* Skip to next word. */ ;
1193: } else {
1194: if (warnings)
1195: say(mpage->mlinks->file,
1196: "No dash in title line");
1197: p = title;
1198: }
1.1 schwarze 1199:
1.47 schwarze 1200: plen = strlen(p);
1.1 schwarze 1201:
1.47 schwarze 1202: /* Strip backspace-encoding from line. */
1.1 schwarze 1203:
1.47 schwarze 1204: while (NULL != (line = memchr(p, '\b', plen))) {
1205: len = line - p;
1206: if (0 == len) {
1207: memmove(line, line + 1, plen--);
1208: continue;
1209: }
1210: memmove(line - 1, line + 1, plen - len);
1211: plen -= 2;
1212: }
1.1 schwarze 1213:
1.47 schwarze 1214: assert(NULL == mpage->desc);
1215: mpage->desc = mandoc_strdup(p);
1216: putkey(mpage, mpage->desc, TYPE_Nd);
1217: fclose(stream);
1218: free(title);
1219: }
1.16 schwarze 1220:
1.47 schwarze 1221: /*
1222: * Put a type/word pair into the word database for this particular file.
1223: */
1224: static void
1.69 schwarze 1225: putkey(const struct mpage *mpage, char *value, uint64_t type)
1.47 schwarze 1226: {
1.69 schwarze 1227: char *cp;
1.37 schwarze 1228:
1.47 schwarze 1229: assert(NULL != value);
1.69 schwarze 1230: if (TYPE_arch == type)
1231: for (cp = value; *cp; cp++)
1232: if (isupper((unsigned char)*cp))
1233: *cp = _tolower((unsigned char)*cp);
1.47 schwarze 1234: putkeys(mpage, value, strlen(value), type);
1.2 schwarze 1235: }
1236:
1237: /*
1.47 schwarze 1238: * Grok all nodes at or below a certain mdoc node into putkey().
1.2 schwarze 1239: */
1240: static void
1.47 schwarze 1241: putmdockey(const struct mpage *mpage,
1242: const struct mdoc_node *n, uint64_t m)
1.2 schwarze 1243: {
1.16 schwarze 1244:
1.47 schwarze 1245: for ( ; NULL != n; n = n->next) {
1246: if (NULL != n->child)
1247: putmdockey(mpage, n->child, m);
1248: if (MDOC_TEXT == n->type)
1249: putkey(mpage, n->string, m);
1250: }
1251: }
1.16 schwarze 1252:
1.47 schwarze 1253: static void
1254: parse_man(struct mpage *mpage, const struct man_node *n)
1255: {
1256: const struct man_node *head, *body;
1257: char *start, *sv, *title;
1258: char byte;
1259: size_t sz, titlesz;
1.16 schwarze 1260:
1.47 schwarze 1261: if (NULL == n)
1262: return;
1.16 schwarze 1263:
1.47 schwarze 1264: /*
1265: * We're only searching for one thing: the first text child in
1266: * the BODY of a NAME section. Since we don't keep track of
1267: * sections in -man, run some hoops to find out whether we're in
1268: * the correct section or not.
1269: */
1.16 schwarze 1270:
1.47 schwarze 1271: if (MAN_BODY == n->type && MAN_SH == n->tok) {
1272: body = n;
1273: assert(body->parent);
1274: if (NULL != (head = body->parent->head) &&
1275: 1 == head->nchild &&
1276: NULL != (head = (head->child)) &&
1277: MAN_TEXT == head->type &&
1278: 0 == strcmp(head->string, "NAME") &&
1279: NULL != (body = body->child) &&
1280: MAN_TEXT == body->type) {
1.2 schwarze 1281:
1.47 schwarze 1282: title = NULL;
1283: titlesz = 0;
1.2 schwarze 1284:
1.47 schwarze 1285: /*
1286: * Suck the entire NAME section into memory.
1287: * Yes, we might run away.
1288: * But too many manuals have big, spread-out
1289: * NAME sections over many lines.
1290: */
1.2 schwarze 1291:
1.47 schwarze 1292: for ( ; NULL != body; body = body->next) {
1293: if (MAN_TEXT != body->type)
1294: break;
1295: if (0 == (sz = strlen(body->string)))
1296: continue;
1297: title = mandoc_realloc
1298: (title, titlesz + sz + 1);
1299: memcpy(title + titlesz, body->string, sz);
1300: titlesz += sz + 1;
1301: title[titlesz - 1] = ' ';
1302: }
1303: if (NULL == title)
1304: return;
1.16 schwarze 1305:
1.47 schwarze 1306: title = mandoc_realloc(title, titlesz + 1);
1307: title[titlesz] = '\0';
1.16 schwarze 1308:
1.47 schwarze 1309: /* Skip leading space. */
1.16 schwarze 1310:
1.47 schwarze 1311: sv = title;
1312: while (isspace((unsigned char)*sv))
1313: sv++;
1.16 schwarze 1314:
1.47 schwarze 1315: if (0 == (sz = strlen(sv))) {
1316: free(title);
1317: return;
1318: }
1.1 schwarze 1319:
1.47 schwarze 1320: /* Erase trailing space. */
1.1 schwarze 1321:
1.47 schwarze 1322: start = &sv[sz - 1];
1323: while (start > sv && isspace((unsigned char)*start))
1324: *start-- = '\0';
1.1 schwarze 1325:
1.47 schwarze 1326: if (start == sv) {
1327: free(title);
1328: return;
1329: }
1.1 schwarze 1330:
1.47 schwarze 1331: start = sv;
1.16 schwarze 1332:
1.47 schwarze 1333: /*
1334: * Go through a special heuristic dance here.
1335: * Conventionally, one or more manual names are
1336: * comma-specified prior to a whitespace, then a
1337: * dash, then a description. Try to puzzle out
1338: * the name parts here.
1339: */
1.16 schwarze 1340:
1.47 schwarze 1341: for ( ;; ) {
1342: sz = strcspn(start, " ,");
1343: if ('\0' == start[sz])
1344: break;
1.1 schwarze 1345:
1.47 schwarze 1346: byte = start[sz];
1347: start[sz] = '\0';
1.67 schwarze 1348:
1349: /*
1350: * Assume a stray trailing comma in the
1351: * name list if a name begins with a dash.
1352: */
1353:
1354: if ('-' == start[0] ||
1355: ('\\' == start[0] && '-' == start[1]))
1356: break;
1.1 schwarze 1357:
1.47 schwarze 1358: putkey(mpage, start, TYPE_Nm);
1.1 schwarze 1359:
1.47 schwarze 1360: if (' ' == byte) {
1361: start += sz + 1;
1362: break;
1363: }
1.1 schwarze 1364:
1.47 schwarze 1365: assert(',' == byte);
1366: start += sz + 1;
1367: while (' ' == *start)
1368: start++;
1369: }
1.1 schwarze 1370:
1.47 schwarze 1371: if (sv == start) {
1372: putkey(mpage, start, TYPE_Nm);
1373: free(title);
1374: return;
1375: }
1.1 schwarze 1376:
1.47 schwarze 1377: while (isspace((unsigned char)*start))
1378: start++;
1.1 schwarze 1379:
1.47 schwarze 1380: if (0 == strncmp(start, "-", 1))
1381: start += 1;
1382: else if (0 == strncmp(start, "\\-\\-", 4))
1383: start += 4;
1384: else if (0 == strncmp(start, "\\-", 2))
1385: start += 2;
1386: else if (0 == strncmp(start, "\\(en", 4))
1387: start += 4;
1388: else if (0 == strncmp(start, "\\(em", 4))
1389: start += 4;
1.1 schwarze 1390:
1.47 schwarze 1391: while (' ' == *start)
1392: start++;
1.1 schwarze 1393:
1.47 schwarze 1394: assert(NULL == mpage->desc);
1395: mpage->desc = mandoc_strdup(start);
1396: putkey(mpage, mpage->desc, TYPE_Nd);
1397: free(title);
1398: return;
1399: }
1400: }
1.1 schwarze 1401:
1.47 schwarze 1402: for (n = n->child; n; n = n->next) {
1403: if (NULL != mpage->desc)
1404: break;
1405: parse_man(mpage, n);
1.1 schwarze 1406: }
1407: }
1408:
1409: static void
1.47 schwarze 1410: parse_mdoc(struct mpage *mpage, const struct mdoc_node *n)
1.1 schwarze 1411: {
1412:
1.47 schwarze 1413: assert(NULL != n);
1414: for (n = n->child; NULL != n; n = n->next) {
1415: switch (n->type) {
1416: case (MDOC_ELEM):
1417: /* FALLTHROUGH */
1418: case (MDOC_BLOCK):
1419: /* FALLTHROUGH */
1420: case (MDOC_HEAD):
1421: /* FALLTHROUGH */
1422: case (MDOC_BODY):
1423: /* FALLTHROUGH */
1424: case (MDOC_TAIL):
1425: if (NULL != mdocs[n->tok].fp)
1426: if (0 == (*mdocs[n->tok].fp)(mpage, n))
1427: break;
1428: if (mdocs[n->tok].mask)
1429: putmdockey(mpage, n->child,
1430: mdocs[n->tok].mask);
1431: break;
1432: default:
1433: assert(MDOC_ROOT != n->type);
1434: continue;
1435: }
1436: if (NULL != n->child)
1437: parse_mdoc(mpage, n);
1.1 schwarze 1438: }
1439: }
1440:
1.19 schwarze 1441: static int
1.47 schwarze 1442: parse_mdoc_Fd(struct mpage *mpage, const struct mdoc_node *n)
1.1 schwarze 1443: {
1444: const char *start, *end;
1445: size_t sz;
1.19 schwarze 1446:
1.47 schwarze 1447: if (SEC_SYNOPSIS != n->sec ||
1448: NULL == (n = n->child) ||
1449: MDOC_TEXT != n->type)
1.19 schwarze 1450: return(0);
1.1 schwarze 1451:
1452: /*
1453: * Only consider those `Fd' macro fields that begin with an
1454: * "inclusion" token (versus, e.g., #define).
1455: */
1.47 schwarze 1456:
1.1 schwarze 1457: if (strcmp("#include", n->string))
1.19 schwarze 1458: return(0);
1.1 schwarze 1459:
1460: if (NULL == (n = n->next) || MDOC_TEXT != n->type)
1.19 schwarze 1461: return(0);
1.1 schwarze 1462:
1463: /*
1464: * Strip away the enclosing angle brackets and make sure we're
1465: * not zero-length.
1466: */
1467:
1468: start = n->string;
1469: if ('<' == *start || '"' == *start)
1470: start++;
1471:
1472: if (0 == (sz = strlen(start)))
1.19 schwarze 1473: return(0);
1.1 schwarze 1474:
1475: end = &start[(int)sz - 1];
1476: if ('>' == *end || '"' == *end)
1477: end--;
1478:
1.47 schwarze 1479: if (end > start)
1480: putkeys(mpage, start, end - start + 1, TYPE_In);
1.49 schwarze 1481: return(0);
1.1 schwarze 1482: }
1483:
1.19 schwarze 1484: static int
1.47 schwarze 1485: parse_mdoc_Fn(struct mpage *mpage, const struct mdoc_node *n)
1.1 schwarze 1486: {
1.69 schwarze 1487: char *cp;
1.1 schwarze 1488:
1.47 schwarze 1489: if (NULL == (n = n->child) || MDOC_TEXT != n->type)
1.19 schwarze 1490: return(0);
1491:
1.47 schwarze 1492: /*
1493: * Parse: .Fn "struct type *name" "char *arg".
1494: * First strip away pointer symbol.
1495: * Then store the function name, then type.
1496: * Finally, store the arguments.
1497: */
1.1 schwarze 1498:
1.47 schwarze 1499: if (NULL == (cp = strrchr(n->string, ' ')))
1500: cp = n->string;
1.1 schwarze 1501:
1502: while ('*' == *cp)
1503: cp++;
1504:
1.47 schwarze 1505: putkey(mpage, cp, TYPE_Fn);
1.19 schwarze 1506:
1.47 schwarze 1507: if (n->string < cp)
1508: putkeys(mpage, n->string, cp - n->string, TYPE_Ft);
1.19 schwarze 1509:
1.47 schwarze 1510: for (n = n->next; NULL != n; n = n->next)
1511: if (MDOC_TEXT == n->type)
1512: putkey(mpage, n->string, TYPE_Fa);
1.19 schwarze 1513:
1514: return(0);
1.1 schwarze 1515: }
1516:
1.19 schwarze 1517: static int
1.47 schwarze 1518: parse_mdoc_Xr(struct mpage *mpage, const struct mdoc_node *n)
1.1 schwarze 1519: {
1.47 schwarze 1520: char *cp;
1.1 schwarze 1521:
1522: if (NULL == (n = n->child))
1.19 schwarze 1523: return(0);
1.1 schwarze 1524:
1.47 schwarze 1525: if (NULL == n->next) {
1526: putkey(mpage, n->string, TYPE_Xr);
1527: return(0);
1528: }
1.1 schwarze 1529:
1.47 schwarze 1530: if (-1 == asprintf(&cp, "%s(%s)", n->string, n->next->string)) {
1531: perror(NULL);
1532: exit((int)MANDOCLEVEL_SYSERR);
1533: }
1534: putkey(mpage, cp, TYPE_Xr);
1535: free(cp);
1536: return(0);
1.1 schwarze 1537: }
1538:
1.19 schwarze 1539: static int
1.47 schwarze 1540: parse_mdoc_Nd(struct mpage *mpage, const struct mdoc_node *n)
1.1 schwarze 1541: {
1.47 schwarze 1542: size_t sz;
1.1 schwarze 1543:
1544: if (MDOC_BODY != n->type)
1.19 schwarze 1545: return(0);
1.1 schwarze 1546:
1.47 schwarze 1547: /*
1548: * Special-case the `Nd' because we need to put the description
1549: * into the document table.
1550: */
1551:
1552: for (n = n->child; NULL != n; n = n->next) {
1553: if (MDOC_TEXT == n->type) {
1554: if (NULL != mpage->desc) {
1555: sz = strlen(mpage->desc) +
1556: strlen(n->string) + 2;
1557: mpage->desc = mandoc_realloc(
1558: mpage->desc, sz);
1559: strlcat(mpage->desc, " ", sz);
1560: strlcat(mpage->desc, n->string, sz);
1561: } else
1562: mpage->desc = mandoc_strdup(n->string);
1563: }
1564: if (NULL != n->child)
1565: parse_mdoc_Nd(mpage, n);
1566: }
1.19 schwarze 1567: return(1);
1.1 schwarze 1568: }
1569:
1.19 schwarze 1570: static int
1.47 schwarze 1571: parse_mdoc_Nm(struct mpage *mpage, const struct mdoc_node *n)
1.1 schwarze 1572: {
1573:
1.49 schwarze 1574: return(SEC_NAME == n->sec ||
1575: (SEC_SYNOPSIS == n->sec && MDOC_HEAD == n->type));
1.1 schwarze 1576: }
1577:
1.19 schwarze 1578: static int
1.47 schwarze 1579: parse_mdoc_Sh(struct mpage *mpage, const struct mdoc_node *n)
1.1 schwarze 1580: {
1581:
1.19 schwarze 1582: return(SEC_CUSTOM == n->sec && MDOC_HEAD == n->type);
1.1 schwarze 1583: }
1584:
1.47 schwarze 1585: static int
1586: parse_mdoc_head(struct mpage *mpage, const struct mdoc_node *n)
1.1 schwarze 1587: {
1588:
1.47 schwarze 1589: return(MDOC_HEAD == n->type);
1590: }
1.1 schwarze 1591:
1.47 schwarze 1592: static int
1593: parse_mdoc_body(struct mpage *mpage, const struct mdoc_node *n)
1594: {
1.1 schwarze 1595:
1.47 schwarze 1596: return(MDOC_BODY == n->type);
1.1 schwarze 1597: }
1598:
1.47 schwarze 1599: /*
1600: * Add a string to the hash table for the current manual.
1601: * Each string has a bitmask telling which macros it belongs to.
1602: * When we finish the manual, we'll dump the table.
1603: */
1.1 schwarze 1604: static void
1.47 schwarze 1605: putkeys(const struct mpage *mpage,
1606: const char *cp, size_t sz, uint64_t v)
1.1 schwarze 1607: {
1.47 schwarze 1608: struct str *s;
1.68 schwarze 1609: const char *end;
1610: uint64_t mask;
1.47 schwarze 1611: unsigned int slot;
1.68 schwarze 1612: int i;
1.1 schwarze 1613:
1.47 schwarze 1614: if (0 == sz)
1615: return;
1.68 schwarze 1616:
1617: if (verb > 1) {
1618: for (i = 0, mask = 1;
1619: i < mansearch_keymax;
1620: i++, mask <<= 1)
1621: if (mask & v)
1622: break;
1623: say(mpage->mlinks->file, "Adding key %s=%*s",
1624: mansearch_keynames[i], sz, cp);
1625: }
1.47 schwarze 1626:
1627: end = cp + sz;
1628: slot = ohash_qlookupi(&strings, cp, &end);
1629: s = ohash_find(&strings, slot);
1.1 schwarze 1630:
1.47 schwarze 1631: if (NULL != s && mpage == s->mpage) {
1632: s->mask |= v;
1.1 schwarze 1633: return;
1.47 schwarze 1634: } else if (NULL == s) {
1635: s = mandoc_calloc(sizeof(struct str) + sz + 1, 1);
1636: memcpy(s->key, cp, sz);
1637: ohash_insert(&strings, slot, s);
1638: }
1639: s->mpage = mpage;
1640: s->mask = v;
1.1 schwarze 1641: }
1642:
/*
 * Encode the code point "cp" into "out" as a NUL-terminated sequence
 * of one to six bytes, using the original UTF-8 scheme that covers
 * values up to 0x7FFFFFFF.
 * Returns the number of bytes written before the NUL, or 0 without
 * touching "out" if the code point is too large to be encoded.
 */
static size_t
utf8(unsigned int cp, char out[7])
{
	/* Marker bits of the first byte, indexed by sequence length. */
	static const unsigned char lead[] =
	    { 0, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
	size_t	 n, i;

	if (cp <= 0x0000007F)
		n = 1;
	else if (cp <= 0x000007FF)
		n = 2;
	else if (cp <= 0x0000FFFF)
		n = 3;
	else if (cp <= 0x001FFFFF)
		n = 4;
	else if (cp <= 0x03FFFFFF)
		n = 5;
	else if (cp <= 0x7FFFFFFF)
		n = 6;
	else
		return(0);

	/*
	 * Fill in the continuation bytes back to front, six payload
	 * bits each; whatever remains fits into the leading byte.
	 */
	for (i = n; i-- > 1; ) {
		out[i] = (char)((cp & 63) | 128);
		cp >>= 6;
	}
	out[0] = (char)(cp | lead[n]);

	out[n] = '\0';
	return(n);
}
1694:
1.47 schwarze 1695: /*
1.53 schwarze 1696: * Store the rendered version of a key, or alias the pointer
1697: * if the key contains no escape sequences.
1.47 schwarze 1698: */
1699: static void
1.53 schwarze 1700: render_key(struct mchars *mc, struct str *key)
1.1 schwarze 1701: {
1.47 schwarze 1702: size_t sz, bsz, pos;
1.71 schwarze 1703: char utfbuf[7], res[6];
1.47 schwarze 1704: char *buf;
1705: const char *seq, *cpp, *val;
1706: int len, u;
1707: enum mandoc_esc esc;
1708:
1.53 schwarze 1709: assert(NULL == key->rendered);
1.47 schwarze 1710:
1711: res[0] = '\\';
1712: res[1] = '\t';
1713: res[2] = ASCII_NBRSP;
1714: res[3] = ASCII_HYPH;
1.71 schwarze 1715: res[4] = ASCII_BREAK;
1716: res[5] = '\0';
1.1 schwarze 1717:
1.47 schwarze 1718: val = key->key;
1719: bsz = strlen(val);
1.1 schwarze 1720:
1721: /*
1.47 schwarze 1722: * Pre-check: if we have no stop-characters, then set the
1723: * pointer as ourselvse and get out of here.
1.1 schwarze 1724: */
1.47 schwarze 1725: if (strcspn(val, res) == bsz) {
1.53 schwarze 1726: key->rendered = key->key;
1.47 schwarze 1727: return;
1728: }
1.1 schwarze 1729:
1.47 schwarze 1730: /* Pre-allocate by the length of the input */
1.39 schwarze 1731:
1.47 schwarze 1732: buf = mandoc_malloc(++bsz);
1733: pos = 0;
1.39 schwarze 1734:
1.47 schwarze 1735: while ('\0' != *val) {
1736: /*
1737: * Halt on the first escape sequence.
1738: * This also halts on the end of string, in which case
1739: * we just copy, fallthrough, and exit the loop.
1740: */
1741: if ((sz = strcspn(val, res)) > 0) {
1742: memcpy(&buf[pos], val, sz);
1743: pos += sz;
1744: val += sz;
1745: }
1.39 schwarze 1746:
1.71 schwarze 1747: switch (*val) {
1748: case (ASCII_HYPH):
1.47 schwarze 1749: buf[pos++] = '-';
1750: val++;
1751: continue;
1.71 schwarze 1752: case ('\t'):
1753: /* FALLTHROUGH */
1754: case (ASCII_NBRSP):
1.47 schwarze 1755: buf[pos++] = ' ';
1756: val++;
1.71 schwarze 1757: /* FALLTHROUGH */
1758: case (ASCII_BREAK):
1.47 schwarze 1759: continue;
1.71 schwarze 1760: default:
1761: break;
1762: }
1763: if ('\\' != *val)
1.47 schwarze 1764: break;
1.39 schwarze 1765:
1.47 schwarze 1766: /* Read past the slash. */
1.39 schwarze 1767:
1.47 schwarze 1768: val++;
1.39 schwarze 1769:
1.47 schwarze 1770: /*
1771: * Parse the escape sequence and see if it's a
1772: * predefined character or special character.
1773: */
1.52 schwarze 1774:
1.47 schwarze 1775: esc = mandoc_escape
1776: ((const char **)&val, &seq, &len);
1777: if (ESCAPE_ERROR == esc)
1778: break;
1779: if (ESCAPE_SPECIAL != esc)
1780: continue;
1.39 schwarze 1781:
1.47 schwarze 1782: /*
1.52 schwarze 1783: * Render the special character
1784: * as either UTF-8 or ASCII.
1.47 schwarze 1785: */
1.52 schwarze 1786:
1787: if (write_utf8) {
1788: if (0 == (u = mchars_spec2cp(mc, seq, len)))
1789: continue;
1790: cpp = utfbuf;
1791: if (0 == (sz = utf8(u, utfbuf)))
1792: continue;
1793: sz = strlen(cpp);
1794: } else {
1795: cpp = mchars_spec2str(mc, seq, len, &sz);
1796: if (NULL == cpp)
1797: continue;
1798: if (ASCII_NBRSP == *cpp) {
1799: cpp = " ";
1800: sz = 1;
1801: }
1802: }
1.1 schwarze 1803:
1.47 schwarze 1804: /* Copy the rendered glyph into the stream. */
1.1 schwarze 1805:
1.47 schwarze 1806: bsz += sz;
1807: buf = mandoc_realloc(buf, bsz);
1808: memcpy(&buf[pos], cpp, sz);
1809: pos += sz;
1.1 schwarze 1810: }
1811:
1.47 schwarze 1812: buf[pos] = '\0';
1.53 schwarze 1813: key->rendered = buf;
1.1 schwarze 1814: }
1815:
1.75 ! schwarze 1816: static void
! 1817: dbadd_mlink(const struct mlink *mlink)
! 1818: {
! 1819: size_t i;
! 1820:
! 1821: i = 1;
! 1822: SQL_BIND_TEXT(stmts[STMT_INSERT_LINK], i, mlink->dsec);
! 1823: SQL_BIND_TEXT(stmts[STMT_INSERT_LINK], i, mlink->arch);
! 1824: SQL_BIND_TEXT(stmts[STMT_INSERT_LINK], i, mlink->name);
! 1825: SQL_BIND_INT64(stmts[STMT_INSERT_LINK], i, mlink->mpage->recno);
! 1826: SQL_STEP(stmts[STMT_INSERT_LINK]);
! 1827: sqlite3_reset(stmts[STMT_INSERT_LINK]);
! 1828: }
! 1829:
1.11 schwarze 1830: /*
1.47 schwarze 1831: * Flush the current page's terms (and their bits) into the database.
1832: * Wrap the entire set of additions in a transaction to make sqlite be a
1833: * little faster.
1.53 schwarze 1834: * Also, handle escape sequences at the last possible moment.
1.11 schwarze 1835: */
1836: static void
1.75 ! schwarze 1837: dbadd(struct mpage *mpage, struct mchars *mc)
1.11 schwarze 1838: {
1.47 schwarze 1839: struct mlink *mlink;
1840: struct str *key;
1841: size_t i;
1842: unsigned int slot;
1843:
1844: if (verb)
1.62 schwarze 1845: say(mpage->mlinks->file, "Adding to database");
1.11 schwarze 1846:
1.47 schwarze 1847: if (nodb)
1.11 schwarze 1848: return;
1.47 schwarze 1849:
1850: i = 1;
1851: SQL_BIND_INT(stmts[STMT_INSERT_PAGE], i, FORM_SRC == mpage->form);
1852: SQL_STEP(stmts[STMT_INSERT_PAGE]);
1.75 ! schwarze 1853: mpage->recno = sqlite3_last_insert_rowid(db);
1.47 schwarze 1854: sqlite3_reset(stmts[STMT_INSERT_PAGE]);
1855:
1.75 ! schwarze 1856: for (mlink = mpage->mlinks; mlink; mlink = mlink->next)
! 1857: dbadd_mlink(mlink);
1.47 schwarze 1858:
1859: for (key = ohash_first(&strings, &slot); NULL != key;
1860: key = ohash_next(&strings, &slot)) {
1861: assert(key->mpage == mpage);
1.53 schwarze 1862: if (NULL == key->rendered)
1863: render_key(mc, key);
1.47 schwarze 1864: i = 1;
1865: SQL_BIND_INT64(stmts[STMT_INSERT_KEY], i, key->mask);
1.53 schwarze 1866: SQL_BIND_TEXT(stmts[STMT_INSERT_KEY], i, key->rendered);
1.75 ! schwarze 1867: SQL_BIND_INT64(stmts[STMT_INSERT_KEY], i, mpage->recno);
1.47 schwarze 1868: SQL_STEP(stmts[STMT_INSERT_KEY]);
1869: sqlite3_reset(stmts[STMT_INSERT_KEY]);
1.53 schwarze 1870: if (key->rendered != key->key)
1871: free(key->rendered);
1.47 schwarze 1872: free(key);
1.33 schwarze 1873: }
1.47 schwarze 1874: }
1.41 deraadt 1875:
1.47 schwarze 1876: static void
1877: dbprune(void)
1878: {
1879: struct mpage *mpage;
1880: struct mlink *mlink;
1881: size_t i;
1882: unsigned int slot;
1.11 schwarze 1883:
1.63 schwarze 1884: if (0 == nodb)
1885: SQL_EXEC("BEGIN TRANSACTION");
1.47 schwarze 1886:
1.63 schwarze 1887: for (mpage = ohash_first(&mpages, &slot); NULL != mpage;
1888: mpage = ohash_next(&mpages, &slot)) {
1.47 schwarze 1889: mlink = mpage->mlinks;
1890: if (verb)
1.63 schwarze 1891: say(mlink->file, "Deleting from database");
1892: if (nodb)
1893: continue;
1894: for ( ; NULL != mlink; mlink = mlink->next) {
1895: i = 1;
1896: SQL_BIND_TEXT(stmts[STMT_DELETE_PAGE],
1897: i, mlink->dsec);
1898: SQL_BIND_TEXT(stmts[STMT_DELETE_PAGE],
1899: i, mlink->arch);
1900: SQL_BIND_TEXT(stmts[STMT_DELETE_PAGE],
1901: i, mlink->name);
1902: SQL_STEP(stmts[STMT_DELETE_PAGE]);
1903: sqlite3_reset(stmts[STMT_DELETE_PAGE]);
1904: }
1.11 schwarze 1905: }
1.63 schwarze 1906:
1907: if (0 == nodb)
1908: SQL_EXEC("END TRANSACTION");
1.47 schwarze 1909: }
1.22 schwarze 1910:
1.47 schwarze 1911: /*
1912: * Close an existing database and its prepared statements.
1913: * If "real" is not set, rename the temporary file into the real one.
1914: */
1915: static void
1916: dbclose(int real)
1917: {
1918: size_t i;
1.72 schwarze 1919: int status;
1920: pid_t child;
1.11 schwarze 1921:
1.47 schwarze 1922: if (nodb)
1923: return;
1.11 schwarze 1924:
1.47 schwarze 1925: for (i = 0; i < STMT__MAX; i++) {
1926: sqlite3_finalize(stmts[i]);
1927: stmts[i] = NULL;
1.28 schwarze 1928: }
1.22 schwarze 1929:
1.47 schwarze 1930: sqlite3_close(db);
1931: db = NULL;
1.11 schwarze 1932:
1.47 schwarze 1933: if (real)
1934: return;
1.22 schwarze 1935:
1.72 schwarze 1936: if ('\0' == *tempfilename) {
1937: if (-1 == rename(MANDOC_DB "~", MANDOC_DB)) {
1938: exitcode = (int)MANDOCLEVEL_SYSERR;
1939: say(MANDOC_DB, "%s", strerror(errno));
1940: }
1941: return;
1942: }
1943:
1944: switch (child = fork()) {
1945: case (-1):
1946: exitcode = (int)MANDOCLEVEL_SYSERR;
1947: say("fork cmp", "%s", strerror(errno));
1948: return;
1949: case (0):
1950: execlp("cmp", "cmp", "-s",
1951: tempfilename, MANDOC_DB, NULL);
1952: say("exec cmp", "%s", strerror(errno));
1953: exit(0);
1954: default:
1955: break;
1956: }
1957: if (-1 == waitpid(child, &status, 0)) {
1958: exitcode = (int)MANDOCLEVEL_SYSERR;
1959: say("wait cmp", "%s", strerror(errno));
1960: } else if (WIFSIGNALED(status)) {
1961: exitcode = (int)MANDOCLEVEL_SYSERR;
1962: say("cmp", "Died from a signal");
1963: } else if (WEXITSTATUS(status)) {
1.47 schwarze 1964: exitcode = (int)MANDOCLEVEL_SYSERR;
1.72 schwarze 1965: say(MANDOC_DB,
1966: "Data changed, but cannot replace database");
1967: }
1968:
1969: *strrchr(tempfilename, '/') = '\0';
1970: switch (child = fork()) {
1971: case (-1):
1972: exitcode = (int)MANDOCLEVEL_SYSERR;
1973: say("fork rm", "%s", strerror(errno));
1974: return;
1975: case (0):
1976: execlp("rm", "rm", "-rf", tempfilename, NULL);
1977: say("exec rm", "%s", strerror(errno));
1978: exit((int)MANDOCLEVEL_SYSERR);
1979: default:
1980: break;
1981: }
1982: if (-1 == waitpid(child, &status, 0)) {
1983: exitcode = (int)MANDOCLEVEL_SYSERR;
1984: say("wait rm", "%s", strerror(errno));
1985: } else if (WIFSIGNALED(status) || WEXITSTATUS(status)) {
1986: exitcode = (int)MANDOCLEVEL_SYSERR;
1987: say(tempfilename,
1988: "Cannot remove temporary directory");
1.22 schwarze 1989: }
1.11 schwarze 1990: }
1991:
1.47 schwarze 1992: /*
1993: * This is straightforward stuff.
1994: * Open a database connection to a "temporary" database, then open a set
1995: * of prepared statements we'll use over and over again.
1996: * If "real" is set, we use the existing database; if not, we truncate a
1997: * temporary one.
1998: * Must be matched by dbclose().
1999: */
2000: static int
2001: dbopen(int real)
1.2 schwarze 2002: {
1.72 schwarze 2003: const char *sql;
1.47 schwarze 2004: int rc, ofl;
1.6 schwarze 2005:
1.47 schwarze 2006: if (nodb)
2007: return(1);
1.6 schwarze 2008:
1.72 schwarze 2009: *tempfilename = '\0';
1.47 schwarze 2010: ofl = SQLITE_OPEN_READWRITE;
1.72 schwarze 2011:
2012: if (real) {
2013: rc = sqlite3_open_v2(MANDOC_DB, &db, ofl, NULL);
2014: if (SQLITE_OK != rc) {
1.47 schwarze 2015: exitcode = (int)MANDOCLEVEL_SYSERR;
1.72 schwarze 2016: say(MANDOC_DB, "%s", sqlite3_errmsg(db));
1.47 schwarze 2017: return(0);
1.28 schwarze 2018: }
1.72 schwarze 2019: goto prepare_statements;
2020: }
2021:
2022: ofl |= SQLITE_OPEN_CREATE | SQLITE_OPEN_EXCLUSIVE;
1.6 schwarze 2023:
1.72 schwarze 2024: remove(MANDOC_DB "~");
2025: rc = sqlite3_open_v2(MANDOC_DB "~", &db, ofl, NULL);
1.47 schwarze 2026: if (SQLITE_OK == rc)
1.72 schwarze 2027: goto create_tables;
1.73 schwarze 2028: if (MPARSE_QUICK & mparse_options) {
1.47 schwarze 2029: exitcode = (int)MANDOCLEVEL_SYSERR;
1.72 schwarze 2030: say(MANDOC_DB "~", "%s", sqlite3_errmsg(db));
1.47 schwarze 2031: return(0);
2032: }
1.6 schwarze 2033:
1.72 schwarze 2034: if (strlcpy(tempfilename, "/tmp/mandocdb.XXXXXX",
2035: sizeof(tempfilename)) >= sizeof(tempfilename)) {
2036: exitcode = (int)MANDOCLEVEL_SYSERR;
2037: say("/tmp/mandocdb.XXXXXX", "Filename too long");
2038: return(0);
2039: }
2040: if (NULL == mkdtemp(tempfilename)) {
2041: exitcode = (int)MANDOCLEVEL_SYSERR;
2042: say(tempfilename, "%s", strerror(errno));
2043: return(0);
2044: }
2045: if (strlcat(tempfilename, "/" MANDOC_DB,
2046: sizeof(tempfilename)) >= sizeof(tempfilename)) {
2047: exitcode = (int)MANDOCLEVEL_SYSERR;
2048: say(tempfilename, "Filename too long");
2049: return(0);
2050: }
2051: rc = sqlite3_open_v2(tempfilename, &db, ofl, NULL);
2052: if (SQLITE_OK != rc) {
1.47 schwarze 2053: exitcode = (int)MANDOCLEVEL_SYSERR;
1.72 schwarze 2054: say(tempfilename, "%s", sqlite3_errmsg(db));
1.47 schwarze 2055: return(0);
1.2 schwarze 2056: }
2057:
1.72 schwarze 2058: create_tables:
1.47 schwarze 2059: sql = "CREATE TABLE \"mpages\" (\n"
2060: " \"form\" INTEGER NOT NULL,\n"
2061: " \"id\" INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL\n"
2062: ");\n"
2063: "\n"
2064: "CREATE TABLE \"mlinks\" (\n"
2065: " \"sec\" TEXT NOT NULL,\n"
2066: " \"arch\" TEXT NOT NULL,\n"
2067: " \"name\" TEXT NOT NULL,\n"
2068: " \"pageid\" INTEGER NOT NULL REFERENCES mpages(id) "
1.66 schwarze 2069: "ON DELETE CASCADE\n"
1.47 schwarze 2070: ");\n"
2071: "\n"
2072: "CREATE TABLE \"keys\" (\n"
2073: " \"bits\" INTEGER NOT NULL,\n"
2074: " \"key\" TEXT NOT NULL,\n"
2075: " \"pageid\" INTEGER NOT NULL REFERENCES mpages(id) "
1.66 schwarze 2076: "ON DELETE CASCADE\n"
1.65 schwarze 2077: ");\n";
1.47 schwarze 2078:
2079: if (SQLITE_OK != sqlite3_exec(db, sql, NULL, NULL, NULL)) {
2080: exitcode = (int)MANDOCLEVEL_SYSERR;
1.72 schwarze 2081: say(MANDOC_DB, "%s", sqlite3_errmsg(db));
1.47 schwarze 2082: return(0);
1.2 schwarze 2083: }
2084:
1.47 schwarze 2085: prepare_statements:
2086: SQL_EXEC("PRAGMA foreign_keys = ON");
1.63 schwarze 2087: sql = "DELETE FROM mpages WHERE id IN "
2088: "(SELECT pageid FROM mlinks WHERE "
2089: "sec=? AND arch=? AND name=?)";
1.47 schwarze 2090: sqlite3_prepare_v2(db, sql, -1, &stmts[STMT_DELETE_PAGE], NULL);
2091: sql = "INSERT INTO mpages "
1.60 schwarze 2092: "(form) VALUES (?)";
1.47 schwarze 2093: sqlite3_prepare_v2(db, sql, -1, &stmts[STMT_INSERT_PAGE], NULL);
2094: sql = "INSERT INTO mlinks "
1.61 schwarze 2095: "(sec,arch,name,pageid) VALUES (?,?,?,?)";
1.47 schwarze 2096: sqlite3_prepare_v2(db, sql, -1, &stmts[STMT_INSERT_LINK], NULL);
2097: sql = "INSERT INTO keys "
2098: "(bits,key,pageid) VALUES (?,?,?)";
2099: sqlite3_prepare_v2(db, sql, -1, &stmts[STMT_INSERT_KEY], NULL);
1.6 schwarze 2100:
1.47 schwarze 2101: /*
2102: * When opening a new database, we can turn off
2103: * synchronous mode for much better performance.
2104: */
1.6 schwarze 2105:
1.47 schwarze 2106: if (real)
2107: SQL_EXEC("PRAGMA synchronous = OFF");
1.11 schwarze 2108:
1.47 schwarze 2109: return(1);
2110: }
1.6 schwarze 2111:
/*
 * Allocation callback: hand back sz bytes obtained from
 * mandoc_calloc(sz, 1).  The arg pointer is not used.
 * Presumably installed as an ohash allocator callback - confirm
 * against the place where the hash table info is set up.
 */
static void *
hash_halloc(size_t sz, void *arg)
{
	void	*mem;

	(void)arg;
	mem = mandoc_calloc(sz, 1);
	return(mem);
}
1.2 schwarze 2118:
/*
 * Allocation callback: hand back sz bytes obtained from
 * mandoc_malloc().  The arg pointer is not used.
 */
static void *
hash_alloc(size_t sz, void *arg)
{
	void	*mem;

	(void)arg;
	mem = mandoc_malloc(sz);
	return(mem);
}
1.6 schwarze 2125:
/*
 * Deallocation callback: release p with free(3).
 * Neither the size sz nor the arg pointer is used.
 */
static void
hash_free(void *p, size_t sz, void *arg)
{

	(void)sz;
	(void)arg;
	free(p);
}
1.6 schwarze 2132:
1.47 schwarze 2133: static int
2134: set_basedir(const char *targetdir)
2135: {
2136: static char startdir[PATH_MAX];
2137: static int fd;
1.6 schwarze 2138:
1.47 schwarze 2139: /*
2140: * Remember where we started by keeping a fd open to the origin
2141: * path component: throughout this utility, we chdir() a lot to
2142: * handle relative paths, and by doing this, we can return to
2143: * the starting point.
2144: */
2145: if ('\0' == *startdir) {
2146: if (NULL == getcwd(startdir, PATH_MAX)) {
2147: exitcode = (int)MANDOCLEVEL_SYSERR;
2148: if (NULL != targetdir)
2149: say(".", NULL);
2150: return(0);
2151: }
2152: if (-1 == (fd = open(startdir, O_RDONLY, 0))) {
2153: exitcode = (int)MANDOCLEVEL_SYSERR;
2154: say(startdir, NULL);
2155: return(0);
1.11 schwarze 2156: }
1.47 schwarze 2157: if (NULL == targetdir)
2158: targetdir = startdir;
2159: } else {
2160: if (-1 == fd)
2161: return(0);
2162: if (-1 == fchdir(fd)) {
2163: close(fd);
2164: basedir[0] = '\0';
2165: exitcode = (int)MANDOCLEVEL_SYSERR;
2166: say(startdir, NULL);
2167: return(0);
1.2 schwarze 2168: }
1.47 schwarze 2169: if (NULL == targetdir) {
2170: close(fd);
2171: return(1);
1.2 schwarze 2172: }
2173: }
1.47 schwarze 2174: if (NULL == realpath(targetdir, basedir)) {
2175: basedir[0] = '\0';
2176: exitcode = (int)MANDOCLEVEL_BADARG;
2177: say(targetdir, NULL);
2178: return(0);
2179: } else if (-1 == chdir(basedir)) {
2180: exitcode = (int)MANDOCLEVEL_BADARG;
2181: say("", NULL);
2182: return(0);
2183: }
2184: return(1);
1.2 schwarze 2185: }
2186:
2187: static void
1.47 schwarze 2188: say(const char *file, const char *format, ...)
1.2 schwarze 2189: {
1.47 schwarze 2190: va_list ap;
1.2 schwarze 2191:
1.47 schwarze 2192: if ('\0' != *basedir)
2193: fprintf(stderr, "%s", basedir);
2194: if ('\0' != *basedir && '\0' != *file)
2195: fputs("//", stderr);
2196: if ('\0' != *file)
2197: fprintf(stderr, "%s", file);
2198: fputs(": ", stderr);
1.31 schwarze 2199:
1.47 schwarze 2200: if (NULL == format) {
2201: perror(NULL);
2202: return;
1.2 schwarze 2203: }
1.47 schwarze 2204:
2205: va_start(ap, format);
2206: vfprintf(stderr, format, ap);
2207: va_end(ap);
2208:
2209: fputc('\n', stderr);
1.1 schwarze 2210: }