Annotation of src/usr.bin/mandoc/mandocdb.c, Revision 1.6
1.6 ! schwarze 1: /* $Id: mandocdb.c,v 1.5 2011/11/13 10:40:52 schwarze Exp $ */
1.1 schwarze 2: /*
3: * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4: *
5: * Permission to use, copy, modify, and distribute this software for any
6: * purpose with or without fee is hereby granted, provided that the above
7: * copyright notice and this permission notice appear in all copies.
8: *
9: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16: */
17: #include <sys/param.h>
18:
19: #include <assert.h>
1.2 schwarze 20: #include <dirent.h>
1.1 schwarze 21: #include <fcntl.h>
22: #include <getopt.h>
23: #include <stdio.h>
24: #include <stdint.h>
25: #include <stdlib.h>
26: #include <string.h>
27: #include <db.h>
28:
29: #include "man.h"
30: #include "mdoc.h"
31: #include "mandoc.h"
1.5 schwarze 32: #include "mandocdb.h"
1.1 schwarze 33:
34: #define MANDOC_BUFSZ BUFSIZ
35: #define MANDOC_SLOP 1024
36:
/*
 * Minimal singly linked list of manual files scheduled for processing.
 * It is small enough that pulling in the QUEUE macros is not worthwhile.
 */
struct	of {
	char		*fname;	/* file path; heap-allocated */
	char		*sec;	/* section parsed from the path (may be unset) */
	char		*arch;	/* architecture parsed from the path (may be unset) */
	char		*title;	/* page title parsed from the path */
	struct of	*next;	/* following entry; NULL for the last one */
	struct of	*first;	/* head of the list this entry belongs to */
};
47:
/*
 * Growable byte buffer.
 */
struct	buf {
	char		*cp;	/* storage */
	size_t		 len;	/* number of bytes currently in use */
	size_t		 size;	/* number of bytes allocated */
};
55:
/*
 * Mode of operation selected on the command line.
 */
enum	op {
	OP_NEW = 0,	/* create a fresh database */
	OP_UPDATE = 1,	/* delete, then re-add entries in an existing database */
	OP_DELETE = 2	/* only delete entries from an existing database */
};
63:
/*
 * Shared parameter lists of the node handlers: the temporary keyword
 * hash table, the keyword buffer, the description buffer and the
 * current syntax tree node (plus the page meta data for mdoc).
 */
#define	MAN_ARGS \
	DB *hash, struct buf *buf, struct buf *dbuf, \
	const struct man_node *n
#define	MDOC_ARGS \
	DB *hash, struct buf *buf, struct buf *dbuf, \
	const struct mdoc_node *n, const struct mdoc_meta *m
73:
74: static void buf_appendmdoc(struct buf *,
75: const struct mdoc_node *, int);
76: static void buf_append(struct buf *, const char *);
77: static void buf_appendb(struct buf *,
78: const void *, size_t);
79: static void dbt_put(DB *, const char *, DBT *, DBT *);
80: static void hash_put(DB *, const struct buf *, int);
81: static void hash_reset(DB **);
1.2 schwarze 82: static void index_merge(const struct of *, struct mparse *,
83: struct buf *, struct buf *,
84: DB *, DB *, const char *,
1.6 ! schwarze 85: DB *, const char *, int, int,
1.2 schwarze 86: recno_t, const recno_t *, size_t);
87: static void index_prune(const struct of *, DB *,
88: const char *, DB *, const char *,
89: int, recno_t *, recno_t **, size_t *);
1.6 ! schwarze 90: static void ofile_argbuild(char *[], int, int, int,
! 91: struct of **);
! 92: static int ofile_dirbuild(const char *, const char *,
! 93: const char *, int, int, struct of **);
1.2 schwarze 94: static void ofile_free(struct of *);
1.1 schwarze 95: static int pman_node(MAN_ARGS);
96: static void pmdoc_node(MDOC_ARGS);
97: static void pmdoc_An(MDOC_ARGS);
98: static void pmdoc_Cd(MDOC_ARGS);
99: static void pmdoc_Er(MDOC_ARGS);
100: static void pmdoc_Ev(MDOC_ARGS);
101: static void pmdoc_Fd(MDOC_ARGS);
102: static void pmdoc_In(MDOC_ARGS);
103: static void pmdoc_Fn(MDOC_ARGS);
104: static void pmdoc_Fo(MDOC_ARGS);
105: static void pmdoc_Nd(MDOC_ARGS);
106: static void pmdoc_Nm(MDOC_ARGS);
107: static void pmdoc_Pa(MDOC_ARGS);
108: static void pmdoc_St(MDOC_ARGS);
109: static void pmdoc_Vt(MDOC_ARGS);
110: static void pmdoc_Xr(MDOC_ARGS);
111: static void usage(void);
112:
113: typedef void (*pmdoc_nf)(MDOC_ARGS);
114:
115: static const pmdoc_nf mdocs[MDOC_MAX] = {
116: NULL, /* Ap */
117: NULL, /* Dd */
118: NULL, /* Dt */
119: NULL, /* Os */
120: NULL, /* Sh */
121: NULL, /* Ss */
122: NULL, /* Pp */
123: NULL, /* D1 */
124: NULL, /* Dl */
125: NULL, /* Bd */
126: NULL, /* Ed */
127: NULL, /* Bl */
128: NULL, /* El */
129: NULL, /* It */
130: NULL, /* Ad */
131: pmdoc_An, /* An */
132: NULL, /* Ar */
133: pmdoc_Cd, /* Cd */
134: NULL, /* Cm */
135: NULL, /* Dv */
136: pmdoc_Er, /* Er */
137: pmdoc_Ev, /* Ev */
138: NULL, /* Ex */
139: NULL, /* Fa */
140: pmdoc_Fd, /* Fd */
141: NULL, /* Fl */
142: pmdoc_Fn, /* Fn */
143: NULL, /* Ft */
144: NULL, /* Ic */
145: pmdoc_In, /* In */
146: NULL, /* Li */
147: pmdoc_Nd, /* Nd */
148: pmdoc_Nm, /* Nm */
149: NULL, /* Op */
150: NULL, /* Ot */
151: pmdoc_Pa, /* Pa */
152: NULL, /* Rv */
153: pmdoc_St, /* St */
154: pmdoc_Vt, /* Va */
155: pmdoc_Vt, /* Vt */
156: pmdoc_Xr, /* Xr */
157: NULL, /* %A */
158: NULL, /* %B */
159: NULL, /* %D */
160: NULL, /* %I */
161: NULL, /* %J */
162: NULL, /* %N */
163: NULL, /* %O */
164: NULL, /* %P */
165: NULL, /* %R */
166: NULL, /* %T */
167: NULL, /* %V */
168: NULL, /* Ac */
169: NULL, /* Ao */
170: NULL, /* Aq */
171: NULL, /* At */
172: NULL, /* Bc */
173: NULL, /* Bf */
174: NULL, /* Bo */
175: NULL, /* Bq */
176: NULL, /* Bsx */
177: NULL, /* Bx */
178: NULL, /* Db */
179: NULL, /* Dc */
180: NULL, /* Do */
181: NULL, /* Dq */
182: NULL, /* Ec */
183: NULL, /* Ef */
184: NULL, /* Em */
185: NULL, /* Eo */
186: NULL, /* Fx */
187: NULL, /* Ms */
188: NULL, /* No */
189: NULL, /* Ns */
190: NULL, /* Nx */
191: NULL, /* Ox */
192: NULL, /* Pc */
193: NULL, /* Pf */
194: NULL, /* Po */
195: NULL, /* Pq */
196: NULL, /* Qc */
197: NULL, /* Ql */
198: NULL, /* Qo */
199: NULL, /* Qq */
200: NULL, /* Re */
201: NULL, /* Rs */
202: NULL, /* Sc */
203: NULL, /* So */
204: NULL, /* Sq */
205: NULL, /* Sm */
206: NULL, /* Sx */
207: NULL, /* Sy */
208: NULL, /* Tn */
209: NULL, /* Ux */
210: NULL, /* Xc */
211: NULL, /* Xo */
212: pmdoc_Fo, /* Fo */
213: NULL, /* Fc */
214: NULL, /* Oo */
215: NULL, /* Oc */
216: NULL, /* Bk */
217: NULL, /* Ek */
218: NULL, /* Bt */
219: NULL, /* Hf */
220: NULL, /* Fr */
221: NULL, /* Ud */
222: NULL, /* Lb */
223: NULL, /* Lp */
224: NULL, /* Lk */
225: NULL, /* Mt */
226: NULL, /* Brq */
227: NULL, /* Bro */
228: NULL, /* Brc */
229: NULL, /* %C */
230: NULL, /* Es */
231: NULL, /* En */
232: NULL, /* Dx */
233: NULL, /* %Q */
234: NULL, /* br */
235: NULL, /* sp */
236: NULL, /* %U */
237: NULL, /* Ta */
238: };
239:
240: static const char *progname;
241:
242: int
1.3 schwarze 243: mandocdb(int argc, char *argv[])
1.1 schwarze 244: {
245: struct mparse *mp; /* parse sequence */
246: enum op op; /* current operation */
1.2 schwarze 247: const char *dir;
1.1 schwarze 248: char ibuf[MAXPATHLEN], /* index fname */
1.2 schwarze 249: fbuf[MAXPATHLEN]; /* btree fname */
250: int verb, /* output verbosity */
1.6 ! schwarze 251: use_all, /* use all directories and files */
1.2 schwarze 252: ch, i, flags;
1.1 schwarze 253: DB *idx, /* index database */
254: *db, /* keyword database */
255: *hash; /* temporary keyword hashtable */
256: BTREEINFO info; /* btree configuration */
1.2 schwarze 257: recno_t maxrec; /* supremum of all records */
1.1 schwarze 258: recno_t *recs; /* buffer of empty records */
1.2 schwarze 259: size_t sz1, sz2,
260: recsz, /* buffer size of recs */
1.1 schwarze 261: reccur; /* valid number of recs */
262: struct buf buf, /* keyword buffer */
263: dbuf; /* description buffer */
1.2 schwarze 264: struct of *of; /* list of files for processing */
1.1 schwarze 265: extern int optind;
266: extern char *optarg;
267:
268: progname = strrchr(argv[0], '/');
269: if (progname == NULL)
270: progname = argv[0];
271: else
272: ++progname;
273:
274: verb = 0;
1.6 ! schwarze 275: use_all = 0;
1.2 schwarze 276: of = NULL;
1.1 schwarze 277: db = idx = NULL;
278: mp = NULL;
279: hash = NULL;
280: recs = NULL;
281: recsz = reccur = 0;
282: maxrec = 0;
283: op = OP_NEW;
1.2 schwarze 284: dir = NULL;
1.1 schwarze 285:
1.6 ! schwarze 286: while (-1 != (ch = getopt(argc, argv, "ad:u:v")))
1.1 schwarze 287: switch (ch) {
1.6 ! schwarze 288: case ('a'):
! 289: use_all = 1;
! 290: break;
1.1 schwarze 291: case ('d'):
292: dir = optarg;
1.2 schwarze 293: op = OP_UPDATE;
1.1 schwarze 294: break;
1.2 schwarze 295: case ('u'):
296: dir = optarg;
1.1 schwarze 297: op = OP_DELETE;
298: break;
299: case ('v'):
300: verb++;
301: break;
302: default:
303: usage();
304: return((int)MANDOCLEVEL_BADARG);
305: }
306:
307: argc -= optind;
308: argv += optind;
309:
1.2 schwarze 310: memset(&info, 0, sizeof(BTREEINFO));
311: info.flags = R_DUP;
312:
313: mp = mparse_alloc(MPARSE_AUTO, MANDOCLEVEL_FATAL, NULL, NULL);
314:
315: memset(&buf, 0, sizeof(struct buf));
316: memset(&dbuf, 0, sizeof(struct buf));
317:
318: buf.size = dbuf.size = MANDOC_BUFSZ;
319:
320: buf.cp = mandoc_malloc(buf.size);
321: dbuf.cp = mandoc_malloc(dbuf.size);
322:
323: flags = OP_NEW == op ? O_CREAT|O_TRUNC|O_RDWR : O_CREAT|O_RDWR;
1.1 schwarze 324:
1.2 schwarze 325: if (OP_UPDATE == op || OP_DELETE == op) {
326: ibuf[0] = fbuf[0] = '\0';
1.1 schwarze 327:
1.2 schwarze 328: strlcat(fbuf, dir, MAXPATHLEN);
329: strlcat(fbuf, "/", MAXPATHLEN);
330: sz1 = strlcat(fbuf, MANDOC_DB, MAXPATHLEN);
331:
332: strlcat(ibuf, dir, MAXPATHLEN);
333: strlcat(ibuf, "/", MAXPATHLEN);
334: sz2 = strlcat(ibuf, MANDOC_IDX, MAXPATHLEN);
335:
336: if (sz1 >= MAXPATHLEN || sz2 >= MAXPATHLEN) {
337: fprintf(stderr, "%s: Path too long\n", dir);
338: exit((int)MANDOCLEVEL_BADARG);
339: }
1.1 schwarze 340:
1.2 schwarze 341: db = dbopen(fbuf, flags, 0644, DB_BTREE, &info);
342: idx = dbopen(ibuf, flags, 0644, DB_RECNO, NULL);
1.1 schwarze 343:
1.2 schwarze 344: if (NULL == db) {
345: perror(fbuf);
346: exit((int)MANDOCLEVEL_SYSERR);
347: } else if (NULL == db) {
348: perror(ibuf);
349: exit((int)MANDOCLEVEL_SYSERR);
350: }
1.1 schwarze 351:
1.2 schwarze 352: if (verb > 2) {
353: printf("%s: Opened\n", fbuf);
354: printf("%s: Opened\n", ibuf);
355: }
1.1 schwarze 356:
1.6 ! schwarze 357: ofile_argbuild(argv, argc, use_all, verb, &of);
1.2 schwarze 358: if (NULL == of)
359: goto out;
360:
361: of = of->first;
362:
363: index_prune(of, db, fbuf, idx, ibuf, verb,
364: &maxrec, &recs, &recsz);
365:
366: if (OP_UPDATE == op)
367: index_merge(of, mp, &dbuf, &buf, hash,
1.6 ! schwarze 368: db, fbuf, idx, ibuf, use_all,
! 369: verb, maxrec, recs, reccur);
1.1 schwarze 370:
371: goto out;
372: }
373:
1.2 schwarze 374: for (i = 0; i < argc; i++) {
375: ibuf[0] = fbuf[0] = '\0';
376:
377: strlcat(fbuf, argv[i], MAXPATHLEN);
378: strlcat(fbuf, "/", MAXPATHLEN);
379: sz1 = strlcat(fbuf, MANDOC_DB, MAXPATHLEN);
380:
381: strlcat(ibuf, argv[i], MAXPATHLEN);
382: strlcat(ibuf, "/", MAXPATHLEN);
383: sz2 = strlcat(ibuf, MANDOC_IDX, MAXPATHLEN);
384:
385: if (sz1 >= MAXPATHLEN || sz2 >= MAXPATHLEN) {
386: fprintf(stderr, "%s: Path too long\n", argv[i]);
387: exit((int)MANDOCLEVEL_BADARG);
388: }
389:
390: db = dbopen(fbuf, flags, 0644, DB_BTREE, &info);
391: idx = dbopen(ibuf, flags, 0644, DB_RECNO, NULL);
1.1 schwarze 392:
1.2 schwarze 393: if (NULL == db) {
394: perror(fbuf);
395: exit((int)MANDOCLEVEL_SYSERR);
396: } else if (NULL == db) {
397: perror(ibuf);
398: exit((int)MANDOCLEVEL_SYSERR);
399: }
1.1 schwarze 400:
1.2 schwarze 401: if (verb > 2) {
402: printf("%s: Truncated\n", fbuf);
403: printf("%s: Truncated\n", ibuf);
404: }
1.1 schwarze 405:
1.2 schwarze 406: ofile_free(of);
407: of = NULL;
1.1 schwarze 408:
1.6 ! schwarze 409: if ( ! ofile_dirbuild(argv[i], NULL, NULL,
! 410: use_all, verb, &of))
1.2 schwarze 411: exit((int)MANDOCLEVEL_SYSERR);
1.1 schwarze 412:
1.2 schwarze 413: if (NULL == of)
414: continue;
1.1 schwarze 415:
1.2 schwarze 416: of = of->first;
1.1 schwarze 417:
1.2 schwarze 418: index_merge(of, mp, &dbuf, &buf, hash, db, fbuf,
1.6 ! schwarze 419: idx, ibuf, use_all, verb,
! 420: maxrec, recs, reccur);
1.1 schwarze 421: }
422:
1.2 schwarze 423: out:
424: if (db)
425: (*db->close)(db);
426: if (idx)
427: (*idx->close)(idx);
428: if (hash)
429: (*hash->close)(hash);
430: if (mp)
431: mparse_free(mp);
1.1 schwarze 432:
1.2 schwarze 433: ofile_free(of);
434: free(buf.cp);
435: free(dbuf.cp);
436: free(recs);
1.1 schwarze 437:
1.2 schwarze 438: return(MANDOCLEVEL_OK);
439: }
1.1 schwarze 440:
1.2 schwarze 441: void
442: index_merge(const struct of *of, struct mparse *mp,
443: struct buf *dbuf, struct buf *buf,
444: DB *hash, DB *db, const char *dbf,
1.6 ! schwarze 445: DB *idx, const char *idxf, int use_all, int verb,
1.2 schwarze 446: recno_t maxrec, const recno_t *recs, size_t reccur)
447: {
448: recno_t rec;
449: int ch;
450: DBT key, val;
451: struct mdoc *mdoc;
452: struct man *man;
453: const char *fn, *msec, *mtitle, *arch;
454: size_t sv;
455: unsigned seq;
456: char vbuf[8];
1.1 schwarze 457:
1.2 schwarze 458: for (rec = 0; of; of = of->next) {
459: fn = of->fname;
460: if (reccur > 0) {
461: --reccur;
462: rec = recs[(int)reccur];
463: } else if (maxrec > 0) {
464: rec = maxrec;
465: maxrec = 0;
1.1 schwarze 466: } else
467: rec++;
468:
469: mparse_reset(mp);
470: hash_reset(&hash);
471:
472: if (mparse_readfd(mp, -1, fn) >= MANDOCLEVEL_FATAL) {
473: fprintf(stderr, "%s: Parse failure\n", fn);
474: continue;
475: }
476:
477: mparse_result(mp, &mdoc, &man);
478: if (NULL == mdoc && NULL == man)
479: continue;
480:
1.6 ! schwarze 481: /*
! 482: * Make sure the manual section and architecture
! 483: * agree with the directory where the file is located
! 484: * or man(1) will not be able to find it.
! 485: */
! 486:
1.1 schwarze 487: msec = NULL != mdoc ?
488: mdoc_meta(mdoc)->msec : man_meta(man)->msec;
1.2 schwarze 489: arch = NULL != mdoc ?
490: mdoc_meta(mdoc)->arch : NULL;
1.1 schwarze 491:
1.6 ! schwarze 492: if (0 == use_all) {
! 493: assert(of->sec);
! 494: assert(msec);
! 495: if (strcmp(msec, of->sec))
! 496: continue;
! 497:
! 498: if (NULL == arch) {
! 499: if (NULL != of->arch)
! 500: continue;
! 501: } else if (NULL == of->arch ||
! 502: strcmp(arch, of->arch))
! 503: continue;
! 504: }
! 505:
1.1 schwarze 506: if (NULL == arch)
507: arch = "";
508:
509: /*
1.6 ! schwarze 510: * Case is relevant for man(1), so use the file name
! 511: * instead of the (usually) all caps page title,
! 512: * if the two agree.
! 513: */
! 514:
! 515: mtitle = NULL != mdoc ?
! 516: mdoc_meta(mdoc)->title : man_meta(man)->title;
! 517:
! 518: assert(of->title);
! 519: assert(mtitle);
! 520:
! 521: if (0 == strcasecmp(mtitle, of->title))
! 522: mtitle = of->title;
! 523: else if (0 == use_all)
! 524: continue;
! 525:
! 526: /*
1.1 schwarze 527: * The index record value consists of a nil-terminated
528: * filename, a nil-terminated manual section, and a
529: * nil-terminated description. Since the description
530: * may not be set, we set a sentinel to see if we're
531: * going to write a nil byte in its place.
532: */
533:
1.2 schwarze 534: dbuf->len = 0;
535: buf_appendb(dbuf, fn, strlen(fn) + 1);
536: buf_appendb(dbuf, msec, strlen(msec) + 1);
537: buf_appendb(dbuf, mtitle, strlen(mtitle) + 1);
538: buf_appendb(dbuf, arch, strlen(arch) + 1);
1.1 schwarze 539:
1.2 schwarze 540: sv = dbuf->len;
1.1 schwarze 541:
542: /* Fix the record number in the btree value. */
543:
544: if (mdoc)
1.2 schwarze 545: pmdoc_node(hash, buf, dbuf,
1.1 schwarze 546: mdoc_node(mdoc), mdoc_meta(mdoc));
547: else
1.2 schwarze 548: pman_node(hash, buf, dbuf, man_node(man));
1.1 schwarze 549:
550: /*
551: * Copy from the in-memory hashtable of pending keywords
552: * into the database.
553: */
554:
555: memset(vbuf, 0, sizeof(uint32_t));
556: memcpy(vbuf + 4, &rec, sizeof(uint32_t));
557:
558: seq = R_FIRST;
559: while (0 == (ch = (*hash->seq)(hash, &key, &val, seq))) {
560: seq = R_NEXT;
561:
562: memcpy(vbuf, val.data, sizeof(uint32_t));
563: val.size = sizeof(vbuf);
564: val.data = vbuf;
565:
566: if (verb > 1)
1.2 schwarze 567: printf("%s: Added keyword: %s\n",
568: fn, (char *)key.data);
569: dbt_put(db, dbf, &key, &val);
1.1 schwarze 570: }
571: if (ch < 0) {
572: perror("hash");
573: exit((int)MANDOCLEVEL_SYSERR);
574: }
575:
576: /*
577: * Apply to the index. If we haven't had a description
578: * set, put an empty one in now.
579: */
580:
1.2 schwarze 581: if (dbuf->len == sv)
582: buf_appendb(dbuf, "", 1);
1.1 schwarze 583:
584: key.data = &rec;
585: key.size = sizeof(recno_t);
586:
1.2 schwarze 587: val.data = dbuf->cp;
588: val.size = dbuf->len;
1.1 schwarze 589:
1.2 schwarze 590: if (verb)
1.1 schwarze 591: printf("%s: Added index\n", fn);
1.2 schwarze 592: dbt_put(idx, idxf, &key, &val);
593: }
594: }
595:
596: /*
597: * Scan through all entries in the index file `idx' and prune those
598: * entries in `ofile'.
599: * Pruning consists of removing from `db', then invalidating the entry
600: * in `idx' (zeroing its value size).
601: */
602: static void
603: index_prune(const struct of *ofile, DB *db, const char *dbf,
604: DB *idx, const char *idxf, int verb,
605: recno_t *maxrec, recno_t **recs, size_t *recsz)
606: {
607: const struct of *of;
608: const char *fn;
609: unsigned seq, sseq;
610: DBT key, val;
611: size_t reccur;
612: int ch;
613:
614: reccur = 0;
615: seq = R_FIRST;
616: while (0 == (ch = (*idx->seq)(idx, &key, &val, seq))) {
617: seq = R_NEXT;
618: *maxrec = *(recno_t *)key.data;
619: if (0 == val.size) {
620: if (reccur >= *recsz) {
621: *recsz += MANDOC_SLOP;
622: *recs = mandoc_realloc(*recs,
623: *recsz * sizeof(recno_t));
624: }
625: (*recs)[(int)reccur] = *maxrec;
626: reccur++;
627: continue;
628: }
629:
630: fn = (char *)val.data;
631: for (of = ofile; of; of = of->next)
632: if (0 == strcmp(fn, of->fname))
633: break;
634:
635: if (NULL == of)
636: continue;
637:
638: sseq = R_FIRST;
639: while (0 == (ch = (*db->seq)(db, &key, &val, sseq))) {
640: sseq = R_NEXT;
641: assert(8 == val.size);
642: if (*maxrec != *(recno_t *)(val.data + 4))
643: continue;
644: if (verb)
645: printf("%s: Deleted keyword: %s\n",
646: fn, (char *)key.data);
647: ch = (*db->del)(db, &key, R_CURSOR);
648: if (ch < 0)
649: break;
650: }
651: if (ch < 0) {
652: perror(dbf);
653: exit((int)MANDOCLEVEL_SYSERR);
654: }
1.1 schwarze 655:
1.2 schwarze 656: if (verb)
657: printf("%s: Deleted index\n", fn);
1.1 schwarze 658:
1.2 schwarze 659: val.size = 0;
660: ch = (*idx->put)(idx, &key, &val, R_CURSOR);
661: if (ch < 0) {
662: perror(idxf);
663: exit((int)MANDOCLEVEL_SYSERR);
664: }
1.1 schwarze 665:
1.2 schwarze 666: if (reccur >= *recsz) {
667: *recsz += MANDOC_SLOP;
668: *recs = mandoc_realloc
669: (*recs, *recsz * sizeof(recno_t));
670: }
1.1 schwarze 671:
1.2 schwarze 672: (*recs)[(int)reccur] = *maxrec;
673: reccur++;
674: }
675: (*maxrec)++;
1.1 schwarze 676: }
677:
678: /*
679: * Grow the buffer (if necessary) and copy in a binary string.
680: */
681: static void
682: buf_appendb(struct buf *buf, const void *cp, size_t sz)
683: {
684:
685: /* Overshoot by MANDOC_BUFSZ. */
686:
687: while (buf->len + sz >= buf->size) {
688: buf->size = buf->len + sz + MANDOC_BUFSZ;
689: buf->cp = mandoc_realloc(buf->cp, buf->size);
690: }
691:
692: memcpy(buf->cp + (int)buf->len, cp, sz);
693: buf->len += sz;
694: }
695:
696: /*
697: * Append a nil-terminated string to the buffer.
698: * This can be invoked multiple times.
699: * The buffer string will be nil-terminated.
700: * If invoked multiple times, a space is put between strings.
701: */
702: static void
703: buf_append(struct buf *buf, const char *cp)
704: {
705: size_t sz;
706:
707: if (0 == (sz = strlen(cp)))
708: return;
709:
710: if (buf->len)
711: buf->cp[(int)buf->len - 1] = ' ';
712:
713: buf_appendb(buf, cp, sz + 1);
714: }
715:
716: /*
717: * Recursively add all text from a given node.
718: * This is optimised for general mdoc nodes in this context, which do
719: * not consist of subexpressions and having a recursive call for n->next
720: * would be wasteful.
721: * The "f" variable should be 0 unless called from pmdoc_Nd for the
722: * description buffer, which does not start at the beginning of the
723: * buffer.
724: */
725: static void
726: buf_appendmdoc(struct buf *buf, const struct mdoc_node *n, int f)
727: {
728:
729: for ( ; n; n = n->next) {
730: if (n->child)
731: buf_appendmdoc(buf, n->child, f);
732:
733: if (MDOC_TEXT == n->type && f) {
734: f = 0;
735: buf_appendb(buf, n->string,
736: strlen(n->string) + 1);
737: } else if (MDOC_TEXT == n->type)
738: buf_append(buf, n->string);
739:
740: }
741: }
742:
743: /* ARGSUSED */
744: static void
745: pmdoc_An(MDOC_ARGS)
746: {
747:
748: if (SEC_AUTHORS != n->sec)
749: return;
750:
751: buf_appendmdoc(buf, n->child, 0);
1.5 schwarze 752: hash_put(hash, buf, TYPE_An);
1.1 schwarze 753: }
754:
755: static void
756: hash_reset(DB **db)
757: {
758: DB *hash;
759:
760: if (NULL != (hash = *db))
761: (*hash->close)(hash);
762:
1.2 schwarze 763: *db = dbopen(NULL, O_CREAT|O_RDWR, 0644, DB_HASH, NULL);
1.1 schwarze 764: if (NULL == *db) {
765: perror("hash");
766: exit((int)MANDOCLEVEL_SYSERR);
767: }
768: }
769:
770: /* ARGSUSED */
771: static void
772: pmdoc_Fd(MDOC_ARGS)
773: {
774: const char *start, *end;
775: size_t sz;
776:
777: if (SEC_SYNOPSIS != n->sec)
778: return;
779: if (NULL == (n = n->child) || MDOC_TEXT != n->type)
780: return;
781:
782: /*
783: * Only consider those `Fd' macro fields that begin with an
784: * "inclusion" token (versus, e.g., #define).
785: */
786: if (strcmp("#include", n->string))
787: return;
788:
789: if (NULL == (n = n->next) || MDOC_TEXT != n->type)
790: return;
791:
792: /*
793: * Strip away the enclosing angle brackets and make sure we're
794: * not zero-length.
795: */
796:
797: start = n->string;
798: if ('<' == *start || '"' == *start)
799: start++;
800:
801: if (0 == (sz = strlen(start)))
802: return;
803:
804: end = &start[(int)sz - 1];
805: if ('>' == *end || '"' == *end)
806: end--;
807:
808: assert(end >= start);
809:
810: buf_appendb(buf, start, (size_t)(end - start + 1));
811: buf_appendb(buf, "", 1);
812:
1.5 schwarze 813: hash_put(hash, buf, TYPE_In);
1.1 schwarze 814: }
815:
816: /* ARGSUSED */
817: static void
818: pmdoc_Cd(MDOC_ARGS)
819: {
820:
821: if (SEC_SYNOPSIS != n->sec)
822: return;
823:
824: buf_appendmdoc(buf, n->child, 0);
1.5 schwarze 825: hash_put(hash, buf, TYPE_Cd);
1.1 schwarze 826: }
827:
828: /* ARGSUSED */
829: static void
830: pmdoc_In(MDOC_ARGS)
831: {
832:
833: if (SEC_SYNOPSIS != n->sec)
834: return;
835: if (NULL == n->child || MDOC_TEXT != n->child->type)
836: return;
837:
838: buf_append(buf, n->child->string);
1.5 schwarze 839: hash_put(hash, buf, TYPE_In);
1.1 schwarze 840: }
841:
842: /* ARGSUSED */
843: static void
844: pmdoc_Fn(MDOC_ARGS)
845: {
846: const char *cp;
847:
848: if (SEC_SYNOPSIS != n->sec)
849: return;
850: if (NULL == n->child || MDOC_TEXT != n->child->type)
851: return;
852:
853: /* .Fn "struct type *arg" "foo" */
854:
855: cp = strrchr(n->child->string, ' ');
856: if (NULL == cp)
857: cp = n->child->string;
858:
859: /* Strip away pointer symbol. */
860:
861: while ('*' == *cp)
862: cp++;
863:
864: buf_append(buf, cp);
1.5 schwarze 865: hash_put(hash, buf, TYPE_Fn);
1.1 schwarze 866: }
867:
868: /* ARGSUSED */
869: static void
870: pmdoc_St(MDOC_ARGS)
871: {
872:
873: if (SEC_STANDARDS != n->sec)
874: return;
875: if (NULL == n->child || MDOC_TEXT != n->child->type)
876: return;
877:
878: buf_append(buf, n->child->string);
1.5 schwarze 879: hash_put(hash, buf, TYPE_St);
1.1 schwarze 880: }
881:
882: /* ARGSUSED */
883: static void
884: pmdoc_Xr(MDOC_ARGS)
885: {
886:
887: if (NULL == (n = n->child))
888: return;
889:
890: buf_appendb(buf, n->string, strlen(n->string));
891:
892: if (NULL != (n = n->next)) {
893: buf_appendb(buf, ".", 1);
894: buf_appendb(buf, n->string, strlen(n->string) + 1);
895: } else
896: buf_appendb(buf, ".", 2);
897:
1.5 schwarze 898: hash_put(hash, buf, TYPE_Xr);
1.1 schwarze 899: }
900:
901: /* ARGSUSED */
902: static void
903: pmdoc_Vt(MDOC_ARGS)
904: {
905: const char *start;
906: size_t sz;
907:
908: if (SEC_SYNOPSIS != n->sec)
909: return;
910: if (MDOC_Vt == n->tok && MDOC_BODY != n->type)
911: return;
912: if (NULL == n->last || MDOC_TEXT != n->last->type)
913: return;
914:
915: /*
916: * Strip away leading pointer symbol '*' and trailing ';'.
917: */
918:
919: start = n->last->string;
920:
921: while ('*' == *start)
922: start++;
923:
924: if (0 == (sz = strlen(start)))
925: return;
926:
927: if (';' == start[(int)sz - 1])
928: sz--;
929:
930: if (0 == sz)
931: return;
932:
933: buf_appendb(buf, start, sz);
934: buf_appendb(buf, "", 1);
1.5 schwarze 935: hash_put(hash, buf, TYPE_Va);
1.1 schwarze 936: }
937:
938: /* ARGSUSED */
939: static void
940: pmdoc_Fo(MDOC_ARGS)
941: {
942:
943: if (SEC_SYNOPSIS != n->sec || MDOC_HEAD != n->type)
944: return;
945: if (NULL == n->child || MDOC_TEXT != n->child->type)
946: return;
947:
948: buf_append(buf, n->child->string);
1.5 schwarze 949: hash_put(hash, buf, TYPE_Fn);
1.1 schwarze 950: }
951:
952:
953: /* ARGSUSED */
954: static void
955: pmdoc_Nd(MDOC_ARGS)
956: {
957:
958: if (MDOC_BODY != n->type)
959: return;
960:
961: buf_appendmdoc(dbuf, n->child, 1);
962: buf_appendmdoc(buf, n->child, 0);
963:
1.5 schwarze 964: hash_put(hash, buf, TYPE_Nd);
1.1 schwarze 965: }
966:
967: /* ARGSUSED */
968: static void
969: pmdoc_Er(MDOC_ARGS)
970: {
971:
972: if (SEC_ERRORS != n->sec)
973: return;
974:
975: buf_appendmdoc(buf, n->child, 0);
1.5 schwarze 976: hash_put(hash, buf, TYPE_Er);
1.1 schwarze 977: }
978:
979: /* ARGSUSED */
980: static void
981: pmdoc_Ev(MDOC_ARGS)
982: {
983:
984: if (SEC_ENVIRONMENT != n->sec)
985: return;
986:
987: buf_appendmdoc(buf, n->child, 0);
1.5 schwarze 988: hash_put(hash, buf, TYPE_Ev);
1.1 schwarze 989: }
990:
991: /* ARGSUSED */
992: static void
993: pmdoc_Pa(MDOC_ARGS)
994: {
995:
996: if (SEC_FILES != n->sec)
997: return;
998:
999: buf_appendmdoc(buf, n->child, 0);
1.5 schwarze 1000: hash_put(hash, buf, TYPE_Pa);
1.1 schwarze 1001: }
1002:
1003: /* ARGSUSED */
1004: static void
1005: pmdoc_Nm(MDOC_ARGS)
1006: {
1007:
1008: if (SEC_NAME == n->sec) {
1009: buf_appendmdoc(buf, n->child, 0);
1.5 schwarze 1010: hash_put(hash, buf, TYPE_Nm);
1.1 schwarze 1011: return;
1012: } else if (SEC_SYNOPSIS != n->sec || MDOC_HEAD != n->type)
1013: return;
1014:
1015: if (NULL == n->child)
1016: buf_append(buf, m->name);
1017:
1018: buf_appendmdoc(buf, n->child, 0);
1.5 schwarze 1019: hash_put(hash, buf, TYPE_Nm);
1.1 schwarze 1020: }
1021:
1022: static void
1023: hash_put(DB *db, const struct buf *buf, int mask)
1024: {
1025: DBT key, val;
1026: int rc;
1027:
1028: if (buf->len < 2)
1029: return;
1030:
1031: key.data = buf->cp;
1032: key.size = buf->len;
1033:
1034: if ((rc = (*db->get)(db, &key, &val, 0)) < 0) {
1035: perror("hash");
1036: exit((int)MANDOCLEVEL_SYSERR);
1037: } else if (0 == rc)
1038: mask |= *(int *)val.data;
1039:
1040: val.data = &mask;
1041: val.size = sizeof(int);
1042:
1043: if ((rc = (*db->put)(db, &key, &val, 0)) < 0) {
1044: perror("hash");
1045: exit((int)MANDOCLEVEL_SYSERR);
1046: }
1047: }
1048:
1049: static void
1050: dbt_put(DB *db, const char *dbn, DBT *key, DBT *val)
1051: {
1052:
1053: assert(key->size);
1054: assert(val->size);
1055:
1056: if (0 == (*db->put)(db, key, val, 0))
1057: return;
1058:
1059: perror(dbn);
1060: exit((int)MANDOCLEVEL_SYSERR);
1061: /* NOTREACHED */
1062: }
1063:
1064: /*
1065: * Call out to per-macro handlers after clearing the persistent database
1066: * key. If the macro sets the database key, flush it to the database.
1067: */
1068: static void
1069: pmdoc_node(MDOC_ARGS)
1070: {
1071:
1072: if (NULL == n)
1073: return;
1074:
1075: switch (n->type) {
1076: case (MDOC_HEAD):
1077: /* FALLTHROUGH */
1078: case (MDOC_BODY):
1079: /* FALLTHROUGH */
1080: case (MDOC_TAIL):
1081: /* FALLTHROUGH */
1082: case (MDOC_BLOCK):
1083: /* FALLTHROUGH */
1084: case (MDOC_ELEM):
1085: if (NULL == mdocs[n->tok])
1086: break;
1087:
1088: buf->len = 0;
1089: (*mdocs[n->tok])(hash, buf, dbuf, n, m);
1090: break;
1091: default:
1092: break;
1093: }
1094:
1095: pmdoc_node(hash, buf, dbuf, n->child, m);
1096: pmdoc_node(hash, buf, dbuf, n->next, m);
1097: }
1098:
1099: static int
1100: pman_node(MAN_ARGS)
1101: {
1102: const struct man_node *head, *body;
1103: const char *start, *sv;
1104: size_t sz;
1105:
1106: if (NULL == n)
1107: return(0);
1108:
1109: /*
1110: * We're only searching for one thing: the first text child in
1111: * the BODY of a NAME section. Since we don't keep track of
1112: * sections in -man, run some hoops to find out whether we're in
1113: * the correct section or not.
1114: */
1115:
1116: if (MAN_BODY == n->type && MAN_SH == n->tok) {
1117: body = n;
1118: assert(body->parent);
1119: if (NULL != (head = body->parent->head) &&
1120: 1 == head->nchild &&
1121: NULL != (head = (head->child)) &&
1122: MAN_TEXT == head->type &&
1123: 0 == strcmp(head->string, "NAME") &&
1124: NULL != (body = body->child) &&
1125: MAN_TEXT == body->type) {
1126:
1127: assert(body->string);
1128: start = sv = body->string;
1129:
1130: /*
1131: * Go through a special heuristic dance here.
1132: * This is why -man manuals are great!
1133: * (I'm being sarcastic: my eyes are bleeding.)
1134: * Conventionally, one or more manual names are
1135: * comma-specified prior to a whitespace, then a
1136: * dash, then a description. Try to puzzle out
1137: * the name parts here.
1138: */
1139:
1140: for ( ;; ) {
1141: sz = strcspn(start, " ,");
1142: if ('\0' == start[(int)sz])
1143: break;
1144:
1145: buf->len = 0;
1146: buf_appendb(buf, start, sz);
1147: buf_appendb(buf, "", 1);
1148:
1.5 schwarze 1149: hash_put(hash, buf, TYPE_Nm);
1.1 schwarze 1150:
1151: if (' ' == start[(int)sz]) {
1152: start += (int)sz + 1;
1153: break;
1154: }
1155:
1156: assert(',' == start[(int)sz]);
1157: start += (int)sz + 1;
1158: while (' ' == *start)
1159: start++;
1160: }
1161:
1162: buf->len = 0;
1163:
1164: if (sv == start) {
1165: buf_append(buf, start);
1166: return(1);
1167: }
1168:
1169: while (' ' == *start)
1170: start++;
1171:
1172: if (0 == strncmp(start, "-", 1))
1173: start += 1;
1174: else if (0 == strncmp(start, "\\-", 2))
1175: start += 2;
1176: else if (0 == strncmp(start, "\\(en", 4))
1177: start += 4;
1178: else if (0 == strncmp(start, "\\(em", 4))
1179: start += 4;
1180:
1181: while (' ' == *start)
1182: start++;
1183:
1184: sz = strlen(start) + 1;
1185: buf_appendb(dbuf, start, sz);
1186: buf_appendb(buf, start, sz);
1187:
1.5 schwarze 1188: hash_put(hash, buf, TYPE_Nd);
1.1 schwarze 1189: }
1190: }
1191:
1.4 schwarze 1192: for (n = n->child; n; n = n->next)
1193: if (pman_node(hash, buf, dbuf, n))
1194: return(1);
1.1 schwarze 1195:
1196: return(0);
1197: }
1198:
1199: static void
1.6 ! schwarze 1200: ofile_argbuild(char *argv[], int argc, int use_all, int verb,
! 1201: struct of **of)
1.2 schwarze 1202: {
1.6 ! schwarze 1203: char buf[MAXPATHLEN];
! 1204: char *sec, *arch, *title, *p;
1.2 schwarze 1205: int i;
1206: struct of *nof;
1207:
1208: for (i = 0; i < argc; i++) {
1.6 ! schwarze 1209:
! 1210: /*
! 1211: * Analyze the path.
! 1212: */
! 1213:
! 1214: if (strlcpy(buf, argv[i], sizeof(buf)) >= sizeof(buf)) {
! 1215: fprintf(stderr, "%s: Path too long\n", argv[i]);
! 1216: continue;
! 1217: }
! 1218: sec = arch = title = NULL;
! 1219: p = strrchr(buf, '\0');
! 1220: while (p-- > buf) {
! 1221: if (NULL == sec && '.' == *p) {
! 1222: sec = p + 1;
! 1223: *p = '\0';
! 1224: continue;
! 1225: }
! 1226: if ('/' != *p)
! 1227: continue;
! 1228: if (NULL == title) {
! 1229: title = p + 1;
! 1230: *p = '\0';
! 1231: continue;
! 1232: }
! 1233: if (strncmp("man", p + 1, 3))
! 1234: arch = p + 1;
! 1235: break;
! 1236: }
! 1237: if (NULL == title)
! 1238: title = buf;
! 1239:
! 1240: /*
! 1241: * Build the file structure.
! 1242: */
! 1243:
1.2 schwarze 1244: nof = mandoc_calloc(1, sizeof(struct of));
1.6 ! schwarze 1245: nof->fname = mandoc_strdup(argv[i]);
! 1246: if (NULL != sec)
! 1247: nof->sec = mandoc_strdup(sec);
! 1248: if (NULL != arch)
! 1249: nof->arch = mandoc_strdup(arch);
! 1250: nof->title = mandoc_strdup(title);
! 1251:
! 1252: /*
! 1253: * Add the structure to the list.
! 1254: */
! 1255:
1.2 schwarze 1256: if (verb > 2)
1257: printf("%s: Scheduling\n", argv[i]);
1258: if (NULL == *of) {
1259: *of = nof;
1260: (*of)->first = nof;
1261: } else {
1262: nof->first = (*of)->first;
1263: (*of)->next = nof;
1264: *of = nof;
1265: }
1266: }
1267: }
1268:
1269: /*
1270: * Recursively build up a list of files to parse.
1271: * We use this instead of ftw() and so on because I don't want global
1272: * variables hanging around.
1273: * This ignores the mandoc.db and mandoc.index files, but assumes that
1274: * everything else is a manual.
1275: * Pass in a pointer to a NULL structure for the first invocation.
1276: */
1277: static int
1.6 ! schwarze 1278: ofile_dirbuild(const char *dir, const char* psec, const char *parch,
! 1279: int use_all, int verb, struct of **of)
1.2 schwarze 1280: {
1281: char buf[MAXPATHLEN];
1282: size_t sz;
1283: DIR *d;
1.6 ! schwarze 1284: const char *fn, *sec, *arch;
! 1285: char *suffix;
1.2 schwarze 1286: struct of *nof;
1287: struct dirent *dp;
1288:
1289: if (NULL == (d = opendir(dir))) {
1290: perror(dir);
1291: return(0);
1292: }
1293:
1294: while (NULL != (dp = readdir(d))) {
1295: fn = dp->d_name;
1.6 ! schwarze 1296:
! 1297: if ('.' == *fn)
! 1298: continue;
! 1299:
1.2 schwarze 1300: if (DT_DIR == dp->d_type) {
1.6 ! schwarze 1301: sec = psec;
! 1302: arch = parch;
! 1303:
! 1304: /*
! 1305: * Don't bother parsing directories
! 1306: * that man(1) won't find.
! 1307: */
! 1308:
! 1309: if (NULL == sec) {
! 1310: if(0 == strncmp("man", fn, 3))
! 1311: sec = fn + 3;
! 1312: else if (use_all)
! 1313: sec = fn;
! 1314: else
! 1315: continue;
! 1316: } else if (NULL == arch && (use_all ||
! 1317: NULL == strchr(fn, '.')))
! 1318: arch = fn;
! 1319: else if (0 == use_all)
1.2 schwarze 1320: continue;
1321:
1322: buf[0] = '\0';
1323: strlcat(buf, dir, MAXPATHLEN);
1324: strlcat(buf, "/", MAXPATHLEN);
1325: sz = strlcat(buf, fn, MAXPATHLEN);
1326:
1.6 ! schwarze 1327: if (MAXPATHLEN <= sz) {
! 1328: fprintf(stderr, "%s: Path too long\n", dir);
! 1329: return(0);
! 1330: }
! 1331:
! 1332: if (verb > 2)
! 1333: printf("%s: Scanning\n", buf);
! 1334:
! 1335: if ( ! ofile_dirbuild(buf, sec, arch,
! 1336: use_all, verb, of))
! 1337: return(0);
! 1338: }
! 1339: if (DT_REG != dp->d_type ||
! 1340: (NULL == psec && !use_all) ||
! 1341: !strcmp(MANDOC_DB, fn) ||
! 1342: !strcmp(MANDOC_IDX, fn))
! 1343: continue;
! 1344:
! 1345: /*
! 1346: * Don't bother parsing files that man(1) won't find.
! 1347: */
! 1348:
! 1349: suffix = strrchr(fn, '.');
! 1350: if (0 == use_all) {
! 1351: if (NULL == suffix)
1.2 schwarze 1352: continue;
1.6 ! schwarze 1353: if (strcmp(suffix + 1, psec))
1.2 schwarze 1354: continue;
1355: }
1356:
1357: buf[0] = '\0';
1358: strlcat(buf, dir, MAXPATHLEN);
1359: strlcat(buf, "/", MAXPATHLEN);
1360: sz = strlcat(buf, fn, MAXPATHLEN);
1361: if (sz >= MAXPATHLEN) {
1362: fprintf(stderr, "%s: Path too long\n", dir);
1363: return(0);
1364: }
1365:
1366: nof = mandoc_calloc(1, sizeof(struct of));
1367: nof->fname = mandoc_strdup(buf);
1.6 ! schwarze 1368: if (NULL != psec)
! 1369: nof->sec = mandoc_strdup(psec);
! 1370: if (NULL != parch)
! 1371: nof->arch = mandoc_strdup(parch);
! 1372: if (NULL != suffix)
! 1373: *suffix = '\0';
! 1374: nof->title = mandoc_strdup(fn);
1.2 schwarze 1375:
1376: if (verb > 2)
1377: printf("%s: Scheduling\n", buf);
1378:
1379: if (NULL == *of) {
1380: *of = nof;
1381: (*of)->first = nof;
1382: } else {
1383: nof->first = (*of)->first;
1384: (*of)->next = nof;
1385: *of = nof;
1386: }
1387: }
1388:
1.4 schwarze 1389: closedir(d);
1.2 schwarze 1390: return(1);
1391: }
1392:
1393: static void
1394: ofile_free(struct of *of)
1395: {
1396: struct of *nof;
1397:
1398: while (of) {
1399: nof = of->next;
1400: free(of->fname);
1.6 ! schwarze 1401: free(of->sec);
! 1402: free(of->arch);
! 1403: free(of->title);
1.2 schwarze 1404: free(of);
1405: of = nof;
1406: }
1407: }
1408:
1409: static void
1.1 schwarze 1410: usage(void)
1411: {
1412:
1.2 schwarze 1413: fprintf(stderr, "usage: %s [-v] "
1414: "[-d dir [files...] |"
1415: " -u dir [files...] |"
1416: " dir...]\n", progname);
1.1 schwarze 1417: }