Annotation of src/usr.bin/mandoc/mandocdb.c, Revision 1.5
1.5 ! schwarze 1: /* $Id: mandocdb.c,v 1.4 2011/11/13 00:53:07 schwarze Exp $ */
1.1 schwarze 2: /*
3: * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4: *
5: * Permission to use, copy, modify, and distribute this software for any
6: * purpose with or without fee is hereby granted, provided that the above
7: * copyright notice and this permission notice appear in all copies.
8: *
9: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16: */
17: #include <sys/param.h>
18:
19: #include <assert.h>
1.2 schwarze 20: #include <dirent.h>
1.1 schwarze 21: #include <fcntl.h>
22: #include <getopt.h>
23: #include <stdio.h>
24: #include <stdint.h>
25: #include <stdlib.h>
26: #include <string.h>
27: #include <db.h>
28:
29: #include "man.h"
30: #include "mdoc.h"
31: #include "mandoc.h"
1.5 ! schwarze 32: #include "mandocdb.h"
1.1 schwarze 33:
/* Initial capacity of the growable buffers, and their growth overshoot. */
#define	MANDOC_BUFSZ	  BUFSIZ
/* Number of slots added whenever the array of free records grows. */
#define	MANDOC_SLOP	  1024
36:
/*
 * Minimal singly-linked list of files scheduled for processing.
 * Kept hand-rolled so that the QUEUE macros need not be brought in.
 */
struct	of {
	char		 *fname;  /* file name, heap-allocated */
	struct of	 *next;   /* following entry; NULL for the last one */
	struct of	 *first;  /* first entry of the list */
};
44:
/* Growable byte buffer; see buf_appendb() for the growth policy. */
struct	buf {
	char		 *cp;    /* storage */
	size_t		  len;   /* number of bytes currently in use */
	size_t		  size;  /* number of bytes allocated */
};
52:
/* The operation requested on the command line. */
enum	op {
	OP_NEW = 0,	/* create the databases from scratch */
	OP_UPDATE,	/* delete, then re-add, entries in existing databases */
	OP_DELETE	/* delete entries from existing databases */
};
60:
/* Common parameter list of the man(7) tree walker. */
#define	MAN_ARGS	  DB *hash, \
			  struct buf *buf, \
			  struct buf *dbuf, \
			  const struct man_node *n
/* Common parameter list of the mdoc(7) tree walker and macro handlers. */
#define	MDOC_ARGS	  DB *hash, \
			  struct buf *buf, \
			  struct buf *dbuf, \
			  const struct mdoc_node *n, \
			  const struct mdoc_meta *m
70:
71: static void buf_appendmdoc(struct buf *,
72: const struct mdoc_node *, int);
73: static void buf_append(struct buf *, const char *);
74: static void buf_appendb(struct buf *,
75: const void *, size_t);
76: static void dbt_put(DB *, const char *, DBT *, DBT *);
77: static void hash_put(DB *, const struct buf *, int);
78: static void hash_reset(DB **);
1.2 schwarze 79: static void index_merge(const struct of *, struct mparse *,
80: struct buf *, struct buf *,
81: DB *, DB *, const char *,
82: DB *, const char *, int,
83: recno_t, const recno_t *, size_t);
84: static void index_prune(const struct of *, DB *,
85: const char *, DB *, const char *,
86: int, recno_t *, recno_t **, size_t *);
87: static void ofile_argbuild(char *[], int, int, struct of **);
88: static int ofile_dirbuild(const char *, int, struct of **);
89: static void ofile_free(struct of *);
1.1 schwarze 90: static int pman_node(MAN_ARGS);
91: static void pmdoc_node(MDOC_ARGS);
92: static void pmdoc_An(MDOC_ARGS);
93: static void pmdoc_Cd(MDOC_ARGS);
94: static void pmdoc_Er(MDOC_ARGS);
95: static void pmdoc_Ev(MDOC_ARGS);
96: static void pmdoc_Fd(MDOC_ARGS);
97: static void pmdoc_In(MDOC_ARGS);
98: static void pmdoc_Fn(MDOC_ARGS);
99: static void pmdoc_Fo(MDOC_ARGS);
100: static void pmdoc_Nd(MDOC_ARGS);
101: static void pmdoc_Nm(MDOC_ARGS);
102: static void pmdoc_Pa(MDOC_ARGS);
103: static void pmdoc_St(MDOC_ARGS);
104: static void pmdoc_Vt(MDOC_ARGS);
105: static void pmdoc_Xr(MDOC_ARGS);
106: static void usage(void);
107:
108: typedef void (*pmdoc_nf)(MDOC_ARGS);
109:
110: static const pmdoc_nf mdocs[MDOC_MAX] = {
111: NULL, /* Ap */
112: NULL, /* Dd */
113: NULL, /* Dt */
114: NULL, /* Os */
115: NULL, /* Sh */
116: NULL, /* Ss */
117: NULL, /* Pp */
118: NULL, /* D1 */
119: NULL, /* Dl */
120: NULL, /* Bd */
121: NULL, /* Ed */
122: NULL, /* Bl */
123: NULL, /* El */
124: NULL, /* It */
125: NULL, /* Ad */
126: pmdoc_An, /* An */
127: NULL, /* Ar */
128: pmdoc_Cd, /* Cd */
129: NULL, /* Cm */
130: NULL, /* Dv */
131: pmdoc_Er, /* Er */
132: pmdoc_Ev, /* Ev */
133: NULL, /* Ex */
134: NULL, /* Fa */
135: pmdoc_Fd, /* Fd */
136: NULL, /* Fl */
137: pmdoc_Fn, /* Fn */
138: NULL, /* Ft */
139: NULL, /* Ic */
140: pmdoc_In, /* In */
141: NULL, /* Li */
142: pmdoc_Nd, /* Nd */
143: pmdoc_Nm, /* Nm */
144: NULL, /* Op */
145: NULL, /* Ot */
146: pmdoc_Pa, /* Pa */
147: NULL, /* Rv */
148: pmdoc_St, /* St */
149: pmdoc_Vt, /* Va */
150: pmdoc_Vt, /* Vt */
151: pmdoc_Xr, /* Xr */
152: NULL, /* %A */
153: NULL, /* %B */
154: NULL, /* %D */
155: NULL, /* %I */
156: NULL, /* %J */
157: NULL, /* %N */
158: NULL, /* %O */
159: NULL, /* %P */
160: NULL, /* %R */
161: NULL, /* %T */
162: NULL, /* %V */
163: NULL, /* Ac */
164: NULL, /* Ao */
165: NULL, /* Aq */
166: NULL, /* At */
167: NULL, /* Bc */
168: NULL, /* Bf */
169: NULL, /* Bo */
170: NULL, /* Bq */
171: NULL, /* Bsx */
172: NULL, /* Bx */
173: NULL, /* Db */
174: NULL, /* Dc */
175: NULL, /* Do */
176: NULL, /* Dq */
177: NULL, /* Ec */
178: NULL, /* Ef */
179: NULL, /* Em */
180: NULL, /* Eo */
181: NULL, /* Fx */
182: NULL, /* Ms */
183: NULL, /* No */
184: NULL, /* Ns */
185: NULL, /* Nx */
186: NULL, /* Ox */
187: NULL, /* Pc */
188: NULL, /* Pf */
189: NULL, /* Po */
190: NULL, /* Pq */
191: NULL, /* Qc */
192: NULL, /* Ql */
193: NULL, /* Qo */
194: NULL, /* Qq */
195: NULL, /* Re */
196: NULL, /* Rs */
197: NULL, /* Sc */
198: NULL, /* So */
199: NULL, /* Sq */
200: NULL, /* Sm */
201: NULL, /* Sx */
202: NULL, /* Sy */
203: NULL, /* Tn */
204: NULL, /* Ux */
205: NULL, /* Xc */
206: NULL, /* Xo */
207: pmdoc_Fo, /* Fo */
208: NULL, /* Fc */
209: NULL, /* Oo */
210: NULL, /* Oc */
211: NULL, /* Bk */
212: NULL, /* Ek */
213: NULL, /* Bt */
214: NULL, /* Hf */
215: NULL, /* Fr */
216: NULL, /* Ud */
217: NULL, /* Lb */
218: NULL, /* Lp */
219: NULL, /* Lk */
220: NULL, /* Mt */
221: NULL, /* Brq */
222: NULL, /* Bro */
223: NULL, /* Brc */
224: NULL, /* %C */
225: NULL, /* Es */
226: NULL, /* En */
227: NULL, /* Dx */
228: NULL, /* %Q */
229: NULL, /* br */
230: NULL, /* sp */
231: NULL, /* %U */
232: NULL, /* Ta */
233: };
234:
/* Basename of argv[0]; set by mandocdb() and printed by usage(). */
static	const char	 *progname;
236:
237: int
1.3 schwarze 238: mandocdb(int argc, char *argv[])
1.1 schwarze 239: {
240: struct mparse *mp; /* parse sequence */
241: enum op op; /* current operation */
1.2 schwarze 242: const char *dir;
1.1 schwarze 243: char ibuf[MAXPATHLEN], /* index fname */
1.2 schwarze 244: fbuf[MAXPATHLEN]; /* btree fname */
245: int verb, /* output verbosity */
246: ch, i, flags;
1.1 schwarze 247: DB *idx, /* index database */
248: *db, /* keyword database */
249: *hash; /* temporary keyword hashtable */
250: BTREEINFO info; /* btree configuration */
1.2 schwarze 251: recno_t maxrec; /* supremum of all records */
1.1 schwarze 252: recno_t *recs; /* buffer of empty records */
1.2 schwarze 253: size_t sz1, sz2,
254: recsz, /* buffer size of recs */
1.1 schwarze 255: reccur; /* valid number of recs */
256: struct buf buf, /* keyword buffer */
257: dbuf; /* description buffer */
1.2 schwarze 258: struct of *of; /* list of files for processing */
1.1 schwarze 259: extern int optind;
260: extern char *optarg;
261:
262: progname = strrchr(argv[0], '/');
263: if (progname == NULL)
264: progname = argv[0];
265: else
266: ++progname;
267:
268: verb = 0;
1.2 schwarze 269: of = NULL;
1.1 schwarze 270: db = idx = NULL;
271: mp = NULL;
272: hash = NULL;
273: recs = NULL;
274: recsz = reccur = 0;
275: maxrec = 0;
276: op = OP_NEW;
1.2 schwarze 277: dir = NULL;
1.1 schwarze 278:
1.2 schwarze 279: while (-1 != (ch = getopt(argc, argv, "d:u:v")))
1.1 schwarze 280: switch (ch) {
281: case ('d'):
282: dir = optarg;
1.2 schwarze 283: op = OP_UPDATE;
1.1 schwarze 284: break;
1.2 schwarze 285: case ('u'):
286: dir = optarg;
1.1 schwarze 287: op = OP_DELETE;
288: break;
289: case ('v'):
290: verb++;
291: break;
292: default:
293: usage();
294: return((int)MANDOCLEVEL_BADARG);
295: }
296:
297: argc -= optind;
298: argv += optind;
299:
1.2 schwarze 300: memset(&info, 0, sizeof(BTREEINFO));
301: info.flags = R_DUP;
302:
303: mp = mparse_alloc(MPARSE_AUTO, MANDOCLEVEL_FATAL, NULL, NULL);
304:
305: memset(&buf, 0, sizeof(struct buf));
306: memset(&dbuf, 0, sizeof(struct buf));
307:
308: buf.size = dbuf.size = MANDOC_BUFSZ;
309:
310: buf.cp = mandoc_malloc(buf.size);
311: dbuf.cp = mandoc_malloc(dbuf.size);
312:
313: flags = OP_NEW == op ? O_CREAT|O_TRUNC|O_RDWR : O_CREAT|O_RDWR;
1.1 schwarze 314:
1.2 schwarze 315: if (OP_UPDATE == op || OP_DELETE == op) {
316: ibuf[0] = fbuf[0] = '\0';
1.1 schwarze 317:
1.2 schwarze 318: strlcat(fbuf, dir, MAXPATHLEN);
319: strlcat(fbuf, "/", MAXPATHLEN);
320: sz1 = strlcat(fbuf, MANDOC_DB, MAXPATHLEN);
321:
322: strlcat(ibuf, dir, MAXPATHLEN);
323: strlcat(ibuf, "/", MAXPATHLEN);
324: sz2 = strlcat(ibuf, MANDOC_IDX, MAXPATHLEN);
325:
326: if (sz1 >= MAXPATHLEN || sz2 >= MAXPATHLEN) {
327: fprintf(stderr, "%s: Path too long\n", dir);
328: exit((int)MANDOCLEVEL_BADARG);
329: }
1.1 schwarze 330:
1.2 schwarze 331: db = dbopen(fbuf, flags, 0644, DB_BTREE, &info);
332: idx = dbopen(ibuf, flags, 0644, DB_RECNO, NULL);
1.1 schwarze 333:
1.2 schwarze 334: if (NULL == db) {
335: perror(fbuf);
336: exit((int)MANDOCLEVEL_SYSERR);
337: } else if (NULL == db) {
338: perror(ibuf);
339: exit((int)MANDOCLEVEL_SYSERR);
340: }
1.1 schwarze 341:
1.2 schwarze 342: if (verb > 2) {
343: printf("%s: Opened\n", fbuf);
344: printf("%s: Opened\n", ibuf);
345: }
1.1 schwarze 346:
1.2 schwarze 347: ofile_argbuild(argv, argc, verb, &of);
348: if (NULL == of)
349: goto out;
350:
351: of = of->first;
352:
353: index_prune(of, db, fbuf, idx, ibuf, verb,
354: &maxrec, &recs, &recsz);
355:
356: if (OP_UPDATE == op)
357: index_merge(of, mp, &dbuf, &buf, hash,
358: db, fbuf, idx, ibuf, verb,
359: maxrec, recs, reccur);
1.1 schwarze 360:
361: goto out;
362: }
363:
1.2 schwarze 364: for (i = 0; i < argc; i++) {
365: ibuf[0] = fbuf[0] = '\0';
366:
367: strlcat(fbuf, argv[i], MAXPATHLEN);
368: strlcat(fbuf, "/", MAXPATHLEN);
369: sz1 = strlcat(fbuf, MANDOC_DB, MAXPATHLEN);
370:
371: strlcat(ibuf, argv[i], MAXPATHLEN);
372: strlcat(ibuf, "/", MAXPATHLEN);
373: sz2 = strlcat(ibuf, MANDOC_IDX, MAXPATHLEN);
374:
375: if (sz1 >= MAXPATHLEN || sz2 >= MAXPATHLEN) {
376: fprintf(stderr, "%s: Path too long\n", argv[i]);
377: exit((int)MANDOCLEVEL_BADARG);
378: }
379:
380: db = dbopen(fbuf, flags, 0644, DB_BTREE, &info);
381: idx = dbopen(ibuf, flags, 0644, DB_RECNO, NULL);
1.1 schwarze 382:
1.2 schwarze 383: if (NULL == db) {
384: perror(fbuf);
385: exit((int)MANDOCLEVEL_SYSERR);
386: } else if (NULL == db) {
387: perror(ibuf);
388: exit((int)MANDOCLEVEL_SYSERR);
389: }
1.1 schwarze 390:
1.2 schwarze 391: if (verb > 2) {
392: printf("%s: Truncated\n", fbuf);
393: printf("%s: Truncated\n", ibuf);
394: }
1.1 schwarze 395:
1.2 schwarze 396: ofile_free(of);
397: of = NULL;
1.1 schwarze 398:
1.2 schwarze 399: if ( ! ofile_dirbuild(argv[i], verb, &of))
400: exit((int)MANDOCLEVEL_SYSERR);
1.1 schwarze 401:
1.2 schwarze 402: if (NULL == of)
403: continue;
1.1 schwarze 404:
1.2 schwarze 405: of = of->first;
1.1 schwarze 406:
1.2 schwarze 407: index_merge(of, mp, &dbuf, &buf, hash, db, fbuf,
408: idx, ibuf, verb, maxrec, recs, reccur);
1.1 schwarze 409: }
410:
1.2 schwarze 411: out:
412: if (db)
413: (*db->close)(db);
414: if (idx)
415: (*idx->close)(idx);
416: if (hash)
417: (*hash->close)(hash);
418: if (mp)
419: mparse_free(mp);
1.1 schwarze 420:
1.2 schwarze 421: ofile_free(of);
422: free(buf.cp);
423: free(dbuf.cp);
424: free(recs);
1.1 schwarze 425:
1.2 schwarze 426: return(MANDOCLEVEL_OK);
427: }
1.1 schwarze 428:
1.2 schwarze 429: void
430: index_merge(const struct of *of, struct mparse *mp,
431: struct buf *dbuf, struct buf *buf,
432: DB *hash, DB *db, const char *dbf,
433: DB *idx, const char *idxf, int verb,
434: recno_t maxrec, const recno_t *recs, size_t reccur)
435: {
436: recno_t rec;
437: int ch;
438: DBT key, val;
439: struct mdoc *mdoc;
440: struct man *man;
441: const char *fn, *msec, *mtitle, *arch;
442: size_t sv;
443: unsigned seq;
444: char vbuf[8];
1.1 schwarze 445:
1.2 schwarze 446: for (rec = 0; of; of = of->next) {
447: fn = of->fname;
448: if (reccur > 0) {
449: --reccur;
450: rec = recs[(int)reccur];
451: } else if (maxrec > 0) {
452: rec = maxrec;
453: maxrec = 0;
1.1 schwarze 454: } else
455: rec++;
456:
457: mparse_reset(mp);
458: hash_reset(&hash);
459:
460: if (mparse_readfd(mp, -1, fn) >= MANDOCLEVEL_FATAL) {
461: fprintf(stderr, "%s: Parse failure\n", fn);
462: continue;
463: }
464:
465: mparse_result(mp, &mdoc, &man);
466: if (NULL == mdoc && NULL == man)
467: continue;
468:
469: msec = NULL != mdoc ?
470: mdoc_meta(mdoc)->msec : man_meta(man)->msec;
471: mtitle = NULL != mdoc ?
472: mdoc_meta(mdoc)->title : man_meta(man)->title;
1.2 schwarze 473: arch = NULL != mdoc ?
474: mdoc_meta(mdoc)->arch : NULL;
1.1 schwarze 475:
476: if (NULL == arch)
477: arch = "";
478:
479: /*
480: * The index record value consists of a nil-terminated
481: * filename, a nil-terminated manual section, and a
482: * nil-terminated description. Since the description
483: * may not be set, we set a sentinel to see if we're
484: * going to write a nil byte in its place.
485: */
486:
1.2 schwarze 487: dbuf->len = 0;
488: buf_appendb(dbuf, fn, strlen(fn) + 1);
489: buf_appendb(dbuf, msec, strlen(msec) + 1);
490: buf_appendb(dbuf, mtitle, strlen(mtitle) + 1);
491: buf_appendb(dbuf, arch, strlen(arch) + 1);
1.1 schwarze 492:
1.2 schwarze 493: sv = dbuf->len;
1.1 schwarze 494:
495: /* Fix the record number in the btree value. */
496:
497: if (mdoc)
1.2 schwarze 498: pmdoc_node(hash, buf, dbuf,
1.1 schwarze 499: mdoc_node(mdoc), mdoc_meta(mdoc));
500: else
1.2 schwarze 501: pman_node(hash, buf, dbuf, man_node(man));
1.1 schwarze 502:
503: /*
504: * Copy from the in-memory hashtable of pending keywords
505: * into the database.
506: */
507:
508: memset(vbuf, 0, sizeof(uint32_t));
509: memcpy(vbuf + 4, &rec, sizeof(uint32_t));
510:
511: seq = R_FIRST;
512: while (0 == (ch = (*hash->seq)(hash, &key, &val, seq))) {
513: seq = R_NEXT;
514:
515: memcpy(vbuf, val.data, sizeof(uint32_t));
516: val.size = sizeof(vbuf);
517: val.data = vbuf;
518:
519: if (verb > 1)
1.2 schwarze 520: printf("%s: Added keyword: %s\n",
521: fn, (char *)key.data);
522: dbt_put(db, dbf, &key, &val);
1.1 schwarze 523: }
524: if (ch < 0) {
525: perror("hash");
526: exit((int)MANDOCLEVEL_SYSERR);
527: }
528:
529: /*
530: * Apply to the index. If we haven't had a description
531: * set, put an empty one in now.
532: */
533:
1.2 schwarze 534: if (dbuf->len == sv)
535: buf_appendb(dbuf, "", 1);
1.1 schwarze 536:
537: key.data = &rec;
538: key.size = sizeof(recno_t);
539:
1.2 schwarze 540: val.data = dbuf->cp;
541: val.size = dbuf->len;
1.1 schwarze 542:
1.2 schwarze 543: if (verb)
1.1 schwarze 544: printf("%s: Added index\n", fn);
1.2 schwarze 545: dbt_put(idx, idxf, &key, &val);
546: }
547: }
548:
549: /*
550: * Scan through all entries in the index file `idx' and prune those
551: * entries in `ofile'.
552: * Pruning consists of removing from `db', then invalidating the entry
553: * in `idx' (zeroing its value size).
554: */
555: static void
556: index_prune(const struct of *ofile, DB *db, const char *dbf,
557: DB *idx, const char *idxf, int verb,
558: recno_t *maxrec, recno_t **recs, size_t *recsz)
559: {
560: const struct of *of;
561: const char *fn;
562: unsigned seq, sseq;
563: DBT key, val;
564: size_t reccur;
565: int ch;
566:
567: reccur = 0;
568: seq = R_FIRST;
569: while (0 == (ch = (*idx->seq)(idx, &key, &val, seq))) {
570: seq = R_NEXT;
571: *maxrec = *(recno_t *)key.data;
572: if (0 == val.size) {
573: if (reccur >= *recsz) {
574: *recsz += MANDOC_SLOP;
575: *recs = mandoc_realloc(*recs,
576: *recsz * sizeof(recno_t));
577: }
578: (*recs)[(int)reccur] = *maxrec;
579: reccur++;
580: continue;
581: }
582:
583: fn = (char *)val.data;
584: for (of = ofile; of; of = of->next)
585: if (0 == strcmp(fn, of->fname))
586: break;
587:
588: if (NULL == of)
589: continue;
590:
591: sseq = R_FIRST;
592: while (0 == (ch = (*db->seq)(db, &key, &val, sseq))) {
593: sseq = R_NEXT;
594: assert(8 == val.size);
595: if (*maxrec != *(recno_t *)(val.data + 4))
596: continue;
597: if (verb)
598: printf("%s: Deleted keyword: %s\n",
599: fn, (char *)key.data);
600: ch = (*db->del)(db, &key, R_CURSOR);
601: if (ch < 0)
602: break;
603: }
604: if (ch < 0) {
605: perror(dbf);
606: exit((int)MANDOCLEVEL_SYSERR);
607: }
1.1 schwarze 608:
1.2 schwarze 609: if (verb)
610: printf("%s: Deleted index\n", fn);
1.1 schwarze 611:
1.2 schwarze 612: val.size = 0;
613: ch = (*idx->put)(idx, &key, &val, R_CURSOR);
614: if (ch < 0) {
615: perror(idxf);
616: exit((int)MANDOCLEVEL_SYSERR);
617: }
1.1 schwarze 618:
1.2 schwarze 619: if (reccur >= *recsz) {
620: *recsz += MANDOC_SLOP;
621: *recs = mandoc_realloc
622: (*recs, *recsz * sizeof(recno_t));
623: }
1.1 schwarze 624:
1.2 schwarze 625: (*recs)[(int)reccur] = *maxrec;
626: reccur++;
627: }
628: (*maxrec)++;
1.1 schwarze 629: }
630:
631: /*
632: * Grow the buffer (if necessary) and copy in a binary string.
633: */
634: static void
635: buf_appendb(struct buf *buf, const void *cp, size_t sz)
636: {
637:
638: /* Overshoot by MANDOC_BUFSZ. */
639:
640: while (buf->len + sz >= buf->size) {
641: buf->size = buf->len + sz + MANDOC_BUFSZ;
642: buf->cp = mandoc_realloc(buf->cp, buf->size);
643: }
644:
645: memcpy(buf->cp + (int)buf->len, cp, sz);
646: buf->len += sz;
647: }
648:
649: /*
650: * Append a nil-terminated string to the buffer.
651: * This can be invoked multiple times.
652: * The buffer string will be nil-terminated.
653: * If invoked multiple times, a space is put between strings.
654: */
655: static void
656: buf_append(struct buf *buf, const char *cp)
657: {
658: size_t sz;
659:
660: if (0 == (sz = strlen(cp)))
661: return;
662:
663: if (buf->len)
664: buf->cp[(int)buf->len - 1] = ' ';
665:
666: buf_appendb(buf, cp, sz + 1);
667: }
668:
669: /*
670: * Recursively add all text from a given node.
671: * This is optimised for general mdoc nodes in this context, which do
672: * not consist of subexpressions and having a recursive call for n->next
673: * would be wasteful.
674: * The "f" variable should be 0 unless called from pmdoc_Nd for the
675: * description buffer, which does not start at the beginning of the
676: * buffer.
677: */
678: static void
679: buf_appendmdoc(struct buf *buf, const struct mdoc_node *n, int f)
680: {
681:
682: for ( ; n; n = n->next) {
683: if (n->child)
684: buf_appendmdoc(buf, n->child, f);
685:
686: if (MDOC_TEXT == n->type && f) {
687: f = 0;
688: buf_appendb(buf, n->string,
689: strlen(n->string) + 1);
690: } else if (MDOC_TEXT == n->type)
691: buf_append(buf, n->string);
692:
693: }
694: }
695:
696: /* ARGSUSED */
697: static void
698: pmdoc_An(MDOC_ARGS)
699: {
700:
701: if (SEC_AUTHORS != n->sec)
702: return;
703:
704: buf_appendmdoc(buf, n->child, 0);
1.5 ! schwarze 705: hash_put(hash, buf, TYPE_An);
1.1 schwarze 706: }
707:
708: static void
709: hash_reset(DB **db)
710: {
711: DB *hash;
712:
713: if (NULL != (hash = *db))
714: (*hash->close)(hash);
715:
1.2 schwarze 716: *db = dbopen(NULL, O_CREAT|O_RDWR, 0644, DB_HASH, NULL);
1.1 schwarze 717: if (NULL == *db) {
718: perror("hash");
719: exit((int)MANDOCLEVEL_SYSERR);
720: }
721: }
722:
723: /* ARGSUSED */
724: static void
725: pmdoc_Fd(MDOC_ARGS)
726: {
727: const char *start, *end;
728: size_t sz;
729:
730: if (SEC_SYNOPSIS != n->sec)
731: return;
732: if (NULL == (n = n->child) || MDOC_TEXT != n->type)
733: return;
734:
735: /*
736: * Only consider those `Fd' macro fields that begin with an
737: * "inclusion" token (versus, e.g., #define).
738: */
739: if (strcmp("#include", n->string))
740: return;
741:
742: if (NULL == (n = n->next) || MDOC_TEXT != n->type)
743: return;
744:
745: /*
746: * Strip away the enclosing angle brackets and make sure we're
747: * not zero-length.
748: */
749:
750: start = n->string;
751: if ('<' == *start || '"' == *start)
752: start++;
753:
754: if (0 == (sz = strlen(start)))
755: return;
756:
757: end = &start[(int)sz - 1];
758: if ('>' == *end || '"' == *end)
759: end--;
760:
761: assert(end >= start);
762:
763: buf_appendb(buf, start, (size_t)(end - start + 1));
764: buf_appendb(buf, "", 1);
765:
1.5 ! schwarze 766: hash_put(hash, buf, TYPE_In);
1.1 schwarze 767: }
768:
769: /* ARGSUSED */
770: static void
771: pmdoc_Cd(MDOC_ARGS)
772: {
773:
774: if (SEC_SYNOPSIS != n->sec)
775: return;
776:
777: buf_appendmdoc(buf, n->child, 0);
1.5 ! schwarze 778: hash_put(hash, buf, TYPE_Cd);
1.1 schwarze 779: }
780:
781: /* ARGSUSED */
782: static void
783: pmdoc_In(MDOC_ARGS)
784: {
785:
786: if (SEC_SYNOPSIS != n->sec)
787: return;
788: if (NULL == n->child || MDOC_TEXT != n->child->type)
789: return;
790:
791: buf_append(buf, n->child->string);
1.5 ! schwarze 792: hash_put(hash, buf, TYPE_In);
1.1 schwarze 793: }
794:
795: /* ARGSUSED */
796: static void
797: pmdoc_Fn(MDOC_ARGS)
798: {
799: const char *cp;
800:
801: if (SEC_SYNOPSIS != n->sec)
802: return;
803: if (NULL == n->child || MDOC_TEXT != n->child->type)
804: return;
805:
806: /* .Fn "struct type *arg" "foo" */
807:
808: cp = strrchr(n->child->string, ' ');
809: if (NULL == cp)
810: cp = n->child->string;
811:
812: /* Strip away pointer symbol. */
813:
814: while ('*' == *cp)
815: cp++;
816:
817: buf_append(buf, cp);
1.5 ! schwarze 818: hash_put(hash, buf, TYPE_Fn);
1.1 schwarze 819: }
820:
821: /* ARGSUSED */
822: static void
823: pmdoc_St(MDOC_ARGS)
824: {
825:
826: if (SEC_STANDARDS != n->sec)
827: return;
828: if (NULL == n->child || MDOC_TEXT != n->child->type)
829: return;
830:
831: buf_append(buf, n->child->string);
1.5 ! schwarze 832: hash_put(hash, buf, TYPE_St);
1.1 schwarze 833: }
834:
835: /* ARGSUSED */
836: static void
837: pmdoc_Xr(MDOC_ARGS)
838: {
839:
840: if (NULL == (n = n->child))
841: return;
842:
843: buf_appendb(buf, n->string, strlen(n->string));
844:
845: if (NULL != (n = n->next)) {
846: buf_appendb(buf, ".", 1);
847: buf_appendb(buf, n->string, strlen(n->string) + 1);
848: } else
849: buf_appendb(buf, ".", 2);
850:
1.5 ! schwarze 851: hash_put(hash, buf, TYPE_Xr);
1.1 schwarze 852: }
853:
854: /* ARGSUSED */
855: static void
856: pmdoc_Vt(MDOC_ARGS)
857: {
858: const char *start;
859: size_t sz;
860:
861: if (SEC_SYNOPSIS != n->sec)
862: return;
863: if (MDOC_Vt == n->tok && MDOC_BODY != n->type)
864: return;
865: if (NULL == n->last || MDOC_TEXT != n->last->type)
866: return;
867:
868: /*
869: * Strip away leading pointer symbol '*' and trailing ';'.
870: */
871:
872: start = n->last->string;
873:
874: while ('*' == *start)
875: start++;
876:
877: if (0 == (sz = strlen(start)))
878: return;
879:
880: if (';' == start[(int)sz - 1])
881: sz--;
882:
883: if (0 == sz)
884: return;
885:
886: buf_appendb(buf, start, sz);
887: buf_appendb(buf, "", 1);
1.5 ! schwarze 888: hash_put(hash, buf, TYPE_Va);
1.1 schwarze 889: }
890:
891: /* ARGSUSED */
892: static void
893: pmdoc_Fo(MDOC_ARGS)
894: {
895:
896: if (SEC_SYNOPSIS != n->sec || MDOC_HEAD != n->type)
897: return;
898: if (NULL == n->child || MDOC_TEXT != n->child->type)
899: return;
900:
901: buf_append(buf, n->child->string);
1.5 ! schwarze 902: hash_put(hash, buf, TYPE_Fn);
1.1 schwarze 903: }
904:
905:
906: /* ARGSUSED */
907: static void
908: pmdoc_Nd(MDOC_ARGS)
909: {
910:
911: if (MDOC_BODY != n->type)
912: return;
913:
914: buf_appendmdoc(dbuf, n->child, 1);
915: buf_appendmdoc(buf, n->child, 0);
916:
1.5 ! schwarze 917: hash_put(hash, buf, TYPE_Nd);
1.1 schwarze 918: }
919:
920: /* ARGSUSED */
921: static void
922: pmdoc_Er(MDOC_ARGS)
923: {
924:
925: if (SEC_ERRORS != n->sec)
926: return;
927:
928: buf_appendmdoc(buf, n->child, 0);
1.5 ! schwarze 929: hash_put(hash, buf, TYPE_Er);
1.1 schwarze 930: }
931:
932: /* ARGSUSED */
933: static void
934: pmdoc_Ev(MDOC_ARGS)
935: {
936:
937: if (SEC_ENVIRONMENT != n->sec)
938: return;
939:
940: buf_appendmdoc(buf, n->child, 0);
1.5 ! schwarze 941: hash_put(hash, buf, TYPE_Ev);
1.1 schwarze 942: }
943:
944: /* ARGSUSED */
945: static void
946: pmdoc_Pa(MDOC_ARGS)
947: {
948:
949: if (SEC_FILES != n->sec)
950: return;
951:
952: buf_appendmdoc(buf, n->child, 0);
1.5 ! schwarze 953: hash_put(hash, buf, TYPE_Pa);
1.1 schwarze 954: }
955:
956: /* ARGSUSED */
957: static void
958: pmdoc_Nm(MDOC_ARGS)
959: {
960:
961: if (SEC_NAME == n->sec) {
962: buf_appendmdoc(buf, n->child, 0);
1.5 ! schwarze 963: hash_put(hash, buf, TYPE_Nm);
1.1 schwarze 964: return;
965: } else if (SEC_SYNOPSIS != n->sec || MDOC_HEAD != n->type)
966: return;
967:
968: if (NULL == n->child)
969: buf_append(buf, m->name);
970:
971: buf_appendmdoc(buf, n->child, 0);
1.5 ! schwarze 972: hash_put(hash, buf, TYPE_Nm);
1.1 schwarze 973: }
974:
975: static void
976: hash_put(DB *db, const struct buf *buf, int mask)
977: {
978: DBT key, val;
979: int rc;
980:
981: if (buf->len < 2)
982: return;
983:
984: key.data = buf->cp;
985: key.size = buf->len;
986:
987: if ((rc = (*db->get)(db, &key, &val, 0)) < 0) {
988: perror("hash");
989: exit((int)MANDOCLEVEL_SYSERR);
990: } else if (0 == rc)
991: mask |= *(int *)val.data;
992:
993: val.data = &mask;
994: val.size = sizeof(int);
995:
996: if ((rc = (*db->put)(db, &key, &val, 0)) < 0) {
997: perror("hash");
998: exit((int)MANDOCLEVEL_SYSERR);
999: }
1000: }
1001:
1002: static void
1003: dbt_put(DB *db, const char *dbn, DBT *key, DBT *val)
1004: {
1005:
1006: assert(key->size);
1007: assert(val->size);
1008:
1009: if (0 == (*db->put)(db, key, val, 0))
1010: return;
1011:
1012: perror(dbn);
1013: exit((int)MANDOCLEVEL_SYSERR);
1014: /* NOTREACHED */
1015: }
1016:
1017: /*
1018: * Call out to per-macro handlers after clearing the persistent database
1019: * key. If the macro sets the database key, flush it to the database.
1020: */
1021: static void
1022: pmdoc_node(MDOC_ARGS)
1023: {
1024:
1025: if (NULL == n)
1026: return;
1027:
1028: switch (n->type) {
1029: case (MDOC_HEAD):
1030: /* FALLTHROUGH */
1031: case (MDOC_BODY):
1032: /* FALLTHROUGH */
1033: case (MDOC_TAIL):
1034: /* FALLTHROUGH */
1035: case (MDOC_BLOCK):
1036: /* FALLTHROUGH */
1037: case (MDOC_ELEM):
1038: if (NULL == mdocs[n->tok])
1039: break;
1040:
1041: buf->len = 0;
1042: (*mdocs[n->tok])(hash, buf, dbuf, n, m);
1043: break;
1044: default:
1045: break;
1046: }
1047:
1048: pmdoc_node(hash, buf, dbuf, n->child, m);
1049: pmdoc_node(hash, buf, dbuf, n->next, m);
1050: }
1051:
1052: static int
1053: pman_node(MAN_ARGS)
1054: {
1055: const struct man_node *head, *body;
1056: const char *start, *sv;
1057: size_t sz;
1058:
1059: if (NULL == n)
1060: return(0);
1061:
1062: /*
1063: * We're only searching for one thing: the first text child in
1064: * the BODY of a NAME section. Since we don't keep track of
1065: * sections in -man, run some hoops to find out whether we're in
1066: * the correct section or not.
1067: */
1068:
1069: if (MAN_BODY == n->type && MAN_SH == n->tok) {
1070: body = n;
1071: assert(body->parent);
1072: if (NULL != (head = body->parent->head) &&
1073: 1 == head->nchild &&
1074: NULL != (head = (head->child)) &&
1075: MAN_TEXT == head->type &&
1076: 0 == strcmp(head->string, "NAME") &&
1077: NULL != (body = body->child) &&
1078: MAN_TEXT == body->type) {
1079:
1080: assert(body->string);
1081: start = sv = body->string;
1082:
1083: /*
1084: * Go through a special heuristic dance here.
1085: * This is why -man manuals are great!
1086: * (I'm being sarcastic: my eyes are bleeding.)
1087: * Conventionally, one or more manual names are
1088: * comma-specified prior to a whitespace, then a
1089: * dash, then a description. Try to puzzle out
1090: * the name parts here.
1091: */
1092:
1093: for ( ;; ) {
1094: sz = strcspn(start, " ,");
1095: if ('\0' == start[(int)sz])
1096: break;
1097:
1098: buf->len = 0;
1099: buf_appendb(buf, start, sz);
1100: buf_appendb(buf, "", 1);
1101:
1.5 ! schwarze 1102: hash_put(hash, buf, TYPE_Nm);
1.1 schwarze 1103:
1104: if (' ' == start[(int)sz]) {
1105: start += (int)sz + 1;
1106: break;
1107: }
1108:
1109: assert(',' == start[(int)sz]);
1110: start += (int)sz + 1;
1111: while (' ' == *start)
1112: start++;
1113: }
1114:
1115: buf->len = 0;
1116:
1117: if (sv == start) {
1118: buf_append(buf, start);
1119: return(1);
1120: }
1121:
1122: while (' ' == *start)
1123: start++;
1124:
1125: if (0 == strncmp(start, "-", 1))
1126: start += 1;
1127: else if (0 == strncmp(start, "\\-", 2))
1128: start += 2;
1129: else if (0 == strncmp(start, "\\(en", 4))
1130: start += 4;
1131: else if (0 == strncmp(start, "\\(em", 4))
1132: start += 4;
1133:
1134: while (' ' == *start)
1135: start++;
1136:
1137: sz = strlen(start) + 1;
1138: buf_appendb(dbuf, start, sz);
1139: buf_appendb(buf, start, sz);
1140:
1.5 ! schwarze 1141: hash_put(hash, buf, TYPE_Nd);
1.1 schwarze 1142: }
1143: }
1144:
1.4 schwarze 1145: for (n = n->child; n; n = n->next)
1146: if (pman_node(hash, buf, dbuf, n))
1147: return(1);
1.1 schwarze 1148:
1149: return(0);
1150: }
1151:
1152: static void
1.2 schwarze 1153: ofile_argbuild(char *argv[], int argc, int verb, struct of **of)
1154: {
1155: int i;
1156: struct of *nof;
1157:
1158: for (i = 0; i < argc; i++) {
1159: nof = mandoc_calloc(1, sizeof(struct of));
1160: nof->fname = strdup(argv[i]);
1161: if (verb > 2)
1162: printf("%s: Scheduling\n", argv[i]);
1163: if (NULL == *of) {
1164: *of = nof;
1165: (*of)->first = nof;
1166: } else {
1167: nof->first = (*of)->first;
1168: (*of)->next = nof;
1169: *of = nof;
1170: }
1171: }
1172: }
1173:
1174: /*
1175: * Recursively build up a list of files to parse.
1176: * We use this instead of ftw() and so on because I don't want global
1177: * variables hanging around.
1178: * This ignores the mandoc.db and mandoc.index files, but assumes that
1179: * everything else is a manual.
1180: * Pass in a pointer to a NULL structure for the first invocation.
1181: */
1182: static int
1183: ofile_dirbuild(const char *dir, int verb, struct of **of)
1184: {
1185: char buf[MAXPATHLEN];
1186: size_t sz;
1187: DIR *d;
1188: const char *fn;
1189: struct of *nof;
1190: struct dirent *dp;
1191:
1192: if (NULL == (d = opendir(dir))) {
1193: perror(dir);
1194: return(0);
1195: }
1196:
1197: while (NULL != (dp = readdir(d))) {
1198: fn = dp->d_name;
1199: if (DT_DIR == dp->d_type) {
1200: if (0 == strcmp(".", fn))
1201: continue;
1202: if (0 == strcmp("..", fn))
1203: continue;
1204:
1205: buf[0] = '\0';
1206: strlcat(buf, dir, MAXPATHLEN);
1207: strlcat(buf, "/", MAXPATHLEN);
1208: sz = strlcat(buf, fn, MAXPATHLEN);
1209:
1210: if (sz < MAXPATHLEN) {
1211: if ( ! ofile_dirbuild(buf, verb, of))
1212: return(0);
1213: continue;
1214: } else if (sz < MAXPATHLEN)
1215: continue;
1216:
1217: fprintf(stderr, "%s: Path too long\n", dir);
1218: return(0);
1219: }
1220: if (DT_REG != dp->d_type)
1221: continue;
1222:
1223: if (0 == strcmp(MANDOC_DB, fn) ||
1224: 0 == strcmp(MANDOC_IDX, fn))
1225: continue;
1226:
1227: buf[0] = '\0';
1228: strlcat(buf, dir, MAXPATHLEN);
1229: strlcat(buf, "/", MAXPATHLEN);
1230: sz = strlcat(buf, fn, MAXPATHLEN);
1231: if (sz >= MAXPATHLEN) {
1232: fprintf(stderr, "%s: Path too long\n", dir);
1233: return(0);
1234: }
1235:
1236: nof = mandoc_calloc(1, sizeof(struct of));
1237: nof->fname = mandoc_strdup(buf);
1238:
1239: if (verb > 2)
1240: printf("%s: Scheduling\n", buf);
1241:
1242: if (NULL == *of) {
1243: *of = nof;
1244: (*of)->first = nof;
1245: } else {
1246: nof->first = (*of)->first;
1247: (*of)->next = nof;
1248: *of = nof;
1249: }
1250: }
1251:
1.4 schwarze 1252: closedir(d);
1.2 schwarze 1253: return(1);
1254: }
1255:
1256: static void
1257: ofile_free(struct of *of)
1258: {
1259: struct of *nof;
1260:
1261: while (of) {
1262: nof = of->next;
1263: free(of->fname);
1264: free(of);
1265: of = nof;
1266: }
1267: }
1268:
1269: static void
1.1 schwarze 1270: usage(void)
1271: {
1272:
1.2 schwarze 1273: fprintf(stderr, "usage: %s [-v] "
1274: "[-d dir [files...] |"
1275: " -u dir [files...] |"
1276: " dir...]\n", progname);
1.1 schwarze 1277: }