/* Annotation of src/usr.bin/mandoc/apropos.c, Revision 1.2 */
/*	$Id: apropos.c,v 1.1 2011/10/06 23:04:16 schwarze Exp $ */
/*
 * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
#include <sys/types.h>

#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include <getopt.h>
#include <limits.h>
#include <regex.h>
#include <stdarg.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <unistd.h>
#include <db.h>

#include "mandoc.h"

/* NOTE(review): MAXRESULTS is not referenced by any code visible here. */
#define	MAXRESULTS	 256

/*
 * Bit-fields.  See mandocdb.8.
 * Each keyword record carries a mask of these bits; the -t option
 * builds a mask of the same bits to select which records may match.
 */

#define	TYPE_NAME	 0x01
#define	TYPE_FUNCTION	 0x02
#define	TYPE_UTILITY	 0x04
#define	TYPE_INCLUDES	 0x08
#define	TYPE_VARIABLE	 0x10
#define	TYPE_STANDARD	 0x20
#define	TYPE_AUTHOR	 0x40
#define	TYPE_CONFIG	 0x80
#define	TYPE_DESC	 0x100
#define	TYPE_XREF	 0x200
#define	TYPE_PATH	 0x400
#define	TYPE_ENV	 0x800
#define	TYPE_ERR	 0x1000

/*
 * How the query string is compared against each keyword.
 */
enum	match {
	MATCH_SUBSTR = 0, /* substring search (the default) */
	MATCH_REGEX, /* extended regular expression (-r) */
	MATCH_EXACT /* whole-keyword comparison (-e) */
};

/*
 * Output orderings selectable with -s.
 * SORT_TITLE is zero, so it is what a zeroed struct opts selects.
 */
enum	sort {
	SORT_TITLE = 0, /* by manual title */
	SORT_CAT, /* by category, then by title */
	SORT__MAX /* number of orderings; not a valid ordering */
};

65: struct opts {
66: enum sort sort; /* output sorting */
67: const char *arch; /* restrict to architecture */
68: const char *cat; /* restrict to category */
69: int types; /* only types in bitmask */
70: int insens; /* case-insensitive match */
71: enum match match; /* match type */
72: };
73:
/*
 * Maps a type name accepted by -t onto its TYPE_* bitmask.
 */
struct	type {
	int		 mask; /* bits OR'ed into the search mask */
	const char	*name; /* command-line type name */
};

79: struct rec {
1.2 ! schwarze 80: char *file; /* file in file-system */
! 81: char *cat; /* category (3p, 3, etc.) */
! 82: char *title; /* title (FOO, etc.) */
! 83: char *arch; /* arch (or empty string) */
! 84: char *desc; /* description (from Nd) */
! 85: recno_t rec; /* record in index */
1.1 schwarze 86: };
87:
88: struct res {
89: char *arch; /* architecture */
90: char *desc; /* free-form description */
91: char *keyword; /* matched keyword */
92: int types; /* bitmask of field selectors */
93: char *cat; /* manual section */
94: char *title; /* manual section */
95: char *uri; /* formatted uri of file */
96: recno_t rec; /* unique id of underlying manual */
1.2 ! schwarze 97: /*
! 98: * Maintain a binary tree for checking the uniqueness of `rec'
! 99: * when adding elements to the results array.
! 100: * Since the results array is dynamic, use offset in the array
! 101: * instead of a pointer to the structure.
! 102: */
! 103: int lhs;
! 104: int rhs;
1.1 schwarze 105: };
106:
107: struct state {
108: DB *db; /* database */
109: DB *idx; /* index */
110: const char *dbf; /* database name */
111: const char *idxf; /* index name */
112: };
113:
114: static const char * const sorts[SORT__MAX] = {
115: "cat", /* SORT_CAT */
116: "title", /* SORT_TITLE */
117: };
118:
119: static const struct type types[] = {
120: { TYPE_NAME, "name" },
121: { TYPE_FUNCTION, "func" },
122: { TYPE_UTILITY, "utility" },
123: { TYPE_INCLUDES, "incl" },
124: { TYPE_VARIABLE, "var" },
125: { TYPE_STANDARD, "stand" },
126: { TYPE_AUTHOR, "auth" },
127: { TYPE_CONFIG, "conf" },
128: { TYPE_DESC, "desc" },
129: { TYPE_XREF, "xref" },
130: { TYPE_PATH, "path" },
131: { TYPE_ENV, "env" },
132: { TYPE_ERR, "err" },
133: { INT_MAX, "all" },
134: { 0, NULL }
135: };
136:
137: static void buf_alloc(char **, size_t *, size_t);
138: static void buf_dup(struct mchars *, char **, const char *);
139: static void buf_redup(struct mchars *, char **,
140: size_t *, const char *);
141: static int sort_cat(const void *, const void *);
142: static int sort_title(const void *, const void *);
1.2 ! schwarze 143: static int state_getrecord(struct state *,
! 144: recno_t, struct rec *);
1.1 schwarze 145: static void state_output(const struct res *, int);
1.2 ! schwarze 146: static int state_search(struct state *,
1.1 schwarze 147: const struct opts *, char *);
148: static void usage(void);
149:
1.2 ! schwarze 150: static char *progname;
1.1 schwarze 151:
152: int
153: apropos(int argc, char *argv[])
154: {
1.2 ! schwarze 155: BTREEINFO info;
! 156: int ch, i, rc;
1.1 schwarze 157: const char *dbf, *idxf;
158: struct state state;
159: char *q, *v;
160: struct opts opts;
161: extern int optind;
162: extern char *optarg;
163:
164: memset(&opts, 0, sizeof(struct opts));
1.2 ! schwarze 165: memset(&state, 0, sizeof(struct state));
1.1 schwarze 166:
167: dbf = "mandoc.db";
168: idxf = "mandoc.index";
169: q = NULL;
1.2 ! schwarze 170: rc = EXIT_FAILURE;
1.1 schwarze 171:
172: progname = strrchr(argv[0], '/');
173: if (progname == NULL)
174: progname = argv[0];
175: else
176: ++progname;
177:
178: opts.match = MATCH_SUBSTR;
179:
180: while (-1 != (ch = getopt(argc, argv, "a:c:eIrs:t:")))
181: switch (ch) {
182: case ('a'):
183: opts.arch = optarg;
184: break;
185: case ('c'):
186: opts.cat = optarg;
187: break;
188: case ('e'):
189: opts.match = MATCH_EXACT;
190: break;
191: case ('I'):
192: opts.insens = 1;
193: break;
194: case ('r'):
195: opts.match = MATCH_REGEX;
196: break;
197: case ('s'):
198: for (i = 0; i < SORT__MAX; i++) {
199: if (strcmp(optarg, sorts[i]))
200: continue;
201: opts.sort = (enum sort)i;
202: break;
203: }
204:
205: if (i < SORT__MAX)
206: break;
207:
1.2 ! schwarze 208: fprintf(stderr, "%s: Bad sort\n", optarg);
1.1 schwarze 209: return(EXIT_FAILURE);
210: case ('t'):
211: while (NULL != (v = strsep(&optarg, ","))) {
212: if ('\0' == *v)
213: continue;
214: for (i = 0; types[i].mask; i++) {
215: if (strcmp(types[i].name, v))
216: continue;
217: break;
218: }
219: if (0 == types[i].mask)
220: break;
221: opts.types |= types[i].mask;
222: }
223: if (NULL == v)
224: break;
225:
1.2 ! schwarze 226: fprintf(stderr, "%s: Bad type\n", v);
1.1 schwarze 227: return(EXIT_FAILURE);
228: default:
229: usage();
230: return(EXIT_FAILURE);
231: }
232:
233: argc -= optind;
234: argv += optind;
235:
236: if (0 == argc || '\0' == **argv) {
237: usage();
1.2 ! schwarze 238: goto out;
1.1 schwarze 239: } else
240: q = *argv;
241:
242: if (0 == opts.types)
243: opts.types = TYPE_NAME | TYPE_DESC;
244:
1.2 ! schwarze 245: /*
! 246: * Configure databases.
! 247: * The keyword database is a btree that allows for duplicate
! 248: * entries.
! 249: * The index database is a recno.
! 250: */
! 251:
! 252: memset(&info, 0, sizeof(BTREEINFO));
! 253: info.flags = R_DUP;
! 254:
! 255: state.db = dbopen(dbf, O_RDONLY, 0, DB_BTREE, &info);
! 256: if (NULL == state.db) {
! 257: perror(dbf);
! 258: goto out;
1.1 schwarze 259: }
260:
1.2 ! schwarze 261: state.idx = dbopen(idxf, O_RDONLY, 0, DB_RECNO, NULL);
! 262: if (NULL == state.idx) {
! 263: perror(idxf);
! 264: goto out;
! 265: }
! 266:
! 267: /* Main search function. */
! 268:
! 269: rc = state_search(&state, &opts, q) ?
! 270: EXIT_SUCCESS : EXIT_FAILURE;
! 271: out:
! 272: if (state.db)
! 273: (*state.db->close)(state.db);
! 274: if (state.idx)
! 275: (*state.idx->close)(state.idx);
1.1 schwarze 276:
1.2 ! schwarze 277: return(rc);
1.1 schwarze 278: }
279:
1.2 ! schwarze 280: static int
1.1 schwarze 281: state_search(struct state *p, const struct opts *opts, char *q)
282: {
1.2 ! schwarze 283: int leaf, root, len, ch, dflag, rc;
1.1 schwarze 284: struct mchars *mc;
285: char *buf;
286: size_t bufsz;
287: recno_t rec;
288: uint32_t fl;
289: DBT key, val;
1.2 ! schwarze 290: struct res *res;
1.1 schwarze 291: regex_t reg;
292: regex_t *regp;
293: char filebuf[10];
294: struct rec record;
295:
1.2 ! schwarze 296: rc = 0;
! 297: root = leaf = -1;
! 298: res = NULL;
1.1 schwarze 299: len = 0;
300: buf = NULL;
301: bufsz = 0;
302: regp = NULL;
303:
1.2 ! schwarze 304: /*
! 305: * Configure how we scan through results to see if we match:
! 306: * whether by regexp or exact matches.
! 307: */
! 308:
1.1 schwarze 309: switch (opts->match) {
310: case (MATCH_REGEX):
1.2 ! schwarze 311: ch = REG_EXTENDED | REG_NOSUB |
1.1 schwarze 312: (opts->insens ? REG_ICASE : 0);
313:
1.2 ! schwarze 314: if (0 != regcomp(®, q, ch)) {
! 315: fprintf(stderr, "%s: Bad pattern\n", q);
! 316: return(0);
1.1 schwarze 317: }
318:
319: regp = ®
320: dflag = R_FIRST;
321: break;
322: case (MATCH_EXACT):
323: key.data = q;
324: key.size = strlen(q) + 1;
325: dflag = R_CURSOR;
326: break;
327: default:
328: dflag = R_FIRST;
329: break;
330: }
331:
1.2 ! schwarze 332: mc = mchars_alloc();
1.1 schwarze 333:
334: /*
335: * Iterate over the entire keyword database.
336: * For each record, we must first translate the key into UTF-8.
337: * Following that, make sure it's acceptable.
338: * Lastly, add it to the available records.
339: */
340:
1.2 ! schwarze 341: while (0 == (ch = (*p->db->seq)(p->db, &key, &val, dflag))) {
1.1 schwarze 342: dflag = R_NEXT;
343:
344: /*
345: * Keys must be sized as such: the keyword must be
346: * non-empty (nil terminator plus one character) and the
347: * value must be 8 (recno_t---uint32_t---index reference
348: * and a uint32_t flag field).
349: */
350:
351: if (key.size < 2 || 8 != val.size) {
1.2 ! schwarze 352: fprintf(stderr, "%s: Bad database\n", p->dbf);
! 353: goto out;
1.1 schwarze 354: }
355:
356: buf_redup(mc, &buf, &bufsz, (char *)key.data);
357:
358: fl = *(uint32_t *)val.data;
359:
360: if ( ! (fl & opts->types))
361: continue;
362:
363: switch (opts->match) {
364: case (MATCH_REGEX):
365: if (regexec(regp, buf, 0, NULL, 0))
366: continue;
367: break;
368: case (MATCH_EXACT):
369: if (opts->insens && strcasecmp(buf, q))
370: goto send;
371: if ( ! opts->insens && strcmp(buf, q))
372: goto send;
373: break;
374: default:
375: if (opts->insens && NULL == strcasestr(buf, q))
376: continue;
377: if ( ! opts->insens && NULL == strstr(buf, q))
378: continue;
379: break;
380: }
381:
382: /*
383: * Now look up the file itself in our index. The file's
384: * indexed by its recno for fast lookups.
385: */
386:
387: memcpy(&rec, val.data + 4, sizeof(recno_t));
388:
389: if ( ! state_getrecord(p, rec, &record))
1.2 ! schwarze 390: goto out;
1.1 schwarze 391:
392: /* If we're in a different section, skip... */
393:
394: if (opts->cat && strcasecmp(opts->cat, record.cat))
395: continue;
396: if (opts->arch && strcasecmp(opts->arch, record.arch))
397: continue;
398:
1.2 ! schwarze 399: /*
! 400: * Do a binary search to dedupe the results tree of the
! 401: * same record: we don't print the same file.
! 402: */
1.1 schwarze 403:
1.2 ! schwarze 404: for (leaf = root; leaf >= 0; )
! 405: if (rec > res[leaf].rec && res[leaf].rhs >= 0)
! 406: leaf = res[leaf].rhs;
! 407: else if (rec < res[leaf].rec && res[leaf].lhs >= 0)
! 408: leaf = res[leaf].lhs;
! 409: else
1.1 schwarze 410: break;
411:
1.2 ! schwarze 412: if (leaf >= 0 && res[leaf].rec == rec)
1.1 schwarze 413: continue;
414:
1.2 ! schwarze 415: res = mandoc_realloc
! 416: (res, (len + 1) * sizeof(struct res));
! 417:
1.1 schwarze 418: /*
419: * Now we have our filename, keywords, types, and all
420: * other necessary information.
421: * Process it and add it to our list of results.
422: */
423:
424: filebuf[9] = '\0';
425: snprintf(filebuf, 10, "%u", record.rec);
426: assert('\0' == filebuf[9]);
427:
428: res[len].rec = record.rec;
429: res[len].types = fl;
1.2 ! schwarze 430: res[len].lhs = res[len].rhs = -1;
1.1 schwarze 431:
432: buf_dup(mc, &res[len].keyword, buf);
433: buf_dup(mc, &res[len].uri, filebuf);
434: buf_dup(mc, &res[len].cat, record.cat);
435: buf_dup(mc, &res[len].arch, record.arch);
436: buf_dup(mc, &res[len].title, record.title);
437: buf_dup(mc, &res[len].desc, record.desc);
1.2 ! schwarze 438:
! 439: if (leaf >= 0) {
! 440: if (record.rec > res[leaf].rec)
! 441: res[leaf].rhs = len;
! 442: else
! 443: res[leaf].lhs = len;
! 444: } else
! 445: root = len;
! 446:
1.1 schwarze 447: len++;
448: }
449:
450: if (ch < 0) {
451: perror(p->dbf);
1.2 ! schwarze 452: goto out;
1.1 schwarze 453: }
1.2 ! schwarze 454: send:
! 455: /* Sort our results. */
1.1 schwarze 456:
1.2 ! schwarze 457: if (SORT_CAT == opts->sort)
1.1 schwarze 458: qsort(res, len, sizeof(struct res), sort_cat);
1.2 ! schwarze 459: else
1.1 schwarze 460: qsort(res, len, sizeof(struct res), sort_title);
461:
462: state_output(res, len);
1.2 ! schwarze 463: rc = 1;
! 464: out:
1.1 schwarze 465: for (len-- ; len >= 0; len--) {
466: free(res[len].keyword);
467: free(res[len].title);
468: free(res[len].cat);
469: free(res[len].arch);
470: free(res[len].desc);
471: free(res[len].uri);
472: }
473:
1.2 ! schwarze 474: free(res);
1.1 schwarze 475: free(buf);
476: mchars_free(mc);
477:
478: if (regp)
479: regfree(regp);
1.2 ! schwarze 480:
! 481: return(rc);
1.1 schwarze 482: }
483:
/*
 * Track allocated buffer size for buf_redup().
 * Make sure *buf has room for more than `sz' bytes: if the recorded
 * capacity *bufsz is already larger than `sz', do nothing; otherwise
 * grow the buffer to `sz' plus 1024 bytes of slack and record the new
 * capacity.
 */
static inline void
buf_alloc(char **buf, size_t *bufsz, size_t sz)
{

	if (sz < *bufsz)
		return;

	*bufsz = sz + 1024;
	*buf = mandoc_realloc(*buf, *bufsz);
}

/*
 * Like buf_redup() but throwing away the buffer size.
 * Any previous value of *buf is overwritten, not freed; the caller
 * owns the newly allocated string.
 */
static void
buf_dup(struct mchars *mc, char **buf, const char *val)
{
	size_t		 bufsz;

	bufsz = 0;
	*buf = NULL;
	buf_redup(mc, buf, &bufsz, val);
}

511: /*
512: * Normalise strings from the index and database.
513: * These strings are escaped as defined by mandoc_char(7) along with
514: * other goop in mandoc.h (e.g., soft hyphens).
515: */
516: static void
517: buf_redup(struct mchars *mc, char **buf,
518: size_t *bufsz, const char *val)
519: {
520: size_t sz;
521: const char *seq, *cpp;
522: int len, pos;
523: enum mandoc_esc esc;
524: const char rsv[] = { '\\', ASCII_NBRSP, ASCII_HYPH, '\0' };
525:
526: /* Pre-allocate by the length of the input */
527:
528: buf_alloc(buf, bufsz, strlen(val) + 1);
529:
530: pos = 0;
531:
532: while ('\0' != *val) {
533: /*
534: * Halt on the first escape sequence.
535: * This also halts on the end of string, in which case
536: * we just copy, fallthrough, and exit the loop.
537: */
538: if ((sz = strcspn(val, rsv)) > 0) {
539: memcpy(&(*buf)[pos], val, sz);
540: pos += (int)sz;
541: val += (int)sz;
542: }
543:
544: if (ASCII_HYPH == *val) {
545: (*buf)[pos++] = '-';
546: val++;
547: continue;
548: } else if (ASCII_NBRSP == *val) {
549: (*buf)[pos++] = ' ';
550: val++;
551: continue;
552: } else if ('\\' != *val)
553: break;
554:
555: /* Read past the slash. */
556:
557: val++;
558:
559: /*
560: * Parse the escape sequence and see if it's a
561: * predefined character or special character.
562: */
563:
564: esc = mandoc_escape(&val, &seq, &len);
565: if (ESCAPE_ERROR == esc)
566: break;
567:
568: cpp = ESCAPE_SPECIAL == esc ?
569: mchars_spec2str(mc, seq, len, &sz) : NULL;
570:
571: if (NULL == cpp)
572: continue;
573:
574: /* Copy the rendered glyph into the stream. */
575:
576: buf_alloc(buf, bufsz, sz);
577:
578: memcpy(&(*buf)[pos], cpp, sz);
579: pos += (int)sz;
580: }
581:
582: (*buf)[pos] = '\0';
583: }
584:
585: static void
586: state_output(const struct res *res, int sz)
587: {
588: int i;
589:
590: for (i = 0; i < sz; i++)
591: printf("%s(%s%s%s) - %s\n", res[i].title,
592: res[i].cat,
593: *res[i].arch ? "/" : "",
594: *res[i].arch ? res[i].arch : "",
595: res[i].desc);
596: }
597:
598: static void
599: usage(void)
600: {
601:
602: fprintf(stderr, "usage: %s "
603: "[-eIr] "
604: "[-a arch] "
605: "[-c cat] "
606: "[-s sort] "
607: "[-t type[,...]] "
608: "key\n", progname);
609: }
610:
611: static int
612: state_getrecord(struct state *p, recno_t rec, struct rec *rp)
613: {
614: DBT key, val;
615: size_t sz;
616: int rc;
617:
618: key.data = &rec;
619: key.size = sizeof(recno_t);
620:
621: rc = (*p->idx->get)(p->idx, &key, &val, 0);
622: if (rc < 0) {
1.2 ! schwarze 623: perror(p->idxf);
1.1 schwarze 624: return(0);
1.2 ! schwarze 625: } else if (rc > 0)
! 626: goto err;
1.1 schwarze 627:
628: rp->file = (char *)val.data;
1.2 ! schwarze 629: if ((sz = strlen(rp->file) + 1) >= val.size)
! 630: goto err;
1.1 schwarze 631:
632: rp->cat = (char *)val.data + (int)sz;
1.2 ! schwarze 633: if ((sz += strlen(rp->cat) + 1) >= val.size)
! 634: goto err;
1.1 schwarze 635:
636: rp->title = (char *)val.data + (int)sz;
1.2 ! schwarze 637: if ((sz += strlen(rp->title) + 1) >= val.size)
! 638: goto err;
1.1 schwarze 639:
640: rp->arch = (char *)val.data + (int)sz;
1.2 ! schwarze 641: if ((sz += strlen(rp->arch) + 1) >= val.size)
! 642: goto err;
1.1 schwarze 643:
644: rp->desc = (char *)val.data + (int)sz;
645: rp->rec = rec;
646: return(1);
1.2 ! schwarze 647: err:
! 648: fprintf(stderr, "%s: Corrupt index\n", p->idxf);
! 649: return(0);
1.1 schwarze 650: }
651:
652: static int
653: sort_title(const void *p1, const void *p2)
654: {
655:
656: return(strcmp(((const struct res *)p1)->title,
657: ((const struct res *)p2)->title));
658: }
659:
660: static int
661: sort_cat(const void *p1, const void *p2)
662: {
663: int rc;
664:
665: rc = strcmp(((const struct res *)p1)->cat,
666: ((const struct res *)p2)->cat);
667:
668: return(0 == rc ? sort_title(p1, p2) : rc);
669: }