Annotation of src/usr.bin/mandoc/mansearch.c, Revision 1.5
1.5 ! schwarze 1: /* $Id: mansearch.c,v 1.4 2014/01/04 23:42:32 schwarze Exp $ */
1.1 schwarze 2: /*
3: * Copyright (c) 2012 Kristaps Dzonsons <kristaps@bsd.lv>
1.4 schwarze 4: * Copyright (c) 2013, 2014 Ingo Schwarze <schwarze@openbsd.org>
1.1 schwarze 5: *
6: * Permission to use, copy, modify, and distribute this software for any
7: * purpose with or without fee is hereby granted, provided that the above
8: * copyright notice and this permission notice appear in all copies.
9: *
10: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17: */
18: #include <assert.h>
19: #include <fcntl.h>
20: #include <getopt.h>
21: #include <limits.h>
22: #include <regex.h>
23: #include <stdio.h>
24: #include <stdint.h>
25: #include <stddef.h>
26: #include <stdlib.h>
27: #include <string.h>
28: #include <unistd.h>
29:
30: #include <ohash.h>
31: #include <sqlite3.h>
32:
33: #include "mandoc.h"
34: #include "manpath.h"
35: #include "mansearch.h"
36:
/*
 * Helpers for binding values to a prepared statement.
 * Each macro binds _v at parameter index _i of statement _s and then
 * post-increments _i, so _i must be a modifiable lvalue.
 * A failed bind is only reported on stderr via sqlite3_errmsg(_db);
 * execution continues.
 * Text and blobs are passed with SQLITE_STATIC, so the bound storage
 * has to stay valid for as long as the statement uses it.
 */
#define	SQL_BIND_TEXT(_db, _s, _i, _v) \
	do { if (SQLITE_OK != sqlite3_bind_text \
		((_s), (_i)++, (_v), -1, SQLITE_STATIC)) \
		fprintf(stderr, "%s\n", sqlite3_errmsg((_db))); \
	} while (0)
#define	SQL_BIND_INT64(_db, _s, _i, _v) \
	do { if (SQLITE_OK != sqlite3_bind_int64 \
		((_s), (_i)++, (_v))) \
		fprintf(stderr, "%s\n", sqlite3_errmsg((_db))); \
	} while (0)
/* Binds the object _v itself (its address and sizeof), not a copy. */
#define	SQL_BIND_BLOB(_db, _s, _i, _v) \
	do { if (SQLITE_OK != sqlite3_bind_blob \
		((_s), (_i)++, (&_v), sizeof(_v), SQLITE_STATIC)) \
		fprintf(stderr, "%s\n", sqlite3_errmsg((_db))); \
	} while (0)
52:
/*
 * One term of a compiled search expression.
 * Terms form a singly linked list in command line order.
 * A term is either a substring term (substr != NULL)
 * or a regular expression term (substr == NULL, regexp compiled).
 */
struct	expr {
	uint64_t	 bits;    /* type-mask */
	const char	*substr;  /* to search for, if applicable */
	regex_t		 regexp;  /* compiled regexp, if applicable */
	int		 open;    /* opening parentheses before */
	int		 and;     /* logical AND before */
	int		 close;   /* closing parentheses after */
	struct expr	*next;    /* next in sequence */
};
62:
/*
 * One page found by the search query.
 * Entries live in a hash table keyed on "id" so that each page
 * is only recorded once per database.
 */
struct	match {
	uint64_t	 id; /* identifier in database */
	char		*file; /* relative filepath of manpage */
	char		*desc; /* description of manpage */
	int		 form; /* 0 == catpage */
};
69:
/* Maps the name of a search key, as typed by the user, to its bit. */
struct	type {
	uint64_t	 bits; /* TYPE_* bit(s) selected by this name */
	const char	*name; /* name, compared case-insensitively */
};
74:
/*
 * Table of the key names accepted in search expressions and
 * as the output key.
 * "Fo" is an alias for the Fn bit and "Vt" for the Va bit;
 * "any" selects all bits.
 * The entry with zero bits and a NULL name terminates the table.
 */
static	const struct type types[] = {
	{ TYPE_An,  "An" },
	{ TYPE_Ar,  "Ar" },
	{ TYPE_At,  "At" },
	{ TYPE_Bsx, "Bsx" },
	{ TYPE_Bx,  "Bx" },
	{ TYPE_Cd,  "Cd" },
	{ TYPE_Cm,  "Cm" },
	{ TYPE_Dv,  "Dv" },
	{ TYPE_Dx,  "Dx" },
	{ TYPE_Em,  "Em" },
	{ TYPE_Er,  "Er" },
	{ TYPE_Ev,  "Ev" },
	{ TYPE_Fa,  "Fa" },
	{ TYPE_Fl,  "Fl" },
	{ TYPE_Fn,  "Fn" },
	{ TYPE_Fn,  "Fo" },
	{ TYPE_Ft,  "Ft" },
	{ TYPE_Fx,  "Fx" },
	{ TYPE_Ic,  "Ic" },
	{ TYPE_In,  "In" },
	{ TYPE_Lb,  "Lb" },
	{ TYPE_Li,  "Li" },
	{ TYPE_Lk,  "Lk" },
	{ TYPE_Ms,  "Ms" },
	{ TYPE_Mt,  "Mt" },
	{ TYPE_Nd,  "Nd" },
	{ TYPE_Nm,  "Nm" },
	{ TYPE_Nx,  "Nx" },
	{ TYPE_Ox,  "Ox" },
	{ TYPE_Pa,  "Pa" },
	{ TYPE_Rs,  "Rs" },
	{ TYPE_Sh,  "Sh" },
	{ TYPE_Ss,  "Ss" },
	{ TYPE_St,  "St" },
	{ TYPE_Sy,  "Sy" },
	{ TYPE_Tn,  "Tn" },
	{ TYPE_Va,  "Va" },
	{ TYPE_Va,  "Vt" },
	{ TYPE_Xr,  "Xr" },
	{ TYPE_sec, "sec" },
	{ TYPE_arch,"arch" },
	{ ~0ULL,    "any" },
	{ 0ULL, NULL }
};
120:
/* Forward declarations of the file-local helpers. */

/* Result formatting: join rows of a per-page query into one string. */
static	char		*buildnames(sqlite3 *, sqlite3_stmt *, uint64_t);
static	char		*buildoutput(sqlite3 *, sqlite3_stmt *,
				uint64_t, uint64_t);
/* Allocation callbacks handed to ohash. */
static	void		*hash_alloc(size_t, void *);
static	void		 hash_free(void *, size_t, void *);
static	void		*hash_halloc(size_t, void *);
/* Search expression parsing and disposal. */
static	struct expr	*exprcomp(const struct mansearch *,
				int, char *[]);
static	void		 exprfree(struct expr *);
static	struct expr	*exprterm(const struct mansearch *, char *, int);
/* SQL text construction and application-defined SQL functions. */
static	void		 sql_append(char **sql, size_t *sz,
				const char *newstr, int count);
static	void		 sql_match(sqlite3_context *context,
				int argc, sqlite3_value **argv);
static	void		 sql_regexp(sqlite3_context *context,
				int argc, sqlite3_value **argv);
static	char		*sql_statement(const struct expr *,
				const char *, const char *);
139:
140: int
141: mansearch(const struct mansearch *search,
1.3 schwarze 142: const struct manpaths *paths,
143: int argc, char *argv[],
144: const char *outkey,
1.1 schwarze 145: struct manpage **res, size_t *sz)
146: {
1.3 schwarze 147: int fd, rc, c, ibit;
1.1 schwarze 148: int64_t id;
1.3 schwarze 149: uint64_t outbit;
1.1 schwarze 150: char buf[PATH_MAX];
1.2 schwarze 151: char *sql;
1.1 schwarze 152: struct manpage *mpage;
153: struct expr *e, *ep;
154: sqlite3 *db;
1.3 schwarze 155: sqlite3_stmt *s, *s2;
1.1 schwarze 156: struct match *mp;
157: struct ohash_info info;
158: struct ohash htab;
159: unsigned int idx;
160: size_t i, j, cur, maxres;
161:
162: memset(&info, 0, sizeof(struct ohash_info));
163:
164: info.halloc = hash_halloc;
165: info.alloc = hash_alloc;
166: info.hfree = hash_free;
167: info.key_offset = offsetof(struct match, id);
168:
169: *sz = cur = maxres = 0;
170: sql = NULL;
171: *res = NULL;
172: fd = -1;
173: e = NULL;
174: rc = 0;
175:
176: if (0 == argc)
177: goto out;
178: if (NULL == (e = exprcomp(search, argc, argv)))
179: goto out;
180:
1.3 schwarze 181: outbit = 0;
182: if (NULL != outkey) {
183: for (ibit = 0; types[ibit].bits; ibit++) {
184: if (0 == strcasecmp(types[ibit].name, outkey)) {
185: outbit = types[ibit].bits;
186: break;
187: }
188: }
189: }
190:
1.1 schwarze 191: /*
192: * Save a descriptor to the current working directory.
193: * Since pathnames in the "paths" variable might be relative,
194: * and we'll be chdir()ing into them, we need to keep a handle
195: * on our current directory from which to start the chdir().
196: */
197:
198: if (NULL == getcwd(buf, PATH_MAX)) {
199: perror(NULL);
200: goto out;
201: } else if (-1 == (fd = open(buf, O_RDONLY, 0))) {
202: perror(buf);
203: goto out;
204: }
205:
206: sql = sql_statement(e, search->arch, search->sec);
207:
208: /*
209: * Loop over the directories (containing databases) for us to
210: * search.
211: * Don't let missing/bad databases/directories phase us.
212: * In each, try to open the resident database and, if it opens,
213: * scan it for our match expression.
214: */
215:
216: for (i = 0; i < paths->sz; i++) {
217: if (-1 == fchdir(fd)) {
218: perror(buf);
219: free(*res);
220: break;
221: } else if (-1 == chdir(paths->paths[i])) {
222: perror(paths->paths[i]);
223: continue;
224: }
225:
226: c = sqlite3_open_v2
227: (MANDOC_DB, &db,
228: SQLITE_OPEN_READONLY, NULL);
229:
230: if (SQLITE_OK != c) {
231: perror(MANDOC_DB);
232: sqlite3_close(db);
233: continue;
234: }
235:
236: /*
237: * Define the SQL functions for substring
238: * and regular expression matching.
239: */
240:
241: c = sqlite3_create_function(db, "match", 2,
242: SQLITE_ANY, NULL, sql_match, NULL, NULL);
243: assert(SQLITE_OK == c);
244: c = sqlite3_create_function(db, "regexp", 2,
245: SQLITE_ANY, NULL, sql_regexp, NULL, NULL);
246: assert(SQLITE_OK == c);
247:
248: j = 1;
249: c = sqlite3_prepare_v2(db, sql, -1, &s, NULL);
250: if (SQLITE_OK != c)
251: fprintf(stderr, "%s\n", sqlite3_errmsg(db));
252:
253: if (NULL != search->arch)
254: SQL_BIND_TEXT(db, s, j, search->arch);
255: if (NULL != search->sec)
256: SQL_BIND_TEXT(db, s, j, search->sec);
257:
258: for (ep = e; NULL != ep; ep = ep->next) {
259: if (NULL == ep->substr) {
260: SQL_BIND_BLOB(db, s, j, ep->regexp);
261: } else
262: SQL_BIND_TEXT(db, s, j, ep->substr);
263: SQL_BIND_INT64(db, s, j, ep->bits);
264: }
265:
266: memset(&htab, 0, sizeof(struct ohash));
267: ohash_init(&htab, 4, &info);
268:
269: /*
270: * Hash each entry on its [unique] document identifier.
271: * This is a uint64_t.
272: * Instead of using a hash function, simply convert the
273: * uint64_t to a uint32_t, the hash value's type.
274: * This gives good performance and preserves the
275: * distribution of buckets in the table.
276: */
277: while (SQLITE_ROW == (c = sqlite3_step(s))) {
1.4 schwarze 278: id = sqlite3_column_int64(s, 5);
1.1 schwarze 279: idx = ohash_lookup_memory
280: (&htab, (char *)&id,
281: sizeof(uint64_t), (uint32_t)id);
282:
283: if (NULL != ohash_find(&htab, idx))
284: continue;
285:
286: mp = mandoc_calloc(1, sizeof(struct match));
287: mp->id = id;
288: mp->file = mandoc_strdup
1.4 schwarze 289: ((char *)sqlite3_column_text(s, 0));
290: mp->desc = mandoc_strdup
1.1 schwarze 291: ((char *)sqlite3_column_text(s, 3));
1.4 schwarze 292: mp->form = sqlite3_column_int(s, 4);
1.1 schwarze 293: ohash_insert(&htab, idx, mp);
294: }
295:
296: if (SQLITE_DONE != c)
297: fprintf(stderr, "%s\n", sqlite3_errmsg(db));
298:
299: sqlite3_finalize(s);
300:
301: c = sqlite3_prepare_v2(db,
302: "SELECT * FROM mlinks WHERE pageid=?",
303: -1, &s, NULL);
304: if (SQLITE_OK != c)
305: fprintf(stderr, "%s\n", sqlite3_errmsg(db));
306:
1.3 schwarze 307: c = sqlite3_prepare_v2(db,
308: "SELECT * FROM keys WHERE pageid=? AND bits & ?",
309: -1, &s2, NULL);
310: if (SQLITE_OK != c)
311: fprintf(stderr, "%s\n", sqlite3_errmsg(db));
312:
1.1 schwarze 313: for (mp = ohash_first(&htab, &idx);
314: NULL != mp;
315: mp = ohash_next(&htab, &idx)) {
316: if (cur + 1 > maxres) {
317: maxres += 1024;
318: *res = mandoc_realloc
319: (*res, maxres * sizeof(struct manpage));
320: }
321: mpage = *res + cur;
322: if (-1 == asprintf(&mpage->file, "%s/%s",
323: paths->paths[i], mp->file)) {
324: perror(0);
325: exit((int)MANDOCLEVEL_SYSERR);
326: }
327: mpage->desc = mp->desc;
328: mpage->form = mp->form;
1.2 schwarze 329: mpage->names = buildnames(db, s, mp->id);
1.3 schwarze 330: mpage->output = outbit ?
331: buildoutput(db, s2, mp->id, outbit) : NULL;
1.1 schwarze 332:
333: free(mp->file);
334: free(mp);
335: cur++;
336: }
337:
338: sqlite3_finalize(s);
1.3 schwarze 339: sqlite3_finalize(s2);
1.1 schwarze 340: sqlite3_close(db);
341: ohash_delete(&htab);
342: }
343: rc = 1;
344: out:
345: exprfree(e);
346: if (-1 != fd)
347: close(fd);
348: free(sql);
349: *sz = cur;
350: return(rc);
1.2 schwarze 351: }
352:
353: static char *
354: buildnames(sqlite3 *db, sqlite3_stmt *s, uint64_t id)
355: {
356: char *names, *newnames;
357: const char *oldnames, *sep1, *name, *sec, *sep2, *arch;
358: size_t i;
359: int c;
360:
361: names = NULL;
362: i = 1;
363: SQL_BIND_INT64(db, s, i, id);
364: while (SQLITE_ROW == (c = sqlite3_step(s))) {
365: if (NULL == names) {
366: oldnames = "";
367: sep1 = "";
368: } else {
369: oldnames = names;
370: sep1 = ", ";
371: }
372: sec = sqlite3_column_text(s, 1);
373: arch = sqlite3_column_text(s, 2);
374: name = sqlite3_column_text(s, 3);
375: sep2 = '\0' == *arch ? "" : "/";
376: if (-1 == asprintf(&newnames, "%s%s%s(%s%s%s)",
377: oldnames, sep1, name, sec, sep2, arch)) {
378: perror(0);
379: exit((int)MANDOCLEVEL_SYSERR);
380: }
381: free(names);
382: names = newnames;
383: }
384: if (SQLITE_DONE != c)
385: fprintf(stderr, "%s\n", sqlite3_errmsg(db));
386: sqlite3_reset(s);
387: return(names);
1.3 schwarze 388: }
389:
390: static char *
391: buildoutput(sqlite3 *db, sqlite3_stmt *s, uint64_t id, uint64_t outbit)
392: {
393: char *output, *newoutput;
394: const char *oldoutput, *sep1, *data;
395: size_t i;
396: int c;
397:
398: output = NULL;
399: i = 1;
400: SQL_BIND_INT64(db, s, i, id);
401: SQL_BIND_INT64(db, s, i, outbit);
402: while (SQLITE_ROW == (c = sqlite3_step(s))) {
403: if (NULL == output) {
404: oldoutput = "";
405: sep1 = "";
406: } else {
407: oldoutput = output;
408: sep1 = " # ";
409: }
410: data = sqlite3_column_text(s, 1);
411: if (-1 == asprintf(&newoutput, "%s%s%s",
412: oldoutput, sep1, data)) {
413: perror(0);
414: exit((int)MANDOCLEVEL_SYSERR);
415: }
416: free(output);
417: output = newoutput;
418: }
419: if (SQLITE_DONE != c)
420: fprintf(stderr, "%s\n", sqlite3_errmsg(db));
421: sqlite3_reset(s);
422: return(output);
1.1 schwarze 423: }
424:
425: /*
426: * Implement substring match as an application-defined SQL function.
427: * Using the SQL LIKE or GLOB operators instead would be a bad idea
428: * because that would require escaping metacharacters in the string
429: * being searched for.
430: */
431: static void
432: sql_match(sqlite3_context *context, int argc, sqlite3_value **argv)
433: {
434:
435: assert(2 == argc);
436: sqlite3_result_int(context, NULL != strcasestr(
437: (const char *)sqlite3_value_text(argv[1]),
438: (const char *)sqlite3_value_text(argv[0])));
439: }
440:
441: /*
442: * Implement regular expression match
443: * as an application-defined SQL function.
444: */
445: static void
446: sql_regexp(sqlite3_context *context, int argc, sqlite3_value **argv)
447: {
448:
449: assert(2 == argc);
450: sqlite3_result_int(context, !regexec(
451: (regex_t *)sqlite3_value_blob(argv[0]),
452: (const char *)sqlite3_value_text(argv[1]),
453: 0, NULL, 0));
454: }
455:
/*
 * Grow the NUL-terminated buffer *sql, currently *sz bytes long,
 * and update *sz to the new length.
 * If count is larger than 1, append count copies of the first
 * character of newstr; otherwise, append all of newstr once.
 */
static void
sql_append(char **sql, size_t *sz, const char *newstr, int count)
{
	char		*tail;
	size_t		 addsz;
	int		 repeat;

	repeat = count > 1;
	addsz = repeat ? (size_t)count : strlen(newstr);

	*sql = mandoc_realloc(*sql, *sz + addsz + 1);
	tail = *sql + *sz;

	if (repeat)
		memset(tail, *newstr, addsz);
	else
		memcpy(tail, newstr, addsz);

	tail[addsz] = '\0';
	*sz += addsz;
}
470:
1.1 schwarze 471: /*
472: * Prepare the search SQL statement.
473: */
474: static char *
475: sql_statement(const struct expr *e, const char *arch, const char *sec)
476: {
477: char *sql;
478: size_t sz;
1.4 schwarze 479: int needop;
1.1 schwarze 480:
1.4 schwarze 481: sql = mandoc_strdup("SELECT * FROM mpages WHERE ");
1.1 schwarze 482: sz = strlen(sql);
483:
1.4 schwarze 484: if (NULL != arch)
485: sql_append(&sql, &sz, "arch = ? AND ", 1);
486: if (NULL != sec)
487: sql_append(&sql, &sz, "sec = ? AND ", 1);
488: sql_append(&sql, &sz, "(", 1);
489:
490: for (needop = 0; NULL != e; e = e->next) {
491: if (e->and)
492: sql_append(&sql, &sz, " AND ", 1);
493: else if (needop)
494: sql_append(&sql, &sz, " OR ", 1);
495: if (e->open)
496: sql_append(&sql, &sz, "(", e->open);
497: sql_append(&sql, &sz, NULL == e->substr ?
498: "id IN (SELECT pageid FROM keys "
499: "WHERE key REGEXP ? AND bits & ?)" :
500: "id IN (SELECT pageid FROM keys "
501: "WHERE key MATCH ? AND bits & ?)", 1);
502: if (e->close)
503: sql_append(&sql, &sz, ")", e->close);
504: needop = 1;
1.1 schwarze 505: }
1.4 schwarze 506: sql_append(&sql, &sz, ")", 1);
1.1 schwarze 507:
508: return(sql);
509: }
510:
511: /*
512: * Compile a set of string tokens into an expression.
513: * Tokens in "argv" are assumed to be individual expression atoms (e.g.,
514: * "(", "foo=bar", etc.).
515: */
516: static struct expr *
517: exprcomp(const struct mansearch *search, int argc, char *argv[])
518: {
1.4 schwarze 519: int i, toopen, logic, igncase, toclose;
1.1 schwarze 520: struct expr *first, *next, *cur;
521:
522: first = cur = NULL;
1.4 schwarze 523: toopen = logic = igncase = toclose = 0;
1.1 schwarze 524:
525: for (i = 0; i < argc; i++) {
1.4 schwarze 526: if (0 == strcmp("(", argv[i])) {
527: if (igncase)
528: goto fail;
529: toopen++;
530: toclose++;
531: continue;
532: } else if (0 == strcmp(")", argv[i])) {
533: if (toopen || logic || igncase || NULL == cur)
534: goto fail;
535: cur->close++;
536: if (0 > --toclose)
537: goto fail;
538: continue;
539: } else if (0 == strcmp("-a", argv[i])) {
540: if (toopen || logic || igncase || NULL == cur)
541: goto fail;
542: logic = 1;
543: continue;
544: } else if (0 == strcmp("-o", argv[i])) {
545: if (toopen || logic || igncase || NULL == cur)
546: goto fail;
547: logic = 2;
548: continue;
549: } else if (0 == strcmp("-i", argv[i])) {
550: if (igncase)
551: goto fail;
552: igncase = 1;
553: continue;
1.1 schwarze 554: }
1.4 schwarze 555: next = exprterm(search, argv[i], !igncase);
556: if (NULL == next)
557: goto fail;
558: next->open = toopen;
559: next->and = (1 == logic);
1.1 schwarze 560: if (NULL != first) {
561: cur->next = next;
562: cur = next;
563: } else
564: cur = first = next;
1.4 schwarze 565: toopen = logic = igncase = 0;
1.1 schwarze 566: }
1.4 schwarze 567: if ( ! (toopen || logic || igncase || toclose))
568: return(first);
569: fail:
570: if (NULL != first)
571: exprfree(first);
572: return(NULL);
1.1 schwarze 573: }
574:
575: static struct expr *
576: exprterm(const struct mansearch *search, char *buf, int cs)
577: {
578: struct expr *e;
579: char *key, *v;
580: size_t i;
581:
582: if ('\0' == *buf)
583: return(NULL);
584:
585: e = mandoc_calloc(1, sizeof(struct expr));
586:
587: /*"whatis" mode uses an opaque string and default fields. */
588:
589: if (MANSEARCH_WHATIS & search->flags) {
590: e->substr = buf;
591: e->bits = search->deftype;
592: return(e);
593: }
594:
595: /*
596: * If no =~ is specified, search with equality over names and
597: * descriptions.
598: * If =~ begins the phrase, use name and description fields.
599: */
600:
601: if (NULL == (v = strpbrk(buf, "=~"))) {
602: e->substr = buf;
603: e->bits = search->deftype;
604: return(e);
605: } else if (v == buf)
606: e->bits = search->deftype;
607:
608: if ('~' == *v++) {
609: if (regcomp(&e->regexp, v,
610: REG_EXTENDED | REG_NOSUB | (cs ? 0 : REG_ICASE))) {
611: free(e);
612: return(NULL);
613: }
614: } else
615: e->substr = v;
616: v[-1] = '\0';
617:
618: /*
619: * Parse out all possible fields.
620: * If the field doesn't resolve, bail.
621: */
622:
623: while (NULL != (key = strsep(&buf, ","))) {
624: if ('\0' == *key)
625: continue;
626: i = 0;
627: while (types[i].bits &&
628: strcasecmp(types[i].name, key))
629: i++;
630: if (0 == types[i].bits) {
631: free(e);
632: return(NULL);
633: }
634: e->bits |= types[i].bits;
635: }
636:
637: return(e);
638: }
639:
640: static void
641: exprfree(struct expr *p)
642: {
643: struct expr *pp;
644:
645: while (NULL != p) {
646: pp = p->next;
647: free(p);
648: p = pp;
649: }
650: }
651:
/*
 * ohash callback installed as info.halloc:
 * return sz bytes of zeroed memory from mandoc_calloc().
 * The extra argument is not used.
 */
static void *
hash_halloc(size_t sz, void *arg)
{
	void	*mem;

	(void)arg;
	mem = mandoc_calloc(sz, 1);
	return(mem);
}
658:
/*
 * ohash callback installed as info.alloc:
 * return sz bytes of memory from mandoc_malloc().
 * The extra argument is not used.
 */
static void *
hash_alloc(size_t sz, void *arg)
{
	void	*mem;

	(void)arg;
	mem = mandoc_malloc(sz);
	return(mem);
}
665:
/*
 * ohash callback installed as info.hfree:
 * release memory obtained from one of the allocation callbacks.
 * The size and the extra argument are not used.
 */
static void
hash_free(void *p, size_t sz, void *arg)
{

	(void)sz;
	(void)arg;
	free(p);
}