Annotation of src/usr.bin/mandoc/mansearch.c, Revision 1.4
1.4 ! schwarze 1: /* $Id: mansearch.c,v 1.3 2013/12/31 03:41:09 schwarze Exp $ */
1.1 schwarze 2: /*
3: * Copyright (c) 2012 Kristaps Dzonsons <kristaps@bsd.lv>
1.4 ! schwarze 4: * Copyright (c) 2013, 2014 Ingo Schwarze <schwarze@openbsd.org>
1.1 schwarze 5: *
6: * Permission to use, copy, modify, and distribute this software for any
7: * purpose with or without fee is hereby granted, provided that the above
8: * copyright notice and this permission notice appear in all copies.
9: *
10: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17: */
18: #include <assert.h>
19: #include <fcntl.h>
20: #include <getopt.h>
21: #include <limits.h>
22: #include <regex.h>
23: #include <stdio.h>
24: #include <stdint.h>
25: #include <stddef.h>
26: #include <stdlib.h>
27: #include <string.h>
28: #include <unistd.h>
29:
30: #include <ohash.h>
31: #include <sqlite3.h>
32:
33: #include "mandoc.h"
34: #include "manpath.h"
35: #include "mansearch.h"
36:
/*
 * Helpers for binding the next parameter of a prepared statement.
 * Each one binds the value _v at parameter index _i of statement _s,
 * then post-increments _i, so _i must be a modifiable lvalue.
 * A failed bind is only reported on stderr, using the error message
 * of the database handle _db; execution continues afterwards.
 * The storage passed in is bound with SQLITE_STATIC, so it has to
 * remain valid for as long as the statement uses the binding.
 */
#define	SQL_BIND_TEXT(_db, _s, _i, _v) do { \
	if (sqlite3_bind_text((_s), (_i)++, (_v), -1, \
	    SQLITE_STATIC) != SQLITE_OK) \
		fprintf(stderr, "%s\n", sqlite3_errmsg((_db))); \
} while (0)

#define	SQL_BIND_INT64(_db, _s, _i, _v) do { \
	if (sqlite3_bind_int64((_s), (_i)++, (_v)) != SQLITE_OK) \
		fprintf(stderr, "%s\n", sqlite3_errmsg((_db))); \
} while (0)

/* Binds the object _v itself (its address and size), not a copy. */
#define	SQL_BIND_BLOB(_db, _s, _i, _v) do { \
	if (sqlite3_bind_blob((_s), (_i)++, (&_v), sizeof(_v), \
	    SQLITE_STATIC) != SQLITE_OK) \
		fprintf(stderr, "%s\n", sqlite3_errmsg((_db))); \
} while (0)
52:
/*
 * One term of a search expression.
 * Terms form a singly linked list in the order they were parsed.
 */
struct	expr {
	uint64_t	 bits;		/* mask of the key types to search */
	const char	*substr;	/* substring to look for;
					   NULL if "regexp" is used instead */
	regex_t		 regexp;	/* compiled pattern, when substr is NULL */
	int		 open;		/* number of "(" preceding this term */
	int		 and;		/* non-zero: joined to the previous
					   term by AND rather than OR */
	int		 close;		/* number of ")" following this term */
	struct expr	*next;		/* following term, or NULL at the end */
};
62:
/*
 * A page found by the search query, kept in a hash table that is
 * keyed on "id" until it is converted into a result entry.
 */
struct	match {
	uint64_t	 id;	/* page identifier taken from the database */
	char		*file;	/* path of the page, relative to its manpath */
	char		*desc;	/* one-line description of the page */
	int		 form;	/* 0 == catpage */
};
69:
/*
 * Association of a key type bit mask with the name
 * by which that type is selected in a search term.
 */
struct	type {
	uint64_t	 bits;	/* bit mask of the key type */
	const char	*name;	/* name of the key type */
};
74:
75: static const struct type types[] = {
76: { TYPE_An, "An" },
77: { TYPE_Ar, "Ar" },
78: { TYPE_At, "At" },
79: { TYPE_Bsx, "Bsx" },
80: { TYPE_Bx, "Bx" },
81: { TYPE_Cd, "Cd" },
82: { TYPE_Cm, "Cm" },
83: { TYPE_Dv, "Dv" },
84: { TYPE_Dx, "Dx" },
85: { TYPE_Em, "Em" },
86: { TYPE_Er, "Er" },
87: { TYPE_Ev, "Ev" },
88: { TYPE_Fa, "Fa" },
89: { TYPE_Fl, "Fl" },
90: { TYPE_Fn, "Fn" },
91: { TYPE_Fn, "Fo" },
92: { TYPE_Ft, "Ft" },
93: { TYPE_Fx, "Fx" },
94: { TYPE_Ic, "Ic" },
95: { TYPE_In, "In" },
96: { TYPE_Lb, "Lb" },
97: { TYPE_Li, "Li" },
98: { TYPE_Lk, "Lk" },
99: { TYPE_Ms, "Ms" },
100: { TYPE_Mt, "Mt" },
101: { TYPE_Nd, "Nd" },
102: { TYPE_Nm, "Nm" },
103: { TYPE_Nx, "Nx" },
104: { TYPE_Ox, "Ox" },
105: { TYPE_Pa, "Pa" },
106: { TYPE_Rs, "Rs" },
107: { TYPE_Sh, "Sh" },
108: { TYPE_Ss, "Ss" },
109: { TYPE_St, "St" },
110: { TYPE_Sy, "Sy" },
111: { TYPE_Tn, "Tn" },
112: { TYPE_Va, "Va" },
113: { TYPE_Va, "Vt" },
114: { TYPE_Xr, "Xr" },
115: { ~0ULL, "any" },
116: { 0ULL, NULL }
117: };
118:
1.2 schwarze 119: static char *buildnames(sqlite3 *, sqlite3_stmt *, uint64_t);
1.3 schwarze 120: static char *buildoutput(sqlite3 *, sqlite3_stmt *,
121: uint64_t, uint64_t);
1.1 schwarze 122: static void *hash_alloc(size_t, void *);
123: static void hash_free(void *, size_t, void *);
124: static void *hash_halloc(size_t, void *);
125: static struct expr *exprcomp(const struct mansearch *,
126: int, char *[]);
127: static void exprfree(struct expr *);
128: static struct expr *exprterm(const struct mansearch *, char *, int);
1.4 ! schwarze 129: static void sql_append(char **sql, size_t *sz,
! 130: const char *newstr, int count);
1.1 schwarze 131: static void sql_match(sqlite3_context *context,
132: int argc, sqlite3_value **argv);
133: static void sql_regexp(sqlite3_context *context,
134: int argc, sqlite3_value **argv);
135: static char *sql_statement(const struct expr *,
136: const char *, const char *);
137:
138: int
139: mansearch(const struct mansearch *search,
1.3 schwarze 140: const struct manpaths *paths,
141: int argc, char *argv[],
142: const char *outkey,
1.1 schwarze 143: struct manpage **res, size_t *sz)
144: {
1.3 schwarze 145: int fd, rc, c, ibit;
1.1 schwarze 146: int64_t id;
1.3 schwarze 147: uint64_t outbit;
1.1 schwarze 148: char buf[PATH_MAX];
1.2 schwarze 149: char *sql;
1.1 schwarze 150: struct manpage *mpage;
151: struct expr *e, *ep;
152: sqlite3 *db;
1.3 schwarze 153: sqlite3_stmt *s, *s2;
1.1 schwarze 154: struct match *mp;
155: struct ohash_info info;
156: struct ohash htab;
157: unsigned int idx;
158: size_t i, j, cur, maxres;
159:
160: memset(&info, 0, sizeof(struct ohash_info));
161:
162: info.halloc = hash_halloc;
163: info.alloc = hash_alloc;
164: info.hfree = hash_free;
165: info.key_offset = offsetof(struct match, id);
166:
167: *sz = cur = maxres = 0;
168: sql = NULL;
169: *res = NULL;
170: fd = -1;
171: e = NULL;
172: rc = 0;
173:
174: if (0 == argc)
175: goto out;
176: if (NULL == (e = exprcomp(search, argc, argv)))
177: goto out;
178:
1.3 schwarze 179: outbit = 0;
180: if (NULL != outkey) {
181: for (ibit = 0; types[ibit].bits; ibit++) {
182: if (0 == strcasecmp(types[ibit].name, outkey)) {
183: outbit = types[ibit].bits;
184: break;
185: }
186: }
187: }
188:
1.1 schwarze 189: /*
190: * Save a descriptor to the current working directory.
191: * Since pathnames in the "paths" variable might be relative,
192: * and we'll be chdir()ing into them, we need to keep a handle
193: * on our current directory from which to start the chdir().
194: */
195:
196: if (NULL == getcwd(buf, PATH_MAX)) {
197: perror(NULL);
198: goto out;
199: } else if (-1 == (fd = open(buf, O_RDONLY, 0))) {
200: perror(buf);
201: goto out;
202: }
203:
204: sql = sql_statement(e, search->arch, search->sec);
205:
206: /*
207: * Loop over the directories (containing databases) for us to
208: * search.
209: * Don't let missing/bad databases/directories phase us.
210: * In each, try to open the resident database and, if it opens,
211: * scan it for our match expression.
212: */
213:
214: for (i = 0; i < paths->sz; i++) {
215: if (-1 == fchdir(fd)) {
216: perror(buf);
217: free(*res);
218: break;
219: } else if (-1 == chdir(paths->paths[i])) {
220: perror(paths->paths[i]);
221: continue;
222: }
223:
224: c = sqlite3_open_v2
225: (MANDOC_DB, &db,
226: SQLITE_OPEN_READONLY, NULL);
227:
228: if (SQLITE_OK != c) {
229: perror(MANDOC_DB);
230: sqlite3_close(db);
231: continue;
232: }
233:
234: /*
235: * Define the SQL functions for substring
236: * and regular expression matching.
237: */
238:
239: c = sqlite3_create_function(db, "match", 2,
240: SQLITE_ANY, NULL, sql_match, NULL, NULL);
241: assert(SQLITE_OK == c);
242: c = sqlite3_create_function(db, "regexp", 2,
243: SQLITE_ANY, NULL, sql_regexp, NULL, NULL);
244: assert(SQLITE_OK == c);
245:
246: j = 1;
247: c = sqlite3_prepare_v2(db, sql, -1, &s, NULL);
248: if (SQLITE_OK != c)
249: fprintf(stderr, "%s\n", sqlite3_errmsg(db));
250:
251: if (NULL != search->arch)
252: SQL_BIND_TEXT(db, s, j, search->arch);
253: if (NULL != search->sec)
254: SQL_BIND_TEXT(db, s, j, search->sec);
255:
256: for (ep = e; NULL != ep; ep = ep->next) {
257: if (NULL == ep->substr) {
258: SQL_BIND_BLOB(db, s, j, ep->regexp);
259: } else
260: SQL_BIND_TEXT(db, s, j, ep->substr);
261: SQL_BIND_INT64(db, s, j, ep->bits);
262: }
263:
264: memset(&htab, 0, sizeof(struct ohash));
265: ohash_init(&htab, 4, &info);
266:
267: /*
268: * Hash each entry on its [unique] document identifier.
269: * This is a uint64_t.
270: * Instead of using a hash function, simply convert the
271: * uint64_t to a uint32_t, the hash value's type.
272: * This gives good performance and preserves the
273: * distribution of buckets in the table.
274: */
275: while (SQLITE_ROW == (c = sqlite3_step(s))) {
1.4 ! schwarze 276: id = sqlite3_column_int64(s, 5);
1.1 schwarze 277: idx = ohash_lookup_memory
278: (&htab, (char *)&id,
279: sizeof(uint64_t), (uint32_t)id);
280:
281: if (NULL != ohash_find(&htab, idx))
282: continue;
283:
284: mp = mandoc_calloc(1, sizeof(struct match));
285: mp->id = id;
286: mp->file = mandoc_strdup
1.4 ! schwarze 287: ((char *)sqlite3_column_text(s, 0));
! 288: mp->desc = mandoc_strdup
1.1 schwarze 289: ((char *)sqlite3_column_text(s, 3));
1.4 ! schwarze 290: mp->form = sqlite3_column_int(s, 4);
1.1 schwarze 291: ohash_insert(&htab, idx, mp);
292: }
293:
294: if (SQLITE_DONE != c)
295: fprintf(stderr, "%s\n", sqlite3_errmsg(db));
296:
297: sqlite3_finalize(s);
298:
299: c = sqlite3_prepare_v2(db,
300: "SELECT * FROM mlinks WHERE pageid=?",
301: -1, &s, NULL);
302: if (SQLITE_OK != c)
303: fprintf(stderr, "%s\n", sqlite3_errmsg(db));
304:
1.3 schwarze 305: c = sqlite3_prepare_v2(db,
306: "SELECT * FROM keys WHERE pageid=? AND bits & ?",
307: -1, &s2, NULL);
308: if (SQLITE_OK != c)
309: fprintf(stderr, "%s\n", sqlite3_errmsg(db));
310:
1.1 schwarze 311: for (mp = ohash_first(&htab, &idx);
312: NULL != mp;
313: mp = ohash_next(&htab, &idx)) {
314: if (cur + 1 > maxres) {
315: maxres += 1024;
316: *res = mandoc_realloc
317: (*res, maxres * sizeof(struct manpage));
318: }
319: mpage = *res + cur;
320: if (-1 == asprintf(&mpage->file, "%s/%s",
321: paths->paths[i], mp->file)) {
322: perror(0);
323: exit((int)MANDOCLEVEL_SYSERR);
324: }
325: mpage->desc = mp->desc;
326: mpage->form = mp->form;
1.2 schwarze 327: mpage->names = buildnames(db, s, mp->id);
1.3 schwarze 328: mpage->output = outbit ?
329: buildoutput(db, s2, mp->id, outbit) : NULL;
1.1 schwarze 330:
331: free(mp->file);
332: free(mp);
333: cur++;
334: }
335:
336: sqlite3_finalize(s);
1.3 schwarze 337: sqlite3_finalize(s2);
1.1 schwarze 338: sqlite3_close(db);
339: ohash_delete(&htab);
340: }
341: rc = 1;
342: out:
343: exprfree(e);
344: if (-1 != fd)
345: close(fd);
346: free(sql);
347: *sz = cur;
348: return(rc);
1.2 schwarze 349: }
350:
351: static char *
352: buildnames(sqlite3 *db, sqlite3_stmt *s, uint64_t id)
353: {
354: char *names, *newnames;
355: const char *oldnames, *sep1, *name, *sec, *sep2, *arch;
356: size_t i;
357: int c;
358:
359: names = NULL;
360: i = 1;
361: SQL_BIND_INT64(db, s, i, id);
362: while (SQLITE_ROW == (c = sqlite3_step(s))) {
363: if (NULL == names) {
364: oldnames = "";
365: sep1 = "";
366: } else {
367: oldnames = names;
368: sep1 = ", ";
369: }
370: sec = sqlite3_column_text(s, 1);
371: arch = sqlite3_column_text(s, 2);
372: name = sqlite3_column_text(s, 3);
373: sep2 = '\0' == *arch ? "" : "/";
374: if (-1 == asprintf(&newnames, "%s%s%s(%s%s%s)",
375: oldnames, sep1, name, sec, sep2, arch)) {
376: perror(0);
377: exit((int)MANDOCLEVEL_SYSERR);
378: }
379: free(names);
380: names = newnames;
381: }
382: if (SQLITE_DONE != c)
383: fprintf(stderr, "%s\n", sqlite3_errmsg(db));
384: sqlite3_reset(s);
385: return(names);
1.3 schwarze 386: }
387:
388: static char *
389: buildoutput(sqlite3 *db, sqlite3_stmt *s, uint64_t id, uint64_t outbit)
390: {
391: char *output, *newoutput;
392: const char *oldoutput, *sep1, *data;
393: size_t i;
394: int c;
395:
396: output = NULL;
397: i = 1;
398: SQL_BIND_INT64(db, s, i, id);
399: SQL_BIND_INT64(db, s, i, outbit);
400: while (SQLITE_ROW == (c = sqlite3_step(s))) {
401: if (NULL == output) {
402: oldoutput = "";
403: sep1 = "";
404: } else {
405: oldoutput = output;
406: sep1 = " # ";
407: }
408: data = sqlite3_column_text(s, 1);
409: if (-1 == asprintf(&newoutput, "%s%s%s",
410: oldoutput, sep1, data)) {
411: perror(0);
412: exit((int)MANDOCLEVEL_SYSERR);
413: }
414: free(output);
415: output = newoutput;
416: }
417: if (SQLITE_DONE != c)
418: fprintf(stderr, "%s\n", sqlite3_errmsg(db));
419: sqlite3_reset(s);
420: return(output);
1.1 schwarze 421: }
422:
423: /*
424: * Implement substring match as an application-defined SQL function.
425: * Using the SQL LIKE or GLOB operators instead would be a bad idea
426: * because that would require escaping metacharacters in the string
427: * being searched for.
428: */
429: static void
430: sql_match(sqlite3_context *context, int argc, sqlite3_value **argv)
431: {
432:
433: assert(2 == argc);
434: sqlite3_result_int(context, NULL != strcasestr(
435: (const char *)sqlite3_value_text(argv[1]),
436: (const char *)sqlite3_value_text(argv[0])));
437: }
438:
439: /*
440: * Implement regular expression match
441: * as an application-defined SQL function.
442: */
443: static void
444: sql_regexp(sqlite3_context *context, int argc, sqlite3_value **argv)
445: {
446:
447: assert(2 == argc);
448: sqlite3_result_int(context, !regexec(
449: (regex_t *)sqlite3_value_blob(argv[0]),
450: (const char *)sqlite3_value_text(argv[1]),
451: 0, NULL, 0));
452: }
453:
/*
 * Extend the NUL-terminated buffer *sql, whose current string
 * length is *sz, and update both through the pointers.
 * If "count" is greater than one, the first character of "newstr"
 * is appended "count" times; otherwise, the whole string "newstr"
 * is appended once.
 */
static void
sql_append(char **sql, size_t *sz, const char *newstr, int count)
{
	size_t	 addlen;
	char	*dst;

	if (1 < count)
		addlen = (size_t)count;
	else
		addlen = strlen(newstr);

	/* Make room for the addition and the terminating NUL. */
	*sql = mandoc_realloc(*sql, *sz + addlen + 1);
	dst = *sql + *sz;

	if (1 < count)
		memset(dst, *newstr, (size_t)count);
	else
		memcpy(dst, newstr, addlen);

	dst[addlen] = '\0';
	*sz += addlen;
}
! 468:
1.1 schwarze 469: /*
470: * Prepare the search SQL statement.
471: */
472: static char *
473: sql_statement(const struct expr *e, const char *arch, const char *sec)
474: {
475: char *sql;
476: size_t sz;
1.4 ! schwarze 477: int needop;
1.1 schwarze 478:
1.4 ! schwarze 479: sql = mandoc_strdup("SELECT * FROM mpages WHERE ");
1.1 schwarze 480: sz = strlen(sql);
481:
1.4 ! schwarze 482: if (NULL != arch)
! 483: sql_append(&sql, &sz, "arch = ? AND ", 1);
! 484: if (NULL != sec)
! 485: sql_append(&sql, &sz, "sec = ? AND ", 1);
! 486: sql_append(&sql, &sz, "(", 1);
! 487:
! 488: for (needop = 0; NULL != e; e = e->next) {
! 489: if (e->and)
! 490: sql_append(&sql, &sz, " AND ", 1);
! 491: else if (needop)
! 492: sql_append(&sql, &sz, " OR ", 1);
! 493: if (e->open)
! 494: sql_append(&sql, &sz, "(", e->open);
! 495: sql_append(&sql, &sz, NULL == e->substr ?
! 496: "id IN (SELECT pageid FROM keys "
! 497: "WHERE key REGEXP ? AND bits & ?)" :
! 498: "id IN (SELECT pageid FROM keys "
! 499: "WHERE key MATCH ? AND bits & ?)", 1);
! 500: if (e->close)
! 501: sql_append(&sql, &sz, ")", e->close);
! 502: needop = 1;
1.1 schwarze 503: }
1.4 ! schwarze 504: sql_append(&sql, &sz, ")", 1);
1.1 schwarze 505:
506: return(sql);
507: }
508:
509: /*
510: * Compile a set of string tokens into an expression.
511: * Tokens in "argv" are assumed to be individual expression atoms (e.g.,
512: * "(", "foo=bar", etc.).
513: */
514: static struct expr *
515: exprcomp(const struct mansearch *search, int argc, char *argv[])
516: {
1.4 ! schwarze 517: int i, toopen, logic, igncase, toclose;
1.1 schwarze 518: struct expr *first, *next, *cur;
519:
520: first = cur = NULL;
1.4 ! schwarze 521: toopen = logic = igncase = toclose = 0;
1.1 schwarze 522:
523: for (i = 0; i < argc; i++) {
1.4 ! schwarze 524: if (0 == strcmp("(", argv[i])) {
! 525: if (igncase)
! 526: goto fail;
! 527: toopen++;
! 528: toclose++;
! 529: continue;
! 530: } else if (0 == strcmp(")", argv[i])) {
! 531: if (toopen || logic || igncase || NULL == cur)
! 532: goto fail;
! 533: cur->close++;
! 534: if (0 > --toclose)
! 535: goto fail;
! 536: continue;
! 537: } else if (0 == strcmp("-a", argv[i])) {
! 538: if (toopen || logic || igncase || NULL == cur)
! 539: goto fail;
! 540: logic = 1;
! 541: continue;
! 542: } else if (0 == strcmp("-o", argv[i])) {
! 543: if (toopen || logic || igncase || NULL == cur)
! 544: goto fail;
! 545: logic = 2;
! 546: continue;
! 547: } else if (0 == strcmp("-i", argv[i])) {
! 548: if (igncase)
! 549: goto fail;
! 550: igncase = 1;
! 551: continue;
1.1 schwarze 552: }
1.4 ! schwarze 553: next = exprterm(search, argv[i], !igncase);
! 554: if (NULL == next)
! 555: goto fail;
! 556: next->open = toopen;
! 557: next->and = (1 == logic);
1.1 schwarze 558: if (NULL != first) {
559: cur->next = next;
560: cur = next;
561: } else
562: cur = first = next;
1.4 ! schwarze 563: toopen = logic = igncase = 0;
1.1 schwarze 564: }
1.4 ! schwarze 565: if ( ! (toopen || logic || igncase || toclose))
! 566: return(first);
! 567: fail:
! 568: if (NULL != first)
! 569: exprfree(first);
! 570: return(NULL);
1.1 schwarze 571: }
572:
573: static struct expr *
574: exprterm(const struct mansearch *search, char *buf, int cs)
575: {
576: struct expr *e;
577: char *key, *v;
578: size_t i;
579:
580: if ('\0' == *buf)
581: return(NULL);
582:
583: e = mandoc_calloc(1, sizeof(struct expr));
584:
585: /*"whatis" mode uses an opaque string and default fields. */
586:
587: if (MANSEARCH_WHATIS & search->flags) {
588: e->substr = buf;
589: e->bits = search->deftype;
590: return(e);
591: }
592:
593: /*
594: * If no =~ is specified, search with equality over names and
595: * descriptions.
596: * If =~ begins the phrase, use name and description fields.
597: */
598:
599: if (NULL == (v = strpbrk(buf, "=~"))) {
600: e->substr = buf;
601: e->bits = search->deftype;
602: return(e);
603: } else if (v == buf)
604: e->bits = search->deftype;
605:
606: if ('~' == *v++) {
607: if (regcomp(&e->regexp, v,
608: REG_EXTENDED | REG_NOSUB | (cs ? 0 : REG_ICASE))) {
609: free(e);
610: return(NULL);
611: }
612: } else
613: e->substr = v;
614: v[-1] = '\0';
615:
616: /*
617: * Parse out all possible fields.
618: * If the field doesn't resolve, bail.
619: */
620:
621: while (NULL != (key = strsep(&buf, ","))) {
622: if ('\0' == *key)
623: continue;
624: i = 0;
625: while (types[i].bits &&
626: strcasecmp(types[i].name, key))
627: i++;
628: if (0 == types[i].bits) {
629: free(e);
630: return(NULL);
631: }
632: e->bits |= types[i].bits;
633: }
634:
635: return(e);
636: }
637:
638: static void
639: exprfree(struct expr *p)
640: {
641: struct expr *pp;
642:
643: while (NULL != p) {
644: pp = p->next;
645: free(p);
646: p = pp;
647: }
648: }
649:
/*
 * Callback installed as the "halloc" member of the ohash_info
 * structure: hand out "sz" bytes obtained from mandoc_calloc().
 * The "arg" parameter is not used.
 */
static void *
hash_halloc(size_t sz, void *arg)
{
	void	*mem;

	mem = mandoc_calloc(sz, 1);
	return(mem);
}
656:
/*
 * Callback installed as the "alloc" member of the ohash_info
 * structure: hand out "sz" bytes obtained from mandoc_malloc().
 * The "arg" parameter is not used.
 */
static void *
hash_alloc(size_t sz, void *arg)
{
	void	*mem;

	mem = mandoc_malloc(sz);
	return(mem);
}
663:
/*
 * Callback installed as the "hfree" member of the ohash_info
 * structure: release the memory at "p".
 * Neither the size "sz" nor "arg" is needed for that.
 */
static void
hash_free(void *p, size_t sz, void *arg)
{
	void	*mem = p;

	free(mem);
}
669: }