Annotation of src/usr.bin/mandoc/mansearch.c, Revision 1.2
1.2 ! schwarze 1: /* $Id: mansearch.c,v 1.1 2013/12/31 00:40:19 schwarze Exp $ */
1.1 schwarze 2: /*
3: * Copyright (c) 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4: * Copyright (c) 2013 Ingo Schwarze <schwarze@openbsd.org>
5: *
6: * Permission to use, copy, modify, and distribute this software for any
7: * purpose with or without fee is hereby granted, provided that the above
8: * copyright notice and this permission notice appear in all copies.
9: *
10: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17: */
18: #include <assert.h>
19: #include <fcntl.h>
20: #include <getopt.h>
21: #include <limits.h>
22: #include <regex.h>
23: #include <stdio.h>
24: #include <stdint.h>
25: #include <stddef.h>
26: #include <stdlib.h>
27: #include <string.h>
28: #include <unistd.h>
29:
30: #include <ohash.h>
31: #include <sqlite3.h>
32:
33: #include "mandoc.h"
34: #include "manpath.h"
35: #include "mansearch.h"
36:
/*
 * Helpers for binding a value to the next parameter of a prepared
 * statement.  Each macro binds _v to parameter index _i of statement
 * _s, post-increments _i (so _i must be a modifiable lvalue) and, if
 * the bind call does not return SQLITE_OK, prints the error message
 * of database handle _db to stderr.
 *
 * All binds pass SQLITE_STATIC, so the bound storage has to stay
 * valid for as long as the statement uses it.
 * SQL_BIND_BLOB binds the object _v itself (address and sizeof),
 * so _v has to be an lvalue, not a pointer to the data.
 */
#define	SQL_BIND_TEXT(_db, _s, _i, _v) \
	do { if (SQLITE_OK != sqlite3_bind_text \
		((_s), (_i)++, (_v), -1, SQLITE_STATIC)) \
		fprintf(stderr, "%s\n", sqlite3_errmsg((_db))); \
	} while (0)
#define	SQL_BIND_INT64(_db, _s, _i, _v) \
	do { if (SQLITE_OK != sqlite3_bind_int64 \
		((_s), (_i)++, (_v))) \
		fprintf(stderr, "%s\n", sqlite3_errmsg((_db))); \
	} while (0)
#define	SQL_BIND_BLOB(_db, _s, _i, _v) \
	do { if (SQLITE_OK != sqlite3_bind_blob \
		((_s), (_i)++, &(_v), sizeof(_v), SQLITE_STATIC)) \
		fprintf(stderr, "%s\n", sqlite3_errmsg((_db))); \
	} while (0)
52:
/*
 * One term of a search expression.
 * Terms are chained into a singly linked list through "next".
 * A term matches either by substring ("substr" is set) or by
 * regular expression ("substr" is NULL and "regexp" is compiled).
 */
struct	expr {
	uint64_t	 bits;		/* mask of the types to search */
	const char	*substr;	/* substring to look for, or NULL */
	regex_t		 regexp;	/* compiled regexp, if no substring */
	struct expr	*next;		/* following term, or NULL */
};
59:
/*
 * One page found in a database, kept in a hash table keyed on "id"
 * while the results of a single database are being collected.
 */
struct	match {
	uint64_t	 id;	/* page identifier in the database */
	char		*file;	/* file path relative to the manpath */
	char		*desc;	/* one-line description of the page */
	int		 form;	/* zero means preformatted (catpage) */
};
66:
/*
 * Maps the name of a search key, as typed by the user,
 * to its bit in the type mask.
 */
struct	type {
	uint64_t	 bits;	/* type mask value */
	const char	*name;	/* key name, compared case-insensitively */
};
71:
72: static const struct type types[] = {
73: { TYPE_An, "An" },
74: { TYPE_Ar, "Ar" },
75: { TYPE_At, "At" },
76: { TYPE_Bsx, "Bsx" },
77: { TYPE_Bx, "Bx" },
78: { TYPE_Cd, "Cd" },
79: { TYPE_Cm, "Cm" },
80: { TYPE_Dv, "Dv" },
81: { TYPE_Dx, "Dx" },
82: { TYPE_Em, "Em" },
83: { TYPE_Er, "Er" },
84: { TYPE_Ev, "Ev" },
85: { TYPE_Fa, "Fa" },
86: { TYPE_Fl, "Fl" },
87: { TYPE_Fn, "Fn" },
88: { TYPE_Fn, "Fo" },
89: { TYPE_Ft, "Ft" },
90: { TYPE_Fx, "Fx" },
91: { TYPE_Ic, "Ic" },
92: { TYPE_In, "In" },
93: { TYPE_Lb, "Lb" },
94: { TYPE_Li, "Li" },
95: { TYPE_Lk, "Lk" },
96: { TYPE_Ms, "Ms" },
97: { TYPE_Mt, "Mt" },
98: { TYPE_Nd, "Nd" },
99: { TYPE_Nm, "Nm" },
100: { TYPE_Nx, "Nx" },
101: { TYPE_Ox, "Ox" },
102: { TYPE_Pa, "Pa" },
103: { TYPE_Rs, "Rs" },
104: { TYPE_Sh, "Sh" },
105: { TYPE_Ss, "Ss" },
106: { TYPE_St, "St" },
107: { TYPE_Sy, "Sy" },
108: { TYPE_Tn, "Tn" },
109: { TYPE_Va, "Va" },
110: { TYPE_Va, "Vt" },
111: { TYPE_Xr, "Xr" },
112: { ~0ULL, "any" },
113: { 0ULL, NULL }
114: };
115:
1.2 ! schwarze 116: static char *buildnames(sqlite3 *, sqlite3_stmt *, uint64_t);
1.1 schwarze 117: static void *hash_alloc(size_t, void *);
118: static void hash_free(void *, size_t, void *);
119: static void *hash_halloc(size_t, void *);
120: static struct expr *exprcomp(const struct mansearch *,
121: int, char *[]);
122: static void exprfree(struct expr *);
123: static struct expr *exprterm(const struct mansearch *, char *, int);
124: static void sql_match(sqlite3_context *context,
125: int argc, sqlite3_value **argv);
126: static void sql_regexp(sqlite3_context *context,
127: int argc, sqlite3_value **argv);
128: static char *sql_statement(const struct expr *,
129: const char *, const char *);
130:
131: int
132: mansearch(const struct mansearch *search,
133: const struct manpaths *paths,
134: int argc, char *argv[],
135: struct manpage **res, size_t *sz)
136: {
137: int fd, rc, c;
138: int64_t id;
139: char buf[PATH_MAX];
1.2 ! schwarze 140: char *sql;
1.1 schwarze 141: struct manpage *mpage;
142: struct expr *e, *ep;
143: sqlite3 *db;
144: sqlite3_stmt *s;
145: struct match *mp;
146: struct ohash_info info;
147: struct ohash htab;
148: unsigned int idx;
149: size_t i, j, cur, maxres;
150:
151: memset(&info, 0, sizeof(struct ohash_info));
152:
153: info.halloc = hash_halloc;
154: info.alloc = hash_alloc;
155: info.hfree = hash_free;
156: info.key_offset = offsetof(struct match, id);
157:
158: *sz = cur = maxres = 0;
159: sql = NULL;
160: *res = NULL;
161: fd = -1;
162: e = NULL;
163: rc = 0;
164:
165: if (0 == argc)
166: goto out;
167: if (NULL == (e = exprcomp(search, argc, argv)))
168: goto out;
169:
170: /*
171: * Save a descriptor to the current working directory.
172: * Since pathnames in the "paths" variable might be relative,
173: * and we'll be chdir()ing into them, we need to keep a handle
174: * on our current directory from which to start the chdir().
175: */
176:
177: if (NULL == getcwd(buf, PATH_MAX)) {
178: perror(NULL);
179: goto out;
180: } else if (-1 == (fd = open(buf, O_RDONLY, 0))) {
181: perror(buf);
182: goto out;
183: }
184:
185: sql = sql_statement(e, search->arch, search->sec);
186:
187: /*
188: * Loop over the directories (containing databases) for us to
189: * search.
190: * Don't let missing/bad databases/directories phase us.
191: * In each, try to open the resident database and, if it opens,
192: * scan it for our match expression.
193: */
194:
195: for (i = 0; i < paths->sz; i++) {
196: if (-1 == fchdir(fd)) {
197: perror(buf);
198: free(*res);
199: break;
200: } else if (-1 == chdir(paths->paths[i])) {
201: perror(paths->paths[i]);
202: continue;
203: }
204:
205: c = sqlite3_open_v2
206: (MANDOC_DB, &db,
207: SQLITE_OPEN_READONLY, NULL);
208:
209: if (SQLITE_OK != c) {
210: perror(MANDOC_DB);
211: sqlite3_close(db);
212: continue;
213: }
214:
215: /*
216: * Define the SQL functions for substring
217: * and regular expression matching.
218: */
219:
220: c = sqlite3_create_function(db, "match", 2,
221: SQLITE_ANY, NULL, sql_match, NULL, NULL);
222: assert(SQLITE_OK == c);
223: c = sqlite3_create_function(db, "regexp", 2,
224: SQLITE_ANY, NULL, sql_regexp, NULL, NULL);
225: assert(SQLITE_OK == c);
226:
227: j = 1;
228: c = sqlite3_prepare_v2(db, sql, -1, &s, NULL);
229: if (SQLITE_OK != c)
230: fprintf(stderr, "%s\n", sqlite3_errmsg(db));
231:
232: if (NULL != search->arch)
233: SQL_BIND_TEXT(db, s, j, search->arch);
234: if (NULL != search->sec)
235: SQL_BIND_TEXT(db, s, j, search->sec);
236:
237: for (ep = e; NULL != ep; ep = ep->next) {
238: if (NULL == ep->substr) {
239: SQL_BIND_BLOB(db, s, j, ep->regexp);
240: } else
241: SQL_BIND_TEXT(db, s, j, ep->substr);
242: SQL_BIND_INT64(db, s, j, ep->bits);
243: }
244:
245: memset(&htab, 0, sizeof(struct ohash));
246: ohash_init(&htab, 4, &info);
247:
248: /*
249: * Hash each entry on its [unique] document identifier.
250: * This is a uint64_t.
251: * Instead of using a hash function, simply convert the
252: * uint64_t to a uint32_t, the hash value's type.
253: * This gives good performance and preserves the
254: * distribution of buckets in the table.
255: */
256: while (SQLITE_ROW == (c = sqlite3_step(s))) {
257: id = sqlite3_column_int64(s, 0);
258: idx = ohash_lookup_memory
259: (&htab, (char *)&id,
260: sizeof(uint64_t), (uint32_t)id);
261:
262: if (NULL != ohash_find(&htab, idx))
263: continue;
264:
265: mp = mandoc_calloc(1, sizeof(struct match));
266: mp->id = id;
267: mp->file = mandoc_strdup
268: ((char *)sqlite3_column_text(s, 3));
269: mp->desc = mandoc_strdup
270: ((char *)sqlite3_column_text(s, 4));
271: mp->form = sqlite3_column_int(s, 5);
272: ohash_insert(&htab, idx, mp);
273: }
274:
275: if (SQLITE_DONE != c)
276: fprintf(stderr, "%s\n", sqlite3_errmsg(db));
277:
278: sqlite3_finalize(s);
279:
280: c = sqlite3_prepare_v2(db,
281: "SELECT * FROM mlinks WHERE pageid=?",
282: -1, &s, NULL);
283: if (SQLITE_OK != c)
284: fprintf(stderr, "%s\n", sqlite3_errmsg(db));
285:
286: for (mp = ohash_first(&htab, &idx);
287: NULL != mp;
288: mp = ohash_next(&htab, &idx)) {
289: if (cur + 1 > maxres) {
290: maxres += 1024;
291: *res = mandoc_realloc
292: (*res, maxres * sizeof(struct manpage));
293: }
294: mpage = *res + cur;
295: if (-1 == asprintf(&mpage->file, "%s/%s",
296: paths->paths[i], mp->file)) {
297: perror(0);
298: exit((int)MANDOCLEVEL_SYSERR);
299: }
300: mpage->desc = mp->desc;
301: mpage->form = mp->form;
1.2 ! schwarze 302: mpage->names = buildnames(db, s, mp->id);
1.1 schwarze 303:
304: free(mp->file);
305: free(mp);
306: cur++;
307: }
308:
309: sqlite3_finalize(s);
310: sqlite3_close(db);
311: ohash_delete(&htab);
312: }
313: rc = 1;
314: out:
315: exprfree(e);
316: if (-1 != fd)
317: close(fd);
318: free(sql);
319: *sz = cur;
320: return(rc);
1.2 ! schwarze 321: }
! 322:
! 323: static char *
! 324: buildnames(sqlite3 *db, sqlite3_stmt *s, uint64_t id)
! 325: {
! 326: char *names, *newnames;
! 327: const char *oldnames, *sep1, *name, *sec, *sep2, *arch;
! 328: size_t i;
! 329: int c;
! 330:
! 331: names = NULL;
! 332: i = 1;
! 333: SQL_BIND_INT64(db, s, i, id);
! 334: while (SQLITE_ROW == (c = sqlite3_step(s))) {
! 335: if (NULL == names) {
! 336: oldnames = "";
! 337: sep1 = "";
! 338: } else {
! 339: oldnames = names;
! 340: sep1 = ", ";
! 341: }
! 342: sec = sqlite3_column_text(s, 1);
! 343: arch = sqlite3_column_text(s, 2);
! 344: name = sqlite3_column_text(s, 3);
! 345: sep2 = '\0' == *arch ? "" : "/";
! 346: if (-1 == asprintf(&newnames, "%s%s%s(%s%s%s)",
! 347: oldnames, sep1, name, sec, sep2, arch)) {
! 348: perror(0);
! 349: exit((int)MANDOCLEVEL_SYSERR);
! 350: }
! 351: free(names);
! 352: names = newnames;
! 353: }
! 354: if (SQLITE_DONE != c)
! 355: fprintf(stderr, "%s\n", sqlite3_errmsg(db));
! 356: sqlite3_reset(s);
! 357: return(names);
1.1 schwarze 358: }
359:
360: /*
361: * Implement substring match as an application-defined SQL function.
362: * Using the SQL LIKE or GLOB operators instead would be a bad idea
363: * because that would require escaping metacharacters in the string
364: * being searched for.
365: */
366: static void
367: sql_match(sqlite3_context *context, int argc, sqlite3_value **argv)
368: {
369:
370: assert(2 == argc);
371: sqlite3_result_int(context, NULL != strcasestr(
372: (const char *)sqlite3_value_text(argv[1]),
373: (const char *)sqlite3_value_text(argv[0])));
374: }
375:
376: /*
377: * Implement regular expression match
378: * as an application-defined SQL function.
379: */
380: static void
381: sql_regexp(sqlite3_context *context, int argc, sqlite3_value **argv)
382: {
383:
384: assert(2 == argc);
385: sqlite3_result_int(context, !regexec(
386: (regex_t *)sqlite3_value_blob(argv[0]),
387: (const char *)sqlite3_value_text(argv[1]),
388: 0, NULL, 0));
389: }
390:
391: /*
392: * Prepare the search SQL statement.
393: * We search for any of the words specified in our match expression.
394: * We filter the per-doc AND expressions when collecting results.
395: */
396: static char *
397: sql_statement(const struct expr *e, const char *arch, const char *sec)
398: {
399: char *sql;
400: const char *substr = "(key MATCH ? AND bits & ?)";
401: const char *regexp = "(key REGEXP ? AND bits & ?)";
402: const char *andarch = "arch = ? AND ";
403: const char *andsec = "sec = ? AND ";
404: size_t substrsz;
405: size_t regexpsz;
406: size_t sz;
407:
408: sql = mandoc_strdup
409: ("SELECT pageid,bits,key,file,desc,form,sec,arch "
410: "FROM keys "
411: "INNER JOIN mpages ON mpages.id=keys.pageid "
412: "WHERE ");
413: sz = strlen(sql);
414: substrsz = strlen(substr);
415: regexpsz = strlen(regexp);
416:
417: if (NULL != arch) {
418: sz += strlen(andarch) + 1;
419: sql = mandoc_realloc(sql, sz);
420: strlcat(sql, andarch, sz);
421: }
422:
423: if (NULL != sec) {
424: sz += strlen(andsec) + 1;
425: sql = mandoc_realloc(sql, sz);
426: strlcat(sql, andsec, sz);
427: }
428:
429: sz += 2;
430: sql = mandoc_realloc(sql, sz);
431: strlcat(sql, "(", sz);
432:
433: for ( ; NULL != e; e = e->next) {
434: sz += (NULL == e->substr ? regexpsz : substrsz) +
435: (NULL == e->next ? 3 : 5);
436: sql = mandoc_realloc(sql, sz);
437: strlcat(sql, NULL == e->substr ? regexp : substr, sz);
438: strlcat(sql, NULL == e->next ? ");" : " OR ", sz);
439: }
440:
441: return(sql);
442: }
443:
444: /*
445: * Compile a set of string tokens into an expression.
446: * Tokens in "argv" are assumed to be individual expression atoms (e.g.,
447: * "(", "foo=bar", etc.).
448: */
449: static struct expr *
450: exprcomp(const struct mansearch *search, int argc, char *argv[])
451: {
452: int i, cs;
453: struct expr *first, *next, *cur;
454:
455: first = cur = NULL;
456:
457: for (i = 0; i < argc; i++) {
458: if (0 == strcmp("-i", argv[i])) {
459: if (++i >= argc)
460: return(NULL);
461: cs = 0;
462: } else
463: cs = 1;
464: next = exprterm(search, argv[i], cs);
465: if (NULL == next) {
466: exprfree(first);
467: return(NULL);
468: }
469: if (NULL != first) {
470: cur->next = next;
471: cur = next;
472: } else
473: cur = first = next;
474: }
475:
476: return(first);
477: }
478:
479: static struct expr *
480: exprterm(const struct mansearch *search, char *buf, int cs)
481: {
482: struct expr *e;
483: char *key, *v;
484: size_t i;
485:
486: if ('\0' == *buf)
487: return(NULL);
488:
489: e = mandoc_calloc(1, sizeof(struct expr));
490:
491: /*"whatis" mode uses an opaque string and default fields. */
492:
493: if (MANSEARCH_WHATIS & search->flags) {
494: e->substr = buf;
495: e->bits = search->deftype;
496: return(e);
497: }
498:
499: /*
500: * If no =~ is specified, search with equality over names and
501: * descriptions.
502: * If =~ begins the phrase, use name and description fields.
503: */
504:
505: if (NULL == (v = strpbrk(buf, "=~"))) {
506: e->substr = buf;
507: e->bits = search->deftype;
508: return(e);
509: } else if (v == buf)
510: e->bits = search->deftype;
511:
512: if ('~' == *v++) {
513: if (regcomp(&e->regexp, v,
514: REG_EXTENDED | REG_NOSUB | (cs ? 0 : REG_ICASE))) {
515: free(e);
516: return(NULL);
517: }
518: } else
519: e->substr = v;
520: v[-1] = '\0';
521:
522: /*
523: * Parse out all possible fields.
524: * If the field doesn't resolve, bail.
525: */
526:
527: while (NULL != (key = strsep(&buf, ","))) {
528: if ('\0' == *key)
529: continue;
530: i = 0;
531: while (types[i].bits &&
532: strcasecmp(types[i].name, key))
533: i++;
534: if (0 == types[i].bits) {
535: free(e);
536: return(NULL);
537: }
538: e->bits |= types[i].bits;
539: }
540:
541: return(e);
542: }
543:
544: static void
545: exprfree(struct expr *p)
546: {
547: struct expr *pp;
548:
549: while (NULL != p) {
550: pp = p->next;
551: free(p);
552: p = pp;
553: }
554: }
555:
/*
 * Callback stored in the "halloc" member of the hash table
 * information: return "sz" bytes of zeroed memory.
 * The "arg" pointer is not used.
 */
static void *
hash_halloc(size_t sz, void *arg)
{
	void	*p;

	(void)arg;
	p = mandoc_calloc(sz, 1);
	return(p);
}
562:
/*
 * Callback stored in the "alloc" member of the hash table
 * information: return "sz" bytes of uninitialized memory.
 * The "arg" pointer is not used.
 */
static void *
hash_alloc(size_t sz, void *arg)
{
	void	*p;

	(void)arg;
	p = mandoc_malloc(sz);
	return(p);
}
569:
/*
 * Callback stored in the "hfree" member of the hash table
 * information: release the memory "p" points to.
 * Neither the size "sz" nor the "arg" pointer is used.
 */
static void
hash_free(void *p, size_t sz, void *arg)
{

	(void)sz;
	(void)arg;
	free(p);
}