Annotation of src/usr.bin/mandoc/mansearch.c, Revision 1.45
1.45 ! schwarze 1: /* $OpenBSD: mansearch.c,v 1.44 2015/04/01 12:48:00 schwarze Exp $ */
1.1 schwarze 2: /*
3: * Copyright (c) 2012 Kristaps Dzonsons <kristaps@bsd.lv>
1.40 schwarze 4: * Copyright (c) 2013, 2014, 2015 Ingo Schwarze <schwarze@openbsd.org>
1.1 schwarze 5: *
6: * Permission to use, copy, modify, and distribute this software for any
7: * purpose with or without fee is hereby granted, provided that the above
8: * copyright notice and this permission notice appear in all copies.
9: *
1.43 schwarze 10: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
1.1 schwarze 11: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
1.43 schwarze 12: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
1.1 schwarze 13: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17: */
1.33 schwarze 18:
1.19 schwarze 19: #include <sys/mman.h>
1.33 schwarze 20: #include <sys/types.h>
21:
1.1 schwarze 22: #include <assert.h>
1.39 schwarze 23: #include <errno.h>
1.1 schwarze 24: #include <fcntl.h>
25: #include <getopt.h>
1.41 schwarze 26: #include <glob.h>
1.1 schwarze 27: #include <limits.h>
28: #include <regex.h>
29: #include <stdio.h>
30: #include <stdint.h>
31: #include <stddef.h>
32: #include <stdlib.h>
33: #include <string.h>
34: #include <unistd.h>
35:
36: #include <ohash.h>
37: #include <sqlite3.h>
38:
39: #include "mandoc.h"
1.14 schwarze 40: #include "mandoc_aux.h"
1.43 schwarze 41: #include "manconf.h"
1.1 schwarze 42: #include "mansearch.h"
43:
1.11 schwarze 44: extern int mansearch_keymax;
45: extern const char *const mansearch_keynames[];
46:
/*
 * Helpers for binding the next parameter of a prepared statement.
 * Each macro binds _v at parameter index _i of statement _s and then
 * post-increments _i, so _i must be a modifiable lvalue.
 * If binding fails, the SQLite error message for connection _db
 * is printed to stderr and execution continues.
 * SQL_BIND_BLOB binds the object _v itself (its address and size),
 * so _v must be an lvalue of the type to be stored.
 */
#define	SQL_BIND_TEXT(_db, _s, _i, _v) \
	do { \
		if (sqlite3_bind_text((_s), (_i)++, (_v), -1, \
		    SQLITE_STATIC) != SQLITE_OK) \
			fprintf(stderr, "%s\n", sqlite3_errmsg((_db))); \
	} while (0)
#define	SQL_BIND_INT64(_db, _s, _i, _v) \
	do { \
		if (sqlite3_bind_int64((_s), (_i)++, (_v)) != SQLITE_OK) \
			fprintf(stderr, "%s\n", sqlite3_errmsg((_db))); \
	} while (0)
#define	SQL_BIND_BLOB(_db, _s, _i, _v) \
	do { \
		if (sqlite3_bind_blob((_s), (_i)++, (&_v), sizeof(_v), \
		    SQLITE_STATIC) != SQLITE_OK) \
			fprintf(stderr, "%s\n", sqlite3_errmsg((_db))); \
	} while (0)
62:
/*
 * One term of a compiled search expression.
 * Terms are chained through "next" in the order they were given.
 */
struct expr {
	regex_t		 regexp;  /* compiled regexp, used if substr is NULL */
	const char	*substr;  /* string to search for, or NULL for regexp */
	struct expr	*next;    /* following term, or NULL at the end */
	uint64_t	 bits;    /* mask of the key types to search */
	int		 equal;   /* equality, not substring match */
	int		 open;    /* count of opening parentheses before */
	int		 and;     /* joined to the previous term by AND */
	int		 close;   /* count of closing parentheses after */
};
73:
/*
 * One page found by the search query, kept in a hash table
 * keyed on pageid until the result array is built.
 */
struct match {
	uint64_t	 pageid; /* page identifier in the database; hash key */
	uint64_t	 bits;   /* name type mask */
	char		*desc;   /* page description; only set for Nd output */
	int		 form;   /* bit field: formatted, zipped? */
};
80:
1.40 schwarze 81: static void buildnames(const struct mansearch *,
82: struct manpage *, sqlite3 *,
1.10 schwarze 83: sqlite3_stmt *, uint64_t,
84: const char *, int form);
1.3 schwarze 85: static char *buildoutput(sqlite3 *, sqlite3_stmt *,
86: uint64_t, uint64_t);
1.1 schwarze 87: static void *hash_alloc(size_t, void *);
1.27 espie 88: static void hash_free(void *, void *);
89: static void *hash_calloc(size_t, size_t, void *);
1.24 schwarze 90: static struct expr *exprcomp(const struct mansearch *,
1.1 schwarze 91: int, char *[]);
92: static void exprfree(struct expr *);
93: static struct expr *exprterm(const struct mansearch *, char *, int);
1.29 schwarze 94: static int manpage_compare(const void *, const void *);
1.4 schwarze 95: static void sql_append(char **sql, size_t *sz,
96: const char *newstr, int count);
1.1 schwarze 97: static void sql_match(sqlite3_context *context,
98: int argc, sqlite3_value **argv);
99: static void sql_regexp(sqlite3_context *context,
100: int argc, sqlite3_value **argv);
1.6 schwarze 101: static char *sql_statement(const struct expr *);
1.19 schwarze 102:
1.24 schwarze 103:
1.19 schwarze 104: int
105: mansearch_setup(int start)
106: {
107: static void *pagecache;
108: int c;
109:
110: #define PC_PAGESIZE 1280
111: #define PC_NUMPAGES 256
112:
113: if (start) {
114: if (NULL != pagecache) {
115: fprintf(stderr, "pagecache already enabled\n");
1.45 ! schwarze 116: return (int)MANDOCLEVEL_BADARG;
1.19 schwarze 117: }
118:
119: pagecache = mmap(NULL, PC_PAGESIZE * PC_NUMPAGES,
1.31 schwarze 120: PROT_READ | PROT_WRITE,
121: MAP_SHARED | MAP_ANON, -1, 0);
1.19 schwarze 122:
123: if (MAP_FAILED == pagecache) {
124: perror("mmap");
125: pagecache = NULL;
1.45 ! schwarze 126: return (int)MANDOCLEVEL_SYSERR;
1.19 schwarze 127: }
128:
129: c = sqlite3_config(SQLITE_CONFIG_PAGECACHE,
130: pagecache, PC_PAGESIZE, PC_NUMPAGES);
131:
132: if (SQLITE_OK == c)
1.45 ! schwarze 133: return (int)MANDOCLEVEL_OK;
1.19 schwarze 134:
135: fprintf(stderr, "pagecache: %s\n", sqlite3_errstr(c));
136:
137: } else if (NULL == pagecache) {
138: fprintf(stderr, "pagecache missing\n");
1.45 ! schwarze 139: return (int)MANDOCLEVEL_BADARG;
1.19 schwarze 140: }
141:
142: if (-1 == munmap(pagecache, PC_PAGESIZE * PC_NUMPAGES)) {
143: perror("munmap");
144: pagecache = NULL;
1.45 ! schwarze 145: return (int)MANDOCLEVEL_SYSERR;
1.19 schwarze 146: }
147:
148: pagecache = NULL;
1.45 ! schwarze 149: return (int)MANDOCLEVEL_OK;
1.19 schwarze 150: }
1.1 schwarze 151:
152: int
153: mansearch(const struct mansearch *search,
1.3 schwarze 154: const struct manpaths *paths,
155: int argc, char *argv[],
1.1 schwarze 156: struct manpage **res, size_t *sz)
157: {
1.22 schwarze 158: int64_t pageid;
1.11 schwarze 159: uint64_t outbit, iterbit;
1.1 schwarze 160: char buf[PATH_MAX];
1.2 schwarze 161: char *sql;
1.1 schwarze 162: struct manpage *mpage;
163: struct expr *e, *ep;
164: sqlite3 *db;
1.3 schwarze 165: sqlite3_stmt *s, *s2;
1.1 schwarze 166: struct match *mp;
167: struct ohash_info info;
168: struct ohash htab;
169: unsigned int idx;
170: size_t i, j, cur, maxres;
1.44 schwarze 171: int c, chdir_status, getcwd_status, indexbit;
172:
173: if (argc == 0 || (e = exprcomp(search, argc, argv)) == NULL) {
174: *sz = 0;
1.45 ! schwarze 175: return 0;
1.44 schwarze 176: }
1.1 schwarze 177:
1.27 espie 178: info.calloc = hash_calloc;
1.1 schwarze 179: info.alloc = hash_alloc;
1.27 espie 180: info.free = hash_free;
1.22 schwarze 181: info.key_offset = offsetof(struct match, pageid);
1.1 schwarze 182:
1.44 schwarze 183: cur = maxres = 0;
1.1 schwarze 184: *res = NULL;
185:
1.33 schwarze 186: if (NULL != search->outkey) {
1.42 schwarze 187: outbit = TYPE_Nd;
1.11 schwarze 188: for (indexbit = 0, iterbit = 1;
189: indexbit < mansearch_keymax;
190: indexbit++, iterbit <<= 1) {
1.33 schwarze 191: if (0 == strcasecmp(search->outkey,
1.11 schwarze 192: mansearch_keynames[indexbit])) {
193: outbit = iterbit;
1.3 schwarze 194: break;
195: }
196: }
1.42 schwarze 197: } else
198: outbit = 0;
1.3 schwarze 199:
1.1 schwarze 200: /*
1.44 schwarze 201: * Remember the original working directory, if possible.
202: * This will be needed if the second or a later directory
203: * is given as a relative path.
204: * Do not error out if the current directory is not
205: * searchable: Maybe it won't be needed after all.
1.1 schwarze 206: */
207:
1.44 schwarze 208: if (getcwd(buf, PATH_MAX) == NULL) {
209: getcwd_status = 0;
210: (void)strlcpy(buf, strerror(errno), sizeof(buf));
211: } else
212: getcwd_status = 1;
1.1 schwarze 213:
1.6 schwarze 214: sql = sql_statement(e);
1.1 schwarze 215:
216: /*
217: * Loop over the directories (containing databases) for us to
218: * search.
219: * Don't let missing/bad databases/directories phase us.
220: * In each, try to open the resident database and, if it opens,
221: * scan it for our match expression.
222: */
223:
1.44 schwarze 224: chdir_status = 0;
1.1 schwarze 225: for (i = 0; i < paths->sz; i++) {
1.44 schwarze 226: if (chdir_status && paths->paths[i][0] != '/') {
227: if ( ! getcwd_status) {
228: fprintf(stderr, "%s: getcwd: %s\n",
229: paths->paths[i], buf);
230: continue;
231: } else if (chdir(buf) == -1) {
232: perror(buf);
233: continue;
234: }
235: }
236: if (chdir(paths->paths[i]) == -1) {
1.1 schwarze 237: perror(paths->paths[i]);
238: continue;
1.24 schwarze 239: }
1.44 schwarze 240: chdir_status = 1;
1.1 schwarze 241:
1.24 schwarze 242: c = sqlite3_open_v2(MANDOC_DB, &db,
243: SQLITE_OPEN_READONLY, NULL);
1.1 schwarze 244:
245: if (SQLITE_OK != c) {
1.39 schwarze 246: fprintf(stderr, "%s/%s: %s\n",
247: paths->paths[i], MANDOC_DB, strerror(errno));
1.1 schwarze 248: sqlite3_close(db);
249: continue;
250: }
251:
252: /*
253: * Define the SQL functions for substring
254: * and regular expression matching.
255: */
256:
257: c = sqlite3_create_function(db, "match", 2,
1.21 schwarze 258: SQLITE_UTF8 | SQLITE_DETERMINISTIC,
259: NULL, sql_match, NULL, NULL);
1.1 schwarze 260: assert(SQLITE_OK == c);
261: c = sqlite3_create_function(db, "regexp", 2,
1.21 schwarze 262: SQLITE_UTF8 | SQLITE_DETERMINISTIC,
263: NULL, sql_regexp, NULL, NULL);
1.1 schwarze 264: assert(SQLITE_OK == c);
265:
266: j = 1;
267: c = sqlite3_prepare_v2(db, sql, -1, &s, NULL);
268: if (SQLITE_OK != c)
269: fprintf(stderr, "%s\n", sqlite3_errmsg(db));
270:
271: for (ep = e; NULL != ep; ep = ep->next) {
272: if (NULL == ep->substr) {
273: SQL_BIND_BLOB(db, s, j, ep->regexp);
274: } else
275: SQL_BIND_TEXT(db, s, j, ep->substr);
1.18 schwarze 276: if (0 == ((TYPE_Nd | TYPE_Nm) & ep->bits))
1.17 schwarze 277: SQL_BIND_INT64(db, s, j, ep->bits);
1.1 schwarze 278: }
279:
280: memset(&htab, 0, sizeof(struct ohash));
281: ohash_init(&htab, 4, &info);
282:
283: /*
284: * Hash each entry on its [unique] document identifier.
285: * This is a uint64_t.
286: * Instead of using a hash function, simply convert the
287: * uint64_t to a uint32_t, the hash value's type.
288: * This gives good performance and preserves the
289: * distribution of buckets in the table.
290: */
291: while (SQLITE_ROW == (c = sqlite3_step(s))) {
1.22 schwarze 292: pageid = sqlite3_column_int64(s, 2);
1.24 schwarze 293: idx = ohash_lookup_memory(&htab,
294: (char *)&pageid, sizeof(uint64_t),
295: (uint32_t)pageid);
1.1 schwarze 296:
297: if (NULL != ohash_find(&htab, idx))
298: continue;
299:
300: mp = mandoc_calloc(1, sizeof(struct match));
1.22 schwarze 301: mp->pageid = pageid;
1.17 schwarze 302: mp->form = sqlite3_column_int(s, 1);
1.37 schwarze 303: mp->bits = sqlite3_column_int64(s, 3);
1.17 schwarze 304: if (TYPE_Nd == outbit)
1.30 schwarze 305: mp->desc = mandoc_strdup((const char *)
1.17 schwarze 306: sqlite3_column_text(s, 0));
1.1 schwarze 307: ohash_insert(&htab, idx, mp);
308: }
309:
310: if (SQLITE_DONE != c)
311: fprintf(stderr, "%s\n", sqlite3_errmsg(db));
312:
313: sqlite3_finalize(s);
314:
1.24 schwarze 315: c = sqlite3_prepare_v2(db,
1.25 schwarze 316: "SELECT sec, arch, name, pageid FROM mlinks "
317: "WHERE pageid=? ORDER BY sec, arch, name",
1.1 schwarze 318: -1, &s, NULL);
319: if (SQLITE_OK != c)
320: fprintf(stderr, "%s\n", sqlite3_errmsg(db));
321:
1.3 schwarze 322: c = sqlite3_prepare_v2(db,
1.25 schwarze 323: "SELECT bits, key, pageid FROM keys "
324: "WHERE pageid=? AND bits & ?",
1.3 schwarze 325: -1, &s2, NULL);
326: if (SQLITE_OK != c)
327: fprintf(stderr, "%s\n", sqlite3_errmsg(db));
328:
1.1 schwarze 329: for (mp = ohash_first(&htab, &idx);
330: NULL != mp;
331: mp = ohash_next(&htab, &idx)) {
332: if (cur + 1 > maxres) {
333: maxres += 1024;
1.26 schwarze 334: *res = mandoc_reallocarray(*res,
335: maxres, sizeof(struct manpage));
1.1 schwarze 336: }
337: mpage = *res + cur;
1.34 schwarze 338: mpage->ipath = i;
1.37 schwarze 339: mpage->bits = mp->bits;
1.29 schwarze 340: mpage->sec = 10;
1.1 schwarze 341: mpage->form = mp->form;
1.40 schwarze 342: buildnames(search, mpage, db, s, mp->pageid,
1.10 schwarze 343: paths->paths[i], mp->form);
1.40 schwarze 344: if (mpage->names != NULL) {
345: mpage->output = TYPE_Nd & outbit ?
346: mp->desc : outbit ?
347: buildoutput(db, s2, mp->pageid, outbit) :
348: NULL;
349: cur++;
350: }
1.1 schwarze 351: free(mp);
352: }
353:
354: sqlite3_finalize(s);
1.3 schwarze 355: sqlite3_finalize(s2);
1.1 schwarze 356: sqlite3_close(db);
357: ohash_delete(&htab);
1.36 schwarze 358:
359: /*
360: * In man(1) mode, prefer matches in earlier trees
361: * over matches in later trees.
362: */
363:
364: if (cur && search->firstmatch)
365: break;
1.1 schwarze 366: }
1.29 schwarze 367: qsort(*res, cur, sizeof(struct manpage), manpage_compare);
1.44 schwarze 368: if (chdir_status && getcwd_status && chdir(buf) == -1)
369: perror(buf);
1.1 schwarze 370: exprfree(e);
371: free(sql);
372: *sz = cur;
1.45 ! schwarze 373: return 1;
1.2 schwarze 374: }
375:
1.33 schwarze 376: void
377: mansearch_free(struct manpage *res, size_t sz)
378: {
379: size_t i;
380:
381: for (i = 0; i < sz; i++) {
382: free(res[i].file);
383: free(res[i].names);
384: free(res[i].output);
385: }
386: free(res);
387: }
388:
1.29 schwarze 389: static int
390: manpage_compare(const void *vp1, const void *vp2)
391: {
392: const struct manpage *mp1, *mp2;
393: int diff;
394:
395: mp1 = vp1;
396: mp2 = vp2;
1.45 ! schwarze 397: return (diff = mp2->bits - mp1->bits) ? diff :
! 398: (diff = mp1->sec - mp2->sec) ? diff :
! 399: strcasecmp(mp1->names, mp2->names);
1.29 schwarze 400: }
401:
1.8 schwarze 402: static void
1.40 schwarze 403: buildnames(const struct mansearch *search, struct manpage *mpage,
404: sqlite3 *db, sqlite3_stmt *s,
1.22 schwarze 405: uint64_t pageid, const char *path, int form)
1.2 schwarze 406: {
1.41 schwarze 407: glob_t globinfo;
408: char *firstname, *newnames, *prevsec, *prevarch;
1.10 schwarze 409: const char *oldnames, *sep1, *name, *sec, *sep2, *arch, *fsec;
1.2 schwarze 410: size_t i;
1.41 schwarze 411: int c, globres;
1.2 schwarze 412:
1.16 schwarze 413: mpage->file = NULL;
1.8 schwarze 414: mpage->names = NULL;
1.41 schwarze 415: firstname = prevsec = prevarch = NULL;
1.2 schwarze 416: i = 1;
1.22 schwarze 417: SQL_BIND_INT64(db, s, i, pageid);
1.2 schwarze 418: while (SQLITE_ROW == (c = sqlite3_step(s))) {
1.8 schwarze 419:
1.13 schwarze 420: /* Decide whether we already have some names. */
1.8 schwarze 421:
422: if (NULL == mpage->names) {
1.2 schwarze 423: oldnames = "";
424: sep1 = "";
425: } else {
1.8 schwarze 426: oldnames = mpage->names;
1.2 schwarze 427: sep1 = ", ";
428: }
1.13 schwarze 429:
1.40 schwarze 430: /* Fetch the next name, rejecting sec/arch mismatches. */
1.13 schwarze 431:
1.30 schwarze 432: sec = (const char *)sqlite3_column_text(s, 0);
1.40 schwarze 433: if (search->sec != NULL && strcasecmp(sec, search->sec))
434: continue;
1.30 schwarze 435: arch = (const char *)sqlite3_column_text(s, 1);
1.40 schwarze 436: if (search->arch != NULL && *arch != '\0' &&
437: strcasecmp(arch, search->arch))
438: continue;
1.30 schwarze 439: name = (const char *)sqlite3_column_text(s, 2);
1.29 schwarze 440:
441: /* Remember the first section found. */
442:
443: if (9 < mpage->sec && '1' <= *sec && '9' >= *sec)
444: mpage->sec = (*sec - '1') + 1;
1.13 schwarze 445:
446: /* If the section changed, append the old one. */
447:
448: if (NULL != prevsec &&
449: (strcmp(sec, prevsec) ||
450: strcmp(arch, prevarch))) {
451: sep2 = '\0' == *prevarch ? "" : "/";
1.15 schwarze 452: mandoc_asprintf(&newnames, "%s(%s%s%s)",
453: oldnames, prevsec, sep2, prevarch);
1.13 schwarze 454: free(mpage->names);
455: oldnames = mpage->names = newnames;
456: free(prevsec);
457: free(prevarch);
458: prevsec = prevarch = NULL;
459: }
460:
461: /* Save the new section, to append it later. */
462:
463: if (NULL == prevsec) {
464: prevsec = mandoc_strdup(sec);
465: prevarch = mandoc_strdup(arch);
466: }
467:
468: /* Append the new name. */
469:
1.15 schwarze 470: mandoc_asprintf(&newnames, "%s%s%s",
471: oldnames, sep1, name);
1.8 schwarze 472: free(mpage->names);
473: mpage->names = newnames;
474:
475: /* Also save the first file name encountered. */
476:
1.38 schwarze 477: if (mpage->file != NULL)
1.8 schwarze 478: continue;
479:
1.35 schwarze 480: if (form & FORM_SRC) {
1.10 schwarze 481: sep1 = "man";
482: fsec = sec;
483: } else {
484: sep1 = "cat";
485: fsec = "0";
486: }
1.38 schwarze 487: sep2 = *arch == '\0' ? "" : "/";
488: mandoc_asprintf(&mpage->file, "%s/%s%s%s%s/%s.%s",
489: path, sep1, sec, sep2, arch, name, fsec);
1.41 schwarze 490: if (access(mpage->file, R_OK) != -1)
491: continue;
492:
493: /* Handle unusual file name extensions. */
494:
495: if (firstname == NULL)
496: firstname = mpage->file;
497: else
498: free(mpage->file);
499: mandoc_asprintf(&mpage->file, "%s/%s%s%s%s/%s.*",
500: path, sep1, sec, sep2, arch, name);
501: globres = glob(mpage->file, 0, NULL, &globinfo);
502: free(mpage->file);
503: mpage->file = globres ? NULL :
504: mandoc_strdup(*globinfo.gl_pathv);
505: globfree(&globinfo);
1.2 schwarze 506: }
1.38 schwarze 507: if (c != SQLITE_DONE)
1.2 schwarze 508: fprintf(stderr, "%s\n", sqlite3_errmsg(db));
509: sqlite3_reset(s);
1.41 schwarze 510:
511: /* If none of the files is usable, use the first name. */
512:
513: if (mpage->file == NULL)
514: mpage->file = firstname;
515: else if (mpage->file != firstname)
516: free(firstname);
1.13 schwarze 517:
518: /* Append one final section to the names. */
519:
1.38 schwarze 520: if (prevsec != NULL) {
521: sep2 = *prevarch == '\0' ? "" : "/";
1.15 schwarze 522: mandoc_asprintf(&newnames, "%s(%s%s%s)",
523: mpage->names, prevsec, sep2, prevarch);
1.13 schwarze 524: free(mpage->names);
525: mpage->names = newnames;
526: free(prevsec);
527: free(prevarch);
528: }
1.3 schwarze 529: }
530:
531: static char *
1.22 schwarze 532: buildoutput(sqlite3 *db, sqlite3_stmt *s, uint64_t pageid, uint64_t outbit)
1.3 schwarze 533: {
534: char *output, *newoutput;
535: const char *oldoutput, *sep1, *data;
536: size_t i;
537: int c;
538:
539: output = NULL;
540: i = 1;
1.22 schwarze 541: SQL_BIND_INT64(db, s, i, pageid);
1.3 schwarze 542: SQL_BIND_INT64(db, s, i, outbit);
543: while (SQLITE_ROW == (c = sqlite3_step(s))) {
544: if (NULL == output) {
545: oldoutput = "";
546: sep1 = "";
547: } else {
548: oldoutput = output;
549: sep1 = " # ";
550: }
1.30 schwarze 551: data = (const char *)sqlite3_column_text(s, 1);
1.15 schwarze 552: mandoc_asprintf(&newoutput, "%s%s%s",
553: oldoutput, sep1, data);
1.3 schwarze 554: free(output);
555: output = newoutput;
556: }
557: if (SQLITE_DONE != c)
558: fprintf(stderr, "%s\n", sqlite3_errmsg(db));
559: sqlite3_reset(s);
1.45 ! schwarze 560: return output;
1.1 schwarze 561: }
562:
563: /*
564: * Implement substring match as an application-defined SQL function.
565: * Using the SQL LIKE or GLOB operators instead would be a bad idea
566: * because that would require escaping metacharacters in the string
567: * being searched for.
568: */
569: static void
570: sql_match(sqlite3_context *context, int argc, sqlite3_value **argv)
571: {
572:
573: assert(2 == argc);
574: sqlite3_result_int(context, NULL != strcasestr(
575: (const char *)sqlite3_value_text(argv[1]),
576: (const char *)sqlite3_value_text(argv[0])));
577: }
578:
579: /*
580: * Implement regular expression match
581: * as an application-defined SQL function.
582: */
583: static void
584: sql_regexp(sqlite3_context *context, int argc, sqlite3_value **argv)
585: {
586:
587: assert(2 == argc);
588: sqlite3_result_int(context, !regexec(
589: (regex_t *)sqlite3_value_blob(argv[0]),
590: (const char *)sqlite3_value_text(argv[1]),
591: 0, NULL, 0));
592: }
593:
/*
 * Append to the string *sql of length *sz, growing the buffer.
 * If count is greater than one, append count copies of the first
 * character of newstr; otherwise, append all of newstr.
 * *sz is updated and the result is NUL-terminated.
 */
static void
sql_append(char **sql, size_t *sz, const char *newstr, int count)
{
	char	*tail;
	size_t	 addsz;
	int	 repeat;

	repeat = count > 1;
	addsz = repeat ? (size_t)count : strlen(newstr);

	*sql = mandoc_realloc(*sql, *sz + addsz + 1);
	tail = *sql + *sz;
	if (repeat)
		memset(tail, *newstr, (size_t)count);
	else
		memcpy(tail, newstr, addsz);
	tail[addsz] = '\0';
	*sz += addsz;
}
608:
1.1 schwarze 609: /*
610: * Prepare the search SQL statement.
611: */
612: static char *
1.6 schwarze 613: sql_statement(const struct expr *e)
1.1 schwarze 614: {
615: char *sql;
616: size_t sz;
1.4 schwarze 617: int needop;
1.1 schwarze 618:
1.37 schwarze 619: sql = mandoc_strdup(e->equal ?
620: "SELECT desc, form, pageid, bits "
621: "FROM mpages NATURAL JOIN names WHERE " :
622: "SELECT desc, form, pageid, 0 FROM mpages WHERE ");
1.1 schwarze 623: sz = strlen(sql);
624:
1.4 schwarze 625: for (needop = 0; NULL != e; e = e->next) {
626: if (e->and)
627: sql_append(&sql, &sz, " AND ", 1);
628: else if (needop)
629: sql_append(&sql, &sz, " OR ", 1);
630: if (e->open)
631: sql_append(&sql, &sz, "(", e->open);
1.17 schwarze 632: sql_append(&sql, &sz,
633: TYPE_Nd & e->bits
634: ? (NULL == e->substr
635: ? "desc REGEXP ?"
636: : "desc MATCH ?")
1.18 schwarze 637: : TYPE_Nm == e->bits
638: ? (NULL == e->substr
1.22 schwarze 639: ? "pageid IN (SELECT pageid FROM names "
1.18 schwarze 640: "WHERE name REGEXP ?)"
1.28 schwarze 641: : e->equal
1.37 schwarze 642: ? "name = ? "
1.22 schwarze 643: : "pageid IN (SELECT pageid FROM names "
1.18 schwarze 644: "WHERE name MATCH ?)")
1.17 schwarze 645: : (NULL == e->substr
1.22 schwarze 646: ? "pageid IN (SELECT pageid FROM keys "
1.17 schwarze 647: "WHERE key REGEXP ? AND bits & ?)"
1.22 schwarze 648: : "pageid IN (SELECT pageid FROM keys "
1.17 schwarze 649: "WHERE key MATCH ? AND bits & ?)"), 1);
1.4 schwarze 650: if (e->close)
651: sql_append(&sql, &sz, ")", e->close);
652: needop = 1;
1.1 schwarze 653: }
654:
1.45 ! schwarze 655: return sql;
1.1 schwarze 656: }
657:
658: /*
659: * Compile a set of string tokens into an expression.
660: * Tokens in "argv" are assumed to be individual expression atoms (e.g.,
661: * "(", "foo=bar", etc.).
662: */
663: static struct expr *
664: exprcomp(const struct mansearch *search, int argc, char *argv[])
665: {
1.18 schwarze 666: uint64_t mask;
1.4 schwarze 667: int i, toopen, logic, igncase, toclose;
1.18 schwarze 668: struct expr *first, *prev, *cur, *next;
1.1 schwarze 669:
670: first = cur = NULL;
1.40 schwarze 671: logic = igncase = toopen = toclose = 0;
1.1 schwarze 672:
673: for (i = 0; i < argc; i++) {
1.4 schwarze 674: if (0 == strcmp("(", argv[i])) {
675: if (igncase)
676: goto fail;
677: toopen++;
678: toclose++;
679: continue;
680: } else if (0 == strcmp(")", argv[i])) {
681: if (toopen || logic || igncase || NULL == cur)
682: goto fail;
683: cur->close++;
684: if (0 > --toclose)
685: goto fail;
686: continue;
687: } else if (0 == strcmp("-a", argv[i])) {
688: if (toopen || logic || igncase || NULL == cur)
689: goto fail;
690: logic = 1;
691: continue;
692: } else if (0 == strcmp("-o", argv[i])) {
693: if (toopen || logic || igncase || NULL == cur)
694: goto fail;
695: logic = 2;
696: continue;
697: } else if (0 == strcmp("-i", argv[i])) {
698: if (igncase)
699: goto fail;
700: igncase = 1;
701: continue;
1.1 schwarze 702: }
1.4 schwarze 703: next = exprterm(search, argv[i], !igncase);
704: if (NULL == next)
705: goto fail;
1.17 schwarze 706: if (NULL == first)
707: first = next;
708: else
1.1 schwarze 709: cur->next = next;
1.18 schwarze 710: prev = cur = next;
1.17 schwarze 711:
712: /*
713: * Searching for descriptions must be split out
714: * because they are stored in the mpages table,
715: * not in the keys table.
716: */
717:
1.18 schwarze 718: for (mask = TYPE_Nm; mask <= TYPE_Nd; mask <<= 1) {
719: if (mask & cur->bits && ~mask & cur->bits) {
720: next = mandoc_calloc(1,
721: sizeof(struct expr));
722: memcpy(next, cur, sizeof(struct expr));
723: prev->open = 1;
724: cur->bits = mask;
725: cur->next = next;
726: cur = next;
727: cur->bits &= ~mask;
728: }
729: }
730: prev->and = (1 == logic);
731: prev->open += toopen;
732: if (cur != prev)
1.17 schwarze 733: cur->close = 1;
1.18 schwarze 734:
1.4 schwarze 735: toopen = logic = igncase = 0;
1.1 schwarze 736: }
1.40 schwarze 737: if ( ! (toopen || logic || igncase || toclose))
1.45 ! schwarze 738: return first;
1.6 schwarze 739:
1.4 schwarze 740: fail:
741: if (NULL != first)
742: exprfree(first);
1.45 ! schwarze 743: return NULL;
1.6 schwarze 744: }
745:
746: static struct expr *
1.1 schwarze 747: exprterm(const struct mansearch *search, char *buf, int cs)
748: {
1.6 schwarze 749: char errbuf[BUFSIZ];
1.1 schwarze 750: struct expr *e;
1.28 schwarze 751: char *key, *val;
1.11 schwarze 752: uint64_t iterbit;
753: int i, irc;
1.1 schwarze 754:
755: if ('\0' == *buf)
1.45 ! schwarze 756: return NULL;
1.1 schwarze 757:
758: e = mandoc_calloc(1, sizeof(struct expr));
759:
1.33 schwarze 760: if (search->argmode == ARG_NAME) {
761: e->bits = TYPE_Nm;
1.1 schwarze 762: e->substr = buf;
1.28 schwarze 763: e->equal = 1;
1.45 ! schwarze 764: return e;
1.1 schwarze 765: }
766:
767: /*
1.33 schwarze 768: * Separate macro keys from search string.
769: * If needed, request regular expression handling
770: * by setting e->substr to NULL.
1.1 schwarze 771: */
772:
1.33 schwarze 773: if (search->argmode == ARG_WORD) {
774: e->bits = TYPE_Nm;
775: e->substr = NULL;
776: mandoc_asprintf(&val, "[[:<:]]%s[[:>:]]", buf);
777: cs = 0;
778: } else if ((val = strpbrk(buf, "=~")) == NULL) {
779: e->bits = TYPE_Nm | TYPE_Nd;
1.1 schwarze 780: e->substr = buf;
1.28 schwarze 781: } else {
782: if (val == buf)
1.33 schwarze 783: e->bits = TYPE_Nm | TYPE_Nd;
1.28 schwarze 784: if ('=' == *val)
785: e->substr = val + 1;
786: *val++ = '\0';
1.12 schwarze 787: if (NULL != strstr(buf, "arch"))
788: cs = 0;
1.28 schwarze 789: }
790:
791: /* Compile regular expressions. */
792:
793: if (NULL == e->substr) {
794: irc = regcomp(&e->regexp, val,
795: REG_EXTENDED | REG_NOSUB | (cs ? 0 : REG_ICASE));
1.33 schwarze 796: if (search->argmode == ARG_WORD)
1.28 schwarze 797: free(val);
798: if (irc) {
1.6 schwarze 799: regerror(irc, &e->regexp, errbuf, sizeof(errbuf));
800: fprintf(stderr, "regcomp: %s\n", errbuf);
1.1 schwarze 801: free(e);
1.45 ! schwarze 802: return NULL;
1.1 schwarze 803: }
1.28 schwarze 804: }
805:
806: if (e->bits)
1.45 ! schwarze 807: return e;
1.1 schwarze 808:
809: /*
810: * Parse out all possible fields.
811: * If the field doesn't resolve, bail.
812: */
813:
814: while (NULL != (key = strsep(&buf, ","))) {
815: if ('\0' == *key)
816: continue;
1.11 schwarze 817: for (i = 0, iterbit = 1;
818: i < mansearch_keymax;
819: i++, iterbit <<= 1) {
820: if (0 == strcasecmp(key,
821: mansearch_keynames[i])) {
822: e->bits |= iterbit;
823: break;
824: }
825: }
826: if (i == mansearch_keymax) {
827: if (strcasecmp(key, "any")) {
828: free(e);
1.45 ! schwarze 829: return NULL;
1.11 schwarze 830: }
831: e->bits |= ~0ULL;
1.1 schwarze 832: }
833: }
834:
1.45 ! schwarze 835: return e;
1.1 schwarze 836: }
837:
838: static void
839: exprfree(struct expr *p)
840: {
841: struct expr *pp;
842:
843: while (NULL != p) {
844: pp = p->next;
845: free(p);
846: p = pp;
847: }
848: }
849:
/*
 * Zeroing allocation callback for ohash; the argument is unused.
 */
static void *
hash_calloc(size_t nmemb, size_t sz, void *arg)
{
	void	*mem;

	mem = mandoc_calloc(nmemb, sz);
	return mem;
}
856:
/*
 * Allocation callback for ohash; the argument is unused.
 */
static void *
hash_alloc(size_t sz, void *arg)
{
	void	*mem;

	mem = mandoc_malloc(sz);
	return mem;
}
863:
/*
 * Deallocation callback for ohash; the argument is unused.
 */
static void
hash_free(void *p, void *arg)
{
	void	*mem = p;

	free(mem);
}