Annotation of src/usr.bin/mandoc/mansearch.c, Revision 1.32
1.32 ! schwarze 1: /* $Id: mansearch.c,v 1.31 2014/08/09 14:24:49 schwarze Exp $ */
1.1 schwarze 2: /*
3: * Copyright (c) 2012 Kristaps Dzonsons <kristaps@bsd.lv>
1.4 schwarze 4: * Copyright (c) 2013, 2014 Ingo Schwarze <schwarze@openbsd.org>
1.1 schwarze 5: *
6: * Permission to use, copy, modify, and distribute this software for any
7: * purpose with or without fee is hereby granted, provided that the above
8: * copyright notice and this permission notice appear in all copies.
9: *
10: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17: */
1.19 schwarze 18: #include <sys/mman.h>
1.1 schwarze 19: #include <assert.h>
20: #include <fcntl.h>
21: #include <getopt.h>
22: #include <limits.h>
23: #include <regex.h>
24: #include <stdio.h>
25: #include <stdint.h>
26: #include <stddef.h>
27: #include <stdlib.h>
28: #include <string.h>
29: #include <unistd.h>
30:
31: #include <ohash.h>
32: #include <sqlite3.h>
33:
34: #include "mandoc.h"
1.14 schwarze 35: #include "mandoc_aux.h"
1.1 schwarze 36: #include "manpath.h"
37: #include "mansearch.h"
38:
/*
 * Table of macro key names and the number of entries to scan in it.
 * Both are defined in another translation unit; bit number n of a
 * type mask corresponds to mansearch_keynames[n].
 */
1.11 schwarze 39: extern int mansearch_keymax;
40: extern const char *const mansearch_keynames[];
41:
/*
 * Helpers binding one value to parameter number _i of the prepared
 * statement _s.  Each macro post-increments _i, so _i has to be a
 * modifiable lvalue and must not have side effects of its own.
 * A failed bind is not fatal: the error message of the database
 * connection _db is printed to stderr and execution continues.
 * Text and blobs are bound with SQLITE_STATIC.
 * SQL_BIND_BLOB binds the bytes of the object _v itself
 * (&_v, sizeof(_v)), not memory that _v may point to.
 */
1.1 schwarze 42: #define SQL_BIND_TEXT(_db, _s, _i, _v) \
43: do { if (SQLITE_OK != sqlite3_bind_text \
44: ((_s), (_i)++, (_v), -1, SQLITE_STATIC)) \
45: fprintf(stderr, "%s\n", sqlite3_errmsg((_db))); \
46: } while (0)
47: #define SQL_BIND_INT64(_db, _s, _i, _v) \
48: do { if (SQLITE_OK != sqlite3_bind_int64 \
49: ((_s), (_i)++, (_v))) \
50: fprintf(stderr, "%s\n", sqlite3_errmsg((_db))); \
51: } while (0)
52: #define SQL_BIND_BLOB(_db, _s, _i, _v) \
53: do { if (SQLITE_OK != sqlite3_bind_blob \
54: ((_s), (_i)++, (&_v), sizeof(_v), SQLITE_STATIC)) \
55: fprintf(stderr, "%s\n", sqlite3_errmsg((_db))); \
56: } while (0)
57:
/*
 * One term of a compiled search expression.
 * Terms form a singly linked list through "next"; the logical
 * structure is encoded by the "and", "open" and "close" fields.
 * A term is a regular expression search when "substr" is NULL.
 */
58: struct expr {
1.28 schwarze 59: regex_t regexp; /* compiled regexp, if applicable */
60: const char *substr; /* to search for, if applicable */
61: struct expr *next; /* next in sequence */
1.24 schwarze 62: uint64_t bits; /* type-mask */
1.28 schwarze 63: int equal; /* equality, not substring match */
1.4 schwarze 64: int open; /* opening parentheses before */
65: int and; /* logical AND before */
66: int close; /* closing parentheses after */
1.1 schwarze 67: };
68:
/*
 * One page found by the search query.
 * Entries are kept in a hash table keyed on "pageid" so that
 * each page is reported only once per database.
 */
69: struct match {
1.22 schwarze 70: uint64_t pageid; /* identifier in database */
1.17 schwarze 71: char *desc; /* manual page description */
1.1 schwarze 72: int form; /* 0 == catpage */
73: };
74:
/* Forward declarations of the file-local helper functions. */
1.8 schwarze 75: static void buildnames(struct manpage *, sqlite3 *,
1.10 schwarze 76: sqlite3_stmt *, uint64_t,
77: const char *, int form);
1.3 schwarze 78: static char *buildoutput(sqlite3 *, sqlite3_stmt *,
79: uint64_t, uint64_t);
1.1 schwarze 80: static void *hash_alloc(size_t, void *);
1.27 espie 81: static void hash_free(void *, void *);
82: static void *hash_calloc(size_t, size_t, void *);
1.24 schwarze 83: static struct expr *exprcomp(const struct mansearch *,
1.1 schwarze 84: int, char *[]);
85: static void exprfree(struct expr *);
1.6 schwarze 86: static struct expr *exprspec(struct expr *, uint64_t,
87: const char *, const char *);
1.1 schwarze 88: static struct expr *exprterm(const struct mansearch *, char *, int);
1.29 schwarze 89: static int manpage_compare(const void *, const void *);
1.4 schwarze 90: static void sql_append(char **sql, size_t *sz,
91: const char *newstr, int count);
1.1 schwarze 92: static void sql_match(sqlite3_context *context,
93: int argc, sqlite3_value **argv);
94: static void sql_regexp(sqlite3_context *context,
95: int argc, sqlite3_value **argv);
1.6 schwarze 96: static char *sql_statement(const struct expr *);
1.19 schwarze 97:
1.24 schwarze 98:
1.19 schwarze 99: int
100: mansearch_setup(int start)
101: {
102: static void *pagecache;
103: int c;
104:
105: #define PC_PAGESIZE 1280
106: #define PC_NUMPAGES 256
107:
108: if (start) {
109: if (NULL != pagecache) {
110: fprintf(stderr, "pagecache already enabled\n");
111: return((int)MANDOCLEVEL_BADARG);
112: }
113:
114: pagecache = mmap(NULL, PC_PAGESIZE * PC_NUMPAGES,
1.31 schwarze 115: PROT_READ | PROT_WRITE,
116: MAP_SHARED | MAP_ANON, -1, 0);
1.19 schwarze 117:
118: if (MAP_FAILED == pagecache) {
119: perror("mmap");
120: pagecache = NULL;
121: return((int)MANDOCLEVEL_SYSERR);
122: }
123:
124: c = sqlite3_config(SQLITE_CONFIG_PAGECACHE,
125: pagecache, PC_PAGESIZE, PC_NUMPAGES);
126:
127: if (SQLITE_OK == c)
128: return((int)MANDOCLEVEL_OK);
129:
130: fprintf(stderr, "pagecache: %s\n", sqlite3_errstr(c));
131:
132: } else if (NULL == pagecache) {
133: fprintf(stderr, "pagecache missing\n");
134: return((int)MANDOCLEVEL_BADARG);
135: }
136:
137: if (-1 == munmap(pagecache, PC_PAGESIZE * PC_NUMPAGES)) {
138: perror("munmap");
139: pagecache = NULL;
140: return((int)MANDOCLEVEL_SYSERR);
141: }
142:
143: pagecache = NULL;
144: return((int)MANDOCLEVEL_OK);
145: }
1.1 schwarze 146:
147: int
148: mansearch(const struct mansearch *search,
1.3 schwarze 149: const struct manpaths *paths,
150: int argc, char *argv[],
151: const char *outkey,
1.1 schwarze 152: struct manpage **res, size_t *sz)
153: {
1.11 schwarze 154: int fd, rc, c, indexbit;
1.22 schwarze 155: int64_t pageid;
1.11 schwarze 156: uint64_t outbit, iterbit;
1.1 schwarze 157: char buf[PATH_MAX];
1.2 schwarze 158: char *sql;
1.1 schwarze 159: struct manpage *mpage;
160: struct expr *e, *ep;
161: sqlite3 *db;
1.3 schwarze 162: sqlite3_stmt *s, *s2;
1.1 schwarze 163: struct match *mp;
164: struct ohash_info info;
165: struct ohash htab;
166: unsigned int idx;
167: size_t i, j, cur, maxres;
168:
1.27 espie 169: info.calloc = hash_calloc;
1.1 schwarze 170: info.alloc = hash_alloc;
1.27 espie 171: info.free = hash_free;
1.22 schwarze 172: info.key_offset = offsetof(struct match, pageid);
1.1 schwarze 173:
174: *sz = cur = maxres = 0;
175: sql = NULL;
176: *res = NULL;
177: fd = -1;
178: e = NULL;
179: rc = 0;
180:
181: if (0 == argc)
182: goto out;
183: if (NULL == (e = exprcomp(search, argc, argv)))
184: goto out;
185:
1.3 schwarze 186: outbit = 0;
187: if (NULL != outkey) {
1.11 schwarze 188: for (indexbit = 0, iterbit = 1;
189: indexbit < mansearch_keymax;
190: indexbit++, iterbit <<= 1) {
191: if (0 == strcasecmp(outkey,
192: mansearch_keynames[indexbit])) {
193: outbit = iterbit;
1.3 schwarze 194: break;
195: }
196: }
197: }
198:
1.1 schwarze 199: /*
200: * Save a descriptor to the current working directory.
201: * Since pathnames in the "paths" variable might be relative,
202: * and we'll be chdir()ing into them, we need to keep a handle
203: * on our current directory from which to start the chdir().
204: */
205:
206: if (NULL == getcwd(buf, PATH_MAX)) {
1.20 schwarze 207: perror("getcwd");
1.1 schwarze 208: goto out;
209: } else if (-1 == (fd = open(buf, O_RDONLY, 0))) {
210: perror(buf);
211: goto out;
212: }
213:
1.6 schwarze 214: sql = sql_statement(e);
1.1 schwarze 215:
216: /*
217: * Loop over the directories (containing databases) for us to
218: * search.
219: * Don't let missing/bad databases/directories phase us.
220: * In each, try to open the resident database and, if it opens,
221: * scan it for our match expression.
222: */
223:
224: for (i = 0; i < paths->sz; i++) {
225: if (-1 == fchdir(fd)) {
226: perror(buf);
227: free(*res);
228: break;
229: } else if (-1 == chdir(paths->paths[i])) {
230: perror(paths->paths[i]);
231: continue;
1.24 schwarze 232: }
1.1 schwarze 233:
1.24 schwarze 234: c = sqlite3_open_v2(MANDOC_DB, &db,
235: SQLITE_OPEN_READONLY, NULL);
1.1 schwarze 236:
237: if (SQLITE_OK != c) {
238: perror(MANDOC_DB);
239: sqlite3_close(db);
240: continue;
241: }
242:
243: /*
244: * Define the SQL functions for substring
245: * and regular expression matching.
246: */
247:
248: c = sqlite3_create_function(db, "match", 2,
1.21 schwarze 249: SQLITE_UTF8 | SQLITE_DETERMINISTIC,
250: NULL, sql_match, NULL, NULL);
1.1 schwarze 251: assert(SQLITE_OK == c);
252: c = sqlite3_create_function(db, "regexp", 2,
1.21 schwarze 253: SQLITE_UTF8 | SQLITE_DETERMINISTIC,
254: NULL, sql_regexp, NULL, NULL);
1.1 schwarze 255: assert(SQLITE_OK == c);
256:
257: j = 1;
258: c = sqlite3_prepare_v2(db, sql, -1, &s, NULL);
259: if (SQLITE_OK != c)
260: fprintf(stderr, "%s\n", sqlite3_errmsg(db));
261:
262: for (ep = e; NULL != ep; ep = ep->next) {
263: if (NULL == ep->substr) {
264: SQL_BIND_BLOB(db, s, j, ep->regexp);
265: } else
266: SQL_BIND_TEXT(db, s, j, ep->substr);
1.18 schwarze 267: if (0 == ((TYPE_Nd | TYPE_Nm) & ep->bits))
1.17 schwarze 268: SQL_BIND_INT64(db, s, j, ep->bits);
1.1 schwarze 269: }
270:
271: memset(&htab, 0, sizeof(struct ohash));
272: ohash_init(&htab, 4, &info);
273:
274: /*
275: * Hash each entry on its [unique] document identifier.
276: * This is a uint64_t.
277: * Instead of using a hash function, simply convert the
278: * uint64_t to a uint32_t, the hash value's type.
279: * This gives good performance and preserves the
280: * distribution of buckets in the table.
281: */
282: while (SQLITE_ROW == (c = sqlite3_step(s))) {
1.22 schwarze 283: pageid = sqlite3_column_int64(s, 2);
1.24 schwarze 284: idx = ohash_lookup_memory(&htab,
285: (char *)&pageid, sizeof(uint64_t),
286: (uint32_t)pageid);
1.1 schwarze 287:
288: if (NULL != ohash_find(&htab, idx))
289: continue;
290:
291: mp = mandoc_calloc(1, sizeof(struct match));
1.22 schwarze 292: mp->pageid = pageid;
1.17 schwarze 293: mp->form = sqlite3_column_int(s, 1);
294: if (TYPE_Nd == outbit)
1.30 schwarze 295: mp->desc = mandoc_strdup((const char *)
1.17 schwarze 296: sqlite3_column_text(s, 0));
1.1 schwarze 297: ohash_insert(&htab, idx, mp);
298: }
299:
300: if (SQLITE_DONE != c)
301: fprintf(stderr, "%s\n", sqlite3_errmsg(db));
302:
303: sqlite3_finalize(s);
304:
1.24 schwarze 305: c = sqlite3_prepare_v2(db,
1.25 schwarze 306: "SELECT sec, arch, name, pageid FROM mlinks "
307: "WHERE pageid=? ORDER BY sec, arch, name",
1.1 schwarze 308: -1, &s, NULL);
309: if (SQLITE_OK != c)
310: fprintf(stderr, "%s\n", sqlite3_errmsg(db));
311:
1.3 schwarze 312: c = sqlite3_prepare_v2(db,
1.25 schwarze 313: "SELECT bits, key, pageid FROM keys "
314: "WHERE pageid=? AND bits & ?",
1.3 schwarze 315: -1, &s2, NULL);
316: if (SQLITE_OK != c)
317: fprintf(stderr, "%s\n", sqlite3_errmsg(db));
318:
1.1 schwarze 319: for (mp = ohash_first(&htab, &idx);
320: NULL != mp;
321: mp = ohash_next(&htab, &idx)) {
322: if (cur + 1 > maxres) {
323: maxres += 1024;
1.26 schwarze 324: *res = mandoc_reallocarray(*res,
325: maxres, sizeof(struct manpage));
1.1 schwarze 326: }
327: mpage = *res + cur;
1.29 schwarze 328: mpage->sec = 10;
1.1 schwarze 329: mpage->form = mp->form;
1.22 schwarze 330: buildnames(mpage, db, s, mp->pageid,
1.10 schwarze 331: paths->paths[i], mp->form);
1.17 schwarze 332: mpage->output = TYPE_Nd & outbit ?
333: mp->desc : outbit ?
1.22 schwarze 334: buildoutput(db, s2, mp->pageid, outbit) : NULL;
1.1 schwarze 335:
336: free(mp);
337: cur++;
338: }
339:
340: sqlite3_finalize(s);
1.3 schwarze 341: sqlite3_finalize(s2);
1.1 schwarze 342: sqlite3_close(db);
343: ohash_delete(&htab);
344: }
1.29 schwarze 345: qsort(*res, cur, sizeof(struct manpage), manpage_compare);
1.1 schwarze 346: rc = 1;
347: out:
1.20 schwarze 348: if (-1 != fd) {
349: if (-1 == fchdir(fd))
350: perror(buf);
351: close(fd);
352: }
1.1 schwarze 353: exprfree(e);
354: free(sql);
355: *sz = cur;
356: return(rc);
1.2 schwarze 357: }
358:
1.29 schwarze 359: static int
360: manpage_compare(const void *vp1, const void *vp2)
361: {
362: const struct manpage *mp1, *mp2;
363: int diff;
364:
365: mp1 = vp1;
366: mp2 = vp2;
367: diff = mp1->sec - mp2->sec;
368: return(diff ? diff : strcasecmp(mp1->names, mp2->names));
369: }
370:
1.8 schwarze 371: static void
372: buildnames(struct manpage *mpage, sqlite3 *db, sqlite3_stmt *s,
1.22 schwarze 373: uint64_t pageid, const char *path, int form)
1.2 schwarze 374: {
1.13 schwarze 375: char *newnames, *prevsec, *prevarch;
1.10 schwarze 376: const char *oldnames, *sep1, *name, *sec, *sep2, *arch, *fsec;
1.2 schwarze 377: size_t i;
378: int c;
379:
1.16 schwarze 380: mpage->file = NULL;
1.8 schwarze 381: mpage->names = NULL;
1.13 schwarze 382: prevsec = prevarch = NULL;
1.2 schwarze 383: i = 1;
1.22 schwarze 384: SQL_BIND_INT64(db, s, i, pageid);
1.2 schwarze 385: while (SQLITE_ROW == (c = sqlite3_step(s))) {
1.8 schwarze 386:
1.13 schwarze 387: /* Decide whether we already have some names. */
1.8 schwarze 388:
389: if (NULL == mpage->names) {
1.2 schwarze 390: oldnames = "";
391: sep1 = "";
392: } else {
1.8 schwarze 393: oldnames = mpage->names;
1.2 schwarze 394: sep1 = ", ";
395: }
1.13 schwarze 396:
397: /* Fetch the next name. */
398:
1.30 schwarze 399: sec = (const char *)sqlite3_column_text(s, 0);
400: arch = (const char *)sqlite3_column_text(s, 1);
401: name = (const char *)sqlite3_column_text(s, 2);
1.29 schwarze 402:
403: /* Remember the first section found. */
404:
405: if (9 < mpage->sec && '1' <= *sec && '9' >= *sec)
406: mpage->sec = (*sec - '1') + 1;
1.13 schwarze 407:
408: /* If the section changed, append the old one. */
409:
410: if (NULL != prevsec &&
411: (strcmp(sec, prevsec) ||
412: strcmp(arch, prevarch))) {
413: sep2 = '\0' == *prevarch ? "" : "/";
1.15 schwarze 414: mandoc_asprintf(&newnames, "%s(%s%s%s)",
415: oldnames, prevsec, sep2, prevarch);
1.13 schwarze 416: free(mpage->names);
417: oldnames = mpage->names = newnames;
418: free(prevsec);
419: free(prevarch);
420: prevsec = prevarch = NULL;
421: }
422:
423: /* Save the new section, to append it later. */
424:
425: if (NULL == prevsec) {
426: prevsec = mandoc_strdup(sec);
427: prevarch = mandoc_strdup(arch);
428: }
429:
430: /* Append the new name. */
431:
1.15 schwarze 432: mandoc_asprintf(&newnames, "%s%s%s",
433: oldnames, sep1, name);
1.8 schwarze 434: free(mpage->names);
435: mpage->names = newnames;
436:
437: /* Also save the first file name encountered. */
438:
439: if (NULL != mpage->file)
440: continue;
441:
1.10 schwarze 442: if (form) {
443: sep1 = "man";
444: fsec = sec;
445: } else {
446: sep1 = "cat";
447: fsec = "0";
448: }
1.13 schwarze 449: sep2 = '\0' == *arch ? "" : "/";
1.15 schwarze 450: mandoc_asprintf(&mpage->file, "%s/%s%s%s%s/%s.%s",
451: path, sep1, sec, sep2, arch, name, fsec);
1.2 schwarze 452: }
453: if (SQLITE_DONE != c)
454: fprintf(stderr, "%s\n", sqlite3_errmsg(db));
455: sqlite3_reset(s);
1.13 schwarze 456:
457: /* Append one final section to the names. */
458:
459: if (NULL != prevsec) {
460: sep2 = '\0' == *prevarch ? "" : "/";
1.15 schwarze 461: mandoc_asprintf(&newnames, "%s(%s%s%s)",
462: mpage->names, prevsec, sep2, prevarch);
1.13 schwarze 463: free(mpage->names);
464: mpage->names = newnames;
465: free(prevsec);
466: free(prevarch);
467: }
1.3 schwarze 468: }
469:
470: static char *
1.22 schwarze 471: buildoutput(sqlite3 *db, sqlite3_stmt *s, uint64_t pageid, uint64_t outbit)
1.3 schwarze 472: {
473: char *output, *newoutput;
474: const char *oldoutput, *sep1, *data;
475: size_t i;
476: int c;
477:
478: output = NULL;
479: i = 1;
1.22 schwarze 480: SQL_BIND_INT64(db, s, i, pageid);
1.3 schwarze 481: SQL_BIND_INT64(db, s, i, outbit);
482: while (SQLITE_ROW == (c = sqlite3_step(s))) {
483: if (NULL == output) {
484: oldoutput = "";
485: sep1 = "";
486: } else {
487: oldoutput = output;
488: sep1 = " # ";
489: }
1.30 schwarze 490: data = (const char *)sqlite3_column_text(s, 1);
1.15 schwarze 491: mandoc_asprintf(&newoutput, "%s%s%s",
492: oldoutput, sep1, data);
1.3 schwarze 493: free(output);
494: output = newoutput;
495: }
496: if (SQLITE_DONE != c)
497: fprintf(stderr, "%s\n", sqlite3_errmsg(db));
498: sqlite3_reset(s);
499: return(output);
1.1 schwarze 500: }
501:
502: /*
503: * Implement substring match as an application-defined SQL function.
504: * Using the SQL LIKE or GLOB operators instead would be a bad idea
505: * because that would require escaping metacharacters in the string
506: * being searched for.
507: */
508: static void
509: sql_match(sqlite3_context *context, int argc, sqlite3_value **argv)
510: {
511:
512: assert(2 == argc);
513: sqlite3_result_int(context, NULL != strcasestr(
514: (const char *)sqlite3_value_text(argv[1]),
515: (const char *)sqlite3_value_text(argv[0])));
516: }
517:
518: /*
519: * Implement regular expression match
520: * as an application-defined SQL function.
521: */
522: static void
523: sql_regexp(sqlite3_context *context, int argc, sqlite3_value **argv)
524: {
525:
526: assert(2 == argc);
527: sqlite3_result_int(context, !regexec(
528: (regex_t *)sqlite3_value_blob(argv[0]),
529: (const char *)sqlite3_value_text(argv[1]),
530: 0, NULL, 0));
531: }
532:
/*
 * Append to the string *sql, which currently has the length *sz,
 * growing the buffer as needed and updating *sz.
 * If "count" is greater than 1, the first character of "newstr"
 * is appended "count" times; otherwise, all of "newstr" is
 * appended once.  The result is always NUL-terminated.
 */
static void
sql_append(char **sql, size_t *sz, const char *newstr, int count)
{
	size_t	 addlen;
	char	*dst;

	addlen = count > 1 ? (size_t)count : strlen(newstr);
	*sql = mandoc_realloc(*sql, *sz + addlen + 1);

	dst = *sql + *sz;
	if (count > 1)
		memset(dst, *newstr, addlen);
	else
		memcpy(dst, newstr, addlen);
	dst[addlen] = '\0';

	*sz += addlen;
}
547:
1.1 schwarze 548: /*
549: * Prepare the search SQL statement.
550: */
551: static char *
1.6 schwarze 552: sql_statement(const struct expr *e)
1.1 schwarze 553: {
554: char *sql;
555: size_t sz;
1.4 schwarze 556: int needop;
1.1 schwarze 557:
1.25 schwarze 558: sql = mandoc_strdup(
559: "SELECT desc, form, pageid FROM mpages WHERE ");
1.1 schwarze 560: sz = strlen(sql);
561:
1.4 schwarze 562: for (needop = 0; NULL != e; e = e->next) {
563: if (e->and)
564: sql_append(&sql, &sz, " AND ", 1);
565: else if (needop)
566: sql_append(&sql, &sz, " OR ", 1);
567: if (e->open)
568: sql_append(&sql, &sz, "(", e->open);
1.17 schwarze 569: sql_append(&sql, &sz,
570: TYPE_Nd & e->bits
571: ? (NULL == e->substr
572: ? "desc REGEXP ?"
573: : "desc MATCH ?")
1.18 schwarze 574: : TYPE_Nm == e->bits
575: ? (NULL == e->substr
1.22 schwarze 576: ? "pageid IN (SELECT pageid FROM names "
1.18 schwarze 577: "WHERE name REGEXP ?)"
1.28 schwarze 578: : e->equal
579: ? "pageid IN (SELECT pageid FROM names "
580: "WHERE name = ?)"
1.22 schwarze 581: : "pageid IN (SELECT pageid FROM names "
1.18 schwarze 582: "WHERE name MATCH ?)")
1.17 schwarze 583: : (NULL == e->substr
1.22 schwarze 584: ? "pageid IN (SELECT pageid FROM keys "
1.17 schwarze 585: "WHERE key REGEXP ? AND bits & ?)"
1.22 schwarze 586: : "pageid IN (SELECT pageid FROM keys "
1.17 schwarze 587: "WHERE key MATCH ? AND bits & ?)"), 1);
1.4 schwarze 588: if (e->close)
589: sql_append(&sql, &sz, ")", e->close);
590: needop = 1;
1.1 schwarze 591: }
592:
593: return(sql);
594: }
595:
596: /*
597: * Compile a set of string tokens into an expression.
598: * Tokens in "argv" are assumed to be individual expression atoms (e.g.,
599: * "(", "foo=bar", etc.).
600: */
601: static struct expr *
602: exprcomp(const struct mansearch *search, int argc, char *argv[])
603: {
1.18 schwarze 604: uint64_t mask;
1.4 schwarze 605: int i, toopen, logic, igncase, toclose;
1.18 schwarze 606: struct expr *first, *prev, *cur, *next;
1.1 schwarze 607:
608: first = cur = NULL;
1.6 schwarze 609: logic = igncase = toclose = 0;
1.23 schwarze 610: toopen = NULL != search->sec || NULL != search->arch;
1.1 schwarze 611:
612: for (i = 0; i < argc; i++) {
1.4 schwarze 613: if (0 == strcmp("(", argv[i])) {
614: if (igncase)
615: goto fail;
616: toopen++;
617: toclose++;
618: continue;
619: } else if (0 == strcmp(")", argv[i])) {
620: if (toopen || logic || igncase || NULL == cur)
621: goto fail;
622: cur->close++;
623: if (0 > --toclose)
624: goto fail;
625: continue;
626: } else if (0 == strcmp("-a", argv[i])) {
627: if (toopen || logic || igncase || NULL == cur)
628: goto fail;
629: logic = 1;
630: continue;
631: } else if (0 == strcmp("-o", argv[i])) {
632: if (toopen || logic || igncase || NULL == cur)
633: goto fail;
634: logic = 2;
635: continue;
636: } else if (0 == strcmp("-i", argv[i])) {
637: if (igncase)
638: goto fail;
639: igncase = 1;
640: continue;
1.1 schwarze 641: }
1.4 schwarze 642: next = exprterm(search, argv[i], !igncase);
643: if (NULL == next)
644: goto fail;
1.17 schwarze 645: if (NULL == first)
646: first = next;
647: else
1.1 schwarze 648: cur->next = next;
1.18 schwarze 649: prev = cur = next;
1.17 schwarze 650:
651: /*
652: * Searching for descriptions must be split out
653: * because they are stored in the mpages table,
654: * not in the keys table.
655: */
656:
1.18 schwarze 657: for (mask = TYPE_Nm; mask <= TYPE_Nd; mask <<= 1) {
658: if (mask & cur->bits && ~mask & cur->bits) {
659: next = mandoc_calloc(1,
660: sizeof(struct expr));
661: memcpy(next, cur, sizeof(struct expr));
662: prev->open = 1;
663: cur->bits = mask;
664: cur->next = next;
665: cur = next;
666: cur->bits &= ~mask;
667: }
668: }
669: prev->and = (1 == logic);
670: prev->open += toopen;
671: if (cur != prev)
1.17 schwarze 672: cur->close = 1;
1.18 schwarze 673:
1.4 schwarze 674: toopen = logic = igncase = 0;
1.1 schwarze 675: }
1.6 schwarze 676: if (toopen || logic || igncase || toclose)
677: goto fail;
678:
1.23 schwarze 679: if (NULL != search->sec || NULL != search->arch)
680: cur->close++;
681: if (NULL != search->arch)
682: cur = exprspec(cur, TYPE_arch, search->arch, "^(%s|any)$");
683: if (NULL != search->sec)
684: exprspec(cur, TYPE_sec, search->sec, "^%s$");
1.6 schwarze 685:
686: return(first);
687:
1.4 schwarze 688: fail:
689: if (NULL != first)
690: exprfree(first);
691: return(NULL);
1.1 schwarze 692: }
693:
694: static struct expr *
1.6 schwarze 695: exprspec(struct expr *cur, uint64_t key, const char *value,
696: const char *format)
697: {
698: char errbuf[BUFSIZ];
699: char *cp;
700: int irc;
701:
1.15 schwarze 702: mandoc_asprintf(&cp, format, value);
1.6 schwarze 703: cur->next = mandoc_calloc(1, sizeof(struct expr));
704: cur = cur->next;
705: cur->and = 1;
706: cur->bits = key;
707: if (0 != (irc = regcomp(&cur->regexp, cp,
708: REG_EXTENDED | REG_NOSUB | REG_ICASE))) {
709: regerror(irc, &cur->regexp, errbuf, sizeof(errbuf));
710: fprintf(stderr, "regcomp: %s\n", errbuf);
711: cur->substr = value;
712: }
713: free(cp);
714: return(cur);
715: }
716:
717: static struct expr *
1.1 schwarze 718: exprterm(const struct mansearch *search, char *buf, int cs)
719: {
1.6 schwarze 720: char errbuf[BUFSIZ];
1.1 schwarze 721: struct expr *e;
1.28 schwarze 722: char *key, *val;
1.11 schwarze 723: uint64_t iterbit;
724: int i, irc;
1.1 schwarze 725:
726: if ('\0' == *buf)
727: return(NULL);
728:
729: e = mandoc_calloc(1, sizeof(struct expr));
730:
1.28 schwarze 731: if (MANSEARCH_MAN & search->flags) {
732: e->bits = search->deftype;
1.1 schwarze 733: e->substr = buf;
1.28 schwarze 734: e->equal = 1;
1.1 schwarze 735: return(e);
736: }
737:
738: /*
1.28 schwarze 739: * Look for an '=' or '~' operator,
740: * unless forced to some fixed macro keys.
1.1 schwarze 741: */
742:
1.28 schwarze 743: if (MANSEARCH_WHATIS & search->flags)
744: val = NULL;
745: else
746: val = strpbrk(buf, "=~");
747:
748: if (NULL == val) {
749: e->bits = search->deftype;
1.1 schwarze 750: e->substr = buf;
751:
1.28 schwarze 752: /*
753: * Found an operator.
754: * Regexp search is requested by !e->substr.
755: */
756:
757: } else {
758: if (val == buf)
759: e->bits = search->deftype;
760: if ('=' == *val)
761: e->substr = val + 1;
762: *val++ = '\0';
1.12 schwarze 763: if (NULL != strstr(buf, "arch"))
764: cs = 0;
1.28 schwarze 765: }
766:
767: /* Compile regular expressions. */
768:
769: if (MANSEARCH_WHATIS & search->flags) {
770: e->substr = NULL;
771: mandoc_asprintf(&val, "[[:<:]]%s[[:>:]]", buf);
1.32 ! schwarze 772: cs = 0;
1.28 schwarze 773: }
774:
775: if (NULL == e->substr) {
776: irc = regcomp(&e->regexp, val,
777: REG_EXTENDED | REG_NOSUB | (cs ? 0 : REG_ICASE));
778: if (MANSEARCH_WHATIS & search->flags)
779: free(val);
780: if (irc) {
1.6 schwarze 781: regerror(irc, &e->regexp, errbuf, sizeof(errbuf));
782: fprintf(stderr, "regcomp: %s\n", errbuf);
1.1 schwarze 783: free(e);
784: return(NULL);
785: }
1.28 schwarze 786: }
787:
788: if (e->bits)
789: return(e);
1.1 schwarze 790:
791: /*
792: * Parse out all possible fields.
793: * If the field doesn't resolve, bail.
794: */
795:
796: while (NULL != (key = strsep(&buf, ","))) {
797: if ('\0' == *key)
798: continue;
1.11 schwarze 799: for (i = 0, iterbit = 1;
800: i < mansearch_keymax;
801: i++, iterbit <<= 1) {
802: if (0 == strcasecmp(key,
803: mansearch_keynames[i])) {
804: e->bits |= iterbit;
805: break;
806: }
807: }
808: if (i == mansearch_keymax) {
809: if (strcasecmp(key, "any")) {
810: free(e);
811: return(NULL);
812: }
813: e->bits |= ~0ULL;
1.1 schwarze 814: }
815: }
816:
817: return(e);
818: }
819:
820: static void
821: exprfree(struct expr *p)
822: {
823: struct expr *pp;
824:
825: while (NULL != p) {
826: pp = p->next;
827: free(p);
828: p = pp;
829: }
830: }
831:
/*
 * Zeroing allocation callback for ohash,
 * forwarding to mandoc_calloc(); "arg" is not used.
 */
static void *
hash_calloc(size_t nmemb, size_t sz, void *arg)
{
	void	*mem;

	(void)arg;
	mem = mandoc_calloc(nmemb, sz);
	return(mem);
}
838:
/*
 * Allocation callback for ohash,
 * forwarding to mandoc_malloc(); "arg" is not used.
 */
static void *
hash_alloc(size_t sz, void *arg)
{
	void	*mem;

	(void)arg;
	mem = mandoc_malloc(sz);
	return(mem);
}
845:
/*
 * Deallocation callback for ohash: release "p" with free(3).
 * "arg" is not used.
 */
static void
hash_free(void *p, void *arg)
{

	(void)arg;
	free(p);
}