version 1.3, 2011/11/13 11:07:10 |
version 1.4, 2011/11/16 13:23:27 |
|
|
|
|
struct expr { |
struct expr { |
int regex; |
int regex; |
|
int index; |
int mask; |
int mask; |
|
int and; |
char *v; |
char *v; |
regex_t re; |
regex_t re; |
|
struct expr *next; |
}; |
}; |
|
|
struct type { |
struct type { |
|
|
|
|
static DB *btree_open(void); |
static DB *btree_open(void); |
static int btree_read(const DBT *, const struct mchars *, char **); |
static int btree_read(const DBT *, const struct mchars *, char **); |
static int exprexec(const struct expr *, char *, int); |
static int exprexecpre(const struct expr *, const char *, int); |
|
static void exprexecpost(const struct expr *, |
|
const char *, int, int *, size_t); |
|
static struct expr *exprterm(char *, int, int); |
static DB *index_open(void); |
static DB *index_open(void); |
static int index_read(const DBT *, const DBT *, |
static int index_read(const DBT *, const DBT *, |
const struct mchars *, struct rec *); |
const struct mchars *, struct rec *); |
static void norm_string(const char *, |
static void norm_string(const char *, |
const struct mchars *, char **); |
const struct mchars *, char **); |
static size_t norm_utf8(unsigned int, char[7]); |
static size_t norm_utf8(unsigned int, char[7]); |
|
static void recfree(struct rec *); |
|
|
/* |
/* |
* Open the keyword mandoc-db database. |
* Open the keyword mandoc-db database. |
|
|
*/ |
*/ |
void |
void |
apropos_search(const struct opts *opts, const struct expr *expr, |
apropos_search(const struct opts *opts, const struct expr *expr, |
void *arg, void (*res)(struct rec *, size_t, void *)) |
size_t terms, void *arg, |
|
void (*res)(struct rec *, size_t, void *)) |
{ |
{ |
int i, len, root, leaf; |
int i, len, root, leaf, mask, mlen; |
DBT key, val; |
DBT key, val; |
DB *btree, *idx; |
DB *btree, *idx; |
struct mchars *mc; |
struct mchars *mc; |
int ch; |
int ch; |
char *buf; |
char *buf; |
recno_t rec; |
recno_t rec; |
struct rec *recs; |
struct rec *recs, *rrecs; |
struct rec srec; |
struct rec srec; |
|
|
root = -1; |
root = -1; |
|
|
if ( ! btree_read(&key, mc, &buf)) |
if ( ! btree_read(&key, mc, &buf)) |
break; |
break; |
|
|
if ( ! exprexec(expr, buf, *(int *)val.data)) |
mask = *(int *)val.data; |
|
|
|
/* |
|
* See if this keyword record matches any of the |
|
* expressions we have stored. |
|
*/ |
|
if ( ! exprexecpre(expr, buf, mask)) |
continue; |
continue; |
|
|
memcpy(&rec, val.data + 4, sizeof(recno_t)); |
memcpy(&rec, val.data + 4, sizeof(recno_t)); |
|
|
else |
else |
break; |
break; |
|
|
if (leaf >= 0 && recs[leaf].rec == rec) |
if (leaf >= 0 && recs[leaf].rec == rec) { |
|
if (0 == recs[leaf].matches[0]) |
|
exprexecpost |
|
(expr, buf, mask, |
|
recs[leaf].matches, terms); |
continue; |
continue; |
|
} |
|
|
/* |
/* |
* Now we actually extract the manpage's metadata from |
* Now we actually extract the manpage's metadata from |
|
|
(recs, (len + 1) * sizeof(struct rec)); |
(recs, (len + 1) * sizeof(struct rec)); |
|
|
memcpy(&recs[len], &srec, sizeof(struct rec)); |
memcpy(&recs[len], &srec, sizeof(struct rec)); |
|
recs[len].matches = |
|
mandoc_calloc(terms + 1, sizeof(int)); |
|
|
|
exprexecpost |
|
(expr, buf, mask, |
|
recs[len].matches, terms); |
|
|
/* Append to our tree. */ |
/* Append to our tree. */ |
|
|
if (leaf >= 0) { |
if (leaf >= 0) { |
|
|
len++; |
len++; |
} |
} |
|
|
if (1 == ch) |
if (1 == ch) { |
(*res)(recs, len, arg); |
for (mlen = i = 0; i < len; i++) |
|
if (recs[i].matches[0]) |
|
mlen++; |
|
rrecs = mandoc_malloc(mlen * sizeof(struct rec)); |
|
for (mlen = i = 0; i < len; i++) |
|
if (recs[i].matches[0]) |
|
memcpy(&rrecs[mlen++], &recs[i], |
|
sizeof(struct rec)); |
|
(*res)(rrecs, mlen, arg); |
|
free(rrecs); |
|
} |
|
|
/* XXX: else? corrupt database error? */ |
/* XXX: else? corrupt database error? */ |
out: |
out: |
for (i = 0; i < len; i++) { |
for (i = 0; i < len; i++) |
free(recs[i].file); |
recfree(&recs[i]); |
free(recs[i].cat); |
|
free(recs[i].title); |
|
free(recs[i].arch); |
|
free(recs[i].desc); |
|
} |
|
|
|
free(srec.file); |
recfree(&srec); |
free(srec.cat); |
|
free(srec.title); |
|
free(srec.arch); |
|
free(srec.desc); |
|
|
|
if (mc) |
if (mc) |
mchars_free(mc); |
mchars_free(mc); |
|
|
free(recs); |
free(recs); |
} |
} |
|
|
|
static void |
|
recfree(struct rec *rec) |
|
{ |
|
|
|
free(rec->file); |
|
free(rec->matches); |
|
free(rec->cat); |
|
free(rec->title); |
|
free(rec->arch); |
|
free(rec->desc); |
|
} |
|
|
struct expr * |
struct expr * |
exprcomp(int argc, char *argv[]) |
exprcomp(int argc, char *argv[], size_t *tt) |
{ |
{ |
struct expr *p; |
struct expr *e, *first, *next; |
|
int pos, log; |
|
|
|
first = next = NULL; |
|
(*tt) = 0; |
|
|
|
for (pos = 0; pos < argc; pos++) { |
|
e = next; |
|
log = 0; |
|
|
|
if (0 == strcmp("-a", argv[pos])) |
|
log = 1; |
|
else if (0 == strcmp("-o", argv[pos])) |
|
log = 2; |
|
|
|
if (log > 0 && ++pos >= argc) |
|
goto err; |
|
|
|
if (0 == strcmp("-i", argv[pos])) { |
|
if (++pos >= argc) |
|
goto err; |
|
next = exprterm(argv[pos], 1, log == 1); |
|
} else |
|
next = exprterm(argv[pos], 0, log == 1); |
|
|
|
if (NULL == next) |
|
goto err; |
|
|
|
next->index = (int)(*tt)++; |
|
|
|
if (NULL == first) { |
|
assert(NULL == e); |
|
first = next; |
|
} else { |
|
assert(NULL != e); |
|
e->next = next; |
|
} |
|
} |
|
|
|
return(first); |
|
err: |
|
exprfree(first); |
|
return(NULL); |
|
} |
|
|
|
static struct expr * |
|
exprterm(char *buf, int cs, int and) |
|
{ |
struct expr e; |
struct expr e; |
|
struct expr *p; |
char *key; |
char *key; |
int i, icase; |
int i; |
|
|
if (0 >= argc) |
memset(&e, 0, sizeof(struct expr)); |
return(NULL); |
|
|
|
/* |
e.and = and; |
* Choose regex or substring match. |
|
|
/* |
|
* Choose regex or substring match. |
*/ |
*/ |
|
|
if (NULL == (e.v = strpbrk(*argv, "=~"))) { |
if (NULL == (e.v = strpbrk(buf, "=~"))) { |
e.regex = 0; |
e.regex = 0; |
e.v = *argv; |
e.v = buf; |
} else { |
} else { |
e.regex = '~' == *e.v; |
e.regex = '~' == *e.v; |
*e.v++ = '\0'; |
*e.v++ = '\0'; |
|
|
* Determine the record types to search for. |
* Determine the record types to search for. |
*/ |
*/ |
|
|
icase = 0; |
|
e.mask = 0; |
e.mask = 0; |
if (*argv < e.v) { |
if (buf < e.v) { |
while (NULL != (key = strsep(argv, ","))) { |
while (NULL != (key = strsep(&buf, ","))) { |
if ('i' == key[0] && '\0' == key[1]) { |
|
icase = REG_ICASE; |
|
continue; |
|
} |
|
i = 0; |
i = 0; |
while (types[i].mask && |
while (types[i].mask && |
strcmp(types[i].name, key)) |
strcmp(types[i].name, key)) |
|
|
if (0 == e.mask) |
if (0 == e.mask) |
e.mask = TYPE_Nm | TYPE_Nd; |
e.mask = TYPE_Nm | TYPE_Nd; |
|
|
if (e.regex && |
if (e.regex) { |
regcomp(&e.re, e.v, REG_EXTENDED | REG_NOSUB | icase)) |
/*
 * The conditional operator binds more loosely than `|', so it
 * must be parenthesised: otherwise the whole OR-expression
 * becomes the condition and the flags collapse to REG_ICASE
 * alone, losing REG_EXTENDED and REG_NOSUB.
 */
i = REG_EXTENDED | REG_NOSUB | (cs ? REG_ICASE : 0);
return(NULL); |
if (regcomp(&e.re, e.v, i)) |
|
return(NULL); |
|
} |
|
|
e.v = mandoc_strdup(e.v); |
e.v = mandoc_strdup(e.v); |
|
|
|
|
void |
void |
exprfree(struct expr *p) |
exprfree(struct expr *p) |
{ |
{ |
|
struct expr *pp; |
|
|
|
while (NULL != p) { |
|
if (p->regex) |
|
regfree(&p->re); |
|
free(p->v); |
|
pp = p->next; |
|
free(p); |
|
p = pp; |
|
} |
|
} |
|
|
if (NULL == p) |
/* |
return; |
* See if this expression evaluates to true for any terms. |
|
* Return 1 if any expression evaluates to true, else 0. |
|
*/ |
|
static int |
|
exprexecpre(const struct expr *p, const char *cp, int mask) |
|
{ |
|
|
if (p->regex) |
for ( ; NULL != p; p = p->next) { |
regfree(&p->re); |
if ( ! (mask & p->mask)) |
|
continue; |
free(p->v); |
if (p->regex) { |
free(p); |
if (0 == regexec(&p->re, cp, 0, NULL, 0)) |
|
return(1); |
|
} else if (NULL != strcasestr(cp, p->v)) |
|
return(1); |
|
} |
|
return(0); |
} |
} |
|
|
static int |
/* |
exprexec(const struct expr *p, char *cp, int mask) |
* First, update the array of terms for which this expression evaluates |
|
* to true. |
|
* Second, logically evaluate all terms over the updated array of truth |
|
* values. |
|
* If this evaluates to true, mark the expression as satisfied. |
|
*/ |
|
static void |
|
exprexecpost(const struct expr *e, const char *cp, |
|
int mask, int *matches, size_t matchsz) |
{ |
{ |
|
const struct expr *p; |
|
int match; |
|
|
if ( ! (mask & p->mask)) |
assert(0 == matches[0]); |
return(0); |
|
|
|
if (p->regex) |
for (p = e; p; p = p->next) { |
return(0 == regexec(&p->re, cp, 0, NULL, 0)); |
if ( ! (mask & p->mask)) |
else |
continue; |
return(NULL != strcasestr(cp, p->v)); |
if (p->regex) { |
|
if (regexec(&p->re, cp, 0, NULL, 0)) |
|
continue; |
|
} else if (NULL == strcasestr(cp, p->v)) |
|
continue; |
|
|
|
matches[p->index + 1] = 1; |
|
} |
|
|
|
for (match = 0, p = e; p && ! match; p = p->next) { |
|
match = matches[p->index + 1]; |
|
for ( ; p->next && p->next->and; p = p->next) |
|
match = match && matches[p->next->index + 1]; |
|
} |
|
|
|
matches[0] = match; |
} |
} |