=================================================================== RCS file: /cvsrepo/anoncvs/cvs/src/usr.bin/mandoc/mandocdb.c,v retrieving revision 1.10 retrieving revision 1.11 diff -c -r1.10 -r1.11 *** src/usr.bin/mandoc/mandocdb.c 2011/11/26 16:41:35 1.10 --- src/usr.bin/mandoc/mandocdb.c 2011/11/27 22:57:28 1.11 *************** *** 1,4 **** ! /* $Id: mandocdb.c,v 1.10 2011/11/26 16:41:35 schwarze Exp $ */ /* * Copyright (c) 2011 Kristaps Dzonsons * Copyright (c) 2011 Ingo Schwarze --- 1,4 ---- ! /* $Id: mandocdb.c,v 1.11 2011/11/27 22:57:28 schwarze Exp $ */ /* * Copyright (c) 2011 Kristaps Dzonsons * Copyright (c) 2011 Ingo Schwarze *************** *** 16,21 **** --- 16,23 ---- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ #include + #include + #include #include #include *************** *** 36,41 **** --- 38,46 ---- #define MANDOC_BUFSZ BUFSIZ #define MANDOC_SLOP 1024 + #define MANDOC_SRC 0x1 + #define MANDOC_FORM 0x2 + /* Tiny list for files. No need to bring in QUEUE. */ struct of { *************** *** 43,48 **** --- 48,54 ---- char *sec; char *arch; char *title; + int src_form; struct of *next; /* NULL for last one */ struct of *first; /* first in list */ }; *************** *** 92,99 **** static void ofile_argbuild(char *[], int, int, int, struct of **); static int ofile_dirbuild(const char *, const char *, ! const char *, int, int, struct of **); static void ofile_free(struct of *); static int pman_node(MAN_ARGS); static void pmdoc_node(MDOC_ARGS); static void pmdoc_An(MDOC_ARGS); --- 98,108 ---- static void ofile_argbuild(char *[], int, int, int, struct of **); static int ofile_dirbuild(const char *, const char *, ! const char *, int, int, int, ! struct of **); static void ofile_free(struct of *); + static void pformatted(DB *, struct buf *, struct buf *, + const struct of *); static int pman_node(MAN_ARGS); static void pmdoc_node(MDOC_ARGS); static void pmdoc_An(MDOC_ARGS); *************** *** 432,438 **** of = NULL; if ( ! 
ofile_dirbuild(dirs.paths[i], NULL, NULL, ! use_all, verb, &of)) exit((int)MANDOCLEVEL_SYSERR); if (NULL == of) --- 441,447 ---- of = NULL; if ( ! ofile_dirbuild(dirs.paths[i], NULL, NULL, ! 0, use_all, verb, &of)) exit((int)MANDOCLEVEL_SYSERR); if (NULL == of) *************** *** 483,488 **** --- 492,502 ---- for (rec = 0; of; of = of->next) { fn = of->fname; + + /* + * Reclaim an empty index record, if available. + */ + if (reccur > 0) { --reccur; rec = recs[(int)reccur]; *************** *** 494,520 **** mparse_reset(mp); hash_reset(&hash); ! if (mparse_readfd(mp, -1, fn) >= MANDOCLEVEL_FATAL) { ! fprintf(stderr, "%s: Parse failure\n", fn); ! continue; ! } ! mparse_result(mp, &mdoc, &man); ! if (NULL == mdoc && NULL == man) ! continue; /* * By default, skip a file if the manual section * and architecture given in the file disagree * with the directory where the file is located. */ - msec = NULL != mdoc ? - mdoc_meta(mdoc)->msec : man_meta(man)->msec; - arch = NULL != mdoc ? - mdoc_meta(mdoc)->arch : NULL; - if (0 == use_all) { assert(of->sec); assert(msec); --- 508,547 ---- mparse_reset(mp); hash_reset(&hash); + mdoc = NULL; + man = NULL; ! /* ! * Try interpreting the file as mdoc(7) or man(7) ! * source code, unless it is already known to be ! * formatted. Fall back to formatted mode. ! */ ! if ((MANDOC_SRC & of->src_form || ! ! (MANDOC_FORM & of->src_form)) && ! MANDOCLEVEL_FATAL > mparse_readfd(mp, -1, fn)) ! mparse_result(mp, &mdoc, &man); + if (NULL != mdoc) { + msec = mdoc_meta(mdoc)->msec; + arch = mdoc_meta(mdoc)->arch; + mtitle = mdoc_meta(mdoc)->title; + } else if (NULL != man) { + msec = man_meta(man)->msec; + arch = NULL; + mtitle = man_meta(man)->title; + } else { + msec = of->sec; + arch = of->arch; + mtitle = of->title; + } + /* * By default, skip a file if the manual section * and architecture given in the file disagree * with the directory where the file is located. 
*/ if (0 == use_all) { assert(of->sec); assert(msec); *************** *** 539,547 **** * because the one in the file usually is all caps. */ - mtitle = NULL != mdoc ? - mdoc_meta(mdoc)->title : man_meta(man)->title; - assert(of->title); assert(mtitle); --- 566,571 ---- *************** *** 571,578 **** if (mdoc) pmdoc_node(hash, buf, dbuf, mdoc_node(mdoc), mdoc_meta(mdoc)); ! else pman_node(hash, buf, dbuf, man_node(man)); /* * Copy from the in-memory hashtable of pending keywords --- 595,604 ---- if (mdoc) pmdoc_node(hash, buf, dbuf, mdoc_node(mdoc), mdoc_meta(mdoc)); ! else if (man) pman_node(hash, buf, dbuf, man_node(man)); + else + pformatted(hash, buf, dbuf, of); /* * Copy from the in-memory hashtable of pending keywords *************** *** 1223,1235 **** return(0); } static void ofile_argbuild(char *argv[], int argc, int use_all, int verb, struct of **of) { char buf[MAXPATHLEN]; char *sec, *arch, *title, *p; ! int i; struct of *nof; for (i = 0; i < argc; i++) { --- 1249,1350 ---- return(0); }
+ /*
+  * Parse a formatted manual page.
+  * By necessity, this involves rather crude guesswork.
+  *
+  * The title derived from the file name (of->title) is always put
+  * into hash as TYPE_Nm.  The description is taken from the line
+  * following the first section header (after a dash, if any), cut to
+  * at most 70 bytes, copied to dbuf and put into hash as TYPE_Nd; if
+  * there is no such line, the title is reused as the description.  If
+  * of->fname cannot be opened, perror(3) reports it and nothing is put.
+  */
  static void
+ pformatted(DB *hash, struct buf *buf, struct buf *dbuf,
+ 		const struct of *of)
+ {
+ 	FILE *stream;
+ 	char *line, *p;
+ 	size_t len, plen;
+ 
+ 	if (NULL == (stream = fopen(of->fname, "r"))) {
+ 		perror(of->fname);
+ 		return;
+ 	}
+ 
+ 	/*
+ 	 * Always use the title derived from the filename up front,
+ 	 * do not even try to find it in the file.  This also makes
+ 	 * sure we don't end up with an orphan index record, even if
+ 	 * the file content turns out to be completely unintelligible.
+ 	 */
+ 
+ 	buf->len = 0;
+ 	buf_append(buf, of->title);
+ 	hash_put(hash, buf, TYPE_Nm);
+ 
+ 	while (NULL != (line = fgetln(stream, &len)) && '\n' != *line)
+ 		/* Skip to first blank line. */ ;
+ 
+ 	while (NULL != (line = fgetln(stream, &len)) &&
+ 			('\n' == *line || ' ' == *line))
+ 		/* Skip to first section header. */ ;
+ 
+ 	/*
+ 	 * If no page content can be found,
+ 	 * reuse the page title as the page description.
+ 	 * NOTE(review): this copies buf->size bytes although buf->len
+ 	 * is the field reset above -- confirm which one is intended.
+ 	 */
+ 
+ 	if (NULL == (line = fgetln(stream, &len))) {
+ 		buf_appendb(dbuf, buf->cp, buf->size);
+ 		hash_put(hash, buf, TYPE_Nd);
+ 		fclose(stream);
+ 		return;
+ 	}
+ 
+ 	/*
+ 	 * If there is a dash, skip to the text following it.
+ 	 * The stream stays open until the end of the function because
+ 	 * fgetln(3) returns a pointer into a buffer owned by the stream,
+ 	 * which must not be used any more once the stream is closed.
+ 	 */
+ 
+ 	for (p = line, plen = len; plen; p++, plen--)
+ 		if ('-' == *p)
+ 			break;
+ 	/* Also skip further dashes, blanks and backspaces (ASCII 8). */
+ 	for ( ; plen; p++, plen--)
+ 		if ('-' != *p && ' ' != *p && 8 != *p)
+ 			break;
+ 	if (0 == plen) {
+ 		p = line;
+ 		plen = len;
+ 	}
+ 
+ 	/*
+ 	 * Copy the rest of the line, but no more than 70 bytes.
+ 	 */
+ 
+ 	if (70 < plen)
+ 		plen = 70;
+ 	p[plen-1] = '\0';
+ 	buf_appendb(dbuf, p, plen);
+ 	buf->len = 0;
+ 	buf_appendb(buf, p, plen);
+ 	hash_put(hash, buf, TYPE_Nd);
+ 	fclose(stream);
+ }
+ 
+ static void
ofile_argbuild(char *argv[], int argc, int use_all, int verb, struct of **of) { char buf[MAXPATHLEN]; char *sec, *arch, *title, *p; ! int i, src_form; struct of *nof; for (i = 0; i < argc; i++) { *************** *** 1237,1243 **** /* * Try to infer the manual section, architecture and * page title from the path, assuming it looks like ! * man*[/<arch>]/<title>.<section> */ if (strlcpy(buf, argv[i], sizeof(buf)) >= sizeof(buf)) { --- 1339,1346 ---- /* * Try to infer the manual section, architecture and * page title from the path, assuming it looks like ! * man*[/<arch>]/<title>.<section> or ! * cat<section>[/<arch>]/<title>.0 */ if (strlcpy(buf, argv[i], sizeof(buf)) >= sizeof(buf)) { *************** *** 1245,1255 **** --- 1348,1363 ---- continue; } sec = arch = title = NULL; + src_form = 0; p = strrchr(buf, '\0'); while (p-- > buf) { if (NULL == sec && '.' == *p) { sec = p + 1; *p = '\0'; + if ('0' == *sec) + src_form |= MANDOC_FORM; + else if ('1' <= *sec && '9' >= *sec) + src_form |= MANDOC_SRC; continue; } if ('/' != *p) *************** *** 1259,1266 **** *p = '\0'; continue; } ! if (strncmp("man", p + 1, 3)) arch = p + 1; break; } if (NULL == title) --- 1367,1379 ---- *p = '\0'; continue; } !
if (strncmp("man", p + 1, 3)) { ! src_form |= MANDOC_SRC; arch = p + 1; + } else if (strncmp("cat", p + 1, 3)) { + src_form |= MANDOC_FORM; + arch = p + 1; + } break; } if (NULL == title) *************** *** 1277,1282 **** --- 1390,1396 ---- if (NULL != arch) nof->arch = mandoc_strdup(arch); nof->title = mandoc_strdup(title); + nof->src_form = src_form; /* * Add the structure to the list. *************** *** 1305,1319 **** */ static int ofile_dirbuild(const char *dir, const char* psec, const char *parch, ! int use_all, int verb, struct of **of) { char buf[MAXPATHLEN]; size_t sz; DIR *d; const char *fn, *sec, *arch; ! char *suffix; struct of *nof; struct dirent *dp; if (NULL == (d = opendir(dir))) { perror(dir); --- 1419,1435 ---- */ static int ofile_dirbuild(const char *dir, const char* psec, const char *parch, ! int p_src_form, int use_all, int verb, struct of **of) { char buf[MAXPATHLEN]; + struct stat sb; size_t sz; DIR *d; const char *fn, *sec, *arch; ! char *p, *q, *suffix; struct of *nof; struct dirent *dp; + int src_form; if (NULL == (d = opendir(dir))) { perror(dir); *************** *** 1326,1344 **** if ('.' == *fn) continue; if (DT_DIR == dp->d_type) { sec = psec; arch = parch; /* * By default, only use directories called: ! * man<section>/[<arch>/] */ if (NULL == sec) { ! if(0 == strncmp("man", fn, 3)) sec = fn + 3; ! else if (use_all) sec = fn; else continue; --- 1442,1467 ---- if ('.' == *fn) continue; + src_form = p_src_form; + if (DT_DIR == dp->d_type) { sec = psec; arch = parch; /* * By default, only use directories called: ! * man<section>/[<arch>/] or ! * cat<section>/[<arch>/] */ if (NULL == sec) { ! if(0 == strncmp("man", fn, 3)) { ! src_form |= MANDOC_SRC; sec = fn + 3; ! } else if (0 == strncmp("cat", fn, 3)) { ! src_form |= MANDOC_FORM; ! sec = fn + 3; ! } else if (use_all) sec = fn; else continue; *************** *** 1362,1368 **** printf("%s: Scanning\n", buf); if ( ! ofile_dirbuild(buf, sec, arch, ! 
use_all, verb, of)) return(0); } if (DT_REG != dp->d_type || --- 1485,1491 ---- printf("%s: Scanning\n", buf); if ( ! ofile_dirbuild(buf, sec, arch, ! src_form, use_all, verb, of)) return(0); } if (DT_REG != dp->d_type || *************** *** 1381,1388 **** if (0 == use_all) { if (NULL == suffix) continue; ! if (strcmp(suffix + 1, psec)) continue; } buf[0] = '\0'; --- 1504,1559 ---- if (0 == use_all) { if (NULL == suffix) continue; ! if ((MANDOC_SRC & src_form && ! strcmp(suffix + 1, psec)) || ! (MANDOC_FORM & src_form && ! strcmp(suffix + 1, "0"))) ! continue; ! } ! if (NULL != suffix) { ! if ('0' == suffix[1]) ! src_form |= MANDOC_FORM; ! else if ('1' <= suffix[1] && '9' >= suffix[1]) ! src_form |= MANDOC_SRC; ! } ! ! ! /* ! * Skip formatted manuals if a source version is ! * available. Ignore the age: it is very unlikely ! * that people install newer formatted base manuals ! * when they used to have source manuals before, ! * and in ports, old manuals get removed on update. ! */ ! if (0 == use_all && MANDOC_FORM & src_form && ! NULL != psec) { ! buf[0] = '\0'; ! strlcat(buf, dir, MAXPATHLEN); ! p = strrchr(buf, '/'); ! if (NULL == p) ! p = buf; ! else ! p++; ! if (0 == strncmp("cat", p, 3)) ! memcpy(p, "man", 3); ! strlcat(buf, "/", MAXPATHLEN); ! sz = strlcat(buf, fn, MAXPATHLEN); ! if (sz >= MAXPATHLEN) { ! fprintf(stderr, "%s: Path too long\n", buf); continue; + } + q = strrchr(buf, '.'); + if (NULL != q && p < q++) { + *q = '\0'; + sz = strlcat(buf, psec, MAXPATHLEN); + if (sz >= MAXPATHLEN) { + fprintf(stderr, + "%s: Path too long\n", buf); + continue; + } + if (0 == stat(buf, &sb)) + continue; + } } buf[0] = '\0'; *************** *** 1391,1397 **** sz = strlcat(buf, fn, MAXPATHLEN); if (sz >= MAXPATHLEN) { fprintf(stderr, "%s: Path too long\n", dir); ! return(0); } nof = mandoc_calloc(1, sizeof(struct of)); --- 1562,1568 ---- sz = strlcat(buf, fn, MAXPATHLEN); if (sz >= MAXPATHLEN) { fprintf(stderr, "%s: Path too long\n", dir); ! 
continue; } nof = mandoc_calloc(1, sizeof(struct of)); *************** *** 1400,1405 **** --- 1571,1577 ---- nof->sec = mandoc_strdup(psec); if (NULL != parch) nof->arch = mandoc_strdup(parch); + nof->src_form = src_form; /* * Remember the file name without the extension, *************** *** 1410,1418 **** *suffix = '\0'; nof->title = mandoc_strdup(fn); if (verb > 2) printf("%s: Scheduling\n", buf); - if (NULL == *of) { *of = nof; (*of)->first = nof; --- 1582,1593 ---- *suffix = '\0'; nof->title = mandoc_strdup(fn); + /* + * Add the structure to the list. + */ + if (verb > 2) printf("%s: Scheduling\n", buf); if (NULL == *of) { *of = nof; (*of)->first = nof;