src/usr.bin/spell/spellprog.c - annotate

Return to spellprog.c CVS log
Up to [local] / src / usr.bin / spell
Annotation of src/usr.bin/spell/spellprog.c, Revision 1.5

1.5     ! deraadt     1: /*     $OpenBSD: spellprog.c,v 1.4 2003/06/03 02:56:16 millert Exp $   */
1.1       millert     2:
                      3: /*
                      4:  * Copyright (c) 1991, 1993
                      5:  *     The Regents of the University of California.  All rights reserved.
                      6:  *
                      7:  * Redistribution and use in source and binary forms, with or without
                      8:  * modification, are permitted provided that the following conditions
                      9:  * are met:
                     10:  * 1. Redistributions of source code must retain the above copyright
                     11:  *    notice, this list of conditions and the following disclaimer.
                     12:  * 2. Redistributions in binary form must reproduce the above copyright
                     13:  *    notice, this list of conditions and the following disclaimer in the
                     14:  *    documentation and/or other materials provided with the distribution.
1.4       millert    15:  * 3. Neither the name of the University nor the names of its contributors
1.1       millert    16:  *    may be used to endorse or promote products derived from this software
                     17:  *    without specific prior written permission.
                     18:  *
                     19:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
                     20:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     21:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     22:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
                     23:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
                     24:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
                     25:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
                     26:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
                     27:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
                     28:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                     29:  * SUCH DAMAGE.
                     30:  *
                     31:  *     @(#)spell.h     8.1 (Berkeley) 6/6/93
                     32:  */
                     33: /*
                     34:  * Copyright (C) Caldera International Inc.  2001-2002.
                     35:  * All rights reserved.
                     36:  *
                     37:  * Redistribution and use in source and binary forms, with or without
                     38:  * modification, are permitted provided that the following conditions
                     39:  * are met:
                     40:  * 1. Redistributions of source code and documentation must retain the above
                     41:  *    copyright notice, this list of conditions and the following disclaimer.
                     42:  * 2. Redistributions in binary form must reproduce the above copyright
                     43:  *    notice, this list of conditions and the following disclaimer in the
                     44:  *    documentation and/or other materials provided with the distribution.
                     45:  * 3. All advertising materials mentioning features or use of this software
                     46:  *    must display the following acknowledgement:
                     47:  *     This product includes software developed or owned by Caldera
                     48:  *     International, Inc.
                     49:  * 4. Neither the name of Caldera International, Inc. nor the names of other
                     50:  *    contributors may be used to endorse or promote products derived from
                     51:  *    this software without specific prior written permission.
                     52:  *
                     53:  * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
                     54:  * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
                     55:  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
                     56:  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
                     57:  * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
                     58:  * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
                     59:  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
                     60:  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
                     61:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
                     62:  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
                     63:  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
                     64:  * POSSIBILITY OF SUCH DAMAGE.
                     65:  */
                     66:
                     67: #ifndef lint
                     68: static const char copyright[] =
                     69: "@(#) Copyright (c) 1991, 1993\n\
                     70:        The Regents of the University of California.  All rights reserved.\n";
                     71: #endif /* not lint */
                     72:
                     73: #ifndef lint
                     74: #if 0
                     75: static const char sccsid[] = "@(#)spell.c      8.1 (Berkeley) 6/6/93";
                     76: #else
                     77: #endif
1.5     ! deraadt    78: static const char rcsid[] = "$OpenBSD: spellprog.c,v 1.4 2003/06/03 02:56:16 millert Exp $";
1.1       millert    79: #endif /* not lint */
                     80:
                     81: #include <sys/param.h>
                     82: #include <sys/mman.h>
                     83: #include <sys/stat.h>
                     84:
                     85: #include <ctype.h>
                     86: #include <err.h>
                     87: #include <errno.h>
                     88: #include <fcntl.h>
                     89: #include <limits.h>
                     90: #include <locale.h>
                     91: #include <stdio.h>
                     92: #include <stdlib.h>
                     93: #include <string.h>
                     94: #include <unistd.h>
                     95:
                     96: #define DLEV 2
                     97:
                     98: int     an(char *, char *, char *, int);
                     99: int     bility(char *, char *, char *, int);
                    100: int     es(char *, char *, char *, int);
                    101: int     dict(char *, char *);
                    102: int     i_to_y(char *, char *, char *, int);
                    103: int     ily(char *, char *, char *, int);
                    104: int     ize(char *, char *, char *, int);
                    105: int     metry(char *, char *, char *, int);
                    106: int     monosyl(char *, char *);
                    107: int     ncy(char *, char *, char *, int);
                    108: int     nop(void);
                    109: int     trypref(char *, char *, int);
                    110: int     tryword(char *, char *, int);
                    111: int     s(char *, char *, char *, int);
                    112: int     strip(char *, char *, char *, int);
                    113: int     suffix(char *, int);
                    114: int     tion(char *, char *, char *, int);
                    115: int     vowel(int);
                    116: int     y_to_e(char *, char *, char *, int);
                    117: int     CCe(char *, char *, char *, int);
                    118: int     VCe(char *, char *, char *, int);
                    119: char   *lookuppref(char **, char *);
                    120: char   *skipv(char *);
1.2       millert   121: char   *estrdup(const char *);
1.1       millert   122: void    ise(void);
                    123: void    print_word(FILE *);
                    124: void    ztos(char *);
                    125: __dead void usage(void);
                    126:
                    127: /* from look.c */
                    128: int     look(unsigned char *, unsigned char *, unsigned char *);
                    129:
                    130: struct suftab {
1.2       millert   131:        char *suf;
1.1       millert   132:        int (*p1)();    /* XXX - variable args */
                    133:        int n1;
                    134:        char *d1;
                    135:        char *a1;
                    136:        int (*p2)();    /* XXX - variable args */
                    137:        int n2;
                    138:        char *d2;
                    139:        char *a2;
                    140: } suftab[] = {
                    141:        {"ssen", ily, 4, "-y+iness", "+ness" },
                    142:        {"ssel", ily, 4, "-y+i+less", "+less" },
                    143:        {"se", s, 1, "", "+s", es, 2, "-y+ies", "+es" },
                    144:        {"s'", s, 2, "", "+'s"},
                    145:        {"s", s, 1, "", "+s"},
                    146:        {"ecn", ncy, 1, "", "-t+ce"},
                    147:        {"ycn", ncy, 1, "", "-cy+t"},
                    148:        {"ytilb", nop, 0, "", ""},
                    149:        {"ytilib", bility, 5, "-le+ility", ""},
                    150:        {"elbaif", i_to_y, 4, "-y+iable", ""},
                    151:        {"elba", CCe, 4, "-e+able", "+able"},
                    152:        {"yti", CCe, 3, "-e+ity", "+ity"},
                    153:        {"ylb", y_to_e, 1, "-e+y", ""},
                    154:        {"yl", ily, 2, "-y+ily", "+ly"},
                    155:        {"laci", strip, 2, "", "+al"},
                    156:        {"latnem", strip, 2, "", "+al"},
                    157:        {"lanoi", strip, 2, "", "+al"},
                    158:        {"tnem", strip, 4, "", "+ment"},
                    159:        {"gni", CCe, 3, "-e+ing", "+ing"},
                    160:        {"reta", nop, 0, "", ""},
                    161:        {"re", strip, 1, "", "+r", i_to_y, 2, "-y+ier", "+er"},
                    162:        {"de", strip, 1, "", "+d", i_to_y, 2, "-y+ied", "+ed"},
                    163:        {"citsi", strip, 2, "", "+ic"},
                    164:        {"cihparg", i_to_y, 1, "-y+ic", ""},
                    165:        {"tse", strip, 2, "", "+st", i_to_y, 3, "-y+iest", "+est"},
                    166:        {"cirtem", i_to_y, 1, "-y+ic", ""},
                    167:        {"yrtem", metry, 0, "-ry+er", ""},
                    168:        {"cigol", i_to_y, 1, "-y+ic", ""},
                    169:        {"tsigol", i_to_y, 2, "-y+ist", ""},
                    170:        {"tsi", VCe, 3, "-e+ist", "+ist"},
                    171:        {"msi", VCe, 3, "-e+ism", "+ist"},
                    172:        {"noitacif", i_to_y, 6, "-y+ication", ""},
                    173:        {"noitazi", ize, 5, "-e+ation", ""},
                    174:        {"rota", tion, 2, "-e+or", ""},
                    175:        {"noit", tion, 3, "-e+ion", "+ion"},
                    176:        {"naino", an, 3, "", "+ian"},
                    177:        {"na", an, 1, "", "+n"},
                    178:        {"evit", tion, 3, "-e+ive", "+ive"},
                    179:        {"ezi", CCe, 3, "-e+ize", "+ize"},
                    180:        {"pihs", strip, 4, "", "+ship"},
                    181:        {"dooh", ily, 4, "-y+hood", "+hood"},
                    182:        {"ekil", strip, 4, "", "+like"},
                    183:        { NULL }
                    184: };
                    185:
                    186: char *preftab[] = {
                    187:        "anti",
                    188:        "bio",
                    189:        "dis",
                    190:        "electro",
                    191:        "en",
                    192:        "fore",
                    193:        "hyper",
                    194:        "intra",
                    195:        "inter",
                    196:        "iso",
                    197:        "kilo",
                    198:        "magneto",
                    199:        "meta",
                    200:        "micro",
                    201:        "milli",
                    202:        "mis",
                    203:        "mono",
                    204:        "multi",
                    205:        "non",
                    206:        "out",
                    207:        "over",
                    208:        "photo",
                    209:        "poly",
                    210:        "pre",
                    211:        "pseudo",
                    212:        "re",
                    213:        "semi",
                    214:        "stereo",
                    215:        "sub",
                    216:        "super",
                    217:        "thermo",
                    218:        "ultra",
                    219:        "under",        /* must precede un */
                    220:        "un",
                    221:        NULL
                    222: };
                    223:
                    224: struct wlist {
                    225:        int fd;
                    226:        unsigned char *front;
                    227:        unsigned char *back;
                    228: } *wlists;
                    229:
                    230: int vflag;
                    231: int xflag;
                    232: char word[LINE_MAX];
                    233: char original[LINE_MAX];
                    234: char *deriv[40];
                    235: char affix[40];
                    236:
                    237: /*
                    238:  * The spellprog utility accepts a newline-delimited list of words
                    239:  * on stdin.  For arguments it expects the path to a word list and
                    240:  * the path to a file in which to store found words.
                    241:  *
                    242:  * In normal usage, spell is called twice.  The first time it is
                    243:  * called with a stop list to flag commonly misspelled words.  The
                    244:  * remaining words are then passed to spell again, this time with
                    245:  * the dictionary file as the first (non-flag) argument.
                    246:  *
                    247:  * Unlike historic versions of spellprog, this one does not use
                    248:  * hashed files.  Instead it simply requires that files be sorted
                    249:  * lexicographically and uses the same algorithm as the look utility.
                    250:  *
                    251:  * Note that spellprog should be called via the spell shell script
                    252:  * and is not meant to be invoked directly by the user.
                    253:  */
                    254:
                    255: int
                    256: main(int argc, char **argv)
                    257: {
                    258:        char *ep, *cp, *dp;
                    259:        char *outfile;
                    260:        int ch, fold, i;
                    261:        struct stat sb;
                    262:        FILE *file, *found;
                    263:
                    264:        setlocale(LC_ALL, "");
                    265:
                    266:        outfile = NULL;
                    267:        while ((ch = getopt(argc, argv, "bvxo:")) != -1) {
                    268:                switch (ch) {
                    269:                case 'b':
                    270:                        /* Use British dictionary and convert ize -> ise. */
                    271:                        ise();
                    272:                        break;
                    273:                case 'o':
                    274:                        outfile = optarg;
                    275:                        break;
                    276:                case 'v':
                    277:                        /* Also write derivations to "found" file. */
                    278:                        vflag++;
                    279:                        break;
                    280:                case 'x':
                    281:                        /* Print plausible stems to stdout. */
                    282:                        xflag++;
                    283:                        break;
                    284:                default:
                    285:                        usage();
                    286:                }
                    287:
                    288:        }
                    289:        argc -= optind;
                    290:        argv += optind;
                    291:        if (argc < 1)
                    292:                usage();
                    293:
                    294:        /* Open and mmap the word/stop lists. */
1.5     ! deraadt   295:        if ((wlists = calloc(sizeof(struct wlist), (argc + 1))) == NULL)
1.1       millert   296:                err(1, "malloc");
                    297:        for (i = 0; argc--; i++) {
                    298:                wlists[i].fd = open(argv[i], O_RDONLY, 0);
                    299:                if (wlists[i].fd == -1 || fstat(wlists[i].fd, &sb) != 0)
                    300:                        err(1, "%s", argv[i]);
                    301:                if (sb.st_size > SIZE_T_MAX)
                    302:                        errx(1, "%s: %s", argv[i], strerror(EFBIG));
                    303:                wlists[i].front = mmap(NULL, (size_t)sb.st_size, PROT_READ,
                    304:                    MAP_PRIVATE, wlists[i].fd, (off_t)0);
                    305:                if (wlists[i].front == MAP_FAILED)
                    306:                        err(1, "%s", argv[i]);
                    307:                wlists[i].back = wlists[i].front + sb.st_size;
                    308:        }
                    309:        wlists[i].fd = -1;
                    310:
                    311:        /* Open file where found words are to be saved. */
                    312:        if (outfile == NULL)
                    313:                found = NULL;
                    314:        else if ((found = fopen(outfile, "w")) == NULL)
                    315:                err(1, "cannot open %s", outfile);
                    316:
                    317:        for (;; print_word(file)) {
                    318:                affix[0] = '\0';
                    319:                file = found;
                    320:                for (ep = word; (*ep = ch = getchar()) != '\n'; ep++) {
                    321:                        if (ep - word == sizeof(word) - 1) {
                    322:                                *ep = '\0';
                    323:                                warnx("word too long (%s)", word);
                    324:                                while ((ch = getchar()) != '\n')
                    325:                                        ;       /* slurp until EOL */
                    326:                        }
                    327:                        if (ch == EOF) {
                    328:                                if (found != NULL)
                    329:                                        fclose(found);
                    330:                                exit(0);
                    331:                        }
                    332:                }
                    333:                for (cp = word, dp = original; cp < ep; )
                    334:                        *dp++ = *cp++;
                    335:                *dp = '\0';
                    336:                fold = 0;
                    337:                for (cp = word; cp < ep; cp++)
                    338:                        if (islower(*cp))
                    339:                                goto lcase;
                    340:                if (trypref(ep, ".", 0))
                    341:                        continue;
                    342:                ++fold;
                    343:                for (cp = original + 1, dp = word + 1; dp < ep; dp++, cp++)
                    344:                        *dp = tolower(*cp);
                    345: lcase:
                    346:                if (trypref(ep, ".", 0) || suffix(ep, 0))
                    347:                        continue;
                    348:                if (isupper(word[0])) {
                    349:                        for (cp = original, dp = word; (*dp = *cp++); dp++) {
                    350:                                if (fold)
                    351:                                        *dp = tolower(*dp);
                    352:                        }
                    353:                        word[0] = tolower(word[0]);
                    354:                        goto lcase;
                    355:                }
                    356:                file = stdout;
                    357:        }
                    358:
                    359:        exit(0);
                    360: }
                    361:
                    362: void
                    363: print_word(FILE *f)
                    364: {
                    365:
                    366:        if (f != NULL) {
                    367:                if (vflag && affix[0] != '\0' && affix[0] != '.')
                    368:                        fprintf(f, "%s\t%s\n", affix, original);
                    369:                else
                    370:                        fprintf(f, "%s\n", original);
                    371:        }
                    372: }
                    373:
                    374: /*
                    375:  * For each matching suffix in suftab, call the function associated
                    376:  * with that suffix (p1 and p2).
                    377:  */
                    378: int
                    379: suffix(char *ep, int lev)
                    380: {
                    381:        struct suftab *t;
                    382:        char *cp, *sp;
                    383:
                    384:        lev += DLEV;
                    385:        deriv[lev] = deriv[lev-1] = 0;
                    386:        for (t = suftab; (sp = t->suf); t++) {
                    387:                cp = ep;
                    388:                while (*sp) {
                    389:                        if (*--cp != *sp++)
                    390:                                goto next;
                    391:                }
                    392:                for (sp = cp; --sp >= word && !vowel(*sp);)
                    393:                        ;       /* nothing */
                    394:                if (sp < word)
1.3       kjell     395:                        return (0);
1.1       millert   396:                if ((*t->p1)(ep-t->n1, t->d1, t->a1, lev+1))
1.3       kjell     397:                        return (1);
1.1       millert   398:                if (t->p2 != NULL) {
                    399:                        deriv[lev] = deriv[lev+1] = '\0';
1.3       kjell     400:                        return ((*t->p2)(ep-t->n2, t->d2, t->a2, lev));
1.1       millert   401:                }
1.3       kjell     402:                return (0);
1.1       millert   403: next:          ;
                    404:        }
1.3       kjell     405:        return (0);
1.1       millert   406: }
                    407:
                    408: int
                    409: nop(void)
                    410: {
                    411:
1.3       kjell     412:        return (0);
1.1       millert   413: }
                    414:
                    415: int
                    416: strip(char *ep, char *d, char *a, int lev)
                    417: {
                    418:
1.3       kjell     419:        return (trypref(ep, a, lev) || suffix(ep, lev));
1.1       millert   420: }
                    421:
                    422: int
                    423: s(char *ep, char *d, char *a, int lev)
                    424: {
                    425:
                    426:        if (lev > DLEV + 1)
1.3       kjell     427:                return (0);
1.1       millert   428:        if (*ep == 's' && ep[-1] == 's')
1.3       kjell     429:                return (0);
                    430:        return (strip(ep, d, a, lev));
1.1       millert   431: }
                    432:
                    433: int
                    434: an(char *ep, char *d, char *a, int lev)
                    435: {
                    436:
                    437:        if (!isupper(*word))    /* must be proper name */
1.3       kjell     438:                return (0);
                    439:        return (trypref(ep,a,lev));
1.1       millert   440: }
                    441:
                    442: int
                    443: ize(char *ep, char *d, char *a, int lev)
                    444: {
                    445:
                    446:        *ep++ = 'e';
1.3       kjell     447:        return (strip(ep ,"", d, lev));
1.1       millert   448: }
                    449:
                    450: int
                    451: y_to_e(char *ep, char *d, char *a, int lev)
                    452: {
                    453:        char c = *ep;
                    454:
                    455:        *ep++ = 'e';
                    456:        if (strip(ep, "", d, lev))
                    457:                return (1);
                    458:        ep[-1] = c;
                    459:        return (0);
                    460: }
                    461:
                    462: int
                    463: ily(char *ep, char *d, char *a, int lev)
                    464: {
                    465:
                    466:        if (ep[-1] == 'i')
1.3       kjell     467:                return (i_to_y(ep, d, a, lev));
1.1       millert   468:        else
1.3       kjell     469:                return (strip(ep, d, a, lev));
1.1       millert   470: }
                    471:
                    472: int
                    473: ncy(char *ep, char *d, char *a, int lev)
                    474: {
                    475:
                    476:        if (skipv(skipv(ep-1)) < word)
1.3       kjell     477:                return (0);
1.1       millert   478:        ep[-1] = 't';
1.3       kjell     479:        return (strip(ep, d, a, lev));
1.1       millert   480: }
                    481:
                    482: int
                    483: bility(char *ep, char *d, char *a, int lev)
                    484: {
                    485:
                    486:        *ep++ = 'l';
1.3       kjell     487:        return (y_to_e(ep, d, a, lev));
1.1       millert   488: }
                    489:
                    490: int
                    491: i_to_y(char *ep, char *d, char *a, int lev)
                    492: {
                    493:
                    494:        if (ep[-1] == 'i') {
                    495:                ep[-1] = 'y';
                    496:                a = d;
                    497:        }
1.3       kjell     498:        return (strip(ep, "", a, lev));
1.1       millert   499: }
                    500:
                    501: int
                    502: es(char *ep, char *d, char *a, int lev)
                    503: {
                    504:
                    505:        if (lev > DLEV)
1.3       kjell     506:                return (0);
1.1       millert   507:
                    508:        switch (ep[-1]) {
                    509:        default:
1.3       kjell     510:                return (0);
1.1       millert   511:        case 'i':
1.3       kjell     512:                return (i_to_y(ep, d, a, lev));
1.1       millert   513:        case 's':
                    514:        case 'h':
                    515:        case 'z':
                    516:        case 'x':
1.3       kjell     517:                return (strip(ep, d, a, lev));
1.1       millert   518:        }
                    519: }
                    520:
                    521: int
                    522: metry(char *ep, char *d, char *a, int lev)
                    523: {
                    524:
                    525:        ep[-2] = 'e';
                    526:        ep[-1] = 'r';
1.3       kjell     527:        return (strip(ep, d, a, lev));
1.1       millert   528: }
                    529:
                    530: int
                    531: tion(char *ep, char *d, char *a, int lev)
                    532: {
                    533:
                    534:        switch (ep[-2]) {
                    535:        case 'c':
                    536:        case 'r':
1.3       kjell     537:                return (trypref(ep, a, lev));
1.1       millert   538:        case 'a':
1.3       kjell     539:                return (y_to_e(ep, d, a, lev));
1.1       millert   540:        }
1.3       kjell     541:        return (0);
1.1       millert   542: }
                    543:
                    544: /*
                    545:  * Possible consonant-consonant-e ending.
                    546:  */
                    547: int
                    548: CCe(char *ep, char *d, char *a, int lev)
                    549: {
                    550:
                    551:        switch (ep[-1]) {
                    552:        case 'l':
                    553:                if (vowel(ep[-2]))
                    554:                        break;
                    555:                switch (ep[-2]) {
                    556:                case 'l':
                    557:                case 'r':
                    558:                case 'w':
                    559:                        break;
                    560:                default:
1.3       kjell     561:                        return (y_to_e(ep, d, a, lev));
1.1       millert   562:                }
                    563:                break;
                    564:        case 's':
                    565:                if (ep[-2] == 's')
                    566:                        break;
                    567:        case 'c':
                    568:        case 'g':
                    569:                if (*ep == 'a')
1.3       kjell     570:                        return (0);
1.1       millert   571:        case 'v':
                    572:        case 'z':
                    573:                if (vowel(ep[-2]))
                    574:                        break;
                    575:        case 'u':
                    576:                if (y_to_e(ep, d, a, lev))
1.3       kjell     577:                        return (1);
1.1       millert   578:                if (!(ep[-2] == 'n' && ep[-1] == 'g'))
1.3       kjell     579:                        return (0);
1.1       millert   580:        }
1.3       kjell     581:        return (VCe(ep, d, a, lev));
1.1       millert   582: }
                    583:
                    584: /*
                    585:  * Possible consonant-vowel-consonant-e ending.
                    586:  */
                    587: int
                    588: VCe(char *ep, char *d, char *a, int lev)
                    589: {
                    590:        char c;
                    591:
                    592:        c = ep[-1];
                    593:        if (c == 'e')
1.3       kjell     594:                return (0);
1.1       millert   595:        if (!vowel(c) && vowel(ep[-2])) {
                    596:                c = *ep;
                    597:                *ep++ = 'e';
                    598:                if (trypref(ep, d, lev) || suffix(ep, lev))
1.3       kjell     599:                        return (1);
1.1       millert   600:                ep--;
                    601:                *ep = c;
                    602:        }
1.3       kjell     603:        return (strip(ep, d, a, lev));
1.1       millert   604: }
                    605:
                    606: char *
                    607: lookuppref(char **wp, char *ep)
                    608: {
                    609:        char **sp;
                    610:        char *bp,*cp;
                    611:
                    612:        for (sp = preftab; *sp; sp++) {
                    613:                bp = *wp;
                    614:                for (cp = *sp; *cp; cp++, bp++) {
                    615:                        if (tolower(*bp) != *cp)
                    616:                                goto next;
                    617:                }
                    618:                for (cp = bp; cp < ep; cp++) {
                    619:                        if (vowel(*cp)) {
                    620:                                *wp = bp;
1.3       kjell     621:                                return (*sp);
1.1       millert   622:                        }
                    623:                }
                    624: next:          ;
                    625:        }
1.3       kjell     626:        return (0);
1.1       millert   627: }
                    628:
                    629: /*
                    630:  * If the word is not in the dictionary, try stripping off prefixes
                    631:  * until the word is found or we run out of prefixes to check.
                    632:  */
                    633: int
                    634: trypref(char *ep, char *a, int lev)
                    635: {
                    636:        char *cp;
                    637:        char *bp;
                    638:        char *pp;
                    639:        int val = 0;
                    640:        char space[20];
                    641:
                    642:        deriv[lev] = a;
                    643:        if (tryword(word, ep, lev))
1.3       kjell     644:                return (1);
1.1       millert   645:        bp = word;
                    646:        pp = space;
                    647:        deriv[lev+1] = pp;
                    648:        while ((cp = lookuppref(&bp, ep))) {
                    649:                *pp++ = '+';
                    650:                while ((*pp = *cp++))
                    651:                        pp++;
                    652:                if (tryword(bp, ep, lev+1)) {
                    653:                        val = 1;
                    654:                        break;
                    655:                }
                    656:                if (pp - space >= sizeof(space))
1.3       kjell     657:                        return (0);
1.1       millert   658:        }
                    659:        deriv[lev+1] = deriv[lev+2] = '\0';
1.3       kjell     660:        return (val);
1.1       millert   661: }
                    662:
                    663: int
                    664: tryword(char *bp, char *ep, int lev)
                    665: {
                    666:        int i, j;
                    667:        char duple[3];
                    668:
                    669:        if (ep-bp <= 1)
1.3       kjell     670:                return (0);
1.1       millert   671:        if (vowel(*ep) && monosyl(bp, ep))
1.3       kjell     672:                return (0);
1.1       millert   673:
                    674:        i = dict(bp, ep);
                    675:        if (i == 0 && vowel(*ep) && ep[-1] == ep[-2] && monosyl(bp, ep-1)) {
                    676:                ep--;
                    677:                deriv[++lev] = duple;
                    678:                duple[0] = '+';
                    679:                duple[1] = *ep;
                    680:                duple[2] = '\0';
                    681:                i = dict(bp, ep);
                    682:        }
                    683:        if (vflag == 0 || i == 0)
1.3       kjell     684:                return (i);
1.1       millert   685:
                    686:        /* Also tack on possible derivations. (XXX - warn on truncation?) */
                    687:        for (j = lev; j > 0; j--) {
                    688:                if (deriv[j])
                    689:                        strlcat(affix, deriv[j], sizeof(affix));
                    690:        }
1.3       kjell     691:        return (i);
1.1       millert   692: }
                    693:
                    694: int
                    695: monosyl(char *bp, char *ep)
                    696: {
                    697:
                    698:        if (ep < bp + 2)
1.3       kjell     699:                return (0);
1.1       millert   700:        if (vowel(*--ep) || !vowel(*--ep) || ep[1] == 'x' || ep[1] == 'w')
1.3       kjell     701:                return (0);
1.1       millert   702:        while (--ep >= bp)
                    703:                if (vowel(*ep))
1.3       kjell     704:                        return (0);
                    705:        return (1);
1.1       millert   706: }
                    707:
                    708: char *
                    709: skipv(char *s)
                    710: {
                    711:
                    712:        if (s >= word && vowel(*s))
                    713:                s--;
                    714:        while (s >= word && !vowel(*s))
                    715:                s--;
1.3       kjell     716:        return (s);
1.1       millert   717: }
                    718:
                    719: int
                    720: vowel(int c)
                    721: {
                    722:
                    723:        switch (tolower(c)) {
                    724:        case 'a':
                    725:        case 'e':
                    726:        case 'i':
                    727:        case 'o':
                    728:        case 'u':
                    729:        case 'y':
1.3       kjell     730:                return (1);
1.1       millert   731:        }
1.3       kjell     732:        return (0);
1.1       millert   733: }
                    734:
                    735: /*
                    736:  * Crummy way to Britishise.
                    737:  */
                    738: void
                    739: ise(void)
                    740: {
1.2       millert   741:        struct suftab *tab;
1.1       millert   742:
1.2       millert   743:        for (tab = suftab; tab->suf; tab++) {
                    744:                /* Assume that suffix will contain 'z' if a1 or d1 do */
                    745:                if (strchr(tab->suf, 'z')) {
                    746:                        tab->suf = estrdup(tab->suf);
                    747:                        ztos(tab->suf);
                    748:                        if (strchr(tab->d1, 'z')) {
                    749:                                tab->d1 = estrdup(tab->d1);
                    750:                                ztos(tab->d1);
                    751:                        }
                    752:                        if (strchr(tab->a1, 'z')) {
                    753:                                tab->a1 = estrdup(tab->a1);
                    754:                                ztos(tab->a1);
                    755:                        }
                    756:                }
1.1       millert   757:        }
                    758: }
                    759:
                    760: void
                    761: ztos(char *s)
                    762: {
                    763:
                    764:        for (; *s; s++)
                    765:                if (*s == 'z')
                    766:                        *s = 's';
1.2       millert   767: }
                    768:
                    769: char *
                    770: estrdup(const char *s)
                    771: {
                    772:        char *d;
                    773:
                    774:        if ((d = strdup(s)) == NULL)
                    775:                err(1, "strdup");
1.3       kjell     776:        return (d);
1.1       millert   777: }
                    778:
                    779: /*
                    780:  * Look up a word in the dictionary.
                    781:  * Returns 1 if found, 0 if not.
                    782:  */
                    783: int
                    784: dict(char *bp, char *ep)
                    785: {
                    786:        char c;
                    787:        int i, rval;
                    788:
                    789:        c = *ep;
                    790:        *ep = '\0';
                    791:        if (xflag)
                    792:                printf("=%s\n", bp);
                    793:        for (i = rval = 0; wlists[i].fd != -1; i++) {
                    794:                if ((rval = look((unsigned char *)bp, wlists[i].front,
                    795:                    wlists[i].back)) == 1)
                    796:                        break;
                    797:        }
                    798:        *ep = c;
1.3       kjell     799:        return (rval);
1.1       millert   800: }
                    801:
                    802: __dead void
                    803: usage(void)
                    804: {
                    805:        extern char *__progname;
                    806:
                    807:        fprintf(stderr, "usage: %s [-bvx] [-o found-words] word-list ...\n",
                    808:            __progname);
                    809:        exit(1);
                    810: }