[BACK]Return to regexp.c CVS log [TXT][DIR] Up to [local] / src / usr.bin / vgrind

Annotation of src/usr.bin/vgrind/regexp.c, Revision 1.1

1.1     ! deraadt     1: /*     $NetBSD: regexp.c,v 1.3 1994/11/17 08:28:02 jtc Exp $   */
        !             2:
        !             3: /*
        !             4:  * Copyright (c) 1980, 1993
        !             5:  *     The Regents of the University of California.  All rights reserved.
        !             6:  *
        !             7:  *
        !             8:  * Redistribution and use in source and binary forms, with or without
        !             9:  * modification, are permitted provided that the following conditions
        !            10:  * are met:
        !            11:  * 1. Redistributions of source code must retain the above copyright
        !            12:  *    notice, this list of conditions and the following disclaimer.
        !            13:  * 2. Redistributions in binary form must reproduce the above copyright
        !            14:  *    notice, this list of conditions and the following disclaimer in the
        !            15:  *    documentation and/or other materials provided with the distribution.
        !            16:  * 3. All advertising materials mentioning features or use of this software
        !            17:  *    must display the following acknowledgement:
        !            18:  *     This product includes software developed by the University of
        !            19:  *     California, Berkeley and its contributors.
        !            20:  * 4. Neither the name of the University nor the names of its contributors
        !            21:  *    may be used to endorse or promote products derived from this software
        !            22:  *    without specific prior written permission.
        !            23:  *
        !            24:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
        !            25:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
        !            26:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
        !            27:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
        !            28:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
        !            29:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
        !            30:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
        !            31:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
        !            32:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
        !            33:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
        !            34:  * SUCH DAMAGE.
        !            35:  */
        !            36:
        !            37: #ifndef lint
        !            38: static char copyright[] =
        !            39: "@(#) Copyright (c) 1980, 1993\n\
        !            40:        The Regents of the University of California.  All rights reserved.\n";
        !            41: #endif /* not lint */
        !            42:
        !            43: #ifndef lint
        !            44: #if 0
        !            45: static char sccsid[] = "@(#)regexp.c   8.1 (Berkeley) 6/6/93";
        !            46: #endif
        !            47: static char rcsid[] = "$NetBSD: regexp.c,v 1.3 1994/11/17 08:28:02 jtc Exp $";
        !            48: #endif /* not lint */
        !            49:
        !            50: #include <ctype.h>
        !            51: #include <stdlib.h>
        !            52: #include <string.h>
        !            53: #include "extern.h"
        !            54:
        !            55: #define FALSE  0
        !            56: #define TRUE   !(FALSE)
        !            57: #define NIL    0
        !            58:
        !            59: static void    expconv __P((void));
        !            60:
        !            61: boolean         _escaped;      /* true if we are currently _escaped */
        !            62: char   *_start;        /* start of string */
        !            63: boolean         l_onecase;     /* true if upper and lower equivalent */
        !            64:
        !            65: #define makelower(c) (isupper((c)) ? tolower((c)) : (c))
        !            66:
        !            67: /*  STRNCMP -  like strncmp except that we convert the
        !            68:  *             first string to lower case before comparing
        !            69:  *             if l_onecase is set.
        !            70:  */
        !            71:
        !            72: int
        !            73: STRNCMP(s1, s2, len)
        !            74:        register char *s1,*s2;
        !            75:        register int len;
        !            76: {
        !            77:        if (l_onecase) {
        !            78:            do
        !            79:                if (*s2 - makelower(*s1))
        !            80:                        return (*s2 - makelower(*s1));
        !            81:                else {
        !            82:                        s2++;
        !            83:                        s1++;
        !            84:                }
        !            85:            while (--len);
        !            86:        } else {
        !            87:            do
        !            88:                if (*s2 - *s1)
        !            89:                        return (*s2 - *s1);
        !            90:                else {
        !            91:                        s2++;
        !            92:                        s1++;
        !            93:                }
        !            94:            while (--len);
        !            95:        }
        !            96:        return(0);
        !            97: }
        !            98:
        !            99: /*     The following routine converts an irregular expression to
        !           100:  *     internal format.
        !           101:  *
        !           102:  *     Either meta symbols (\a \d or \p) or character strings or
        !           103:  *     operations ( alternation or perenthesizing ) can be
        !           104:  *     specified.  Each starts with a descriptor byte.  The descriptor
        !           105:  *     byte has STR set for strings, META set for meta symbols
        !           106:  *     and OPER set for operations.
        !           107:  *     The descriptor byte can also have the OPT bit set if the object
        !           108:  *     defined is optional.  Also ALT can be set to indicate an alternation.
        !           109:  *
        !           110:  *     For metasymbols the byte following the descriptor byte identities
        !           111:  *     the meta symbol (containing an ascii 'a', 'd', 'p', '|', or '(').  For
        !           112:  *     strings the byte after the descriptor is a character count for
        !           113:  *     the string:
        !           114:  *
        !           115:  *             meta symbols := descriptor
        !           116:  *                             symbol
        !           117:  *
        !           118:  *             strings :=      descriptor
        !           119:  *                             character count
        !           120:  *                             the string
        !           121:  *
        !           122:  *             operatins :=    descriptor
        !           123:  *                             symbol
        !           124:  *                             character count
        !           125:  */
        !           126:
        !           127: /*
        !           128:  *  handy macros for accessing parts of match blocks
        !           129:  */
        !           130: #define MSYM(A) (*(A+1))       /* symbol in a meta symbol block */
        !           131: #define MNEXT(A) (A+2)         /* character following a metasymbol block */
        !           132:
        !           133: #define OSYM(A) (*(A+1))       /* symbol in an operation block */
        !           134: #define OCNT(A) (*(A+2))       /* character count */
        !           135: #define ONEXT(A) (A+3)         /* next character after the operation */
        !           136: #define OPTR(A) (A+*(A+2))     /* place pointed to by the operator */
        !           137:
        !           138: #define SCNT(A) (*(A+1))       /* byte count of a string */
        !           139: #define SSTR(A) (A+2)          /* address of the string */
        !           140: #define SNEXT(A) (A+2+*(A+1))  /* character following the string */
        !           141:
        !           142: /*
        !           143:  *  bit flags in the descriptor
        !           144:  */
        !           145: #define OPT 1
        !           146: #define STR 2
        !           147: #define META 4
        !           148: #define ALT 8
        !           149: #define OPER 16
        !           150:
        !           151: static char *ccre;     /* pointer to current position in converted exp*/
        !           152: static char *ure;      /* pointer current position in unconverted exp */
        !           153:
        !           154: char *
        !           155: convexp(re)
        !           156:     char *re;          /* unconverted irregular expression */
        !           157: {
        !           158:     register char *cre;                /* pointer to converted regular expression */
        !           159:
        !           160:     /* allocate room for the converted expression */
        !           161:     if (re == NIL)
        !           162:        return (NIL);
        !           163:     if (*re == '\0')
        !           164:        return (NIL);
        !           165:     cre = malloc (4 * strlen(re) + 3);
        !           166:     ccre = cre;
        !           167:     ure = re;
        !           168:
        !           169:     /* start the conversion with a \a */
        !           170:     *cre = META | OPT;
        !           171:     MSYM(cre) = 'a';
        !           172:     ccre = MNEXT(cre);
        !           173:
        !           174:     /* start the conversion (its recursive) */
        !           175:     expconv ();
        !           176:     *ccre = 0;
        !           177:     return (cre);
        !           178: }
        !           179:
        !           180: static void
        !           181: expconv()
        !           182: {
        !           183:     register char *cs;         /* pointer to current symbol in converted exp */
        !           184:     register char c;           /* character being processed */
        !           185:     register char *acs;                /* pinter to last alternate */
        !           186:     register int temp;
        !           187:
        !           188:     /* let the conversion begin */
        !           189:     acs = NIL;
        !           190:     cs = NIL;
        !           191:     while (*ure != NIL) {
        !           192:        switch (c = *ure++) {
        !           193:
        !           194:        case '\\':
        !           195:            switch (c = *ure++) {
        !           196:
        !           197:            /* escaped characters are just characters */
        !           198:            default:
        !           199:                if (cs == NIL || (*cs & STR) == 0) {
        !           200:                    cs = ccre;
        !           201:                    *cs = STR;
        !           202:                    SCNT(cs) = 1;
        !           203:                    ccre += 2;
        !           204:                } else
        !           205:                    SCNT(cs)++;
        !           206:                *ccre++ = c;
        !           207:                break;
        !           208:
        !           209:            /* normal(?) metacharacters */
        !           210:            case 'a':
        !           211:            case 'd':
        !           212:            case 'e':
        !           213:            case 'p':
        !           214:                if (acs != NIL && acs != cs) {
        !           215:                    do {
        !           216:                        temp = OCNT(acs);
        !           217:                        OCNT(acs) = ccre - acs;
        !           218:                        acs -= temp;
        !           219:                    } while (temp != 0);
        !           220:                    acs = NIL;
        !           221:                }
        !           222:                cs = ccre;
        !           223:                *cs = META;
        !           224:                MSYM(cs) = c;
        !           225:                ccre = MNEXT(cs);
        !           226:                break;
        !           227:            }
        !           228:            break;
        !           229:
        !           230:        /* just put the symbol in */
        !           231:        case '^':
        !           232:        case '$':
        !           233:            if (acs != NIL && acs != cs) {
        !           234:                do {
        !           235:                    temp = OCNT(acs);
        !           236:                    OCNT(acs) = ccre - acs;
        !           237:                    acs -= temp;
        !           238:                } while (temp != 0);
        !           239:                acs = NIL;
        !           240:            }
        !           241:            cs = ccre;
        !           242:            *cs = META;
        !           243:            MSYM(cs) = c;
        !           244:            ccre = MNEXT(cs);
        !           245:            break;
        !           246:
        !           247:        /* mark the last match sequence as optional */
        !           248:        case '?':
        !           249:            if (cs)
        !           250:                *cs = *cs | OPT;
        !           251:            break;
        !           252:
        !           253:        /* recurse and define a subexpression */
        !           254:        case '(':
        !           255:            if (acs != NIL && acs != cs) {
        !           256:                do {
        !           257:                    temp = OCNT(acs);
        !           258:                    OCNT(acs) = ccre - acs;
        !           259:                    acs -= temp;
        !           260:                } while (temp != 0);
        !           261:                acs = NIL;
        !           262:            }
        !           263:            cs = ccre;
        !           264:            *cs = OPER;
        !           265:            OSYM(cs) = '(';
        !           266:            ccre = ONEXT(cs);
        !           267:            expconv ();
        !           268:            OCNT(cs) = ccre - cs;               /* offset to next symbol */
        !           269:            break;
        !           270:
        !           271:        /* reurn from a recursion */
        !           272:        case ')':
        !           273:            if (acs != NIL) {
        !           274:                do {
        !           275:                    temp = OCNT(acs);
        !           276:                    OCNT(acs) = ccre - acs;
        !           277:                    acs -= temp;
        !           278:                } while (temp != 0);
        !           279:                acs = NIL;
        !           280:            }
        !           281:            cs = ccre;
        !           282:            *cs = META;
        !           283:            MSYM(cs) = c;
        !           284:            ccre = MNEXT(cs);
        !           285:            return;
        !           286:
        !           287:        /* mark the last match sequence as having an alternate */
        !           288:        /* the third byte will contain an offset to jump over the */
        !           289:        /* alternate match in case the first did not fail */
        !           290:        case '|':
        !           291:            if (acs != NIL && acs != cs)
        !           292:                OCNT(ccre) = ccre - acs;        /* make a back pointer */
        !           293:            else
        !           294:                OCNT(ccre) = 0;
        !           295:            *cs |= ALT;
        !           296:            cs = ccre;
        !           297:            *cs = OPER;
        !           298:            OSYM(cs) = '|';
        !           299:            ccre = ONEXT(cs);
        !           300:            acs = cs;   /* remember that the pointer is to be filles */
        !           301:            break;
        !           302:
        !           303:        /* if its not a metasymbol just build a scharacter string */
        !           304:        default:
        !           305:            if (cs == NIL || (*cs & STR) == 0) {
        !           306:                cs = ccre;
        !           307:                *cs = STR;
        !           308:                SCNT(cs) = 1;
        !           309:                ccre = SSTR(cs);
        !           310:            } else
        !           311:                SCNT(cs)++;
        !           312:            *ccre++ = c;
        !           313:            break;
        !           314:        }
        !           315:     }
        !           316:     if (acs != NIL) {
        !           317:        do {
        !           318:            temp = OCNT(acs);
        !           319:            OCNT(acs) = ccre - acs;
        !           320:            acs -= temp;
        !           321:        } while (temp != 0);
        !           322:        acs = NIL;
        !           323:     }
        !           324:     return;
        !           325: }
        !           326: /* end of convertre */
        !           327:
        !           328:
        !           329: /*
        !           330:  *     The following routine recognises an irregular expresion
        !           331:  *     with the following special characters:
        !           332:  *
        !           333:  *             \?      -       means last match was optional
        !           334:  *             \a      -       matches any number of characters
        !           335:  *             \d      -       matches any number of spaces and tabs
        !           336:  *             \p      -       matches any number of alphanumeric
        !           337:  *                             characters. The
        !           338:  *                             characters matched will be copied into
        !           339:  *                             the area pointed to by 'name'.
        !           340:  *             \|      -       alternation
        !           341:  *             \( \)   -       grouping used mostly for alternation and
        !           342:  *                             optionality
        !           343:  *
        !           344:  *     The irregular expression must be translated to internal form
        !           345:  *     prior to calling this routine
        !           346:  *
        !           347:  *     The value returned is the pointer to the first non \a
        !           348:  *     character matched.
        !           349:  */
        !           350:
        !           351: char *
        !           352: expmatch (s, re, mstring)
        !           353:     register char *s;          /* string to check for a match in */
        !           354:     register char *re;         /* a converted irregular expression */
        !           355:     register char *mstring;    /* where to put whatever matches a \p */
        !           356: {
        !           357:     register char *cs;         /* the current symbol */
        !           358:     register char *ptr,*s1;    /* temporary pointer */
        !           359:     boolean matched;           /* a temporary boolean */
        !           360:
        !           361:     /* initial conditions */
        !           362:     if (re == NIL)
        !           363:        return (NIL);
        !           364:     cs = re;
        !           365:     matched = FALSE;
        !           366:
        !           367:     /* loop till expression string is exhausted (or at least pretty tired) */
        !           368:     while (*cs) {
        !           369:        switch (*cs & (OPER | STR | META)) {
        !           370:
        !           371:        /* try to match a string */
        !           372:        case STR:
        !           373:            matched = !STRNCMP (s, SSTR(cs), SCNT(cs));
        !           374:            if (matched) {
        !           375:
        !           376:                /* hoorah it matches */
        !           377:                s += SCNT(cs);
        !           378:                cs = SNEXT(cs);
        !           379:            } else if (*cs & ALT) {
        !           380:
        !           381:                /* alternation, skip to next expression */
        !           382:                cs = SNEXT(cs);
        !           383:            } else if (*cs & OPT) {
        !           384:
        !           385:                /* the match is optional */
        !           386:                cs = SNEXT(cs);
        !           387:                matched = 1;            /* indicate a successful match */
        !           388:            } else {
        !           389:
        !           390:                /* no match, error return */
        !           391:                return (NIL);
        !           392:            }
        !           393:            break;
        !           394:
        !           395:        /* an operator, do something fancy */
        !           396:        case OPER:
        !           397:            switch (OSYM(cs)) {
        !           398:
        !           399:            /* this is an alternation */
        !           400:            case '|':
        !           401:                if (matched)
        !           402:
        !           403:                    /* last thing in the alternation was a match, skip ahead */
        !           404:                    cs = OPTR(cs);
        !           405:                else
        !           406:
        !           407:                    /* no match, keep trying */
        !           408:                    cs = ONEXT(cs);
        !           409:                break;
        !           410:
        !           411:            /* this is a grouping, recurse */
        !           412:            case '(':
        !           413:                ptr = expmatch (s, ONEXT(cs), mstring);
        !           414:                if (ptr != NIL) {
        !           415:
        !           416:                    /* the subexpression matched */
        !           417:                    matched = 1;
        !           418:                    s = ptr;
        !           419:                } else if (*cs & ALT) {
        !           420:
        !           421:                    /* alternation, skip to next expression */
        !           422:                    matched = 0;
        !           423:                } else if (*cs & OPT) {
        !           424:
        !           425:                    /* the match is optional */
        !           426:                    matched = 1;        /* indicate a successful match */
        !           427:                } else {
        !           428:
        !           429:                    /* no match, error return */
        !           430:                    return (NIL);
        !           431:                }
        !           432:                cs = OPTR(cs);
        !           433:                break;
        !           434:            }
        !           435:            break;
        !           436:
        !           437:        /* try to match a metasymbol */
        !           438:        case META:
        !           439:            switch (MSYM(cs)) {
        !           440:
        !           441:            /* try to match anything and remember what was matched */
        !           442:            case 'p':
        !           443:                /*
        !           444:                 *  This is really the same as trying the match the
        !           445:                 *  remaining parts of the expression to any subset
        !           446:                 *  of the string.
        !           447:                 */
        !           448:                s1 = s;
        !           449:                do {
        !           450:                    ptr = expmatch (s1, MNEXT(cs), mstring);
        !           451:                    if (ptr != NIL && s1 != s) {
        !           452:
        !           453:                        /* we have a match, remember the match */
        !           454:                        strncpy (mstring, s, s1 - s);
        !           455:                        mstring[s1 - s] = '\0';
        !           456:                        return (ptr);
        !           457:                    } else if (ptr != NIL && (*cs & OPT)) {
        !           458:
        !           459:                        /* it was aoptional so no match is ok */
        !           460:                        return (ptr);
        !           461:                    } else if (ptr != NIL) {
        !           462:
        !           463:                        /* not optional and we still matched */
        !           464:                        return (NIL);
        !           465:                    }
        !           466:                    if (!isalnum(*s1) && *s1 != '_')
        !           467:                        return (NIL);
        !           468:                    if (*s1 == '\\')
        !           469:                        _escaped = _escaped ? FALSE : TRUE;
        !           470:                    else
        !           471:                        _escaped = FALSE;
        !           472:                } while (*s1++);
        !           473:                return (NIL);
        !           474:
        !           475:            /* try to match anything */
        !           476:            case 'a':
        !           477:                /*
        !           478:                 *  This is really the same as trying the match the
        !           479:                 *  remaining parts of the expression to any subset
        !           480:                 *  of the string.
        !           481:                 */
        !           482:                s1 = s;
        !           483:                do {
        !           484:                    ptr = expmatch (s1, MNEXT(cs), mstring);
        !           485:                    if (ptr != NIL && s1 != s) {
        !           486:
        !           487:                        /* we have a match */
        !           488:                        return (ptr);
        !           489:                    } else if (ptr != NIL && (*cs & OPT)) {
        !           490:
        !           491:                        /* it was aoptional so no match is ok */
        !           492:                        return (ptr);
        !           493:                    } else if (ptr != NIL) {
        !           494:
        !           495:                        /* not optional and we still matched */
        !           496:                        return (NIL);
        !           497:                    }
        !           498:                    if (*s1 == '\\')
        !           499:                        _escaped = _escaped ? FALSE : TRUE;
        !           500:                    else
        !           501:                        _escaped = FALSE;
        !           502:                } while (*s1++);
        !           503:                return (NIL);
        !           504:
        !           505:            /* fail if we are currently _escaped */
        !           506:            case 'e':
        !           507:                if (_escaped)
        !           508:                    return(NIL);
        !           509:                cs = MNEXT(cs);
        !           510:                break;
        !           511:
        !           512:            /* match any number of tabs and spaces */
        !           513:            case 'd':
        !           514:                ptr = s;
        !           515:                while (*s == ' ' || *s == '\t')
        !           516:                    s++;
        !           517:                if (s != ptr || s == _start) {
        !           518:
        !           519:                    /* match, be happy */
        !           520:                    matched = 1;
        !           521:                    cs = MNEXT(cs);
        !           522:                } else if (*s == '\n' || *s == '\0') {
        !           523:
        !           524:                    /* match, be happy */
        !           525:                    matched = 1;
        !           526:                    cs = MNEXT(cs);
        !           527:                } else if (*cs & ALT) {
        !           528:
        !           529:                    /* try the next part */
        !           530:                    matched = 0;
        !           531:                    cs = MNEXT(cs);
        !           532:                } else if (*cs & OPT) {
        !           533:
        !           534:                    /* doesn't matter */
        !           535:                    matched = 1;
        !           536:                    cs = MNEXT(cs);
        !           537:                } else
        !           538:
        !           539:                    /* no match, error return */
        !           540:                    return (NIL);
        !           541:                break;
        !           542:
        !           543:            /* check for end of line */
        !           544:            case '$':
        !           545:                if (*s == '\0' || *s == '\n') {
        !           546:
        !           547:                    /* match, be happy */
        !           548:                    s++;
        !           549:                    matched = 1;
        !           550:                    cs = MNEXT(cs);
        !           551:                } else if (*cs & ALT) {
        !           552:
        !           553:                    /* try the next part */
        !           554:                    matched = 0;
        !           555:                    cs = MNEXT(cs);
        !           556:                } else if (*cs & OPT) {
        !           557:
        !           558:                    /* doesn't matter */
        !           559:                    matched = 1;
        !           560:                    cs = MNEXT(cs);
        !           561:                } else
        !           562:
        !           563:                    /* no match, error return */
        !           564:                    return (NIL);
        !           565:                break;
        !           566:
        !           567:            /* check for start of line */
        !           568:            case '^':
        !           569:                if (s == _start) {
        !           570:
        !           571:                    /* match, be happy */
        !           572:                    matched = 1;
        !           573:                    cs = MNEXT(cs);
        !           574:                } else if (*cs & ALT) {
        !           575:
        !           576:                    /* try the next part */
        !           577:                    matched = 0;
        !           578:                    cs = MNEXT(cs);
        !           579:                } else if (*cs & OPT) {
        !           580:
        !           581:                    /* doesn't matter */
        !           582:                    matched = 1;
        !           583:                    cs = MNEXT(cs);
        !           584:                } else
        !           585:
        !           586:                    /* no match, error return */
        !           587:                    return (NIL);
        !           588:                break;
        !           589:
        !           590:            /* end of a subexpression, return success */
        !           591:            case ')':
        !           592:                return (s);
        !           593:            }
        !           594:            break;
        !           595:        }
        !           596:     }
        !           597:     return (s);
        !           598: }