src/usr.bin/vgrind/regexp.c - annotate

Return to regexp.c CVS log
Up to [local] / src / usr.bin / vgrind
Annotation of src/usr.bin/vgrind/regexp.c, Revision 1.2

1.2     ! deraadt     1: /*     $OpenBSD: regexp.c,v 1.3 1994/11/17 08:28:02 jtc Exp $  */
1.1       deraadt     2: /*     $NetBSD: regexp.c,v 1.3 1994/11/17 08:28:02 jtc Exp $   */
                      3:
                      4: /*
                      5:  * Copyright (c) 1980, 1993
                      6:  *     The Regents of the University of California.  All rights reserved.
                      7:  *
                      8:  *
                      9:  * Redistribution and use in source and binary forms, with or without
                     10:  * modification, are permitted provided that the following conditions
                     11:  * are met:
                     12:  * 1. Redistributions of source code must retain the above copyright
                     13:  *    notice, this list of conditions and the following disclaimer.
                     14:  * 2. Redistributions in binary form must reproduce the above copyright
                     15:  *    notice, this list of conditions and the following disclaimer in the
                     16:  *    documentation and/or other materials provided with the distribution.
                     17:  * 3. All advertising materials mentioning features or use of this software
                     18:  *    must display the following acknowledgement:
                     19:  *     This product includes software developed by the University of
                     20:  *     California, Berkeley and its contributors.
                     21:  * 4. Neither the name of the University nor the names of its contributors
                     22:  *    may be used to endorse or promote products derived from this software
                     23:  *    without specific prior written permission.
                     24:  *
                     25:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
                     26:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     27:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     28:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
                     29:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
                     30:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
                     31:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
                     32:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
                     33:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
                     34:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                     35:  * SUCH DAMAGE.
                     36:  */
                     37:
                     38: #ifndef lint
                     39: static char copyright[] =
                     40: "@(#) Copyright (c) 1980, 1993\n\
                     41:        The Regents of the University of California.  All rights reserved.\n";
                     42: #endif /* not lint */
                     43:
                     44: #ifndef lint
                     45: #if 0
                     46: static char sccsid[] = "@(#)regexp.c   8.1 (Berkeley) 6/6/93";
                     47: #endif
1.2     ! deraadt    48: static char rcsid[] = "$OpenBSD: regexp.c,v 1.3 1994/11/17 08:28:02 jtc Exp $";
1.1       deraadt    49: #endif /* not lint */
                     50:
                     51: #include <ctype.h>
                     52: #include <stdlib.h>
                     53: #include <string.h>
                     54: #include "extern.h"
                     55:
                     56: #define FALSE  0
                     57: #define TRUE   !(FALSE)
                     58: #define NIL    0
                     59:
                     60: static void    expconv __P((void));
                     61:
                     62: boolean         _escaped;      /* true if we are currently _escaped */
                     63: char   *_start;        /* start of string */
                     64: boolean         l_onecase;     /* true if upper and lower equivalent */
                     65:
                     66: #define makelower(c) (isupper((c)) ? tolower((c)) : (c))
                     67:
                     68: /*  STRNCMP -  like strncmp except that we convert the
                     69:  *             first string to lower case before comparing
                     70:  *             if l_onecase is set.
                     71:  */
                     72:
                     73: int
                     74: STRNCMP(s1, s2, len)
                     75:        register char *s1,*s2;
                     76:        register int len;
                     77: {
                     78:        if (l_onecase) {
                     79:            do
                     80:                if (*s2 - makelower(*s1))
                     81:                        return (*s2 - makelower(*s1));
                     82:                else {
                     83:                        s2++;
                     84:                        s1++;
                     85:                }
                     86:            while (--len);
                     87:        } else {
                     88:            do
                     89:                if (*s2 - *s1)
                     90:                        return (*s2 - *s1);
                     91:                else {
                     92:                        s2++;
                     93:                        s1++;
                     94:                }
                     95:            while (--len);
                     96:        }
                     97:        return(0);
                     98: }
                     99:
                    100: /*     The following routine converts an irregular expression to
                    101:  *     internal format.
                    102:  *
                    103:  *     Either meta symbols (\a \d or \p) or character strings or
                    104:  *     operations ( alternation or perenthesizing ) can be
                    105:  *     specified.  Each starts with a descriptor byte.  The descriptor
                    106:  *     byte has STR set for strings, META set for meta symbols
                    107:  *     and OPER set for operations.
                    108:  *     The descriptor byte can also have the OPT bit set if the object
                    109:  *     defined is optional.  Also ALT can be set to indicate an alternation.
                    110:  *
                    111:  *     For metasymbols the byte following the descriptor byte identities
                    112:  *     the meta symbol (containing an ascii 'a', 'd', 'p', '|', or '(').  For
                    113:  *     strings the byte after the descriptor is a character count for
                    114:  *     the string:
                    115:  *
                    116:  *             meta symbols := descriptor
                    117:  *                             symbol
                    118:  *
                    119:  *             strings :=      descriptor
                    120:  *                             character count
                    121:  *                             the string
                    122:  *
                    123:  *             operatins :=    descriptor
                    124:  *                             symbol
                    125:  *                             character count
                    126:  */
                    127:
                    128: /*
                    129:  *  handy macros for accessing parts of match blocks
                    130:  */
                    131: #define MSYM(A) (*(A+1))       /* symbol in a meta symbol block */
                    132: #define MNEXT(A) (A+2)         /* character following a metasymbol block */
                    133:
                    134: #define OSYM(A) (*(A+1))       /* symbol in an operation block */
                    135: #define OCNT(A) (*(A+2))       /* character count */
                    136: #define ONEXT(A) (A+3)         /* next character after the operation */
                    137: #define OPTR(A) (A+*(A+2))     /* place pointed to by the operator */
                    138:
                    139: #define SCNT(A) (*(A+1))       /* byte count of a string */
                    140: #define SSTR(A) (A+2)          /* address of the string */
                    141: #define SNEXT(A) (A+2+*(A+1))  /* character following the string */
                    142:
                    143: /*
                    144:  *  bit flags in the descriptor
                    145:  */
                    146: #define OPT 1
                    147: #define STR 2
                    148: #define META 4
                    149: #define ALT 8
                    150: #define OPER 16
                    151:
                    152: static char *ccre;     /* pointer to current position in converted exp*/
                    153: static char *ure;      /* pointer current position in unconverted exp */
                    154:
                    155: char *
                    156: convexp(re)
                    157:     char *re;          /* unconverted irregular expression */
                    158: {
                    159:     register char *cre;                /* pointer to converted regular expression */
                    160:
                    161:     /* allocate room for the converted expression */
                    162:     if (re == NIL)
                    163:        return (NIL);
                    164:     if (*re == '\0')
                    165:        return (NIL);
                    166:     cre = malloc (4 * strlen(re) + 3);
                    167:     ccre = cre;
                    168:     ure = re;
                    169:
                    170:     /* start the conversion with a \a */
                    171:     *cre = META | OPT;
                    172:     MSYM(cre) = 'a';
                    173:     ccre = MNEXT(cre);
                    174:
                    175:     /* start the conversion (its recursive) */
                    176:     expconv ();
                    177:     *ccre = 0;
                    178:     return (cre);
                    179: }
                    180:
                    181: static void
                    182: expconv()
                    183: {
                    184:     register char *cs;         /* pointer to current symbol in converted exp */
                    185:     register char c;           /* character being processed */
                    186:     register char *acs;                /* pinter to last alternate */
                    187:     register int temp;
                    188:
                    189:     /* let the conversion begin */
                    190:     acs = NIL;
                    191:     cs = NIL;
                    192:     while (*ure != NIL) {
                    193:        switch (c = *ure++) {
                    194:
                    195:        case '\\':
                    196:            switch (c = *ure++) {
                    197:
                    198:            /* escaped characters are just characters */
                    199:            default:
                    200:                if (cs == NIL || (*cs & STR) == 0) {
                    201:                    cs = ccre;
                    202:                    *cs = STR;
                    203:                    SCNT(cs) = 1;
                    204:                    ccre += 2;
                    205:                } else
                    206:                    SCNT(cs)++;
                    207:                *ccre++ = c;
                    208:                break;
                    209:
                    210:            /* normal(?) metacharacters */
                    211:            case 'a':
                    212:            case 'd':
                    213:            case 'e':
                    214:            case 'p':
                    215:                if (acs != NIL && acs != cs) {
                    216:                    do {
                    217:                        temp = OCNT(acs);
                    218:                        OCNT(acs) = ccre - acs;
                    219:                        acs -= temp;
                    220:                    } while (temp != 0);
                    221:                    acs = NIL;
                    222:                }
                    223:                cs = ccre;
                    224:                *cs = META;
                    225:                MSYM(cs) = c;
                    226:                ccre = MNEXT(cs);
                    227:                break;
                    228:            }
                    229:            break;
                    230:
                    231:        /* just put the symbol in */
                    232:        case '^':
                    233:        case '$':
                    234:            if (acs != NIL && acs != cs) {
                    235:                do {
                    236:                    temp = OCNT(acs);
                    237:                    OCNT(acs) = ccre - acs;
                    238:                    acs -= temp;
                    239:                } while (temp != 0);
                    240:                acs = NIL;
                    241:            }
                    242:            cs = ccre;
                    243:            *cs = META;
                    244:            MSYM(cs) = c;
                    245:            ccre = MNEXT(cs);
                    246:            break;
                    247:
                    248:        /* mark the last match sequence as optional */
                    249:        case '?':
                    250:            if (cs)
                    251:                *cs = *cs | OPT;
                    252:            break;
                    253:
                    254:        /* recurse and define a subexpression */
                    255:        case '(':
                    256:            if (acs != NIL && acs != cs) {
                    257:                do {
                    258:                    temp = OCNT(acs);
                    259:                    OCNT(acs) = ccre - acs;
                    260:                    acs -= temp;
                    261:                } while (temp != 0);
                    262:                acs = NIL;
                    263:            }
                    264:            cs = ccre;
                    265:            *cs = OPER;
                    266:            OSYM(cs) = '(';
                    267:            ccre = ONEXT(cs);
                    268:            expconv ();
                    269:            OCNT(cs) = ccre - cs;               /* offset to next symbol */
                    270:            break;
                    271:
                    272:        /* reurn from a recursion */
                    273:        case ')':
                    274:            if (acs != NIL) {
                    275:                do {
                    276:                    temp = OCNT(acs);
                    277:                    OCNT(acs) = ccre - acs;
                    278:                    acs -= temp;
                    279:                } while (temp != 0);
                    280:                acs = NIL;
                    281:            }
                    282:            cs = ccre;
                    283:            *cs = META;
                    284:            MSYM(cs) = c;
                    285:            ccre = MNEXT(cs);
                    286:            return;
                    287:
                    288:        /* mark the last match sequence as having an alternate */
                    289:        /* the third byte will contain an offset to jump over the */
                    290:        /* alternate match in case the first did not fail */
                    291:        case '|':
                    292:            if (acs != NIL && acs != cs)
                    293:                OCNT(ccre) = ccre - acs;        /* make a back pointer */
                    294:            else
                    295:                OCNT(ccre) = 0;
                    296:            *cs |= ALT;
                    297:            cs = ccre;
                    298:            *cs = OPER;
                    299:            OSYM(cs) = '|';
                    300:            ccre = ONEXT(cs);
                    301:            acs = cs;   /* remember that the pointer is to be filles */
                    302:            break;
                    303:
                    304:        /* if its not a metasymbol just build a scharacter string */
                    305:        default:
                    306:            if (cs == NIL || (*cs & STR) == 0) {
                    307:                cs = ccre;
                    308:                *cs = STR;
                    309:                SCNT(cs) = 1;
                    310:                ccre = SSTR(cs);
                    311:            } else
                    312:                SCNT(cs)++;
                    313:            *ccre++ = c;
                    314:            break;
                    315:        }
                    316:     }
                    317:     if (acs != NIL) {
                    318:        do {
                    319:            temp = OCNT(acs);
                    320:            OCNT(acs) = ccre - acs;
                    321:            acs -= temp;
                    322:        } while (temp != 0);
                    323:        acs = NIL;
                    324:     }
                    325:     return;
                    326: }
                    327: /* end of convertre */
                    328:
                    329:
                    330: /*
                    331:  *     The following routine recognises an irregular expresion
                    332:  *     with the following special characters:
                    333:  *
                    334:  *             \?      -       means last match was optional
                    335:  *             \a      -       matches any number of characters
                    336:  *             \d      -       matches any number of spaces and tabs
                    337:  *             \p      -       matches any number of alphanumeric
                    338:  *                             characters. The
                    339:  *                             characters matched will be copied into
                    340:  *                             the area pointed to by 'name'.
                    341:  *             \|      -       alternation
                    342:  *             \( \)   -       grouping used mostly for alternation and
                    343:  *                             optionality
                    344:  *
                    345:  *     The irregular expression must be translated to internal form
                    346:  *     prior to calling this routine
                    347:  *
                    348:  *     The value returned is the pointer to the first non \a
                    349:  *     character matched.
                    350:  */
                    351:
                    352: char *
                    353: expmatch (s, re, mstring)
                    354:     register char *s;          /* string to check for a match in */
                    355:     register char *re;         /* a converted irregular expression */
                    356:     register char *mstring;    /* where to put whatever matches a \p */
                    357: {
                    358:     register char *cs;         /* the current symbol */
                    359:     register char *ptr,*s1;    /* temporary pointer */
                    360:     boolean matched;           /* a temporary boolean */
                    361:
                    362:     /* initial conditions */
                    363:     if (re == NIL)
                    364:        return (NIL);
                    365:     cs = re;
                    366:     matched = FALSE;
                    367:
                    368:     /* loop till expression string is exhausted (or at least pretty tired) */
                    369:     while (*cs) {
                    370:        switch (*cs & (OPER | STR | META)) {
                    371:
                    372:        /* try to match a string */
                    373:        case STR:
                    374:            matched = !STRNCMP (s, SSTR(cs), SCNT(cs));
                    375:            if (matched) {
                    376:
                    377:                /* hoorah it matches */
                    378:                s += SCNT(cs);
                    379:                cs = SNEXT(cs);
                    380:            } else if (*cs & ALT) {
                    381:
                    382:                /* alternation, skip to next expression */
                    383:                cs = SNEXT(cs);
                    384:            } else if (*cs & OPT) {
                    385:
                    386:                /* the match is optional */
                    387:                cs = SNEXT(cs);
                    388:                matched = 1;            /* indicate a successful match */
                    389:            } else {
                    390:
                    391:                /* no match, error return */
                    392:                return (NIL);
                    393:            }
                    394:            break;
                    395:
                    396:        /* an operator, do something fancy */
                    397:        case OPER:
                    398:            switch (OSYM(cs)) {
                    399:
                    400:            /* this is an alternation */
                    401:            case '|':
                    402:                if (matched)
                    403:
                    404:                    /* last thing in the alternation was a match, skip ahead */
                    405:                    cs = OPTR(cs);
                    406:                else
                    407:
                    408:                    /* no match, keep trying */
                    409:                    cs = ONEXT(cs);
                    410:                break;
                    411:
                    412:            /* this is a grouping, recurse */
                    413:            case '(':
                    414:                ptr = expmatch (s, ONEXT(cs), mstring);
                    415:                if (ptr != NIL) {
                    416:
                    417:                    /* the subexpression matched */
                    418:                    matched = 1;
                    419:                    s = ptr;
                    420:                } else if (*cs & ALT) {
                    421:
                    422:                    /* alternation, skip to next expression */
                    423:                    matched = 0;
                    424:                } else if (*cs & OPT) {
                    425:
                    426:                    /* the match is optional */
                    427:                    matched = 1;        /* indicate a successful match */
                    428:                } else {
                    429:
                    430:                    /* no match, error return */
                    431:                    return (NIL);
                    432:                }
                    433:                cs = OPTR(cs);
                    434:                break;
                    435:            }
                    436:            break;
                    437:
                    438:        /* try to match a metasymbol */
                    439:        case META:
                    440:            switch (MSYM(cs)) {
                    441:
                    442:            /* try to match anything and remember what was matched */
                    443:            case 'p':
                    444:                /*
                    445:                 *  This is really the same as trying the match the
                    446:                 *  remaining parts of the expression to any subset
                    447:                 *  of the string.
                    448:                 */
                    449:                s1 = s;
                    450:                do {
                    451:                    ptr = expmatch (s1, MNEXT(cs), mstring);
                    452:                    if (ptr != NIL && s1 != s) {
                    453:
                    454:                        /* we have a match, remember the match */
                    455:                        strncpy (mstring, s, s1 - s);
                    456:                        mstring[s1 - s] = '\0';
                    457:                        return (ptr);
                    458:                    } else if (ptr != NIL && (*cs & OPT)) {
                    459:
                    460:                        /* it was aoptional so no match is ok */
                    461:                        return (ptr);
                    462:                    } else if (ptr != NIL) {
                    463:
                    464:                        /* not optional and we still matched */
                    465:                        return (NIL);
                    466:                    }
                    467:                    if (!isalnum(*s1) && *s1 != '_')
                    468:                        return (NIL);
                    469:                    if (*s1 == '\\')
                    470:                        _escaped = _escaped ? FALSE : TRUE;
                    471:                    else
                    472:                        _escaped = FALSE;
                    473:                } while (*s1++);
                    474:                return (NIL);
                    475:
                    476:            /* try to match anything */
                    477:            case 'a':
                    478:                /*
                    479:                 *  This is really the same as trying the match the
                    480:                 *  remaining parts of the expression to any subset
                    481:                 *  of the string.
                    482:                 */
                    483:                s1 = s;
                    484:                do {
                    485:                    ptr = expmatch (s1, MNEXT(cs), mstring);
                    486:                    if (ptr != NIL && s1 != s) {
                    487:
                    488:                        /* we have a match */
                    489:                        return (ptr);
                    490:                    } else if (ptr != NIL && (*cs & OPT)) {
                    491:
                    492:                        /* it was aoptional so no match is ok */
                    493:                        return (ptr);
                    494:                    } else if (ptr != NIL) {
                    495:
                    496:                        /* not optional and we still matched */
                    497:                        return (NIL);
                    498:                    }
                    499:                    if (*s1 == '\\')
                    500:                        _escaped = _escaped ? FALSE : TRUE;
                    501:                    else
                    502:                        _escaped = FALSE;
                    503:                } while (*s1++);
                    504:                return (NIL);
                    505:
                    506:            /* fail if we are currently _escaped */
                    507:            case 'e':
                    508:                if (_escaped)
                    509:                    return(NIL);
                    510:                cs = MNEXT(cs);
                    511:                break;
                    512:
                    513:            /* match any number of tabs and spaces */
                    514:            case 'd':
                    515:                ptr = s;
                    516:                while (*s == ' ' || *s == '\t')
                    517:                    s++;
                    518:                if (s != ptr || s == _start) {
                    519:
                    520:                    /* match, be happy */
                    521:                    matched = 1;
                    522:                    cs = MNEXT(cs);
                    523:                } else if (*s == '\n' || *s == '\0') {
                    524:
                    525:                    /* match, be happy */
                    526:                    matched = 1;
                    527:                    cs = MNEXT(cs);
                    528:                } else if (*cs & ALT) {
                    529:
                    530:                    /* try the next part */
                    531:                    matched = 0;
                    532:                    cs = MNEXT(cs);
                    533:                } else if (*cs & OPT) {
                    534:
                    535:                    /* doesn't matter */
                    536:                    matched = 1;
                    537:                    cs = MNEXT(cs);
                    538:                } else
                    539:
                    540:                    /* no match, error return */
                    541:                    return (NIL);
                    542:                break;
                    543:
                    544:            /* check for end of line */
                    545:            case '$':
                    546:                if (*s == '\0' || *s == '\n') {
                    547:
                    548:                    /* match, be happy */
                    549:                    s++;
                    550:                    matched = 1;
                    551:                    cs = MNEXT(cs);
                    552:                } else if (*cs & ALT) {
                    553:
                    554:                    /* try the next part */
                    555:                    matched = 0;
                    556:                    cs = MNEXT(cs);
                    557:                } else if (*cs & OPT) {
                    558:
                    559:                    /* doesn't matter */
                    560:                    matched = 1;
                    561:                    cs = MNEXT(cs);
                    562:                } else
                    563:
                    564:                    /* no match, error return */
                    565:                    return (NIL);
                    566:                break;
                    567:
                    568:            /* check for start of line */
                    569:            case '^':
                    570:                if (s == _start) {
                    571:
                    572:                    /* match, be happy */
                    573:                    matched = 1;
                    574:                    cs = MNEXT(cs);
                    575:                } else if (*cs & ALT) {
                    576:
                    577:                    /* try the next part */
                    578:                    matched = 0;
                    579:                    cs = MNEXT(cs);
                    580:                } else if (*cs & OPT) {
                    581:
                    582:                    /* doesn't matter */
                    583:                    matched = 1;
                    584:                    cs = MNEXT(cs);
                    585:                } else
                    586:
                    587:                    /* no match, error return */
                    588:                    return (NIL);
                    589:                break;
                    590:
                    591:            /* end of a subexpression, return success */
                    592:            case ')':
                    593:                return (s);
                    594:            }
                    595:            break;
                    596:        }
                    597:     }
                    598:     return (s);
                    599: }