[BACK]Return to regexp.c CVS log [TXT][DIR] Up to [local] / src / usr.bin / vgrind

Annotation of src/usr.bin/vgrind/regexp.c, Revision 1.8

1.8     ! deraadt     1: /*     $OpenBSD: regexp.c,v 1.7 2003/06/03 02:56:21 millert Exp $      */
1.1       deraadt     2: /*     $NetBSD: regexp.c,v 1.3 1994/11/17 08:28:02 jtc Exp $   */
                      3:
                      4: /*
                      5:  * Copyright (c) 1980, 1993
                      6:  *     The Regents of the University of California.  All rights reserved.
                      7:  *
                      8:  *
                      9:  * Redistribution and use in source and binary forms, with or without
                     10:  * modification, are permitted provided that the following conditions
                     11:  * are met:
                     12:  * 1. Redistributions of source code must retain the above copyright
                     13:  *    notice, this list of conditions and the following disclaimer.
                     14:  * 2. Redistributions in binary form must reproduce the above copyright
                     15:  *    notice, this list of conditions and the following disclaimer in the
                     16:  *    documentation and/or other materials provided with the distribution.
1.7       millert    17:  * 3. Neither the name of the University nor the names of its contributors
1.1       deraadt    18:  *    may be used to endorse or promote products derived from this software
                     19:  *    without specific prior written permission.
                     20:  *
                     21:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
                     22:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     23:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     24:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
                     25:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
                     26:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
                     27:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
                     28:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
                     29:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
                     30:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                     31:  * SUCH DAMAGE.
                     32:  */
                     33:
                     34: #include <ctype.h>
                     35: #include <stdlib.h>
                     36: #include <string.h>
                     37: #include "extern.h"
                     38:
                     39: #define FALSE  0
                     40: #define TRUE   !(FALSE)
                     41: #define NIL    0
                     42:
1.4       millert    43: static void    expconv(void);
1.1       deraadt    44:
1.5       deraadt    45: boolean         x_escaped;     /* true if we are currently x_escaped */
                     46: char   *x_start;       /* start of string */
1.1       deraadt    47: boolean         l_onecase;     /* true if upper and lower equivalent */
                     48:
                     49: #define makelower(c) (isupper((c)) ? tolower((c)) : (c))
                     50:
                     51: /*  STRNCMP -  like strncmp except that we convert the
                     52:  *             first string to lower case before comparing
                     53:  *             if l_onecase is set.
                     54:  */
                     55:
                     56: int
1.6       deraadt    57: STRNCMP(char *s1, char *s2, int len)
1.1       deraadt    58: {
                     59:        if (l_onecase) {
                     60:            do
                     61:                if (*s2 - makelower(*s1))
                     62:                        return (*s2 - makelower(*s1));
                     63:                else {
                     64:                        s2++;
                     65:                        s1++;
                     66:                }
                     67:            while (--len);
                     68:        } else {
                     69:            do
                     70:                if (*s2 - *s1)
                     71:                        return (*s2 - *s1);
                     72:                else {
                     73:                        s2++;
                     74:                        s1++;
                     75:                }
                     76:            while (--len);
                     77:        }
                     78:        return(0);
                     79: }
                     80:
                     81: /*     The following routine converts an irregular expression to
                     82:  *     internal format.
                     83:  *
                     84:  *     Either meta symbols (\a \d or \p) or character strings or
1.5       deraadt    85:  *     operations ( alternation or parenthesizing ) can be
1.1       deraadt    86:  *     specified.  Each starts with a descriptor byte.  The descriptor
                     87:  *     byte has STR set for strings, META set for meta symbols
                     88:  *     and OPER set for operations.
                     89:  *     The descriptor byte can also have the OPT bit set if the object
                     90:  *     defined is optional.  Also ALT can be set to indicate an alternation.
                     91:  *
1.5       deraadt    92:  *     For metasymbols the byte following the descriptor byte identifies
1.1       deraadt    93:  *     the meta symbol (containing an ascii 'a', 'd', 'p', '|', or '(').  For
                     94:  *     strings the byte after the descriptor is a character count for
                     95:  *     the string:
                     96:  *
                     97:  *             meta symbols := descriptor
                     98:  *                             symbol
                     99:  *
                    100:  *             strings :=      descriptor
                    101:  *                             character count
                    102:  *                             the string
                    103:  *
                    104:  *             operatins :=    descriptor
                    105:  *                             symbol
                    106:  *                             character count
                    107:  */
                    108:
                    109: /*
                    110:  *  handy macros for accessing parts of match blocks
                    111:  */
                    112: #define MSYM(A) (*(A+1))       /* symbol in a meta symbol block */
                    113: #define MNEXT(A) (A+2)         /* character following a metasymbol block */
                    114:
                    115: #define OSYM(A) (*(A+1))       /* symbol in an operation block */
                    116: #define OCNT(A) (*(A+2))       /* character count */
                    117: #define ONEXT(A) (A+3)         /* next character after the operation */
                    118: #define OPTR(A) (A+*(A+2))     /* place pointed to by the operator */
                    119:
                    120: #define SCNT(A) (*(A+1))       /* byte count of a string */
                    121: #define SSTR(A) (A+2)          /* address of the string */
                    122: #define SNEXT(A) (A+2+*(A+1))  /* character following the string */
                    123:
                    124: /*
                    125:  *  bit flags in the descriptor
                    126:  */
                    127: #define OPT 1
                    128: #define STR 2
                    129: #define META 4
                    130: #define ALT 8
                    131: #define OPER 16
                    132:
                    133: static char *ccre;     /* pointer to current position in converted exp*/
                    134: static char *ure;      /* pointer current position in unconverted exp */
                    135:
                    136: char *
1.6       deraadt   137: convexp(char *re)
1.1       deraadt   138: {
1.3       mpech     139:     char *cre;         /* pointer to converted regular expression */
1.1       deraadt   140:
                    141:     /* allocate room for the converted expression */
                    142:     if (re == NIL)
                    143:        return (NIL);
                    144:     if (*re == '\0')
                    145:        return (NIL);
                    146:     cre = malloc (4 * strlen(re) + 3);
                    147:     ccre = cre;
                    148:     ure = re;
                    149:
                    150:     /* start the conversion with a \a */
                    151:     *cre = META | OPT;
                    152:     MSYM(cre) = 'a';
                    153:     ccre = MNEXT(cre);
                    154:
                    155:     /* start the conversion (its recursive) */
                    156:     expconv ();
                    157:     *ccre = 0;
                    158:     return (cre);
                    159: }
                    160:
                    161: static void
1.6       deraadt   162: expconv(void)
1.1       deraadt   163: {
1.3       mpech     164:     char *cs;          /* pointer to current symbol in converted exp */
                    165:     char c;            /* character being processed */
1.5       deraadt   166:     char *acs;         /* pointer to last alternate */
1.3       mpech     167:     int temp;
1.1       deraadt   168:
                    169:     /* let the conversion begin */
                    170:     acs = NIL;
                    171:     cs = NIL;
                    172:     while (*ure != NIL) {
                    173:        switch (c = *ure++) {
                    174:
                    175:        case '\\':
                    176:            switch (c = *ure++) {
                    177:
                    178:            /* escaped characters are just characters */
                    179:            default:
                    180:                if (cs == NIL || (*cs & STR) == 0) {
                    181:                    cs = ccre;
                    182:                    *cs = STR;
                    183:                    SCNT(cs) = 1;
                    184:                    ccre += 2;
                    185:                } else
                    186:                    SCNT(cs)++;
                    187:                *ccre++ = c;
                    188:                break;
                    189:
                    190:            /* normal(?) metacharacters */
                    191:            case 'a':
                    192:            case 'd':
                    193:            case 'e':
                    194:            case 'p':
                    195:                if (acs != NIL && acs != cs) {
                    196:                    do {
                    197:                        temp = OCNT(acs);
                    198:                        OCNT(acs) = ccre - acs;
                    199:                        acs -= temp;
                    200:                    } while (temp != 0);
                    201:                    acs = NIL;
                    202:                }
                    203:                cs = ccre;
                    204:                *cs = META;
                    205:                MSYM(cs) = c;
                    206:                ccre = MNEXT(cs);
                    207:                break;
                    208:            }
                    209:            break;
                    210:
                    211:        /* just put the symbol in */
                    212:        case '^':
                    213:        case '$':
                    214:            if (acs != NIL && acs != cs) {
                    215:                do {
                    216:                    temp = OCNT(acs);
                    217:                    OCNT(acs) = ccre - acs;
                    218:                    acs -= temp;
                    219:                } while (temp != 0);
                    220:                acs = NIL;
                    221:            }
                    222:            cs = ccre;
                    223:            *cs = META;
                    224:            MSYM(cs) = c;
                    225:            ccre = MNEXT(cs);
                    226:            break;
                    227:
                    228:        /* mark the last match sequence as optional */
                    229:        case '?':
                    230:            if (cs)
                    231:                *cs = *cs | OPT;
                    232:            break;
                    233:
                    234:        /* recurse and define a subexpression */
                    235:        case '(':
                    236:            if (acs != NIL && acs != cs) {
                    237:                do {
                    238:                    temp = OCNT(acs);
                    239:                    OCNT(acs) = ccre - acs;
                    240:                    acs -= temp;
                    241:                } while (temp != 0);
                    242:                acs = NIL;
                    243:            }
                    244:            cs = ccre;
                    245:            *cs = OPER;
                    246:            OSYM(cs) = '(';
                    247:            ccre = ONEXT(cs);
                    248:            expconv ();
                    249:            OCNT(cs) = ccre - cs;               /* offset to next symbol */
                    250:            break;
                    251:
1.5       deraadt   252:        /* return from a recursion */
1.1       deraadt   253:        case ')':
                    254:            if (acs != NIL) {
                    255:                do {
                    256:                    temp = OCNT(acs);
                    257:                    OCNT(acs) = ccre - acs;
                    258:                    acs -= temp;
                    259:                } while (temp != 0);
                    260:                acs = NIL;
                    261:            }
                    262:            cs = ccre;
                    263:            *cs = META;
                    264:            MSYM(cs) = c;
                    265:            ccre = MNEXT(cs);
                    266:            return;
                    267:
                    268:        /* mark the last match sequence as having an alternate */
                    269:        /* the third byte will contain an offset to jump over the */
                    270:        /* alternate match in case the first did not fail */
                    271:        case '|':
                    272:            if (acs != NIL && acs != cs)
                    273:                OCNT(ccre) = ccre - acs;        /* make a back pointer */
                    274:            else
                    275:                OCNT(ccre) = 0;
                    276:            *cs |= ALT;
                    277:            cs = ccre;
                    278:            *cs = OPER;
                    279:            OSYM(cs) = '|';
                    280:            ccre = ONEXT(cs);
                    281:            acs = cs;   /* remember that the pointer is to be filles */
                    282:            break;
                    283:
1.5       deraadt   284:        /* if its not a metasymbol just build a character string */
1.1       deraadt   285:        default:
                    286:            if (cs == NIL || (*cs & STR) == 0) {
                    287:                cs = ccre;
                    288:                *cs = STR;
                    289:                SCNT(cs) = 1;
                    290:                ccre = SSTR(cs);
                    291:            } else
                    292:                SCNT(cs)++;
                    293:            *ccre++ = c;
                    294:            break;
                    295:        }
                    296:     }
                    297:     if (acs != NIL) {
                    298:        do {
                    299:            temp = OCNT(acs);
                    300:            OCNT(acs) = ccre - acs;
                    301:            acs -= temp;
                    302:        } while (temp != 0);
                    303:        acs = NIL;
                    304:     }
                    305:     return;
                    306: }
1.5       deraadt   307: /* end of converter */
1.1       deraadt   308:
                    309:
                    310: /*
1.5       deraadt   311:  *     The following routine recognises an irregular expression
1.1       deraadt   312:  *     with the following special characters:
                    313:  *
                    314:  *             \?      -       means last match was optional
                    315:  *             \a      -       matches any number of characters
                    316:  *             \d      -       matches any number of spaces and tabs
                    317:  *             \p      -       matches any number of alphanumeric
                    318:  *                             characters. The
                    319:  *                             characters matched will be copied into
                    320:  *                             the area pointed to by 'name'.
                    321:  *             \|      -       alternation
                    322:  *             \( \)   -       grouping used mostly for alternation and
                    323:  *                             optionality
                    324:  *
                    325:  *     The irregular expression must be translated to internal form
                    326:  *     prior to calling this routine
                    327:  *
                    328:  *     The value returned is the pointer to the first non \a
                    329:  *     character matched.
                    330:  */
                    331:
                    332: char *
1.6       deraadt   333: expmatch(char *s, char *re, char *mstring)
1.1       deraadt   334: {
1.3       mpech     335:     char *cs;                  /* the current symbol */
                    336:     char *ptr,*s1;             /* temporary pointer */
1.1       deraadt   337:     boolean matched;           /* a temporary boolean */
                    338:
                    339:     /* initial conditions */
                    340:     if (re == NIL)
                    341:        return (NIL);
                    342:     cs = re;
                    343:     matched = FALSE;
                    344:
                    345:     /* loop till expression string is exhausted (or at least pretty tired) */
                    346:     while (*cs) {
                    347:        switch (*cs & (OPER | STR | META)) {
                    348:
                    349:        /* try to match a string */
                    350:        case STR:
                    351:            matched = !STRNCMP (s, SSTR(cs), SCNT(cs));
                    352:            if (matched) {
                    353:
                    354:                /* hoorah it matches */
                    355:                s += SCNT(cs);
                    356:                cs = SNEXT(cs);
                    357:            } else if (*cs & ALT) {
                    358:
                    359:                /* alternation, skip to next expression */
                    360:                cs = SNEXT(cs);
                    361:            } else if (*cs & OPT) {
                    362:
                    363:                /* the match is optional */
                    364:                cs = SNEXT(cs);
                    365:                matched = 1;            /* indicate a successful match */
                    366:            } else {
                    367:
                    368:                /* no match, error return */
                    369:                return (NIL);
                    370:            }
                    371:            break;
                    372:
                    373:        /* an operator, do something fancy */
                    374:        case OPER:
                    375:            switch (OSYM(cs)) {
                    376:
                    377:            /* this is an alternation */
                    378:            case '|':
                    379:                if (matched)
                    380:
                    381:                    /* last thing in the alternation was a match, skip ahead */
                    382:                    cs = OPTR(cs);
                    383:                else
                    384:
                    385:                    /* no match, keep trying */
                    386:                    cs = ONEXT(cs);
                    387:                break;
                    388:
                    389:            /* this is a grouping, recurse */
                    390:            case '(':
                    391:                ptr = expmatch (s, ONEXT(cs), mstring);
                    392:                if (ptr != NIL) {
                    393:
                    394:                    /* the subexpression matched */
                    395:                    matched = 1;
                    396:                    s = ptr;
                    397:                } else if (*cs & ALT) {
                    398:
                    399:                    /* alternation, skip to next expression */
                    400:                    matched = 0;
                    401:                } else if (*cs & OPT) {
                    402:
                    403:                    /* the match is optional */
                    404:                    matched = 1;        /* indicate a successful match */
                    405:                } else {
                    406:
                    407:                    /* no match, error return */
                    408:                    return (NIL);
                    409:                }
                    410:                cs = OPTR(cs);
                    411:                break;
                    412:            }
                    413:            break;
                    414:
                    415:        /* try to match a metasymbol */
                    416:        case META:
                    417:            switch (MSYM(cs)) {
                    418:
                    419:            /* try to match anything and remember what was matched */
                    420:            case 'p':
                    421:                /*
                    422:                 *  This is really the same as trying the match the
                    423:                 *  remaining parts of the expression to any subset
                    424:                 *  of the string.
                    425:                 */
                    426:                s1 = s;
                    427:                do {
                    428:                    ptr = expmatch (s1, MNEXT(cs), mstring);
                    429:                    if (ptr != NIL && s1 != s) {
                    430:
                    431:                        /* we have a match, remember the match */
                    432:                        strncpy (mstring, s, s1 - s);
                    433:                        mstring[s1 - s] = '\0';
                    434:                        return (ptr);
                    435:                    } else if (ptr != NIL && (*cs & OPT)) {
                    436:
1.5       deraadt   437:                        /* it was optional so no match is ok */
1.1       deraadt   438:                        return (ptr);
                    439:                    } else if (ptr != NIL) {
                    440:
                    441:                        /* not optional and we still matched */
                    442:                        return (NIL);
                    443:                    }
                    444:                    if (!isalnum(*s1) && *s1 != '_')
                    445:                        return (NIL);
                    446:                    if (*s1 == '\\')
1.5       deraadt   447:                        x_escaped = x_escaped ? FALSE : TRUE;
1.1       deraadt   448:                    else
1.5       deraadt   449:                        x_escaped = FALSE;
1.1       deraadt   450:                } while (*s1++);
                    451:                return (NIL);
                    452:
                    453:            /* try to match anything */
                    454:            case 'a':
                    455:                /*
                    456:                 *  This is really the same as trying the match the
                    457:                 *  remaining parts of the expression to any subset
                    458:                 *  of the string.
                    459:                 */
                    460:                s1 = s;
                    461:                do {
                    462:                    ptr = expmatch (s1, MNEXT(cs), mstring);
                    463:                    if (ptr != NIL && s1 != s) {
                    464:
                    465:                        /* we have a match */
                    466:                        return (ptr);
                    467:                    } else if (ptr != NIL && (*cs & OPT)) {
                    468:
1.5       deraadt   469:                        /* it was optional so no match is ok */
1.1       deraadt   470:                        return (ptr);
                    471:                    } else if (ptr != NIL) {
                    472:
                    473:                        /* not optional and we still matched */
                    474:                        return (NIL);
                    475:                    }
                    476:                    if (*s1 == '\\')
1.5       deraadt   477:                        x_escaped = x_escaped ? FALSE : TRUE;
1.1       deraadt   478:                    else
1.5       deraadt   479:                        x_escaped = FALSE;
1.1       deraadt   480:                } while (*s1++);
                    481:                return (NIL);
                    482:
1.5       deraadt   483:            /* fail if we are currently x_escaped */
1.1       deraadt   484:            case 'e':
1.5       deraadt   485:                if (x_escaped)
1.1       deraadt   486:                    return(NIL);
                    487:                cs = MNEXT(cs);
                    488:                break;
                    489:
                    490:            /* match any number of tabs and spaces */
                    491:            case 'd':
                    492:                ptr = s;
                    493:                while (*s == ' ' || *s == '\t')
                    494:                    s++;
1.5       deraadt   495:                if (s != ptr || s == x_start) {
1.1       deraadt   496:
                    497:                    /* match, be happy */
                    498:                    matched = 1;
                    499:                    cs = MNEXT(cs);
                    500:                } else if (*s == '\n' || *s == '\0') {
                    501:
                    502:                    /* match, be happy */
                    503:                    matched = 1;
                    504:                    cs = MNEXT(cs);
                    505:                } else if (*cs & ALT) {
                    506:
                    507:                    /* try the next part */
                    508:                    matched = 0;
                    509:                    cs = MNEXT(cs);
                    510:                } else if (*cs & OPT) {
                    511:
                    512:                    /* doesn't matter */
                    513:                    matched = 1;
                    514:                    cs = MNEXT(cs);
                    515:                } else
                    516:
                    517:                    /* no match, error return */
                    518:                    return (NIL);
                    519:                break;
                    520:
                    521:            /* check for end of line */
                    522:            case '$':
                    523:                if (*s == '\0' || *s == '\n') {
                    524:
                    525:                    /* match, be happy */
                    526:                    s++;
                    527:                    matched = 1;
                    528:                    cs = MNEXT(cs);
                    529:                } else if (*cs & ALT) {
                    530:
                    531:                    /* try the next part */
                    532:                    matched = 0;
                    533:                    cs = MNEXT(cs);
                    534:                } else if (*cs & OPT) {
                    535:
                    536:                    /* doesn't matter */
                    537:                    matched = 1;
                    538:                    cs = MNEXT(cs);
                    539:                } else
                    540:
                    541:                    /* no match, error return */
                    542:                    return (NIL);
                    543:                break;
                    544:
                    545:            /* check for start of line */
                    546:            case '^':
1.5       deraadt   547:                if (s == x_start) {
1.1       deraadt   548:
                    549:                    /* match, be happy */
                    550:                    matched = 1;
                    551:                    cs = MNEXT(cs);
                    552:                } else if (*cs & ALT) {
                    553:
                    554:                    /* try the next part */
                    555:                    matched = 0;
                    556:                    cs = MNEXT(cs);
                    557:                } else if (*cs & OPT) {
                    558:
                    559:                    /* doesn't matter */
                    560:                    matched = 1;
                    561:                    cs = MNEXT(cs);
                    562:                } else
                    563:
                    564:                    /* no match, error return */
                    565:                    return (NIL);
                    566:                break;
                    567:
                    568:            /* end of a subexpression, return success */
                    569:            case ')':
                    570:                return (s);
                    571:            }
                    572:            break;
                    573:        }
                    574:     }
                    575:     return (s);
                    576: }