src/usr.bin/rsync/rmatch.c - annotate

Return to rmatch.c CVS log
Up to [local] / src / usr.bin / rsync
Annotation of src/usr.bin/rsync/rmatch.c, Revision 1.2

1.2     ! claudio     1: /*     $OpenBSD: rmatch.c,v 1.1 2021/08/29 13:43:46 claudio Exp $      */
1.1       claudio     2:
                      3: /*
                      4:  * Copyright (c) 2021 Claudio Jeker <claudio@openbsd.org>
                      5:  *
                      6:  * Permission to use, copy, modify, and distribute this software for any
                      7:  * purpose with or without fee is hereby granted, provided that the above
                      8:  * copyright notice and this permission notice appear in all copies.
                      9:  *
                     10:  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
                     11:  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
                     12:  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
                     13:  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
                     14:  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
                     15:  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
                     16:  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
                     17:  */
                     18:
                     19: /*
                     20:  * Copyright (c) 1989, 1993, 1994
                     21:  *     The Regents of the University of California.  All rights reserved.
                     22:  *
                     23:  * This code is derived from software contributed to Berkeley by
                     24:  * Guido van Rossum.
                     25:  *
                     26:  * Redistribution and use in source and binary forms, with or without
                     27:  * modification, are permitted provided that the following conditions
                     28:  * are met:
                     29:  * 1. Redistributions of source code must retain the above copyright
                     30:  *    notice, this list of conditions and the following disclaimer.
                     31:  * 2. Redistributions in binary form must reproduce the above copyright
                     32:  *    notice, this list of conditions and the following disclaimer in the
                     33:  *    documentation and/or other materials provided with the distribution.
                     34:  * 3. Neither the name of the University nor the names of its contributors
                     35:  *    may be used to endorse or promote products derived from this software
                     36:  *    without specific prior written permission.
                     37:  *
                     38:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
                     39:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     40:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     41:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
                     42:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
                     43:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
                     44:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
                     45:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
                     46:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
                     47:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                     48:  * SUCH DAMAGE.
                     49:  */
                     50:
                     51: #include <ctype.h>
                     52: #include <stdio.h>
                     53: #include <string.h>
                     54: #include <limits.h>
                     55:
                     56: #include "charclass.h"
1.2     ! claudio    57: #include "extern.h"
1.1       claudio    58:
                     59: #define        RANGE_MATCH     1
                     60: #define        RANGE_NOMATCH   0
                     61: #define        RANGE_ERROR     (-1)
                     62:
                     63: static int
                     64: classmatch(const char *pattern, char test, const char **ep)
                     65: {
                     66:        const char *mismatch = pattern;
                     67:        const struct cclass *cc;
                     68:        const char *colon;
                     69:        size_t len;
                     70:        int rval = RANGE_NOMATCH;
                     71:
                     72:        if (*pattern++ != ':') {
                     73:                *ep = mismatch;
                     74:                return RANGE_ERROR;
                     75:        }
                     76:        if ((colon = strchr(pattern, ':')) == NULL || colon[1] != ']') {
                     77:                *ep = mismatch;
                     78:                return RANGE_ERROR;
                     79:        }
                     80:        *ep = colon + 2;
                     81:        len = (size_t)(colon - pattern);
                     82:
                     83:        for (cc = cclasses; cc->name != NULL; cc++) {
                     84:                if (!strncmp(pattern, cc->name, len) && cc->name[len] == '\0') {
                     85:                        if (cc->isctype((unsigned char)test))
                     86:                                rval = RANGE_MATCH;
                     87:                        return rval;
                     88:                }
                     89:        }
                     90:
                     91:        /* invalid character class, treat as normal text */
                     92:        *ep = mismatch;
                     93:        return RANGE_ERROR;
                     94: }
                     95:
                     96: static int
                     97: rangematch(const char **pp, char test)
                     98: {
                     99:        const char *pattern = *pp;
                    100:        int negate, ok;
                    101:        char c, c2;
                    102:
                    103:        /*
                    104:         * A bracket expression starting with an unquoted circumflex
                    105:         * character produces unspecified results (IEEE 1003.2-1992,
                    106:         * 3.13.2).  This implementation treats it like '!', for
                    107:         * consistency with the regular expression syntax.
                    108:         * J.T. Conklin (conklin@ngai.kaleida.com)
                    109:         */
                    110:        if ((negate = (*pattern == '!' || *pattern == '^')))
                    111:                ++pattern;
                    112:
                    113:        /*
                    114:         * A right bracket shall lose its special meaning and represent
                    115:         * itself in a bracket expression if it occurs first in the list.
                    116:         * -- POSIX.2 2.8.3.2
                    117:         */
                    118:        ok = 0;
                    119:        c = *pattern++;
                    120:        do {
                    121:                if (c == '[') {
                    122:                        switch (classmatch(pattern, test, &pattern)) {
                    123:                        case RANGE_MATCH:
                    124:                                ok = 1;
                    125:                                continue;
                    126:                        case RANGE_NOMATCH:
                    127:                                continue;
                    128:                        default:
                    129:                                /* invalid character class, treat litterally. */
                    130:                                break;
                    131:                        }
                    132:                }
                    133:                if (c == '\\')
                    134:                        c = *pattern++;
                    135:                if (c == '\0')
                    136:                        return RANGE_ERROR;
                    137:                /* patterns can not match on '/' */
                    138:                if (c == '/')
                    139:                        return RANGE_NOMATCH;
                    140:                if (*pattern == '-'
                    141:                    && (c2 = *(pattern + 1)) != '\0' && c2 != ']') {
                    142:                        pattern += 2;
                    143:                        if (c2 == '\\')
                    144:                                c2 = *pattern++;
                    145:                        if (c2 == '\0')
                    146:                                return RANGE_ERROR;
                    147:                        if (c <= test && test <= c2)
                    148:                                ok = 1;
                    149:                } else if (c == test)
                    150:                        ok = 1;
                    151:        } while ((c = *pattern++) != ']');
                    152:
                    153:        *pp = pattern;
                    154:        return (ok == negate ? RANGE_NOMATCH : RANGE_MATCH);
                    155: }
                    156:
                    157: /*
                    158:  * Single character match, advances pattern as much as needed.
                    159:  * Return 0 on match and !0 (aka 1) on missmatch.
                    160:  * When matched pp is advanced to the end of the pattern matched.
                    161:  */
                    162: static int
                    163: matchchar(const char **pp, const char in)
                    164: {
                    165:        const char *pattern = *pp;
                    166:        char c;
                    167:        int rv = 0;
                    168:
                    169:        switch (c = *pattern++) {
                    170:        case '?':
                    171:                if (in == '\0')
                    172:                        rv = 1;
                    173:                if (in == '/')
                    174:                        rv = 1;
                    175:                break;
                    176:        case '[':
                    177:                if (in == '\0')
                    178:                        rv = 1;
                    179:                if (in == '/')
                    180:                        rv = 1;
                    181:                if (rv == 1)
                    182:                        break;
                    183:
                    184:                switch (rangematch(&pattern, in)) {
                    185:                case RANGE_ERROR:
                    186:                        /* not a good range, treat as normal text */
                    187:                        goto normal;
                    188:                case RANGE_MATCH:
                    189:                        break;
                    190:                case RANGE_NOMATCH:
                    191:                        rv = 1;
                    192:                }
                    193:                break;
                    194:        case '\\':
                    195:                if ((c = *pattern++) == '\0') {
                    196:                        c = '\\';
                    197:                        --pattern;
                    198:                }
                    199:                /* FALLTHROUGH */
                    200:        default:
                    201:        normal:
                    202:                if (c != in)
                    203:                        rv = 1;
                    204:                break;
                    205:        }
                    206:
                    207:        *pp = pattern;
                    208:        return rv;
                    209: }
                    210:
                    211: /*
                    212:  * Do a substring match. If wild is set then the pattern started with a '*'.
                    213:  * The match will go until '*', '/' or '\0' is encountered in pattern or
                    214:  * the input string is consumed up to end.
                    215:  * The pattern and string handles pp and ss are updated only on success.
                    216:  */
                    217: static int
                    218: matchsub(const char **pp, const char **ss, const char *end, int wild)
                    219: {
                    220:        const char *pattern = *pp;
                    221:        const char *p = pattern;
                    222:        const char *string = *ss;
                    223:        size_t matchlen;
                    224:
                    225:        /* first calculate how many characters the submatch will consume */
                    226:        for (matchlen = 0; *p != '\0'; matchlen++) {
                    227:                if (p[0] == '*')
                    228:                        break;
                    229:                /* '/' acts as barrier */
                    230:                if (p[0] == '/' || (p[0] == '\\' && p[1] == '/')) {
                    231:                        if (wild) {
                    232:                                /* match needs to match up to end of segment */
                    233:                                if (string > end - matchlen)
                    234:                                        return 1;
                    235:                                string = end - matchlen;
                    236:                                wild = 0;
                    237:                        }
                    238:                        break;
                    239:                }
                    240:                /*
                    241:                 * skip forward one character in pattern by doing a
                    242:                 * dummy lookup.
                    243:                 */
                    244:                matchchar(&p, ' ');
                    245:        }
                    246:
                    247:        /* not enough char to match */
                    248:        if (string > end - matchlen)
                    249:                return 1;
                    250:
                    251:        if (*p == '\0') {
                    252:                if (wild) {
                    253:                        /* match needs to match up to end of segment */
                    254:                        string = end - matchlen;
                    255:                        wild = 0;
                    256:                }
                    257:        }
                    258:
                    259:        while (*pattern != '\0' && *pattern != '*') {
                    260:                /* eat possible escape char before '/' */
                    261:                if (pattern[0] == '\\' && pattern[1] == '/')
                    262:                        pattern++;
                    263:                if (pattern[0] == '/')
                    264:                        break;
                    265:
                    266:                /* check if there are still characters available to compare */
                    267:                if (string >= end)
                    268:                        return 1;
                    269:                /* Compare one char at a time. */
                    270:                if (!matchchar(&pattern, *string++))
                    271:                        continue;
                    272:                if (wild) {
                    273:                        /* skip forward one char and restart match */
                    274:                        string = ++*ss;
                    275:                        pattern = *pp;
                    276:                        /* can it still match? */
                    277:                        if (string > end - matchlen)
                    278:                                return 1;
                    279:                } else {
                    280:                        /* failed match */
                    281:                        return 1;
                    282:                }
                    283:        }
                    284:
                    285:        *pp = pattern;
                    286:        *ss = string;
                    287:        return 0;
                    288: }
                    289:
                    290: /*
                    291:  * File matching with the addition of the special '**'.
                    292:  * Returns 0 on match and !0 for strings that do not match pattern.
                    293:  */
                    294: int
                    295: rmatch(const char *pattern, const char *string, int leading_dir)
                    296: {
                    297:        const char *segend, *segnext, *mismatch = NULL;
                    298:        int wild, starstar;
                    299:
                    300:        while (*pattern && *string) {
                    301:
                    302:                /* handle leading '/' first */
                    303:                if (pattern[0] == '\\' && pattern[1] == '/')
                    304:                        pattern++;
                    305:                if (*string == '/' && *pattern == '/') {
                    306:                        string++;
                    307:                        pattern++;
                    308:                }
                    309:
                    310:                /* match to the next '/' in string */
                    311:                segend = strchr(string, '/');
                    312:                if (segend == NULL)
                    313:                        segend = strchr(string, '\0');
                    314:
                    315:                while (*pattern) {
                    316:                        /*
                    317:                         * Check for '*' and '**'. For '*' reduce '*' and '?'
                    318:                         * sequences into n-'?' and trailing '*'.
                    319:                         * For '**' this optimisation can not be done
                    320:                         * since '**???/' will match 'a/aa/aaa/' but not
                    321:                         * 'a/aa/aa/' still additional '*' will be reduced.
                    322:                         */
                    323:                        wild = 0;
                    324:                        starstar = 0;
                    325:                        for ( ; *pattern == '*' || *pattern == '?'; pattern++) {
                    326:                                if (pattern[0] == '*') {
                    327:                                        if (pattern[1] == '*') {
                    328:                                                starstar = 1;
                    329:                                                pattern++;
                    330:                                        }
                    331:                                        wild = 1;
                    332:                                } else if (!starstar) { /* pattern[0] == '?' */
                    333:                                        if (string < segend && *string != '/')
                    334:                                                string++;
                    335:                                        else
                    336:                                                /* no match possible */
                    337:                                                return 1;
                    338:                                } else
                    339:                                        break;
                    340:                        }
                    341:
                    342:                        /* pattern ends in '**' so it is a match */
                    343:                        if (starstar && *pattern == '\0')
                    344:                                return 0;
                    345:
                    346:                        if (starstar) {
                    347:                                segnext = segend;
                    348:                                mismatch = pattern;
                    349:                        }
                    350:
                    351:                        while (string < segend) {
                    352:                                if (matchsub(&pattern, &string, segend, wild)) {
                    353: failed_match:
                    354:                                        /*
                    355:                                         * failed to match, if starstar retry
                    356:                                         * with the next segment.
                    357:                                         */
                    358:                                        if (mismatch) {
                    359:                                                pattern = mismatch;
                    360:                                                wild = 1;
                    361:                                                string = segnext;
                    362:                                                if (*string == '/')
                    363:                                                        string++;
                    364:                                                segend = strchr(string, '/');
                    365:                                                if (!segend)
                    366:                                                        segend = strchr(string,
                    367:                                                            '\0');
                    368:                                                segnext = segend;
                    369:                                                if (string < segend)
                    370:                                                        continue;
                    371:                                        }
                    372:                                        /* no match possible */
                    373:                                        return 1;
                    374:                                }
                    375:                                break;
                    376:                        }
                    377:
                    378:                        /* at end of string segment, eat up any extra '*' */
                    379:                        if (string >= segend && *pattern != '*')
                    380:                                break;
                    381:                }
                    382:                if (*string != '\0' && *string != '/')
                    383:                        goto failed_match;
                    384:                if (*pattern != '\0' && *pattern != '/')
                    385:                        goto failed_match;
                    386:        }
                    387:
                    388:        /* if both pattern and string are consumed it was a match */
                    389:        if (*pattern == '\0' && *string == '\0')
                    390:                return 0;
                    391:        /* if leading_dir is set then string can also be '/' for success */
                    392:        if (leading_dir && *pattern == '\0' && *string == '/')
                    393:                return 0;
                    394:        /* else failure */
                    395:        return 1;
                    396: }