src/usr.bin/rsync/rmatch.c - annotate

Return to rmatch.c CVS log
Up to [local] / src / usr.bin / rsync
Annotation of src/usr.bin/rsync/rmatch.c, Revision 1.1

1.1     ! claudio     1: /*     $OpenBSD: fnmatch.c,v 1.15 2011/02/10 21:31:59 stsp Exp $       */
        !             2:
        !             3: /*
        !             4:  * Copyright (c) 2021 Claudio Jeker <claudio@openbsd.org>
        !             5:  *
        !             6:  * Permission to use, copy, modify, and distribute this software for any
        !             7:  * purpose with or without fee is hereby granted, provided that the above
        !             8:  * copyright notice and this permission notice appear in all copies.
        !             9:  *
        !            10:  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
        !            11:  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
        !            12:  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
        !            13:  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
        !            14:  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
        !            15:  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
        !            16:  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
        !            17:  */
        !            18:
        !            19: /*
        !            20:  * Copyright (c) 1989, 1993, 1994
        !            21:  *     The Regents of the University of California.  All rights reserved.
        !            22:  *
        !            23:  * This code is derived from software contributed to Berkeley by
        !            24:  * Guido van Rossum.
        !            25:  *
        !            26:  * Redistribution and use in source and binary forms, with or without
        !            27:  * modification, are permitted provided that the following conditions
        !            28:  * are met:
        !            29:  * 1. Redistributions of source code must retain the above copyright
        !            30:  *    notice, this list of conditions and the following disclaimer.
        !            31:  * 2. Redistributions in binary form must reproduce the above copyright
        !            32:  *    notice, this list of conditions and the following disclaimer in the
        !            33:  *    documentation and/or other materials provided with the distribution.
        !            34:  * 3. Neither the name of the University nor the names of its contributors
        !            35:  *    may be used to endorse or promote products derived from this software
        !            36:  *    without specific prior written permission.
        !            37:  *
        !            38:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
        !            39:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
        !            40:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
        !            41:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
        !            42:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
        !            43:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
        !            44:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
        !            45:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
        !            46:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
        !            47:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
        !            48:  * SUCH DAMAGE.
        !            49:  */
        !            50:
        !            51: #include <ctype.h>
        !            52: #include <stdio.h>
        !            53: #include <string.h>
        !            54: #include <limits.h>
        !            55:
        !            56: #include "charclass.h"
        !            57:
        !            58: #define        RANGE_MATCH     1
        !            59: #define        RANGE_NOMATCH   0
        !            60: #define        RANGE_ERROR     (-1)
        !            61:
        !            62: static int
        !            63: classmatch(const char *pattern, char test, const char **ep)
        !            64: {
        !            65:        const char *mismatch = pattern;
        !            66:        const struct cclass *cc;
        !            67:        const char *colon;
        !            68:        size_t len;
        !            69:        int rval = RANGE_NOMATCH;
        !            70:
        !            71:        if (*pattern++ != ':') {
        !            72:                *ep = mismatch;
        !            73:                return RANGE_ERROR;
        !            74:        }
        !            75:        if ((colon = strchr(pattern, ':')) == NULL || colon[1] != ']') {
        !            76:                *ep = mismatch;
        !            77:                return RANGE_ERROR;
        !            78:        }
        !            79:        *ep = colon + 2;
        !            80:        len = (size_t)(colon - pattern);
        !            81:
        !            82:        for (cc = cclasses; cc->name != NULL; cc++) {
        !            83:                if (!strncmp(pattern, cc->name, len) && cc->name[len] == '\0') {
        !            84:                        if (cc->isctype((unsigned char)test))
        !            85:                                rval = RANGE_MATCH;
        !            86:                        return rval;
        !            87:                }
        !            88:        }
        !            89:
        !            90:        /* invalid character class, treat as normal text */
        !            91:        *ep = mismatch;
        !            92:        return RANGE_ERROR;
        !            93: }
        !            94:
        !            95: static int
        !            96: rangematch(const char **pp, char test)
        !            97: {
        !            98:        const char *pattern = *pp;
        !            99:        int negate, ok;
        !           100:        char c, c2;
        !           101:
        !           102:        /*
        !           103:         * A bracket expression starting with an unquoted circumflex
        !           104:         * character produces unspecified results (IEEE 1003.2-1992,
        !           105:         * 3.13.2).  This implementation treats it like '!', for
        !           106:         * consistency with the regular expression syntax.
        !           107:         * J.T. Conklin (conklin@ngai.kaleida.com)
        !           108:         */
        !           109:        if ((negate = (*pattern == '!' || *pattern == '^')))
        !           110:                ++pattern;
        !           111:
        !           112:        /*
        !           113:         * A right bracket shall lose its special meaning and represent
        !           114:         * itself in a bracket expression if it occurs first in the list.
        !           115:         * -- POSIX.2 2.8.3.2
        !           116:         */
        !           117:        ok = 0;
        !           118:        c = *pattern++;
        !           119:        do {
        !           120:                if (c == '[') {
        !           121:                        switch (classmatch(pattern, test, &pattern)) {
        !           122:                        case RANGE_MATCH:
        !           123:                                ok = 1;
        !           124:                                continue;
        !           125:                        case RANGE_NOMATCH:
        !           126:                                continue;
        !           127:                        default:
        !           128:                                /* invalid character class, treat litterally. */
        !           129:                                break;
        !           130:                        }
        !           131:                }
        !           132:                if (c == '\\')
        !           133:                        c = *pattern++;
        !           134:                if (c == '\0')
        !           135:                        return RANGE_ERROR;
        !           136:                /* patterns can not match on '/' */
        !           137:                if (c == '/')
        !           138:                        return RANGE_NOMATCH;
        !           139:                if (*pattern == '-'
        !           140:                    && (c2 = *(pattern + 1)) != '\0' && c2 != ']') {
        !           141:                        pattern += 2;
        !           142:                        if (c2 == '\\')
        !           143:                                c2 = *pattern++;
        !           144:                        if (c2 == '\0')
        !           145:                                return RANGE_ERROR;
        !           146:                        if (c <= test && test <= c2)
        !           147:                                ok = 1;
        !           148:                } else if (c == test)
        !           149:                        ok = 1;
        !           150:        } while ((c = *pattern++) != ']');
        !           151:
        !           152:        *pp = pattern;
        !           153:        return (ok == negate ? RANGE_NOMATCH : RANGE_MATCH);
        !           154: }
        !           155:
        !           156: /*
        !           157:  * Single character match, advances pattern as much as needed.
        !           158:  * Return 0 on match and !0 (aka 1) on missmatch.
        !           159:  * When matched pp is advanced to the end of the pattern matched.
        !           160:  */
        !           161: static int
        !           162: matchchar(const char **pp, const char in)
        !           163: {
        !           164:        const char *pattern = *pp;
        !           165:        char c;
        !           166:        int rv = 0;
        !           167:
        !           168:        switch (c = *pattern++) {
        !           169:        case '?':
        !           170:                if (in == '\0')
        !           171:                        rv = 1;
        !           172:                if (in == '/')
        !           173:                        rv = 1;
        !           174:                break;
        !           175:        case '[':
        !           176:                if (in == '\0')
        !           177:                        rv = 1;
        !           178:                if (in == '/')
        !           179:                        rv = 1;
        !           180:                if (rv == 1)
        !           181:                        break;
        !           182:
        !           183:                switch (rangematch(&pattern, in)) {
        !           184:                case RANGE_ERROR:
        !           185:                        /* not a good range, treat as normal text */
        !           186:                        goto normal;
        !           187:                case RANGE_MATCH:
        !           188:                        break;
        !           189:                case RANGE_NOMATCH:
        !           190:                        rv = 1;
        !           191:                }
        !           192:                break;
        !           193:        case '\\':
        !           194:                if ((c = *pattern++) == '\0') {
        !           195:                        c = '\\';
        !           196:                        --pattern;
        !           197:                }
        !           198:                /* FALLTHROUGH */
        !           199:        default:
        !           200:        normal:
        !           201:                if (c != in)
        !           202:                        rv = 1;
        !           203:                break;
        !           204:        }
        !           205:
        !           206:        *pp = pattern;
        !           207:        return rv;
        !           208: }
        !           209:
        !           210: /*
        !           211:  * Do a substring match. If wild is set then the pattern started with a '*'.
        !           212:  * The match will go until '*', '/' or '\0' is encountered in pattern or
        !           213:  * the input string is consumed up to end.
        !           214:  * The pattern and string handles pp and ss are updated only on success.
        !           215:  */
        !           216: static int
        !           217: matchsub(const char **pp, const char **ss, const char *end, int wild)
        !           218: {
        !           219:        const char *pattern = *pp;
        !           220:        const char *p = pattern;
        !           221:        const char *string = *ss;
        !           222:        size_t matchlen;
        !           223:
        !           224:        /* first calculate how many characters the submatch will consume */
        !           225:        for (matchlen = 0; *p != '\0'; matchlen++) {
        !           226:                if (p[0] == '*')
        !           227:                        break;
        !           228:                /* '/' acts as barrier */
        !           229:                if (p[0] == '/' || (p[0] == '\\' && p[1] == '/')) {
        !           230:                        if (wild) {
        !           231:                                /* match needs to match up to end of segment */
        !           232:                                if (string > end - matchlen)
        !           233:                                        return 1;
        !           234:                                string = end - matchlen;
        !           235:                                wild = 0;
        !           236:                        }
        !           237:                        break;
        !           238:                }
        !           239:                /*
        !           240:                 * skip forward one character in pattern by doing a
        !           241:                 * dummy lookup.
        !           242:                 */
        !           243:                matchchar(&p, ' ');
        !           244:        }
        !           245:
        !           246:        /* not enough char to match */
        !           247:        if (string > end - matchlen)
        !           248:                return 1;
        !           249:
        !           250:        if (*p == '\0') {
        !           251:                if (wild) {
        !           252:                        /* match needs to match up to end of segment */
        !           253:                        string = end - matchlen;
        !           254:                        wild = 0;
        !           255:                }
        !           256:        }
        !           257:
        !           258:        while (*pattern != '\0' && *pattern != '*') {
        !           259:                /* eat possible escape char before '/' */
        !           260:                if (pattern[0] == '\\' && pattern[1] == '/')
        !           261:                        pattern++;
        !           262:                if (pattern[0] == '/')
        !           263:                        break;
        !           264:
        !           265:                /* check if there are still characters available to compare */
        !           266:                if (string >= end)
        !           267:                        return 1;
        !           268:                /* Compare one char at a time. */
        !           269:                if (!matchchar(&pattern, *string++))
        !           270:                        continue;
        !           271:                if (wild) {
        !           272:                        /* skip forward one char and restart match */
        !           273:                        string = ++*ss;
        !           274:                        pattern = *pp;
        !           275:                        /* can it still match? */
        !           276:                        if (string > end - matchlen)
        !           277:                                return 1;
        !           278:                } else {
        !           279:                        /* failed match */
        !           280:                        return 1;
        !           281:                }
        !           282:        }
        !           283:
        !           284:        *pp = pattern;
        !           285:        *ss = string;
        !           286:        return 0;
        !           287: }
        !           288:
        !           289: /*
        !           290:  * File matching with the addition of the special '**'.
        !           291:  * Returns 0 on match and !0 for strings that do not match pattern.
        !           292:  */
        !           293: int
        !           294: rmatch(const char *pattern, const char *string, int leading_dir)
        !           295: {
        !           296:        const char *segend, *segnext, *mismatch = NULL;
        !           297:        int wild, starstar;
        !           298:
        !           299:        while (*pattern && *string) {
        !           300:
        !           301:                /* handle leading '/' first */
        !           302:                if (pattern[0] == '\\' && pattern[1] == '/')
        !           303:                        pattern++;
        !           304:                if (*string == '/' && *pattern == '/') {
        !           305:                        string++;
        !           306:                        pattern++;
        !           307:                }
        !           308:
        !           309:                /* match to the next '/' in string */
        !           310:                segend = strchr(string, '/');
        !           311:                if (segend == NULL)
        !           312:                        segend = strchr(string, '\0');
        !           313:
        !           314:                while (*pattern) {
        !           315:                        /*
        !           316:                         * Check for '*' and '**'. For '*' reduce '*' and '?'
        !           317:                         * sequences into n-'?' and trailing '*'.
        !           318:                         * For '**' this optimisation can not be done
        !           319:                         * since '**???/' will match 'a/aa/aaa/' but not
        !           320:                         * 'a/aa/aa/' still additional '*' will be reduced.
        !           321:                         */
        !           322:                        wild = 0;
        !           323:                        starstar = 0;
        !           324:                        for ( ; *pattern == '*' || *pattern == '?'; pattern++) {
        !           325:                                if (pattern[0] == '*') {
        !           326:                                        if (pattern[1] == '*') {
        !           327:                                                starstar = 1;
        !           328:                                                pattern++;
        !           329:                                        }
        !           330:                                        wild = 1;
        !           331:                                } else if (!starstar) { /* pattern[0] == '?' */
        !           332:                                        if (string < segend && *string != '/')
        !           333:                                                string++;
        !           334:                                        else
        !           335:                                                /* no match possible */
        !           336:                                                return 1;
        !           337:                                } else
        !           338:                                        break;
        !           339:                        }
        !           340:
        !           341:                        /* pattern ends in '**' so it is a match */
        !           342:                        if (starstar && *pattern == '\0')
        !           343:                                return 0;
        !           344:
        !           345:                        if (starstar) {
        !           346:                                segnext = segend;
        !           347:                                mismatch = pattern;
        !           348:                        }
        !           349:
        !           350:                        while (string < segend) {
        !           351:                                if (matchsub(&pattern, &string, segend, wild)) {
        !           352: failed_match:
        !           353:                                        /*
        !           354:                                         * failed to match, if starstar retry
        !           355:                                         * with the next segment.
        !           356:                                         */
        !           357:                                        if (mismatch) {
        !           358:                                                pattern = mismatch;
        !           359:                                                wild = 1;
        !           360:                                                string = segnext;
        !           361:                                                if (*string == '/')
        !           362:                                                        string++;
        !           363:                                                segend = strchr(string, '/');
        !           364:                                                if (!segend)
        !           365:                                                        segend = strchr(string,
        !           366:                                                            '\0');
        !           367:                                                segnext = segend;
        !           368:                                                if (string < segend)
        !           369:                                                        continue;
        !           370:                                        }
        !           371:                                        /* no match possible */
        !           372:                                        return 1;
        !           373:                                }
        !           374:                                break;
        !           375:                        }
        !           376:
        !           377:                        /* at end of string segment, eat up any extra '*' */
        !           378:                        if (string >= segend && *pattern != '*')
        !           379:                                break;
        !           380:                }
        !           381:                if (*string != '\0' && *string != '/')
        !           382:                        goto failed_match;
        !           383:                if (*pattern != '\0' && *pattern != '/')
        !           384:                        goto failed_match;
        !           385:        }
        !           386:
        !           387:        /* if both pattern and string are consumed it was a match */
        !           388:        if (*pattern == '\0' && *string == '\0')
        !           389:                return 0;
        !           390:        /* if leading_dir is set then string can also be '/' for success */
        !           391:        if (leading_dir && *pattern == '\0' && *string == '/')
        !           392:                return 0;
        !           393:        /* else failure */
        !           394:        return 1;
        !           395: }