Annotation of src/usr.bin/rsync/rmatch.c, Revision 1.1
1.1 ! claudio 1: /* $OpenBSD: fnmatch.c,v 1.15 2011/02/10 21:31:59 stsp Exp $ */
! 2:
! 3: /*
! 4: * Copyright (c) 2021 Claudio Jeker <claudio@openbsd.org>
! 5: *
! 6: * Permission to use, copy, modify, and distribute this software for any
! 7: * purpose with or without fee is hereby granted, provided that the above
! 8: * copyright notice and this permission notice appear in all copies.
! 9: *
! 10: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
! 11: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
! 12: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
! 13: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
! 14: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
! 15: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
! 16: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
! 17: */
! 18:
! 19: /*
! 20: * Copyright (c) 1989, 1993, 1994
! 21: * The Regents of the University of California. All rights reserved.
! 22: *
! 23: * This code is derived from software contributed to Berkeley by
! 24: * Guido van Rossum.
! 25: *
! 26: * Redistribution and use in source and binary forms, with or without
! 27: * modification, are permitted provided that the following conditions
! 28: * are met:
! 29: * 1. Redistributions of source code must retain the above copyright
! 30: * notice, this list of conditions and the following disclaimer.
! 31: * 2. Redistributions in binary form must reproduce the above copyright
! 32: * notice, this list of conditions and the following disclaimer in the
! 33: * documentation and/or other materials provided with the distribution.
! 34: * 3. Neither the name of the University nor the names of its contributors
! 35: * may be used to endorse or promote products derived from this software
! 36: * without specific prior written permission.
! 37: *
! 38: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
! 39: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
! 40: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
! 41: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
! 42: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
! 43: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
! 44: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
! 45: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
! 46: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
! 47: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
! 48: * SUCH DAMAGE.
! 49: */
! 50:
! 51: #include <ctype.h>
! 52: #include <stdio.h>
! 53: #include <string.h>
! 54: #include <limits.h>
! 55:
! 56: #include "charclass.h"
! 57:
! 58: #define RANGE_MATCH 1
! 59: #define RANGE_NOMATCH 0
! 60: #define RANGE_ERROR (-1)
! 61:
! 62: static int
! 63: classmatch(const char *pattern, char test, const char **ep)
! 64: {
! 65: const char *mismatch = pattern;
! 66: const struct cclass *cc;
! 67: const char *colon;
! 68: size_t len;
! 69: int rval = RANGE_NOMATCH;
! 70:
! 71: if (*pattern++ != ':') {
! 72: *ep = mismatch;
! 73: return RANGE_ERROR;
! 74: }
! 75: if ((colon = strchr(pattern, ':')) == NULL || colon[1] != ']') {
! 76: *ep = mismatch;
! 77: return RANGE_ERROR;
! 78: }
! 79: *ep = colon + 2;
! 80: len = (size_t)(colon - pattern);
! 81:
! 82: for (cc = cclasses; cc->name != NULL; cc++) {
! 83: if (!strncmp(pattern, cc->name, len) && cc->name[len] == '\0') {
! 84: if (cc->isctype((unsigned char)test))
! 85: rval = RANGE_MATCH;
! 86: return rval;
! 87: }
! 88: }
! 89:
! 90: /* invalid character class, treat as normal text */
! 91: *ep = mismatch;
! 92: return RANGE_ERROR;
! 93: }
! 94:
! 95: static int
! 96: rangematch(const char **pp, char test)
! 97: {
! 98: const char *pattern = *pp;
! 99: int negate, ok;
! 100: char c, c2;
! 101:
! 102: /*
! 103: * A bracket expression starting with an unquoted circumflex
! 104: * character produces unspecified results (IEEE 1003.2-1992,
! 105: * 3.13.2). This implementation treats it like '!', for
! 106: * consistency with the regular expression syntax.
! 107: * J.T. Conklin (conklin@ngai.kaleida.com)
! 108: */
! 109: if ((negate = (*pattern == '!' || *pattern == '^')))
! 110: ++pattern;
! 111:
! 112: /*
! 113: * A right bracket shall lose its special meaning and represent
! 114: * itself in a bracket expression if it occurs first in the list.
! 115: * -- POSIX.2 2.8.3.2
! 116: */
! 117: ok = 0;
! 118: c = *pattern++;
! 119: do {
! 120: if (c == '[') {
! 121: switch (classmatch(pattern, test, &pattern)) {
! 122: case RANGE_MATCH:
! 123: ok = 1;
! 124: continue;
! 125: case RANGE_NOMATCH:
! 126: continue;
! 127: default:
! 128: /* invalid character class, treat litterally. */
! 129: break;
! 130: }
! 131: }
! 132: if (c == '\\')
! 133: c = *pattern++;
! 134: if (c == '\0')
! 135: return RANGE_ERROR;
! 136: /* patterns can not match on '/' */
! 137: if (c == '/')
! 138: return RANGE_NOMATCH;
! 139: if (*pattern == '-'
! 140: && (c2 = *(pattern + 1)) != '\0' && c2 != ']') {
! 141: pattern += 2;
! 142: if (c2 == '\\')
! 143: c2 = *pattern++;
! 144: if (c2 == '\0')
! 145: return RANGE_ERROR;
! 146: if (c <= test && test <= c2)
! 147: ok = 1;
! 148: } else if (c == test)
! 149: ok = 1;
! 150: } while ((c = *pattern++) != ']');
! 151:
! 152: *pp = pattern;
! 153: return (ok == negate ? RANGE_NOMATCH : RANGE_MATCH);
! 154: }
! 155:
! 156: /*
! 157: * Single character match, advances pattern as much as needed.
! 158: * Return 0 on match and !0 (aka 1) on missmatch.
! 159: * When matched pp is advanced to the end of the pattern matched.
! 160: */
! 161: static int
! 162: matchchar(const char **pp, const char in)
! 163: {
! 164: const char *pattern = *pp;
! 165: char c;
! 166: int rv = 0;
! 167:
! 168: switch (c = *pattern++) {
! 169: case '?':
! 170: if (in == '\0')
! 171: rv = 1;
! 172: if (in == '/')
! 173: rv = 1;
! 174: break;
! 175: case '[':
! 176: if (in == '\0')
! 177: rv = 1;
! 178: if (in == '/')
! 179: rv = 1;
! 180: if (rv == 1)
! 181: break;
! 182:
! 183: switch (rangematch(&pattern, in)) {
! 184: case RANGE_ERROR:
! 185: /* not a good range, treat as normal text */
! 186: goto normal;
! 187: case RANGE_MATCH:
! 188: break;
! 189: case RANGE_NOMATCH:
! 190: rv = 1;
! 191: }
! 192: break;
! 193: case '\\':
! 194: if ((c = *pattern++) == '\0') {
! 195: c = '\\';
! 196: --pattern;
! 197: }
! 198: /* FALLTHROUGH */
! 199: default:
! 200: normal:
! 201: if (c != in)
! 202: rv = 1;
! 203: break;
! 204: }
! 205:
! 206: *pp = pattern;
! 207: return rv;
! 208: }
! 209:
! 210: /*
! 211: * Do a substring match. If wild is set then the pattern started with a '*'.
! 212: * The match will go until '*', '/' or '\0' is encountered in pattern or
! 213: * the input string is consumed up to end.
! 214: * The pattern and string handles pp and ss are updated only on success.
! 215: */
! 216: static int
! 217: matchsub(const char **pp, const char **ss, const char *end, int wild)
! 218: {
! 219: const char *pattern = *pp;
! 220: const char *p = pattern;
! 221: const char *string = *ss;
! 222: size_t matchlen;
! 223:
! 224: /* first calculate how many characters the submatch will consume */
! 225: for (matchlen = 0; *p != '\0'; matchlen++) {
! 226: if (p[0] == '*')
! 227: break;
! 228: /* '/' acts as barrier */
! 229: if (p[0] == '/' || (p[0] == '\\' && p[1] == '/')) {
! 230: if (wild) {
! 231: /* match needs to match up to end of segment */
! 232: if (string > end - matchlen)
! 233: return 1;
! 234: string = end - matchlen;
! 235: wild = 0;
! 236: }
! 237: break;
! 238: }
! 239: /*
! 240: * skip forward one character in pattern by doing a
! 241: * dummy lookup.
! 242: */
! 243: matchchar(&p, ' ');
! 244: }
! 245:
! 246: /* not enough char to match */
! 247: if (string > end - matchlen)
! 248: return 1;
! 249:
! 250: if (*p == '\0') {
! 251: if (wild) {
! 252: /* match needs to match up to end of segment */
! 253: string = end - matchlen;
! 254: wild = 0;
! 255: }
! 256: }
! 257:
! 258: while (*pattern != '\0' && *pattern != '*') {
! 259: /* eat possible escape char before '/' */
! 260: if (pattern[0] == '\\' && pattern[1] == '/')
! 261: pattern++;
! 262: if (pattern[0] == '/')
! 263: break;
! 264:
! 265: /* check if there are still characters available to compare */
! 266: if (string >= end)
! 267: return 1;
! 268: /* Compare one char at a time. */
! 269: if (!matchchar(&pattern, *string++))
! 270: continue;
! 271: if (wild) {
! 272: /* skip forward one char and restart match */
! 273: string = ++*ss;
! 274: pattern = *pp;
! 275: /* can it still match? */
! 276: if (string > end - matchlen)
! 277: return 1;
! 278: } else {
! 279: /* failed match */
! 280: return 1;
! 281: }
! 282: }
! 283:
! 284: *pp = pattern;
! 285: *ss = string;
! 286: return 0;
! 287: }
! 288:
! 289: /*
! 290: * File matching with the addition of the special '**'.
! 291: * Returns 0 on match and !0 for strings that do not match pattern.
! 292: */
! 293: int
! 294: rmatch(const char *pattern, const char *string, int leading_dir)
! 295: {
! 296: const char *segend, *segnext, *mismatch = NULL;
! 297: int wild, starstar;
! 298:
! 299: while (*pattern && *string) {
! 300:
! 301: /* handle leading '/' first */
! 302: if (pattern[0] == '\\' && pattern[1] == '/')
! 303: pattern++;
! 304: if (*string == '/' && *pattern == '/') {
! 305: string++;
! 306: pattern++;
! 307: }
! 308:
! 309: /* match to the next '/' in string */
! 310: segend = strchr(string, '/');
! 311: if (segend == NULL)
! 312: segend = strchr(string, '\0');
! 313:
! 314: while (*pattern) {
! 315: /*
! 316: * Check for '*' and '**'. For '*' reduce '*' and '?'
! 317: * sequences into n-'?' and trailing '*'.
! 318: * For '**' this optimisation can not be done
! 319: * since '**???/' will match 'a/aa/aaa/' but not
! 320: * 'a/aa/aa/' still additional '*' will be reduced.
! 321: */
! 322: wild = 0;
! 323: starstar = 0;
! 324: for ( ; *pattern == '*' || *pattern == '?'; pattern++) {
! 325: if (pattern[0] == '*') {
! 326: if (pattern[1] == '*') {
! 327: starstar = 1;
! 328: pattern++;
! 329: }
! 330: wild = 1;
! 331: } else if (!starstar) { /* pattern[0] == '?' */
! 332: if (string < segend && *string != '/')
! 333: string++;
! 334: else
! 335: /* no match possible */
! 336: return 1;
! 337: } else
! 338: break;
! 339: }
! 340:
! 341: /* pattern ends in '**' so it is a match */
! 342: if (starstar && *pattern == '\0')
! 343: return 0;
! 344:
! 345: if (starstar) {
! 346: segnext = segend;
! 347: mismatch = pattern;
! 348: }
! 349:
! 350: while (string < segend) {
! 351: if (matchsub(&pattern, &string, segend, wild)) {
! 352: failed_match:
! 353: /*
! 354: * failed to match, if starstar retry
! 355: * with the next segment.
! 356: */
! 357: if (mismatch) {
! 358: pattern = mismatch;
! 359: wild = 1;
! 360: string = segnext;
! 361: if (*string == '/')
! 362: string++;
! 363: segend = strchr(string, '/');
! 364: if (!segend)
! 365: segend = strchr(string,
! 366: '\0');
! 367: segnext = segend;
! 368: if (string < segend)
! 369: continue;
! 370: }
! 371: /* no match possible */
! 372: return 1;
! 373: }
! 374: break;
! 375: }
! 376:
! 377: /* at end of string segment, eat up any extra '*' */
! 378: if (string >= segend && *pattern != '*')
! 379: break;
! 380: }
! 381: if (*string != '\0' && *string != '/')
! 382: goto failed_match;
! 383: if (*pattern != '\0' && *pattern != '/')
! 384: goto failed_match;
! 385: }
! 386:
! 387: /* if both pattern and string are consumed it was a match */
! 388: if (*pattern == '\0' && *string == '\0')
! 389: return 0;
! 390: /* if leading_dir is set then string can also be '/' for success */
! 391: if (leading_dir && *pattern == '\0' && *string == '/')
! 392: return 0;
! 393: /* else failure */
! 394: return 1;
! 395: }