Annotation of src/usr.bin/rsync/rmatch.c, Revision 1.2
1.2 ! claudio 1: /* $OpenBSD: rmatch.c,v 1.1 2021/08/29 13:43:46 claudio Exp $ */
1.1 claudio 2:
3: /*
4: * Copyright (c) 2021 Claudio Jeker <claudio@openbsd.org>
5: *
6: * Permission to use, copy, modify, and distribute this software for any
7: * purpose with or without fee is hereby granted, provided that the above
8: * copyright notice and this permission notice appear in all copies.
9: *
10: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17: */
18:
19: /*
20: * Copyright (c) 1989, 1993, 1994
21: * The Regents of the University of California. All rights reserved.
22: *
23: * This code is derived from software contributed to Berkeley by
24: * Guido van Rossum.
25: *
26: * Redistribution and use in source and binary forms, with or without
27: * modification, are permitted provided that the following conditions
28: * are met:
29: * 1. Redistributions of source code must retain the above copyright
30: * notice, this list of conditions and the following disclaimer.
31: * 2. Redistributions in binary form must reproduce the above copyright
32: * notice, this list of conditions and the following disclaimer in the
33: * documentation and/or other materials provided with the distribution.
34: * 3. Neither the name of the University nor the names of its contributors
35: * may be used to endorse or promote products derived from this software
36: * without specific prior written permission.
37: *
38: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
39: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
40: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
41: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
42: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
43: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
44: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
45: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
46: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
47: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
48: * SUCH DAMAGE.
49: */
50:
51: #include <ctype.h>
52: #include <stdio.h>
53: #include <string.h>
54: #include <limits.h>
55:
56: #include "charclass.h"
1.2 ! claudio 57: #include "extern.h"
1.1 claudio 58:
59: #define RANGE_MATCH 1
60: #define RANGE_NOMATCH 0
61: #define RANGE_ERROR (-1)
62:
63: static int
64: classmatch(const char *pattern, char test, const char **ep)
65: {
66: const char *mismatch = pattern;
67: const struct cclass *cc;
68: const char *colon;
69: size_t len;
70: int rval = RANGE_NOMATCH;
71:
72: if (*pattern++ != ':') {
73: *ep = mismatch;
74: return RANGE_ERROR;
75: }
76: if ((colon = strchr(pattern, ':')) == NULL || colon[1] != ']') {
77: *ep = mismatch;
78: return RANGE_ERROR;
79: }
80: *ep = colon + 2;
81: len = (size_t)(colon - pattern);
82:
83: for (cc = cclasses; cc->name != NULL; cc++) {
84: if (!strncmp(pattern, cc->name, len) && cc->name[len] == '\0') {
85: if (cc->isctype((unsigned char)test))
86: rval = RANGE_MATCH;
87: return rval;
88: }
89: }
90:
91: /* invalid character class, treat as normal text */
92: *ep = mismatch;
93: return RANGE_ERROR;
94: }
95:
96: static int
97: rangematch(const char **pp, char test)
98: {
99: const char *pattern = *pp;
100: int negate, ok;
101: char c, c2;
102:
103: /*
104: * A bracket expression starting with an unquoted circumflex
105: * character produces unspecified results (IEEE 1003.2-1992,
106: * 3.13.2). This implementation treats it like '!', for
107: * consistency with the regular expression syntax.
108: * J.T. Conklin (conklin@ngai.kaleida.com)
109: */
110: if ((negate = (*pattern == '!' || *pattern == '^')))
111: ++pattern;
112:
113: /*
114: * A right bracket shall lose its special meaning and represent
115: * itself in a bracket expression if it occurs first in the list.
116: * -- POSIX.2 2.8.3.2
117: */
118: ok = 0;
119: c = *pattern++;
120: do {
121: if (c == '[') {
122: switch (classmatch(pattern, test, &pattern)) {
123: case RANGE_MATCH:
124: ok = 1;
125: continue;
126: case RANGE_NOMATCH:
127: continue;
128: default:
129: /* invalid character class, treat litterally. */
130: break;
131: }
132: }
133: if (c == '\\')
134: c = *pattern++;
135: if (c == '\0')
136: return RANGE_ERROR;
137: /* patterns can not match on '/' */
138: if (c == '/')
139: return RANGE_NOMATCH;
140: if (*pattern == '-'
141: && (c2 = *(pattern + 1)) != '\0' && c2 != ']') {
142: pattern += 2;
143: if (c2 == '\\')
144: c2 = *pattern++;
145: if (c2 == '\0')
146: return RANGE_ERROR;
147: if (c <= test && test <= c2)
148: ok = 1;
149: } else if (c == test)
150: ok = 1;
151: } while ((c = *pattern++) != ']');
152:
153: *pp = pattern;
154: return (ok == negate ? RANGE_NOMATCH : RANGE_MATCH);
155: }
156:
157: /*
158: * Single character match, advances pattern as much as needed.
159: * Return 0 on match and !0 (aka 1) on missmatch.
160: * When matched pp is advanced to the end of the pattern matched.
161: */
162: static int
163: matchchar(const char **pp, const char in)
164: {
165: const char *pattern = *pp;
166: char c;
167: int rv = 0;
168:
169: switch (c = *pattern++) {
170: case '?':
171: if (in == '\0')
172: rv = 1;
173: if (in == '/')
174: rv = 1;
175: break;
176: case '[':
177: if (in == '\0')
178: rv = 1;
179: if (in == '/')
180: rv = 1;
181: if (rv == 1)
182: break;
183:
184: switch (rangematch(&pattern, in)) {
185: case RANGE_ERROR:
186: /* not a good range, treat as normal text */
187: goto normal;
188: case RANGE_MATCH:
189: break;
190: case RANGE_NOMATCH:
191: rv = 1;
192: }
193: break;
194: case '\\':
195: if ((c = *pattern++) == '\0') {
196: c = '\\';
197: --pattern;
198: }
199: /* FALLTHROUGH */
200: default:
201: normal:
202: if (c != in)
203: rv = 1;
204: break;
205: }
206:
207: *pp = pattern;
208: return rv;
209: }
210:
211: /*
212: * Do a substring match. If wild is set then the pattern started with a '*'.
213: * The match will go until '*', '/' or '\0' is encountered in pattern or
214: * the input string is consumed up to end.
215: * The pattern and string handles pp and ss are updated only on success.
216: */
217: static int
218: matchsub(const char **pp, const char **ss, const char *end, int wild)
219: {
220: const char *pattern = *pp;
221: const char *p = pattern;
222: const char *string = *ss;
223: size_t matchlen;
224:
225: /* first calculate how many characters the submatch will consume */
226: for (matchlen = 0; *p != '\0'; matchlen++) {
227: if (p[0] == '*')
228: break;
229: /* '/' acts as barrier */
230: if (p[0] == '/' || (p[0] == '\\' && p[1] == '/')) {
231: if (wild) {
232: /* match needs to match up to end of segment */
233: if (string > end - matchlen)
234: return 1;
235: string = end - matchlen;
236: wild = 0;
237: }
238: break;
239: }
240: /*
241: * skip forward one character in pattern by doing a
242: * dummy lookup.
243: */
244: matchchar(&p, ' ');
245: }
246:
247: /* not enough char to match */
248: if (string > end - matchlen)
249: return 1;
250:
251: if (*p == '\0') {
252: if (wild) {
253: /* match needs to match up to end of segment */
254: string = end - matchlen;
255: wild = 0;
256: }
257: }
258:
259: while (*pattern != '\0' && *pattern != '*') {
260: /* eat possible escape char before '/' */
261: if (pattern[0] == '\\' && pattern[1] == '/')
262: pattern++;
263: if (pattern[0] == '/')
264: break;
265:
266: /* check if there are still characters available to compare */
267: if (string >= end)
268: return 1;
269: /* Compare one char at a time. */
270: if (!matchchar(&pattern, *string++))
271: continue;
272: if (wild) {
273: /* skip forward one char and restart match */
274: string = ++*ss;
275: pattern = *pp;
276: /* can it still match? */
277: if (string > end - matchlen)
278: return 1;
279: } else {
280: /* failed match */
281: return 1;
282: }
283: }
284:
285: *pp = pattern;
286: *ss = string;
287: return 0;
288: }
289:
290: /*
291: * File matching with the addition of the special '**'.
292: * Returns 0 on match and !0 for strings that do not match pattern.
293: */
294: int
295: rmatch(const char *pattern, const char *string, int leading_dir)
296: {
297: const char *segend, *segnext, *mismatch = NULL;
298: int wild, starstar;
299:
300: while (*pattern && *string) {
301:
302: /* handle leading '/' first */
303: if (pattern[0] == '\\' && pattern[1] == '/')
304: pattern++;
305: if (*string == '/' && *pattern == '/') {
306: string++;
307: pattern++;
308: }
309:
310: /* match to the next '/' in string */
311: segend = strchr(string, '/');
312: if (segend == NULL)
313: segend = strchr(string, '\0');
314:
315: while (*pattern) {
316: /*
317: * Check for '*' and '**'. For '*' reduce '*' and '?'
318: * sequences into n-'?' and trailing '*'.
319: * For '**' this optimisation can not be done
320: * since '**???/' will match 'a/aa/aaa/' but not
321: * 'a/aa/aa/' still additional '*' will be reduced.
322: */
323: wild = 0;
324: starstar = 0;
325: for ( ; *pattern == '*' || *pattern == '?'; pattern++) {
326: if (pattern[0] == '*') {
327: if (pattern[1] == '*') {
328: starstar = 1;
329: pattern++;
330: }
331: wild = 1;
332: } else if (!starstar) { /* pattern[0] == '?' */
333: if (string < segend && *string != '/')
334: string++;
335: else
336: /* no match possible */
337: return 1;
338: } else
339: break;
340: }
341:
342: /* pattern ends in '**' so it is a match */
343: if (starstar && *pattern == '\0')
344: return 0;
345:
346: if (starstar) {
347: segnext = segend;
348: mismatch = pattern;
349: }
350:
351: while (string < segend) {
352: if (matchsub(&pattern, &string, segend, wild)) {
353: failed_match:
354: /*
355: * failed to match, if starstar retry
356: * with the next segment.
357: */
358: if (mismatch) {
359: pattern = mismatch;
360: wild = 1;
361: string = segnext;
362: if (*string == '/')
363: string++;
364: segend = strchr(string, '/');
365: if (!segend)
366: segend = strchr(string,
367: '\0');
368: segnext = segend;
369: if (string < segend)
370: continue;
371: }
372: /* no match possible */
373: return 1;
374: }
375: break;
376: }
377:
378: /* at end of string segment, eat up any extra '*' */
379: if (string >= segend && *pattern != '*')
380: break;
381: }
382: if (*string != '\0' && *string != '/')
383: goto failed_match;
384: if (*pattern != '\0' && *pattern != '/')
385: goto failed_match;
386: }
387:
388: /* if both pattern and string are consumed it was a match */
389: if (*pattern == '\0' && *string == '\0')
390: return 0;
391: /* if leading_dir is set then string can also be '/' for success */
392: if (leading_dir && *pattern == '\0' && *string == '/')
393: return 0;
394: /* else failure */
395: return 1;
396: }