Annotation of src/usr.bin/make/str.c, Revision 1.32
1.32 ! espie 1: /* $OpenBSD: str.c,v 1.31 2014/05/18 08:08:50 espie Exp $ */
1.5 millert 2: /* $NetBSD: str.c,v 1.13 1996/11/06 17:59:23 christos Exp $ */
1.1 deraadt 3:
4: /*-
1.5 millert 5: * Copyright (c) 1988, 1989, 1990, 1993
6: * The Regents of the University of California. All rights reserved.
1.1 deraadt 7: * Copyright (c) 1989 by Berkeley Softworks
8: * All rights reserved.
9: *
10: * This code is derived from software contributed to Berkeley by
11: * Adam de Boor.
12: *
13: * Redistribution and use in source and binary forms, with or without
14: * modification, are permitted provided that the following conditions
15: * are met:
16: * 1. Redistributions of source code must retain the above copyright
17: * notice, this list of conditions and the following disclaimer.
18: * 2. Redistributions in binary form must reproduce the above copyright
19: * notice, this list of conditions and the following disclaimer in the
20: * documentation and/or other materials provided with the distribution.
1.20 millert 21: * 3. Neither the name of the University nor the names of its contributors
1.1 deraadt 22: * may be used to endorse or promote products derived from this software
23: * without specific prior written permission.
24: *
25: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35: * SUCH DAMAGE.
36: */
37:
1.19 espie 38: #include <ctype.h>
39: #include <string.h>
40: #include "config.h"
41: #include "defines.h"
42: #include "str.h"
43: #include "memory.h"
44: #include "buf.h"
1.15 espie 45:
1.22 espie 46: /* helpers for Str_Matchi: range_match tests one character against a '[...]' class, star_match handles a '*' wildcard */
47: static bool range_match(char, const char **, const char *);
48: static bool star_match(const char *, const char *, const char *, const char *);
49:
/*
 * Str_concati --
 *	Concatenate the intervals [s1, e1) and [s2, e2), optionally placing
 *	the single character sep between them (sep == 0 means no separator).
 *
 * returns --
 *	A NUL-terminated string obtained from emalloc.
 */
char *
Str_concati(const char *s1, const char *e1, const char *s2, const char *e2,
    int sep)
{
	size_t len1 = e1 - s1;
	size_t len2 = e2 - s2;
	size_t seplen = sep ? 1 : 0;
	char *result, *p;

	result = emalloc(len1 + seplen + len2 + 1);
	p = result;

	/*
	 * Copy exactly the bytes of the first interval; the separator is
	 * stored on its own so nothing at or past e1 is ever read.
	 */
	memcpy(p, s1, len1);
	p += len1;
	if (sep)
		*p++ = sep;

	/* second interval, then the terminator */
	memcpy(p, s2, len2);
	p[len2] = '\0';
	return result;
}
78:
/*-
 * brk_string --
 *	Split a string into words separated by unquoted spaces or tabs.
 *	Single and double quotes group characters into one word and are
 *	removed; a backslash quotes the next character, with \b \f \n \r
 *	and \t translated to the corresponding control character.  A
 *	backslash in front of a newline or of the end of the string is
 *	kept as a literal backslash.  Leading spaces/tabs are ignored.
 *	A word that is ended by an unquoted newline, or by the end of the
 *	string, is the last one.
 *
 *	Note that the first word counts as started as soon as parsing
 *	begins, so an empty str yields a single empty word.
 *
 * returns --
 *	A NULL-terminated array of pointers to the words; the number of
 *	words is stored in *store_argc.  The words themselves are stored
 *	in one buffer handed back through *buffer.  Neither the array nor
 *	the buffer is released here.
 */
char **
brk_string(const char *str, int *store_argc, char **buffer)
{
	static const char esc_names[] = "bfnrt";
	static const char esc_values[] = "\b\f\n\r\t";
	int capacity = 50;		/* argv slots currently available */
	int count = 0;			/* words stored so far */
	char quote = '\0';		/* active quote character, if any */
	char c;
	const char *in;
	const char *hit;
	char *word, *out;
	char **words = ereallocarray(NULL, capacity + 1, sizeof(char *));

	/* leading blanks never belong to a word */
	while (*str == ' ' || *str == '\t')
		str++;

	/* every input character yields at most one output character */
	*buffer = emalloc(strlen(str) + 1);

	/*
	 * Copy the string into the buffer while interpreting quotes and
	 * backslashes, cutting it into words as we go.
	 */
	word = out = *buffer;
	for (in = str;; in++) {
		c = *in;
		switch (c) {
		case '"':
		case '\'':
			if (quote == '\0') {
				quote = c;
				/* Don't miss "" or '' */
				if (word == NULL && in[1] == quote) {
					word = out + 1;
					break;
				}
			} else if (quote == c) {
				quote = '\0';
			} else {
				/* the other kind of quote is plain text */
				break;
			}
			continue;
		case ' ':
		case '\t':
		case '\n':
			if (quote != '\0')
				break;
			if (word == NULL)
				continue;
			/* FALLTHROUGH */
		case '\0':
			/* end of a word: terminate it and record it */
			if (word == NULL)
				goto done;

			*out++ = '\0';
			if (count == capacity) {
				capacity *= 2;	/* ramp up fast */
				words = ereallocarray(words,
				    (capacity + 1), sizeof(char *));
			}
			words[count++] = word;
			word = NULL;
			if (c == '\n' || c == '\0')
				goto done;
			continue;
		case '\\':
			c = *++in;
			if (c == '\0' || c == '\n') {
				/* nothing to quote: keep the backslash */
				c = '\\';
				--in;
			} else if ((hit = strchr(esc_names, c)) != NULL) {
				c = esc_values[hit - esc_names];
			}
			break;
		}
		if (word == NULL)
			word = out;
		*out++ = c;
	}
done:
	words[count] = NULL;
	*store_argc = count;
	return words;
}
198:
1.19 espie 199:
/*
 * iterate_words --
 *	Locate the next word in the string *end points into.  Whitespace
 *	(as defined by ISSPACE) in front of the word is skipped.  The word
 *	runs up to the next space or tab found outside of single or double
 *	quotes, or up to the end of the string; a backslash protects the
 *	character that follows it.
 *
 * returns --
 *	The start of the word, with *end moved to the first character
 *	after it, or NULL (leaving *end untouched) when nothing but
 *	whitespace is left.
 */
const char *
iterate_words(const char **end)
{
	const char *word, *cursor;
	char quote = 0;
	char c;

	word = *end;
	while (ISSPACE(*word))
		word++;
	if (*word == '\0')
		return NULL;

	cursor = word;
	for (;;) {
		c = *cursor;
		if (c == '\\') {
			/* step over the quoted character, if there is one */
			if (cursor[1] != '\0')
				cursor++;
		} else if (c == '\'' || c == '"') {
			if (quote == c)
				quote = 0;
			else if (quote == 0)
				quote = c;
		} else if (c == '\0' ||
		    ((c == ' ' || c == '\t') && quote == 0)) {
			*end = cursor;
			return word;
		}
		cursor++;
	}
}
1.17 espie 237:
/*
 * range_match --
 *	Test character c against a character class.  On entry *ppat points
 *	just after the opening '['; epattern is the end of the pattern.
 *	A leading '!' or '^' negates the class, "x-y" denotes a range (in
 *	either order), '\' quotes the next character, and ']' is an
 *	ordinary character when it comes first.
 *
 * returns --
 *	true if c belongs to the class.  After a plain (non-negated) match
 *	*ppat is left on the closing ']', or on epattern if the class is
 *	not terminated.  A '[' that ends the pattern matches a literal '['.
 *	Nothing at or beyond epattern is ever read.
 */
static bool
range_match(char c, const char **ppat, const char *epattern)
{
	if (*ppat == epattern)
		return c == '[';

	if (**ppat == '!' || **ppat == '^') {
		(*ppat)++;
		return !range_match(c, ppat, epattern);
	}
	for (;;) {
		if (**ppat == '\\') {
			if (++(*ppat) == epattern)
				return false;
		}
		if (**ppat == c)
			break;
		/* only look for the '-' if it is still inside the pattern */
		if (*ppat + 1 != epattern && (*ppat)[1] == '-') {
			if (*ppat + 2 == epattern)
				return false;
			if (**ppat < c && c <= (*ppat)[2])
				break;
			if ((*ppat)[2] <= c && c < **ppat)
				break;
			*ppat += 3;
		} else
			(*ppat)++;
		/* The test for ']' is done at the end so that ']' can be
		 * used at the start of the class without '\'. */
		if (*ppat == epattern || **ppat == ']')
			return false;
	}
	/*
	 * Found a matching element.  Step over it first, so that a matched
	 * ']' or '\' is not mistaken for the end of the class or for a
	 * quote, then skip over the rest of the class.
	 */
	(*ppat)++;
	while (*ppat != epattern && **ppat != ']') {
		if (**ppat == '\\')
			(*ppat)++;
		/* A non-terminated character class is ok. */
		if (*ppat == epattern)
			break;
		(*ppat)++;
	}
	return true;
}

/* star_match and Str_Matchi call each other. */
static bool star_match(const char *, const char *, const char *, const char *);

/*
 * Str_Matchi --
 *	Match the interval [string, estring) against the pattern held in
 *	[pattern, epattern).  In the pattern '*' matches any sequence of
 *	characters, '?' matches any single character, '[...]' matches one
 *	character out of a class (see range_match) and '\' makes the next
 *	character match literally.
 *
 * returns --
 *	true if the whole string matches the whole pattern.
 */
bool
Str_Matchi(const char *string, const char *estring,
    const char *pattern, const char *epattern)
{
	while (pattern != epattern) {
		/* '*' is handled by trying every suffix of the string. */
		if (*pattern == '*')
			return star_match(string, estring, pattern, epattern);
		/* Anything else needs a character to match against. */
		if (string == estring)
			return false;
		if (*pattern == '[') {
			pattern++;
			if (!range_match(*string, &pattern, epattern))
				return false;
			/*
			 * A trailing '[' or an unterminated class uses up
			 * the pattern: consume the matched character and
			 * stop, instead of stepping beyond epattern.
			 */
			if (pattern == epattern) {
				string++;
				break;
			}
		} else if (*pattern != '?') {
			/* '\' is stripped so that the character after it is
			 * compared literally. */
			if (*pattern == '\\') {
				if (++pattern == epattern)
					return false;
			}
			if (*pattern != *string)
				return false;
		}
		pattern++;
		string++;
	}
	return string == estring;
}

/*
 * star_match --
 *	Handle a '*' in the pattern; pattern points at the '*'.
 *
 * returns --
 *	true if [string, estring) matches [pattern, epattern).
 */
static bool
star_match(const char *string, const char *estring,
    const char *pattern, const char *epattern)
{
	/* skip the '*' itself */
	pattern++;
	/* Fold a following run of '?' and '*' into this one: each '?'
	 * consumes one character, so that the recursive calls below only
	 * start on a `real' pattern character. */
	while (pattern != epattern &&
	    (*pattern == '?' || *pattern == '*')) {
		if (*pattern == '?') {
			if (string == estring)
				return false;
			string++;
		}
		pattern++;
	}
	/* a trailing '*' matches whatever is left */
	if (pattern == epattern)
		return true;
	/* try the rest of the pattern against every suffix of string */
	for (; string != estring; string++)
		if (Str_Matchi(string, estring, pattern, epattern))
			return true;
	return false;
}
358:
1.17 espie 359:
/*-
 *-----------------------------------------------------------------------
 * Str_SYSVMatch --
 *	Match word against a System V style pattern, in which the first
 *	'%' stands for an arbitrary (possibly empty) stem.  The text in
 *	front of the '%' must be a prefix of word and the text after it
 *	a suffix.  A pattern without '%' only has to be a suffix of word;
 *	an empty pattern matches the whole word.
 *
 * Results:
 *	A pointer to the start of the stem inside word, with the length
 *	of the stem stored in *len, or NULL if word does not match (in
 *	which case *len is not written).
 *-----------------------------------------------------------------------
 */
const char *
Str_SYSVMatch(const char *word, const char *pattern, size_t *len)
{
	const char *percent, *stem, *tail;
	size_t prefix;

	/* Null pattern is the whole string. */
	if (pattern[0] == '\0') {
		*len = strlen(word);
		return word;
	}

	percent = strchr(pattern, '%');
	if (percent != NULL) {
		/* Everything in front of the '%' has to match literally. */
		prefix = percent - pattern;
		if (strncmp(word, pattern, prefix) != 0)
			return NULL;
		word += prefix;
		pattern = percent + 1;

		/* Nothing after the '%': the stem is the rest of the word. */
		if (pattern[0] == '\0') {
			*len = strlen(word);
			return word;
		}
	}

	/* Look for the position where the remaining pattern is the tail. */
	stem = word;
	for (tail = stem;; tail++) {
		if (strcmp(pattern, tail) == 0) {
			*len = tail - stem;
			return stem;
		}
		if (*tail == '\0')
			return NULL;
	}
}
410:
411:
412: /*-
413: *-----------------------------------------------------------------------
414: * Str_SYSVSubst --
1.21 espie 415: * Substitute '%' in the pattern with len characters from src.
1.1 deraadt 416: * If the pattern does not contain a '%' prepend len characters
417: * from src.
1.5 millert 418: *
1.1 deraadt 419: * Side Effects:
1.21 espie 420: * Adds result to buf
1.1 deraadt 421: *-----------------------------------------------------------------------
422: */
423: void
1.21 espie 424: Str_SYSVSubst(Buffer buf, const char *pat, const char *src, size_t len)
1.1 deraadt 425: {
1.23 espie 426: const char *m;
1.1 deraadt 427:
1.23 espie 428: if ((m = strchr(pat, '%')) != NULL) {
429: /* Copy the prefix. */
430: Buf_Addi(buf, pat, m);
431: /* Skip the %. */
432: pat = m + 1;
433: }
1.1 deraadt 434:
1.23 espie 435: /* Copy the pattern. */
436: Buf_AddChars(buf, len, src);
1.1 deraadt 437:
1.23 espie 438: /* Append the rest. */
439: Buf_AddString(buf, pat);
1.9 espie 440: }
441:
/*
 * Str_dupi --
 *	Copy the interval [begin, end) into a NUL-terminated string
 *	obtained from emalloc.
 */
char *
Str_dupi(const char *begin, const char *end)
{
	size_t n = end - begin;
	char *copy = emalloc(n + 1);

	memcpy(copy, begin, n);
	copy[n] = '\0';
	return copy;
}
1.16 espie 452:
/*
 * escape_dupi --
 *	Copy the interval [begin, end) into a NUL-terminated string
 *	obtained from emalloc, removing the backslash in front of every
 *	character that strchr finds in set.  A backslash in front of any
 *	other character is kept, and so is a lone backslash at the very
 *	end of the interval.
 */
char *
escape_dupi(const char *begin, const char *end, const char *set)
{
	/* the result is never longer than the input */
	char *result = emalloc(end - begin + 1);
	char *out = result;
	const char *in;

	for (in = begin; in != end; in++) {
		if (*in == '\\') {
			if (++in == end) {
				/* trailing backslash: nothing to escape */
				*out++ = '\\';
				break;
			}
			if (strchr(set, *in) == NULL)
				*out++ = '\\';
		}
		*out++ = *in;
	}
	*out = '\0';
	return result;
}
474:
/*
 * Str_rchri --
 *	Find the last occurrence of c in the interval [begin, end).
 *
 * returns --
 *	A pointer to that character, or NULL if c does not occur (which
 *	includes the case of an empty interval).
 */
char *
Str_rchri(const char *begin, const char *end, int c)
{
	const char *p = end;

	/* walk backwards from the end of the interval */
	while (p != begin) {
		p--;
		if (*p == c)
			return (char *)p;
	}
	return NULL;
}