Annotation of src/usr.bin/make/str.c, Revision 1.30
1.30 ! espie 1: /* $OpenBSD: str.c,v 1.29 2014/04/22 08:26:31 espie Exp $ */
1.5 millert 2: /* $NetBSD: str.c,v 1.13 1996/11/06 17:59:23 christos Exp $ */
1.1 deraadt 3:
4: /*-
1.5 millert 5: * Copyright (c) 1988, 1989, 1990, 1993
6: * The Regents of the University of California. All rights reserved.
1.1 deraadt 7: * Copyright (c) 1989 by Berkeley Softworks
8: * All rights reserved.
9: *
10: * This code is derived from software contributed to Berkeley by
11: * Adam de Boor.
12: *
13: * Redistribution and use in source and binary forms, with or without
14: * modification, are permitted provided that the following conditions
15: * are met:
16: * 1. Redistributions of source code must retain the above copyright
17: * notice, this list of conditions and the following disclaimer.
18: * 2. Redistributions in binary form must reproduce the above copyright
19: * notice, this list of conditions and the following disclaimer in the
20: * documentation and/or other materials provided with the distribution.
1.20 millert 21: * 3. Neither the name of the University nor the names of its contributors
1.1 deraadt 22: * may be used to endorse or promote products derived from this software
23: * without specific prior written permission.
24: *
25: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35: * SUCH DAMAGE.
36: */
37:
1.19 espie 38: #include <ctype.h>
39: #include <string.h>
40: #include "config.h"
41: #include "defines.h"
42: #include "str.h"
43: #include "memory.h"
44: #include "buf.h"
1.15 espie 45:
/* Static helpers that Str_Matchi relies on; both are defined further down. */
static bool range_match(char c, const char **ppat, const char *epattern);
static bool star_match(const char *string, const char *estring,
    const char *pattern, const char *epattern);
49:
/*
 * Str_concati --
 *	Concatenate the intervals [s1, e1) and [s2, e2) into a freshly
 *	allocated, NUL-terminated string.  When sep is non-zero it is
 *	inserted as a single character between the two pieces.
 *
 * returns --
 *	The new string, obtained from emalloc().
 */
char *
Str_concati(const char *s1, const char *e1, const char *s2, const char *e2,
    int sep)
{
	size_t len1 = e1 - s1;
	size_t len2 = e2 - s2;
	size_t seplen = sep ? 1 : 0;
	char *result, *p;

	result = emalloc(len1 + seplen + len2 + 1);
	p = result;

	/*
	 * Copy exactly the bytes of the first interval: *e1 is not part
	 * of it and must not be read.
	 */
	memcpy(p, s1, len1);
	p += len1;

	/* add separator character */
	if (sep)
		*p++ = sep;

	/* copy second string, then terminate */
	memcpy(p, s2, len2);
	p[len2] = '\0';
	return result;
}
78:
/*-
 * brk_string --
 *	Fracture a string into an array of words (as delineated by tabs or
 *	spaces) taking quotation marks and backslashes into account.
 *	Leading tabs/spaces are ignored.  An unquoted newline ends the
 *	current word and stops the scan.
 *
 *	Quotes (' and ") are removed; inside one kind of quote the other
 *	kind is an ordinary character.  \b, \f, \n, \r and \t are
 *	translated to the corresponding control character, a backslash in
 *	front of any other character is dropped, and a backslash in front
 *	of a newline or of the end of the string is kept as is.
 *
 * returns --
 *	Pointer to a NULL-terminated array of pointers to the words; the
 *	first word found is argv[0].  The number of words is stored in
 *	*store_argc.  The words live in one buffer allocated here and
 *	handed back through *buffer.
 *
 *	Note: start initially points at the buffer, so a string that is
 *	empty once leading blanks are skipped yields one empty word.
 */
char **
brk_string(const char *str, int *store_argc, char **buffer)
{
	int argc;
	char ch;
	char inquote;
	const char *p;
	char *start, *t;
	int argmax = 50;
	char **argv = emalloc((argmax + 1) * sizeof(char *));

	/* skip leading space chars. */
	while (*str == ' ' || *str == '\t')
		str++;

	/*
	 * allocate room for a copy of the string: the copy never holds
	 * more characters than the source plus its terminator.
	 */
	*buffer = emalloc(strlen(str) + 1);

	/*
	 * copy the string; at the same time, parse backslashes,
	 * quotes and build the argument list.
	 */
	argc = 0;
	inquote = '\0';
	for (p = str, start = t = *buffer;; ++p) {
		switch (ch = *p) {
		case '"':
		case '\'':
			if (inquote) {
				if (inquote == ch)
					inquote = '\0';
				else
					break;	/* other quote: literal */
			} else {
				inquote = ch;
				/*
				 * Don't miss "" or '': copy the opening
				 * quote as a placeholder and start the
				 * word just past it, so the word is empty.
				 */
				if (start == NULL && p[1] == inquote) {
					start = t + 1;
					break;
				}
			}
			continue;
		case ' ':
		case '\t':
		case '\n':
			if (inquote)
				break;
			if (!start)
				continue;
			/* FALLTHROUGH */
		case '\0':
			/*
			 * end of a token -- make sure there's enough argv
			 * space and save off a pointer.
			 */
			if (!start)
				goto done;

			*t++ = '\0';
			if (argc == argmax) {
				argmax *= 2;		/* ramp up fast */
				argv = ereallocarray(argv,
				    (argmax + 1), sizeof(char *));
			}
			argv[argc++] = start;
			start = NULL;
			if (ch == '\n' || ch == '\0')
				goto done;
			continue;
		case '\\':
			switch (ch = *++p) {
			case '\0':
			case '\n':
				/* hmmm; fix it up as best we can */
				ch = '\\';
				--p;
				break;
			case 'b':
				ch = '\b';
				break;
			case 'f':
				ch = '\f';
				break;
			case 'n':
				ch = '\n';
				break;
			case 'r':
				ch = '\r';
				break;
			case 't':
				ch = '\t';
				break;
			}
			break;
		}
		/* ordinary character: it opens a word if none is open */
		if (!start)
			start = t;
		*t++ = ch;
	}
done:
	argv[argc] = NULL;
	*store_argc = argc;
	return argv;
}
196:
1.19 espie 197:
/*
 * iterate_words --
 *	Find the next word in the string that *end points into.
 *	Whitespace (per ISSPACE) in front of the word is skipped.  The
 *	word runs up to the first space or tab that is not inside single
 *	or double quotes, or up to the end of the string.  A backslash
 *	makes the scan step over the following character, unless that
 *	character is the terminating NUL.
 *
 * returns --
 *	The start of the word, with *end updated to the character that
 *	ends it; or NULL, with *end untouched, when only whitespace is
 *	left.
 */
const char *
iterate_words(const char **end)
{
	const char *word = *end;
	const char *cur;
	char quote = 0;

	while (ISSPACE(*word))
		word++;
	if (*word == '\0')
		return NULL;

	for (cur = word;; cur++) {
		char c = *cur;

		if (c == '\\') {
			/* step over the escaped character, if there is one */
			if (cur[1] != '\0')
				cur++;
		} else if (c == '\'' || c == '"') {
			/* same quote closes, any quote opens when none is open */
			if (quote == c)
				quote = 0;
			else if (quote == 0)
				quote = c;
		} else if (c == '\0' ||
		    ((c == ' ' || c == '\t') && quote == 0)) {
			*end = cur;
			return word;
		}
	}
}
1.17 espie 235:
/*
 * range_match --
 *	Match character c against the character class that starts at
 *	*ppat (just past the opening '[') and ends no later than epattern.
 *	A class lists single characters and ranges ("a-z", in either
 *	order); a leading '!' or '^' negates it; '\' quotes the next
 *	character.  If the pattern ends right after the '[', the bracket
 *	stands for itself.
 *
 * returns --
 *	true on a match, with *ppat left on the closing ']', or on
 *	epattern when the class is not terminated.  false otherwise.
 *
 *	NOTE(review): when the matching member is itself a ']' (first
 *	member, or quoted with '\'), the skip loop below stops on it
 *	right away, so the real closing ']' is left in the pattern.
 */
static bool
range_match(char c, const char **ppat, const char *epattern)
{
	if (*ppat == epattern)
		return c == '[';
	if (**ppat == '!' || **ppat == '^') {
		(*ppat)++;
		return !range_match(c, ppat, epattern);
	}
	for (;;) {
		if (**ppat == '\\') {
			if (++(*ppat) == epattern)
				return false;
		}
		if (**ppat == c)
			break;
		/* only look for the '-' of a range inside the pattern */
		if (*ppat + 1 != epattern && (*ppat)[1] == '-') {
			if (*ppat + 2 == epattern)
				return false;
			if (**ppat < c && c <= (*ppat)[2])
				break;
			if ((*ppat)[2] <= c && c < **ppat)
				break;
			*ppat += 3;
		} else
			(*ppat)++;
		/* The test for ']' is done at the end
		 * so that ']' can be used at the
		 * start of the range without '\' */
		if (*ppat == epattern || **ppat == ']')
			return false;
	}
	/* Found matching character, skip over rest of class.
	 * A non-terminated character class is ok: stop at epattern
	 * without looking at the byte stored there. */
	while (*ppat != epattern && **ppat != ']') {
		if (**ppat == '\\') {
			(*ppat)++;
			if (*ppat == epattern)
				break;
		}
		(*ppat)++;
	}
	return true;
}

/* Repeated here so that this group of functions is self-contained. */
static bool star_match(const char *, const char *, const char *, const char *);

/*
 * Str_Matchi --
 *	Check whether the whole interval [string, estring) matches the
 *	shell-style pattern [pattern, epattern).  '*' matches any
 *	substring, '?' any single character, '[...]' a character class
 *	(see range_match) and '\' quotes the character after it.
 *
 * returns --
 *	true if the string matches the pattern, false otherwise.
 */
bool
Str_Matchi(const char *string, const char *estring,
    const char *pattern, const char *epattern)
{
	while (pattern != epattern) {
		/* Check for a "*" as the next pattern character. */
		if (*pattern == '*')
			return star_match(string, estring, pattern, epattern);
		/* Anything else needs one character of the string. */
		if (string == estring)
			return false;
		if (*pattern == '[') {
			pattern++;
			if (!range_match(*string, &pattern, epattern))
				return false;
			/* A class that runs to the end of the pattern has
			 * no ']' to step over: moving pattern any further
			 * would take it past epattern for good. */
			if (pattern == epattern) {
				string++;
				break;
			}
		} else if (*pattern != '?') {
			/* '?' matches any single character and needs no
			 * test.  For '\', strip it off so we do exact
			 * matching on the character that follows. */
			if (*pattern == '\\') {
				if (++pattern == epattern)
					return false;
			}
			/* There's no special character.  Just make sure that
			 * the next characters of each string match. */
			if (*pattern != *string)
				return false;
		}
		pattern++;
		string++;
	}
	return string == estring;
}

/*
 * star_match --
 *	Handle a '*' found at pattern.  '*' matches any substring: after
 *	folding the run of '*' and '?' that follows it, try the rest of
 *	the pattern against every non-empty suffix of the string.
 *
 * returns --
 *	true if some suffix matches, false otherwise.
 */
static bool
star_match(const char *string, const char *estring,
    const char *pattern, const char *epattern)
{
	pattern++;
	/* Skip over contiguous sequences of `?*', so that
	 * recursive calls only occur on `real' characters.
	 * Each '?' uses up one character of the string. */
	while (pattern != epattern &&
	    (*pattern == '?' || *pattern == '*')) {
		if (*pattern == '?') {
			if (string == estring)
				return false;
			string++;
		}
		pattern++;
	}
	/* A trailing '*' matches whatever is left. */
	if (pattern == epattern)
		return true;
	for (; string != estring; string++)
		if (Str_Matchi(string, estring, pattern, epattern))
			return true;
	return false;
}
356:
1.17 espie 357:
/*-
 *-----------------------------------------------------------------------
 * Str_SYSVMatch --
 *	Check word against pattern for a match (% is wild).
 *
 *	An empty pattern matches the whole word.  With a '%', the text
 *	in front of it must be a prefix of word and the text after it
 *	must be a suffix of what remains.  Without a '%', the pattern
 *	must be a suffix of word.
 *
 * Results:
 *	Returns the beginning position of a match or null: that is the
 *	start of the text standing for '%', or the start of word when
 *	the pattern has no '%'.  The number of characters matched is
 *	returned in len; len is left untouched when there is no match.
 *-----------------------------------------------------------------------
 */
const char *
Str_SYSVMatch(const char *word, const char *pattern, size_t *len)
{
	const char *p = pattern;
	const char *w = word;
	const char *m;
	size_t wlen, plen;

	if (*p == '\0') {
		/* Null pattern is the whole string. */
		*len = strlen(w);
		return w;
	}

	if ((m = strchr(p, '%')) != NULL) {
		/* Check that the prefix matches. */
		for (; p != m && *w && *w == *p; w++, p++)
			continue;

		if (p != m)
			return NULL;	/* No match. */

		if (*++p == '\0') {
			/* No more pattern, return the rest of the string. */
			*len = strlen(w);
			return w;
		}
	}

	/*
	 * Find a matching tail.  The rest of the pattern can only match
	 * the end of the word, so one comparison at the right offset is
	 * enough.
	 */
	wlen = strlen(w);
	plen = strlen(p);
	if (plen > wlen || memcmp(w + wlen - plen, p, plen) != 0)
		return NULL;

	*len = wlen - plen;
	return w;
}
408:
409:
410: /*-
411: *-----------------------------------------------------------------------
412: * Str_SYSVSubst --
1.21 espie 413: * Substitute '%' in the pattern with len characters from src.
1.1 deraadt 414: * If the pattern does not contain a '%' prepend len characters
415: * from src.
1.5 millert 416: *
1.1 deraadt 417: * Side Effects:
1.21 espie 418: * Adds result to buf
1.1 deraadt 419: *-----------------------------------------------------------------------
420: */
421: void
1.21 espie 422: Str_SYSVSubst(Buffer buf, const char *pat, const char *src, size_t len)
1.1 deraadt 423: {
1.23 espie 424: const char *m;
1.1 deraadt 425:
1.23 espie 426: if ((m = strchr(pat, '%')) != NULL) {
427: /* Copy the prefix. */
428: Buf_Addi(buf, pat, m);
429: /* Skip the %. */
430: pat = m + 1;
431: }
1.1 deraadt 432:
1.23 espie 433: /* Copy the pattern. */
434: Buf_AddChars(buf, len, src);
1.1 deraadt 435:
1.23 espie 436: /* Append the rest. */
437: Buf_AddString(buf, pat);
1.9 espie 438: }
439:
/*
 * Str_dupi --
 *	Duplicate the interval [begin, end) as a NUL-terminated string.
 *
 * returns --
 *	The copy, obtained from emalloc().
 */
char *
Str_dupi(const char *begin, const char *end)
{
	size_t n = end - begin;
	char *copy = emalloc(n + 1);

	memcpy(copy, begin, n);
	copy[n] = '\0';
	return copy;
}
1.16 espie 450:
/*
 * escape_dupi --
 *	Duplicate the interval [begin, end), removing the backslash in
 *	front of every character that belongs to set.  A backslash in
 *	front of any other character is kept, as is a backslash that
 *	ends the interval.
 *
 * returns --
 *	The NUL-terminated copy, obtained from emalloc().
 */
char *
escape_dupi(const char *begin, const char *end, const char *set)
{
	char *copy = emalloc(end - begin + 1);
	char *out = copy;
	const char *in;

	for (in = begin; in != end; in++) {
		if (*in == '\\') {
			if (++in == end) {
				/* lone backslash at the very end */
				*out++ = '\\';
				break;
			}
			/* keep the backslash unless it quotes a set member */
			if (strchr(set, *in) == NULL)
				*out++ = '\\';
		}
		*out++ = *in;
	}
	*out = '\0';
	return copy;
}
472:
/*
 * Str_rchri --
 *	Look for the last occurrence of character c in the interval
 *	[begin, end).
 *
 * returns --
 *	A pointer to that occurrence, or NULL if c does not occur (in
 *	particular when the interval is empty).
 */
char *
Str_rchri(const char *begin, const char *end, int c)
{
	const char *p = end;

	while (p != begin) {
		p--;
		if (*p == c)
			return (char *)p;
	}
	return NULL;
}