Annotation of src/usr.bin/make/str.c, Revision 1.25
1.17 espie 1: /* $OpenPackages$ */
1.25 ! espie 2: /* $OpenBSD: str.c,v 1.24 2007/09/17 09:28:36 espie Exp $ */
1.5 millert 3: /* $NetBSD: str.c,v 1.13 1996/11/06 17:59:23 christos Exp $ */
1.1 deraadt 4:
5: /*-
1.5 millert 6: * Copyright (c) 1988, 1989, 1990, 1993
7: * The Regents of the University of California. All rights reserved.
1.1 deraadt 8: * Copyright (c) 1989 by Berkeley Softworks
9: * All rights reserved.
10: *
11: * This code is derived from software contributed to Berkeley by
12: * Adam de Boor.
13: *
14: * Redistribution and use in source and binary forms, with or without
15: * modification, are permitted provided that the following conditions
16: * are met:
17: * 1. Redistributions of source code must retain the above copyright
18: * notice, this list of conditions and the following disclaimer.
19: * 2. Redistributions in binary form must reproduce the above copyright
20: * notice, this list of conditions and the following disclaimer in the
21: * documentation and/or other materials provided with the distribution.
1.20 millert 22: * 3. Neither the name of the University nor the names of its contributors
1.1 deraadt 23: * may be used to endorse or promote products derived from this software
24: * without specific prior written permission.
25: *
26: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36: * SUCH DAMAGE.
37: */
38:
1.19 espie 39: #include <ctype.h>
40: #include <string.h>
41: #include "config.h"
42: #include "defines.h"
43: #include "str.h"
44: #include "memory.h"
45: #include "buf.h"
1.15 espie 46:
1.22 espie 47: /* helpers for Str_Matchi */
48: static bool range_match(char, const char **, const char *);
49: static bool star_match(const char *, const char *, const char *, const char *);
50:
/*
 * Str_concati --
 *	Concatenate the intervals [s1, e1) and [s2, e2) into a freshly
 *	allocated, NUL-terminated string.  If sep is non-zero it is inserted
 *	as a single character between the two parts.
 *
 * returns --
 *	The new string, obtained from emalloc().
 */
char *
Str_concati(const char *s1, const char *e1, const char *s2, const char *e2,
    int sep)
{
	size_t len1 = e1 - s1;
	size_t len2 = e2 - s2;
	char *result, *p;

	/* one extra byte for the separator (if any), one for the EOS */
	result = emalloc(len1 + (sep ? 1 : 0) + len2 + 1);
	p = result;

	/*
	 * Copy exactly the bytes of the first interval.  The separator is
	 * stored afterwards rather than being "reserved" by copying one
	 * byte too many, so nothing at or past e1 is ever read.
	 */
	memcpy(p, s1, len1);
	p += len1;

	if (sep)
		*p++ = sep;

	/* copy second string and terminate */
	memcpy(p, s2, len2);
	p[len2] = '\0';
	return result;
}
79:
/*-
 * brk_string --
 *	Fracture a string into an array of words (as delineated by tabs or
 *	spaces) taking quotation marks and backslash escapes into account.
 *	Leading tabs/spaces are ignored.  Parsing stops at the end of the
 *	string or at the first unquoted newline.
 *
 * arguments --
 *	str		the string to split; it is only read.
 *	store_argc	receives the number of words stored in the array.
 *	buffer		receives a buffer allocated with emalloc() that
 *			holds the text of all the words; the returned
 *			pointers point into it.
 *
 * returns --
 *	An array of pointers to the words, allocated with emalloc() and
 *	terminated by a NULL entry.  Neither the array nor *buffer is freed
 *	here.  Note that a string that is empty once leading blanks are
 *	skipped still yields one (empty) word, since the first word is
 *	considered started as soon as parsing begins.
 */
char **
brk_string(const char *str, int *store_argc, char **buffer)
{
	int argc;
	char ch;
	char inquote;
	const char *p;
	char *start, *t;
	int argmax = 50;
	char **argv = emalloc((argmax + 1) * sizeof(char *));

	/* skip leading space chars. */
	for (; *str == ' ' || *str == '\t'; ++str)
		continue;

	/*
	 * allocate room for a copy of the string: the copy never grows,
	 * as each input character produces at most one output character.
	 */
	*buffer = emalloc(strlen(str) + 1);

	/*
	 * copy the string; at the same time, parse backslashes,
	 * quotes and build the argument list.
	 */
	argc = 0;
	inquote = '\0';
	for (p = str, start = t = *buffer;; ++p) {
		switch (ch = *p) {
		case '"':
		case '\'':
			if (inquote) {
				if (inquote == ch)
					inquote = '\0';
				else
					/* other kind of quote: plain char */
					break;
			} else {
				inquote = ch;
				/* Don't miss "" or '' */
				if (start == NULL && p[1] == inquote) {
					/*
					 * The quote itself gets copied at t
					 * below, so the (empty) word starts
					 * right after it.
					 */
					start = t + 1;
					break;
				}
			}
			/* quote characters are not copied */
			continue;
		case ' ':
		case '\t':
		case '\n':
			if (inquote)
				break;
			if (!start)
				continue;
			/* FALLTHROUGH */
		case '\0':
			/*
			 * end of a token -- make sure there's enough argv
			 * space and save off a pointer.
			 */
			if (!start)
				goto done;

			*t++ = '\0';
			if (argc == argmax) {
				argmax *= 2;		/* ramp up fast */
				argv = erealloc(argv,
				    (argmax + 1) * sizeof(char *));
			}
			argv[argc++] = start;
			start = NULL;
			if (ch == '\n' || ch == '\0')
				goto done;
			continue;
		case '\\':
			switch (ch = *++p) {
			case '\0':
			case '\n':
				/* hmmm; fix it up as best we can */
				ch = '\\';
				--p;
				break;
			case 'b':
				ch = '\b';
				break;
			case 'f':
				ch = '\f';
				break;
			case 'n':
				ch = '\n';
				break;
			case 'r':
				ch = '\r';
				break;
			case 't':
				ch = '\t';
				break;
			/* any other character stands for itself */
			}
			break;
		}
		if (!start)
			start = t;
		*t++ = ch;
	}
done:
	argv[argc] = NULL;
	*store_argc = argc;
	return argv;
}
197:
1.19 espie 198:
/*
 * iterate_words --
 *	Find the next word in a string, starting the search at *end.
 *
 *	Leading white space (as defined by isspace()) is skipped.  The word
 *	then extends up to the next space or tab that is not inside single
 *	or double quotes, or up to the end of the string.  A backslash
 *	makes the following character part of the word.  An unterminated
 *	quote simply runs to the end of the string.
 *
 * returns --
 *	A pointer to the start of the word, with *end updated to point to
 *	the character that terminated it, or NULL (leaving *end untouched)
 *	if only white space remains.
 */
const char *
iterate_words(const char **end)
{
	const char *start, *p;
	char state = 0;		/* active quote character, or 0 */

	start = *end;

	/*
	 * The ctype functions are only defined for EOF and values
	 * representable as unsigned char, so never hand them a plain char.
	 */
	while (isspace((unsigned char)*start))
		start++;
	if (*start == '\0')
		return NULL;

	for (p = start;; p++)
		switch(*p) {
		case '\\':
			/* skip the escaped character, but never the EOS */
			if (p[1] != '\0')
				p++;
			break;
		case '\'':
		case '"':
			if (state == *p)
				state = 0;
			else if (state == 0)
				state = *p;
			break;
		case ' ':
		case '\t':
			if (state != 0)
				break;
			/* FALLTHROUGH */
		case '\0':
			*end = p;
			return start;
		default:
			break;
		}
}
1.17 espie 236:
/*
 * Forward declaration: star_match() and Str_Matchi() are mutually
 * recursive.
 */
bool Str_Matchi(const char *, const char *, const char *, const char *);

/*
 * star_match --
 *	Handle a '*' in a pattern.  On entry, pattern points at the '*'.
 *
 *	'*' matches any substring.  We handle this by calling Str_Matchi
 *	for each postfix of string, until either we match or we reach the
 *	end of the string.
 */
static bool
star_match(const char *string, const char *estring,
    const char *pattern, const char *epattern)
{
	pattern++;
	/* Skip over contiguous sequences of `?*', so that
	 * recursive calls only occur on `real' characters.  Each '?'
	 * still has to consume one character of the string. */
	while (pattern != epattern &&
	    (*pattern == '?' || *pattern == '*')) {
		if (*pattern == '?') {
			if (string == estring)
				return false;
			string++;
		}
		pattern++;
	}
	/* A trailing '*' matches whatever is left. */
	if (pattern == epattern)
		return true;
	for (; string != estring; string++)
		if (Str_Matchi(string, estring, pattern, epattern))
			return true;
	return false;
}

/*
 * range_match --
 *	Match the character c against a character class.  On entry, *ppat
 *	points just after the opening '['.  A leading '!' or '^' inverts
 *	the result.  A class is a list of characters and of ranges (two
 *	characters separated by '-', in either order); '\' makes the next
 *	character a plain list member.
 *
 * returns --
 *	true if c belongs to the class.  In that case, *ppat is left on the
 *	closing ']', or equal to epattern if the class is not terminated.
 *	Nothing at or past epattern is ever read.
 */
static bool
range_match(char c, const char **ppat, const char *epattern)
{
	const char *matched;

	/* A lone '[' at the end of the pattern only matches itself. */
	if (*ppat == epattern)
		return c == '[';
	if (**ppat == '!' || **ppat == '^') {
		(*ppat)++;
		return !range_match(c, ppat, epattern);
	}
	for (;;) {
		if (**ppat == '\\') {
			if (++(*ppat) == epattern)
				return false;
		}
		if (**ppat == c)
			break;
		/* Only look ahead for '-' while still inside the pattern. */
		if (epattern - *ppat > 1 && (*ppat)[1] == '-') {
			/* range without an upper bound */
			if (*ppat + 2 == epattern)
				return false;
			if (**ppat < c && c <= (*ppat)[2])
				break;
			if ((*ppat)[2] <= c && c < **ppat)
				break;
			*ppat += 3;
		} else
			(*ppat)++;
		/* The test for ']' is done at the end
		 * so that ']' can be used at the
		 * start of the range without '\' */
		if (*ppat == epattern || **ppat == ']')
			return false;
	}
	/*
	 * Found matching character, skip over rest of class.  Step over
	 * the matching character first: it may itself be a ']' or a '\'
	 * that was matched literally, and must not be taken for the end
	 * of the class or for an escape.
	 */
	matched = *ppat;
	(*ppat)++;
	while (*ppat != epattern && **ppat != ']') {
		/* A non-terminated character class is ok. */
		if (**ppat == '\\' && ++(*ppat) == epattern)
			break;
		(*ppat)++;
	}
	/*
	 * No further ']': if we matched a ']', let it close the class
	 * after all, so that e.g. "[]" still matches a single ']'.
	 */
	if (*ppat == epattern && *matched == ']')
		*ppat = matched;
	return true;
}

/*
 * Str_Matchi --
 *	Check whether the interval [string, estring) matches the shell-like
 *	pattern [pattern, epattern).  The pattern may contain '*' (any
 *	substring), '?' (any single character), '[...]' (character class,
 *	see range_match) and '\' (take the next character literally).
 *
 * returns --
 *	true if the whole string matches the whole pattern.
 */
bool
Str_Matchi(const char *string, const char *estring,
    const char *pattern, const char *epattern)
{
	while (pattern != epattern) {
		/* Check for a "*" as the next pattern character. */
		if (*pattern == '*')
			return star_match(string, estring, pattern, epattern);
		/* Anything else needs one character from the string. */
		if (string == estring)
			return false;
		/* Check for a "[" as the next pattern character.  It is
		 * followed by a list of characters that are acceptable, or
		 * by a range (two characters separated by "-"). */
		if (*pattern == '[') {
			pattern++;
			if (!range_match(*string, &pattern, epattern))
				return false;
			/*
			 * A non-terminated class used up the whole pattern:
			 * there is no ']' to step over, and moving pattern
			 * beyond epattern would make us run off the end.
			 */
			if (pattern == epattern) {
				string++;
				break;
			}
		}
		/* '?' matches any single character, so shunt test. */
		else if (*pattern != '?') {
			/* If the next pattern character is '\', just strip
			 * off the '\' so we do exact matching on the
			 * character that follows. */
			if (*pattern == '\\') {
				if (++pattern == epattern)
					return false;
			}
			/* There's no special character.  Just make sure that
			 * the next characters of each string match. */
			if (*pattern != *string)
				return false;
		}
		pattern++;
		string++;
	}
	return string == estring;
}
357:
1.17 espie 358:
/*-
 *-----------------------------------------------------------------------
 * Str_SYSVMatch --
 *	Check word against pattern for a match, where the first '%' in
 *	pattern is a wildcard.  An empty pattern, or a pattern that ends
 *	with its '%', matches whatever is left of word.  Without any '%',
 *	the pattern only has to match the end of word.
 *
 * Results:
 *	Returns the beginning position of the part of word matched by the
 *	wildcard, or NULL if there is no match.  The number of characters
 *	matched is returned in len (len is left untouched on failure).
 *-----------------------------------------------------------------------
 */
const char *
Str_SYSVMatch(const char *word, const char *pattern, size_t *len)
{
	const char *percent;
	const char *stem;
	const char *tail;

	/* Null pattern is the whole string. */
	if (pattern[0] == '\0') {
		*len = strlen(word);
		return word;
	}

	percent = strchr(pattern, '%');
	if (percent != NULL) {
		/* The text in front of the % must start the word. */
		while (pattern != percent && *word != '\0' &&
		    *word == *pattern) {
			word++;
			pattern++;
		}
		if (pattern != percent)
			return NULL;

		/* Step over the %: nothing after it means take the rest. */
		pattern++;
		if (*pattern == '\0') {
			*len = strlen(word);
			return word;
		}
	}

	/* Look for the position at which the remaining pattern is
	 * exactly the end of the word. */
	stem = word;
	for (tail = stem;; tail++) {
		if (strcmp(pattern, tail) == 0) {
			*len = tail - stem;
			return stem;
		}
		if (*tail == '\0')
			return NULL;
	}
}
409:
410:
411: /*-
412: *-----------------------------------------------------------------------
413: * Str_SYSVSubst --
1.21 espie 414: * Substitute '%' in the pattern with len characters from src.
1.1 deraadt 415: * If the pattern does not contain a '%' prepend len characters
416: * from src.
1.5 millert 417: *
1.1 deraadt 418: * Side Effects:
1.21 espie 419: * Adds result to buf
1.1 deraadt 420: *-----------------------------------------------------------------------
421: */
422: void
1.21 espie 423: Str_SYSVSubst(Buffer buf, const char *pat, const char *src, size_t len)
1.1 deraadt 424: {
1.23 espie 425: const char *m;
1.1 deraadt 426:
1.23 espie 427: if ((m = strchr(pat, '%')) != NULL) {
428: /* Copy the prefix. */
429: Buf_Addi(buf, pat, m);
430: /* Skip the %. */
431: pat = m + 1;
432: }
1.1 deraadt 433:
1.23 espie 434: /* Copy the pattern. */
435: Buf_AddChars(buf, len, src);
1.1 deraadt 436:
1.23 espie 437: /* Append the rest. */
438: Buf_AddString(buf, pat);
1.9 espie 439: }
440:
/*
 * Str_dupi --
 *	Duplicate the interval [begin, end).
 *
 * returns --
 *	A NUL-terminated copy, allocated with emalloc().
 */
char *
Str_dupi(const char *begin, const char *end)
{
	size_t len = end - begin;
	char *copy = emalloc(len + 1);

	memcpy(copy, begin, len);
	copy[len] = '\0';
	return copy;
}
1.16 espie 451:
/*
 * escape_dupi --
 *	Duplicate the interval [begin, end), removing each backslash that
 *	comes before a character found in set.  Backslashes in front of
 *	any other character, and a backslash that ends the interval, are
 *	copied unchanged.
 *
 * returns --
 *	A NUL-terminated string allocated with emalloc().  It is never
 *	longer than the original interval.
 */
char *
escape_dupi(const char *begin, const char *end, const char *set)
{
	char *result = emalloc(end - begin + 1);
	char *out = result;
	const char *in;

	for (in = begin; in != end; in++) {
		if (*in == '\\') {
			if (++in == end) {
				/* lone backslash at the very end */
				*out++ = '\\';
				break;
			}
			/* keep the backslash unless it protects a
			 * character from set */
			if (strchr(set, *in) == NULL)
				*out++ = '\\';
		}
		*out++ = *in;
	}
	*out = '\0';
	return result;
}
473:
/*
 * Str_rchri --
 *	Find the last occurrence of c in the interval [begin, end).
 *	Like strrchr(), c is converted to char before the comparison, so
 *	that a byte with the high bit set is found whether it is passed
 *	as a char or as a value in the unsigned char range.
 *
 * returns --
 *	A pointer to that occurrence, or NULL if c is not in the interval
 *	(this includes an empty interval).
 */
char *
Str_rchri(const char *begin, const char *end, int c)
{
	const char ch = (char)c;

	while (end != begin) {
		if (*--end == ch)
			return (char *)end;
	}
	return NULL;
}