Annotation of src/usr.bin/make/str.c, Revision 1.23
1.17 espie 1: /* $OpenPackages$ */
1.22 espie 2: /* $OpenBSD: str.c,v 1.21 2004/04/07 13:11:36 espie Exp $ */
1.5 millert 3: /* $NetBSD: str.c,v 1.13 1996/11/06 17:59:23 christos Exp $ */
1.1 deraadt 4:
5: /*-
1.5 millert 6: * Copyright (c) 1988, 1989, 1990, 1993
7: * The Regents of the University of California. All rights reserved.
1.1 deraadt 8: * Copyright (c) 1989 by Berkeley Softworks
9: * All rights reserved.
10: *
11: * This code is derived from software contributed to Berkeley by
12: * Adam de Boor.
13: *
14: * Redistribution and use in source and binary forms, with or without
15: * modification, are permitted provided that the following conditions
16: * are met:
17: * 1. Redistributions of source code must retain the above copyright
18: * notice, this list of conditions and the following disclaimer.
19: * 2. Redistributions in binary form must reproduce the above copyright
20: * notice, this list of conditions and the following disclaimer in the
21: * documentation and/or other materials provided with the distribution.
1.20 millert 22: * 3. Neither the name of the University nor the names of its contributors
1.1 deraadt 23: * may be used to endorse or promote products derived from this software
24: * without specific prior written permission.
25: *
26: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36: * SUCH DAMAGE.
37: */
38:
1.19 espie 39: #include <ctype.h>
40: #include <string.h>
41: #include "config.h"
42: #include "defines.h"
43: #include "str.h"
44: #include "memory.h"
45: #include "buf.h"
1.15 espie 46:
1.22 espie 47: /* helpers for Str_Matchi */
48: static bool range_match(char, const char **, const char *);
49: static bool star_match(const char *, const char *, const char *, const char *);
50:
/*
 * Str_concati --
 *	Concatenate the intervals [s1, e1) and [s2, e2) into a newly
 *	allocated, NUL-terminated string.  If sep is non-zero, it is
 *	stored as a single character between the two parts.
 *
 * returns --
 *	The new string, obtained from emalloc().
 */
char *
Str_concati(const char *s1, const char *e1, const char *s2, const char *e2,
    int sep)
{
	size_t len1, len2;
	char *result, *t;

	/* get the length of both strings */
	len1 = e1 - s1;
	len2 = e2 - s2;

	/* both parts, the optional separator and the terminating NUL */
	result = emalloc(len1 + len2 + (sep ? 1 : 0) + 1);

	/*
	 * Copy exactly the bytes of the first interval: the byte at e1
	 * lies outside of it and must not be read, even when a separator
	 * is going to be stored in that position of the result.
	 */
	t = result;
	memcpy(t, s1, len1);
	t += len1;

	/* add separator character */
	if (sep)
		*t++ = sep;

	/* copy second string into place and terminate the result */
	memcpy(t, s2, len2);
	t[len2] = '\0';
	return result;
}
79:
/*-
 * brk_string --
 *	Fracture a string into an array of words (as delineated by tabs or
 *	spaces) taking quotation marks and backslash escapes into account.
 *	Leading tabs/spaces are ignored.  An unquoted newline that ends a
 *	word also ends the scan.
 *
 *	The words are built in a copy of the string that is allocated here
 *	and handed back through *buffer; the returned pointers point into
 *	that copy.
 *
 * returns --
 *	Pointer to the NULL-terminated array of pointers to the words.
 *	The number of words is stored in *store_argc.
 */
char **
brk_string(const char *str, int *store_argc, char **buffer)
{
	int argc;
	char ch;
	char inquote;
	const char *p;
	char *start, *t;
	int argmax = 50;
	char **argv = emalloc((argmax + 1) * sizeof(char *));

	/* skip leading space chars. */
	while (*str == ' ' || *str == '\t')
		str++;

	/* allocate room for a copy of the string */
	*buffer = emalloc(strlen(str) + 1);

	/*
	 * copy the string; at the same time, parse backslashes,
	 * quotes and build the argument list.
	 *
	 * start marks the beginning of the word being built, or is NULL
	 * between words.  It is set up front, so a word is recorded for
	 * the first position even if that word turns out to be empty.
	 */
	argc = 0;
	inquote = '\0';
	for (p = str, start = t = *buffer;; ++p) {
		switch (ch = *p) {
		case '"':
		case '\'':
			if (inquote) {
				if (inquote == ch)
					inquote = '\0';
				else
					/* other kind of quote: plain char */
					break;
			} else {
				inquote = ch;
				/* Don't miss "" or '' */
				if (start == NULL && p[1] == inquote) {
					/*
					 * The quote itself gets stored below;
					 * the (empty) word starts right after.
					 */
					start = t + 1;
					break;
				}
			}
			continue;
		case ' ':
		case '\t':
		case '\n':
			if (inquote)
				break;
			if (!start)
				continue;
			/* FALLTHROUGH */
		case '\0':
			/*
			 * end of a token -- make sure there's enough argv
			 * space and save off a pointer.
			 */
			if (!start)
				goto done;

			*t++ = '\0';
			if (argc == argmax) {
				argmax *= 2;		/* ramp up fast */
				argv = erealloc(argv,
				    (argmax + 1) * sizeof(char *));
			}
			argv[argc++] = start;
			start = NULL;
			if (ch == '\n' || ch == '\0')
				goto done;
			continue;
		case '\\':
			switch (ch = *++p) {
			case '\0':
			case '\n':
				/* hmmm; fix it up as best we can */
				ch = '\\';
				--p;
				break;
			case 'b':
				ch = '\b';
				break;
			case 'f':
				ch = '\f';
				break;
			case 'n':
				ch = '\n';
				break;
			case 'r':
				ch = '\r';
				break;
			case 't':
				ch = '\t';
				break;
			}
			break;
		}
		/* ordinary character: it belongs to the current word */
		if (!start)
			start = t;
		*t++ = ch;
	}
done:
	argv[argc] = NULL;
	*store_argc = argc;
	return argv;
}
197:
1.19 espie 198:
/*
 * iterate_words --
 *	Find the next word in a string.  On entry, *end is the position
 *	where scanning starts.  Leading whitespace is skipped; the word
 *	then extends up to the next space or tab that is not inside single
 *	or double quotes, or up to the end of the string.  A backslash
 *	protects the character that follows it.
 *
 * returns --
 *	The start of the word, with *end set just past its last character,
 *	or NULL (leaving *end alone) if only whitespace remains.
 */
const char *
iterate_words(const char **end)
{
	const char *start, *p;
	char state = 0;		/* active quote character, or 0 */

	start = *end;

	/* <ctype.h> functions require an unsigned char value */
	while (isspace((unsigned char)*start))
		start++;
	if (*start == '\0')
		return NULL;

	for (p = start;; p++)
		switch(*p) {
		case '\\':
			/* skip the escaped character, but never the NUL */
			if (p[1] != '\0')
				p++;
			break;
		case '\'':
		case '"':
			if (state == *p)
				state = 0;
			else if (state == 0)
				state = *p;
			break;
		case ' ':
		case '\t':
			if (state != 0)
				break;
			/* FALLTHROUGH */
		case '\0':
			*end = p;
			return start;
		default:
			break;
		}
}
1.17 espie 236:
/*
 * range_match --
 *	Match c against a character class.  *ppat points just past the
 *	opening '[' and epattern marks the end of the pattern.  A leading
 *	'!' or '^' negates the class, '\' quotes the next character,
 *	"x-y" denotes a range (in either order) and ']' is an ordinary
 *	member when it comes first.
 *
 * returns --
 *	Whether c is accepted.  When true is returned, *ppat is left on
 *	the closing ']', or on epattern if the class is not terminated.
 *	Nothing at or beyond epattern is ever read.
 */
static bool
range_match(char c, const char **ppat, const char *epattern)
{
	if (*ppat == epattern) {
		/* '[' was the last pattern character: it stands for itself */
		return c == '[';
	}
	if (**ppat == '!' || **ppat == '^') {
		(*ppat)++;
		return !range_match(c, ppat, epattern);
	}
	for (;;) {
		if (**ppat == '\\') {
			if (++(*ppat) == epattern)
				return false;
		}
		if (**ppat == c)
			break;
		/* A range only exists if its '-' lies inside the pattern. */
		if (*ppat + 1 != epattern && (*ppat)[1] == '-') {
			if (*ppat + 2 == epattern) {
				/* Dangling "x-": the class ends here. */
				*ppat = epattern;
				return false;
			}
			if (**ppat < c && c <= (*ppat)[2])
				break;
			if ((*ppat)[2] <= c && c < **ppat)
				break;
			*ppat += 3;
		} else
			(*ppat)++;
		/* The test for ']' is done at the end
		 * so that ']' can be used at the
		 * start of the range without '\' */
		if (*ppat == epattern || **ppat == ']')
			return false;
	}
	/* Found matching character.  Step over it first: it is a member
	 * of the class even if it is a ']', so it must not be taken for
	 * the end of the class. */
	(*ppat)++;
	/* Skip over rest of class.  A non-terminated character class
	 * is ok. */
	while (*ppat != epattern && **ppat != ']') {
		if (**ppat == '\\') {
			if (++(*ppat) == epattern)
				break;
		}
		(*ppat)++;
	}
	return true;
}

/* star_match and Str_Matchi call each other. */
static bool star_match(const char *, const char *, const char *, const char *);

/*
 * Str_Matchi --
 *	Check whether the string [string, estring) matches the shell-like
 *	pattern [pattern, epattern).  '*' matches any substring, '?' any
 *	single character, '[...]' a character class (see range_match) and
 *	'\' quotes the character that follows it.
 *
 * returns --
 *	true if the whole string matches the whole pattern.
 */
bool
Str_Matchi(const char *string, const char *estring,
    const char *pattern, const char *epattern)
{
	while (pattern != epattern) {
		/* Check for a "*" as the next pattern character. */
		if (*pattern == '*')
			return star_match(string, estring, pattern, epattern);
		else if (string == estring)
			return false;
		/* Check for a "[" as the next pattern character.  It is
		 * followed by a list of characters that are acceptable, or
		 * by a range (two characters separated by "-"). */
		else if (*pattern == '[') {
			pattern++;
			if (!range_match(*string, &pattern, epattern))
				return false;
			if (pattern == epattern) {
				/* The class ran into the end of the pattern:
				 * there is no ']' left to step over. */
				string++;
				break;
			}
		}
		/* '?' matches any single character, so shunt test. */
		else if (*pattern != '?') {
			/* If the next pattern character is '\', just strip
			 * off the '\' so we do exact matching on the
			 * character that follows. */
			if (*pattern == '\\') {
				if (++pattern == epattern)
					return false;
			}
			/* There's no special character.  Just make sure that
			 * the next characters of each string match. */
			if (*pattern != *string)
				return false;
		}
		pattern++;
		string++;
	}
	return string == estring;
}

/*
 * star_match --
 *	Helper for Str_Matchi, called with pattern pointing at a '*'.
 *
 * returns --
 *	true if some way of expanding that '*' makes the rest of the
 *	pattern match the rest of the string.
 */
static bool
star_match(const char *string, const char *estring,
    const char *pattern, const char *epattern)
{
	/* '*' matches any substring.  We handle this by calling ourselves
	 * recursively for each postfix of string, until either we match or
	 * we reach the end of the string.  */
	pattern++;
	/* Skip over contiguous sequences of `?*', so that
	 * recursive calls only occur on `real' characters.  */
	while (pattern != epattern &&
	    (*pattern == '?' || *pattern == '*')) {
		if (*pattern == '?') {
			/* each '?' needs one character of the string */
			if (string == estring)
				return false;
			string++;
		}
		pattern++;
	}
	/* a trailing '*' swallows whatever is left */
	if (pattern == epattern)
		return true;
	for (; string != estring; string++)
		if (Str_Matchi(string, estring, pattern, epattern))
			return true;
	return false;
}
357:
1.17 espie 358:
/*-
 *-----------------------------------------------------------------------
 * Str_SYSVMatch --
 *	Check word against pattern for a match (% is wild).  The part of
 *	the pattern before the first '%' must be a prefix of word and the
 *	part after it must be a suffix of what remains.  A pattern without
 *	'%' is matched as a suffix of word, and an empty pattern matches
 *	the whole word.
 *
 * Results:
 *	Returns the beginning position of the text matched by the wildcard,
 *	or NULL if there is no match.  The number of characters matched is
 *	returned in len, which is left alone when there is no match.
 *-----------------------------------------------------------------------
 */
const char *
Str_SYSVMatch(const char *word, const char *pattern, size_t *len)
{
	const char *p = pattern;
	const char *w = word;
	const char *m;
	size_t wlen, plen;

	if (*p == '\0') {
		/* Null pattern is the whole string. */
		*len = strlen(w);
		return w;
	}

	if ((m = strchr(p, '%')) != NULL) {
		/* Check that the prefix matches. */
		for (; p != m && *w && *w == *p; w++, p++)
			continue;

		if (p != m)
			return NULL;	/* No match. */

		if (*++p == '\0') {
			/* No more pattern, return the rest of the string. */
			*len = strlen(w);
			return w;
		}
	}

	/*
	 * Find a matching tail.  The rest of the pattern has to be equal
	 * to the end of the word, so the only candidate is the suffix
	 * that has the same length.
	 */
	wlen = strlen(w);
	plen = strlen(p);
	if (plen > wlen || memcmp(w + wlen - plen, p, plen) != 0)
		return NULL;

	*len = wlen - plen;
	return w;
}
409:
410:
411: /*-
412: *-----------------------------------------------------------------------
413: * Str_SYSVSubst --
1.21 espie 414: * Substitute '%' in the pattern with len characters from src.
1.1 deraadt 415: * If the pattern does not contain a '%' prepend len characters
416: * from src.
1.5 millert 417: *
1.1 deraadt 418: * Side Effects:
1.21 espie 419: * Adds result to buf
1.1 deraadt 420: *-----------------------------------------------------------------------
421: */
422: void
1.21 espie 423: Str_SYSVSubst(Buffer buf, const char *pat, const char *src, size_t len)
1.1 deraadt 424: {
1.23 ! espie 425: const char *m;
1.1 deraadt 426:
1.23 ! espie 427: if ((m = strchr(pat, '%')) != NULL) {
! 428: /* Copy the prefix. */
! 429: Buf_Addi(buf, pat, m);
! 430: /* Skip the %. */
! 431: pat = m + 1;
! 432: }
1.1 deraadt 433:
1.23 ! espie 434: /* Copy the pattern. */
! 435: Buf_AddChars(buf, len, src);
1.1 deraadt 436:
1.23 ! espie 437: /* Append the rest. */
! 438: Buf_AddString(buf, pat);
1.9 espie 439: }
440:
/*
 * Str_dupi --
 *	Duplicate the interval [begin, end).
 *
 * returns --
 *	A NUL-terminated copy, obtained from emalloc().
 */
char *
Str_dupi(const char *begin, const char *end)
{
	size_t n = end - begin;
	char *copy = emalloc(n + 1);

	memcpy(copy, begin, n);
	copy[n] = '\0';
	return copy;
}
1.16 espie 451:
/*
 * escape_dupi --
 *	Duplicate the interval [begin, end) while removing the backslash
 *	in front of every character that belongs to set.  Backslashes in
 *	front of other characters, and a backslash that ends the interval,
 *	are copied as they are.
 *
 * returns --
 *	A NUL-terminated string, obtained from emalloc().
 */
char *
escape_dupi(const char *begin, const char *end, const char *set)
{
	/* the result is never longer than the original interval */
	char *copy = emalloc(end - begin + 1);
	char *out = copy;
	const char *in;

	for (in = begin; in != end; in++) {
		if (*in == '\\') {
			if (++in == end) {
				/* nothing left to escape */
				*out++ = '\\';
				break;
			}
			/* not one of ours: the backslash stays */
			if (strchr(set, *in) == NULL)
				*out++ = '\\';
		}
		*out++ = *in;
	}
	*out = '\0';
	return copy;
}
473:
/*
 * Str_rchri --
 *	Look for the last occurrence of c in the interval [begin, end).
 *	As with strrchr(), c is converted to a char before comparing, so
 *	that a byte with the high bit set is found whether it is passed
 *	as a char or as an unsigned char value.
 *
 * returns --
 *	A pointer to that occurrence, or NULL if c does not occur.
 */
char *
Str_rchri(const char *begin, const char *end, int c)
{
	const char ch = (char)c;

	while (end != begin) {
		if (*--end == ch)
			return (char *)end;
	}
	return NULL;
}