Annotation of src/usr.bin/make/str.c, Revision 1.16
1.16 ! espie 1: /* $OpenBSD: str.c,v 1.15 2000/09/14 13:32:07 espie Exp $ */
1.5 millert 2: /* $NetBSD: str.c,v 1.13 1996/11/06 17:59:23 christos Exp $ */
1.1 deraadt 3:
4: /*-
1.5 millert 5: * Copyright (c) 1988, 1989, 1990, 1993
6: * The Regents of the University of California. All rights reserved.
1.1 deraadt 7: * Copyright (c) 1989 by Berkeley Softworks
8: * All rights reserved.
9: *
10: * This code is derived from software contributed to Berkeley by
11: * Adam de Boor.
12: *
13: * Redistribution and use in source and binary forms, with or without
14: * modification, are permitted provided that the following conditions
15: * are met:
16: * 1. Redistributions of source code must retain the above copyright
17: * notice, this list of conditions and the following disclaimer.
18: * 2. Redistributions in binary form must reproduce the above copyright
19: * notice, this list of conditions and the following disclaimer in the
20: * documentation and/or other materials provided with the distribution.
21: * 3. All advertising materials mentioning features or use of this software
22: * must display the following acknowledgement:
23: * This product includes software developed by the University of
24: * California, Berkeley and its contributors.
25: * 4. Neither the name of the University nor the names of its contributors
26: * may be used to endorse or promote products derived from this software
27: * without specific prior written permission.
28: *
29: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
30: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
33: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
34: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
35: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
36: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
37: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
38: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
39: * SUCH DAMAGE.
40: */
41:
1.15 espie 42: #include "make.h"
43:
1.1 deraadt 44: #ifndef lint
45: #if 0
46: static char sccsid[] = "@(#)str.c 5.8 (Berkeley) 6/1/90";
47: #else
1.15 espie 48: UNUSED
1.16 ! espie 49: static char rcsid[] = "$OpenBSD: str.c,v 1.15 2000/09/14 13:32:07 espie Exp $";
1.1 deraadt 50: #endif
51: #endif /* not lint */
52:
/*-
 * str_concat --
 *	Build a new string made of s1, an optional separator character,
 *	and s2.
 *
 * Parameters:
 *	s1, s2	NUL-terminated strings to join.
 *	sep	separator placed between them; '\0' means no separator.
 *
 * returns --
 *	the joined string, in space obtained from emalloc.
 */
char *
str_concat(s1, s2, sep)
    const char *s1, *s2;
    char sep;
{
    size_t n1 = strlen(s1);
    size_t n2 = strlen(s2);
    size_t off = n1;		/* offset at which s2 is stored */
    char *out;

    /* reserve one extra slot when a separator is requested */
    if (sep)
	off++;
    out = emalloc(off + n2 + 1);

    /* first string, without its terminator */
    memcpy(out, s1, n1);

    /* separator goes right after the first string */
    if (sep)
	out[n1] = sep;

    /* second string, terminator included */
    memcpy(out + off, s2, n2 + 1);
    return out;
}
88:
89: /*-
90: * brk_string --
91: * Fracture a string into an array of words (as delineated by tabs or
92: * spaces) taking quotation marks into account. Leading tabs/spaces
93: * are ignored.
94: *
95: * returns --
96: * Pointer to the array of pointers to the words. To make life easier,
97: * the first word is always the value of the .MAKE variable.
98: */
99: char **
1.7 espie 100: brk_string(str, store_argc, expand, buffer)
1.13 espie 101: const char *str;
1.1 deraadt 102: int *store_argc;
103: Boolean expand;
1.7 espie 104: char **buffer;
1.1 deraadt 105: {
106: register int argc, ch;
1.13 espie 107: char inquote;
108: const char *p;
109: char *start, *t;
110: size_t len;
111: int argmax = 50;
112: size_t curlen = 0;
1.7 espie 113: char **argv = (char **)emalloc((argmax + 1) * sizeof(char *));
1.1 deraadt 114:
115: /* skip leading space chars. */
116: for (; *str == ' ' || *str == '\t'; ++str)
117: continue;
118:
119: /* allocate room for a copy of the string */
1.7 espie 120: if ((len = strlen(str) + 1) > curlen)
121: *buffer = emalloc(curlen = len);
1.1 deraadt 122:
123: /*
124: * copy the string; at the same time, parse backslashes,
125: * quotes and build the argument list.
126: */
1.7 espie 127: argc = 0;
1.1 deraadt 128: inquote = '\0';
1.7 espie 129: for (p = str, start = t = *buffer;; ++p) {
1.1 deraadt 130: switch(ch = *p) {
131: case '"':
132: case '\'':
1.6 millert 133: if (inquote) {
1.1 deraadt 134: if (inquote == ch)
135: inquote = '\0';
136: else
137: break;
1.6 millert 138: } else {
1.1 deraadt 139: inquote = (char) ch;
140: /* Don't miss "" or '' */
141: if (start == NULL && p[1] == inquote) {
142: start = t + 1;
143: break;
144: }
145: }
146: if (!expand) {
147: if (!start)
148: start = t;
149: *t++ = ch;
150: }
151: continue;
152: case ' ':
153: case '\t':
154: case '\n':
155: if (inquote)
156: break;
157: if (!start)
158: continue;
159: /* FALLTHROUGH */
160: case '\0':
161: /*
162: * end of a token -- make sure there's enough argv
163: * space and save off a pointer.
164: */
165: if (!start)
166: goto done;
167:
168: *t++ = '\0';
169: if (argc == argmax) {
170: argmax *= 2; /* ramp up fast */
1.3 deraadt 171: argv = (char **)erealloc(argv,
172: (argmax + 1) * sizeof(char *));
1.1 deraadt 173: }
174: argv[argc++] = start;
175: start = (char *)NULL;
176: if (ch == '\n' || ch == '\0')
177: goto done;
178: continue;
179: case '\\':
180: if (!expand) {
181: if (!start)
182: start = t;
183: *t++ = '\\';
184: ch = *++p;
185: break;
186: }
1.5 millert 187:
1.1 deraadt 188: switch (ch = *++p) {
189: case '\0':
190: case '\n':
191: /* hmmm; fix it up as best we can */
192: ch = '\\';
193: --p;
194: break;
195: case 'b':
196: ch = '\b';
197: break;
198: case 'f':
199: ch = '\f';
200: break;
201: case 'n':
202: ch = '\n';
203: break;
204: case 'r':
205: ch = '\r';
206: break;
207: case 't':
208: ch = '\t';
209: break;
210: }
211: break;
212: }
213: if (!start)
214: start = t;
215: *t++ = (char) ch;
216: }
217: done: argv[argc] = (char *)NULL;
218: *store_argc = argc;
219: return(argv);
220: }
221:
/* Iterate through a string word by word,
 * without needing to copy anything.
 * More light-weight than brk_string, handles \ ' " as well.
 *
 *	position = s;
 *	while ((begin = iterate_words(&position)) != NULL) {
 *		do_something_with_word_interval(begin, position);
 *	}
 *
 * On entry *end is where scanning resumes; on success *end is set to
 * the first character after the word (an unquoted space or tab, or
 * the terminating NUL) and the start of the word is returned.
 * Returns NULL, leaving *end untouched, when only white space remains.
 *
 * Leading white space is skipped with isspace(), but only an unquoted
 * space or tab ends a word.  A backslash protects the next character,
 * inside quotes too.
 */
const char *
iterate_words(end)
    const char **end;
{
    const char *start, *p;
    char state = 0;		/* active quote character, or 0 */
    start = *end;

    /* isspace() is only defined for unsigned char values and EOF */
    while (isspace((unsigned char)*start))
	start++;
    if (*start == '\0')
	return NULL;

    for (p = start;; p++)
	switch(*p) {
	case '\\':
	    /* skip the escaped character, but never the final NUL */
	    if (p[1] != '\0')
		p++;
	    break;
	case '\'':
	case '"':
	    if (state == *p)
		state = 0;
	    else if (state == 0)
		state = *p;
	    break;
	case ' ':
	case '\t':
	    if (state != 0)
		break;
	    /* FALLTHROUGH */
	case '\0':
	    *end = p;
	    return start;
	default:
	    break;
	}
}
! 269:
1.1 deraadt 270: /*
271: * Str_Match --
1.5 millert 272: *
1.1 deraadt 273: * See if a particular string matches a particular pattern.
1.5 millert 274: *
1.14 espie 275: * Results: TRUE is returned if string matches pattern, FALSE otherwise. The
1.1 deraadt 276: * matching operation permits the following special characters in the
277: * pattern: *?\[] (see the man page for details on what these mean).
278: */
1.14 espie 279: Boolean
1.1 deraadt 280: Str_Match(string, pattern)
1.14 espie 281: const char *string; /* String */
282: const char *pattern; /* Pattern */
1.1 deraadt 283: {
1.14 espie 284: while (*pattern != '\0') {
285: /* Check for a "*" as the next pattern character. It matches
286: * any substring. We handle this by calling ourselves
287: * recursively for each postfix of string, until either we
288: * match or we reach the end of the string. */
289: if (*pattern == '*') {
290: pattern++;
291: /* Skip over contiguous sequences of `?*', so that recursive
292: * calls only occur on `real' characters. */
293: while (*pattern == '?' || *pattern == '*') {
294: if (*pattern == '?') {
295: if (*string == '\0')
296: return FALSE;
297: else
298: string++;
299: }
300: pattern++;
301: }
302: if (*pattern == '\0')
303: return TRUE;
304: for (; *string != '\0'; string++)
305: if (Str_Match(string, pattern))
306: return TRUE;
307: return FALSE;
308: } else if (*string == '\0')
309: return FALSE;
310: /* Check for a "[" as the next pattern character. It is
311: * followed by a list of characters that are acceptable, or
312: * by a range (two characters separated by "-"). */
313: else if (*pattern == '[') {
314: pattern++;
315: if (*pattern == '\0')
316: return FALSE;
317: if (*pattern == '!' || *pattern == '^') {
318: pattern++;
319: if (*pattern == '\0')
320: return FALSE;
321: /* Negative match */
322: for (;;) {
323: if (*pattern == '\\') {
324: if (*++pattern == '\0')
325: return FALSE;
326: }
327: if (*pattern == *string)
328: return FALSE;
329: if (pattern[1] == '-') {
330: if (pattern[2] == '\0')
331: return FALSE;
332: if (*pattern < *string && *string <= pattern[2])
333: return FALSE;
334: if (pattern[2] <= *string && *string < *pattern)
335: return FALSE;
336: pattern += 3;
337: } else
338: pattern++;
339: if (*pattern == '\0')
340: return FALSE;
341: /* The test for ']' is done at the end so that ']'
342: * can be used at the start of the range without '\' */
343: if (*pattern == ']')
344: break;
1.1 deraadt 345: }
1.14 espie 346: } else {
347: for (;;) {
348: if (*pattern == '\\') {
349: if (*++pattern == '\0')
350: return FALSE;
351: }
352: if (*pattern == *string)
353: break;
354: if (pattern[1] == '-') {
355: if (pattern[2] == '\0')
356: return FALSE;
357: if (*pattern < *string && *string <= pattern[2])
358: break;
359: if (pattern[2] <= *string && *string < *pattern)
360: break;
361: pattern += 3;
362: } else
363: pattern++;
364: /* The test for ']' is done at the end so that ']'
365: * can be used at the start of the range without '\' */
366: if (*pattern == '\0' || *pattern == ']')
367: return FALSE;
1.1 deraadt 368: }
1.14 espie 369: /* Found matching character, skip over rest of class. */
370: while (*pattern != ']') {
371: if (*pattern == '\\')
372: pattern++;
373: /* A non-terminated character class is ok. */
374: if (*pattern == '\0')
375: break;
376: pattern++;
1.1 deraadt 377: }
1.14 espie 378: }
1.1 deraadt 379: }
1.14 espie 380: /* '?' matches any single character, so shunt test. */
381: else if (*pattern != '?') {
382: /* If the next pattern character is '\', just strip off the
383: * '\' so we do exact matching on the character that follows. */
384: if (*pattern == '\\') {
385: if (*++pattern == '\0')
386: return FALSE;
387: }
388: /* There's no special character. Just make sure that
389: * the next characters of each string match. */
390: if (*pattern != *string)
391: return FALSE;
392: }
393: pattern++;
394: string++;
395: }
396: if (*string == '\0')
397: return TRUE;
398: else
399: return FALSE;
1.1 deraadt 400: }
401:
/*-
 *-----------------------------------------------------------------------
 * Str_SYSVMatch --
 *	Match a word against a System V style pattern, in which '%'
 *	stands for an arbitrary run of characters.
 *
 *	An empty pattern matches the whole word.  With a '%', the text
 *	before it must be a prefix of the word and the text after it a
 *	suffix.  Without a '%', the whole pattern must be a suffix of
 *	the word.
 *
 * Results:
 *	Pointer into word where the part standing for '%' begins, with
 *	its length stored in *len, or NULL if there is no match.
 *-----------------------------------------------------------------------
 */
const char *
Str_SYSVMatch(word, pattern, len)
    const char *word;		/* Word to examine */
    const char *pattern;	/* Pattern to examine against */
    size_t *len;		/* Number of characters to substitute */
{
    const char *percent;	/* position of '%' in pattern, if any */
    const char *tail;		/* part of pattern still to be matched */
    const char *stem;		/* where the '%' part starts in word */
    const char *scan;

    /* An empty pattern stands for the whole word */
    if (pattern[0] == '\0') {
	*len = strlen(word);
	return word;
    }

    tail = pattern;
    stem = word;
    percent = strchr(pattern, '%');
    if (percent != NULL) {
	/* consume the literal prefix in both pattern and word */
	while (tail != percent && *stem != '\0' && *stem == *tail) {
	    stem++;
	    tail++;
	}
	if (tail != percent)
	    return NULL;		/* prefix differs */

	/* step over the '%' */
	tail++;
	if (*tail == '\0') {
	    /* nothing after '%': the rest of the word is the stem */
	    *len = strlen(stem);
	    return stem;
	}
    }

    /* Look for the position at which the rest of word equals tail */
    for (scan = stem;; scan++) {
	if (strcmp(tail, scan) == 0) {
	    *len = scan - stem;
	    return stem;
	}
	if (*scan == '\0')
	    return NULL;
    }
}
455:
456:
457: /*-
458: *-----------------------------------------------------------------------
459: * Str_SYSVSubst --
460: * Substitute '%' on the pattern with len characters from src.
461: * If the pattern does not contain a '%' prepend len characters
462: * from src.
1.5 millert 463: *
1.1 deraadt 464: * Side Effects:
465: * Places result on buf
466: *-----------------------------------------------------------------------
467: */
468: void
469: Str_SYSVSubst(buf, pat, src, len)
470: Buffer buf;
1.13 espie 471: const char *pat;
472: const char *src;
473: size_t len;
1.1 deraadt 474: {
1.13 espie 475: const char *m;
1.1 deraadt 476:
477: if ((m = strchr(pat, '%')) != NULL) {
478: /* Copy the prefix */
1.11 espie 479: Buf_AddInterval(buf, pat, m);
1.1 deraadt 480: /* skip the % */
481: pat = m + 1;
482: }
483:
484: /* Copy the pattern */
1.10 espie 485: Buf_AddChars(buf, len, src);
1.1 deraadt 486:
487: /* append the rest */
1.11 espie 488: Buf_AddString(buf, pat);
1.9 espie 489: }
490:
/* Return a NUL-terminated copy, in space obtained from emalloc, of the
 * characters in the interval [begin, end).  */
char *
interval_dup(begin, end)
    const char *begin;
    const char *end;
{
    size_t n = end - begin;
    char *copy = emalloc(n + 1);

    memcpy(copy, begin, n);
    copy[n] = '\0';
    return copy;
}
1.16 ! espie 503:
/* Copy the interval [begin, end) into space obtained from emalloc,
 * removing each backslash that precedes a character found in set.
 * A backslash in front of any other character, or at the very end of
 * the interval, is kept.  The copy is NUL-terminated.  */
char *
escape_dup(begin, end, set)
    const char *begin;
    const char *end;
    const char *set;
{
    /* the copy can never be longer than the interval itself */
    char *copy = emalloc(end - begin + 1);
    char *out = copy;
    const char *in;

    for (in = begin; in != end; in++) {
	if (*in == '\\') {
	    if (++in == end) {
		/* lone trailing backslash: keep it */
		*out++ = '\\';
		break;
	    }
	    /* the backslash survives unless it escapes a set member */
	    if (strchr(set, *in) == NULL)
		*out++ = '\\';
	}
	*out++ = *in;
    }
    *out = '\0';
    return copy;
}
! 529: