Annotation of src/usr.bin/m4/gnum4.c, Revision 1.32
1.32 ! espie 1: /* $OpenBSD: gnum4.c,v 1.31 2005/05/29 18:44:36 espie Exp $ */
1.1 espie 2:
3: /*
4: * Copyright (c) 1999 Marc Espie
5: *
6: * Redistribution and use in source and binary forms, with or without
7: * modification, are permitted provided that the following conditions
8: * are met:
9: * 1. Redistributions of source code must retain the above copyright
10: * notice, this list of conditions and the following disclaimer.
11: * 2. Redistributions in binary form must reproduce the above copyright
12: * notice, this list of conditions and the following disclaimer in the
13: * documentation and/or other materials provided with the distribution.
14: *
15: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
16: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
19: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25: * SUCH DAMAGE.
26: */
27:
28: /*
29: * functions needed to support gnu-m4 extensions, including a fake freezing
30: */
31:
32: #include <sys/param.h>
1.5 espie 33: #include <sys/types.h>
1.8 espie 34: #include <sys/wait.h>
1.5 espie 35: #include <ctype.h>
1.8 espie 36: #include <paths.h>
1.5 espie 37: #include <regex.h>
1.1 espie 38: #include <stddef.h>
39: #include <stdlib.h>
40: #include <stdio.h>
41: #include <string.h>
1.8 espie 42: #include <errno.h>
43: #include <unistd.h>
1.1 espie 44: #include "mdef.h"
45: #include "stdd.h"
46: #include "extern.h"
47:
1.5 espie 48:
/*
 * Non-zero selects the GNU m4 flavour of the regexp builtins in this file
 * (pattern rewriting through twiddle() and the `\&' replacement syntax).
 */
int mimic_gnu = 0;
50:
1.1 espie 51: /*
52: * Support for include path search
1.27 espie 53: * First search in the current directory.
1.1 espie 54: * If not found, and the path is not absolute, include path kicks in.
55: * First, -I options, in the order found on the command line.
56: * Then M4PATH env variable
57: */
58:
/*
 * One directory of the include search path.  Entries are chained in the
 * order they were added; `first' is the head of the chain and `last' is
 * its tail.
 */
struct path_entry {
	char *name;			/* directory name */
	struct path_entry *next;	/* following entry, null at the end */
};

struct path_entry *first, *last;
63:
/* File-local helpers implementing the include path search. */
static struct path_entry *new_path_entry(const char *dirname);
static void ensure_m4path(void);
static struct input_file *dopath(struct input_file *i, const char *filename);
1.3 espie 67:
1.1 espie 68: static struct path_entry *
1.18 espie 69: new_path_entry(const char *dirname)
1.1 espie 70: {
71: struct path_entry *n;
72:
73: n = malloc(sizeof(struct path_entry));
74: if (!n)
75: errx(1, "out of memory");
76: n->name = strdup(dirname);
77: if (!n->name)
78: errx(1, "out of memory");
79: n->next = 0;
80: return n;
81: }
82:
83: void
1.18 espie 84: addtoincludepath(const char *dirname)
1.1 espie 85: {
86: struct path_entry *n;
87:
88: n = new_path_entry(dirname);
89:
90: if (last) {
91: last->next = n;
92: last = n;
93: }
94: else
95: last = first = n;
96: }
97:
98: static void
99: ensure_m4path()
100: {
101: static int envpathdone = 0;
102: char *envpath;
103: char *sweep;
104: char *path;
105:
106: if (envpathdone)
107: return;
108: envpathdone = TRUE;
109: envpath = getenv("M4PATH");
110: if (!envpath)
111: return;
112: /* for portability: getenv result is read-only */
113: envpath = strdup(envpath);
114: if (!envpath)
115: errx(1, "out of memory");
116: for (sweep = envpath;
117: (path = strsep(&sweep, ":")) != NULL;)
118: addtoincludepath(path);
119: free(envpath);
120: }
121:
122: static
1.4 espie 123: struct input_file *
1.18 espie 124: dopath(struct input_file *i, const char *filename)
1.1 espie 125: {
126: char path[MAXPATHLEN];
127: struct path_entry *pe;
1.4 espie 128: FILE *f;
1.1 espie 129:
130: for (pe = first; pe; pe = pe->next) {
131: snprintf(path, sizeof(path), "%s/%s", pe->name, filename);
1.4 espie 132: if ((f = fopen(path, "r")) != 0) {
133: set_input(i, f, path);
134: return i;
135: }
1.1 espie 136: }
137: return NULL;
138: }
139:
/*
 * Open filename for reading and attach it to i through set_input().
 * The name is tried as given first.  If that fails and the name is not
 * absolute, the include search path (with M4PATH folded in on first use)
 * is consulted through dopath().
 *
 * Returns i on success, NULL if the file could not be opened anywhere.
 */
struct input_file *
fopen_trypath(struct input_file *i, const char *filename)
{
	FILE *fp = fopen(filename, "r");

	if (fp == NULL) {
		/* Absolute names are never looked up along the path. */
		if (filename[0] == '/')
			return NULL;
		ensure_m4path();
		return dopath(i, filename);
	}
	set_input(i, fp, filename);
	return i;
}
157:
1.5 espie 158: void
1.18 espie 159: doindir(const char *argv[], int argc)
1.5 espie 160: {
1.25 espie 161: ndptr n;
1.22 espie 162: struct macro_definition *p;
1.5 espie 163:
1.25 espie 164: n = lookup(argv[2]);
165: if (n == NULL || (p = macro_getdef(n)) == NULL)
1.32 ! espie 166: m4errx(1, "indir: undefined macro %s.", argv[2]);
1.5 espie 167: argv[1] = p->defn;
1.23 espie 168:
1.25 espie 169: eval(argv+1, argc-1, p->type, is_traced(n));
1.5 espie 170: }
171:
172: void
1.18 espie 173: dobuiltin(const char *argv[], int argc)
1.5 espie 174: {
1.24 espie 175: ndptr p;
1.25 espie 176:
1.5 espie 177: argv[1] = NULL;
1.24 espie 178: p = macro_getbuiltin(argv[2]);
179: if (p != NULL)
1.25 espie 180: eval(argv+1, argc-1, macro_builtin_type(p), is_traced(p));
1.5 espie 181: else
1.32 ! espie 182: m4errx(1, "unknown builtin %s.", argv[2]);
1.5 espie 183: }
184:
185:
/*
 * Scratch buffer shared by the regexp builtins and esyscmd: pushback
 * (pb*) works backwards while substitution proceeds forward, so output
 * is first collected here and pushed back in one piece.
 *
 * buffer  - storage, grown on demand by addchar()/addchars()
 * bufsize - number of bytes allocated for buffer
 * current - number of bytes of buffer in use
 */
static char *buffer;
static size_t bufsize = 0;
static size_t current = 0;

static void addchars(const char *c, size_t n);
static void addchar(int c);
static char *twiddle(const char *p);
static char *getstring(void);
static void exit_regerror(int er, regex_t *re);
static void do_subst(const char *string, regex_t *re, const char *replace,
    regmatch_t *pm);
static void do_regexpindex(const char *string, regex_t *re, regmatch_t *pm);
static void do_regexp(const char *string, regex_t *re, const char *replace,
    regmatch_t *pm);
static void add_sub(int n, const char *string, regex_t *re, regmatch_t *pm);
static void add_replace(const char *string, regex_t *re, const char *replace,
    regmatch_t *pm);

/* Append a string literal (without its terminating NUL) to the buffer. */
#define addconstantstring(s) addchars((s), sizeof(s)-1)
1.5 espie 203:
204: static void
1.18 espie 205: addchars(const char *c, size_t n)
1.5 espie 206: {
207: if (n == 0)
208: return;
1.15 espie 209: while (current + n > bufsize) {
1.5 espie 210: if (bufsize == 0)
211: bufsize = 1024;
212: else
213: bufsize *= 2;
1.26 espie 214: buffer = xrealloc(buffer, bufsize, NULL);
1.5 espie 215: }
216: memcpy(buffer+current, c, n);
217: current += n;
218: }
219:
220: static void
1.18 espie 221: addchar(int c)
1.5 espie 222: {
223: if (current +1 > bufsize) {
224: if (bufsize == 0)
225: bufsize = 1024;
226: else
227: bufsize *= 2;
1.26 espie 228: buffer = xrealloc(buffer, bufsize, NULL);
1.5 espie 229: }
230: buffer[current++] = c;
231: }
232:
233: static char *
234: getstring()
235: {
236: addchar('\0');
237: current = 0;
238: return buffer;
239: }
240:
241:
/*
 * Turn the regcomp()/regexec() error code er into its message text and
 * report it through m4errx(1, ...).
 */
static void
exit_regerror(int er, regex_t *re)
{
	/* A first call with no buffer yields the size the message needs. */
	size_t needed = regerror(er, re, NULL, 0);
	char *msg = xalloc(needed,
	    "malloc in regerror: %lu", (unsigned long)needed);

	regerror(er, re, msg, needed);
	m4errx(1, "regular expression error: %s.", msg);
}
254:
/*
 * Append the text matched by subexpression n (0 is the whole match) to
 * the scratch buffer.  string is the text the offsets in pm refer to.
 * A number beyond the subexpressions of re only produces a warning; a
 * subexpression that took no part in the match adds nothing.
 */
static void
add_sub(int n, const char *string, regex_t *re, regmatch_t *pm)
{
	if (n > re->re_nsub) {
		warnx("No subexpression %d", n);
		return;
	}
	/* Subexpressions that did not match are not an error. */
	if (pm[n].rm_so == -1 || pm[n].rm_eo == -1)
		return;

	addchars(string + pm[n].rm_so, pm[n].rm_eo - pm[n].rm_so);
}
268:
269: /* Add replacement string to the output buffer, recognizing special
270: * constructs and replacing them with substrings of the original string.
271: */
272: static void
1.18 espie 273: add_replace(const char *string, regex_t *re, const char *replace, regmatch_t *pm)
1.5 espie 274: {
275: const char *p;
276:
277: for (p = replace; *p != '\0'; p++) {
278: if (*p == '&' && !mimic_gnu) {
279: add_sub(0, string, re, pm);
280: continue;
281: }
282: if (*p == '\\') {
283: if (p[1] == '\\') {
284: addchar(p[1]);
1.10 espie 285: p++;
1.5 espie 286: continue;
287: }
288: if (p[1] == '&') {
289: if (mimic_gnu)
290: add_sub(0, string, re, pm);
291: else
292: addchar(p[1]);
293: p++;
294: continue;
295: }
296: if (isdigit(p[1])) {
297: add_sub(*(++p) - '0', string, re, pm);
298: continue;
299: }
300: }
301: addchar(*p);
302: }
303: }
304:
/*
 * Replace every match of re in string: unmatched text and expanded
 * replacements are collected in the scratch buffer, and whatever follows
 * the last match is handed to pbstr().  A regexec() failure other than
 * "no match" goes to exit_regerror().
 *
 * pm must have room for re->re_nsub + 1 entries.
 */
static void
do_subst(const char *string, regex_t *re, const char *replace, regmatch_t *pm)
{
	const char *prev_match = NULL;
	int eflags = 0;
	int rc;

	for (;;) {
		rc = regexec(re, string, re->re_nsub + 1, pm, eflags);
		if (rc != 0)
			break;

		/*
		 * The next search starts where this match ended: that spot
		 * is a beginning of line only if the match ended with a
		 * newline.
		 */
		if (pm[0].rm_eo != 0)
			eflags = (string[pm[0].rm_eo - 1] == '\n') ?
			    0 : REG_NOTBOL;

		/*
		 * NULL length matches are special... We use the `vi-mode'
		 * rule: don't allow a NULL-match at the last match
		 * position.  Copy one character and retry after it.
		 */
		if (pm[0].rm_so == pm[0].rm_eo &&
		    string + pm[0].rm_so == prev_match) {
			if (*string == '\0')
				return;
			addchar(*string);
			eflags = (*string == '\n') ? 0 : REG_NOTBOL;
			string++;
			continue;
		}

		prev_match = string + pm[0].rm_so;
		/* Text before the match is copied, then the replacement. */
		addchars(string, pm[0].rm_so);
		add_replace(string, re, replace, pm);
		string += pm[0].rm_eo;
	}

	if (rc != REG_NOMATCH)
		exit_regerror(rc, re);
	pbstr(string);
}
344:
/*
 * regexp with a replacement: if re matches string, expand replace
 * against that match and hand the result to pbstr().  No match produces
 * nothing; any other regexec() failure goes to exit_regerror().
 */
static void
do_regexp(const char *string, regex_t *re, const char *replace, regmatch_t *pm)
{
	int rc = regexec(re, string, re->re_nsub + 1, pm, 0);

	if (rc == REG_NOMATCH)
		return;
	if (rc != 0) {
		exit_regerror(rc, re);
		return;
	}
	add_replace(string, re, replace, pm);
	pbstr(getstring());
}
361:
/*
 * regexp without a replacement: hand the start offset of the first match
 * of re in string to pbunsigned(), or -1 to pbnum() when nothing
 * matches.  Any other regexec() failure goes to exit_regerror().
 */
static void
do_regexpindex(const char *string, regex_t *re, regmatch_t *pm)
{
	int rc = regexec(re, string, re->re_nsub + 1, pm, 0);

	if (rc == 0)
		pbunsigned(pm[0].rm_so);
	else if (rc == REG_NOMATCH)
		pbnum(-1);
	else
		exit_regerror(rc, re);
}
378:
/*
 * In Gnu m4 mode, parentheses for backmatch don't work like POSIX 1003.2
 * says. So we twiddle with the regexp before passing it to regcomp.
 *
 * Rewrites performed on the pattern p:
 *   \( \) \|      become the bare operators ( ) |
 *   ( ) |         become the escaped literals \( \) \|
 *   \w  \W        become [_a-zA-Z0-9] and [^_a-zA-Z0-9]
 *   \<  \>        become [[:<:]] and [[:>:]]
 *   leading +     (optionally after ^) is escaped
 * Any other backslash pair is copied unchanged.
 *
 * The result lives in the shared scratch buffer returned by getstring()
 * and is overwritten by the next addchar()/addchars() call.
 */
static char *
twiddle(const char *p)
{
	/* + at start of regexp is a normal character for Gnu m4 */
	if (*p == '^') {
		addchar(*p);
		p++;
	}
	if (*p == '+') {
		addchar('\\');
	}
	/* This could use strcspn for speed... */
	while (*p != '\0') {
		if (*p == '\\') {
			/*
			 * A lone backslash at the very end has no partner:
			 * copy it by itself and stop, rather than treating
			 * the terminating NUL as the escaped character and
			 * stepping past the end of the pattern.
			 */
			if (p[1] == '\0') {
				addchar(*p);
				break;
			}
			switch(p[1]) {
			case '(':
			case ')':
			case '|':
				addchar(p[1]);
				break;
			case 'w':
				addconstantstring("[_a-zA-Z0-9]");
				break;
			case 'W':
				addconstantstring("[^_a-zA-Z0-9]");
				break;
			case '<':
				addconstantstring("[[:<:]]");
				break;
			case '>':
				addconstantstring("[[:>:]]");
				break;
			default:
				addchars(p, 2);
				break;
			}
			p+=2;
			continue;
		}
		if (*p == '(' || *p == ')' || *p == '|')
			addchar('\\');

		addchar(*p);
		p++;
	}
	return getstring();
}
429:
430: /* patsubst(string, regexp, opt replacement) */
431: /* argv[2]: string
432: * argv[3]: regexp
433: * argv[4]: opt rep
434: */
435: void
1.18 espie 436: dopatsubst(const char *argv[], int argc)
1.5 espie 437: {
438: if (argc <= 3) {
439: warnx("Too few arguments to patsubst");
440: return;
441: }
1.20 espie 442: /* special case: empty regexp */
443: if (argv[3][0] == '\0') {
444: const char *s;
1.21 espie 445: size_t len;
446: if (argv[4] && argc > 4)
447: len = strlen(argv[4]);
448: else
449: len = 0;
1.20 espie 450: for (s = argv[2]; *s != '\0'; s++) {
451: addchars(argv[4], len);
452: addchar(*s);
453: }
454: } else {
455: int error;
456: regex_t re;
457: regmatch_t *pmatch;
1.30 espie 458: int mode = REG_EXTENDED;
459: size_t l = strlen(argv[3]);
460:
461: if (!mimic_gnu ||
462: (argv[3][0] == '^') ||
463: (l > 0 && argv[3][l-1] == '$'))
464: mode |= REG_NEWLINE;
1.20 espie 465:
466: error = regcomp(&re, mimic_gnu ? twiddle(argv[3]) : argv[3],
1.30 espie 467: mode);
1.20 espie 468: if (error != 0)
469: exit_regerror(error, &re);
470:
1.26 espie 471: pmatch = xalloc(sizeof(regmatch_t) * (re.re_nsub+1), NULL);
1.20 espie 472: do_subst(argv[2], &re,
1.21 espie 473: argc > 4 && argv[4] != NULL ? argv[4] : "", pmatch);
1.20 espie 474: free(pmatch);
475: regfree(&re);
476: }
1.5 espie 477: pbstr(getstring());
478: }
479:
480: void
1.18 espie 481: doregexp(const char *argv[], int argc)
1.5 espie 482: {
483: int error;
484: regex_t re;
485: regmatch_t *pmatch;
486:
487: if (argc <= 3) {
1.7 espie 488: warnx("Too few arguments to regexp");
1.5 espie 489: return;
490: }
491: error = regcomp(&re, mimic_gnu ? twiddle(argv[3]) : argv[3],
492: REG_EXTENDED);
493: if (error != 0)
494: exit_regerror(error, &re);
495:
1.26 espie 496: pmatch = xalloc(sizeof(regmatch_t) * (re.re_nsub+1), NULL);
1.7 espie 497: if (argv[4] == NULL || argc == 4)
1.5 espie 498: do_regexpindex(argv[2], &re, pmatch);
499: else
500: do_regexp(argv[2], &re, argv[4], pmatch);
501: free(pmatch);
502: regfree(&re);
1.8 espie 503: }
504:
505: void
1.18 espie 506: doesyscmd(const char *cmd)
1.8 espie 507: {
508: int p[2];
509: pid_t pid, cpid;
510: char *argv[4];
511: int cc;
512: int status;
513:
514: /* Follow gnu m4 documentation: first flush buffers. */
515: fflush(NULL);
516:
517: argv[0] = "sh";
518: argv[1] = "-c";
519: argv[2] = (char *)cmd;
520: argv[3] = NULL;
521:
522: /* Just set up standard output, share stderr and stdin with m4 */
523: if (pipe(p) == -1)
524: err(1, "bad pipe");
525: switch(cpid = fork()) {
526: case -1:
527: err(1, "bad fork");
528: /* NOTREACHED */
529: case 0:
530: (void) close(p[0]);
531: (void) dup2(p[1], 1);
532: (void) close(p[1]);
533: execv(_PATH_BSHELL, argv);
534: exit(1);
535: default:
536: /* Read result in two stages, since m4's buffer is
537: * pushback-only. */
538: (void) close(p[1]);
539: do {
540: char result[BUFSIZE];
541: cc = read(p[0], result, sizeof result);
542: if (cc > 0)
543: addchars(result, cc);
544: } while (cc > 0 || (cc == -1 && errno == EINTR));
545:
546: (void) close(p[0]);
547: while ((pid = wait(&status)) != cpid && pid >= 0)
548: continue;
549: pbstr(getstring());
550: }
1.31 espie 551: }
552:
553: void
554: getdivfile(const char *name)
555: {
556: FILE *f;
557: int c;
558:
559: f = fopen(name, "r");
560: if (!f)
561: return;
562:
563: while ((c = getc(f))!= EOF)
564: putc(c, active);
565: (void) fclose(f);
1.5 espie 566: }