Annotation of src/usr.bin/m4/gnum4.c, Revision 1.48
1.48 ! millert 1: /* $OpenBSD: gnum4.c,v 1.47 2015/01/16 06:40:09 deraadt Exp $ */
1.1 espie 2:
3: /*
4: * Copyright (c) 1999 Marc Espie
5: *
6: * Redistribution and use in source and binary forms, with or without
7: * modification, are permitted provided that the following conditions
8: * are met:
9: * 1. Redistributions of source code must retain the above copyright
10: * notice, this list of conditions and the following disclaimer.
11: * 2. Redistributions in binary form must reproduce the above copyright
12: * notice, this list of conditions and the following disclaimer in the
13: * documentation and/or other materials provided with the distribution.
14: *
15: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
16: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
19: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25: * SUCH DAMAGE.
26: */
27:
1.41 marco 28: /*
1.1 espie 29: * functions needed to support gnu-m4 extensions, including a fake freezing
30: */
31:
1.5 espie 32: #include <sys/types.h>
1.8 espie 33: #include <sys/wait.h>
1.5 espie 34: #include <ctype.h>
1.36 espie 35: #include <err.h>
1.8 espie 36: #include <paths.h>
1.5 espie 37: #include <regex.h>
1.1 espie 38: #include <stddef.h>
39: #include <stdlib.h>
40: #include <stdio.h>
41: #include <string.h>
1.8 espie 42: #include <errno.h>
43: #include <unistd.h>
1.47 deraadt 44: #include <limits.h>
1.1 espie 45: #include "mdef.h"
46: #include "stdd.h"
47: #include "extern.h"
48:
1.5 espie 49:
/* Non-zero selects the GNU m4 flavour of the builtins in this file. */
int mimic_gnu = 0;

/*
 * Include path search.
 * A file is first opened as given, i.e. relative to the current
 * directory.  Only when that fails and the name is not absolute is
 * the include path consulted: the -I directories in command line
 * order, followed by the entries of the M4PATH environment variable.
 */

/* One directory of the include path; entries form a singly linked list. */
struct path_entry {
	char *name;			/* directory name */
	struct path_entry *next;	/* following entry, or NULL */
};

/* Head and tail of the include path list. */
struct path_entry *first, *last;

static struct path_entry *new_path_entry(const char *);
static void ensure_m4path(void);
static struct input_file *dopath(struct input_file *, const char *);
1.3 espie 68:
1.1 espie 69: static struct path_entry *
1.18 espie 70: new_path_entry(const char *dirname)
1.1 espie 71: {
72: struct path_entry *n;
73:
74: n = malloc(sizeof(struct path_entry));
75: if (!n)
76: errx(1, "out of memory");
1.48 ! millert 77: n->name = xstrdup(dirname);
1.1 espie 78: n->next = 0;
79: return n;
80: }
1.41 marco 81:
82: void
1.18 espie 83: addtoincludepath(const char *dirname)
1.1 espie 84: {
85: struct path_entry *n;
86:
87: n = new_path_entry(dirname);
88:
89: if (last) {
90: last->next = n;
91: last = n;
92: }
93: else
94: last = first = n;
95: }
96:
97: static void
98: ensure_m4path()
99: {
100: static int envpathdone = 0;
101: char *envpath;
102: char *sweep;
103: char *path;
104:
105: if (envpathdone)
106: return;
107: envpathdone = TRUE;
108: envpath = getenv("M4PATH");
1.41 marco 109: if (!envpath)
1.1 espie 110: return;
111: /* for portability: getenv result is read-only */
1.48 ! millert 112: envpath = xstrdup(envpath);
1.41 marco 113: for (sweep = envpath;
1.1 espie 114: (path = strsep(&sweep, ":")) != NULL;)
115: addtoincludepath(path);
116: free(envpath);
117: }
118:
119: static
1.4 espie 120: struct input_file *
1.18 espie 121: dopath(struct input_file *i, const char *filename)
1.1 espie 122: {
1.47 deraadt 123: char path[PATH_MAX];
1.1 espie 124: struct path_entry *pe;
1.4 espie 125: FILE *f;
1.1 espie 126:
127: for (pe = first; pe; pe = pe->next) {
128: snprintf(path, sizeof(path), "%s/%s", pe->name, filename);
1.4 espie 129: if ((f = fopen(path, "r")) != 0) {
130: set_input(i, f, path);
131: return i;
132: }
1.1 espie 133: }
134: return NULL;
135: }
136:
/*
 * Open filename for reading and attach it to i.
 * The name is tried as given first.  If that fails and the name does
 * not start with '/', the include path (with M4PATH entries added on
 * first use) is searched.  Returns i on success, NULL otherwise.
 */
struct input_file *
fopen_trypath(struct input_file *i, const char *filename)
{
	FILE *fp = fopen(filename, "r");

	if (fp == NULL) {
		/* Absolute names are never looked up in the include path. */
		if (filename[0] == '/')
			return NULL;
		ensure_m4path();
		return dopath(i, filename);
	}
	set_input(i, fp, filename);
	return i;
}
154:
1.41 marco 155: void
1.18 espie 156: doindir(const char *argv[], int argc)
1.5 espie 157: {
1.25 espie 158: ndptr n;
1.22 espie 159: struct macro_definition *p;
1.5 espie 160:
1.25 espie 161: n = lookup(argv[2]);
162: if (n == NULL || (p = macro_getdef(n)) == NULL)
1.32 espie 163: m4errx(1, "indir: undefined macro %s.", argv[2]);
1.5 espie 164: argv[1] = p->defn;
1.41 marco 165:
1.25 espie 166: eval(argv+1, argc-1, p->type, is_traced(n));
1.5 espie 167: }
168:
1.41 marco 169: void
1.18 espie 170: dobuiltin(const char *argv[], int argc)
1.5 espie 171: {
1.24 espie 172: ndptr p;
1.25 espie 173:
1.5 espie 174: argv[1] = NULL;
1.24 espie 175: p = macro_getbuiltin(argv[2]);
176: if (p != NULL)
1.25 espie 177: eval(argv+1, argc-1, macro_builtin_type(p), is_traced(p));
1.5 espie 178: else
1.32 espie 179: m4errx(1, "unknown builtin %s.", argv[2]);
1.41 marco 180: }
1.5 espie 181:
182:
/*
 * Scratch buffer.  Pushback (pb) stores text backwards whereas
 * substitution produces its result front to back, so the result is
 * accumulated here first.
 */
static char *buffer;		/* storage, grown on demand */
static size_t bufsize = 0;	/* bytes allocated for buffer */
static size_t current = 0;	/* bytes of buffer currently in use */

/* Append a string literal, without its terminating NUL. */
#define addconstantstring(s) addchars((s), sizeof(s)-1)

static void addchar(int);
static void addchars(const char *, size_t);
static char *getstring(void);
static char *twiddle(const char *);
static void exit_regerror(int, regex_t *, const char *);
static void add_sub(int, const char *, regex_t *, regmatch_t *);
static void add_replace(const char *, regex_t *, const char *, regmatch_t *);
static void do_subst(const char *, regex_t *, const char *, const char *,
    regmatch_t *);
static void do_regexp(const char *, regex_t *, const char *, const char *,
    regmatch_t *);
static void do_regexpindex(const char *, regex_t *, const char *, regmatch_t *);
1.5 espie 202:
1.41 marco 203: static void
1.18 espie 204: addchars(const char *c, size_t n)
1.5 espie 205: {
206: if (n == 0)
207: return;
1.15 espie 208: while (current + n > bufsize) {
1.5 espie 209: if (bufsize == 0)
210: bufsize = 1024;
211: else
212: bufsize *= 2;
1.26 espie 213: buffer = xrealloc(buffer, bufsize, NULL);
1.5 espie 214: }
215: memcpy(buffer+current, c, n);
216: current += n;
217: }
218:
1.41 marco 219: static void
1.18 espie 220: addchar(int c)
1.5 espie 221: {
222: if (current +1 > bufsize) {
223: if (bufsize == 0)
224: bufsize = 1024;
225: else
226: bufsize *= 2;
1.26 espie 227: buffer = xrealloc(buffer, bufsize, NULL);
1.5 espie 228: }
229: buffer[current++] = c;
230: }
231:
232: static char *
233: getstring()
234: {
235: addchar('\0');
236: current = 0;
237: return buffer;
238: }
239:
240:
/*
 * Report the regex library error code er, raised for the pattern text
 * source, through m4errx(1, ...).
 * regerror() is called twice: once to learn the size of the message,
 * once to fetch it into a buffer of that size.
 */
static void
exit_regerror(int er, regex_t *re, const char *source)
{
	size_t needed = regerror(er, re, NULL, 0);
	char *message = xalloc(needed,
	    "malloc in regerror: %lu", (unsigned long)needed);

	regerror(er, re, message, needed);
	m4errx(1, "regular expression error in %s: %s.", source, message);
}
253:
/*
 * Append to the scratch buffer the part of string matched by
 * subexpression n (0 is the whole match), as recorded in pm.
 * A number above re->re_nsub only produces a warning.
 */
static void
add_sub(int n, const char *string, regex_t *re, regmatch_t *pm)
{
	if (n > re->re_nsub) {
		warnx("No subexpression %d", n);
		return;
	}
	/* A subexpression that did not take part in the match is not
	 * an error: it simply contributes nothing. */
	if (pm[n].rm_so == -1 || pm[n].rm_eo == -1)
		return;
	addchars(string + pm[n].rm_so, pm[n].rm_eo - pm[n].rm_so);
}
267:
268: /* Add replacement string to the output buffer, recognizing special
269: * constructs and replacing them with substrings of the original string.
270: */
1.41 marco 271: static void
1.18 espie 272: add_replace(const char *string, regex_t *re, const char *replace, regmatch_t *pm)
1.5 espie 273: {
274: const char *p;
275:
276: for (p = replace; *p != '\0'; p++) {
277: if (*p == '&' && !mimic_gnu) {
278: add_sub(0, string, re, pm);
279: continue;
280: }
281: if (*p == '\\') {
282: if (p[1] == '\\') {
283: addchar(p[1]);
1.10 espie 284: p++;
1.5 espie 285: continue;
286: }
287: if (p[1] == '&') {
288: if (mimic_gnu)
289: add_sub(0, string, re, pm);
290: else
291: addchar(p[1]);
292: p++;
293: continue;
294: }
1.43 deraadt 295: if (isdigit((unsigned char)p[1])) {
1.5 espie 296: add_sub(*(++p) - '0', string, re, pm);
297: continue;
298: }
299: }
1.41 marco 300: addchar(*p);
1.5 espie 301: }
302: }
303:
/*
 * Replace every match of re in string by the expansion of replace.
 * Text before each match and the expanded replacement are accumulated
 * in the scratch buffer; the unmatched tail of string is pushed back
 * with pbstr().  source is the pattern text, used only for error
 * reports.  pm must have room for re->re_nsub + 1 entries.
 */
static void
do_subst(const char *string, regex_t *re, const char *source,
    const char *replace, regmatch_t *pm)
{
	const char *last_match = NULL;
	int flags = 0;
	int error;

	for (;;) {
		error = regexec(re, string, re->re_nsub + 1, pm, flags);
		if (error != 0)
			break;

		/* The next search starts at a line beginning only when
		 * this match consumed a newline as its last character. */
		if (pm[0].rm_eo != 0)
			flags = (string[pm[0].rm_eo - 1] == '\n') ?
			    0 : REG_NOTBOL;

		/* Empty matches follow the `vi-mode' rule: no empty match
		 * is accepted at the position of the previous match.
		 * Copy one character and retry one position further. */
		if (pm[0].rm_so == pm[0].rm_eo &&
		    string + pm[0].rm_so == last_match) {
			if (*string == '\0')
				return;
			addchar(*string);
			flags = (*string == '\n') ? 0 : REG_NOTBOL;
			string++;
			continue;
		}

		last_match = string + pm[0].rm_so;
		addchars(string, pm[0].rm_so);
		add_replace(string, re, replace, pm);
		string += pm[0].rm_eo;
	}
	if (error != REG_NOMATCH)
		exit_regerror(error, re, source);
	pbstr(string);
}
344:
/*
 * Search string for the first match of re.  On a match, the expansion
 * of replace is pushed back; without a match nothing is produced.
 * Any other regexec() result is reported through exit_regerror().
 */
static void
do_regexp(const char *string, regex_t *re, const char *source,
    const char *replace, regmatch_t *pm)
{
	int error = regexec(re, string, re->re_nsub + 1, pm, 0);

	if (error == 0) {
		add_replace(string, re, replace, pm);
		pbstr(getstring());
	} else if (error != REG_NOMATCH) {
		exit_regerror(error, re, source);
	}
}
362:
/*
 * Search string for the first match of re and push back the offset at
 * which the match starts, or -1 when there is no match.
 * Any other regexec() result is reported through exit_regerror().
 */
static void
do_regexpindex(const char *string, regex_t *re, const char *source,
    regmatch_t *pm)
{
	int error = regexec(re, string, re->re_nsub + 1, pm, 0);

	if (error == 0)
		pbunsigned(pm[0].rm_so);
	else if (error == REG_NOMATCH)
		pbnum(-1);
	else
		exit_regerror(error, re, source);
}
380:
/*
 * In Gnu m4 mode, parentheses for backmatch don't work like POSIX 1003.2
 * says. So we twiddle with the regexp before passing it to regcomp.
 *
 * Rewrites performed on the pattern p:
 *   \(  \)  \|      become  (  )  |
 *   (   )   |       become  \(  \)  \|
 *   \w  \W          become  [_a-zA-Z0-9]  [^_a-zA-Z0-9]
 *   \<  \>          become  [[:<:]]  [[:>:]]
 *   a '+' at the very start (or right after a leading '^') is escaped
 * Every other escape sequence and character is copied unchanged.
 *
 * The result is built in the shared scratch buffer and returned via
 * getstring(): it is overwritten by the next addchar()/addchars().
 */
static char *
twiddle(const char *p)
{
	/* + at start of regexp is a normal character for Gnu m4 */
	if (*p == '^') {
		addchar(*p);
		p++;
	}
	if (*p == '+')
		addchar('\\');

	/* This could use strcspn for speed... */
	while (*p != '\0') {
		if (*p == '\\') {
			if (p[1] == '\0') {
				/*
				 * Lone backslash at the end of the pattern:
				 * copy it by itself and stop, instead of
				 * stepping two bytes and walking past the
				 * terminating NUL.
				 */
				addchar(*p);
				break;
			}
			switch (p[1]) {
			case '(':
			case ')':
			case '|':
				addchar(p[1]);
				break;
			case 'w':
				addconstantstring("[_a-zA-Z0-9]");
				break;
			case 'W':
				addconstantstring("[^_a-zA-Z0-9]");
				break;
			case '<':
				addconstantstring("[[:<:]]");
				break;
			case '>':
				addconstantstring("[[:>:]]");
				break;
			default:
				addchars(p, 2);
				break;
			}
			p += 2;
			continue;
		}
		if (*p == '(' || *p == ')' || *p == '|')
			addchar('\\');

		addchar(*p);
		p++;
	}
	return getstring();
}
431:
432: /* patsubst(string, regexp, opt replacement) */
433: /* argv[2]: string
434: * argv[3]: regexp
435: * argv[4]: opt rep
436: */
437: void
1.18 espie 438: dopatsubst(const char *argv[], int argc)
1.5 espie 439: {
440: if (argc <= 3) {
441: warnx("Too few arguments to patsubst");
442: return;
443: }
1.20 espie 444: /* special case: empty regexp */
445: if (argv[3][0] == '\0') {
446: const char *s;
1.21 espie 447: size_t len;
1.41 marco 448: if (argc > 4 && argv[4])
1.21 espie 449: len = strlen(argv[4]);
450: else
451: len = 0;
1.20 espie 452: for (s = argv[2]; *s != '\0'; s++) {
453: addchars(argv[4], len);
454: addchar(*s);
455: }
456: } else {
457: int error;
458: regex_t re;
459: regmatch_t *pmatch;
1.30 espie 460: int mode = REG_EXTENDED;
1.46 espie 461: const char *source;
1.30 espie 462: size_t l = strlen(argv[3]);
463:
464: if (!mimic_gnu ||
1.41 marco 465: (argv[3][0] == '^') ||
1.30 espie 466: (l > 0 && argv[3][l-1] == '$'))
467: mode |= REG_NEWLINE;
1.20 espie 468:
1.46 espie 469: source = mimic_gnu ? twiddle(argv[3]) : argv[3];
470: error = regcomp(&re, source, mode);
1.20 espie 471: if (error != 0)
1.46 espie 472: exit_regerror(error, &re, source);
1.41 marco 473:
1.45 espie 474: pmatch = xreallocarray(NULL, re.re_nsub+1, sizeof(regmatch_t),
475: NULL);
1.46 espie 476: do_subst(argv[2], &re, source,
1.21 espie 477: argc > 4 && argv[4] != NULL ? argv[4] : "", pmatch);
1.20 espie 478: free(pmatch);
479: regfree(&re);
480: }
1.5 espie 481: pbstr(getstring());
482: }
483:
484: void
1.18 espie 485: doregexp(const char *argv[], int argc)
1.5 espie 486: {
487: int error;
488: regex_t re;
489: regmatch_t *pmatch;
1.46 espie 490: const char *source;
1.5 espie 491:
492: if (argc <= 3) {
1.7 espie 493: warnx("Too few arguments to regexp");
1.5 espie 494: return;
495: }
1.40 espie 496: /* special gnu case */
497: if (argv[3][0] == '\0' && mimic_gnu) {
498: if (argc == 4 || argv[4] == NULL)
499: return;
1.41 marco 500: else
1.40 espie 501: pbstr(argv[4]);
502: }
1.46 espie 503: source = mimic_gnu ? twiddle(argv[3]) : argv[3];
504: error = regcomp(&re, source, REG_EXTENDED|REG_NEWLINE);
1.5 espie 505: if (error != 0)
1.46 espie 506: exit_regerror(error, &re, source);
1.41 marco 507:
1.45 espie 508: pmatch = xreallocarray(NULL, re.re_nsub+1, sizeof(regmatch_t), NULL);
1.40 espie 509: if (argc == 4 || argv[4] == NULL)
1.46 espie 510: do_regexpindex(argv[2], &re, source, pmatch);
1.5 espie 511: else
1.46 espie 512: do_regexp(argv[2], &re, source, argv[4], pmatch);
1.5 espie 513: free(pmatch);
514: regfree(&re);
1.33 espie 515: }
516:
/*
 * format builtin: argv[2] is the format string, the following
 * arguments are consumed by its specifications.
 *
 * Supported specifications are %s and %c, each with an optional width
 * and an optional ".precision"; either may be written as '*' to take
 * the value from the next argument.  A negative width pads on the
 * right instead of the left.  %c converts its argument with strtoul()
 * and emits it as one character.  "%%" emits a single '%', and a '%'
 * ending the format string is emitted as is.
 *
 * Running out of arguments or meeting any other conversion is reported
 * through m4errx(1, ...).  The formatted text is pushed back.
 */
void
doformat(const char *argv[], int argc)
{
	const char *fmt = argv[2];
	const char *text;
	char onechar[2];
	size_t len;
	long width;
	long precision;
	int pad_right;
	int next = 3;		/* index of the next unused argument */

	while (*fmt != 0) {
		/* ordinary character */
		if (*fmt != '%') {
			addchar(*fmt++);
			continue;
		}

		fmt++;
		if (*fmt == '%') {
			addchar(*fmt++);
			continue;
		}
		if (*fmt == 0) {
			addchar('%');
			break;
		}

		/* field width */
		if (*fmt == '*') {
			fmt++;
			if (next >= argc)
				m4errx(1,
				    "Format with too many format specifiers.");
			width = strtol(argv[next++], NULL, 10);
		} else {
			width = strtol(fmt, (char **)&fmt, 10);
		}
		pad_right = (width < 0);
		if (pad_right)
			width = -width;

		/* precision: upper bound on the characters copied */
		precision = LONG_MAX;
		if (*fmt == '.') {
			fmt++;
			if (*fmt == '*') {
				fmt++;
				if (next >= argc)
					m4errx(1,
					    "Format with too many format specifiers.");
				precision = strtol(argv[next++], NULL, 10);
			} else {
				precision = strtol(fmt, (char **)&fmt, 10);
			}
		}

		/* conversion */
		if (next >= argc)
			m4errx(1, "Format with too many format specifiers.");
		switch (*fmt) {
		case 's':
			text = argv[next++];
			break;
		case 'c':
			onechar[0] = strtoul(argv[next++], NULL, 10);
			onechar[1] = 0;
			text = onechar;
			break;
		default:
			m4errx(1, "Unsupported format specification: %s.",
			    argv[2]);
		}
		fmt++;

		len = strlen(text);
		if (len > precision)
			len = precision;

		if (!pad_right) {
			while (len < width--)
				addchar(' ');
		}
		addchars(text, len);
		if (pad_right) {
			while (len < width--)
				addchar(' ');
		}
	}
	pbstr(getstring());
}
605:
606: void
1.18 espie 607: doesyscmd(const char *cmd)
1.8 espie 608: {
609: int p[2];
610: pid_t pid, cpid;
611: char *argv[4];
612: int cc;
613: int status;
614:
615: /* Follow gnu m4 documentation: first flush buffers. */
616: fflush(NULL);
617:
618: argv[0] = "sh";
619: argv[1] = "-c";
620: argv[2] = (char *)cmd;
621: argv[3] = NULL;
622:
623: /* Just set up standard output, share stderr and stdin with m4 */
624: if (pipe(p) == -1)
625: err(1, "bad pipe");
626: switch(cpid = fork()) {
627: case -1:
628: err(1, "bad fork");
629: /* NOTREACHED */
630: case 0:
631: (void) close(p[0]);
632: (void) dup2(p[1], 1);
633: (void) close(p[1]);
634: execv(_PATH_BSHELL, argv);
635: exit(1);
636: default:
637: /* Read result in two stages, since m4's buffer is
638: * pushback-only. */
639: (void) close(p[1]);
640: do {
641: char result[BUFSIZE];
642: cc = read(p[0], result, sizeof result);
643: if (cc > 0)
644: addchars(result, cc);
645: } while (cc > 0 || (cc == -1 && errno == EINTR));
646:
647: (void) close(p[0]);
648: while ((pid = wait(&status)) != cpid && pid >= 0)
649: continue;
650: pbstr(getstring());
651: }
1.31 espie 652: }
653:
654: void
655: getdivfile(const char *name)
656: {
657: FILE *f;
658: int c;
659:
660: f = fopen(name, "r");
661: if (!f)
662: return;
663:
664: while ((c = getc(f))!= EOF)
665: putc(c, active);
666: (void) fclose(f);
1.5 espie 667: }