[BACK]Return to gnum4.c CVS log [TXT][DIR] Up to [local] / src / usr.bin / m4

File: [local] / src / usr.bin / m4 / gnum4.c (download)

Revision 1.54, Mon Jul 4 10:41:21 2022 UTC (22 months, 2 weeks ago) by espie
Branch: MAIN
CVS Tags: OPENBSD_7_5_BASE, OPENBSD_7_5, OPENBSD_7_4_BASE, OPENBSD_7_4, OPENBSD_7_3_BASE, OPENBSD_7_3, OPENBSD_7_2_BASE, OPENBSD_7_2, HEAD
Changes since 1.53: +12 -21 lines

somehow, when I created this file, I copied the licence of the other files
around, but with my name instead... which makes absolutely no sense
whatsoever, especially the regents part.

replace with ISC licence, much saner.

(as noticed by miod@, who else ?)

/* $OpenBSD: gnum4.c,v 1.54 2022/07/04 10:41:21 espie Exp $ */

/*
 * Copyright (c) 1999-2022 Marc Espie <espie@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

/*
 * functions needed to support gnu-m4 extensions, including a fake freezing
 */

#include <sys/types.h>
#include <sys/wait.h>
#include <ctype.h>
#include <err.h>
#include <paths.h>
#include <regex.h>
#include <stdarg.h>
#include <stddef.h>
#include <stdlib.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <unistd.h>
#include <limits.h>
#include "mdef.h"
#include "stdd.h"
#include "extern.h"


int mimic_gnu = 0;

/*
 * Support for include path search
 * First search in the current directory.
 * If not found, and the path is not absolute, include path kicks in.
 * First, -I options, in the order found on the command line.
 * Then M4PATH env variable
 */

struct path_entry {
	char *name;
	struct path_entry *next;
} *first, *last;

static struct path_entry *new_path_entry(const char *);
static void ensure_m4path(void);
static struct input_file *dopath(struct input_file *, const char *);

static struct path_entry *
new_path_entry(const char *dirname)
{
	struct path_entry *n;

	n = malloc(sizeof(struct path_entry));
	if (!n)
		errx(1, "out of memory");
	n->name = xstrdup(dirname);
	n->next = 0;
	return n;
}

void
addtoincludepath(const char *dirname)
{
	struct path_entry *n;

	n = new_path_entry(dirname);

	if (last) {
		last->next = n;
		last = n;
	}
	else
		last = first = n;
}

static void
ensure_m4path()
{
	static int envpathdone = 0;
	char *envpath;
	char *sweep;
	char *path;

	if (envpathdone)
		return;
	envpathdone = TRUE;
	envpath = getenv("M4PATH");
	if (!envpath)
		return;
	/* for portability: getenv result is read-only */
	envpath = xstrdup(envpath);
	for (sweep = envpath;
	    (path = strsep(&sweep, ":")) != NULL;)
	    addtoincludepath(path);
	free(envpath);
}

static
struct input_file *
dopath(struct input_file *i, const char *filename)
{
	char path[PATH_MAX];
	struct path_entry *pe;
	FILE *f;

	for (pe = first; pe; pe = pe->next) {
		snprintf(path, sizeof(path), "%s/%s", pe->name, filename);
		if ((f = fopen(path, "r")) != 0) {
			set_input(i, f, path);
			return i;
		}
	}
	return NULL;
}

struct input_file *
fopen_trypath(struct input_file *i, const char *filename)
{
	FILE *f;

	f = fopen(filename, "r");
	if (f != NULL) {
		set_input(i, f, filename);
		return i;
	}
	if (filename[0] == '/')
		return NULL;

	ensure_m4path();

	return dopath(i, filename);
}

void
doindir(const char *argv[], int argc)
{
	ndptr n;
	struct macro_definition *p;

	n = lookup(argv[2]);
	if (n == NULL || (p = macro_getdef(n)) == NULL)
		m4errx(1, "indir: undefined macro %s.", argv[2]);
	argv[1] = p->defn;

	eval(argv+1, argc-1, p->type, is_traced(n));
}

void
dobuiltin(const char *argv[], int argc)
{
	ndptr p;

	argv[1] = NULL;
	p = macro_getbuiltin(argv[2]);
	if (p != NULL)
		eval(argv+1, argc-1, macro_builtin_type(p), is_traced(p));
	else
		m4errx(1, "unknown builtin %s.", argv[2]);
}


/* We need some temporary buffer space, as pb pushes BACK and substitution
 * proceeds forward... */
static char *buffer;
static size_t bufsize = 0;
static size_t current = 0;

static void addchars(const char *, size_t);
static void addchar(int);
static char *twiddle(const char *);
static char *getstring(void);
static void exit_regerror(int, regex_t *, const char *);
static void do_subst(const char *, regex_t *, const char *, const char *, 
    regmatch_t *);
static void do_regexpindex(const char *, regex_t *, const char *, regmatch_t *);
static void do_regexp(const char *, regex_t *, const char *, const char *, 
    regmatch_t *);
static void add_sub(int, const char *, regex_t *, regmatch_t *);
static void add_replace(const char *, regex_t *, const char *, regmatch_t *);
#define addconstantstring(s) addchars((s), sizeof(s)-1)

static void
addchars(const char *c, size_t n)
{
	if (n == 0)
		return;
	while (current + n > bufsize) {
		if (bufsize == 0)
			bufsize = 1024;
		else if (bufsize <= SIZE_MAX/2) {
			bufsize *= 2;
		} else {
			errx(1, "size overflow");
		}
		buffer = xrealloc(buffer, bufsize, NULL);
	}
	memcpy(buffer+current, c, n);
	current += n;
}

static void
addchar(int c)
{
	if (current +1 > bufsize) {
		if (bufsize == 0)
			bufsize = 1024;
		else
			bufsize *= 2;
		buffer = xrealloc(buffer, bufsize, NULL);
	}
	buffer[current++] = c;
}

static char *
getstring(void)
{
	addchar('\0');
	current = 0;
	return buffer;
}


static void
exit_regerror(int er, regex_t *re, const char *source)
{
	size_t	errlen;
	char	*errbuf;

	errlen = regerror(er, re, NULL, 0);
	errbuf = xalloc(errlen,
	    "malloc in regerror: %lu", (unsigned long)errlen);
	regerror(er, re, errbuf, errlen);
	m4errx(1, "regular expression error in %s: %s.", source, errbuf);
}

/* warnx() plus check to see if we need to change exit code or exit.
 * -E flag functionality.
 */
void
m4_warnx(const char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	vwarnx(fmt, ap);
	va_end(ap);

	if (fatal_warns)
		exit(1);
	if (error_warns)
		exit_code = 1;
}

static void
add_sub(int n, const char *string, regex_t *re, regmatch_t *pm)
{
	if (n > re->re_nsub)
		m4_warnx("No subexpression %d", n);
	/* Subexpressions that did not match are
	 * not an error.  */
	else if (pm[n].rm_so != -1 &&
	    pm[n].rm_eo != -1) {
		addchars(string + pm[n].rm_so,
			pm[n].rm_eo - pm[n].rm_so);
	}
}

/* Add replacement string to the output buffer, recognizing special
 * constructs and replacing them with substrings of the original string.
 */
static void
add_replace(const char *string, regex_t *re, const char *replace, regmatch_t *pm)
{
	const char *p;

	for (p = replace; *p != '\0'; p++) {
		if (*p == '&' && !mimic_gnu) {
			add_sub(0, string, re, pm);
			continue;
		}
		if (*p == '\\') {
			if (p[1] == '\\') {
				addchar(p[1]);
				p++;
				continue;
			}
			if (p[1] == '&') {
				if (mimic_gnu)
					add_sub(0, string, re, pm);
				else
					addchar(p[1]);
				p++;
				continue;
			}
			if (isdigit((unsigned char)p[1])) {
				add_sub(*(++p) - '0', string, re, pm);
				continue;
			}
		}
		addchar(*p);
	}
}

static void
do_subst(const char *string, regex_t *re, const char *source, 
    const char *replace, regmatch_t *pm)
{
	int error;
	int flags = 0;
	const char *last_match = NULL;

	while ((error = regexec(re, string, re->re_nsub+1, pm, flags)) == 0) {
		if (pm[0].rm_eo != 0) {
			if (string[pm[0].rm_eo-1] == '\n')
				flags = 0;
			else
				flags = REG_NOTBOL;
		}

		/* NULL length matches are special... We use the `vi-mode'
		 * rule: don't allow a NULL-match at the last match
		 * position.
		 */
		if (pm[0].rm_so == pm[0].rm_eo &&
		    string + pm[0].rm_so == last_match) {
			if (*string == '\0')
				return;
			addchar(*string);
			if (*string++ == '\n')
				flags = 0;
			else
				flags = REG_NOTBOL;
			continue;
		}
		last_match = string + pm[0].rm_so;
		addchars(string, pm[0].rm_so);
		add_replace(string, re, replace, pm);
		string += pm[0].rm_eo;
	}
	if (error != REG_NOMATCH)
		exit_regerror(error, re, source);
	pbstr(string);
}

static void
do_regexp(const char *string, regex_t *re, const char *source, 
    const char *replace, regmatch_t *pm)
{
	int error;

	switch(error = regexec(re, string, re->re_nsub+1, pm, 0)) {
	case 0:
		add_replace(string, re, replace, pm);
		pbstr(getstring());
		break;
	case REG_NOMATCH:
		break;
	default:
		exit_regerror(error, re, source);
	}
}

static void
do_regexpindex(const char *string, regex_t *re, const char *source, 
    regmatch_t *pm)
{
	int error;

	switch(error = regexec(re, string, re->re_nsub+1, pm, 0)) {
	case 0:
		pbunsigned(pm[0].rm_so);
		break;
	case REG_NOMATCH:
		pbnum(-1);
		break;
	default:
		exit_regerror(error, re, source);
	}
}

/* In Gnu m4 mode, parentheses for backmatch don't work like POSIX 1003.2
 * says. So we twiddle with the regexp before passing it to regcomp.
 */
static char *
twiddle(const char *p)
{
	/* + at start of regexp is a normal character for Gnu m4 */
	if (*p == '^') {
		addchar(*p);
		p++;
	}
	if (*p == '+') {
		addchar('\\');
	}
	/* This could use strcspn for speed... */
	while (*p != '\0') {
		if (*p == '\\') {
			switch(p[1]) {
			case '(':
			case ')':
			case '|':
				addchar(p[1]);
				break;
			case 'w':
				addconstantstring("[_a-zA-Z0-9]");
				break;
			case 'W':
				addconstantstring("[^_a-zA-Z0-9]");
				break;
			case '<':
				addconstantstring("[[:<:]]");
				break;
			case '>':
				addconstantstring("[[:>:]]");
				break;
			default:
				addchars(p, 2);
				break;
			}
			p+=2;
			continue;
		}
		if (*p == '(' || *p == ')' || *p == '|')
			addchar('\\');

		addchar(*p);
		p++;
	}
	return getstring();
}

/* patsubst(string, regexp, opt replacement) */
/* argv[2]: string
 * argv[3]: regexp
 * argv[4]: opt rep
 */
void
dopatsubst(const char *argv[], int argc)
{
	if (argc <= 3) {
		m4_warnx("Too few arguments to patsubst");
		return;
	}
	/* special case: empty regexp */
	if (argv[3][0] == '\0') {
		const char *s;
		size_t len;
		if (argc > 4 && argv[4])
			len = strlen(argv[4]);
		else
			len = 0;
		for (s = argv[2]; *s != '\0'; s++) {
			addchars(argv[4], len);
			addchar(*s);
		}
	} else {
		int error;
		regex_t re;
		regmatch_t *pmatch;
		int mode = REG_EXTENDED;
		const char *source;
		size_t l = strlen(argv[3]);

		if (!mimic_gnu ||
		    (argv[3][0] == '^') ||
		    (l > 0 && argv[3][l-1] == '$'))
			mode |= REG_NEWLINE;

		source = mimic_gnu ? twiddle(argv[3]) : argv[3];
		error = regcomp(&re, source, mode);
		if (error != 0)
			exit_regerror(error, &re, source);

		pmatch = xreallocarray(NULL, re.re_nsub+1, sizeof(regmatch_t), 
		    NULL);
		do_subst(argv[2], &re, source,
		    argc > 4 && argv[4] != NULL ? argv[4] : "", pmatch);
		free(pmatch);
		regfree(&re);
	}
	pbstr(getstring());
}

void
doregexp(const char *argv[], int argc)
{
	int error;
	regex_t re;
	regmatch_t *pmatch;
	const char *source;

	if (argc <= 3) {
		m4_warnx("Too few arguments to regexp");
		return;
	}
	/* special gnu case */
	if (argv[3][0] == '\0' && mimic_gnu) {
		if (argc == 4 || argv[4] == NULL)
			return;
		else
			pbstr(argv[4]);
	}
	source = mimic_gnu ? twiddle(argv[3]) : argv[3];
	error = regcomp(&re, source, REG_EXTENDED|REG_NEWLINE);
	if (error != 0)
		exit_regerror(error, &re, source);

	pmatch = xreallocarray(NULL, re.re_nsub+1, sizeof(regmatch_t), NULL);
	if (argc == 4 || argv[4] == NULL)
		do_regexpindex(argv[2], &re, source, pmatch);
	else
		do_regexp(argv[2], &re, source, argv[4], pmatch);
	free(pmatch);
	regfree(&re);
}

void
doformat(const char *argv[], int argc)
{
	const char *format = argv[2];
	int pos = 3;
	int left_padded;
	long width;
	size_t l;
	const char *thisarg;
	char temp[2];
	long extra;

	while (*format != 0) {
		if (*format != '%') {
			addchar(*format++);
			continue;
		}

		format++;
		if (*format == '%') {
			addchar(*format++);
			continue;
		}
		if (*format == 0) {
			addchar('%');
			break;
		}

		if (*format == '*') {
			format++;
			if (pos >= argc)
				m4errx(1,
				    "Format with too many format specifiers.");
			width = strtol(argv[pos++], NULL, 10);
		} else {
			width = strtol(format, (char **)&format, 10);
		}
		if (width < 0) {
			left_padded = 1;
			width = -width;
		} else {
			left_padded = 0;
		}
		if (*format == '.') {
			format++;
			if (*format == '*') {
				format++;
				if (pos >= argc)
					m4errx(1,
					    "Format with too many format specifiers.");
				extra = strtol(argv[pos++], NULL, 10);
			} else {
				extra = strtol(format, (char **)&format, 10);
			}
		} else {
			extra = LONG_MAX;
		}
		if (pos >= argc)
			m4errx(1, "Format with too many format specifiers.");
		switch(*format) {
		case 's':
			thisarg = argv[pos++];
			break;
		case 'c':
			temp[0] = strtoul(argv[pos++], NULL, 10);
			temp[1] = 0;
			thisarg = temp;
			break;
		default:
			m4errx(1, "Unsupported format specification: %s.",
			    argv[2]);
		}
		format++;
		l = strlen(thisarg);
		if (l > extra)
			l = extra;
		if (!left_padded) {
			while (l < width--)
				addchar(' ');
		}
		addchars(thisarg, l);
		if (left_padded) {
			while (l < width--)
				addchar(' ');
		}
	}
	pbstr(getstring());
}

void
doesyscmd(const char *cmd)
{
	int p[2];
	pid_t cpid;
	char *argv[4];
	int cc;
	int status;

	/* Follow gnu m4 documentation: first flush buffers. */
	fflush(NULL);

	argv[0] = "sh";
	argv[1] = "-c";
	argv[2] = (char *)cmd;
	argv[3] = NULL;

	/* Just set up standard output, share stderr and stdin with m4 */
	if (pipe(p) == -1)
		err(1, "bad pipe");
	switch(cpid = fork()) {
	case -1:
		err(1, "bad fork");
		/* NOTREACHED */
	case 0:
		(void) close(p[0]);
		(void) dup2(p[1], 1);
		(void) close(p[1]);
		execv(_PATH_BSHELL, argv);
		exit(1);
	default:
		/* Read result in two stages, since m4's buffer is
		 * pushback-only. */
		(void) close(p[1]);
		do {
			char result[BUFSIZE];
			cc = read(p[0], result, sizeof result);
			if (cc > 0)
				addchars(result, cc);
		} while (cc > 0 || (cc == -1 && errno == EINTR));

		(void) close(p[0]);
		while (waitpid(cpid, &status, 0) == -1) {
			if (errno != EINTR)
				break;
		}
		pbstr(getstring());
	}
}

void
getdivfile(const char *name)
{
	FILE *f;
	int c;

	f = fopen(name, "r");
	if (!f)
		return;

	while ((c = getc(f))!= EOF)
		putc(c, active);
	(void) fclose(f);
}