[BACK]Return to patterns.c CVS log [TXT][DIR] Up to [local] / src / usr.sbin / httpd

File: [local] / src / usr.sbin / httpd / patterns.c (download)

Revision 1.5, Sun Feb 14 18:20:59 2016 UTC (8 years, 3 months ago) by semarie
Branch: MAIN
CVS Tags: OPENBSD_7_5_BASE, OPENBSD_7_5, OPENBSD_7_4_BASE, OPENBSD_7_4, OPENBSD_7_3_BASE, OPENBSD_7_3, OPENBSD_7_2_BASE, OPENBSD_7_2, OPENBSD_7_1_BASE, OPENBSD_7_1, OPENBSD_7_0_BASE, OPENBSD_7_0, OPENBSD_6_9_BASE, OPENBSD_6_9, OPENBSD_6_8_BASE, OPENBSD_6_8, OPENBSD_6_7_BASE, OPENBSD_6_7, OPENBSD_6_6_BASE, OPENBSD_6_6, OPENBSD_6_5_BASE, OPENBSD_6_5, OPENBSD_6_4_BASE, OPENBSD_6_4, OPENBSD_6_3_BASE, OPENBSD_6_3, OPENBSD_6_2_BASE, OPENBSD_6_2, OPENBSD_6_1_BASE, OPENBSD_6_1, OPENBSD_6_0_BASE, OPENBSD_6_0, OPENBSD_5_9_BASE, OPENBSD_5_9, HEAD
Changes since 1.4: +3 -2 lines

httpd patterns double free

issue and diff from Alexander Schrijver alex at flupzor nl

ok reyk@

/*	$OpenBSD: patterns.c,v 1.5 2016/02/14 18:20:59 semarie Exp $	*/

/*
 * Copyright (c) 2015 Reyk Floeter <reyk@openbsd.org>
 * Copyright (C) 1994-2015 Lua.org, PUC-Rio.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

/*
 * Derived from Lua 5.3.1:
 * $Id: patterns.c,v 1.5 2016/02/14 18:20:59 semarie Exp $
 * Standard library for string operations and pattern-matching
 */

#include <sys/types.h>
#include <ctype.h>
#include <errno.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>

#include "patterns.h"

#define uchar(c)	((unsigned char)(c)) /* macro to 'unsign' a char */
#define CAP_UNFINISHED	(-1)
#define CAP_POSITION	(-2)
#define L_ESC		'%'
#define SPECIALS	"^$*+?.([%-"

struct match_state {
	int matchdepth;		/* control for recursive depth (to avoid C
				 * stack overflow) */
	int repetitioncounter;	/* control the repetition items */
	int maxcaptures;	/* configured capture limit */
	const char *src_init;	/* init of source string */
	const char *src_end;	/* end ('\0') of source string */
	const char *p_end;	/* end ('\0') of pattern */
	const char *error;	/* should be NULL */
	int level;		/* total number of captures (finished or
				 * unfinished) */
	struct {
		const char *init;
		ptrdiff_t len;
	} capture[MAXCAPTURES];
};

/* recursive function */
static const char *match(struct match_state *, const char *, const char *);

static int
match_error(struct match_state *ms, const char *error)
{
	ms->error = ms->error == NULL ? error : ms->error;
	return (-1);
}

static int
check_capture(struct match_state *ms, int l)
{
	l -= '1';
	if (l < 0 || l >= ms->level || ms->capture[l].len == CAP_UNFINISHED)
		return match_error(ms, "invalid capture index");
	return (l);
}

static int
capture_to_close(struct match_state *ms)
{
	int level = ms->level;
	for (level--; level >= 0; level--)
		if (ms->capture[level].len == CAP_UNFINISHED)
			return (level);
	return match_error(ms, "invalid pattern capture");
}

static const char *
classend(struct match_state *ms, const char *p)
{
	switch (*p++) {
	case L_ESC:
		if (p == ms->p_end)
			match_error(ms,
			    "malformed pattern (ends with '%')");
		return p + 1;
	case '[':
		if (*p == '^')
			p++;
		do {
			/* look for a ']' */
			if (p == ms->p_end) {
				match_error(ms,
				    "malformed pattern (missing ']')");
				break;
			}
			if (*(p++) == L_ESC && p < ms->p_end) {
				/* skip escapes (e.g. '%]') */
				p++;
			}
		} while (*p != ']');
		return p + 1;
	default:
		return p;
	}
}

static int
match_class(int c, int cl)
{
	int res;
	switch (tolower(cl)) {
	case 'a':
		res = isalpha(c);
		break;
	case 'c':
		res = iscntrl(c);
		break;
	case 'd':
		res = isdigit(c);
		break;
	case 'g':
		res = isgraph(c);
		break;
	case 'l':
		res = islower(c);
		break;
	case 'p':
		res = ispunct(c);
		break;
	case 's':
		res = isspace(c);
		break;
	case 'u':
		res = isupper(c);
		break;
	case 'w':
		res = isalnum(c);
		break;
	case 'x':
		res = isxdigit(c);
		break;
	default:
		return (cl == c);
	}
	return (islower(cl) ? res : !res);
}

static int
matchbracketclass(int c, const char *p, const char *ec)
{
	int sig = 1;
	if (*(p + 1) == '^') {
		sig = 0;
		/* skip the '^' */
		p++;
	}
	while (++p < ec) {
		if (*p == L_ESC) {
			p++;
			if (match_class(c, uchar(*p)))
				return sig;
		} else if ((*(p + 1) == '-') && (p + 2 < ec)) {
			p += 2;
			if (uchar(*(p - 2)) <= c && c <= uchar(*p))
				return sig;
		} else if (uchar(*p) == c)
			return sig;
	}
	return !sig;
}

static int
singlematch(struct match_state *ms, const char *s, const char *p,
    const char *ep)
{
	if (s >= ms->src_end)
		return 0;
	else {
		int c = uchar(*s);
		switch (*p) {
		case '.':
			/* matches any char */
			return (1);
		case L_ESC:
			return match_class(c, uchar(*(p + 1)));
		case '[':
			return matchbracketclass(c, p, ep - 1);
		default:
			return (uchar(*p) == c);
		}
	}
}

static const char *
matchbalance(struct match_state *ms, const char *s, const char *p)
{
	if (p >= ms->p_end - 1) {
		match_error(ms,
		    "malformed pattern (missing arguments to '%b')");
		return (NULL);
	}
	if (*s != *p)
		return (NULL);
	else {
		int b = *p;
		int e = *(p + 1);
		int cont = 1;
		while (++s < ms->src_end) {
			if (*s == e) {
				if (--cont == 0)
					return s + 1;
			} else if (*s == b)
				cont++;
		}
	}

	/* string ends out of balance */
	return (NULL);
}

static const char *
max_expand(struct match_state *ms, const char *s, const char *p, const char *ep)
{
	ptrdiff_t i = 0;
	/* counts maximum expand for item */
	while (singlematch(ms, s + i, p, ep))
		i++;
	/* keeps trying to match with the maximum repetitions */
	while (i >= 0) {
		const char *res = match(ms, (s + i), ep + 1);
		if (res)
			return res;
		/* else didn't match; reduce 1 repetition to try again */
		i--;
	}
	return NULL;
}

static const char *
min_expand(struct match_state *ms, const char *s, const char *p, const char *ep)
{
	for (;;) {
		const char *res = match(ms, s, ep + 1);
		if (res != NULL)
			return res;
		else if (singlematch(ms, s, p, ep))
			s++;	/* try with one more repetition */
		else
			return NULL;
	}
}

static const char *
start_capture(struct match_state *ms, const char *s, const char *p, int what)
{
	const char *res;

	int level = ms->level;
	if (level >= ms->maxcaptures) {
		match_error(ms, "too many captures");
		return (NULL);
	}
	ms->capture[level].init = s;
	ms->capture[level].len = what;
	ms->level = level + 1;
	/* undo capture if match failed */
	if ((res = match(ms, s, p)) == NULL)
		ms->level--;
	return res;
}

static const char *
end_capture(struct match_state *ms, const char *s, const char *p)
{
	int l = capture_to_close(ms);
	const char *res;
	if (l == -1)
		return NULL;
	/* close capture */
	ms->capture[l].len = s - ms->capture[l].init;
	/* undo capture if match failed */
	if ((res = match(ms, s, p)) == NULL)
		ms->capture[l].len = CAP_UNFINISHED;
	return res;
}

static const char *
match_capture(struct match_state *ms, const char *s, int l)
{
	size_t len;
	l = check_capture(ms, l);
	if (l == -1)
		return NULL;
	len = ms->capture[l].len;
	if ((size_t) (ms->src_end - s) >= len &&
	    memcmp(ms->capture[l].init, s, len) == 0)
		return s + len;
	else
		return NULL;
}

static const char *
match(struct match_state *ms, const char *s, const char *p)
{
	const char *ep, *res;
	char previous;

	if (ms->matchdepth-- == 0) {
		match_error(ms, "pattern too complex");
		return (NULL);
	}

	/* using goto's to optimize tail recursion */
 init:
	/* end of pattern? */
	if (p != ms->p_end) {
		switch (*p) {
		case '(':
			/* start capture */
			if (*(p + 1) == ')')
				/* position capture? */
				s = start_capture(ms, s, p + 2, CAP_POSITION);
			else
				s = start_capture(ms, s, p + 1, CAP_UNFINISHED);
			break;
		case ')':
			/* end capture */
			s = end_capture(ms, s, p + 1);
			break;
		case '$':
			/* is the '$' the last char in pattern? */
			if ((p + 1) != ms->p_end) {
				/* no; go to default */
				goto dflt;
			}
			 /* check end of string */
			s = (s == ms->src_end) ? s : NULL;
			break;
		case L_ESC:
			/* escaped sequences not in the format class[*+?-]? */
			switch (*(p + 1)) {
			case 'b':
				/* balanced string? */
				s = matchbalance(ms, s, p + 2);
				if (s != NULL) {
					p += 4;
					/* return match(ms, s, p + 4); */
					goto init;
				} /* else fail (s == NULL) */
				break;
			case 'f':
				/* frontier? */
				p += 2;
				if (*p != '[') {
					match_error(ms, "missing '['"
					    " after '%f' in pattern");
					break;
				}
				/* points to what is next */
				ep = classend(ms, p);
				if (ms->error != NULL)
					break;
				previous =
				    (s == ms->src_init) ? '\0' : *(s - 1);
				if (!matchbracketclass(uchar(previous),
				    p, ep - 1) &&
				    matchbracketclass(uchar(*s),
				    p, ep - 1)) {
					p = ep;
					/* return match(ms, s, ep); */
					goto init;
				}
				/* match failed */
				s = NULL;
				break;
			case '0':
			case '1':
			case '2':
			case '3':
			case '4':
			case '5':
			case '6':
			case '7':
			case '8':
			case '9':
				/* capture results (%0-%9)? */
				s = match_capture(ms, s, uchar(*(p + 1)));
				if (s != NULL) {
					p += 2;
					/* return match(ms, s, p + 2) */
					goto init;
				}
				break;
			default:
				goto dflt;
			}
			break;
		default:

			/* pattern class plus optional suffix */
	dflt:
			/* points to optional suffix */
			ep = classend(ms, p);
			if (ms->error != NULL)
				break;

			/* does not match at least once? */
			if (!singlematch(ms, s, p, ep)) {
				if (ms->repetitioncounter-- == 0) {
					match_error(ms, "max repetition items");
					s = NULL; /* fail */
				/* accept empty? */
				} else if
				    (*ep == '*' || *ep == '?' || *ep == '-') {
					 p = ep + 1;
					/* return match(ms, s, ep + 1); */
					 goto init;
				} else {
					/* '+' or no suffix */
					s = NULL; /* fail */
				}
			} else {
				/* matched once */
				/* handle optional suffix */
				switch (*ep) {
				case '?':
					/* optional */
					if ((res =
					    match(ms, s + 1, ep + 1)) != NULL)
						s = res;
					else {
						/* 
						 * else return
						 *     match(ms, s, ep + 1);
						 */
						p = ep + 1;
						goto init;
					}
					break;
				case '+':
					/* 1 or more repetitions */
					s++; /* 1 match already done */
					/* FALLTHROUGH */
				case '*':
					/* 0 or more repetitions */
					s = max_expand(ms, s, p, ep);
					break;
				case '-':
					/* 0 or more repetitions (minimum) */
					s = min_expand(ms, s, p, ep);
					break;
				default:
					/* no suffix */
					s++;
					p = ep;
					/* return match(ms, s + 1, ep); */
					goto init;
				}
			}
			break;
		}
	}
	ms->matchdepth++;
	return s;
}

static const char *
lmemfind(const char *s1, size_t l1,
    const char *s2, size_t l2)
{
	const char *init;

	if (l2 == 0) {
		/* empty strings are everywhere */
		return (s1);
	} else if (l2 > l1) {
		/* avoids a negative 'l1' */
		return (NULL);
	} else {
		/*
		 * to search for a '*s2' inside 's1'
		 * - 1st char will be checked by 'memchr'
		 * - 's2' cannot be found after that
		 */
		l2--;
		l1 = l1 - l2;
		while (l1 > 0 &&
		    (init = (const char *)memchr(s1, *s2, l1)) != NULL) {
			/* 1st char is already checked */
			init++;
			if (memcmp(init, s2 + 1, l2) == 0)
				return init - 1;
			else {
				/* correct 'l1' and 's1' to try again */
				l1 -= init - s1;
				s1 = init;
			}
		}
		/* not found */
		return (NULL);
	}
}

static int
push_onecapture(struct match_state *ms, int i, const char *s,
    const char *e, struct str_find *sm)
{
	if (i >= ms->level) {
		if (i == 0 || ms->level == 0) {
			/* add whole match */
			sm->sm_so = (off_t)(s - ms->src_init);
			sm->sm_eo = (off_t)(e - s) + sm->sm_so;
		} else
			return match_error(ms, "invalid capture index");
	} else {
		ptrdiff_t l = ms->capture[i].len;
		if (l == CAP_UNFINISHED)
			return match_error(ms, "unfinished capture");
		sm->sm_so = ms->capture[i].init - ms->src_init;
		sm->sm_eo = sm->sm_so + l;
	}
	sm->sm_eo = sm->sm_eo < sm->sm_so ? sm->sm_so : sm->sm_eo;
	return (0);
}

static int
push_captures(struct match_state *ms, const char *s, const char *e,
    struct str_find *sm, size_t nsm)
{
	unsigned int i;
	unsigned int nlevels = (ms->level <= 0 && s) ? 1 : ms->level;

	if (nlevels > nsm)
		nlevels = nsm;
	for (i = 0; i < nlevels; i++)
		if (push_onecapture(ms, i, s, e, sm + i) == -1)
			break;

	/* number of strings pushed */
	return (nlevels);
}

/* check whether pattern has no special characters */
static int
nospecials(const char *p, size_t l)
{
	size_t upto = 0;

	do {
		if (strpbrk(p + upto, SPECIALS)) {
			/* pattern has a special character */
			return 0;
		}
		/* may have more after \0 */
		upto += strlen(p + upto) + 1;
	} while (upto <= l);

	/* no special chars found */
	return (1);
}

static int
str_find_aux(struct match_state *ms, const char *pattern, const char *string,
    struct str_find *sm, size_t nsm, off_t init)
{
	size_t		 ls = strlen(string);
	size_t		 lp = strlen(pattern);
	const char	*s = string;
	const char	*p = pattern;
	const char	*s1, *s2;
	int		 anchor, i;

	if (init < 0)
		init = 0;
	else if (init > (off_t)ls)
		return match_error(ms, "starting after string's end");
	s1 = s + init;

	if (nospecials(p, lp)) {
		/* do a plain search */
		s2 = lmemfind(s1, ls - (size_t)init, p, lp);
		if (s2 != NULL) {
			i = 0;
			sm[i].sm_so = 0;
			sm[i].sm_eo = ls;
			if (nsm > 1) {
				i++;
				sm[i].sm_so = s2 - s;
				sm[i].sm_eo = (s2 - s) + lp;
			}
			return (i + 1);
		}
		return (0);
	}

	anchor = (*p == '^');
	if (anchor) {
		p++;
		lp--;	/* skip anchor character */
	}
	ms->maxcaptures = (nsm > MAXCAPTURES ? MAXCAPTURES : nsm) - 1;
	ms->matchdepth = MAXCCALLS;
	ms->repetitioncounter = MAXREPETITION;
	ms->src_init = s;
	ms->src_end = s + ls;
	ms->p_end = p + lp;
	do {
		const char *res;
		ms->level = 0;
		if ((res = match(ms, s1, p)) != NULL) {
			sm->sm_so = 0;
			sm->sm_eo = ls;
			return push_captures(ms, s1, res, sm + 1, nsm - 1) + 1;

		} else if (ms->error != NULL) {
			return 0;
		}
	} while (s1++ < ms->src_end && !anchor);

	return 0;
}

int
str_find(const char *string, const char *pattern, struct str_find *sm,
    size_t nsm, const char **errstr)
{
	struct match_state	ms;
	int			ret;

	memset(&ms, 0, sizeof(ms));
	memset(sm, 0, nsm * sizeof(*sm));

	ret = str_find_aux(&ms, pattern, string, sm, nsm, 0);
	if (ms.error != NULL) {
		/* Return 0 on error and store the error string */
		*errstr = ms.error;
		ret = 0;
	} else
		*errstr = NULL;

	return (ret);
}

int
str_match(const char *string, const char *pattern, struct str_match *m,
    const char **errstr)
{
	struct str_find		 sm[MAXCAPTURES];
	struct match_state	 ms;
	int			 ret, i;
	size_t			 len, nsm;

	nsm = MAXCAPTURES;
	memset(&ms, 0, sizeof(ms));
	memset(sm, 0, sizeof(sm));
	memset(m, 0, sizeof(*m));

	ret = str_find_aux(&ms, pattern, string, sm, nsm, 0);
	if (ret <= 0 || ms.error != NULL) {
		/* Return -1 on error and store the error string */
		*errstr = ms.error;
		return (-1);
	}

	if ((m->sm_match = calloc(ret, sizeof(char *))) == NULL) {
		*errstr = strerror(errno);
		return (-1);
	}
	m->sm_nmatch = ret;

	for (i = 0; i < ret; i++) {
		if (sm[i].sm_so > sm[i].sm_eo)
			continue;
		len = sm[i].sm_eo - sm[i].sm_so;
		if ((m->sm_match[i] = strndup(string +
		    sm[i].sm_so, len)) == NULL) {
			*errstr = strerror(errno);
			str_match_free(m);
			return (-1);
		}
	}

	*errstr = NULL;
	return (0);
}

void
str_match_free(struct str_match *m)
{
	unsigned int	 i = 0;
	for (i = 0; i < m->sm_nmatch; i++)
		free(m->sm_match[i]);
	free(m->sm_match);
	m->sm_match = NULL;
	m->sm_nmatch = 0;
}