[BACK]Return to magic-load.c CVS log [TXT][DIR] Up to [local] / src / usr.bin / file

File: [local] / src / usr.bin / file / magic-load.c (download)

Revision 1.26, Sun Jul 2 10:58:15 2017 UTC (6 years, 10 months ago) by brynet
Branch: MAIN
CVS Tags: OPENBSD_7_5_BASE, OPENBSD_7_5, OPENBSD_7_4_BASE, OPENBSD_7_4, OPENBSD_7_3_BASE, OPENBSD_7_3, OPENBSD_7_2_BASE, OPENBSD_7_2, OPENBSD_7_1_BASE, OPENBSD_7_1, OPENBSD_7_0_BASE, OPENBSD_7_0, OPENBSD_6_9_BASE, OPENBSD_6_9, OPENBSD_6_8_BASE, OPENBSD_6_8, OPENBSD_6_7_BASE, OPENBSD_6_7, OPENBSD_6_6_BASE, OPENBSD_6_6, OPENBSD_6_5_BASE, OPENBSD_6_5, OPENBSD_6_4_BASE, OPENBSD_6_4, OPENBSD_6_3_BASE, OPENBSD_6_3, OPENBSD_6_2_BASE, OPENBSD_6_2, HEAD
Changes since 1.25: +11 -10 lines

Replace fgetln with POSIX getline, even though file was using fgetln in
the idiomatic safe way. Also, check for stream errors with ferror and
err(1, "") in that case.

with feedback from joerg@, schwarze@, nicm@

ok nicm "code is shorter anyway", and joerg@ for an earlier diff

/* $OpenBSD: magic-load.c,v 1.26 2017/07/02 10:58:15 brynet Exp $ */

/*
 * Copyright (c) 2015 Nicholas Marriott <nicm@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF MIND, USE, DATA OR PROFITS, WHETHER
 * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
 * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/types.h>

#include <ctype.h>
#include <err.h>
#include <errno.h>
#include <limits.h>
#include <regex.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "magic.h"
#include "xmalloc.h"

static int
magic_odigit(u_char c)
{
	if (c >= '0' && c <= '7')
		return (c - '0');
	return (-1);
}

static int
magic_xdigit(u_char c)
{
	if (c >= '0' && c <= '9')
		return (c - '0');
	if (c >= 'a' && c <= 'f')
		return (10 + c - 'a');
	if (c >= 'A' && c <= 'F')
		return (10 + c - 'A');
	return (-1);
}

static void
magic_mark_text(struct magic_line *ml, int text)
{
	do {
		ml->text = text;
		ml = ml->parent;
	} while (ml != NULL);
}

static int
magic_make_pattern(struct magic_line *ml, const char *name, regex_t *re,
    const char *p)
{
	int	error;
	char	errbuf[256];

	error = regcomp(re, p, REG_EXTENDED|REG_NOSUB);
	if (error != 0) {
		regerror(error, re, errbuf, sizeof errbuf);
		magic_warn(ml, "bad %s pattern: %s", name, errbuf);
		return (-1);
	}
	return (0);
}

static int
magic_set_result(struct magic_line *ml, const char *s)
{
	const char	*fmt, *endfmt, *cp;
	regex_t		*re = NULL;
	regmatch_t	 pmatch;
	size_t		 fmtlen;

	while (isspace((u_char)*s))
		s++;
	if (*s == '\0') {
		ml->result = NULL;
		return (0);
	}
	ml->result = xstrdup(s);

	fmt = NULL;
	for (cp = s; *cp != '\0'; cp++) {
		if (cp[0] == '%' && cp[1] != '%') {
			if (fmt != NULL) {
				magic_warn(ml, "multiple formats");
				return (-1);
			}
			fmt = cp;
		}
	}
	if (fmt == NULL)
		return (0);
	fmt++;

	for (endfmt = fmt; *endfmt != '\0'; endfmt++) {
		if (strchr("diouxXeEfFgGsc", *endfmt) != NULL)
			break;
	}
	if (*endfmt == '\0') {
		magic_warn(ml, "unterminated format");
		return (-1);
	}
	fmtlen = endfmt + 1 - fmt;
	if (fmtlen > 32) {
		magic_warn(ml, "format too long");
		return (-1);
	}

	if (*endfmt == 's') {
		switch (ml->type) {
		case MAGIC_TYPE_DATE:
		case MAGIC_TYPE_LDATE:
		case MAGIC_TYPE_UDATE:
		case MAGIC_TYPE_ULDATE:
		case MAGIC_TYPE_BEDATE:
		case MAGIC_TYPE_BELDATE:
		case MAGIC_TYPE_UBEDATE:
		case MAGIC_TYPE_UBELDATE:
		case MAGIC_TYPE_QDATE:
		case MAGIC_TYPE_QLDATE:
		case MAGIC_TYPE_UQDATE:
		case MAGIC_TYPE_UQLDATE:
		case MAGIC_TYPE_BEQDATE:
		case MAGIC_TYPE_BEQLDATE:
		case MAGIC_TYPE_UBEQDATE:
		case MAGIC_TYPE_UBEQLDATE:
		case MAGIC_TYPE_LEQDATE:
		case MAGIC_TYPE_LEQLDATE:
		case MAGIC_TYPE_ULEQDATE:
		case MAGIC_TYPE_ULEQLDATE:
		case MAGIC_TYPE_LEDATE:
		case MAGIC_TYPE_LELDATE:
		case MAGIC_TYPE_ULEDATE:
		case MAGIC_TYPE_ULELDATE:
		case MAGIC_TYPE_MEDATE:
		case MAGIC_TYPE_MELDATE:
		case MAGIC_TYPE_STRING:
		case MAGIC_TYPE_PSTRING:
		case MAGIC_TYPE_BESTRING16:
		case MAGIC_TYPE_LESTRING16:
		case MAGIC_TYPE_REGEX:
		case MAGIC_TYPE_SEARCH:
			break;
		default:
			ml->stringify = 1;
			break;
		}
	}

	if (!ml->root->compiled) {
		/*
		 * XXX %ld (and %lu and so on) is invalid on 64-bit platforms
		 * with byte, short, long. We get lucky because our first and
		 * only argument ends up in a register. Accept it for now.
		 */
		if (magic_make_pattern(ml, "short", &ml->root->format_short,
		    "^-?[0-9]*(\\.[0-9]*)?(c|(l|h|hh)?[iduxX])$") != 0)
			return (-1);
		if (magic_make_pattern(ml, "long", &ml->root->format_long,
		    "^-?[0-9]*(\\.[0-9]*)?(c|(l|h|hh)?[iduxX])$") != 0)
			return (-1);
		if (magic_make_pattern(ml, "quad", &ml->root->format_quad,
		    "^-?[0-9]*(\\.[0-9]*)?ll[iduxX]$") != 0)
			return (-1);
		if (magic_make_pattern(ml, "float", &ml->root->format_float,
		    "^-?[0-9]*(\\.[0-9]*)?[eEfFgG]$") != 0)
			return (-1);
		if (magic_make_pattern(ml, "string", &ml->root->format_string,
		    "^-?[0-9]*(\\.[0-9]*)?s$") != 0)
			return (-1);
		ml->root->compiled = 1;
	}

	if (ml->stringify)
		re = &ml->root->format_string;
	else {
		switch (ml->type) {
		case MAGIC_TYPE_NONE:
		case MAGIC_TYPE_BESTRING16:
		case MAGIC_TYPE_LESTRING16:
		case MAGIC_TYPE_NAME:
		case MAGIC_TYPE_USE:
			return (0); /* don't use result */
		case MAGIC_TYPE_BYTE:
		case MAGIC_TYPE_UBYTE:
		case MAGIC_TYPE_SHORT:
		case MAGIC_TYPE_USHORT:
		case MAGIC_TYPE_BESHORT:
		case MAGIC_TYPE_UBESHORT:
		case MAGIC_TYPE_LESHORT:
		case MAGIC_TYPE_ULESHORT:
			re = &ml->root->format_short;
			break;
		case MAGIC_TYPE_LONG:
		case MAGIC_TYPE_ULONG:
		case MAGIC_TYPE_BELONG:
		case MAGIC_TYPE_UBELONG:
		case MAGIC_TYPE_LELONG:
		case MAGIC_TYPE_ULELONG:
		case MAGIC_TYPE_MELONG:
			re = &ml->root->format_long;
			break;
		case MAGIC_TYPE_QUAD:
		case MAGIC_TYPE_UQUAD:
		case MAGIC_TYPE_BEQUAD:
		case MAGIC_TYPE_UBEQUAD:
		case MAGIC_TYPE_LEQUAD:
		case MAGIC_TYPE_ULEQUAD:
			re = &ml->root->format_quad;
			break;
		case MAGIC_TYPE_FLOAT:
		case MAGIC_TYPE_BEFLOAT:
		case MAGIC_TYPE_LEFLOAT:
		case MAGIC_TYPE_DOUBLE:
		case MAGIC_TYPE_BEDOUBLE:
		case MAGIC_TYPE_LEDOUBLE:
			re = &ml->root->format_float;
			break;
		case MAGIC_TYPE_DATE:
		case MAGIC_TYPE_LDATE:
		case MAGIC_TYPE_UDATE:
		case MAGIC_TYPE_ULDATE:
		case MAGIC_TYPE_BEDATE:
		case MAGIC_TYPE_BELDATE:
		case MAGIC_TYPE_UBEDATE:
		case MAGIC_TYPE_UBELDATE:
		case MAGIC_TYPE_QDATE:
		case MAGIC_TYPE_QLDATE:
		case MAGIC_TYPE_UQDATE:
		case MAGIC_TYPE_UQLDATE:
		case MAGIC_TYPE_BEQDATE:
		case MAGIC_TYPE_BEQLDATE:
		case MAGIC_TYPE_UBEQDATE:
		case MAGIC_TYPE_UBEQLDATE:
		case MAGIC_TYPE_LEQDATE:
		case MAGIC_TYPE_LEQLDATE:
		case MAGIC_TYPE_ULEQDATE:
		case MAGIC_TYPE_ULEQLDATE:
		case MAGIC_TYPE_LEDATE:
		case MAGIC_TYPE_LELDATE:
		case MAGIC_TYPE_ULEDATE:
		case MAGIC_TYPE_ULELDATE:
		case MAGIC_TYPE_MEDATE:
		case MAGIC_TYPE_MELDATE:
		case MAGIC_TYPE_STRING:
		case MAGIC_TYPE_PSTRING:
		case MAGIC_TYPE_REGEX:
		case MAGIC_TYPE_SEARCH:
		case MAGIC_TYPE_DEFAULT:
		case MAGIC_TYPE_CLEAR:
			re = &ml->root->format_string;
			break;
		}
	}

	pmatch.rm_so = 0;
	pmatch.rm_eo = fmtlen;
	if (regexec(re, fmt, 1, &pmatch, REG_STARTEND) != 0) {
		magic_warn(ml, "bad format for %s: %%%.*s", ml->type_string,
		    (int)fmtlen, fmt);
		return (-1);
	}

	return (0);
}

static u_int
magic_get_strength(struct magic_line *ml)
{
	int	n;
	size_t	size;

	if (ml->type == MAGIC_TYPE_NONE)
		return (0);

	if (ml->test_not || ml->test_operator == 'x') {
		n = 1;
		goto skip;
	}

	n = 2 * MAGIC_STRENGTH_MULTIPLIER;
	switch (ml->type) {
	case MAGIC_TYPE_NONE:
	case MAGIC_TYPE_DEFAULT:
		return (0);
	case MAGIC_TYPE_CLEAR:
	case MAGIC_TYPE_NAME:
	case MAGIC_TYPE_USE:
		break;
	case MAGIC_TYPE_BYTE:
	case MAGIC_TYPE_UBYTE:
		n += 1 * MAGIC_STRENGTH_MULTIPLIER;
		break;
	case MAGIC_TYPE_SHORT:
	case MAGIC_TYPE_USHORT:
	case MAGIC_TYPE_BESHORT:
	case MAGIC_TYPE_UBESHORT:
	case MAGIC_TYPE_LESHORT:
	case MAGIC_TYPE_ULESHORT:
		n += 2 * MAGIC_STRENGTH_MULTIPLIER;
		break;
	case MAGIC_TYPE_LONG:
	case MAGIC_TYPE_ULONG:
	case MAGIC_TYPE_FLOAT:
	case MAGIC_TYPE_DATE:
	case MAGIC_TYPE_LDATE:
	case MAGIC_TYPE_UDATE:
	case MAGIC_TYPE_ULDATE:
	case MAGIC_TYPE_BELONG:
	case MAGIC_TYPE_UBELONG:
	case MAGIC_TYPE_BEFLOAT:
	case MAGIC_TYPE_BEDATE:
	case MAGIC_TYPE_BELDATE:
	case MAGIC_TYPE_UBEDATE:
	case MAGIC_TYPE_UBELDATE:
		n += 4 * MAGIC_STRENGTH_MULTIPLIER;
		break;
	case MAGIC_TYPE_QUAD:
	case MAGIC_TYPE_UQUAD:
	case MAGIC_TYPE_DOUBLE:
	case MAGIC_TYPE_QDATE:
	case MAGIC_TYPE_QLDATE:
	case MAGIC_TYPE_UQDATE:
	case MAGIC_TYPE_UQLDATE:
	case MAGIC_TYPE_BEQUAD:
	case MAGIC_TYPE_UBEQUAD:
	case MAGIC_TYPE_BEDOUBLE:
	case MAGIC_TYPE_BEQDATE:
	case MAGIC_TYPE_BEQLDATE:
	case MAGIC_TYPE_UBEQDATE:
	case MAGIC_TYPE_UBEQLDATE:
	case MAGIC_TYPE_LEQUAD:
	case MAGIC_TYPE_ULEQUAD:
	case MAGIC_TYPE_LEDOUBLE:
	case MAGIC_TYPE_LEQDATE:
	case MAGIC_TYPE_LEQLDATE:
	case MAGIC_TYPE_ULEQDATE:
	case MAGIC_TYPE_ULEQLDATE:
	case MAGIC_TYPE_LELONG:
	case MAGIC_TYPE_ULELONG:
	case MAGIC_TYPE_LEFLOAT:
	case MAGIC_TYPE_LEDATE:
	case MAGIC_TYPE_LELDATE:
	case MAGIC_TYPE_ULEDATE:
	case MAGIC_TYPE_ULELDATE:
	case MAGIC_TYPE_MELONG:
	case MAGIC_TYPE_MEDATE:
	case MAGIC_TYPE_MELDATE:
		n += 8 * MAGIC_STRENGTH_MULTIPLIER;
		break;
	case MAGIC_TYPE_STRING:
	case MAGIC_TYPE_PSTRING:
		n += ml->test_string_size * MAGIC_STRENGTH_MULTIPLIER;
		break;
	case MAGIC_TYPE_BESTRING16:
	case MAGIC_TYPE_LESTRING16:
		n += ml->test_string_size * MAGIC_STRENGTH_MULTIPLIER / 2;
		break;
	case MAGIC_TYPE_REGEX:
	case MAGIC_TYPE_SEARCH:
		size = MAGIC_STRENGTH_MULTIPLIER / ml->test_string_size;
		if (size < 1)
			size = 1;
		n += ml->test_string_size * size;
		break;
	}
	switch (ml->test_operator) {
	case '=':
		n += MAGIC_STRENGTH_MULTIPLIER;
		break;
	case '<':
	case '>':
	case '[':
	case ']':
		n -= 2 * MAGIC_STRENGTH_MULTIPLIER;
		break;
	case '^':
	case '&':
		n -= MAGIC_STRENGTH_MULTIPLIER;
		break;
	}

skip:
	switch (ml->strength_operator) {
	case '+':
		n += ml->strength_value;
		break;
	case '-':
		n -= ml->strength_value;
		break;
	case '*':
		n *= ml->strength_value;
		break;
	case '/':
		n /= ml->strength_value;
		break;
	}
	return (n <= 0 ? 1 : n);
}

static int
magic_get_string(char **line, char *out, size_t *outlen)
{
	char	*start, *cp, c;
	int	 d0, d1, d2;

	start = out;
	for (cp = *line; *cp != '\0' && !isspace((u_char)*cp); cp++) {
		if (*cp != '\\') {
			*out++ = *cp;
			continue;
		}

		switch (c = *++cp) {
		case '\0': /* end of line */
			return (-1);
		case ' ':
			*out++ = ' ';
			break;
		case '0':
		case '1':
		case '2':
		case '3':
		case '4':
		case '5':
		case '6':
		case '7':
			d0 = magic_odigit(cp[0]);
			if (cp[0] != '\0')
				d1 = magic_odigit(cp[1]);
			else
				d1 = -1;
			if (cp[0] != '\0' && cp[1] != '\0')
				d2 = magic_odigit(cp[2]);
			else
				d2 = -1;

			if (d0 != -1 && d1 != -1 && d2 != -1) {
				*out = d2 | (d1 << 3) | (d0 << 6);
				cp += 2;
			} else if (d0 != -1 && d1 != -1) {
				*out = d1 | (d0 << 3);
				cp++;
			} else if (d0 != -1)
				*out = d0;
			else
				return (-1);
			out++;
			break;
		case 'x':
			d0 = magic_xdigit(cp[1]);
			if (cp[1] != '\0')
				d1 = magic_xdigit(cp[2]);
			else
				d1 = -1;

			if (d0 != -1 && d1 != -1) {
				*out = d1 | (d0 << 4);
				cp += 2;
			} else if (d0 != -1) {
				*out = d0;
				cp++;
			} else
				return (-1);
			out++;

			break;
		case 'a':
			*out++ = '\a';
			break;
		case 'b':
			*out++ = '\b';
			break;
		case 't':
			*out++ = '\t';
			break;
		case 'f':
			*out++ = '\f';
			break;
		case 'n':
			*out++ = '\n';
			break;
		case 'r':
			*out++ = '\r';
			break;
		case '\\':
			*out++ = '\\';
			break;
		case '\'':
			*out++ = '\'';
			break;
		case '\"':
			*out++ = '\"';
			break;
		default:
			*out++ = c;
			break;
		}
	}
	*out = '\0';
	*outlen = out - start;

	*line = cp;
	return (0);
}

static int
magic_parse_offset(struct magic_line *ml, char **line)
{
	char	*copy, *s, *cp, *endptr;

	while (isspace((u_char)**line))
		(*line)++;
	copy = s = cp = xmalloc(strlen(*line) + 1);
	while (**line != '\0' && !isspace((u_char)**line))
		*cp++ = *(*line)++;
	*cp = '\0';

	ml->offset = 0;
	ml->offset_relative = 0;

	ml->indirect_type = ' ';
	ml->indirect_relative = 0;
	ml->indirect_offset = 0;
	ml->indirect_operator = ' ';
	ml->indirect_operand = 0;

	if (*s == '&') {
		ml->offset_relative = 1;
		s++;
	}

	if (*s != '(') {
		endptr = magic_strtoll(s, &ml->offset);
		if (endptr == NULL || *endptr != '\0') {
			magic_warn(ml, "missing closing bracket");
			goto fail;
		}
		if (ml->offset < 0 && !ml->offset_relative) {
			magic_warn(ml, "negative absolute offset");
			goto fail;
		}
		goto done;
	}
	s++;

	if (*s == '&') {
		ml->indirect_relative = 1;
		s++;
	}

	endptr = magic_strtoll(s, &ml->indirect_offset);
	if (endptr == NULL) {
		magic_warn(ml, "can't parse offset: %s", s);
		goto fail;
	}
	s = endptr;
	if (*s == ')')
		goto done;

	if (*s == '.') {
		s++;
		if (*s == '\0' || strchr("bslBSL", *s) == NULL) {
			magic_warn(ml, "unknown offset type: %c", *s);
			goto fail;
		}
		ml->indirect_type = *s;
		s++;
		if (*s == ')')
			goto done;
	}

	if (*s == '\0' || strchr("+-*", *s) == NULL) {
		magic_warn(ml, "unknown offset operator: %c", *s);
		goto fail;
	}
	ml->indirect_operator = *s;
	s++;
	if (*s == ')')
		goto done;

	if (*s == '(') {
		s++;
		endptr = magic_strtoll(s, &ml->indirect_operand);
		if (endptr == NULL || *endptr != ')') {
			magic_warn(ml, "missing closing bracket");
			goto fail;
		}
		if (*++endptr != ')') {
			magic_warn(ml, "missing closing bracket");
			goto fail;
		}
	} else {
		endptr = magic_strtoll(s, &ml->indirect_operand);
		if (endptr == NULL || *endptr != ')') {
			magic_warn(ml, "missing closing bracket");
			goto fail;
		}
	}

done:
	free(copy);
	return (0);

fail:
	free(copy);
	return (-1);
}

static int
magic_parse_type(struct magic_line *ml, char **line)
{
	char	*copy, *s, *cp, *endptr;

	while (isspace((u_char)**line))
		(*line)++;
	copy = s = cp = xmalloc(strlen(*line) + 1);
	while (**line != '\0' && !isspace((u_char)**line))
		*cp++ = *(*line)++;
	*cp = '\0';

	ml->type = MAGIC_TYPE_NONE;
	ml->type_operator = ' ';
	ml->type_operand = 0;

	if (strcmp(s, "name") == 0) {
		ml->type = MAGIC_TYPE_NAME;
		ml->type_string = xstrdup(s);
		goto done;
	}
	if (strcmp(s, "use") == 0) {
		ml->type = MAGIC_TYPE_USE;
		ml->type_string = xstrdup(s);
		goto done;
	}

	if (strncmp(s, "string", (sizeof "string") - 1) == 0 ||
	    strncmp(s, "ustring", (sizeof "ustring") - 1) == 0) {
		if (*s == 'u')
			ml->type_string = xstrdup(s + 1);
		else
			ml->type_string = xstrdup(s);
		ml->type = MAGIC_TYPE_STRING;
		magic_mark_text(ml, 0);
		goto done;
	}
	if (strncmp(s, "pstring", (sizeof "pstring") - 1) == 0 ||
	    strncmp(s, "upstring", (sizeof "upstring") - 1) == 0) {
		if (*s == 'u')
			ml->type_string = xstrdup(s + 1);
		else
			ml->type_string = xstrdup(s);
		ml->type = MAGIC_TYPE_PSTRING;
		magic_mark_text(ml, 0);
		goto done;
	}
	if (strncmp(s, "search", (sizeof "search") - 1) == 0 ||
	    strncmp(s, "usearch", (sizeof "usearch") - 1) == 0) {
		if (*s == 'u')
			ml->type_string = xstrdup(s + 1);
		else
			ml->type_string = xstrdup(s);
		ml->type = MAGIC_TYPE_SEARCH;
		goto done;
	}
	if (strncmp(s, "regex", (sizeof "regex") - 1) == 0 ||
	    strncmp(s, "uregex", (sizeof "uregex") - 1) == 0) {
		if (*s == 'u')
			ml->type_string = xstrdup(s + 1);
		else
			ml->type_string = xstrdup(s);
		ml->type = MAGIC_TYPE_REGEX;
		goto done;
	}
	ml->type_string = xstrdup(s);

	cp = &s[strcspn(s, "+-&/%*")];
	if (*cp != '\0') {
		ml->type_operator = *cp;
		endptr = magic_strtoull(cp + 1, &ml->type_operand);
		if (endptr == NULL || *endptr != '\0') {
			magic_warn(ml, "can't parse operand: %s", cp + 1);
			goto fail;
		}
		*cp = '\0';
	}

	if (strcmp(s, "byte") == 0)
		ml->type = MAGIC_TYPE_BYTE;
	else if (strcmp(s, "short") == 0)
		ml->type = MAGIC_TYPE_SHORT;
	else if (strcmp(s, "long") == 0)
		ml->type = MAGIC_TYPE_LONG;
	else if (strcmp(s, "quad") == 0)
		ml->type = MAGIC_TYPE_QUAD;
	else if (strcmp(s, "ubyte") == 0)
		ml->type = MAGIC_TYPE_UBYTE;
	else if (strcmp(s, "ushort") == 0)
		ml->type = MAGIC_TYPE_USHORT;
	else if (strcmp(s, "ulong") == 0)
		ml->type = MAGIC_TYPE_ULONG;
	else if (strcmp(s, "uquad") == 0)
		ml->type = MAGIC_TYPE_UQUAD;
	else if (strcmp(s, "float") == 0 || strcmp(s, "ufloat") == 0)
		ml->type = MAGIC_TYPE_FLOAT;
	else if (strcmp(s, "double") == 0 || strcmp(s, "udouble") == 0)
		ml->type = MAGIC_TYPE_DOUBLE;
	else if (strcmp(s, "date") == 0)
		ml->type = MAGIC_TYPE_DATE;
	else if (strcmp(s, "qdate") == 0)
		ml->type = MAGIC_TYPE_QDATE;
	else if (strcmp(s, "ldate") == 0)
		ml->type = MAGIC_TYPE_LDATE;
	else if (strcmp(s, "qldate") == 0)
		ml->type = MAGIC_TYPE_QLDATE;
	else if (strcmp(s, "udate") == 0)
		ml->type = MAGIC_TYPE_UDATE;
	else if (strcmp(s, "uqdate") == 0)
		ml->type = MAGIC_TYPE_UQDATE;
	else if (strcmp(s, "uldate") == 0)
		ml->type = MAGIC_TYPE_ULDATE;
	else if (strcmp(s, "uqldate") == 0)
		ml->type = MAGIC_TYPE_UQLDATE;
	else if (strcmp(s, "beshort") == 0)
		ml->type = MAGIC_TYPE_BESHORT;
	else if (strcmp(s, "belong") == 0)
		ml->type = MAGIC_TYPE_BELONG;
	else if (strcmp(s, "bequad") == 0)
		ml->type = MAGIC_TYPE_BEQUAD;
	else if (strcmp(s, "ubeshort") == 0)
		ml->type = MAGIC_TYPE_UBESHORT;
	else if (strcmp(s, "ubelong") == 0)
		ml->type = MAGIC_TYPE_UBELONG;
	else if (strcmp(s, "ubequad") == 0)
		ml->type = MAGIC_TYPE_UBEQUAD;
	else if (strcmp(s, "befloat") == 0 || strcmp(s, "ubefloat") == 0)
		ml->type = MAGIC_TYPE_BEFLOAT;
	else if (strcmp(s, "bedouble") == 0 || strcmp(s, "ubedouble") == 0)
		ml->type = MAGIC_TYPE_BEDOUBLE;
	else if (strcmp(s, "bedate") == 0)
		ml->type = MAGIC_TYPE_BEDATE;
	else if (strcmp(s, "beqdate") == 0)
		ml->type = MAGIC_TYPE_BEQDATE;
	else if (strcmp(s, "beldate") == 0)
		ml->type = MAGIC_TYPE_BELDATE;
	else if (strcmp(s, "beqldate") == 0)
		ml->type = MAGIC_TYPE_BEQLDATE;
	else if (strcmp(s, "ubedate") == 0)
		ml->type = MAGIC_TYPE_UBEDATE;
	else if (strcmp(s, "ubeqdate") == 0)
		ml->type = MAGIC_TYPE_UBEQDATE;
	else if (strcmp(s, "ubeldate") == 0)
		ml->type = MAGIC_TYPE_UBELDATE;
	else if (strcmp(s, "ubeqldate") == 0)
		ml->type = MAGIC_TYPE_UBEQLDATE;
	else if (strcmp(s, "bestring16") == 0 || strcmp(s, "ubestring16") == 0)
		ml->type = MAGIC_TYPE_BESTRING16;
	else if (strcmp(s, "leshort") == 0)
		ml->type = MAGIC_TYPE_LESHORT;
	else if (strcmp(s, "lelong") == 0)
		ml->type = MAGIC_TYPE_LELONG;
	else if (strcmp(s, "lequad") == 0)
		ml->type = MAGIC_TYPE_LEQUAD;
	else if (strcmp(s, "uleshort") == 0)
		ml->type = MAGIC_TYPE_ULESHORT;
	else if (strcmp(s, "ulelong") == 0)
		ml->type = MAGIC_TYPE_ULELONG;
	else if (strcmp(s, "ulequad") == 0)
		ml->type = MAGIC_TYPE_ULEQUAD;
	else if (strcmp(s, "lefloat") == 0 || strcmp(s, "ulefloat") == 0)
		ml->type = MAGIC_TYPE_LEFLOAT;
	else if (strcmp(s, "ledouble") == 0 || strcmp(s, "uledouble") == 0)
		ml->type = MAGIC_TYPE_LEDOUBLE;
	else if (strcmp(s, "ledate") == 0)
		ml->type = MAGIC_TYPE_LEDATE;
	else if (strcmp(s, "leqdate") == 0)
		ml->type = MAGIC_TYPE_LEQDATE;
	else if (strcmp(s, "leldate") == 0)
		ml->type = MAGIC_TYPE_LELDATE;
	else if (strcmp(s, "leqldate") == 0)
		ml->type = MAGIC_TYPE_LEQLDATE;
	else if (strcmp(s, "uledate") == 0)
		ml->type = MAGIC_TYPE_ULEDATE;
	else if (strcmp(s, "uleqdate") == 0)
		ml->type = MAGIC_TYPE_ULEQDATE;
	else if (strcmp(s, "uleldate") == 0)
		ml->type = MAGIC_TYPE_ULELDATE;
	else if (strcmp(s, "uleqldate") == 0)
		ml->type = MAGIC_TYPE_ULEQLDATE;
	else if (strcmp(s, "lestring16") == 0 || strcmp(s, "ulestring16") == 0)
		ml->type = MAGIC_TYPE_LESTRING16;
	else if (strcmp(s, "melong") == 0 || strcmp(s, "umelong") == 0)
		ml->type = MAGIC_TYPE_MELONG;
	else if (strcmp(s, "medate") == 0 || strcmp(s, "umedate") == 0)
		ml->type = MAGIC_TYPE_MEDATE;
	else if (strcmp(s, "meldate") == 0 || strcmp(s, "umeldate") == 0)
		ml->type = MAGIC_TYPE_MELDATE;
	else if (strcmp(s, "default") == 0 || strcmp(s, "udefault") == 0)
		ml->type = MAGIC_TYPE_DEFAULT;
	else if (strcmp(s, "clear") == 0 || strcmp(s, "uclear") == 0)
		ml->type = MAGIC_TYPE_CLEAR;
	else {
		magic_warn(ml, "unknown type: %s", s);
		goto fail;
	}
	magic_mark_text(ml, 0);

done:
	free(copy);
	return (0);

fail:
	free(copy);
	return (-1);
}

static int
magic_parse_value(struct magic_line *ml, char **line)
{
	char	*copy, *s, *cp, *endptr;
	size_t	 slen;
	uint64_t u;

	while (isspace((u_char)**line))
		(*line)++;

	ml->test_operator = '=';
	ml->test_not = 0;
	ml->test_string = NULL;
	ml->test_string_size = 0;
	ml->test_unsigned = 0;
	ml->test_signed = 0;

	if (**line == '\0')
		return (0);

	s = *line;
	if (s[0] == 'x' && (s[1] == '\0' || isspace((u_char)s[1]))) {
		(*line)++;
		ml->test_operator = 'x';
		return (0);
	}

	if (ml->type == MAGIC_TYPE_DEFAULT || ml->type == MAGIC_TYPE_CLEAR) {
		magic_warn(ml, "test specified for default or clear");
		ml->test_operator = 'x';
		return (0);
	}

	if (**line == '!') {
		ml->test_not = 1;
		(*line)++;
	}

	switch (ml->type) {
	case MAGIC_TYPE_NAME:
	case MAGIC_TYPE_USE:
		copy = s = xmalloc(strlen(*line) + 1);
		if (magic_get_string(line, s, &slen) != 0 || slen == 0) {
			magic_warn(ml, "can't parse string");
			goto fail;
		}
		if (slen == 0 || *s == '\0' || strcmp(s, "^") == 0) {
			magic_warn(ml, "invalid name");
			goto fail;
		}
		ml->name = s;
		return (0); /* do not free */
	case MAGIC_TYPE_STRING:
	case MAGIC_TYPE_PSTRING:
	case MAGIC_TYPE_SEARCH:
		if (**line == '>' || **line == '<' || **line == '=') {
			ml->test_operator = **line;
			(*line)++;
		}
		/* FALLTHROUGH */
	case MAGIC_TYPE_REGEX:
		if (**line == '=')
			(*line)++;
		copy = s = xmalloc(strlen(*line) + 1);
		if (magic_get_string(line, s, &slen) != 0) {
			magic_warn(ml, "can't parse string");
			goto fail;
		}
		ml->test_string_size = slen;
		ml->test_string = s;
		return (0); /* do not free */
	default:
		break;
	}

	while (isspace((u_char)**line))
		(*line)++;
	if ((*line)[0] == '<' && (*line)[1] == '=') {
		ml->test_operator = '[';
		(*line) += 2;
	} else if ((*line)[0] == '>' && (*line)[1] == '=') {
		ml->test_operator = ']';
		(*line) += 2;
	} else if (**line != '\0' && strchr("=<>&^", **line) != NULL) {
		ml->test_operator = **line;
		(*line)++;
	}

	while (isspace((u_char)**line))
		(*line)++;
	copy = cp = xmalloc(strlen(*line) + 1);
	while (**line != '\0' && !isspace((u_char)**line))
		*cp++ = *(*line)++;
	*cp = '\0';

	switch (ml->type) {
	case MAGIC_TYPE_FLOAT:
	case MAGIC_TYPE_DOUBLE:
	case MAGIC_TYPE_BEFLOAT:
	case MAGIC_TYPE_BEDOUBLE:
	case MAGIC_TYPE_LEFLOAT:
	case MAGIC_TYPE_LEDOUBLE:
		errno = 0;
		ml->test_double = strtod(copy, &endptr);
		if (errno == ERANGE)
			endptr = NULL;
		break;
	default:
		if (*ml->type_string == 'u')
			endptr = magic_strtoull(copy, &ml->test_unsigned);
		else {
			endptr = magic_strtoll(copy, &ml->test_signed);
			if (endptr == NULL || *endptr != '\0') {
				/*
				 * If we can't parse this as a signed number,
				 * try as unsigned instead.
				 */
				endptr = magic_strtoull(copy, &u);
				if (endptr != NULL && *endptr == '\0')
					ml->test_signed = (int64_t)u;
			}
		}
		break;
	}
	if (endptr == NULL || *endptr != '\0') {
		magic_warn(ml, "can't parse number: %s", copy);
		goto fail;
	}

	free(copy);
	return (0);

fail:
	free(copy);
	return (-1);
}

int
magic_compare(struct magic_line *ml1, struct magic_line *ml2)
{
	if (ml1->strength < ml2->strength)
		return (1);
	if (ml1->strength > ml2->strength)
		return (-1);

	/*
	 * The original file depends on the (undefined!) qsort(3) behaviour
	 * when the strength is equal. This is impossible to reproduce with an
	 * RB tree so just use the line number and hope for the best.
	 */
	if (ml1->line < ml2->line)
		return (-1);
	if (ml1->line > ml2->line)
		return (1);

	return (0);
}
RB_GENERATE(magic_tree, magic_line, node, magic_compare);

int
magic_named_compare(struct magic_line *ml1, struct magic_line *ml2)
{
	return (strcmp(ml1->name, ml2->name));
}
RB_GENERATE(magic_named_tree, magic_line, node, magic_named_compare);

static void
magic_adjust_strength(struct magic *m, u_int at, struct magic_line *ml,
    char *line)
{
	char	*cp, *s;
	int64_t	 value;

	cp = line + (sizeof "!:strength") - 1;
	while (isspace((u_char)*cp))
		cp++;
	s = cp;

	cp = strchr(s, '#');
	if (cp != NULL)
		*cp = '\0';
	cp = s;

	if (*s == '\0' || strchr("+-*/", *s) == NULL) {
		magic_warnm(m, at, "invalid strength operator: %s", s);
		return;
	}
	ml->strength_operator = *cp++;

	while (isspace((u_char)*cp))
		cp++;
	cp = magic_strtoll(cp, &value);
	while (cp != NULL && isspace((u_char)*cp))
		cp++;
	if (cp == NULL || *cp != '\0' || value < 0 || value > 255) {
		magic_warnm(m, at, "invalid strength value: %s", s);
		return;
	}
	ml->strength_value = value;
}

static void
magic_set_mimetype(struct magic *m, u_int at, struct magic_line *ml, char *line)
{
	char	*mimetype, *cp;

	mimetype = line + (sizeof "!:mime") - 1;
	while (isspace((u_char)*mimetype))
		mimetype++;

	cp = strchr(mimetype, '#');
	if (cp != NULL)
		*cp = '\0';

	if (*mimetype != '\0') {
		cp = mimetype + strlen(mimetype) - 1;
		while (cp != mimetype && isspace((u_char)*cp))
			*cp-- = '\0';
	}

	cp = mimetype;
	while (*cp != '\0') {
		if (!isalnum((u_char)*cp) && strchr("/-.+", *cp) == NULL)
			break;
		cp++;
	}
	if (*mimetype == '\0' || *cp != '\0') {
		magic_warnm(m, at, "invalid MIME type: %s", mimetype);
		return;
	}
	if (ml == NULL) {
		magic_warnm(m, at, "stray MIME type: %s", mimetype);
		return;
	}
	ml->mimetype = xstrdup(mimetype);
}

struct magic *
magic_load(FILE *f, const char *path, int warnings)
{
	struct magic		*m;
	struct magic_line	*ml = NULL, *parent, *parent0;
	char			*line, *tmp;
	size_t			 size;
	ssize_t			 slen;
	u_int			 at, level, n, i;

	m = xcalloc(1, sizeof *m);
	m->path = xstrdup(path);
	m->warnings = warnings;
	RB_INIT(&m->tree);

	parent = NULL;
	parent0 = NULL;
	level = 0;

	at = 0;
	tmp = NULL;
	size = 0;
	while ((slen = getline(&tmp, &size, f)) != -1) {
		line = tmp;
		if (line[slen - 1] == '\n')
			line[slen - 1] = '\0';

		at++;

		while (isspace((u_char)*line))
		    line++;
		if (*line == '\0' || *line == '#')
			continue;

		if (strncmp (line, "!:mime", 6) == 0) {
			magic_set_mimetype(m, at, ml, line);
			continue;
		}
		if (strncmp (line, "!:strength", 10) == 0) {
			magic_adjust_strength(m, at, ml, line);
			continue;
		}
		if (strncmp (line, "!:", 2) == 0) {
			for (i = 0; i < 64 && line[i] != '\0'; i++) {
				if (isspace((u_char)line[i]))
					break;
			}
			magic_warnm(m, at, "%.*s not supported", i, line);
			continue;
		}

		n = 0;
		for (; *line == '>'; line++)
			n++;

		ml = xcalloc(1, sizeof *ml);
		ml->root = m;
		ml->line = at;
		ml->type = MAGIC_TYPE_NONE;
		TAILQ_INIT(&ml->children);
		ml->text = 1;

		/*
		 * At this point n is the level we want, level is the current
		 * level. parent0 is the last line at the same level and parent
		 * is the last line at the previous level.
		 */
		if (n == level + 1) {
			parent = parent0;
		} else if (n < level) {
			for (i = n; i < level && parent != NULL; i++)
				parent = parent->parent;
		} else if (n != level) {
			magic_warn(ml, "level skipped (%u->%u)", level, n);
			free(ml);
			continue;
		}
		ml->parent = parent;
		level = n;

		if (magic_parse_offset(ml, &line) != 0 ||
		    magic_parse_type(ml, &line) != 0 ||
		    magic_parse_value(ml, &line) != 0 ||
		    magic_set_result(ml, line) != 0) {
			/*
			 * An invalid line still needs to appear in the tree in
			 * case it has any children.
			 */
			ml->type = MAGIC_TYPE_NONE;
		}

		ml->strength = magic_get_strength(ml);
		if (ml->parent == NULL) {
			if (ml->name != NULL)
				RB_INSERT(magic_named_tree, &m->named, ml);
			else
				RB_INSERT(magic_tree, &m->tree, ml);
		} else
			TAILQ_INSERT_TAIL(&ml->parent->children, ml, entry);
		parent0 = ml;
	}
	free(tmp);
	if (ferror(f))
		err(1, "%s", path);

	return (m);
}