Annotation of src/usr.bin/file/magic-load.c, Revision 1.1
1.1 ! nicm 1: /* $OpenBSD$ */
! 2:
! 3: /*
! 4: * Copyright (c) 2015 Nicholas Marriott <nicm@openbsd.org>
! 5: *
! 6: * Permission to use, copy, modify, and distribute this software for any
! 7: * purpose with or without fee is hereby granted, provided that the above
! 8: * copyright notice and this permission notice appear in all copies.
! 9: *
! 10: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
! 11: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
! 12: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
! 13: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
! 14: * WHATSOEVER RESULTING FROM LOSS OF MIND, USE, DATA OR PROFITS, WHETHER
! 15: * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
! 16: * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
! 17: */
! 18:
! 19: #include <sys/types.h>
! 20:
! 21: #include <ctype.h>
! 22: #include <errno.h>
! 23: #include <limits.h>
! 24: #include <regex.h>
! 25: #include <stdarg.h>
! 26: #include <stdio.h>
! 27: #include <stdlib.h>
! 28: #include <string.h>
! 29:
! 30: #include "magic.h"
! 31: #include "xmalloc.h"
! 32:
/*
 * Translate an octal digit character into its value.
 * Yields 0-7 for '0' through '7' and -1 for every other character.
 */
static int
magic_odigit(u_char c)
{
	return ((c < '0' || c > '7') ? -1 : c - '0');
}
! 40:
/*
 * Translate a hexadecimal digit character into its value.
 * Accepts '0'-'9', 'a'-'f' and 'A'-'F'; yields -1 for anything else.
 */
static int
magic_xdigit(u_char c)
{
	int	value = -1;

	if (c >= 'A' && c <= 'F')
		value = c - 'A' + 10;
	else if (c >= 'a' && c <= 'f')
		value = c - 'a' + 10;
	else if (c >= '0' && c <= '9')
		value = c - '0';
	return (value);
}
! 52:
! 53: static void
! 54: magic_mark_text(struct magic_line *ml, int text)
! 55: {
! 56: do {
! 57: ml->text = text;
! 58: ml = ml->parent;
! 59: } while (ml != NULL);
! 60: }
! 61:
/*
 * Compile the extended regular expression p into re (without subexpression
 * reporting). On failure a warning naming the pattern is issued against ml
 * and -1 is returned; on success the result is 0.
 */
static int
magic_make_pattern(struct magic_line *ml, const char *name, regex_t *re,
    const char *p)
{
	char	msg[256];
	int	rc;

	rc = regcomp(re, p, REG_EXTENDED|REG_NOSUB);
	if (rc == 0)
		return (0);

	regerror(rc, re, msg, sizeof msg);
	magic_warn(ml, "bad %s pattern: %s", name, msg);
	return (-1);
}
! 77:
! 78: static int
! 79: magic_set_result(struct magic_line *ml, const char *s)
! 80: {
! 81: const char *fmt;
! 82: const char *endfmt;
! 83: const char *cp;
! 84: regex_t *re = NULL;
! 85: regmatch_t pmatch;
! 86: size_t fmtlen;
! 87:
! 88: while (isspace((u_char)*s))
! 89: s++;
! 90: if (*s == '\0') {
! 91: ml->result = NULL;
! 92: return (0);
! 93: }
! 94: ml->result = xstrdup(s);
! 95:
! 96: fmt = NULL;
! 97: for (cp = s; *cp != '\0'; cp++) {
! 98: if (cp[0] == '%' && cp[1] != '%') {
! 99: if (fmt != NULL) {
! 100: magic_warn(ml, "multiple formats");
! 101: return (-1);
! 102: }
! 103: fmt = cp;
! 104: }
! 105: }
! 106: if (fmt == NULL)
! 107: return (0);
! 108: fmt++;
! 109:
! 110: for (endfmt = fmt; *endfmt != '\0'; endfmt++) {
! 111: if (strchr("diouxXeEfFgGsc", *endfmt) != NULL)
! 112: break;
! 113: }
! 114: if (*endfmt == '\0') {
! 115: magic_warn(ml, "unterminated format");
! 116: return (-1);
! 117: }
! 118: fmtlen = endfmt + 1 - fmt;
! 119: if (fmtlen > 32) {
! 120: magic_warn(ml, "format too long");
! 121: return (-1);
! 122: }
! 123:
! 124: if (*endfmt == 's') {
! 125: switch (ml->type) {
! 126: case MAGIC_TYPE_DATE:
! 127: case MAGIC_TYPE_LDATE:
! 128: case MAGIC_TYPE_UDATE:
! 129: case MAGIC_TYPE_ULDATE:
! 130: case MAGIC_TYPE_BEDATE:
! 131: case MAGIC_TYPE_BELDATE:
! 132: case MAGIC_TYPE_UBEDATE:
! 133: case MAGIC_TYPE_UBELDATE:
! 134: case MAGIC_TYPE_QDATE:
! 135: case MAGIC_TYPE_QLDATE:
! 136: case MAGIC_TYPE_UQDATE:
! 137: case MAGIC_TYPE_UQLDATE:
! 138: case MAGIC_TYPE_BEQDATE:
! 139: case MAGIC_TYPE_BEQLDATE:
! 140: case MAGIC_TYPE_UBEQDATE:
! 141: case MAGIC_TYPE_UBEQLDATE:
! 142: case MAGIC_TYPE_LEQDATE:
! 143: case MAGIC_TYPE_LEQLDATE:
! 144: case MAGIC_TYPE_ULEQDATE:
! 145: case MAGIC_TYPE_ULEQLDATE:
! 146: case MAGIC_TYPE_LEDATE:
! 147: case MAGIC_TYPE_LELDATE:
! 148: case MAGIC_TYPE_ULEDATE:
! 149: case MAGIC_TYPE_ULELDATE:
! 150: case MAGIC_TYPE_MEDATE:
! 151: case MAGIC_TYPE_MELDATE:
! 152: case MAGIC_TYPE_STRING:
! 153: case MAGIC_TYPE_PSTRING:
! 154: case MAGIC_TYPE_BESTRING16:
! 155: case MAGIC_TYPE_LESTRING16:
! 156: case MAGIC_TYPE_REGEX:
! 157: case MAGIC_TYPE_SEARCH:
! 158: break;
! 159: default:
! 160: ml->stringify = 1;
! 161: break;
! 162: }
! 163: }
! 164:
! 165: if (!ml->root->compiled) {
! 166: /*
! 167: * XXX %ld (and %lu and so on) is invalid on 64-bit platforms
! 168: * with byte, short, long. We get lucky because our first and
! 169: * only argument ends up in a register. Accept it for now.
! 170: */
! 171: if (magic_make_pattern(ml, "short", &ml->root->format_short,
! 172: "^-?[0-9]*(\\.[0-9]*)?(c|(l|h|hh)?[iduxX])$") != 0)
! 173: return (-1);
! 174: if (magic_make_pattern(ml, "long", &ml->root->format_long,
! 175: "^-?[0-9]*(\\.[0-9]*)?(c|(l|h|hh)?[iduxX])$") != 0)
! 176: return (-1);
! 177: if (magic_make_pattern(ml, "quad", &ml->root->format_quad,
! 178: "^-?[0-9]*(\\.[0-9]*)?ll[iduxX]$") != 0)
! 179: return (-1);
! 180: if (magic_make_pattern(ml, "float", &ml->root->format_float,
! 181: "^-?[0-9]*(\\.[0-9]*)?[eEfFgG]$") != 0)
! 182: return (-1);
! 183: if (magic_make_pattern(ml, "string", &ml->root->format_string,
! 184: "^-?[0-9]*(\\.[0-9]*)?s$") != 0)
! 185: return (-1);
! 186: ml->root->compiled = 1;
! 187: }
! 188:
! 189: if (ml->stringify)
! 190: re = &ml->root->format_string;
! 191: else {
! 192: switch (ml->type) {
! 193: case MAGIC_TYPE_NONE:
! 194: case MAGIC_TYPE_DEFAULT:
! 195: return (0); /* don't use result */
! 196: case MAGIC_TYPE_BYTE:
! 197: case MAGIC_TYPE_UBYTE:
! 198: case MAGIC_TYPE_SHORT:
! 199: case MAGIC_TYPE_USHORT:
! 200: case MAGIC_TYPE_BESHORT:
! 201: case MAGIC_TYPE_UBESHORT:
! 202: case MAGIC_TYPE_LESHORT:
! 203: case MAGIC_TYPE_ULESHORT:
! 204: re = &ml->root->format_short;
! 205: break;
! 206: case MAGIC_TYPE_LONG:
! 207: case MAGIC_TYPE_ULONG:
! 208: case MAGIC_TYPE_BELONG:
! 209: case MAGIC_TYPE_UBELONG:
! 210: case MAGIC_TYPE_LELONG:
! 211: case MAGIC_TYPE_ULELONG:
! 212: case MAGIC_TYPE_MELONG:
! 213: re = &ml->root->format_long;
! 214: break;
! 215: case MAGIC_TYPE_QUAD:
! 216: case MAGIC_TYPE_UQUAD:
! 217: case MAGIC_TYPE_BEQUAD:
! 218: case MAGIC_TYPE_UBEQUAD:
! 219: case MAGIC_TYPE_LEQUAD:
! 220: case MAGIC_TYPE_ULEQUAD:
! 221: re = &ml->root->format_quad;
! 222: break;
! 223: case MAGIC_TYPE_FLOAT:
! 224: case MAGIC_TYPE_BEFLOAT:
! 225: case MAGIC_TYPE_LEFLOAT:
! 226: case MAGIC_TYPE_DOUBLE:
! 227: case MAGIC_TYPE_BEDOUBLE:
! 228: case MAGIC_TYPE_LEDOUBLE:
! 229: re = &ml->root->format_float;
! 230: break;
! 231: case MAGIC_TYPE_DATE:
! 232: case MAGIC_TYPE_LDATE:
! 233: case MAGIC_TYPE_UDATE:
! 234: case MAGIC_TYPE_ULDATE:
! 235: case MAGIC_TYPE_BEDATE:
! 236: case MAGIC_TYPE_BELDATE:
! 237: case MAGIC_TYPE_UBEDATE:
! 238: case MAGIC_TYPE_UBELDATE:
! 239: case MAGIC_TYPE_QDATE:
! 240: case MAGIC_TYPE_QLDATE:
! 241: case MAGIC_TYPE_UQDATE:
! 242: case MAGIC_TYPE_UQLDATE:
! 243: case MAGIC_TYPE_BEQDATE:
! 244: case MAGIC_TYPE_BEQLDATE:
! 245: case MAGIC_TYPE_UBEQDATE:
! 246: case MAGIC_TYPE_UBEQLDATE:
! 247: case MAGIC_TYPE_LEQDATE:
! 248: case MAGIC_TYPE_LEQLDATE:
! 249: case MAGIC_TYPE_ULEQDATE:
! 250: case MAGIC_TYPE_ULEQLDATE:
! 251: case MAGIC_TYPE_LEDATE:
! 252: case MAGIC_TYPE_LELDATE:
! 253: case MAGIC_TYPE_ULEDATE:
! 254: case MAGIC_TYPE_ULELDATE:
! 255: case MAGIC_TYPE_MEDATE:
! 256: case MAGIC_TYPE_MELDATE:
! 257: case MAGIC_TYPE_STRING:
! 258: case MAGIC_TYPE_PSTRING:
! 259: case MAGIC_TYPE_REGEX:
! 260: case MAGIC_TYPE_SEARCH:
! 261: re = &ml->root->format_string;
! 262: break;
! 263: case MAGIC_TYPE_BESTRING16:
! 264: case MAGIC_TYPE_LESTRING16:
! 265: magic_warn(ml, "unsupported type %s", ml->type_string);
! 266: return (-1);
! 267: }
! 268: }
! 269:
! 270: pmatch.rm_so = 0;
! 271: pmatch.rm_eo = fmtlen;
! 272: if (regexec(re, fmt, 1, &pmatch, REG_STARTEND) != 0) {
! 273: magic_warn(ml, "bad format for %s: %%%.*s", ml->type_string,
! 274: (int)fmtlen, fmt);
! 275: return (-1);
! 276: }
! 277:
! 278: return (0);
! 279: }
! 280:
! 281: static u_int
! 282: magic_get_strength(struct magic_line *ml)
! 283: {
! 284: int n;
! 285: size_t size;
! 286:
! 287: if (ml->test_not || ml->test_operator == 'x')
! 288: return (1);
! 289:
! 290: n = 20;
! 291: switch (ml->type) {
! 292: case MAGIC_TYPE_NONE:
! 293: case MAGIC_TYPE_DEFAULT:
! 294: return (0);
! 295: case MAGIC_TYPE_BYTE:
! 296: case MAGIC_TYPE_UBYTE:
! 297: n += 1 * MAGIC_STRENGTH_MULTIPLIER;
! 298: break;
! 299: case MAGIC_TYPE_SHORT:
! 300: case MAGIC_TYPE_USHORT:
! 301: case MAGIC_TYPE_BESHORT:
! 302: case MAGIC_TYPE_UBESHORT:
! 303: case MAGIC_TYPE_LESHORT:
! 304: case MAGIC_TYPE_ULESHORT:
! 305: n += 2 * MAGIC_STRENGTH_MULTIPLIER;
! 306: break;
! 307: case MAGIC_TYPE_LONG:
! 308: case MAGIC_TYPE_ULONG:
! 309: case MAGIC_TYPE_FLOAT:
! 310: case MAGIC_TYPE_DATE:
! 311: case MAGIC_TYPE_LDATE:
! 312: case MAGIC_TYPE_UDATE:
! 313: case MAGIC_TYPE_ULDATE:
! 314: case MAGIC_TYPE_BELONG:
! 315: case MAGIC_TYPE_UBELONG:
! 316: case MAGIC_TYPE_BEFLOAT:
! 317: case MAGIC_TYPE_BEDATE:
! 318: case MAGIC_TYPE_BELDATE:
! 319: case MAGIC_TYPE_UBEDATE:
! 320: case MAGIC_TYPE_UBELDATE:
! 321: n += 4 * MAGIC_STRENGTH_MULTIPLIER;
! 322: break;
! 323: case MAGIC_TYPE_QUAD:
! 324: case MAGIC_TYPE_UQUAD:
! 325: case MAGIC_TYPE_DOUBLE:
! 326: case MAGIC_TYPE_QDATE:
! 327: case MAGIC_TYPE_QLDATE:
! 328: case MAGIC_TYPE_UQDATE:
! 329: case MAGIC_TYPE_UQLDATE:
! 330: case MAGIC_TYPE_BEQUAD:
! 331: case MAGIC_TYPE_UBEQUAD:
! 332: case MAGIC_TYPE_BEDOUBLE:
! 333: case MAGIC_TYPE_BEQDATE:
! 334: case MAGIC_TYPE_BEQLDATE:
! 335: case MAGIC_TYPE_UBEQDATE:
! 336: case MAGIC_TYPE_UBEQLDATE:
! 337: case MAGIC_TYPE_LEQUAD:
! 338: case MAGIC_TYPE_ULEQUAD:
! 339: case MAGIC_TYPE_LEDOUBLE:
! 340: case MAGIC_TYPE_LEQDATE:
! 341: case MAGIC_TYPE_LEQLDATE:
! 342: case MAGIC_TYPE_ULEQDATE:
! 343: case MAGIC_TYPE_ULEQLDATE:
! 344: case MAGIC_TYPE_LELONG:
! 345: case MAGIC_TYPE_ULELONG:
! 346: case MAGIC_TYPE_LEFLOAT:
! 347: case MAGIC_TYPE_LEDATE:
! 348: case MAGIC_TYPE_LELDATE:
! 349: case MAGIC_TYPE_ULEDATE:
! 350: case MAGIC_TYPE_ULELDATE:
! 351: case MAGIC_TYPE_MELONG:
! 352: case MAGIC_TYPE_MEDATE:
! 353: case MAGIC_TYPE_MELDATE:
! 354: n += 8 * MAGIC_STRENGTH_MULTIPLIER;
! 355: break;
! 356: case MAGIC_TYPE_STRING:
! 357: case MAGIC_TYPE_PSTRING:
! 358: n += ml->test_string_size * MAGIC_STRENGTH_MULTIPLIER;
! 359: break;
! 360: case MAGIC_TYPE_BESTRING16:
! 361: case MAGIC_TYPE_LESTRING16:
! 362: n += ml->test_string_size * MAGIC_STRENGTH_MULTIPLIER / 2;
! 363: break;
! 364: case MAGIC_TYPE_REGEX:
! 365: case MAGIC_TYPE_SEARCH:
! 366: size = MAGIC_STRENGTH_MULTIPLIER / ml->test_string_size;
! 367: if (size < 1)
! 368: size = 1;
! 369: n += ml->test_string_size * size;
! 370: break;
! 371: }
! 372: switch (ml->test_operator) {
! 373: case '=':
! 374: n += MAGIC_STRENGTH_MULTIPLIER;
! 375: break;
! 376: case '<':
! 377: case '>':
! 378: case '[':
! 379: case ']':
! 380: n -= 2 * MAGIC_STRENGTH_MULTIPLIER;
! 381: break;
! 382: case '^':
! 383: case '&':
! 384: n -= MAGIC_STRENGTH_MULTIPLIER;
! 385: break;
! 386: }
! 387: return (n <= 0 ? 1 : n);
! 388: }
! 389:
! 390: static int
! 391: magic_get_string(char **line, char *out, size_t *outlen)
! 392: {
! 393: char *start, *cp, c;
! 394: int d0, d1, d2;
! 395:
! 396: start = out;
! 397: for (cp = *line; *cp != '\0' && !isspace((u_char)*cp); cp++) {
! 398: if (*cp != '\\') {
! 399: *out++ = *cp;
! 400: continue;
! 401: }
! 402:
! 403: switch (c = *++cp) {
! 404: case ' ':
! 405: *out++ = ' ';
! 406: break;
! 407: case '0':
! 408: case '1':
! 409: case '2':
! 410: case '3':
! 411: case '4':
! 412: case '5':
! 413: case '6':
! 414: case '7':
! 415: d0 = magic_odigit(cp[0]);
! 416: if (cp[0] != '\0')
! 417: d1 = magic_odigit(cp[1]);
! 418: else
! 419: d1 = -1;
! 420: if (cp[0] != '\0' && cp[1] != '\0')
! 421: d2 = magic_odigit(cp[2]);
! 422: else
! 423: d2 = -1;
! 424:
! 425: if (d0 != -1 && d1 != -1 && d2 != -1) {
! 426: *out = d2 | (d1 << 3) | (d0 << 6);
! 427: cp += 2;
! 428: } else if (d0 != -1 && d1 != -1) {
! 429: *out = d1 | (d0 << 3);
! 430: cp++;
! 431: } else if (d0 != -1)
! 432: *out = d0;
! 433: else
! 434: return (-1);
! 435: out++;
! 436: break;
! 437: case 'x':
! 438: d0 = magic_xdigit(cp[1]);
! 439: if (cp[1] != '\0')
! 440: d1 = magic_xdigit(cp[2]);
! 441: else
! 442: d1 = -1;
! 443:
! 444: if (d0 != -1 && d1 != -1) {
! 445: *out = d1 | (d0 << 4);
! 446: cp += 2;
! 447: } else if (d0 != -1) {
! 448: *out = d0;
! 449: cp++;
! 450: } else
! 451: return (-1);
! 452: out++;
! 453:
! 454: break;
! 455: case 'a':
! 456: *out++ = '\a';
! 457: break;
! 458: case 'b':
! 459: *out++ = '\b';
! 460: break;
! 461: case 't':
! 462: *out++ = '\t';
! 463: break;
! 464: case 'f':
! 465: *out++ = '\f';
! 466: break;
! 467: case 'n':
! 468: *out++ = '\n';
! 469: break;
! 470: case 'r':
! 471: *out++ = '\r';
! 472: break;
! 473: case '\\':
! 474: *out++ = '\\';
! 475: break;
! 476: case '\'':
! 477: *out++ = '\'';
! 478: break;
! 479: case '\"':
! 480: *out++ = '\"';
! 481: break;
! 482: default:
! 483: *out++ = c;
! 484: break;
! 485: }
! 486: }
! 487: *out = '\0';
! 488: *outlen = out - start;
! 489:
! 490: *line = cp;
! 491: return (0);
! 492: }
! 493:
! 494: static int
! 495: magic_parse_offset(struct magic_line *ml, char **line)
! 496: {
! 497: char *copy, *s, *cp, *endptr;
! 498:
! 499: while (isspace((u_char)**line))
! 500: (*line)++;
! 501: copy = s = cp = xmalloc(strlen(*line) + 1);
! 502: while (**line != '\0' && !isspace((u_char)**line))
! 503: *cp++ = *(*line)++;
! 504: *cp = '\0';
! 505:
! 506: ml->offset = 0;
! 507: ml->offset_relative = 0;
! 508:
! 509: ml->indirect_type = ' ';
! 510: ml->indirect_relative = 0;
! 511: ml->indirect_offset = 0;
! 512: ml->indirect_operator = ' ';
! 513: ml->indirect_operand = 0;
! 514:
! 515: if (*s == '&') {
! 516: ml->offset_relative = 1;
! 517: s++;
! 518: }
! 519:
! 520: if (*s != '(') {
! 521: endptr = magic_strtoll(s, &ml->offset);
! 522: if (endptr == NULL || *endptr != '\0') {
! 523: magic_warn(ml, "missing closing bracket");
! 524: goto fail;
! 525: }
! 526: if (ml->offset < 0 && !ml->offset_relative) {
! 527: magic_warn(ml, "negative absolute offset");
! 528: goto fail;
! 529: }
! 530: goto done;
! 531: }
! 532: s++;
! 533:
! 534: if (*s == '&') {
! 535: ml->indirect_relative = 1;
! 536: s++;
! 537: }
! 538:
! 539: endptr = magic_strtoll(s, &ml->indirect_offset);
! 540: if (endptr == NULL) {
! 541: magic_warn(ml, "can't parse offset");
! 542: goto fail;
! 543: }
! 544: s = endptr;
! 545: if (*s == ')')
! 546: goto done;
! 547:
! 548: if (*s == '.') {
! 549: s++;
! 550: if (strchr("bslBSL", *s) == NULL) {
! 551: magic_warn(ml, "unknown offset type");
! 552: goto fail;
! 553: }
! 554: ml->indirect_type = *s;
! 555: s++;
! 556: if (*s == ')')
! 557: goto done;
! 558: }
! 559:
! 560: if (strchr("+-*", *s) == NULL) {
! 561: magic_warn(ml, "unknown offset operator");
! 562: goto fail;
! 563: }
! 564: ml->indirect_operator = *s;
! 565: s++;
! 566: if (*s == ')')
! 567: goto done;
! 568:
! 569: if (*s == '(') {
! 570: s++;
! 571: endptr = magic_strtoll(s, &ml->indirect_operand);
! 572: if (endptr == NULL || *endptr != ')') {
! 573: magic_warn(ml, "missing closing bracket");
! 574: goto fail;
! 575: }
! 576: if (*++endptr != ')') {
! 577: magic_warn(ml, "missing closing bracket");
! 578: goto fail;
! 579: }
! 580: } else {
! 581: endptr = magic_strtoll(s, &ml->indirect_operand);
! 582: if (endptr == NULL || *endptr != ')') {
! 583: magic_warn(ml, "missing closing bracket");
! 584: goto fail;
! 585: }
! 586: }
! 587:
! 588: done:
! 589: free(copy);
! 590: return (0);
! 591:
! 592: fail:
! 593: free(copy);
! 594: return (-1);
! 595: }
! 596:
! 597: static int
! 598: magic_parse_type(struct magic_line *ml, char **line)
! 599: {
! 600: char *copy, *s, *cp, *endptr;
! 601:
! 602: while (isspace((u_char)**line))
! 603: (*line)++;
! 604: copy = s = cp = xmalloc(strlen(*line) + 1);
! 605: while (**line != '\0' && !isspace((u_char)**line))
! 606: *cp++ = *(*line)++;
! 607: *cp = '\0';
! 608:
! 609: ml->type = MAGIC_TYPE_NONE;
! 610: ml->type_string = xstrdup(s);
! 611:
! 612: ml->type_operator = ' ';
! 613: ml->type_operand = 0;
! 614:
! 615: if (strncmp(s, "string", (sizeof "string") - 1) == 0) {
! 616: ml->type = MAGIC_TYPE_STRING;
! 617: magic_mark_text(ml, 0);
! 618: goto done;
! 619: }
! 620: if (strncmp(s, "search", (sizeof "search") - 1) == 0) {
! 621: ml->type = MAGIC_TYPE_SEARCH;
! 622: goto done;
! 623: }
! 624: if (strncmp(s, "regex", (sizeof "regex") - 1) == 0) {
! 625: ml->type = MAGIC_TYPE_REGEX;
! 626: goto done;
! 627: }
! 628:
! 629: cp = &s[strcspn(s, "-&")];
! 630: if (*cp != '\0') {
! 631: ml->type_operator = *cp;
! 632: endptr = magic_strtoull(cp + 1, &ml->type_operand);
! 633: if (endptr == NULL || *endptr != '\0') {
! 634: magic_warn(ml, "can't parse operand");
! 635: goto fail;
! 636: }
! 637: *cp = '\0';
! 638: }
! 639:
! 640: if (strcmp(s, "byte") == 0)
! 641: ml->type = MAGIC_TYPE_BYTE;
! 642: else if (strcmp(s, "short") == 0)
! 643: ml->type = MAGIC_TYPE_SHORT;
! 644: else if (strcmp(s, "long") == 0)
! 645: ml->type = MAGIC_TYPE_LONG;
! 646: else if (strcmp(s, "quad") == 0)
! 647: ml->type = MAGIC_TYPE_QUAD;
! 648: else if (strcmp(s, "ubyte") == 0)
! 649: ml->type = MAGIC_TYPE_UBYTE;
! 650: else if (strcmp(s, "ushort") == 0)
! 651: ml->type = MAGIC_TYPE_USHORT;
! 652: else if (strcmp(s, "ulong") == 0)
! 653: ml->type = MAGIC_TYPE_ULONG;
! 654: else if (strcmp(s, "uquad") == 0)
! 655: ml->type = MAGIC_TYPE_UQUAD;
! 656: else if (strcmp(s, "float") == 0)
! 657: ml->type = MAGIC_TYPE_FLOAT;
! 658: else if (strcmp(s, "double") == 0)
! 659: ml->type = MAGIC_TYPE_DOUBLE;
! 660: else if (strcmp(s, "pstring") == 0)
! 661: ml->type = MAGIC_TYPE_PSTRING;
! 662: else if (strcmp(s, "date") == 0)
! 663: ml->type = MAGIC_TYPE_DATE;
! 664: else if (strcmp(s, "qdate") == 0)
! 665: ml->type = MAGIC_TYPE_QDATE;
! 666: else if (strcmp(s, "ldate") == 0)
! 667: ml->type = MAGIC_TYPE_LDATE;
! 668: else if (strcmp(s, "qldate") == 0)
! 669: ml->type = MAGIC_TYPE_QLDATE;
! 670: else if (strcmp(s, "udate") == 0)
! 671: ml->type = MAGIC_TYPE_UDATE;
! 672: else if (strcmp(s, "uqdate") == 0)
! 673: ml->type = MAGIC_TYPE_UQDATE;
! 674: else if (strcmp(s, "uldate") == 0)
! 675: ml->type = MAGIC_TYPE_ULDATE;
! 676: else if (strcmp(s, "uqldate") == 0)
! 677: ml->type = MAGIC_TYPE_UQLDATE;
! 678: else if (strcmp(s, "beshort") == 0)
! 679: ml->type = MAGIC_TYPE_BESHORT;
! 680: else if (strcmp(s, "belong") == 0)
! 681: ml->type = MAGIC_TYPE_BELONG;
! 682: else if (strcmp(s, "bequad") == 0)
! 683: ml->type = MAGIC_TYPE_BEQUAD;
! 684: else if (strcmp(s, "ubeshort") == 0)
! 685: ml->type = MAGIC_TYPE_UBESHORT;
! 686: else if (strcmp(s, "ubelong") == 0)
! 687: ml->type = MAGIC_TYPE_UBELONG;
! 688: else if (strcmp(s, "ubequad") == 0)
! 689: ml->type = MAGIC_TYPE_UBEQUAD;
! 690: else if (strcmp(s, "befloat") == 0)
! 691: ml->type = MAGIC_TYPE_BEFLOAT;
! 692: else if (strcmp(s, "bedouble") == 0)
! 693: ml->type = MAGIC_TYPE_BEDOUBLE;
! 694: else if (strcmp(s, "bedate") == 0)
! 695: ml->type = MAGIC_TYPE_BEDATE;
! 696: else if (strcmp(s, "beqdate") == 0)
! 697: ml->type = MAGIC_TYPE_BEQDATE;
! 698: else if (strcmp(s, "beldate") == 0)
! 699: ml->type = MAGIC_TYPE_BELDATE;
! 700: else if (strcmp(s, "beqldate") == 0)
! 701: ml->type = MAGIC_TYPE_BEQLDATE;
! 702: else if (strcmp(s, "ubedate") == 0)
! 703: ml->type = MAGIC_TYPE_UBEDATE;
! 704: else if (strcmp(s, "ubeqdate") == 0)
! 705: ml->type = MAGIC_TYPE_UBEQDATE;
! 706: else if (strcmp(s, "ubeldate") == 0)
! 707: ml->type = MAGIC_TYPE_UBELDATE;
! 708: else if (strcmp(s, "ubeqldate") == 0)
! 709: ml->type = MAGIC_TYPE_UBEQLDATE;
! 710: else if (strcmp(s, "bestring16") == 0)
! 711: ml->type = MAGIC_TYPE_BESTRING16;
! 712: else if (strcmp(s, "leshort") == 0)
! 713: ml->type = MAGIC_TYPE_LESHORT;
! 714: else if (strcmp(s, "lelong") == 0)
! 715: ml->type = MAGIC_TYPE_LELONG;
! 716: else if (strcmp(s, "lequad") == 0)
! 717: ml->type = MAGIC_TYPE_LEQUAD;
! 718: else if (strcmp(s, "uleshort") == 0)
! 719: ml->type = MAGIC_TYPE_ULESHORT;
! 720: else if (strcmp(s, "ulelong") == 0)
! 721: ml->type = MAGIC_TYPE_ULELONG;
! 722: else if (strcmp(s, "ulequad") == 0)
! 723: ml->type = MAGIC_TYPE_ULEQUAD;
! 724: else if (strcmp(s, "lefloat") == 0)
! 725: ml->type = MAGIC_TYPE_LEFLOAT;
! 726: else if (strcmp(s, "ledouble") == 0)
! 727: ml->type = MAGIC_TYPE_LEDOUBLE;
! 728: else if (strcmp(s, "ledate") == 0)
! 729: ml->type = MAGIC_TYPE_LEDATE;
! 730: else if (strcmp(s, "leqdate") == 0)
! 731: ml->type = MAGIC_TYPE_LEQDATE;
! 732: else if (strcmp(s, "leldate") == 0)
! 733: ml->type = MAGIC_TYPE_LELDATE;
! 734: else if (strcmp(s, "leqldate") == 0)
! 735: ml->type = MAGIC_TYPE_LEQLDATE;
! 736: else if (strcmp(s, "uledate") == 0)
! 737: ml->type = MAGIC_TYPE_ULEDATE;
! 738: else if (strcmp(s, "uleqdate") == 0)
! 739: ml->type = MAGIC_TYPE_ULEQDATE;
! 740: else if (strcmp(s, "uleldate") == 0)
! 741: ml->type = MAGIC_TYPE_ULELDATE;
! 742: else if (strcmp(s, "uleqldate") == 0)
! 743: ml->type = MAGIC_TYPE_ULEQLDATE;
! 744: else if (strcmp(s, "lestring16") == 0)
! 745: ml->type = MAGIC_TYPE_LESTRING16;
! 746: else if (strcmp(s, "melong") == 0)
! 747: ml->type = MAGIC_TYPE_MELONG;
! 748: else if (strcmp(s, "medate") == 0)
! 749: ml->type = MAGIC_TYPE_MEDATE;
! 750: else if (strcmp(s, "meldate") == 0)
! 751: ml->type = MAGIC_TYPE_MELDATE;
! 752: else if (strcmp(s, "default") == 0)
! 753: ml->type = MAGIC_TYPE_DEFAULT;
! 754: else {
! 755: magic_warn(ml, "unknown type");
! 756: goto fail;
! 757: }
! 758: magic_mark_text(ml, 0);
! 759:
! 760: done:
! 761: free(copy);
! 762: return (0);
! 763:
! 764: fail:
! 765: free(copy);
! 766: return (-1);
! 767: }
! 768:
! 769: static int
! 770: magic_parse_value(struct magic_line *ml, char **line)
! 771: {
! 772: char *copy, *s, *cp, *endptr;
! 773: size_t slen;
! 774:
! 775: while (isspace((u_char)**line))
! 776: (*line)++;
! 777:
! 778: ml->test_operator = '=';
! 779: ml->test_not = 0;
! 780: ml->test_string = NULL;
! 781: ml->test_string_size = 0;
! 782: ml->test_unsigned = 0;
! 783: ml->test_signed = 0;
! 784:
! 785: s = *line;
! 786: if (s[0] == 'x' && (s[1] == '\0' || isspace((u_char)s[1]))) {
! 787: (*line)++;
! 788: ml->test_operator = 'x';
! 789: return (0);
! 790: }
! 791:
! 792: if (**line == '!') {
! 793: ml->test_not = 1;
! 794: (*line)++;
! 795: }
! 796:
! 797: switch (ml->type) {
! 798: case MAGIC_TYPE_STRING:
! 799: case MAGIC_TYPE_PSTRING:
! 800: case MAGIC_TYPE_SEARCH:
! 801: if (**line == '>' || **line == '<' || **line == '=') {
! 802: ml->test_operator = **line;
! 803: (*line)++;
! 804: }
! 805: /* FALLTHROUGH */
! 806: case MAGIC_TYPE_REGEX:
! 807: copy = s = xmalloc(strlen(*line) + 1);
! 808: if (magic_get_string(line, s, &slen) != 0) {
! 809: magic_warn(ml, "can't parse string");
! 810: goto fail;
! 811: }
! 812: ml->test_string_size = slen;
! 813: ml->test_string = s;
! 814: return (0); /* do not free */
! 815: default:
! 816: break;
! 817: }
! 818:
! 819: copy = s = cp = xmalloc(strlen(*line) + 1);
! 820: if ((*line)[0] == '=' && (*line)[1] == ' ') {
! 821: /*
! 822: * Extra spaces such as "byte&7 = 0" are accepted, which is
! 823: * annoying. But it seems to be only for =, so special case it.
! 824: */
! 825: *cp++ = '=';
! 826: (*line) += 2;
! 827: }
! 828: while (**line != '\0' && !isspace((u_char)**line))
! 829: *cp++ = *(*line)++;
! 830: *cp = '\0';
! 831:
! 832: if (*s == '\0')
! 833: goto done;
! 834:
! 835: if (s[0] == '<' && s[1] == '=') {
! 836: ml->test_operator = '[';
! 837: s += 2;
! 838: } else if (s[0] == '>' && s[1] == '=') {
! 839: ml->test_operator = ']';
! 840: s += 2;
! 841: } else if (strchr("=<>&^", *s) != NULL) {
! 842: ml->test_operator = *s;
! 843: s++;
! 844: }
! 845:
! 846: if (*ml->type_string == 'u')
! 847: endptr = magic_strtoull(s, &ml->test_unsigned);
! 848: else
! 849: endptr = magic_strtoll(s, &ml->test_signed);
! 850: if (endptr == NULL || *endptr != '\0') {
! 851: magic_warn(ml, "can't parse number");
! 852: goto fail;
! 853: }
! 854:
! 855: done:
! 856: free(copy);
! 857: return (0);
! 858:
! 859: fail:
! 860: free(copy);
! 861: return (-1);
! 862: }
! 863:
! 864: static void
! 865: magic_free_line(struct magic_line *ml)
! 866: {
! 867: free((void*)ml->type_string);
! 868:
! 869: free((void*)ml->mimetype);
! 870: free((void*)ml->result);
! 871:
! 872: free(ml);
! 873: }
! 874:
! 875: int
! 876: magic_compare(struct magic_line *ml1, struct magic_line *ml2)
! 877: {
! 878: if (ml1->strength < ml2->strength)
! 879: return (1);
! 880: if (ml1->strength > ml2->strength)
! 881: return (-1);
! 882:
! 883: /*
! 884: * The original file depends on the (undefined!) qsort(3) behaviour
! 885: * when the strength is equal. This is impossible to reproduce with an
! 886: * RB tree so just use the line number and hope for the best.
! 887: */
! 888: if (ml1->line < ml2->line)
! 889: return (-1);
! 890: if (ml1->line > ml2->line)
! 891: return (1);
! 892:
! 893: return (0);
! 894: }
! 895: RB_GENERATE(magic_tree, magic_line, node, magic_compare);
! 896:
! 897: static void
! 898: magic_set_mimetype(struct magic *m, u_int at, struct magic_line *ml, char *line)
! 899: {
! 900: char *mimetype, *cp;
! 901:
! 902: mimetype = line + (sizeof "!:mime") - 1;
! 903: while (isspace((u_char)*mimetype))
! 904: mimetype++;
! 905:
! 906: cp = strchr(mimetype, '#');
! 907: if (cp != NULL)
! 908: *cp = '\0';
! 909:
! 910: if (*mimetype != '\0') {
! 911: cp = mimetype + strlen(mimetype) - 1;
! 912: while (cp != mimetype && isspace((u_char)*cp))
! 913: *cp-- = '\0';
! 914: }
! 915:
! 916: cp = mimetype;
! 917: while (*cp != '\0') {
! 918: if (!isalnum((u_char)*cp) && strchr("/-.+", *cp) == NULL)
! 919: break;
! 920: cp++;
! 921: }
! 922: if (*mimetype == '\0' || *cp != '\0') {
! 923: fprintf(stderr, "%s:%u: invalid MIME type: %s\n", m->path, at,
! 924: mimetype);
! 925: return;
! 926: }
! 927: if (ml == NULL) {
! 928: fprintf(stderr, "%s:%u: stray MIME type: %s\n", m->path, at,
! 929: mimetype);
! 930: return;
! 931: }
! 932: ml->mimetype = xstrdup(mimetype);
! 933: }
! 934:
! 935: struct magic *
! 936: magic_load(FILE *f, const char *path, int warnings)
! 937: {
! 938: struct magic *m;
! 939: struct magic_line *ml = NULL, *parent, *parent0;
! 940: char *line, *tmp;
! 941: size_t size;
! 942: u_int at, level, n, i;
! 943:
! 944: m = xcalloc(1, sizeof *m);
! 945: m->path = xstrdup(path);
! 946: m->warnings = warnings;
! 947: RB_INIT(&m->tree);
! 948:
! 949: parent = NULL;
! 950: parent0 = NULL;
! 951: level = 0;
! 952:
! 953: at = 0;
! 954: tmp = NULL;
! 955: while ((line = fgetln(f, &size))) {
! 956: if (line[size - 1] == '\n')
! 957: line[size - 1] = '\0';
! 958: else {
! 959: tmp = xmalloc(size + 1);
! 960: memcpy(tmp, line, size);
! 961: tmp[size] = '\0';
! 962: line = tmp;
! 963: }
! 964: at++;
! 965:
! 966: while (isspace((u_char)*line))
! 967: line++;
! 968: if (*line == '\0' || *line == '#')
! 969: continue;
! 970:
! 971: if (strncmp (line, "!:mime", (sizeof "!:mime") - 1) == 0) {
! 972: magic_set_mimetype(m, at, ml, line);
! 973: continue;
! 974: }
! 975:
! 976: n = 0;
! 977: for (; *line == '>'; line++)
! 978: n++;
! 979:
! 980: ml = xcalloc(1, sizeof *ml);
! 981: ml->root = m;
! 982: ml->line = at;
! 983: ml->type = MAGIC_TYPE_NONE;
! 984: TAILQ_INIT(&ml->children);
! 985: ml->text = 1;
! 986:
! 987: if (n == level + 1) {
! 988: parent = parent0;
! 989: } else if (n < level) {
! 990: for (i = n; i < level && parent != NULL; i++)
! 991: parent = parent->parent;
! 992: } else if (n != level) {
! 993: magic_warn(ml, "level skipped (%u->%u)", level, n);
! 994: free(ml);
! 995: continue;
! 996: }
! 997: ml->parent = parent;
! 998: level = n;
! 999:
! 1000: if (magic_parse_offset(ml, &line) != 0 ||
! 1001: magic_parse_type(ml, &line) != 0 ||
! 1002: magic_parse_value(ml, &line) != 0 ||
! 1003: magic_set_result(ml, line) != 0) {
! 1004: magic_free_line(ml);
! 1005: ml = NULL;
! 1006: continue;
! 1007: }
! 1008:
! 1009: ml->strength = magic_get_strength(ml);
! 1010: if (ml->parent == NULL)
! 1011: RB_INSERT(magic_tree, &m->tree, ml);
! 1012: else
! 1013: TAILQ_INSERT_TAIL(&ml->parent->children, ml, entry);
! 1014: parent0 = ml;
! 1015: }
! 1016: free(tmp);
! 1017:
! 1018: fclose(f);
! 1019: return (m);
! 1020: }