Annotation of src/usr.bin/file/apprentice.c, Revision 1.19
1.19 ! tedu 1: /* $OpenBSD$ */
1.1 deraadt 2: /*
1.16 ian 3: * Copyright (c) Ian F. Darwin 1986-1995.
4: * Software written by Ian F. Darwin and others;
5: * maintained 1995-present by Christos Zoulas and others.
6: *
7: * Redistribution and use in source and binary forms, with or without
8: * modification, are permitted provided that the following conditions
9: * are met:
10: * 1. Redistributions of source code must retain the above copyright
11: * notice immediately at the beginning of the file, without modification,
12: * this list of conditions, and the following disclaimer.
13: * 2. Redistributions in binary form must reproduce the above copyright
14: * notice, this list of conditions and the following disclaimer in the
15: * documentation and/or other materials provided with the distribution.
16: *
17: * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20: * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
21: * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27: * SUCH DAMAGE.
1.1 deraadt 28: */
1.19 ! tedu 29: /*
! 30: * apprentice - make one pass through /etc/magic, learning its secrets.
! 31: */
1.1 deraadt 32:
1.19 ! tedu 33: #include "file.h"
! 34: #include "magic.h"
1.1 deraadt 35: #include <stdlib.h>
1.19 ! tedu 36: #ifdef HAVE_UNISTD_H
! 37: #include <unistd.h>
! 38: #endif
1.1 deraadt 39: #include <string.h>
40: #include <ctype.h>
1.19 ! tedu 41: #include <fcntl.h>
! 42: #include <sys/stat.h>
! 43: #include <sys/param.h>
! 44: #ifdef QUICK
! 45: #include <sys/mman.h>
! 46: #endif
1.1 deraadt 47:
48: #ifndef lint
1.19 ! tedu 49: FILE_RCSID("@(#)$Id: apprentice.c,v 1.75 2004/03/22 18:48:56 christos Exp $")
1.1 deraadt 50: #endif /* lint */
51:
52: #define EATAB {while (isascii((unsigned char) *l) && \
53: isspace((unsigned char) *l)) ++l;}
54: #define LOWCASE(l) (isupper((unsigned char) (l)) ? \
55: tolower((unsigned char) (l)) : (l))
1.19 ! tedu 56: /*
! 57: * Work around a bug in headers on Digital Unix.
! 58: * At least confirmed for: OSF1 V4.0 878
! 59: */
! 60: #if defined(__osf__) && defined(__DECC)
! 61: #ifdef MAP_FAILED
! 62: #undef MAP_FAILED
! 63: #endif
! 64: #endif
! 65:
! 66: #ifndef MAP_FAILED
! 67: #define MAP_FAILED (void *) -1
! 68: #endif
! 69:
! 70: #ifndef MAP_FILE
! 71: #define MAP_FILE 0
! 72: #endif
! 73:
! 74: #ifndef MAXPATHLEN
! 75: #define MAXPATHLEN 1024
! 76: #endif
! 77:
! 78: private int getvalue(struct magic_set *ms, struct magic *, char **);
! 79: private int hextoint(int);
! 80: private char *getstr(struct magic_set *, char *, char *, int, int *);
! 81: private int parse(struct magic_set *, struct magic **, uint32_t *, char *, int);
! 82: private void eatsize(char **);
! 83: private int apprentice_1(struct magic_set *, const char *, int, struct mlist *);
! 84: private int apprentice_file(struct magic_set *, struct magic **, uint32_t *,
! 85: const char *, int);
! 86: private void byteswap(struct magic *, uint32_t);
! 87: private void bs1(struct magic *);
! 88: private uint16_t swap2(uint16_t);
! 89: private uint32_t swap4(uint32_t);
! 90: private char *mkdbname(const char *, char *, size_t);
! 91: private int apprentice_map(struct magic_set *, struct magic **, uint32_t *,
! 92: const char *);
! 93: private int apprentice_compile(struct magic_set *, struct magic **, uint32_t *,
! 94: const char *);
! 95:
! 96: private size_t maxmagic = 0;
! 97: private size_t magicsize = sizeof(struct magic);
! 98:
#ifdef COMPILE_ONLY
const char *magicfile;
char *progname;
int lineno;

int main(int, char *[]);

/*
 * Stand-alone magic compiler: compile the magic file named by argv[1].
 * Exits 0 on success and 1 on a usage error or compile failure.
 *
 * The previous body called file_apprentice() with an argument list that
 * does not match its (ms, fn, action) signature and compared its pointer
 * result with -1.  A magic_set is created here instead and the public
 * compile entry point is used.
 * NOTE(review): assumes magic.h declares magic_open(), magic_compile(),
 * magic_error() and magic_close() -- confirm against the header.
 */
int
main(int argc, char *argv[])
{
	struct magic_set *ms;
	int ret;

	if ((progname = strrchr(argv[0], '/')) != NULL)
		progname++;
	else
		progname = argv[0];

	if (argc != 2) {
		(void)fprintf(stderr, "usage: %s file\n", progname);
		exit(1);
	}
	magicfile = argv[1];

	if ((ms = magic_open(MAGIC_CHECK)) == NULL) {
		(void)fprintf(stderr, "%s: %s\n", progname, strerror(errno));
		return 1;
	}
	ret = magic_compile(ms, magicfile) == -1 ? 1 : 0;
	if (ret == 1)
		(void)fprintf(stderr, "%s: %s\n", progname, magic_error(ms));
	magic_close(ms);
	return ret;
}
#endif /* COMPILE_ONLY */
! 125:
! 126:
! 127: /*
! 128: * Handle one file.
! 129: */
! 130: private int
! 131: apprentice_1(struct magic_set *ms, const char *fn, int action,
! 132: struct mlist *mlist)
! 133: {
! 134: struct magic *magic = NULL;
! 135: uint32_t nmagic = 0;
! 136: struct mlist *ml;
! 137: int rv = -1;
! 138: int mapped;
! 139:
! 140: if (magicsize != FILE_MAGICSIZE) {
! 141: file_error(ms, 0, "magic element size %lu != %lu",
! 142: (unsigned long)sizeof(*magic),
! 143: (unsigned long)FILE_MAGICSIZE);
! 144: return -1;
! 145: }
1.1 deraadt 146:
1.19 ! tedu 147: if (action == FILE_COMPILE) {
! 148: rv = apprentice_file(ms, &magic, &nmagic, fn, action);
! 149: if (rv != 0)
! 150: return -1;
! 151: rv = apprentice_compile(ms, &magic, &nmagic, fn);
! 152: free(magic);
! 153: return rv;
! 154: }
! 155: #ifndef COMPILE_ONLY
! 156: if ((rv = apprentice_map(ms, &magic, &nmagic, fn)) == -1) {
! 157: if (ms->flags & MAGIC_CHECK)
! 158: file_magwarn("using regular magic file `%s'", fn);
! 159: rv = apprentice_file(ms, &magic, &nmagic, fn, action);
! 160: if (rv != 0)
! 161: return -1;
! 162: mapped = 0;
! 163: }
1.1 deraadt 164:
1.19 ! tedu 165: if (rv == -1)
! 166: return rv;
! 167: mapped = rv;
! 168:
! 169: if (magic == NULL || nmagic == 0) {
! 170: file_delmagic(magic, mapped, nmagic);
! 171: return -1;
! 172: }
1.1 deraadt 173:
1.19 ! tedu 174: if ((ml = malloc(sizeof(*ml))) == NULL) {
! 175: file_delmagic(magic, mapped, nmagic);
! 176: file_oomem(ms);
! 177: return -1;
! 178: }
1.1 deraadt 179:
1.19 ! tedu 180: ml->magic = magic;
! 181: ml->nmagic = nmagic;
! 182: ml->mapped = mapped;
! 183:
! 184: mlist->prev->next = ml;
! 185: ml->prev = mlist->prev;
! 186: ml->next = mlist;
! 187: mlist->prev = ml;
1.1 deraadt 188:
1.19 ! tedu 189: return 0;
! 190: #endif /* COMPILE_ONLY */
! 191: }
! 192:
! 193: protected void
! 194: file_delmagic(struct magic *p, int type, size_t entries)
! 195: {
! 196: if (p == NULL)
! 197: return;
! 198: switch (type) {
! 199: case 2:
! 200: p--;
! 201: (void)munmap((void *)p, sizeof(*p) * (entries + 1));
! 202: break;
! 203: case 1:
! 204: p--;
! 205: case 0:
! 206: free(p);
! 207: break;
! 208: default:
! 209: abort();
! 210: }
! 211: }
! 212:
! 213:
! 214: /* const char *fn: list of magic files */
! 215: protected struct mlist *
! 216: file_apprentice(struct magic_set *ms, const char *fn, int action)
1.2 deraadt 217: {
1.19 ! tedu 218: char *p, *mfn, *afn = NULL;
1.2 deraadt 219: int file_err, errs = -1;
1.19 ! tedu 220: struct mlist *mlist;
1.2 deraadt 221:
1.19 ! tedu 222: if (fn == NULL)
! 223: fn = getenv("MAGIC");
! 224: if (fn == NULL)
! 225: fn = MAGIC;
! 226:
! 227: if ((fn = mfn = strdup(fn)) == NULL) {
! 228: file_oomem(ms);
! 229: return NULL;
! 230: }
! 231:
! 232: if ((mlist = malloc(sizeof(*mlist))) == NULL) {
! 233: free(mfn);
! 234: file_oomem(ms);
! 235: return NULL;
1.2 deraadt 236: }
1.19 ! tedu 237: mlist->next = mlist->prev = mlist;
1.17 deraadt 238:
1.2 deraadt 239: while (fn) {
1.19 ! tedu 240: p = strchr(fn, PATHSEP);
1.2 deraadt 241: if (p)
242: *p++ = '\0';
1.19 ! tedu 243: if (*fn == '\0')
! 244: break;
! 245: if (ms->flags & MAGIC_MIME) {
! 246: size_t len = strlen(fn) + 5 + 1;
! 247: if ((afn = malloc(len)) == NULL) {
! 248: free(mfn);
! 249: free(mlist);
! 250: file_oomem(ms);
! 251: return NULL;
! 252: }
! 253: (void)strlcpy(afn, fn, len);
! 254: (void)strlcat(afn, ".mime", len);
! 255: fn = afn;
! 256: }
! 257: file_err = apprentice_1(ms, fn, action, mlist);
1.2 deraadt 258: if (file_err > errs)
259: errs = file_err;
1.19 ! tedu 260: if (afn) {
! 261: free(afn);
! 262: afn = NULL;
! 263: }
1.2 deraadt 264: fn = p;
265: }
1.19 ! tedu 266: if (errs == -1) {
! 267: free(mfn);
! 268: free(mlist);
! 269: mlist = NULL;
! 270: file_error(ms, 0, "could not find any magic files!");
! 271: return NULL;
! 272: }
1.2 deraadt 273: free(mfn);
1.19 ! tedu 274: return mlist;
1.2 deraadt 275: }
276:
1.19 ! tedu 277: /*
! 278: * parse from a file
! 279: * const char *fn: name of magic file
! 280: */
! 281: private int
! 282: apprentice_file(struct magic_set *ms, struct magic **magicp, uint32_t *nmagicp,
! 283: const char *fn, int action)
1.1 deraadt 284: {
1.19 ! tedu 285: private const char hdr[] =
1.2 deraadt 286: "cont\toffset\ttype\topcode\tmask\tvalue\tdesc";
1.1 deraadt 287: FILE *f;
288: char line[BUFSIZ+1];
1.19 ! tedu 289: int lineno;
1.1 deraadt 290: int errs = 0;
291:
292: f = fopen(fn, "r");
1.19 ! tedu 293: if (f == NULL) {
1.2 deraadt 294: if (errno != ENOENT)
1.19 ! tedu 295: file_error(ms, errno, "cannot read magic file `%s'",
! 296: fn);
1.2 deraadt 297: return -1;
1.1 deraadt 298: }
299:
1.19 ! tedu 300: maxmagic = MAXMAGIS;
! 301: *magicp = (struct magic *) calloc(maxmagic, sizeof(struct magic));
! 302: if (*magicp == NULL) {
! 303: (void)fclose(f);
! 304: file_oomem(ms);
! 305: return -1;
! 306: }
! 307:
! 308: /* print silly verbose header for USG compat. */
! 309: if (action == FILE_CHECK)
! 310: (void)fprintf(stderr, "%s\n", hdr);
! 311:
1.1 deraadt 312: /* parse it */
1.19 ! tedu 313: for (lineno = 1; fgets(line, BUFSIZ, f) != NULL; lineno++) {
1.1 deraadt 314: if (line[0]=='#') /* comment, do not parse */
315: continue;
316: if (strlen(line) <= (unsigned)1) /* null line, garbage, etc */
317: continue;
318: line[strlen(line)-1] = '\0'; /* delete newline */
1.19 ! tedu 319: if (parse(ms, magicp, nmagicp, line, action) != 0)
1.2 deraadt 320: errs = 1;
1.1 deraadt 321: }
322:
1.19 ! tedu 323: (void)fclose(f);
! 324: if (errs) {
! 325: free(*magicp);
! 326: *magicp = NULL;
! 327: *nmagicp = 0;
! 328: }
1.2 deraadt 329: return errs;
1.1 deraadt 330: }
331:
332: /*
333: * extend the sign bit if the comparison is to be signed
334: */
1.19 ! tedu 335: protected uint32_t
! 336: file_signextend(struct magic_set *ms, struct magic *m, uint32_t v)
1.1 deraadt 337: {
338: if (!(m->flag & UNSIGNED))
339: switch(m->type) {
340: /*
341: * Do not remove the casts below. They are
342: * vital. When later compared with the data,
343: * the sign extension must have happened.
344: */
1.19 ! tedu 345: case FILE_BYTE:
1.1 deraadt 346: v = (char) v;
347: break;
1.19 ! tedu 348: case FILE_SHORT:
! 349: case FILE_BESHORT:
! 350: case FILE_LESHORT:
1.1 deraadt 351: v = (short) v;
352: break;
1.19 ! tedu 353: case FILE_DATE:
! 354: case FILE_BEDATE:
! 355: case FILE_LEDATE:
! 356: case FILE_LDATE:
! 357: case FILE_BELDATE:
! 358: case FILE_LELDATE:
! 359: case FILE_LONG:
! 360: case FILE_BELONG:
! 361: case FILE_LELONG:
1.14 itojun 362: v = (int32_t) v;
1.1 deraadt 363: break;
1.19 ! tedu 364: case FILE_STRING:
! 365: case FILE_PSTRING:
! 366: break;
! 367: case FILE_REGEX:
1.1 deraadt 368: break;
369: default:
1.19 ! tedu 370: if (ms->flags & MAGIC_CHECK)
! 371: file_magwarn("cannot happen: m->type=%d\n",
! 372: m->type);
! 373: return ~0U;
1.1 deraadt 374: }
375: return v;
376: }
377:
378: /*
379: * parse one line from magic file, put into magic[index++] if valid
380: */
1.19 ! tedu 381: private int
! 382: parse(struct magic_set *ms, struct magic **magicp, uint32_t *nmagicp, char *l,
! 383: int action)
1.1 deraadt 384: {
1.19 ! tedu 385: int i = 0;
1.1 deraadt 386: struct magic *m;
1.19 ! tedu 387: char *t;
! 388: private const char *fops = FILE_OPS;
! 389: uint32_t val;
! 390:
! 391: #define ALLOC_INCR 200
! 392: if (*nmagicp + 1 >= maxmagic){
! 393: maxmagic += ALLOC_INCR;
! 394: if ((m = (struct magic *) realloc(*magicp,
! 395: sizeof(struct magic) * maxmagic)) == NULL) {
! 396: file_oomem(ms);
! 397: if (*magicp)
! 398: free(*magicp);
1.1 deraadt 399: return -1;
1.19 ! tedu 400: }
! 401: *magicp = m;
! 402: memset(&(*magicp)[*nmagicp], 0, sizeof(struct magic)
! 403: * ALLOC_INCR);
1.1 deraadt 404: }
1.19 ! tedu 405: m = &(*magicp)[*nmagicp];
1.1 deraadt 406: m->flag = 0;
407: m->cont_level = 0;
408:
409: while (*l == '>') {
410: ++l; /* step over */
411: m->cont_level++;
412: }
413:
414: if (m->cont_level != 0 && *l == '(') {
415: ++l; /* step over */
416: m->flag |= INDIR;
417: }
1.4 millert 418: if (m->cont_level != 0 && *l == '&') {
419: ++l; /* step over */
1.19 ! tedu 420: m->flag |= OFFADD;
1.4 millert 421: }
1.1 deraadt 422:
423: /* get offset, then skip over it */
1.19 ! tedu 424: m->offset = (uint32_t)strtoul(l, &t, 0);
1.1 deraadt 425: if (l == t)
1.19 ! tedu 426: if (ms->flags & MAGIC_CHECK)
! 427: file_magwarn("offset %s invalid", l);
1.1 deraadt 428: l = t;
429:
430: if (m->flag & INDIR) {
1.19 ! tedu 431: m->in_type = FILE_LONG;
! 432: m->in_offset = 0;
1.1 deraadt 433: /*
434: * read [.lbs][+-]nnnnn)
435: */
436: if (*l == '.') {
437: l++;
1.19 ! tedu 438: switch (*l) {
1.1 deraadt 439: case 'l':
1.19 ! tedu 440: m->in_type = FILE_LELONG;
! 441: break;
! 442: case 'L':
! 443: m->in_type = FILE_BELONG;
1.1 deraadt 444: break;
445: case 'h':
446: case 's':
1.19 ! tedu 447: m->in_type = FILE_LESHORT;
! 448: break;
! 449: case 'H':
! 450: case 'S':
! 451: m->in_type = FILE_BESHORT;
1.1 deraadt 452: break;
453: case 'c':
454: case 'b':
1.19 ! tedu 455: case 'C':
! 456: case 'B':
! 457: m->in_type = FILE_BYTE;
1.1 deraadt 458: break;
459: default:
1.19 ! tedu 460: if (ms->flags & MAGIC_CHECK)
! 461: file_magwarn(
! 462: "indirect offset type %c invalid",
! 463: *l);
1.1 deraadt 464: break;
465: }
466: l++;
467: }
1.19 ! tedu 468: if (*l == '~') {
! 469: m->in_op = FILE_OPINVERSE;
! 470: l++;
! 471: }
! 472: switch (*l) {
! 473: case '&':
! 474: m->in_op |= FILE_OPAND;
! 475: l++;
! 476: break;
! 477: case '|':
! 478: m->in_op |= FILE_OPOR;
! 479: l++;
! 480: break;
! 481: case '^':
! 482: m->in_op |= FILE_OPXOR;
! 483: l++;
! 484: break;
! 485: case '+':
! 486: m->in_op |= FILE_OPADD;
! 487: l++;
! 488: break;
! 489: case '-':
! 490: m->in_op |= FILE_OPMINUS;
! 491: l++;
! 492: break;
! 493: case '*':
! 494: m->in_op |= FILE_OPMULTIPLY;
! 495: l++;
! 496: break;
! 497: case '/':
! 498: m->in_op |= FILE_OPDIVIDE;
! 499: l++;
! 500: break;
! 501: case '%':
! 502: m->in_op |= FILE_OPMODULO;
! 503: l++;
! 504: break;
1.1 deraadt 505: }
1.19 ! tedu 506: if (isdigit((unsigned char)*l))
! 507: m->in_offset = (uint32_t)strtoul(l, &t, 0);
1.1 deraadt 508: else
509: t = l;
510: if (*t++ != ')')
1.19 ! tedu 511: if (ms->flags & MAGIC_CHECK)
! 512: file_magwarn("missing ')' in indirect offset");
1.1 deraadt 513: l = t;
514: }
515:
516:
517: while (isascii((unsigned char)*l) && isdigit((unsigned char)*l))
518: ++l;
519: EATAB;
520:
521: #define NBYTE 4
522: #define NSHORT 5
523: #define NLONG 4
524: #define NSTRING 6
525: #define NDATE 4
526: #define NBESHORT 7
527: #define NBELONG 6
528: #define NBEDATE 6
529: #define NLESHORT 7
530: #define NLELONG 6
531: #define NLEDATE 6
1.19 ! tedu 532: #define NPSTRING 7
! 533: #define NLDATE 5
! 534: #define NBELDATE 7
! 535: #define NLELDATE 7
! 536: #define NREGEX 5
1.1 deraadt 537:
538: if (*l == 'u') {
539: ++l;
540: m->flag |= UNSIGNED;
541: }
542:
543: /* get type, skip it */
1.19 ! tedu 544: if (strncmp(l, "char", NBYTE)==0) { /* HP/UX compat */
! 545: m->type = FILE_BYTE;
! 546: l += NBYTE;
! 547: } else if (strncmp(l, "byte", NBYTE)==0) {
! 548: m->type = FILE_BYTE;
1.1 deraadt 549: l += NBYTE;
550: } else if (strncmp(l, "short", NSHORT)==0) {
1.19 ! tedu 551: m->type = FILE_SHORT;
1.1 deraadt 552: l += NSHORT;
553: } else if (strncmp(l, "long", NLONG)==0) {
1.19 ! tedu 554: m->type = FILE_LONG;
1.1 deraadt 555: l += NLONG;
556: } else if (strncmp(l, "string", NSTRING)==0) {
1.19 ! tedu 557: m->type = FILE_STRING;
1.1 deraadt 558: l += NSTRING;
559: } else if (strncmp(l, "date", NDATE)==0) {
1.19 ! tedu 560: m->type = FILE_DATE;
1.1 deraadt 561: l += NDATE;
562: } else if (strncmp(l, "beshort", NBESHORT)==0) {
1.19 ! tedu 563: m->type = FILE_BESHORT;
1.1 deraadt 564: l += NBESHORT;
565: } else if (strncmp(l, "belong", NBELONG)==0) {
1.19 ! tedu 566: m->type = FILE_BELONG;
1.1 deraadt 567: l += NBELONG;
568: } else if (strncmp(l, "bedate", NBEDATE)==0) {
1.19 ! tedu 569: m->type = FILE_BEDATE;
1.1 deraadt 570: l += NBEDATE;
571: } else if (strncmp(l, "leshort", NLESHORT)==0) {
1.19 ! tedu 572: m->type = FILE_LESHORT;
1.1 deraadt 573: l += NLESHORT;
574: } else if (strncmp(l, "lelong", NLELONG)==0) {
1.19 ! tedu 575: m->type = FILE_LELONG;
1.1 deraadt 576: l += NLELONG;
577: } else if (strncmp(l, "ledate", NLEDATE)==0) {
1.19 ! tedu 578: m->type = FILE_LEDATE;
1.1 deraadt 579: l += NLEDATE;
1.19 ! tedu 580: } else if (strncmp(l, "pstring", NPSTRING)==0) {
! 581: m->type = FILE_PSTRING;
! 582: l += NPSTRING;
! 583: } else if (strncmp(l, "ldate", NLDATE)==0) {
! 584: m->type = FILE_LDATE;
! 585: l += NLDATE;
! 586: } else if (strncmp(l, "beldate", NBELDATE)==0) {
! 587: m->type = FILE_BELDATE;
! 588: l += NBELDATE;
! 589: } else if (strncmp(l, "leldate", NLELDATE)==0) {
! 590: m->type = FILE_LELDATE;
! 591: l += NLELDATE;
! 592: } else if (strncmp(l, "regex", NREGEX)==0) {
! 593: m->type = FILE_REGEX;
! 594: l += sizeof("regex");
1.1 deraadt 595: } else {
1.19 ! tedu 596: if (ms->flags & MAGIC_CHECK)
! 597: file_magwarn("type %s invalid", l);
1.1 deraadt 598: return -1;
599: }
600: /* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */
1.19 ! tedu 601: /* New and improved: ~ & | ^ + - * / % -- exciting, isn't it? */
! 602: if (*l == '~') {
! 603: if (FILE_STRING != m->type && FILE_PSTRING != m->type)
! 604: m->mask_op = FILE_OPINVERSE;
1.1 deraadt 605: ++l;
1.19 ! tedu 606: }
! 607: if ((t = strchr(fops, *l)) != NULL) {
! 608: uint32_t op = (uint32_t)(t - fops);
! 609: if (op != FILE_OPDIVIDE ||
! 610: (FILE_STRING != m->type && FILE_PSTRING != m->type)) {
! 611: ++l;
! 612: m->mask_op |= op;
! 613: val = (uint32_t)strtoul(l, &l, 0);
! 614: m->mask = file_signextend(ms, m, val);
! 615: eatsize(&l);
! 616: } else {
! 617: m->mask = 0L;
! 618: while (!isspace((unsigned char)*++l)) {
! 619: switch (*l) {
! 620: case CHAR_IGNORE_LOWERCASE:
! 621: m->mask |= STRING_IGNORE_LOWERCASE;
! 622: break;
! 623: case CHAR_COMPACT_BLANK:
! 624: m->mask |= STRING_COMPACT_BLANK;
! 625: break;
! 626: case CHAR_COMPACT_OPTIONAL_BLANK:
! 627: m->mask |=
! 628: STRING_COMPACT_OPTIONAL_BLANK;
! 629: break;
! 630: default:
! 631: if (ms->flags & MAGIC_CHECK)
! 632: file_magwarn(
! 633: "string extension %c invalid",
! 634: *l);
! 635: return -1;
! 636: }
! 637: }
! 638: }
! 639: }
! 640: /*
! 641: * We used to set mask to all 1's here, instead let's just not do
! 642: * anything if mask = 0 (unless you have a better idea)
! 643: */
1.1 deraadt 644: EATAB;
645:
646: switch (*l) {
647: case '>':
648: case '<':
649: /* Old-style anding: "0 byte &0x80 dynamically linked" */
650: case '&':
651: case '^':
652: case '=':
653: m->reln = *l;
654: ++l;
1.19 ! tedu 655: if (*l == '=') {
! 656: /* HP compat: ignore &= etc. */
! 657: ++l;
! 658: }
1.1 deraadt 659: break;
660: case '!':
1.19 ! tedu 661: if (m->type != FILE_STRING && m->type != FILE_PSTRING) {
1.1 deraadt 662: m->reln = *l;
663: ++l;
664: break;
665: }
1.19 ! tedu 666: /*FALLTHROUGH*/
1.1 deraadt 667: default:
668: if (*l == 'x' && isascii((unsigned char)l[1]) &&
669: isspace((unsigned char)l[1])) {
670: m->reln = *l;
671: ++l;
672: goto GetDesc; /* Bill The Cat */
673: }
674: m->reln = '=';
675: break;
676: }
677: EATAB;
678:
1.19 ! tedu 679: if (getvalue(ms, m, &l))
1.1 deraadt 680: return -1;
681: /*
682: * TODO finish this macro and start using it!
683: * #define offsetcheck {if (offset > HOWMANY-1)
1.19 ! tedu 684: * magwarn("offset too big"); }
1.1 deraadt 685: */
686:
687: /*
688: * now get last part - the description
689: */
690: GetDesc:
691: EATAB;
692: if (l[0] == '\b') {
693: ++l;
694: m->nospflag = 1;
695: } else if ((l[0] == '\\') && (l[1] == 'b')) {
696: ++l;
697: ++l;
698: m->nospflag = 1;
699: } else
700: m->nospflag = 0;
1.19 ! tedu 701: while ((m->desc[i++] = *l++) != '\0' && i < MAXDESC)
1.1 deraadt 702: /* NULLBODY */;
703:
1.19 ! tedu 704: #ifndef COMPILE_ONLY
! 705: if (action == FILE_CHECK) {
! 706: file_mdump(m);
1.1 deraadt 707: }
1.19 ! tedu 708: #endif
! 709: ++(*nmagicp); /* make room for next */
1.1 deraadt 710: return 0;
711: }
712:
713: /*
714: * Read a numeric value from a pointer, into the value union of a magic
715: * pointer, according to the magic type. Update the string pointer to point
716: * just after the number read. Return 0 for success, non-zero for failure.
717: */
1.19 ! tedu 718: private int
! 719: getvalue(struct magic_set *ms, struct magic *m, char **p)
1.1 deraadt 720: {
721: int slen;
722:
1.19 ! tedu 723: switch (m->type) {
! 724: case FILE_STRING:
! 725: case FILE_PSTRING:
! 726: case FILE_REGEX:
! 727: *p = getstr(ms, *p, m->value.s, sizeof(m->value.s), &slen);
! 728: if (*p == NULL) {
! 729: if (ms->flags & MAGIC_CHECK)
! 730: file_magwarn("cannot get string from `%s'",
! 731: m->value.s);
! 732: return -1;
! 733: }
1.1 deraadt 734: m->vallen = slen;
1.19 ! tedu 735: return 0;
! 736: default:
1.1 deraadt 737: if (m->reln != 'x') {
1.19 ! tedu 738: m->value.l = file_signextend(ms, m,
! 739: (uint32_t)strtoul(*p, p, 0));
1.1 deraadt 740: eatsize(p);
741: }
1.19 ! tedu 742: return 0;
! 743: }
1.1 deraadt 744: }
745:
746: /*
747: * Convert a string containing C character escapes. Stop at an unescaped
748: * space or tab.
749: * Copy the converted version to "p", returning its length in *slen.
750: * Return updated scan pointer as function result.
751: */
1.19 ! tedu 752: private char *
! 753: getstr(struct magic_set *ms, char *s, char *p, int plen, int *slen)
1.1 deraadt 754: {
755: char *origs = s, *origp = p;
756: char *pmax = p + plen - 1;
1.11 mpech 757: int c;
758: int val;
1.1 deraadt 759:
760: while ((c = *s++) != '\0') {
761: if (isspace((unsigned char) c))
762: break;
763: if (p >= pmax) {
1.19 ! tedu 764: file_error(ms, 0, "string too long: `%s'", origs);
! 765: return NULL;
1.1 deraadt 766: }
767: if(c == '\\') {
768: switch(c = *s++) {
769:
770: case '\0':
771: goto out;
772:
773: default:
774: *p++ = (char) c;
775: break;
776:
777: case 'n':
778: *p++ = '\n';
779: break;
780:
781: case 'r':
782: *p++ = '\r';
783: break;
784:
785: case 'b':
786: *p++ = '\b';
787: break;
788:
789: case 't':
790: *p++ = '\t';
791: break;
792:
793: case 'f':
794: *p++ = '\f';
795: break;
796:
797: case 'v':
798: *p++ = '\v';
799: break;
800:
801: /* \ and up to 3 octal digits */
802: case '0':
803: case '1':
804: case '2':
805: case '3':
806: case '4':
807: case '5':
808: case '6':
809: case '7':
810: val = c - '0';
811: c = *s++; /* try for 2 */
812: if(c >= '0' && c <= '7') {
813: val = (val<<3) | (c - '0');
814: c = *s++; /* try for 3 */
815: if(c >= '0' && c <= '7')
816: val = (val<<3) | (c-'0');
817: else
818: --s;
819: }
820: else
821: --s;
822: *p++ = (char)val;
823: break;
824:
1.4 millert 825: /* \x and up to 2 hex digits */
1.1 deraadt 826: case 'x':
827: val = 'x'; /* Default if no digits */
828: c = hextoint(*s++); /* Get next char */
829: if (c >= 0) {
830: val = c;
831: c = hextoint(*s++);
1.4 millert 832: if (c >= 0)
1.1 deraadt 833: val = (val << 4) + c;
1.4 millert 834: else
1.1 deraadt 835: --s;
836: } else
837: --s;
838: *p++ = (char)val;
839: break;
840: }
841: } else
842: *p++ = (char)c;
843: }
844: out:
845: *p = '\0';
846: *slen = p - origp;
847: return s;
848: }
849:
850:
851: /* Single hex char to int; -1 if not a hex char. */
1.19 ! tedu 852: private int
! 853: hextoint(int c)
! 854: {
! 855: if (!isascii((unsigned char) c))
! 856: return -1;
! 857: if (isdigit((unsigned char) c))
! 858: return c - '0';
! 859: if ((c >= 'a')&&(c <= 'f'))
! 860: return c + 10 - 'a';
! 861: if (( c>= 'A')&&(c <= 'F'))
! 862: return c + 10 - 'A';
! 863: return -1;
1.1 deraadt 864: }
865:
866:
867: /*
868: * Print a string containing C character escapes.
869: */
1.19 ! tedu 870: protected void
! 871: file_showstr(FILE *fp, const char *s, size_t len)
1.1 deraadt 872: {
1.11 mpech 873: char c;
1.1 deraadt 874:
875: for (;;) {
876: c = *s++;
1.19 ! tedu 877: if (len == ~0U) {
1.1 deraadt 878: if (c == '\0')
879: break;
880: }
881: else {
882: if (len-- == 0)
883: break;
884: }
885: if(c >= 040 && c <= 0176) /* TODO isprint && !iscntrl */
886: (void) fputc(c, fp);
887: else {
888: (void) fputc('\\', fp);
889: switch (c) {
890:
891: case '\n':
892: (void) fputc('n', fp);
893: break;
894:
895: case '\r':
896: (void) fputc('r', fp);
897: break;
898:
899: case '\b':
900: (void) fputc('b', fp);
901: break;
902:
903: case '\t':
904: (void) fputc('t', fp);
905: break;
906:
907: case '\f':
908: (void) fputc('f', fp);
909: break;
910:
911: case '\v':
912: (void) fputc('v', fp);
913: break;
914:
915: default:
916: (void) fprintf(fp, "%.3o", c & 0377);
917: break;
918: }
919: }
920: }
921: }
922:
923: /*
924: * eatsize(): Eat the size spec from a number [eg. 10UL]
925: */
1.19 ! tedu 926: private void
! 927: eatsize(char **p)
1.1 deraadt 928: {
929: char *l = *p;
930:
931: if (LOWCASE(*l) == 'u')
932: l++;
933:
934: switch (LOWCASE(*l)) {
935: case 'l': /* long */
936: case 's': /* short */
937: case 'h': /* short */
938: case 'b': /* char/byte */
939: case 'c': /* char/byte */
940: l++;
941: /*FALLTHROUGH*/
942: default:
943: break;
944: }
945:
946: *p = l;
1.19 ! tedu 947: }
! 948:
! 949: /*
! 950: * handle a compiled file.
! 951: */
! 952: private int
! 953: apprentice_map(struct magic_set *ms, struct magic **magicp, uint32_t *nmagicp,
! 954: const char *fn)
! 955: {
! 956: int fd;
! 957: struct stat st;
! 958: uint32_t *ptr;
! 959: uint32_t version;
! 960: int needsbyteswap;
! 961: char buf[MAXPATHLEN];
! 962: char *dbname = mkdbname(fn, buf, sizeof(buf));
! 963: void *mm = NULL;
! 964:
! 965: if (dbname == NULL)
! 966: return -1;
! 967:
! 968: if ((fd = open(dbname, O_RDONLY)) == -1)
! 969: return -1;
! 970:
! 971: if (fstat(fd, &st) == -1) {
! 972: file_error(ms, errno, "cannot stat `%s'", dbname);
! 973: goto error;
! 974: }
! 975: if (st.st_size < 16) {
! 976: file_error(ms, 0, "file `%s' is too small", dbname);
! 977: goto error;
! 978: }
! 979:
! 980: #ifdef QUICK
! 981: if ((mm = mmap(0, (size_t)st.st_size, PROT_READ|PROT_WRITE,
! 982: MAP_PRIVATE|MAP_FILE, fd, (off_t)0)) == MAP_FAILED) {
! 983: file_error(ms, errno, "cannot map `%s'", dbname);
! 984: goto error;
! 985: }
! 986: #define RET 2
! 987: #else
! 988: if ((mm = malloc((size_t)st.st_size)) == NULL) {
! 989: file_oomem(ms);
! 990: goto error;
! 991: }
! 992: if (read(fd, mm, (size_t)st.st_size) != (size_t)st.st_size) {
! 993: file_badread(ms);
! 994: goto error;
! 995: }
! 996: #define RET 1
! 997: #endif
! 998: *magicp = mm;
! 999: (void)close(fd);
! 1000: fd = -1;
! 1001: ptr = (uint32_t *)(void *)*magicp;
! 1002: if (*ptr != MAGICNO) {
! 1003: if (swap4(*ptr) != MAGICNO) {
! 1004: file_error(ms, 0, "bad magic in `%s'");
! 1005: goto error;
! 1006: }
! 1007: needsbyteswap = 1;
! 1008: } else
! 1009: needsbyteswap = 0;
! 1010: if (needsbyteswap)
! 1011: version = swap4(ptr[1]);
! 1012: else
! 1013: version = ptr[1];
! 1014: if (version != VERSIONNO) {
! 1015: file_error(ms, 0, "version mismatch (%d != %d) in `%s'",
! 1016: version, VERSIONNO, dbname);
! 1017: goto error;
! 1018: }
! 1019: *nmagicp = (uint32_t)(st.st_size / sizeof(struct magic)) - 1;
! 1020: (*magicp)++;
! 1021: if (needsbyteswap)
! 1022: byteswap(*magicp, *nmagicp);
! 1023: return RET;
! 1024:
! 1025: error:
! 1026: if (fd != -1)
! 1027: (void)close(fd);
! 1028: if (mm) {
! 1029: #ifdef QUICK
! 1030: (void)munmap((void *)mm, (size_t)st.st_size);
! 1031: #else
! 1032: free(mm);
! 1033: #endif
! 1034: } else {
! 1035: *magicp = NULL;
! 1036: *nmagicp = 0;
! 1037: }
! 1038: return -1;
! 1039: }
! 1040:
! 1041: private const uint32_t ar[] = {
! 1042: MAGICNO, VERSIONNO
! 1043: };
! 1044: /*
! 1045: * handle an mmaped file.
! 1046: */
! 1047: private int
! 1048: apprentice_compile(struct magic_set *ms, struct magic **magicp,
! 1049: uint32_t *nmagicp, const char *fn)
! 1050: {
! 1051: int fd;
! 1052: char buf[MAXPATHLEN];
! 1053: char *dbname = mkdbname(fn, buf, sizeof(buf));
! 1054:
! 1055: if (dbname == NULL)
! 1056: return -1;
! 1057:
! 1058: if ((fd = open(dbname, O_WRONLY|O_CREAT|O_TRUNC, 0644)) == -1) {
! 1059: file_error(ms, errno, "cannot open `%s'", dbname);
! 1060: return -1;
! 1061: }
! 1062:
! 1063: if (write(fd, ar, sizeof(ar)) != (ssize_t)sizeof(ar)) {
! 1064: file_error(ms, errno, "error writing `%s'", dbname);
! 1065: return -1;
! 1066: }
! 1067:
! 1068: if (lseek(fd, (off_t)sizeof(struct magic), SEEK_SET)
! 1069: != sizeof(struct magic)) {
! 1070: file_error(ms, errno, "error seeking `%s'", dbname);
! 1071: return -1;
! 1072: }
! 1073:
! 1074: if (write(fd, *magicp, (sizeof(struct magic) * *nmagicp))
! 1075: != (ssize_t)(sizeof(struct magic) * *nmagicp)) {
! 1076: file_error(ms, errno, "error writing `%s'", dbname);
! 1077: return -1;
! 1078: }
! 1079:
! 1080: (void)close(fd);
! 1081: return 0;
! 1082: }
! 1083:
! 1084: private const char ext[] = ".mgc";
! 1085: /*
! 1086: * make a dbname
! 1087: */
! 1088: private char *
! 1089: mkdbname(const char *fn, char *buf, size_t bufsiz)
! 1090: {
! 1091: #ifdef notdef
! 1092: const char *p;
! 1093: if ((p = strrchr(fn, '/')) != NULL)
! 1094: fn = ++p;
! 1095: #endif
! 1096: (void)snprintf(buf, bufsiz, "%s%s", fn, ext);
! 1097: return buf;
! 1098: }
! 1099:
! 1100: /*
! 1101: * Byteswap an mmap'ed file if needed
! 1102: */
! 1103: private void
! 1104: byteswap(struct magic *magic, uint32_t nmagic)
! 1105: {
! 1106: uint32_t i;
! 1107: for (i = 0; i < nmagic; i++)
! 1108: bs1(&magic[i]);
! 1109: }
! 1110:
! 1111: /*
! 1112: * swap a short
! 1113: */
! 1114: private uint16_t
! 1115: swap2(uint16_t sv)
! 1116: {
! 1117: uint16_t rv;
! 1118: uint8_t *s = (uint8_t *)(void *)&sv;
! 1119: uint8_t *d = (uint8_t *)(void *)&rv;
! 1120: d[0] = s[1];
! 1121: d[1] = s[0];
! 1122: return rv;
! 1123: }
! 1124:
! 1125: /*
! 1126: * swap an int
! 1127: */
! 1128: private uint32_t
! 1129: swap4(uint32_t sv)
! 1130: {
! 1131: uint32_t rv;
! 1132: uint8_t *s = (uint8_t *)(void *)&sv;
! 1133: uint8_t *d = (uint8_t *)(void *)&rv;
! 1134: d[0] = s[3];
! 1135: d[1] = s[2];
! 1136: d[2] = s[1];
! 1137: d[3] = s[0];
! 1138: return rv;
! 1139: }
! 1140:
! 1141: /*
! 1142: * byteswap a single magic entry
! 1143: */
! 1144: private void
! 1145: bs1(struct magic *m)
! 1146: {
! 1147: m->cont_level = swap2(m->cont_level);
! 1148: m->offset = swap4((uint32_t)m->offset);
! 1149: m->in_offset = swap4((uint32_t)m->in_offset);
! 1150: if (m->type != FILE_STRING)
! 1151: m->value.l = swap4(m->value.l);
! 1152: m->mask = swap4(m->mask);
1.1 deraadt 1153: }