Annotation of src/usr.bin/file/apprentice.c, Revision 1.1
1.1 ! deraadt 1: /*
! 2: * apprentice - make one pass through /etc/magic, learning its secrets.
! 3: *
! 4: * Copyright (c) Ian F. Darwin, 1987.
! 5: * Written by Ian F. Darwin.
! 6: *
! 7: * This software is not subject to any license of the American Telephone
! 8: * and Telegraph Company or of the Regents of the University of California.
! 9: *
! 10: * Permission is granted to anyone to use this software for any purpose on
! 11: * any computer system, and to alter it and redistribute it freely, subject
! 12: * to the following restrictions:
! 13: *
! 14: * 1. The author is not responsible for the consequences of use of this
! 15: * software, no matter how awful, even if they arise from flaws in it.
! 16: *
! 17: * 2. The origin of this software must not be misrepresented, either by
! 18: * explicit claim or by omission. Since few users ever read sources,
! 19: * credits must appear in the documentation.
! 20: *
! 21: * 3. Altered versions must be plainly marked as such, and must not be
! 22: * misrepresented as being the original software. Since few users
! 23: * ever read sources, credits must appear in the documentation.
! 24: *
! 25: * 4. This notice may not be removed or altered.
! 26: */
! 27:
! 28: #include <stdio.h>
! 29: #include <stdlib.h>
! 30: #include <string.h>
! 31: #include <ctype.h>
! 32: #include "file.h"
! 33:
#ifndef lint
/*
 * Revision identification string for this module (an RCS $Id$ keyword
 * behind an "@(#)" marker).  Not compiled when `lint' is defined.
 * Nothing in the visible part of this file reads it.
 */
static char *moduleid =
"@(#)$Id: apprentice.c,v 1.9 1995/05/21 00:13:24 christos Exp $";
#endif /* lint */
! 38:
/*
 * EATAB: advance the scan pointer past any run of ASCII whitespace.
 * The macro takes no argument; it operates on a variable named `l' that
 * must be in scope at the point of use.
 */
#define EATAB {while (isascii((unsigned char) *l) && \
isspace((unsigned char) *l)) ++l;}
/*
 * LOWCASE: yield the lower-case form of an upper-case character and any
 * other value unchanged.  The argument is evaluated more than once, so
 * it must be free of side effects.
 */
#define LOWCASE(l) (isupper((unsigned char) (l)) ? \
tolower((unsigned char) (l)) : (l))
! 43:
! 44:
/*
 * Forward declarations of the file-local helpers defined further down.
 * __P is not defined in this file -- presumably it expands to the
 * parameter list on ANSI compilers and to () otherwise; confirm against
 * the project headers.
 */
static int getvalue __P((struct magic *, char **));
static int hextoint __P((int));
static char *getstr __P((char *, char *, int, int *));
static int parse __P((char *, int *, int));
static void eatsize __P((char **));

/*
 * Number of struct magic slots currently allocated for magic[]; set to
 * MAXMAGIS by apprentice() and enlarged by parse() when the table fills.
 */
static int maxmagic = 0;
! 52:
! 53:
! 54: int
! 55: apprentice(fn, check)
! 56: char *fn; /* name of magic file */
! 57: int check; /* non-zero? checking-only run. */
! 58: {
! 59: FILE *f;
! 60: char line[BUFSIZ+1];
! 61: int errs = 0;
! 62:
! 63: f = fopen(fn, "r");
! 64: if (f==NULL) {
! 65: (void) fprintf(stderr, "%s: can't read magic file %s\n",
! 66: progname, fn);
! 67: if (check)
! 68: return -1;
! 69: else
! 70: exit(1);
! 71: }
! 72:
! 73: maxmagic = MAXMAGIS;
! 74: if ((magic = (struct magic *) calloc(sizeof(struct magic), maxmagic))
! 75: == NULL) {
! 76: (void) fprintf(stderr, "%s: Out of memory.\n", progname);
! 77: if (check)
! 78: return -1;
! 79: else
! 80: exit(1);
! 81: }
! 82:
! 83: /* parse it */
! 84: if (check) /* print silly verbose header for USG compat. */
! 85: (void) printf("cont\toffset\ttype\topcode\tmask\tvalue\tdesc\n");
! 86:
! 87: for (lineno = 1;fgets(line, BUFSIZ, f) != NULL; lineno++) {
! 88: if (line[0]=='#') /* comment, do not parse */
! 89: continue;
! 90: if (strlen(line) <= (unsigned)1) /* null line, garbage, etc */
! 91: continue;
! 92: line[strlen(line)-1] = '\0'; /* delete newline */
! 93: if (parse(line, &nmagic, check) != 0)
! 94: ++errs;
! 95: }
! 96:
! 97: (void) fclose(f);
! 98: return errs ? -1 : 0;
! 99: }
! 100:
! 101: /*
! 102: * extend the sign bit if the comparison is to be signed
! 103: */
! 104: unsigned long
! 105: signextend(m, v)
! 106: struct magic *m;
! 107: unsigned long v;
! 108: {
! 109: if (!(m->flag & UNSIGNED))
! 110: switch(m->type) {
! 111: /*
! 112: * Do not remove the casts below. They are
! 113: * vital. When later compared with the data,
! 114: * the sign extension must have happened.
! 115: */
! 116: case BYTE:
! 117: v = (char) v;
! 118: break;
! 119: case SHORT:
! 120: case BESHORT:
! 121: case LESHORT:
! 122: v = (short) v;
! 123: break;
! 124: case DATE:
! 125: case BEDATE:
! 126: case LEDATE:
! 127: case LONG:
! 128: case BELONG:
! 129: case LELONG:
! 130: v = (long) v;
! 131: break;
! 132: case STRING:
! 133: break;
! 134: default:
! 135: magwarn("can't happen: m->type=%d\n",
! 136: m->type);
! 137: return -1;
! 138: }
! 139: return v;
! 140: }
! 141:
! 142: /*
! 143: * parse one line from magic file, put into magic[index++] if valid
! 144: */
! 145: static int
! 146: parse(l, ndx, check)
! 147: char *l;
! 148: int *ndx, check;
! 149: {
! 150: int i = 0, nd = *ndx;
! 151: struct magic *m;
! 152: char *t, *s;
! 153:
! 154: #define ALLOC_INCR 20
! 155: if (nd+1 >= maxmagic){
! 156: maxmagic += ALLOC_INCR;
! 157: if ((magic = (struct magic *) realloc(magic,
! 158: sizeof(struct magic) *
! 159: maxmagic)) == NULL) {
! 160: (void) fprintf(stderr, "%s: Out of memory.\n", progname);
! 161: if (check)
! 162: return -1;
! 163: else
! 164: exit(1);
! 165: }
! 166: memset(&magic[*ndx], 0, sizeof(struct magic) * ALLOC_INCR);
! 167: }
! 168: m = &magic[*ndx];
! 169: m->flag = 0;
! 170: m->cont_level = 0;
! 171:
! 172: while (*l == '>') {
! 173: ++l; /* step over */
! 174: m->cont_level++;
! 175: }
! 176:
! 177: if (m->cont_level != 0 && *l == '(') {
! 178: ++l; /* step over */
! 179: m->flag |= INDIR;
! 180: }
! 181:
! 182: /* get offset, then skip over it */
! 183: m->offset = (int) strtoul(l,&t,0);
! 184: if (l == t)
! 185: magwarn("offset %s invalid", l);
! 186: l = t;
! 187:
! 188: if (m->flag & INDIR) {
! 189: m->in.type = LONG;
! 190: m->in.offset = 0;
! 191: /*
! 192: * read [.lbs][+-]nnnnn)
! 193: */
! 194: if (*l == '.') {
! 195: l++;
! 196: switch (LOWCASE(*l)) {
! 197: case 'l':
! 198: m->in.type = LONG;
! 199: break;
! 200: case 'h':
! 201: case 's':
! 202: m->in.type = SHORT;
! 203: break;
! 204: case 'c':
! 205: case 'b':
! 206: m->in.type = BYTE;
! 207: break;
! 208: default:
! 209: magwarn("indirect offset type %c invalid", *l);
! 210: break;
! 211: }
! 212: l++;
! 213: }
! 214: s = l;
! 215: if (*l == '+' || *l == '-') l++;
! 216: if (isdigit((unsigned char)*l)) {
! 217: m->in.offset = strtoul(l, &t, 0);
! 218: if (*s == '-') m->in.offset = - m->in.offset;
! 219: }
! 220: else
! 221: t = l;
! 222: if (*t++ != ')')
! 223: magwarn("missing ')' in indirect offset");
! 224: l = t;
! 225: }
! 226:
! 227:
! 228: while (isascii((unsigned char)*l) && isdigit((unsigned char)*l))
! 229: ++l;
! 230: EATAB;
! 231:
! 232: #define NBYTE 4
! 233: #define NSHORT 5
! 234: #define NLONG 4
! 235: #define NSTRING 6
! 236: #define NDATE 4
! 237: #define NBESHORT 7
! 238: #define NBELONG 6
! 239: #define NBEDATE 6
! 240: #define NLESHORT 7
! 241: #define NLELONG 6
! 242: #define NLEDATE 6
! 243:
! 244: if (*l == 'u') {
! 245: ++l;
! 246: m->flag |= UNSIGNED;
! 247: }
! 248:
! 249: /* get type, skip it */
! 250: if (strncmp(l, "byte", NBYTE)==0) {
! 251: m->type = BYTE;
! 252: l += NBYTE;
! 253: } else if (strncmp(l, "short", NSHORT)==0) {
! 254: m->type = SHORT;
! 255: l += NSHORT;
! 256: } else if (strncmp(l, "long", NLONG)==0) {
! 257: m->type = LONG;
! 258: l += NLONG;
! 259: } else if (strncmp(l, "string", NSTRING)==0) {
! 260: m->type = STRING;
! 261: l += NSTRING;
! 262: } else if (strncmp(l, "date", NDATE)==0) {
! 263: m->type = DATE;
! 264: l += NDATE;
! 265: } else if (strncmp(l, "beshort", NBESHORT)==0) {
! 266: m->type = BESHORT;
! 267: l += NBESHORT;
! 268: } else if (strncmp(l, "belong", NBELONG)==0) {
! 269: m->type = BELONG;
! 270: l += NBELONG;
! 271: } else if (strncmp(l, "bedate", NBEDATE)==0) {
! 272: m->type = BEDATE;
! 273: l += NBEDATE;
! 274: } else if (strncmp(l, "leshort", NLESHORT)==0) {
! 275: m->type = LESHORT;
! 276: l += NLESHORT;
! 277: } else if (strncmp(l, "lelong", NLELONG)==0) {
! 278: m->type = LELONG;
! 279: l += NLELONG;
! 280: } else if (strncmp(l, "ledate", NLEDATE)==0) {
! 281: m->type = LEDATE;
! 282: l += NLEDATE;
! 283: } else {
! 284: magwarn("type %s invalid", l);
! 285: return -1;
! 286: }
! 287: /* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */
! 288: if (*l == '&') {
! 289: ++l;
! 290: m->mask = signextend(m, strtoul(l, &l, 0));
! 291: eatsize(&l);
! 292: } else
! 293: m->mask = ~0L;
! 294: EATAB;
! 295:
! 296: switch (*l) {
! 297: case '>':
! 298: case '<':
! 299: /* Old-style anding: "0 byte &0x80 dynamically linked" */
! 300: case '&':
! 301: case '^':
! 302: case '=':
! 303: m->reln = *l;
! 304: ++l;
! 305: break;
! 306: case '!':
! 307: if (m->type != STRING) {
! 308: m->reln = *l;
! 309: ++l;
! 310: break;
! 311: }
! 312: /* FALL THROUGH */
! 313: default:
! 314: if (*l == 'x' && isascii((unsigned char)l[1]) &&
! 315: isspace((unsigned char)l[1])) {
! 316: m->reln = *l;
! 317: ++l;
! 318: goto GetDesc; /* Bill The Cat */
! 319: }
! 320: m->reln = '=';
! 321: break;
! 322: }
! 323: EATAB;
! 324:
! 325: if (getvalue(m, &l))
! 326: return -1;
! 327: /*
! 328: * TODO finish this macro and start using it!
! 329: * #define offsetcheck {if (offset > HOWMANY-1)
! 330: * magwarn("offset too big"); }
! 331: */
! 332:
! 333: /*
! 334: * now get last part - the description
! 335: */
! 336: GetDesc:
! 337: EATAB;
! 338: if (l[0] == '\b') {
! 339: ++l;
! 340: m->nospflag = 1;
! 341: } else if ((l[0] == '\\') && (l[1] == 'b')) {
! 342: ++l;
! 343: ++l;
! 344: m->nospflag = 1;
! 345: } else
! 346: m->nospflag = 0;
! 347: while ((m->desc[i++] = *l++) != '\0' && i<MAXDESC)
! 348: /* NULLBODY */;
! 349:
! 350: if (check) {
! 351: mdump(m);
! 352: }
! 353: ++(*ndx); /* make room for next */
! 354: return 0;
! 355: }
! 356:
! 357: /*
! 358: * Read a numeric value from a pointer, into the value union of a magic
! 359: * pointer, according to the magic type. Update the string pointer to point
! 360: * just after the number read. Return 0 for success, non-zero for failure.
! 361: */
! 362: static int
! 363: getvalue(m, p)
! 364: struct magic *m;
! 365: char **p;
! 366: {
! 367: int slen;
! 368:
! 369: if (m->type == STRING) {
! 370: *p = getstr(*p, m->value.s, sizeof(m->value.s), &slen);
! 371: m->vallen = slen;
! 372: } else
! 373: if (m->reln != 'x') {
! 374: m->value.l = signextend(m, strtoul(*p, p, 0));
! 375: eatsize(p);
! 376: }
! 377: return 0;
! 378: }
! 379:
/*
 * Single hex char to int; -1 if not a hex char.
 *
 * isdigit() is true only for '0'..'9' and the letter ranges are tested
 * directly, so no separate ASCII pre-check is needed.
 */
static int
hextoint(c)
int c;
{
	if (isdigit((unsigned char) c))
		return c - '0';
	if ((c >= 'a') && (c <= 'f'))
		return c + 10 - 'a';
	if ((c >= 'A') && (c <= 'F'))
		return c + 10 - 'A';
	return -1;
}

/*
 * Convert a string containing C character escapes.  Stop at an unescaped
 * space or tab.
 * Copy the converted version to "p", returning its length in *slen.
 * Return updated scan pointer as function result.
 *
 * s	text to convert
 * p	destination buffer, always NUL-terminated on return
 * plen	size of the destination in bytes; at most plen-1 bytes are
 *	stored, and "String too long" is reported on stderr if the
 *	input does not fit
 * slen	receives the number of bytes stored, not counting the NUL
 *
 * Escapes handled: \n \r \b \t \f \v, a backslash followed by up to
 * three octal digits, \x followed by up to three hex digits (the result
 * is truncated to a char), and backslash followed by any other
 * character, which yields that character.
 *
 * The returned pointer is just past the terminating whitespace
 * character when there was one.  When the input ended first -- also
 * after a dangling backslash -- it points AT the terminating NUL, never
 * beyond it, so the caller can keep scanning safely.
 */
static char *
getstr(s, p, plen, slen)
register char *s;
register char *p;
int plen, *slen;
{
	char *origs = s, *origp = p;
	char *pmax;
	register int c;
	register int val;

	/* no room even for the terminator: store nothing */
	if (plen <= 0) {
		*slen = 0;
		return s;
	}
	pmax = p + plen - 1;

	for (;;) {
		c = *s;
		if (c == '\0')
			break;		/* stay on the terminator */
		s++;
		if (isspace((unsigned char) c))
			break;
		if (p >= pmax) {
			fprintf(stderr, "String too long: %s\n", origs);
			break;
		}
		if (c != '\\') {
			*p++ = (char)c;
			continue;
		}

		c = *s;
		if (c == '\0')
			break;		/* dangling backslash at end */
		s++;
		switch (c) {

		default:
			*p++ = (char) c;
			break;

		case 'n':
			*p++ = '\n';
			break;

		case 'r':
			*p++ = '\r';
			break;

		case 'b':
			*p++ = '\b';
			break;

		case 't':
			*p++ = '\t';
			break;

		case 'f':
			*p++ = '\f';
			break;

		case 'v':
			*p++ = '\v';
			break;

		/* \ and up to 3 octal digits */
		case '0':
		case '1':
		case '2':
		case '3':
		case '4':
		case '5':
		case '6':
		case '7':
			val = c - '0';
			c = *s++;	/* try for 2 */
			if (c >= '0' && c <= '7') {
				val = (val<<3) | (c - '0');
				c = *s++;	/* try for 3 */
				if (c >= '0' && c <= '7')
					val = (val<<3) | (c-'0');
				else
					--s;	/* not ours, put it back */
			}
			else
				--s;
			*p++ = (char)val;
			break;

		/* \x and up to 3 hex digits */
		case 'x':
			val = 'x';	/* Default if no digits */
			c = hextoint(*s++);	/* Get next char */
			if (c >= 0) {
				val = c;
				c = hextoint(*s++);
				if (c >= 0) {
					val = (val << 4) + c;
					c = hextoint(*s++);
					if (c >= 0) {
						val = (val << 4) + c;
					} else
						--s;
				} else
					--s;
			} else
				--s;
			*p++ = (char)val;
			break;
		}
	}
	*p = '\0';
	*slen = (int)(p - origp);
	return s;
}
! 504:
! 505:
/*
 * Print a string containing C character escapes.
 *
 * fp	stream to write to
 * s	bytes to print
 * len	number of bytes to print, or -1 to print up to (not including)
 *	the first NUL
 *
 * Bytes in the range 040..0176 are written as they are.  Anything else
 * is written as a backslash followed by n, r, b, t, f or v for the
 * matching control character, or by three octal digits otherwise.
 */
void
showstr(fp, s, len)
FILE *fp;
const char *s;
int len;
{
	register char c;

	for (;;) {
		if (len == -1) {
			c = *s++;
			if (c == '\0')
				break;
		}
		else {
			/*
			 * Test the count before fetching, so that no
			 * byte beyond the len requested is ever read.
			 */
			if (len-- == 0)
				break;
			c = *s++;
		}
		if (c >= 040 && c <= 0176)	/* TODO isprint && !iscntrl */
			(void) fputc(c, fp);
		else {
			(void) fputc('\\', fp);
			switch (c) {

			case '\n':
				(void) fputc('n', fp);
				break;

			case '\r':
				(void) fputc('r', fp);
				break;

			case '\b':
				(void) fputc('b', fp);
				break;

			case '\t':
				(void) fputc('t', fp);
				break;

			case '\f':
				(void) fputc('f', fp);
				break;

			case '\v':
				(void) fputc('v', fp);
				break;

			default:
				/* mask so a negative char prints as one byte */
				(void) fprintf(fp, "%.3o", c & 0377);
				break;
			}
		}
	}
}
! 564:
/*
 * eatsize(): Eat the size spec from a number [eg. 10UL]
 *
 * Steps *p over an optional 'u' and then over at most one of the size
 * letters l, s, h, b or c.  Both are matched without regard to case.
 * *p is left unchanged when neither is present.
 */
static void
eatsize(p)
char **p;
{
	char *cur = *p;

	/* optional unsigned marker */
	if (tolower((unsigned char) *cur) == 'u')
		cur++;

	/*
	 * optional size letter: long, short (s or h), char/byte (b or c);
	 * the NUL test keeps strchr() from matching its own terminator
	 */
	if (*cur != '\0' &&
	    strchr("lshbc", tolower((unsigned char) *cur)) != NULL)
		cur++;

	*p = cur;
}