Annotation of src/usr.bin/file/apprentice.c, Revision 1.16
1.16 ! ian 1: /* $OpenBSD: apprentice.c,v 1.15 2002/08/12 00:42:56 aaron Exp $ */
1.4 millert 2:
1.1 deraadt 3: /*
4: * apprentice - make one pass through /etc/magic, learning its secrets.
5: *
1.16 ! ian 6: * Copyright (c) Ian F. Darwin 1986-1995.
! 7: * Software written by Ian F. Darwin and others;
! 8: * maintained 1995-present by Christos Zoulas and others.
! 9: *
! 10: * Redistribution and use in source and binary forms, with or without
! 11: * modification, are permitted provided that the following conditions
! 12: * are met:
! 13: * 1. Redistributions of source code must retain the above copyright
! 14: * notice immediately at the beginning of the file, without modification,
! 15: * this list of conditions, and the following disclaimer.
! 16: * 2. Redistributions in binary form must reproduce the above copyright
! 17: * notice, this list of conditions and the following disclaimer in the
! 18: * documentation and/or other materials provided with the distribution.
! 19: * 3. All advertising materials mentioning features or use of this software
! 20: * must display the following acknowledgement:
! 21: * This product includes software developed by Ian F. Darwin and others.
! 22: * 4. The name of the author may not be used to endorse or promote products
! 23: * derived from this software without specific prior written permission.
! 24: *
! 25: * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
! 26: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
! 27: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
! 28: * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
! 29: * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
! 30: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
! 31: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
! 32: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
! 33: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
! 34: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
! 35: * SUCH DAMAGE.
1.1 deraadt 36: */
37:
38: #include <stdio.h>
39: #include <stdlib.h>
40: #include <string.h>
41: #include <ctype.h>
1.2 deraadt 42: #include <errno.h>
1.5 mickey 43: #include <err.h>
1.1 deraadt 44: #include "file.h"
45:
46: #ifndef lint
1.16 ! ian 47: static char *moduleid = "$OpenBSD: apprentice.c,v 1.15 2002/08/12 00:42:56 aaron Exp $";
1.1 deraadt 48: #endif /* lint */
49:
/*
 * EATAB: advance the local scan pointer `l' past any run of ASCII
 * whitespace.  The expansion is a single statement (do/while(0)), so
 * "EATAB;" is safe everywhere a statement is, including the body of an
 * unbraced if/else.
 */
#define	EATAB	do { \
		while (isascii((unsigned char) *l) && \
		    isspace((unsigned char) *l)) \
			++l; \
	} while (0)

/*
 * LOWCASE: the lower-case form of an upper-case character; any other
 * value is returned unchanged.  The argument is evaluated more than
 * once, so it must not have side effects.
 */
#define	LOWCASE(l)	(isupper((unsigned char) (l)) ? \
			tolower((unsigned char) (l)) : (l))
54:
55:
/* Forward declarations of the file-local parsing helpers defined below. */
static int getvalue(struct magic *, char **);
static int hextoint(int);
static char *getstr(char *, char *, int, int *);
static int parse(char *, int *, int);
static void eatsize(char **);

/* Number of entries currently allocated for the magic[] table. */
static int maxmagic = 0;
/* Entries added the next time parse() grows magic[]; doubled after each growth. */
static int alloc_incr = 256;

/* Reads one magic file; apprentice() calls it for each entry of its list. */
static int apprentice_1(char *, int);
1.1 deraadt 66:
67: int
68: apprentice(fn, check)
1.2 deraadt 69: char *fn; /* list of magic files */
70: int check; /* non-zero? checking-only run. */
71: {
72: char *p, *mfn;
73: int file_err, errs = -1;
74:
75: maxmagic = MAXMAGIS;
1.15 aaron 76: magic = (struct magic *) calloc(maxmagic, sizeof(struct magic));
1.2 deraadt 77: mfn = malloc(strlen(fn)+1);
78: if (magic == NULL || mfn == NULL) {
1.5 mickey 79: warn("malloc");
1.2 deraadt 80: if (check)
81: return -1;
82: else
83: exit(1);
84: }
1.9 deraadt 85: fn = strcpy(mfn, fn); /* ok */
1.2 deraadt 86:
87: while (fn) {
88: p = strchr(fn, ':');
89: if (p)
90: *p++ = '\0';
91: file_err = apprentice_1(fn, check);
92: if (file_err > errs)
93: errs = file_err;
94: fn = p;
95: }
96: if (errs == -1)
1.5 mickey 97: warnx("couldn't find any magic files!");
1.2 deraadt 98: if (!check && errs)
99: exit(1);
100:
101: free(mfn);
102: return errs;
103: }
104:
105: static int
106: apprentice_1(fn, check)
1.1 deraadt 107: char *fn; /* name of magic file */
108: int check; /* non-zero? checking-only run. */
109: {
1.2 deraadt 110: static const char hdr[] =
111: "cont\toffset\ttype\topcode\tmask\tvalue\tdesc";
1.1 deraadt 112: FILE *f;
113: char line[BUFSIZ+1];
114: int errs = 0;
115:
116: f = fopen(fn, "r");
117: if (f==NULL) {
1.2 deraadt 118: if (errno != ENOENT)
1.10 millert 119: warn("%s", fn);
1.2 deraadt 120: return -1;
1.1 deraadt 121: }
122:
123: /* parse it */
124: if (check) /* print silly verbose header for USG compat. */
1.2 deraadt 125: (void) printf("%s\n", hdr);
1.1 deraadt 126:
127: for (lineno = 1;fgets(line, BUFSIZ, f) != NULL; lineno++) {
128: if (line[0]=='#') /* comment, do not parse */
129: continue;
130: if (strlen(line) <= (unsigned)1) /* null line, garbage, etc */
131: continue;
132: line[strlen(line)-1] = '\0'; /* delete newline */
133: if (parse(line, &nmagic, check) != 0)
1.2 deraadt 134: errs = 1;
1.1 deraadt 135: }
136:
137: (void) fclose(f);
1.2 deraadt 138: return errs;
1.1 deraadt 139: }
140:
141: /*
142: * extend the sign bit if the comparison is to be signed
143: */
1.14 itojun 144: uint32_t
1.1 deraadt 145: signextend(m, v)
146: struct magic *m;
1.14 itojun 147: uint32_t v;
1.1 deraadt 148: {
149: if (!(m->flag & UNSIGNED))
150: switch(m->type) {
151: /*
152: * Do not remove the casts below. They are
153: * vital. When later compared with the data,
154: * the sign extension must have happened.
155: */
156: case BYTE:
157: v = (char) v;
158: break;
159: case SHORT:
160: case BESHORT:
161: case LESHORT:
162: v = (short) v;
163: break;
164: case DATE:
165: case BEDATE:
166: case LEDATE:
167: case LONG:
168: case BELONG:
169: case LELONG:
1.14 itojun 170: v = (int32_t) v;
1.1 deraadt 171: break;
172: case STRING:
173: break;
174: default:
1.13 mpech 175: warnx("can't happen: m->type=%d", m->type);
1.1 deraadt 176: return -1;
177: }
178: return v;
179: }
180:
181: /*
182: * parse one line from magic file, put into magic[index++] if valid
183: */
184: static int
185: parse(l, ndx, check)
186: char *l;
187: int *ndx, check;
188: {
189: int i = 0, nd = *ndx;
190: struct magic *m;
191: char *t, *s;
192:
193: if (nd+1 >= maxmagic){
1.6 deraadt 194: struct magic *mtmp;
195:
1.8 ian 196: maxmagic += alloc_incr;
1.6 deraadt 197: if ((mtmp = (struct magic *) realloc(magic,
1.1 deraadt 198: sizeof(struct magic) *
199: maxmagic)) == NULL) {
1.5 mickey 200: warn("malloc");
1.6 deraadt 201: if (check) {
202: if (magic)
203: free(magic);
1.1 deraadt 204: return -1;
1.6 deraadt 205: } else
1.1 deraadt 206: exit(1);
207: }
1.7 deraadt 208: magic = mtmp;
1.8 ian 209: memset(&magic[*ndx], 0, sizeof(struct magic) * alloc_incr);
210: alloc_incr *= 2;
1.1 deraadt 211: }
212: m = &magic[*ndx];
213: m->flag = 0;
214: m->cont_level = 0;
215:
216: while (*l == '>') {
217: ++l; /* step over */
218: m->cont_level++;
219: }
220:
221: if (m->cont_level != 0 && *l == '(') {
222: ++l; /* step over */
223: m->flag |= INDIR;
224: }
1.4 millert 225: if (m->cont_level != 0 && *l == '&') {
226: ++l; /* step over */
227: m->flag |= ADD;
228: }
1.1 deraadt 229:
230: /* get offset, then skip over it */
231: m->offset = (int) strtoul(l,&t,0);
232: if (l == t)
1.5 mickey 233: warnx("offset %s invalid", l);
1.1 deraadt 234: l = t;
235:
236: if (m->flag & INDIR) {
237: m->in.type = LONG;
238: m->in.offset = 0;
239: /*
240: * read [.lbs][+-]nnnnn)
241: */
242: if (*l == '.') {
243: l++;
244: switch (LOWCASE(*l)) {
245: case 'l':
246: m->in.type = LONG;
247: break;
248: case 'h':
249: case 's':
250: m->in.type = SHORT;
251: break;
252: case 'c':
253: case 'b':
254: m->in.type = BYTE;
255: break;
256: default:
1.5 mickey 257: warnx("indirect offset type %c invalid", *l);
1.1 deraadt 258: break;
259: }
260: l++;
261: }
262: s = l;
263: if (*l == '+' || *l == '-') l++;
264: if (isdigit((unsigned char)*l)) {
265: m->in.offset = strtoul(l, &t, 0);
266: if (*s == '-') m->in.offset = - m->in.offset;
267: }
268: else
269: t = l;
270: if (*t++ != ')')
1.5 mickey 271: warnx("missing ')' in indirect offset");
1.1 deraadt 272: l = t;
273: }
274:
275:
276: while (isascii((unsigned char)*l) && isdigit((unsigned char)*l))
277: ++l;
278: EATAB;
279:
280: #define NBYTE 4
281: #define NSHORT 5
282: #define NLONG 4
283: #define NSTRING 6
284: #define NDATE 4
285: #define NBESHORT 7
286: #define NBELONG 6
287: #define NBEDATE 6
288: #define NLESHORT 7
289: #define NLELONG 6
290: #define NLEDATE 6
291:
292: if (*l == 'u') {
293: ++l;
294: m->flag |= UNSIGNED;
295: }
296:
297: /* get type, skip it */
298: if (strncmp(l, "byte", NBYTE)==0) {
299: m->type = BYTE;
300: l += NBYTE;
301: } else if (strncmp(l, "short", NSHORT)==0) {
302: m->type = SHORT;
303: l += NSHORT;
304: } else if (strncmp(l, "long", NLONG)==0) {
305: m->type = LONG;
306: l += NLONG;
307: } else if (strncmp(l, "string", NSTRING)==0) {
308: m->type = STRING;
309: l += NSTRING;
310: } else if (strncmp(l, "date", NDATE)==0) {
311: m->type = DATE;
312: l += NDATE;
313: } else if (strncmp(l, "beshort", NBESHORT)==0) {
314: m->type = BESHORT;
315: l += NBESHORT;
316: } else if (strncmp(l, "belong", NBELONG)==0) {
317: m->type = BELONG;
318: l += NBELONG;
319: } else if (strncmp(l, "bedate", NBEDATE)==0) {
320: m->type = BEDATE;
321: l += NBEDATE;
322: } else if (strncmp(l, "leshort", NLESHORT)==0) {
323: m->type = LESHORT;
324: l += NLESHORT;
325: } else if (strncmp(l, "lelong", NLELONG)==0) {
326: m->type = LELONG;
327: l += NLELONG;
328: } else if (strncmp(l, "ledate", NLEDATE)==0) {
329: m->type = LEDATE;
330: l += NLEDATE;
331: } else {
1.5 mickey 332: warnx("type %s invalid", l);
1.1 deraadt 333: return -1;
334: }
335: /* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */
336: if (*l == '&') {
337: ++l;
338: m->mask = signextend(m, strtoul(l, &l, 0));
339: eatsize(&l);
340: } else
341: m->mask = ~0L;
342: EATAB;
343:
344: switch (*l) {
345: case '>':
346: case '<':
347: /* Old-style anding: "0 byte &0x80 dynamically linked" */
348: case '&':
349: case '^':
350: case '=':
351: m->reln = *l;
352: ++l;
353: break;
354: case '!':
355: if (m->type != STRING) {
356: m->reln = *l;
357: ++l;
358: break;
359: }
360: /* FALL THROUGH */
361: default:
362: if (*l == 'x' && isascii((unsigned char)l[1]) &&
363: isspace((unsigned char)l[1])) {
364: m->reln = *l;
365: ++l;
366: goto GetDesc; /* Bill The Cat */
367: }
368: m->reln = '=';
369: break;
370: }
371: EATAB;
372:
373: if (getvalue(m, &l))
374: return -1;
375: /*
376: * TODO finish this macro and start using it!
377: * #define offsetcheck {if (offset > HOWMANY-1)
1.5 mickey 378: * warnx("offset too big"); }
1.1 deraadt 379: */
380:
381: /*
382: * now get last part - the description
383: */
384: GetDesc:
385: EATAB;
386: if (l[0] == '\b') {
387: ++l;
388: m->nospflag = 1;
389: } else if ((l[0] == '\\') && (l[1] == 'b')) {
390: ++l;
391: ++l;
392: m->nospflag = 1;
393: } else
394: m->nospflag = 0;
395: while ((m->desc[i++] = *l++) != '\0' && i<MAXDESC)
396: /* NULLBODY */;
397:
398: if (check) {
399: mdump(m);
400: }
401: ++(*ndx); /* make room for next */
402: return 0;
403: }
404:
405: /*
406: * Read a numeric value from a pointer, into the value union of a magic
407: * pointer, according to the magic type. Update the string pointer to point
408: * just after the number read. Return 0 for success, non-zero for failure.
409: */
410: static int
411: getvalue(m, p)
412: struct magic *m;
413: char **p;
414: {
415: int slen;
416:
417: if (m->type == STRING) {
418: *p = getstr(*p, m->value.s, sizeof(m->value.s), &slen);
419: m->vallen = slen;
420: } else
421: if (m->reln != 'x') {
422: m->value.l = signextend(m, strtoul(*p, p, 0));
423: eatsize(p);
424: }
425: return 0;
426: }
427:
/* Single hex char to int; -1 if not a hex char. */
static int
hextoint(int c)
{
	if (!isascii((unsigned char) c))
		return -1;
	if (isdigit((unsigned char) c))
		return c - '0';
	if ((c >= 'a') && (c <= 'f'))
		return c + 10 - 'a';
	if ((c >= 'A') && (c <= 'F'))
		return c + 10 - 'A';
	return -1;
}

/*
 * getstr - convert a string containing C character escapes.
 *
 * s:    text to convert; conversion stops at an unescaped whitespace
 *       character or at the end of the string.
 * p:    destination buffer; the result is always NUL-terminated.
 * plen: size of the destination buffer in bytes.
 * slen: receives the length of the converted string, without the NUL.
 *
 * Recognised escapes are \n \r \b \t \f \v, a backslash followed by up
 * to three octal digits, and \x followed by up to two hex digits ("\x"
 * with no digits yields 'x').  Any other escaped character stands for
 * itself.  If the result does not fit, "String too long" is printed on
 * stderr and conversion stops.
 *
 * Returns the updated scan pointer: just past the whitespace that ended
 * the string, or pointing AT the terminating NUL when the input ran out
 * (never beyond it), so the caller can keep scanning safely.
 */
static char *
getstr(char *s, char *p, int plen, int *slen)
{
	char *origs = s, *origp = p;
	char *pmax = p + plen - 1;	/* last slot, kept for the NUL */
	int c;
	int val;
	int i;

	for (;;) {
		c = *s;
		if (c == '\0')
			break;		/* leave s on the terminator */
		s++;
		if (isspace((unsigned char) c))
			break;
		if (p >= pmax) {
			fprintf(stderr, "String too long: %s\n", origs);
			break;
		}
		if (c != '\\') {
			*p++ = (char) c;
			continue;
		}

		/* Escape sequence: look at the character after the '\'. */
		c = *s;
		if (c == '\0')
			break;		/* trailing '\': stay on the NUL */
		s++;
		switch (c) {
		default:
			*p++ = (char) c;
			break;

		case 'n':
			*p++ = '\n';
			break;

		case 'r':
			*p++ = '\r';
			break;

		case 'b':
			*p++ = '\b';
			break;

		case 't':
			*p++ = '\t';
			break;

		case 'f':
			*p++ = '\f';
			break;

		case 'v':
			*p++ = '\v';
			break;

		/* \ and up to 3 octal digits */
		case '0':
		case '1':
		case '2':
		case '3':
		case '4':
		case '5':
		case '6':
		case '7':
			val = c - '0';
			for (i = 0; i < 2 && *s >= '0' && *s <= '7'; i++)
				val = (val << 3) | (*s++ - '0');
			*p++ = (char) val;
			break;

		/* \x and up to 2 hex digits */
		case 'x':
			val = 'x';	/* Default if no digits */
			c = hextoint(*s);
			if (c >= 0) {
				s++;
				val = c;
				c = hextoint(*s);
				if (c >= 0) {
					s++;
					val = (val << 4) + c;
				}
			}
			*p++ = (char) val;
			break;
		}
	}
	*p = '\0';
	*slen = (int) (p - origp);
	return s;
}
547:
548:
/*
 * showstr - print a string, rendering unprintable bytes as C escapes.
 *
 * fp:  stream to write to.
 * s:   bytes to print.
 * len: number of bytes to print, or -1 to print up to (not including)
 *      the terminating NUL.
 *
 * Bytes in the range 040..0176 are written as they are.  Newline,
 * carriage return, backspace, tab, form feed and vertical tab are
 * written as \n \r \b \t \f \v; every other byte is written as a
 * backslash followed by three octal digits.
 */
void
showstr(FILE *fp, const char *s, int len)
{
	const int counted = (len != -1);
	char c;

	for (;;) {
		/*
		 * In counted mode test the count BEFORE fetching, so that
		 * the byte after the last requested one is never read.
		 */
		if (counted && len-- == 0)
			break;
		c = *s++;
		if (!counted && c == '\0')
			break;

		if (c >= 040 && c <= 0176) {	/* TODO isprint && !iscntrl */
			(void) fputc(c, fp);
			continue;
		}

		(void) fputc('\\', fp);
		switch (c) {
		case '\n':
			(void) fputc('n', fp);
			break;

		case '\r':
			(void) fputc('r', fp);
			break;

		case '\b':
			(void) fputc('b', fp);
			break;

		case '\t':
			(void) fputc('t', fp);
			break;

		case '\f':
			(void) fputc('f', fp);
			break;

		case '\v':
			(void) fputc('v', fp);
			break;

		default:
			/* Mask to 8 bits: c may be a negative char. */
			(void) fprintf(fp, "%.3o", c & 0377);
			break;
		}
	}
}
607:
/*
 * eatsize - skip the size suffix that may follow a number, e.g. the
 * "UL" in 10UL.
 *
 * An optional 'u' and then at most one of 'l', 's', 'h', 'b' or 'c'
 * (long, short, short, byte, byte) are consumed, in either case.  *p
 * is advanced past whatever was consumed.
 */
static void
eatsize(p)
	char **p;
{
	char *cur = *p;
	int ch = tolower((unsigned char) *cur);

	/* optional unsigned marker */
	if (ch == 'u')
		ch = tolower((unsigned char) *++cur);

	/* optional width letter */
	if (ch == 'l' || ch == 's' || ch == 'h' || ch == 'b' || ch == 'c')
		cur++;

	*p = cur;
}