Annotation of src/usr.bin/file/apprentice.c, Revision 1.18
1.18 ! deraadt 1: /* $OpenBSD: apprentice.c,v 1.17 2003/04/07 19:03:46 deraadt Exp $ */
1.4 millert 2:
1.1 deraadt 3: /*
4: * apprentice - make one pass through /etc/magic, learning its secrets.
5: *
1.16 ian 6: * Copyright (c) Ian F. Darwin 1986-1995.
7: * Software written by Ian F. Darwin and others;
8: * maintained 1995-present by Christos Zoulas and others.
9: *
10: * Redistribution and use in source and binary forms, with or without
11: * modification, are permitted provided that the following conditions
12: * are met:
13: * 1. Redistributions of source code must retain the above copyright
14: * notice immediately at the beginning of the file, without modification,
15: * this list of conditions, and the following disclaimer.
16: * 2. Redistributions in binary form must reproduce the above copyright
17: * notice, this list of conditions and the following disclaimer in the
18: * documentation and/or other materials provided with the distribution.
19: *
20: * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
21: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23: * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
24: * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30: * SUCH DAMAGE.
1.1 deraadt 31: */
32:
33: #include <stdio.h>
34: #include <stdlib.h>
35: #include <string.h>
36: #include <ctype.h>
1.2 deraadt 37: #include <errno.h>
1.5 mickey 38: #include <err.h>
1.1 deraadt 39: #include "file.h"
40:
#ifndef lint
static char *moduleid = "$OpenBSD: apprentice.c,v 1.17 2003/04/07 19:03:46 deraadt Exp $";
#endif /* lint */

/*
 * EATAB: advance the scan pointer named `l' (which must be in scope at
 * the point of use) past any ASCII whitespace.
 * Wrapped in do { } while (0) so that "EATAB;" forms exactly one
 * statement and stays correct inside an unbraced if/else.
 */
#define	EATAB	do {						\
		while (isascii((unsigned char) *l) &&		\
		    isspace((unsigned char) *l))		\
			++l;					\
	} while (0)

/*
 * LOWCASE: yield the lower-case form of an upper-case character and any
 * other value unchanged.  The argument is evaluated more than once, so
 * it must not have side effects.
 */
#define	LOWCASE(l)	(isupper((unsigned char) (l)) ? \
			    tolower((unsigned char) (l)) : (l))
49:
50:
/* Forward declarations of the file-local parsing helpers. */
static int	 getvalue(struct magic *m, char **p);
static int	 hextoint(int c);
static char	*getstr(char *s, char *p, int plen, int *slen);
static int	 parse(char *l, int *ndx, int check);
static void	 eatsize(char **p);

/* Number of entries the magic[] table currently has room for. */
static int maxmagic = 0;
/* Entries added the next time magic[] grows; doubled after each growth. */
static int alloc_incr = 256;

static int	 apprentice_1(char *fn, int check);
1.1 deraadt 61:
62: int
63: apprentice(fn, check)
1.2 deraadt 64: char *fn; /* list of magic files */
65: int check; /* non-zero? checking-only run. */
66: {
67: char *p, *mfn;
68: int file_err, errs = -1;
1.17 deraadt 69: size_t len;
1.2 deraadt 70:
71: maxmagic = MAXMAGIS;
1.15 aaron 72: magic = (struct magic *) calloc(maxmagic, sizeof(struct magic));
1.17 deraadt 73: len = strlen(fn)+1;
74: mfn = malloc(len);
1.2 deraadt 75: if (magic == NULL || mfn == NULL) {
1.5 mickey 76: warn("malloc");
1.2 deraadt 77: if (check)
78: return -1;
79: else
80: exit(1);
81: }
1.17 deraadt 82: strlcpy(mfn, fn, len);
83: fn = mfn;
84:
1.2 deraadt 85: while (fn) {
86: p = strchr(fn, ':');
87: if (p)
88: *p++ = '\0';
89: file_err = apprentice_1(fn, check);
90: if (file_err > errs)
91: errs = file_err;
92: fn = p;
93: }
94: if (errs == -1)
1.5 mickey 95: warnx("couldn't find any magic files!");
1.2 deraadt 96: if (!check && errs)
97: exit(1);
98:
99: free(mfn);
100: return errs;
101: }
102:
103: static int
104: apprentice_1(fn, check)
1.1 deraadt 105: char *fn; /* name of magic file */
106: int check; /* non-zero? checking-only run. */
107: {
1.2 deraadt 108: static const char hdr[] =
109: "cont\toffset\ttype\topcode\tmask\tvalue\tdesc";
1.1 deraadt 110: FILE *f;
111: char line[BUFSIZ+1];
112: int errs = 0;
113:
114: f = fopen(fn, "r");
115: if (f==NULL) {
1.2 deraadt 116: if (errno != ENOENT)
1.10 millert 117: warn("%s", fn);
1.2 deraadt 118: return -1;
1.1 deraadt 119: }
120:
121: /* parse it */
122: if (check) /* print silly verbose header for USG compat. */
1.2 deraadt 123: (void) printf("%s\n", hdr);
1.1 deraadt 124:
125: for (lineno = 1;fgets(line, BUFSIZ, f) != NULL; lineno++) {
126: if (line[0]=='#') /* comment, do not parse */
127: continue;
128: if (strlen(line) <= (unsigned)1) /* null line, garbage, etc */
129: continue;
130: line[strlen(line)-1] = '\0'; /* delete newline */
131: if (parse(line, &nmagic, check) != 0)
1.2 deraadt 132: errs = 1;
1.1 deraadt 133: }
134:
135: (void) fclose(f);
1.2 deraadt 136: return errs;
1.1 deraadt 137: }
138:
139: /*
140: * extend the sign bit if the comparison is to be signed
141: */
1.14 itojun 142: uint32_t
1.1 deraadt 143: signextend(m, v)
144: struct magic *m;
1.14 itojun 145: uint32_t v;
1.1 deraadt 146: {
147: if (!(m->flag & UNSIGNED))
148: switch(m->type) {
149: /*
150: * Do not remove the casts below. They are
151: * vital. When later compared with the data,
152: * the sign extension must have happened.
153: */
154: case BYTE:
155: v = (char) v;
156: break;
157: case SHORT:
158: case BESHORT:
159: case LESHORT:
160: v = (short) v;
161: break;
162: case DATE:
163: case BEDATE:
164: case LEDATE:
165: case LONG:
166: case BELONG:
167: case LELONG:
1.14 itojun 168: v = (int32_t) v;
1.1 deraadt 169: break;
170: case STRING:
171: break;
172: default:
1.13 mpech 173: warnx("can't happen: m->type=%d", m->type);
1.1 deraadt 174: return -1;
175: }
176: return v;
177: }
178:
179: /*
180: * parse one line from magic file, put into magic[index++] if valid
181: */
182: static int
183: parse(l, ndx, check)
184: char *l;
185: int *ndx, check;
186: {
187: int i = 0, nd = *ndx;
188: struct magic *m;
189: char *t, *s;
190:
191: if (nd+1 >= maxmagic){
1.6 deraadt 192: struct magic *mtmp;
193:
1.8 ian 194: maxmagic += alloc_incr;
1.6 deraadt 195: if ((mtmp = (struct magic *) realloc(magic,
1.1 deraadt 196: sizeof(struct magic) *
197: maxmagic)) == NULL) {
1.5 mickey 198: warn("malloc");
1.6 deraadt 199: if (check) {
200: if (magic)
201: free(magic);
1.1 deraadt 202: return -1;
1.6 deraadt 203: } else
1.1 deraadt 204: exit(1);
205: }
1.7 deraadt 206: magic = mtmp;
1.8 ian 207: memset(&magic[*ndx], 0, sizeof(struct magic) * alloc_incr);
208: alloc_incr *= 2;
1.1 deraadt 209: }
210: m = &magic[*ndx];
211: m->flag = 0;
212: m->cont_level = 0;
213:
214: while (*l == '>') {
215: ++l; /* step over */
216: m->cont_level++;
217: }
218:
219: if (m->cont_level != 0 && *l == '(') {
220: ++l; /* step over */
221: m->flag |= INDIR;
222: }
1.4 millert 223: if (m->cont_level != 0 && *l == '&') {
224: ++l; /* step over */
225: m->flag |= ADD;
226: }
1.1 deraadt 227:
228: /* get offset, then skip over it */
229: m->offset = (int) strtoul(l,&t,0);
230: if (l == t)
1.5 mickey 231: warnx("offset %s invalid", l);
1.1 deraadt 232: l = t;
233:
234: if (m->flag & INDIR) {
235: m->in.type = LONG;
236: m->in.offset = 0;
237: /*
238: * read [.lbs][+-]nnnnn)
239: */
240: if (*l == '.') {
241: l++;
242: switch (LOWCASE(*l)) {
243: case 'l':
244: m->in.type = LONG;
245: break;
246: case 'h':
247: case 's':
248: m->in.type = SHORT;
249: break;
250: case 'c':
251: case 'b':
252: m->in.type = BYTE;
253: break;
254: default:
1.5 mickey 255: warnx("indirect offset type %c invalid", *l);
1.1 deraadt 256: break;
257: }
258: l++;
259: }
260: s = l;
261: if (*l == '+' || *l == '-') l++;
262: if (isdigit((unsigned char)*l)) {
263: m->in.offset = strtoul(l, &t, 0);
264: if (*s == '-') m->in.offset = - m->in.offset;
265: }
266: else
267: t = l;
268: if (*t++ != ')')
1.5 mickey 269: warnx("missing ')' in indirect offset");
1.1 deraadt 270: l = t;
271: }
272:
273:
274: while (isascii((unsigned char)*l) && isdigit((unsigned char)*l))
275: ++l;
276: EATAB;
277:
278: #define NBYTE 4
279: #define NSHORT 5
280: #define NLONG 4
281: #define NSTRING 6
282: #define NDATE 4
283: #define NBESHORT 7
284: #define NBELONG 6
285: #define NBEDATE 6
286: #define NLESHORT 7
287: #define NLELONG 6
288: #define NLEDATE 6
289:
290: if (*l == 'u') {
291: ++l;
292: m->flag |= UNSIGNED;
293: }
294:
295: /* get type, skip it */
296: if (strncmp(l, "byte", NBYTE)==0) {
297: m->type = BYTE;
298: l += NBYTE;
299: } else if (strncmp(l, "short", NSHORT)==0) {
300: m->type = SHORT;
301: l += NSHORT;
302: } else if (strncmp(l, "long", NLONG)==0) {
303: m->type = LONG;
304: l += NLONG;
305: } else if (strncmp(l, "string", NSTRING)==0) {
306: m->type = STRING;
307: l += NSTRING;
308: } else if (strncmp(l, "date", NDATE)==0) {
309: m->type = DATE;
310: l += NDATE;
311: } else if (strncmp(l, "beshort", NBESHORT)==0) {
312: m->type = BESHORT;
313: l += NBESHORT;
314: } else if (strncmp(l, "belong", NBELONG)==0) {
315: m->type = BELONG;
316: l += NBELONG;
317: } else if (strncmp(l, "bedate", NBEDATE)==0) {
318: m->type = BEDATE;
319: l += NBEDATE;
320: } else if (strncmp(l, "leshort", NLESHORT)==0) {
321: m->type = LESHORT;
322: l += NLESHORT;
323: } else if (strncmp(l, "lelong", NLELONG)==0) {
324: m->type = LELONG;
325: l += NLELONG;
326: } else if (strncmp(l, "ledate", NLEDATE)==0) {
327: m->type = LEDATE;
328: l += NLEDATE;
329: } else {
1.5 mickey 330: warnx("type %s invalid", l);
1.1 deraadt 331: return -1;
332: }
333: /* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */
334: if (*l == '&') {
335: ++l;
336: m->mask = signextend(m, strtoul(l, &l, 0));
337: eatsize(&l);
338: } else
339: m->mask = ~0L;
340: EATAB;
341:
342: switch (*l) {
343: case '>':
344: case '<':
345: /* Old-style anding: "0 byte &0x80 dynamically linked" */
346: case '&':
347: case '^':
348: case '=':
349: m->reln = *l;
350: ++l;
351: break;
352: case '!':
353: if (m->type != STRING) {
354: m->reln = *l;
355: ++l;
356: break;
357: }
358: /* FALL THROUGH */
359: default:
360: if (*l == 'x' && isascii((unsigned char)l[1]) &&
361: isspace((unsigned char)l[1])) {
362: m->reln = *l;
363: ++l;
364: goto GetDesc; /* Bill The Cat */
365: }
366: m->reln = '=';
367: break;
368: }
369: EATAB;
370:
371: if (getvalue(m, &l))
372: return -1;
373: /*
374: * TODO finish this macro and start using it!
375: * #define offsetcheck {if (offset > HOWMANY-1)
1.5 mickey 376: * warnx("offset too big"); }
1.1 deraadt 377: */
378:
379: /*
380: * now get last part - the description
381: */
382: GetDesc:
383: EATAB;
384: if (l[0] == '\b') {
385: ++l;
386: m->nospflag = 1;
387: } else if ((l[0] == '\\') && (l[1] == 'b')) {
388: ++l;
389: ++l;
390: m->nospflag = 1;
391: } else
392: m->nospflag = 0;
393: while ((m->desc[i++] = *l++) != '\0' && i<MAXDESC)
394: /* NULLBODY */;
395:
396: if (check) {
397: mdump(m);
398: }
399: ++(*ndx); /* make room for next */
400: return 0;
401: }
402:
403: /*
404: * Read a numeric value from a pointer, into the value union of a magic
405: * pointer, according to the magic type. Update the string pointer to point
406: * just after the number read. Return 0 for success, non-zero for failure.
407: */
408: static int
409: getvalue(m, p)
410: struct magic *m;
411: char **p;
412: {
413: int slen;
414:
415: if (m->type == STRING) {
416: *p = getstr(*p, m->value.s, sizeof(m->value.s), &slen);
417: m->vallen = slen;
418: } else
419: if (m->reln != 'x') {
420: m->value.l = signextend(m, strtoul(*p, p, 0));
421: eatsize(p);
422: }
423: return 0;
424: }
425:
/*
 * Convert a string containing C character escapes.  Stop at an unescaped
 * space or tab.
 * Copy the converted version to "p", returning its length in *slen.
 *
 * s	input text.
 * p	output buffer of plen bytes; always NUL-terminated on return.
 * slen	receives the number of bytes stored, not counting the NUL.
 *
 * Recognised escapes are \n \r \b \t \f \v, \ooo (up to three octal
 * digits) and \xhh (up to two hex digits); any other escaped character
 * stands for itself.  If the output buffer fills up, "String too long"
 * is reported on stderr and conversion stops.
 *
 * Returns the updated scan pointer: just past the whitespace character
 * that ended the string, or pointing AT the terminating NUL when the
 * input ran out.  It never points beyond the terminator, so the caller
 * can keep scanning safely.
 */
static char *
getstr(s, p, plen, slen)
	char *s;
	char *p;
	int plen, *slen;
{
	char *origs = s, *origp = p;
	char *pmax = p + plen - 1;
	int c;
	int val;

	for (;;) {
		c = *s;
		if (c == '\0')
			break;		/* leave s on the terminator */
		s++;
		if (isspace((unsigned char) c))
			break;
		if (p >= pmax) {
			fprintf(stderr, "String too long: %s\n", origs);
			break;
		}
		if (c != '\\') {
			*p++ = (char)c;
			continue;
		}

		/* escape sequence */
		c = *s;
		if (c == '\0')
			break;		/* dangling backslash: stay on the NUL */
		s++;
		switch (c) {
		default:
			*p++ = (char) c;
			break;

		case 'n':
			*p++ = '\n';
			break;

		case 'r':
			*p++ = '\r';
			break;

		case 'b':
			*p++ = '\b';
			break;

		case 't':
			*p++ = '\t';
			break;

		case 'f':
			*p++ = '\f';
			break;

		case 'v':
			*p++ = '\v';
			break;

		/* \ and up to 3 octal digits */
		case '0':
		case '1':
		case '2':
		case '3':
		case '4':
		case '5':
		case '6':
		case '7':
			val = c - '0';
			c = *s++;	/* try for 2 */
			if (c >= '0' && c <= '7') {
				val = (val<<3) | (c - '0');
				c = *s++;	/* try for 3 */
				if (c >= '0' && c <= '7')
					val = (val<<3) | (c-'0');
				else
					--s;
			} else
				--s;
			*p++ = (char)val;
			break;

		/* \x and up to 2 hex digits */
		case 'x':
			val = 'x';	/* Default if no digits */
			c = hextoint(*s++);	/* Get next char */
			if (c >= 0) {
				val = c;
				c = hextoint(*s++);
				if (c >= 0)
					val = (val << 4) + c;
				else
					--s;
			} else
				--s;
			*p++ = (char)val;
			break;
		}
	}
	*p = '\0';
	*slen = p - origp;
	return s;
}
532:
533:
/*
 * hextoint - value of a single hexadecimal digit character.
 * Returns 0-15 for '0'-'9', 'a'-'f' and 'A'-'F', and -1 for anything
 * else, including bytes outside the ASCII range.
 */
static int
hextoint(c)
	int c;
{
	unsigned char byte = (unsigned char) c;

	if (byte > 0x7f)		/* not ASCII */
		return -1;
	if (isdigit(byte))
		return c - '0';
	if (c >= 'a' && c <= 'f')
		return c - 'a' + 10;
	if (c >= 'A' && c <= 'F')
		return c - 'A' + 10;
	return -1;
}
545:
546:
/*
 * Print a string containing C character escapes.
 *
 * fp	stream to write to.
 * s	bytes to print.
 * len	number of bytes to print, or -1 to print up to (not including)
 *	the terminating NUL.
 *
 * Bytes in the range 040-0176 are written as they are.  Newline,
 * carriage return, backspace, tab, form feed and vertical tab are
 * written as \n \r \b \t \f \v; every other byte as a backslash followed
 * by three octal digits.
 */
void
showstr(fp, s, len)
	FILE *fp;
	const char *s;
	int len;
{
	const int counted = (len != -1);
	char c;

	for (;;) {
		/*
		 * In counted mode test the length BEFORE fetching, so the
		 * byte just past the counted region is never read.
		 */
		if (counted && len-- == 0)
			break;
		c = *s++;
		if (!counted && c == '\0')
			break;

		if (c >= 040 && c <= 0176) {	/* TODO isprint && !iscntrl */
			(void) fputc(c, fp);
			continue;
		}

		(void) fputc('\\', fp);
		switch (c) {
		case '\n':
			(void) fputc('n', fp);
			break;

		case '\r':
			(void) fputc('r', fp);
			break;

		case '\b':
			(void) fputc('b', fp);
			break;

		case '\t':
			(void) fputc('t', fp);
			break;

		case '\f':
			(void) fputc('f', fp);
			break;

		case '\v':
			(void) fputc('v', fp);
			break;

		default:
			/* %o takes an unsigned argument */
			(void) fprintf(fp, "%.3o", (unsigned int)(c & 0377));
			break;
		}
	}
}
605:
/*
 * eatsize(): Eat the size spec from a number [eg. 10UL]
 *
 * Skips an optional 'u' followed by at most one of the size letters
 * l, s, h, b or c (either case) and stores the advanced pointer back
 * through p.
 */
static void
eatsize(p)
	char **p;
{
	char *cur = *p;
	int suffix;

	/* optional unsigned marker */
	if (LOWCASE(*cur) == 'u')
		cur++;

	/* optional size letter: long, short (s/h) or char/byte (b/c) */
	suffix = LOWCASE(*cur);
	if (suffix == 'l' || suffix == 's' || suffix == 'h' ||
	    suffix == 'b' || suffix == 'c')
		cur++;

	*p = cur;
}
631: }