Annotation of src/usr.bin/file/apprentice.c, Revision 1.13
1.13 ! mpech 1: /* $OpenBSD: apprentice.c,v 1.12 2002/02/16 21:27:46 millert Exp $ */
1.4 millert 2:
1.1 deraadt 3: /*
4: * apprentice - make one pass through /etc/magic, learning its secrets.
5: *
6: * Copyright (c) Ian F. Darwin, 1987.
7: * Written by Ian F. Darwin.
8: *
9: * This software is not subject to any license of the American Telephone
10: * and Telegraph Company or of the Regents of the University of California.
11: *
12: * Permission is granted to anyone to use this software for any purpose on
13: * any computer system, and to alter it and redistribute it freely, subject
14: * to the following restrictions:
15: *
16: * 1. The author is not responsible for the consequences of use of this
17: * software, no matter how awful, even if they arise from flaws in it.
18: *
19: * 2. The origin of this software must not be misrepresented, either by
20: * explicit claim or by omission. Since few users ever read sources,
21: * credits must appear in the documentation.
22: *
23: * 3. Altered versions must be plainly marked as such, and must not be
24: * misrepresented as being the original software. Since few users
25: * ever read sources, credits must appear in the documentation.
26: *
27: * 4. This notice may not be removed or altered.
28: */
29:
30: #include <stdio.h>
31: #include <stdlib.h>
32: #include <string.h>
33: #include <ctype.h>
1.2 deraadt 34: #include <errno.h>
1.5 mickey 35: #include <err.h>
1.1 deraadt 36: #include "file.h"
37:
38: #ifndef lint
1.13 ! mpech 39: static char *moduleid = "$OpenBSD: apprentice.c,v 1.12 2002/02/16 21:27:46 millert Exp $";
1.1 deraadt 40: #endif /* lint */
41:
/*
 * EATAB advances the local scan pointer "l" past any run of ASCII
 * whitespace.  It expects a variable named "l" in the enclosing scope
 * and is written as a single statement so it can be followed by ';'.
 */
#define	EATAB							\
	do {							\
		while (isascii((unsigned char) *l) &&		\
		    isspace((unsigned char) *l))		\
			++l;					\
	} while (0)

/*
 * LOWCASE yields the lower-case form of an upper-case character and
 * the argument itself otherwise.  The argument is evaluated more than
 * once, so it must not have side effects.
 */
#define	LOWCASE(l)						\
	(isupper((unsigned char) (l)) ?				\
	    tolower((unsigned char) (l)) : (l))
46:
47:
/* Forward declarations of the file-local helpers defined below. */
static int	 apprentice_1(char *fn, int check);
static int	 parse(char *l, int *ndx, int check);
static int	 getvalue(struct magic *m, char **p);
static char	*getstr(char *s, char *p, int plen, int *slen);
static int	 hextoint(int c);
static void	 eatsize(char **p);

/* Number of entries currently allocated for the magic[] table. */
static int maxmagic = 0;
/* Number of entries added the next time parse() grows magic[]. */
static int alloc_incr = 256;
1.1 deraadt 58:
59: int
60: apprentice(fn, check)
1.2 deraadt 61: char *fn; /* list of magic files */
62: int check; /* non-zero? checking-only run. */
63: {
64: char *p, *mfn;
65: int file_err, errs = -1;
66:
67: maxmagic = MAXMAGIS;
68: magic = (struct magic *) calloc(sizeof(struct magic), maxmagic);
69: mfn = malloc(strlen(fn)+1);
70: if (magic == NULL || mfn == NULL) {
1.5 mickey 71: warn("malloc");
1.2 deraadt 72: if (check)
73: return -1;
74: else
75: exit(1);
76: }
1.9 deraadt 77: fn = strcpy(mfn, fn); /* ok */
1.2 deraadt 78:
79: while (fn) {
80: p = strchr(fn, ':');
81: if (p)
82: *p++ = '\0';
83: file_err = apprentice_1(fn, check);
84: if (file_err > errs)
85: errs = file_err;
86: fn = p;
87: }
88: if (errs == -1)
1.5 mickey 89: warnx("couldn't find any magic files!");
1.2 deraadt 90: if (!check && errs)
91: exit(1);
92:
93: free(mfn);
94: return errs;
95: }
96:
97: static int
98: apprentice_1(fn, check)
1.1 deraadt 99: char *fn; /* name of magic file */
100: int check; /* non-zero? checking-only run. */
101: {
1.2 deraadt 102: static const char hdr[] =
103: "cont\toffset\ttype\topcode\tmask\tvalue\tdesc";
1.1 deraadt 104: FILE *f;
105: char line[BUFSIZ+1];
106: int errs = 0;
107:
108: f = fopen(fn, "r");
109: if (f==NULL) {
1.2 deraadt 110: if (errno != ENOENT)
1.10 millert 111: warn("%s", fn);
1.2 deraadt 112: return -1;
1.1 deraadt 113: }
114:
115: /* parse it */
116: if (check) /* print silly verbose header for USG compat. */
1.2 deraadt 117: (void) printf("%s\n", hdr);
1.1 deraadt 118:
119: for (lineno = 1;fgets(line, BUFSIZ, f) != NULL; lineno++) {
120: if (line[0]=='#') /* comment, do not parse */
121: continue;
122: if (strlen(line) <= (unsigned)1) /* null line, garbage, etc */
123: continue;
124: line[strlen(line)-1] = '\0'; /* delete newline */
125: if (parse(line, &nmagic, check) != 0)
1.2 deraadt 126: errs = 1;
1.1 deraadt 127: }
128:
129: (void) fclose(f);
1.2 deraadt 130: return errs;
1.1 deraadt 131: }
132:
133: /*
134: * extend the sign bit if the comparison is to be signed
135: */
1.4 millert 136: uint32
1.1 deraadt 137: signextend(m, v)
138: struct magic *m;
1.4 millert 139: uint32 v;
1.1 deraadt 140: {
141: if (!(m->flag & UNSIGNED))
142: switch(m->type) {
143: /*
144: * Do not remove the casts below. They are
145: * vital. When later compared with the data,
146: * the sign extension must have happened.
147: */
148: case BYTE:
149: v = (char) v;
150: break;
151: case SHORT:
152: case BESHORT:
153: case LESHORT:
154: v = (short) v;
155: break;
156: case DATE:
157: case BEDATE:
158: case LEDATE:
159: case LONG:
160: case BELONG:
161: case LELONG:
1.4 millert 162: v = (int32) v;
1.1 deraadt 163: break;
164: case STRING:
165: break;
166: default:
1.13 ! mpech 167: warnx("can't happen: m->type=%d", m->type);
1.1 deraadt 168: return -1;
169: }
170: return v;
171: }
172:
173: /*
174: * parse one line from magic file, put into magic[index++] if valid
175: */
176: static int
177: parse(l, ndx, check)
178: char *l;
179: int *ndx, check;
180: {
181: int i = 0, nd = *ndx;
182: struct magic *m;
183: char *t, *s;
184:
185: if (nd+1 >= maxmagic){
1.6 deraadt 186: struct magic *mtmp;
187:
1.8 ian 188: maxmagic += alloc_incr;
1.6 deraadt 189: if ((mtmp = (struct magic *) realloc(magic,
1.1 deraadt 190: sizeof(struct magic) *
191: maxmagic)) == NULL) {
1.5 mickey 192: warn("malloc");
1.6 deraadt 193: if (check) {
194: if (magic)
195: free(magic);
1.1 deraadt 196: return -1;
1.6 deraadt 197: } else
1.1 deraadt 198: exit(1);
199: }
1.7 deraadt 200: magic = mtmp;
1.8 ian 201: memset(&magic[*ndx], 0, sizeof(struct magic) * alloc_incr);
202: alloc_incr *= 2;
1.1 deraadt 203: }
204: m = &magic[*ndx];
205: m->flag = 0;
206: m->cont_level = 0;
207:
208: while (*l == '>') {
209: ++l; /* step over */
210: m->cont_level++;
211: }
212:
213: if (m->cont_level != 0 && *l == '(') {
214: ++l; /* step over */
215: m->flag |= INDIR;
216: }
1.4 millert 217: if (m->cont_level != 0 && *l == '&') {
218: ++l; /* step over */
219: m->flag |= ADD;
220: }
1.1 deraadt 221:
222: /* get offset, then skip over it */
223: m->offset = (int) strtoul(l,&t,0);
224: if (l == t)
1.5 mickey 225: warnx("offset %s invalid", l);
1.1 deraadt 226: l = t;
227:
228: if (m->flag & INDIR) {
229: m->in.type = LONG;
230: m->in.offset = 0;
231: /*
232: * read [.lbs][+-]nnnnn)
233: */
234: if (*l == '.') {
235: l++;
236: switch (LOWCASE(*l)) {
237: case 'l':
238: m->in.type = LONG;
239: break;
240: case 'h':
241: case 's':
242: m->in.type = SHORT;
243: break;
244: case 'c':
245: case 'b':
246: m->in.type = BYTE;
247: break;
248: default:
1.5 mickey 249: warnx("indirect offset type %c invalid", *l);
1.1 deraadt 250: break;
251: }
252: l++;
253: }
254: s = l;
255: if (*l == '+' || *l == '-') l++;
256: if (isdigit((unsigned char)*l)) {
257: m->in.offset = strtoul(l, &t, 0);
258: if (*s == '-') m->in.offset = - m->in.offset;
259: }
260: else
261: t = l;
262: if (*t++ != ')')
1.5 mickey 263: warnx("missing ')' in indirect offset");
1.1 deraadt 264: l = t;
265: }
266:
267:
268: while (isascii((unsigned char)*l) && isdigit((unsigned char)*l))
269: ++l;
270: EATAB;
271:
272: #define NBYTE 4
273: #define NSHORT 5
274: #define NLONG 4
275: #define NSTRING 6
276: #define NDATE 4
277: #define NBESHORT 7
278: #define NBELONG 6
279: #define NBEDATE 6
280: #define NLESHORT 7
281: #define NLELONG 6
282: #define NLEDATE 6
283:
284: if (*l == 'u') {
285: ++l;
286: m->flag |= UNSIGNED;
287: }
288:
289: /* get type, skip it */
290: if (strncmp(l, "byte", NBYTE)==0) {
291: m->type = BYTE;
292: l += NBYTE;
293: } else if (strncmp(l, "short", NSHORT)==0) {
294: m->type = SHORT;
295: l += NSHORT;
296: } else if (strncmp(l, "long", NLONG)==0) {
297: m->type = LONG;
298: l += NLONG;
299: } else if (strncmp(l, "string", NSTRING)==0) {
300: m->type = STRING;
301: l += NSTRING;
302: } else if (strncmp(l, "date", NDATE)==0) {
303: m->type = DATE;
304: l += NDATE;
305: } else if (strncmp(l, "beshort", NBESHORT)==0) {
306: m->type = BESHORT;
307: l += NBESHORT;
308: } else if (strncmp(l, "belong", NBELONG)==0) {
309: m->type = BELONG;
310: l += NBELONG;
311: } else if (strncmp(l, "bedate", NBEDATE)==0) {
312: m->type = BEDATE;
313: l += NBEDATE;
314: } else if (strncmp(l, "leshort", NLESHORT)==0) {
315: m->type = LESHORT;
316: l += NLESHORT;
317: } else if (strncmp(l, "lelong", NLELONG)==0) {
318: m->type = LELONG;
319: l += NLELONG;
320: } else if (strncmp(l, "ledate", NLEDATE)==0) {
321: m->type = LEDATE;
322: l += NLEDATE;
323: } else {
1.5 mickey 324: warnx("type %s invalid", l);
1.1 deraadt 325: return -1;
326: }
327: /* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */
328: if (*l == '&') {
329: ++l;
330: m->mask = signextend(m, strtoul(l, &l, 0));
331: eatsize(&l);
332: } else
333: m->mask = ~0L;
334: EATAB;
335:
336: switch (*l) {
337: case '>':
338: case '<':
339: /* Old-style anding: "0 byte &0x80 dynamically linked" */
340: case '&':
341: case '^':
342: case '=':
343: m->reln = *l;
344: ++l;
345: break;
346: case '!':
347: if (m->type != STRING) {
348: m->reln = *l;
349: ++l;
350: break;
351: }
352: /* FALL THROUGH */
353: default:
354: if (*l == 'x' && isascii((unsigned char)l[1]) &&
355: isspace((unsigned char)l[1])) {
356: m->reln = *l;
357: ++l;
358: goto GetDesc; /* Bill The Cat */
359: }
360: m->reln = '=';
361: break;
362: }
363: EATAB;
364:
365: if (getvalue(m, &l))
366: return -1;
367: /*
368: * TODO finish this macro and start using it!
369: * #define offsetcheck {if (offset > HOWMANY-1)
1.5 mickey 370: * warnx("offset too big"); }
1.1 deraadt 371: */
372:
373: /*
374: * now get last part - the description
375: */
376: GetDesc:
377: EATAB;
378: if (l[0] == '\b') {
379: ++l;
380: m->nospflag = 1;
381: } else if ((l[0] == '\\') && (l[1] == 'b')) {
382: ++l;
383: ++l;
384: m->nospflag = 1;
385: } else
386: m->nospflag = 0;
387: while ((m->desc[i++] = *l++) != '\0' && i<MAXDESC)
388: /* NULLBODY */;
389:
390: if (check) {
391: mdump(m);
392: }
393: ++(*ndx); /* make room for next */
394: return 0;
395: }
396:
397: /*
398: * Read a numeric value from a pointer, into the value union of a magic
399: * pointer, according to the magic type. Update the string pointer to point
400: * just after the number read. Return 0 for success, non-zero for failure.
401: */
402: static int
403: getvalue(m, p)
404: struct magic *m;
405: char **p;
406: {
407: int slen;
408:
409: if (m->type == STRING) {
410: *p = getstr(*p, m->value.s, sizeof(m->value.s), &slen);
411: m->vallen = slen;
412: } else
413: if (m->reln != 'x') {
414: m->value.l = signextend(m, strtoul(*p, p, 0));
415: eatsize(p);
416: }
417: return 0;
418: }
419:
/*
 * Convert a string containing C character escapes.  Stop at an
 * unescaped whitespace character or at the end of the input.
 *
 * s:    input to scan.
 * p:    output buffer for the converted string.
 * plen: size of the output buffer in bytes.
 * slen: receives the length of the converted string.
 *
 * The output is always NUL-terminated.  If it would not fit, a
 * "String too long" message is printed and conversion stops.
 *
 * Returns the updated scan pointer: just past the whitespace that
 * ended the value, or pointing at the terminating NUL when the input
 * ran out.  It never points beyond the terminator.
 */
static char *
getstr(char *s, char *p, int plen, int *slen)
{
	char *origs = s, *origp = p;
	char *pmax = p + plen - 1;
	int c;
	int val;

	for (;;) {
		c = *s;
		if (c == '\0')
			break;		/* leave s on the terminator */
		s++;
		if (isspace((unsigned char) c))
			break;
		if (p >= pmax) {
			fprintf(stderr, "String too long: %s\n", origs);
			break;
		}
		if (c != '\\') {
			*p++ = (char)c;
			continue;
		}

		c = *s;
		if (c == '\0')
			break;		/* trailing backslash: stay on the NUL */
		s++;

		switch (c) {
		case 'n':
			*p++ = '\n';
			break;

		case 'r':
			*p++ = '\r';
			break;

		case 'b':
			*p++ = '\b';
			break;

		case 't':
			*p++ = '\t';
			break;

		case 'f':
			*p++ = '\f';
			break;

		case 'v':
			*p++ = '\v';
			break;

		/* \ and up to 3 octal digits */
		case '0':
		case '1':
		case '2':
		case '3':
		case '4':
		case '5':
		case '6':
		case '7':
			val = c - '0';
			c = *s++;	/* try for 2 */
			if (c >= '0' && c <= '7') {
				val = (val << 3) | (c - '0');
				c = *s++;	/* try for 3 */
				if (c >= '0' && c <= '7')
					val = (val << 3) | (c - '0');
				else
					--s;
			} else
				--s;
			*p++ = (char)val;
			break;

		/* \x and up to 2 hex digits */
		case 'x':
			val = 'x';	/* Default if no digits */
			c = hextoint(*s++);	/* Get next char */
			if (c >= 0) {
				val = c;
				c = hextoint(*s++);
				if (c >= 0)
					val = (val << 4) + c;
				else
					--s;
			} else
				--s;
			*p++ = (char)val;
			break;

		/* Any other escaped character stands for itself. */
		default:
			*p++ = (char) c;
			break;
		}
	}

	*p = '\0';
	*slen = (int)(p - origp);
	return s;
}
526:
527:
/*
 * Convert a single hexadecimal digit character to its value.
 * Returns -1 if "c" is not a hex digit.
 */
static int
hextoint(int c)
{
	unsigned char uc = (unsigned char) c;

	/* Only the 7-bit ASCII range can hold a hex digit. */
	if (uc < 0x80) {
		if (isdigit(uc))
			return c - '0';
		if (c >= 'a' && c <= 'f')
			return c - 'a' + 10;
		if (c >= 'A' && c <= 'F')
			return c - 'A' + 10;
	}
	return -1;
}
539:
540:
/*
 * Print a string to "fp", writing non-printable bytes as C character
 * escapes.
 *
 * fp:  output stream.
 * s:   bytes to print.
 * len: number of bytes to print, or -1 to print up to (not including)
 *      the first NUL.
 *
 * Bytes in the range 040..0176 are written as-is.  Newline, carriage
 * return, backspace, tab, form feed and vertical tab are written as
 * \n, \r, \b, \t, \f and \v.  Every other byte is written as a
 * backslash followed by three octal digits.
 */
void
showstr(FILE *fp, const char *s, int len)
{
	char c;

	for (;;) {
		/*
		 * Decide whether to stop before touching *s, so that with
		 * an explicit length the byte at s[len] is never read.
		 */
		if (len == -1) {
			if (*s == '\0')
				break;
		} else {
			if (len-- == 0)
				break;
		}
		c = *s++;

		if (c >= 040 && c <= 0176) {	/* TODO isprint && !iscntrl */
			(void) fputc(c, fp);
			continue;
		}

		(void) fputc('\\', fp);
		switch (c) {
		case '\n':
			(void) fputc('n', fp);
			break;

		case '\r':
			(void) fputc('r', fp);
			break;

		case '\b':
			(void) fputc('b', fp);
			break;

		case '\t':
			(void) fputc('t', fp);
			break;

		case '\f':
			(void) fputc('f', fp);
			break;

		case '\v':
			(void) fputc('v', fp);
			break;

		default:
			/* Mask to 8 bits so a negative char prints as 3 digits. */
			(void) fprintf(fp, "%.3o", c & 0377);
			break;
		}
	}
}
599:
/*
 * eatsize(): skip the size suffix that may follow a number [eg. 10UL].
 *
 * An optional 'u' followed by an optional one of 'l' (long), 's' or
 * 'h' (short), 'b' or 'c' (char/byte) is consumed, in either case.
 * *p is advanced past whatever was consumed.
 */
static void
eatsize(char **p)
{
	char *cur = *p;
	int lc = tolower((unsigned char) *cur);

	/* Optional unsigned marker. */
	if (lc == 'u') {
		cur++;
		lc = tolower((unsigned char) *cur);
	}

	/* Optional width letter. */
	if (lc == 'l' || lc == 's' || lc == 'h' || lc == 'b' || lc == 'c')
		cur++;

	*p = cur;
}
625: }