Annotation of src/usr.bin/file/apprentice.c, Revision 1.6
1.6 ! deraadt 1: /* $OpenBSD: apprentice.c,v 1.5 1998/07/10 15:05:13 mickey Exp $ */
1.4 millert 2:
1.1 deraadt 3: /*
4: * apprentice - make one pass through /etc/magic, learning its secrets.
5: *
6: * Copyright (c) Ian F. Darwin, 1987.
7: * Written by Ian F. Darwin.
8: *
9: * This software is not subject to any license of the American Telephone
10: * and Telegraph Company or of the Regents of the University of California.
11: *
12: * Permission is granted to anyone to use this software for any purpose on
13: * any computer system, and to alter it and redistribute it freely, subject
14: * to the following restrictions:
15: *
16: * 1. The author is not responsible for the consequences of use of this
17: * software, no matter how awful, even if they arise from flaws in it.
18: *
19: * 2. The origin of this software must not be misrepresented, either by
20: * explicit claim or by omission. Since few users ever read sources,
21: * credits must appear in the documentation.
22: *
23: * 3. Altered versions must be plainly marked as such, and must not be
24: * misrepresented as being the original software. Since few users
25: * ever read sources, credits must appear in the documentation.
26: *
27: * 4. This notice may not be removed or altered.
28: */
29:
30: #include <stdio.h>
31: #include <stdlib.h>
32: #include <string.h>
33: #include <ctype.h>
1.2 deraadt 34: #include <errno.h>
1.5 mickey 35: #include <err.h>
1.1 deraadt 36: #include "file.h"
37:
38: #ifndef lint
1.6 ! deraadt 39: static char *moduleid = "$OpenBSD: apprentice.c,v 1.5 1998/07/10 15:05:13 mickey Exp $";
1.1 deraadt 40: #endif /* lint */
41:
/*
 * EATAB - advance the scan pointer `l' (which must be in scope at the point
 * of use) past any run of ASCII white space.
 *
 * Wrapped in do { } while (0) so that "EATAB;" is exactly one statement and
 * stays valid as the body of an unbraced if/else.  The "<= 0x7f" test is the
 * same check isascii() performs on an unsigned char value, spelled without
 * the non-ISO function.
 */
#define	EATAB \
	do { \
		while ((unsigned char) *l <= 0x7f && \
		    isspace((unsigned char) *l)) \
			++l; \
	} while (0)

/*
 * LOWCASE - yield the lower-case form of an upper-case character and any
 * other value unchanged.  The argument is evaluated more than once, so it
 * must not have side effects.
 */
#define	LOWCASE(l) \
	(isupper((unsigned char) (l)) ? tolower((unsigned char) (l)) : (l))
46:
47:
48: static int getvalue __P((struct magic *, char **));
49: static int hextoint __P((int));
50: static char *getstr __P((char *, char *, int, int *));
51: static int parse __P((char *, int *, int));
52: static void eatsize __P((char **));
53:
54: static int maxmagic = 0;
55:
1.2 deraadt 56: static int apprentice_1 __P((char *, int));
1.1 deraadt 57:
58: int
59: apprentice(fn, check)
1.2 deraadt 60: char *fn; /* list of magic files */
61: int check; /* non-zero? checking-only run. */
62: {
63: char *p, *mfn;
64: int file_err, errs = -1;
65:
66: maxmagic = MAXMAGIS;
67: magic = (struct magic *) calloc(sizeof(struct magic), maxmagic);
68: mfn = malloc(strlen(fn)+1);
69: if (magic == NULL || mfn == NULL) {
1.5 mickey 70: warn("malloc");
1.2 deraadt 71: if (check)
72: return -1;
73: else
74: exit(1);
75: }
76: fn = strcpy(mfn, fn);
77:
78: while (fn) {
79: p = strchr(fn, ':');
80: if (p)
81: *p++ = '\0';
82: file_err = apprentice_1(fn, check);
83: if (file_err > errs)
84: errs = file_err;
85: fn = p;
86: }
87: if (errs == -1)
1.5 mickey 88: warnx("couldn't find any magic files!");
1.2 deraadt 89: if (!check && errs)
90: exit(1);
91:
92: free(mfn);
93: return errs;
94: }
95:
96: static int
97: apprentice_1(fn, check)
1.1 deraadt 98: char *fn; /* name of magic file */
99: int check; /* non-zero? checking-only run. */
100: {
1.2 deraadt 101: static const char hdr[] =
102: "cont\toffset\ttype\topcode\tmask\tvalue\tdesc";
1.1 deraadt 103: FILE *f;
104: char line[BUFSIZ+1];
105: int errs = 0;
106:
107: f = fopen(fn, "r");
108: if (f==NULL) {
1.2 deraadt 109: if (errno != ENOENT)
1.5 mickey 110: warn(fn);
1.2 deraadt 111: return -1;
1.1 deraadt 112: }
113:
114: /* parse it */
115: if (check) /* print silly verbose header for USG compat. */
1.2 deraadt 116: (void) printf("%s\n", hdr);
1.1 deraadt 117:
118: for (lineno = 1;fgets(line, BUFSIZ, f) != NULL; lineno++) {
119: if (line[0]=='#') /* comment, do not parse */
120: continue;
121: if (strlen(line) <= (unsigned)1) /* null line, garbage, etc */
122: continue;
123: line[strlen(line)-1] = '\0'; /* delete newline */
124: if (parse(line, &nmagic, check) != 0)
1.2 deraadt 125: errs = 1;
1.1 deraadt 126: }
127:
128: (void) fclose(f);
1.2 deraadt 129: return errs;
1.1 deraadt 130: }
131:
132: /*
133: * extend the sign bit if the comparison is to be signed
134: */
1.4 millert 135: uint32
1.1 deraadt 136: signextend(m, v)
137: struct magic *m;
1.4 millert 138: uint32 v;
1.1 deraadt 139: {
140: if (!(m->flag & UNSIGNED))
141: switch(m->type) {
142: /*
143: * Do not remove the casts below. They are
144: * vital. When later compared with the data,
145: * the sign extension must have happened.
146: */
147: case BYTE:
148: v = (char) v;
149: break;
150: case SHORT:
151: case BESHORT:
152: case LESHORT:
153: v = (short) v;
154: break;
155: case DATE:
156: case BEDATE:
157: case LEDATE:
158: case LONG:
159: case BELONG:
160: case LELONG:
1.4 millert 161: v = (int32) v;
1.1 deraadt 162: break;
163: case STRING:
164: break;
165: default:
1.5 mickey 166: warnx("can't happen: m->type=%d\n", m->type);
1.1 deraadt 167: return -1;
168: }
169: return v;
170: }
171:
172: /*
173: * parse one line from magic file, put into magic[index++] if valid
174: */
175: static int
176: parse(l, ndx, check)
177: char *l;
178: int *ndx, check;
179: {
180: int i = 0, nd = *ndx;
181: struct magic *m;
182: char *t, *s;
183:
184: #define ALLOC_INCR 20
185: if (nd+1 >= maxmagic){
1.6 ! deraadt 186: struct magic *mtmp;
! 187:
1.1 deraadt 188: maxmagic += ALLOC_INCR;
1.6 ! deraadt 189: if ((mtmp = (struct magic *) realloc(magic,
1.1 deraadt 190: sizeof(struct magic) *
191: maxmagic)) == NULL) {
1.5 mickey 192: warn("malloc");
1.6 ! deraadt 193: if (check) {
! 194: if (magic)
! 195: free(magic);
! 196: magic = mtmp;
1.1 deraadt 197: return -1;
1.6 ! deraadt 198: } else
1.1 deraadt 199: exit(1);
200: }
201: memset(&magic[*ndx], 0, sizeof(struct magic) * ALLOC_INCR);
202: }
203: m = &magic[*ndx];
204: m->flag = 0;
205: m->cont_level = 0;
206:
207: while (*l == '>') {
208: ++l; /* step over */
209: m->cont_level++;
210: }
211:
212: if (m->cont_level != 0 && *l == '(') {
213: ++l; /* step over */
214: m->flag |= INDIR;
215: }
1.4 millert 216: if (m->cont_level != 0 && *l == '&') {
217: ++l; /* step over */
218: m->flag |= ADD;
219: }
1.1 deraadt 220:
221: /* get offset, then skip over it */
222: m->offset = (int) strtoul(l,&t,0);
223: if (l == t)
1.5 mickey 224: warnx("offset %s invalid", l);
1.1 deraadt 225: l = t;
226:
227: if (m->flag & INDIR) {
228: m->in.type = LONG;
229: m->in.offset = 0;
230: /*
231: * read [.lbs][+-]nnnnn)
232: */
233: if (*l == '.') {
234: l++;
235: switch (LOWCASE(*l)) {
236: case 'l':
237: m->in.type = LONG;
238: break;
239: case 'h':
240: case 's':
241: m->in.type = SHORT;
242: break;
243: case 'c':
244: case 'b':
245: m->in.type = BYTE;
246: break;
247: default:
1.5 mickey 248: warnx("indirect offset type %c invalid", *l);
1.1 deraadt 249: break;
250: }
251: l++;
252: }
253: s = l;
254: if (*l == '+' || *l == '-') l++;
255: if (isdigit((unsigned char)*l)) {
256: m->in.offset = strtoul(l, &t, 0);
257: if (*s == '-') m->in.offset = - m->in.offset;
258: }
259: else
260: t = l;
261: if (*t++ != ')')
1.5 mickey 262: warnx("missing ')' in indirect offset");
1.1 deraadt 263: l = t;
264: }
265:
266:
267: while (isascii((unsigned char)*l) && isdigit((unsigned char)*l))
268: ++l;
269: EATAB;
270:
271: #define NBYTE 4
272: #define NSHORT 5
273: #define NLONG 4
274: #define NSTRING 6
275: #define NDATE 4
276: #define NBESHORT 7
277: #define NBELONG 6
278: #define NBEDATE 6
279: #define NLESHORT 7
280: #define NLELONG 6
281: #define NLEDATE 6
282:
283: if (*l == 'u') {
284: ++l;
285: m->flag |= UNSIGNED;
286: }
287:
288: /* get type, skip it */
289: if (strncmp(l, "byte", NBYTE)==0) {
290: m->type = BYTE;
291: l += NBYTE;
292: } else if (strncmp(l, "short", NSHORT)==0) {
293: m->type = SHORT;
294: l += NSHORT;
295: } else if (strncmp(l, "long", NLONG)==0) {
296: m->type = LONG;
297: l += NLONG;
298: } else if (strncmp(l, "string", NSTRING)==0) {
299: m->type = STRING;
300: l += NSTRING;
301: } else if (strncmp(l, "date", NDATE)==0) {
302: m->type = DATE;
303: l += NDATE;
304: } else if (strncmp(l, "beshort", NBESHORT)==0) {
305: m->type = BESHORT;
306: l += NBESHORT;
307: } else if (strncmp(l, "belong", NBELONG)==0) {
308: m->type = BELONG;
309: l += NBELONG;
310: } else if (strncmp(l, "bedate", NBEDATE)==0) {
311: m->type = BEDATE;
312: l += NBEDATE;
313: } else if (strncmp(l, "leshort", NLESHORT)==0) {
314: m->type = LESHORT;
315: l += NLESHORT;
316: } else if (strncmp(l, "lelong", NLELONG)==0) {
317: m->type = LELONG;
318: l += NLELONG;
319: } else if (strncmp(l, "ledate", NLEDATE)==0) {
320: m->type = LEDATE;
321: l += NLEDATE;
322: } else {
1.5 mickey 323: warnx("type %s invalid", l);
1.1 deraadt 324: return -1;
325: }
326: /* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */
327: if (*l == '&') {
328: ++l;
329: m->mask = signextend(m, strtoul(l, &l, 0));
330: eatsize(&l);
331: } else
332: m->mask = ~0L;
333: EATAB;
334:
335: switch (*l) {
336: case '>':
337: case '<':
338: /* Old-style anding: "0 byte &0x80 dynamically linked" */
339: case '&':
340: case '^':
341: case '=':
342: m->reln = *l;
343: ++l;
344: break;
345: case '!':
346: if (m->type != STRING) {
347: m->reln = *l;
348: ++l;
349: break;
350: }
351: /* FALL THROUGH */
352: default:
353: if (*l == 'x' && isascii((unsigned char)l[1]) &&
354: isspace((unsigned char)l[1])) {
355: m->reln = *l;
356: ++l;
357: goto GetDesc; /* Bill The Cat */
358: }
359: m->reln = '=';
360: break;
361: }
362: EATAB;
363:
364: if (getvalue(m, &l))
365: return -1;
366: /*
367: * TODO finish this macro and start using it!
368: * #define offsetcheck {if (offset > HOWMANY-1)
1.5 mickey 369: * warnx("offset too big"); }
1.1 deraadt 370: */
371:
372: /*
373: * now get last part - the description
374: */
375: GetDesc:
376: EATAB;
377: if (l[0] == '\b') {
378: ++l;
379: m->nospflag = 1;
380: } else if ((l[0] == '\\') && (l[1] == 'b')) {
381: ++l;
382: ++l;
383: m->nospflag = 1;
384: } else
385: m->nospflag = 0;
386: while ((m->desc[i++] = *l++) != '\0' && i<MAXDESC)
387: /* NULLBODY */;
388:
389: if (check) {
390: mdump(m);
391: }
392: ++(*ndx); /* make room for next */
393: return 0;
394: }
395:
396: /*
397: * Read a numeric value from a pointer, into the value union of a magic
398: * pointer, according to the magic type. Update the string pointer to point
399: * just after the number read. Return 0 for success, non-zero for failure.
400: */
401: static int
402: getvalue(m, p)
403: struct magic *m;
404: char **p;
405: {
406: int slen;
407:
408: if (m->type == STRING) {
409: *p = getstr(*p, m->value.s, sizeof(m->value.s), &slen);
410: m->vallen = slen;
411: } else
412: if (m->reln != 'x') {
413: m->value.l = signextend(m, strtoul(*p, p, 0));
414: eatsize(p);
415: }
416: return 0;
417: }
418:
/*
 * Convert a string containing C character escapes.  Stop at an unescaped
 * space or tab.
 * Copy the converted version to "p", returning its length in *slen.
 * Return updated scan pointer as function result.
 *
 * s     text to convert
 * p     destination buffer of plen bytes; always NUL-terminated on return
 * plen  size of the destination buffer
 * slen  out: number of bytes stored, not counting the terminator
 *
 * The returned pointer is just past the white-space character that ended
 * the value, or ON the terminating NUL when the input ran out first (also
 * after a dangling backslash) -- never beyond the end of the string.
 * Input that does not fit is reported on stderr and conversion stops.
 */
static char *
getstr(s, p, plen, slen)
register char	*s;
register char	*p;
int	plen, *slen;
{
	char	*origs = s, *origp = p;
	char	*pmax = p + plen - 1;
	register int	c;
	register int	val;

	/* Look before stepping so that s is never moved past the NUL. */
	while ((c = *s) != '\0') {
		s++;
		if (isspace((unsigned char) c))
			break;
		if (p >= pmax) {
			fprintf(stderr, "String too long: %s\n", origs);
			break;
		}
		if (c != '\\') {
			*p++ = (char)c;
			continue;
		}

		/* Backslash at the very end: nothing left to escape. */
		if (*s == '\0')
			break;

		switch (c = *s++) {

		default:
			*p++ = (char) c;
			break;

		case 'n':
			*p++ = '\n';
			break;

		case 'r':
			*p++ = '\r';
			break;

		case 'b':
			*p++ = '\b';
			break;

		case 't':
			*p++ = '\t';
			break;

		case 'f':
			*p++ = '\f';
			break;

		case 'v':
			*p++ = '\v';
			break;

		/* \ and up to 3 octal digits */
		case '0':
		case '1':
		case '2':
		case '3':
		case '4':
		case '5':
		case '6':
		case '7':
			val = c - '0';
			c = *s++;  /* try for 2 */
			if (c >= '0' && c <= '7') {
				val = (val<<3) | (c - '0');
				c = *s++;  /* try for 3 */
				if (c >= '0' && c <= '7')
					val = (val<<3) | (c-'0');
				else
					--s;
			} else
				--s;
			*p++ = (char)val;
			break;

		/* \x and up to 2 hex digits */
		case 'x':
			val = 'x';	/* Default if no digits */
			c = hextoint(*s++);	/* Get next char */
			if (c >= 0) {
				val = c;
				c = hextoint(*s++);
				if (c >= 0)
					val = (val << 4) + c;
				else
					--s;
			} else
				--s;
			*p++ = (char)val;
			break;
		}
	}
	*p = '\0';
	*slen = p - origp;
	return s;
}
525:
526:
/*
 * hextoint - value of a single hexadecimal digit.
 *
 * c  character to convert
 *
 * Returns 0..15 for '0'-'9', 'a'-'f' and 'A'-'F', and -1 for anything else,
 * including every character whose unsigned char value is above 0x7f.
 */
static int
hextoint(c)
int c;
{
	unsigned char uc = (unsigned char) c;

	/* Same test as isascii() on an unsigned char value. */
	if (uc > 0x7f)
		return -1;
	if (isdigit(uc))
		return c - '0';
	if (c >= 'a' && c <= 'f')
		return c - 'a' + 10;
	if (c >= 'A' && c <= 'F')
		return c - 'A' + 10;
	return -1;
}
538:
539:
/*
 * Print a string containing C character escapes.
 *
 * fp   stream to write to
 * s    bytes to print
 * len  number of bytes to print, or -1 to print up to (not including) the
 *      first NUL
 *
 * Bytes in the range 040..0176 are written as they are.  Newline, carriage
 * return, backspace, tab, form feed and vertical tab are written as \n, \r,
 * \b, \t, \f and \v; every other byte as a backslash and three octal digits.
 */
void
showstr(fp, s, len)
FILE *fp;
const char *s;
int len;
{
	register char	c;

	for (;;) {
		/*
		 * Decide whether to stop BEFORE fetching the next byte, so
		 * that a counted string is never read past its last byte.
		 */
		if (len == -1) {
			if (*s == '\0')
				break;
		} else if (len-- == 0)
			break;

		c = *s++;
		if (c >= 040 && c <= 0176) {	/* TODO isprint && !iscntrl */
			(void) fputc(c, fp);
			continue;
		}

		(void) fputc('\\', fp);
		switch (c) {

		case '\n':
			(void) fputc('n', fp);
			break;

		case '\r':
			(void) fputc('r', fp);
			break;

		case '\b':
			(void) fputc('b', fp);
			break;

		case '\t':
			(void) fputc('t', fp);
			break;

		case '\f':
			(void) fputc('f', fp);
			break;

		case '\v':
			(void) fputc('v', fp);
			break;

		default:
			/* Mask to eight bits so a negative char prints sanely. */
			(void) fprintf(fp, "%.3o", c & 0377);
			break;
		}
	}
}
598:
/*
 * eatsize(): Eat the size spec from a number [eg. 10UL]
 *
 * p  in/out: scan pointer positioned right after the digits
 *
 * Skips an optional 'u' and then at most one of 'l', 's', 'h', 'b' or 'c'
 * (long, short, short, byte, byte), each in either case.  Anything else is
 * left where it is.
 */
static void
eatsize(p)
char **p;
{
	char *cur = *p;
	int ch;

	/* Optional unsigned marker. */
	ch = isupper((unsigned char) *cur) ?
	    tolower((unsigned char) *cur) : *cur;
	if (ch == 'u')
		cur++;

	/* Optional width letter. */
	ch = isupper((unsigned char) *cur) ?
	    tolower((unsigned char) *cur) : *cur;
	if (ch == 'l' || ch == 's' || ch == 'h' || ch == 'b' || ch == 'c')
		cur++;

	*p = cur;
}
624: }