Annotation of src/usr.bin/file/apprentice.c, Revision 1.4
1.4 ! millert 1: /* $OpenBSD: apprentice.c,v 1.3 1996/06/26 05:32:54 deraadt Exp $ */
! 2:
1.1 deraadt 3: /*
4: * apprentice - make one pass through /etc/magic, learning its secrets.
5: *
6: * Copyright (c) Ian F. Darwin, 1987.
7: * Written by Ian F. Darwin.
8: *
9: * This software is not subject to any license of the American Telephone
10: * and Telegraph Company or of the Regents of the University of California.
11: *
12: * Permission is granted to anyone to use this software for any purpose on
13: * any computer system, and to alter it and redistribute it freely, subject
14: * to the following restrictions:
15: *
16: * 1. The author is not responsible for the consequences of use of this
17: * software, no matter how awful, even if they arise from flaws in it.
18: *
19: * 2. The origin of this software must not be misrepresented, either by
20: * explicit claim or by omission. Since few users ever read sources,
21: * credits must appear in the documentation.
22: *
23: * 3. Altered versions must be plainly marked as such, and must not be
24: * misrepresented as being the original software. Since few users
25: * ever read sources, credits must appear in the documentation.
26: *
27: * 4. This notice may not be removed or altered.
28: */
29:
30: #include <stdio.h>
31: #include <stdlib.h>
32: #include <string.h>
33: #include <ctype.h>
1.2 deraadt 34: #include <errno.h>
1.1 deraadt 35: #include "file.h"
36:
37: #ifndef lint
1.4 ! millert 38: static char *moduleid = "$OpenBSD: apprentice.c,v 1.3 1996/06/26 05:32:54 deraadt Exp $";
1.1 deraadt 39: #endif /* lint */
40:
/*
 * EATAB: advance the local scan pointer `l' past any run of ASCII
 * whitespace.  Written as do { } while (0) so that "EATAB;" is a single
 * statement and stays safe inside an unbraced if/else.
 */
#define	EATAB	do { \
			while (isascii((unsigned char) *l) && \
			    isspace((unsigned char) *l)) \
				++l; \
		} while (0)
/*
 * LOWCASE: lower-case version of an upper-case character, any other
 * value unchanged.  The argument is evaluated more than once, so it must
 * not have side effects.
 */
#define LOWCASE(l) (isupper((unsigned char) (l)) ? \
			tolower((unsigned char) (l)) : (l))
45:
46:
47: static int getvalue __P((struct magic *, char **));
48: static int hextoint __P((int));
49: static char *getstr __P((char *, char *, int, int *));
50: static int parse __P((char *, int *, int));
51: static void eatsize __P((char **));
52:
53: static int maxmagic = 0;
54:
1.2 deraadt 55: static int apprentice_1 __P((char *, int));
1.1 deraadt 56:
57: int
58: apprentice(fn, check)
1.2 deraadt 59: char *fn; /* list of magic files */
60: int check; /* non-zero? checking-only run. */
61: {
62: char *p, *mfn;
63: int file_err, errs = -1;
64:
65: maxmagic = MAXMAGIS;
66: magic = (struct magic *) calloc(sizeof(struct magic), maxmagic);
67: mfn = malloc(strlen(fn)+1);
68: if (magic == NULL || mfn == NULL) {
69: (void) fprintf(stderr, "%s: Out of memory.\n", progname);
70: if (check)
71: return -1;
72: else
73: exit(1);
74: }
75: fn = strcpy(mfn, fn);
76:
77: while (fn) {
78: p = strchr(fn, ':');
79: if (p)
80: *p++ = '\0';
81: file_err = apprentice_1(fn, check);
82: if (file_err > errs)
83: errs = file_err;
84: fn = p;
85: }
86: if (errs == -1)
87: (void) fprintf(stderr, "%s: couldn't find any magic files!\n",
88: progname);
89: if (!check && errs)
90: exit(1);
91:
92: free(mfn);
93: return errs;
94: }
95:
96: static int
97: apprentice_1(fn, check)
1.1 deraadt 98: char *fn; /* name of magic file */
99: int check; /* non-zero? checking-only run. */
100: {
1.2 deraadt 101: static const char hdr[] =
102: "cont\toffset\ttype\topcode\tmask\tvalue\tdesc";
1.1 deraadt 103: FILE *f;
104: char line[BUFSIZ+1];
105: int errs = 0;
106:
107: f = fopen(fn, "r");
108: if (f==NULL) {
1.2 deraadt 109: if (errno != ENOENT)
110: (void) fprintf(stderr,
111: "%s: can't read magic file %s (%s)\n",
112: progname, fn, strerror(errno));
113: return -1;
1.1 deraadt 114: }
115:
116: /* parse it */
117: if (check) /* print silly verbose header for USG compat. */
1.2 deraadt 118: (void) printf("%s\n", hdr);
1.1 deraadt 119:
120: for (lineno = 1;fgets(line, BUFSIZ, f) != NULL; lineno++) {
121: if (line[0]=='#') /* comment, do not parse */
122: continue;
123: if (strlen(line) <= (unsigned)1) /* null line, garbage, etc */
124: continue;
125: line[strlen(line)-1] = '\0'; /* delete newline */
126: if (parse(line, &nmagic, check) != 0)
1.2 deraadt 127: errs = 1;
1.1 deraadt 128: }
129:
130: (void) fclose(f);
1.2 deraadt 131: return errs;
1.1 deraadt 132: }
133:
134: /*
135: * extend the sign bit if the comparison is to be signed
136: */
1.4 ! millert 137: uint32
1.1 deraadt 138: signextend(m, v)
139: struct magic *m;
1.4 ! millert 140: uint32 v;
1.1 deraadt 141: {
142: if (!(m->flag & UNSIGNED))
143: switch(m->type) {
144: /*
145: * Do not remove the casts below. They are
146: * vital. When later compared with the data,
147: * the sign extension must have happened.
148: */
149: case BYTE:
150: v = (char) v;
151: break;
152: case SHORT:
153: case BESHORT:
154: case LESHORT:
155: v = (short) v;
156: break;
157: case DATE:
158: case BEDATE:
159: case LEDATE:
160: case LONG:
161: case BELONG:
162: case LELONG:
1.4 ! millert 163: v = (int32) v;
1.1 deraadt 164: break;
165: case STRING:
166: break;
167: default:
168: magwarn("can't happen: m->type=%d\n",
169: m->type);
170: return -1;
171: }
172: return v;
173: }
174:
175: /*
176: * parse one line from magic file, put into magic[index++] if valid
177: */
178: static int
179: parse(l, ndx, check)
180: char *l;
181: int *ndx, check;
182: {
183: int i = 0, nd = *ndx;
184: struct magic *m;
185: char *t, *s;
186:
187: #define ALLOC_INCR 20
188: if (nd+1 >= maxmagic){
189: maxmagic += ALLOC_INCR;
190: if ((magic = (struct magic *) realloc(magic,
191: sizeof(struct magic) *
192: maxmagic)) == NULL) {
193: (void) fprintf(stderr, "%s: Out of memory.\n", progname);
194: if (check)
195: return -1;
196: else
197: exit(1);
198: }
199: memset(&magic[*ndx], 0, sizeof(struct magic) * ALLOC_INCR);
200: }
201: m = &magic[*ndx];
202: m->flag = 0;
203: m->cont_level = 0;
204:
205: while (*l == '>') {
206: ++l; /* step over */
207: m->cont_level++;
208: }
209:
210: if (m->cont_level != 0 && *l == '(') {
211: ++l; /* step over */
212: m->flag |= INDIR;
213: }
1.4 ! millert 214: if (m->cont_level != 0 && *l == '&') {
! 215: ++l; /* step over */
! 216: m->flag |= ADD;
! 217: }
1.1 deraadt 218:
219: /* get offset, then skip over it */
220: m->offset = (int) strtoul(l,&t,0);
221: if (l == t)
222: magwarn("offset %s invalid", l);
223: l = t;
224:
225: if (m->flag & INDIR) {
226: m->in.type = LONG;
227: m->in.offset = 0;
228: /*
229: * read [.lbs][+-]nnnnn)
230: */
231: if (*l == '.') {
232: l++;
233: switch (LOWCASE(*l)) {
234: case 'l':
235: m->in.type = LONG;
236: break;
237: case 'h':
238: case 's':
239: m->in.type = SHORT;
240: break;
241: case 'c':
242: case 'b':
243: m->in.type = BYTE;
244: break;
245: default:
246: magwarn("indirect offset type %c invalid", *l);
247: break;
248: }
249: l++;
250: }
251: s = l;
252: if (*l == '+' || *l == '-') l++;
253: if (isdigit((unsigned char)*l)) {
254: m->in.offset = strtoul(l, &t, 0);
255: if (*s == '-') m->in.offset = - m->in.offset;
256: }
257: else
258: t = l;
259: if (*t++ != ')')
260: magwarn("missing ')' in indirect offset");
261: l = t;
262: }
263:
264:
265: while (isascii((unsigned char)*l) && isdigit((unsigned char)*l))
266: ++l;
267: EATAB;
268:
269: #define NBYTE 4
270: #define NSHORT 5
271: #define NLONG 4
272: #define NSTRING 6
273: #define NDATE 4
274: #define NBESHORT 7
275: #define NBELONG 6
276: #define NBEDATE 6
277: #define NLESHORT 7
278: #define NLELONG 6
279: #define NLEDATE 6
280:
281: if (*l == 'u') {
282: ++l;
283: m->flag |= UNSIGNED;
284: }
285:
286: /* get type, skip it */
287: if (strncmp(l, "byte", NBYTE)==0) {
288: m->type = BYTE;
289: l += NBYTE;
290: } else if (strncmp(l, "short", NSHORT)==0) {
291: m->type = SHORT;
292: l += NSHORT;
293: } else if (strncmp(l, "long", NLONG)==0) {
294: m->type = LONG;
295: l += NLONG;
296: } else if (strncmp(l, "string", NSTRING)==0) {
297: m->type = STRING;
298: l += NSTRING;
299: } else if (strncmp(l, "date", NDATE)==0) {
300: m->type = DATE;
301: l += NDATE;
302: } else if (strncmp(l, "beshort", NBESHORT)==0) {
303: m->type = BESHORT;
304: l += NBESHORT;
305: } else if (strncmp(l, "belong", NBELONG)==0) {
306: m->type = BELONG;
307: l += NBELONG;
308: } else if (strncmp(l, "bedate", NBEDATE)==0) {
309: m->type = BEDATE;
310: l += NBEDATE;
311: } else if (strncmp(l, "leshort", NLESHORT)==0) {
312: m->type = LESHORT;
313: l += NLESHORT;
314: } else if (strncmp(l, "lelong", NLELONG)==0) {
315: m->type = LELONG;
316: l += NLELONG;
317: } else if (strncmp(l, "ledate", NLEDATE)==0) {
318: m->type = LEDATE;
319: l += NLEDATE;
320: } else {
321: magwarn("type %s invalid", l);
322: return -1;
323: }
324: /* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */
325: if (*l == '&') {
326: ++l;
327: m->mask = signextend(m, strtoul(l, &l, 0));
328: eatsize(&l);
329: } else
330: m->mask = ~0L;
331: EATAB;
332:
333: switch (*l) {
334: case '>':
335: case '<':
336: /* Old-style anding: "0 byte &0x80 dynamically linked" */
337: case '&':
338: case '^':
339: case '=':
340: m->reln = *l;
341: ++l;
342: break;
343: case '!':
344: if (m->type != STRING) {
345: m->reln = *l;
346: ++l;
347: break;
348: }
349: /* FALL THROUGH */
350: default:
351: if (*l == 'x' && isascii((unsigned char)l[1]) &&
352: isspace((unsigned char)l[1])) {
353: m->reln = *l;
354: ++l;
355: goto GetDesc; /* Bill The Cat */
356: }
357: m->reln = '=';
358: break;
359: }
360: EATAB;
361:
362: if (getvalue(m, &l))
363: return -1;
364: /*
365: * TODO finish this macro and start using it!
366: * #define offsetcheck {if (offset > HOWMANY-1)
367: * magwarn("offset too big"); }
368: */
369:
370: /*
371: * now get last part - the description
372: */
373: GetDesc:
374: EATAB;
375: if (l[0] == '\b') {
376: ++l;
377: m->nospflag = 1;
378: } else if ((l[0] == '\\') && (l[1] == 'b')) {
379: ++l;
380: ++l;
381: m->nospflag = 1;
382: } else
383: m->nospflag = 0;
384: while ((m->desc[i++] = *l++) != '\0' && i<MAXDESC)
385: /* NULLBODY */;
386:
387: if (check) {
388: mdump(m);
389: }
390: ++(*ndx); /* make room for next */
391: return 0;
392: }
393:
394: /*
395: * Read a numeric value from a pointer, into the value union of a magic
396: * pointer, according to the magic type. Update the string pointer to point
397: * just after the number read. Return 0 for success, non-zero for failure.
398: */
399: static int
400: getvalue(m, p)
401: struct magic *m;
402: char **p;
403: {
404: int slen;
405:
406: if (m->type == STRING) {
407: *p = getstr(*p, m->value.s, sizeof(m->value.s), &slen);
408: m->vallen = slen;
409: } else
410: if (m->reln != 'x') {
411: m->value.l = signextend(m, strtoul(*p, p, 0));
412: eatsize(p);
413: }
414: return 0;
415: }
416:
/*
 * Convert a string containing C character escapes.  Stop at an unescaped
 * space or tab.
 * Copy the converted version to "p", returning its length in *slen.
 * Return updated scan pointer as function result.
 *
 * s    - text to convert.
 * p    - destination buffer of `plen' bytes; always NUL-terminated.
 * plen - size of the destination; conversion stops with a message on
 *        stderr once plen-1 bytes have been stored.
 * slen - out: number of bytes stored, not counting the terminator.
 *
 * The returned pointer is just past the whitespace character that ended
 * the value, or on the terminating NUL if the input ran out first (also
 * when the input ends in a lone backslash).
 */
static char *
getstr(s, p, plen, slen)
	register char	*s;
	register char	*p;
	int	plen, *slen;
{
	char	*origs = s, *origp = p;
	char	*pmax = p + plen - 1;
	register int	c;
	register int	val;

	while ((c = *s++) != '\0') {
		if (isspace((unsigned char) c))
			break;
		if (p >= pmax) {
			fprintf(stderr, "String too long: %s\n", origs);
			break;
		}
		if(c == '\\') {
			switch(c = *s++) {

			case '\0':
				goto out;

			default:
				*p++ = (char) c;
				break;

			case 'n':
				*p++ = '\n';
				break;

			case 'r':
				*p++ = '\r';
				break;

			case 'b':
				*p++ = '\b';
				break;

			case 't':
				*p++ = '\t';
				break;

			case 'f':
				*p++ = '\f';
				break;

			case 'v':
				*p++ = '\v';
				break;

			/* \ and up to 3 octal digits */
			case '0':
			case '1':
			case '2':
			case '3':
			case '4':
			case '5':
			case '6':
			case '7':
				val = c - '0';
				c = *s++;  /* try for 2 */
				if(c >= '0' && c <= '7') {
					val = (val<<3) | (c - '0');
					c = *s++;  /* try for 3 */
					if(c >= '0' && c <= '7')
						val = (val<<3) | (c-'0');
					else
						--s;
				}
				else
					--s;
				*p++ = (char)val;
				break;

			/* \x and up to 2 hex digits */
			case 'x':
				val = 'x';	/* Default if no digits */
				c = hextoint(*s++);	/* Get next char */
				if (c >= 0) {
					val = c;
					c = hextoint(*s++);
					if (c >= 0)
						val = (val << 4) + c;
					else
						--s;
				} else
					--s;
				*p++ = (char)val;
				break;
			}
		} else
			*p++ = (char)c;
	}
out:
	/*
	 * c is NUL here only when the terminator itself was consumed (end
	 * of input, or a backslash followed by the terminator).  Step back
	 * so the caller never resumes scanning beyond the end of the line.
	 */
	if (c == '\0')
		--s;
	*p = '\0';
	*slen = (int)(p - origp);
	return s;
}
523:
524:
/*
 * Value of a single hexadecimal digit character.
 * Returns 0..15 for '0'-'9', 'a'-'f' and 'A'-'F'; -1 for anything else,
 * including every character outside the 7-bit ASCII range.
 */
static int
hextoint(c)
	int c;
{
	const unsigned char	uc = (unsigned char) c;

	if (uc > 0x7f)			/* not ASCII */
		return -1;
	if (isdigit(uc))
		return c - '0';
	if (c >= 'a' && c <= 'f')
		return c - 'a' + 10;
	if (c >= 'A' && c <= 'F')
		return c - 'A' + 10;
	return -1;
}
536:
537:
/*
 * Print a string containing C character escapes.
 *
 * fp  - stream to write to.
 * s   - bytes to print.
 * len - number of bytes to print, or -1 to print up to (not including)
 *       the first NUL.
 *
 * Bytes in the range 040..0176 are written as they are.  Everything else
 * is written as a backslash followed by n, r, b, t, f or v for the usual
 * control characters, or by three octal digits.
 */
void
showstr(fp, s, len)
	FILE *fp;
	const char *s;
	int len;
{
	register char	c;

	for (;;) {
		if (len == -1) {
			c = *s++;
			if (c == '\0')
				break;
		}
		else  {
			/*
			 * Test the count before fetching, so that a counted
			 * string is never read beyond its last byte.
			 */
			if (len-- == 0)
				break;
			c = *s++;
		}
		if(c >= 040 && c <= 0176)	/* TODO isprint && !iscntrl */
			(void) fputc(c, fp);
		else {
			(void) fputc('\\', fp);
			switch (c) {

			case '\n':
				(void) fputc('n', fp);
				break;

			case '\r':
				(void) fputc('r', fp);
				break;

			case '\b':
				(void) fputc('b', fp);
				break;

			case '\t':
				(void) fputc('t', fp);
				break;

			case '\f':
				(void) fputc('f', fp);
				break;

			case '\v':
				(void) fputc('v', fp);
				break;

			default:
				(void) fprintf(fp, "%.3o", c & 0377);
				break;
			}
		}
	}
}
596:
/*
 * eatsize(): Eat the size spec from a number [eg. 10UL]
 *
 * Skips an optional 'u' followed by an optional one-letter size
 * ('l', 's', 'h', 'b' or 'c'), in either case, and stores the advanced
 * pointer back through p.
 */
static void
eatsize(p)
	char **p;
{
	char *cur = *p;
	int ch;

	/* optional unsigned marker */
	if (tolower((unsigned char) *cur) == 'u')
		cur++;

	/* optional width letter */
	ch = tolower((unsigned char) *cur);
	if (ch == 'l' ||		/* long */
	    ch == 's' || ch == 'h' ||	/* short */
	    ch == 'b' || ch == 'c')	/* char/byte */
		cur++;

	*p = cur;
}
622: }