Annotation of src/usr.bin/file/apprentice.c, Revision 1.5
1.5 ! mickey 1: /* $OpenBSD: apprentice.c,v 1.4 1997/02/09 23:58:16 millert Exp $ */
1.4 millert 2:
1.1 deraadt 3: /*
4: * apprentice - make one pass through /etc/magic, learning its secrets.
5: *
6: * Copyright (c) Ian F. Darwin, 1987.
7: * Written by Ian F. Darwin.
8: *
9: * This software is not subject to any license of the American Telephone
10: * and Telegraph Company or of the Regents of the University of California.
11: *
12: * Permission is granted to anyone to use this software for any purpose on
13: * any computer system, and to alter it and redistribute it freely, subject
14: * to the following restrictions:
15: *
16: * 1. The author is not responsible for the consequences of use of this
17: * software, no matter how awful, even if they arise from flaws in it.
18: *
19: * 2. The origin of this software must not be misrepresented, either by
20: * explicit claim or by omission. Since few users ever read sources,
21: * credits must appear in the documentation.
22: *
23: * 3. Altered versions must be plainly marked as such, and must not be
24: * misrepresented as being the original software. Since few users
25: * ever read sources, credits must appear in the documentation.
26: *
27: * 4. This notice may not be removed or altered.
28: */
29:
30: #include <stdio.h>
31: #include <stdlib.h>
32: #include <string.h>
33: #include <ctype.h>
1.2 deraadt 34: #include <errno.h>
1.5 ! mickey 35: #include <err.h>
1.1 deraadt 36: #include "file.h"
37:
#ifndef	lint
/*
 * Revision-control identification string kept in the object file.
 * Declared as a const array: the text is never modified and a plain
 * "char *" must not point at a string literal.
 */
static const char moduleid[] = "$OpenBSD: apprentice.c,v 1.4 1997/02/09 23:58:16 millert Exp $";
#endif	/* lint */
41:
/*
 * EATAB advances the scan pointer `l' (which must be in scope at the point
 * of use) past any run of ASCII white space.  It is wrapped in
 * do { } while (0) so that "EATAB;" is a single statement and remains
 * safe inside an unbraced if/else.
 *
 * LOWCASE(l) yields the lower-case form of an upper-case character and
 * any other character unchanged.  Note that the argument is evaluated
 * more than once.
 */
#define	EATAB	do {							\
		while (isascii((unsigned char) *l) &&			\
		    isspace((unsigned char) *l))			\
			++l;						\
	} while (0)
#define LOWCASE(l) (isupper((unsigned char) (l)) ? \
			tolower((unsigned char) (l)) : (l))
46:
47:
48: static int getvalue __P((struct magic *, char **));
49: static int hextoint __P((int));
50: static char *getstr __P((char *, char *, int, int *));
51: static int parse __P((char *, int *, int));
52: static void eatsize __P((char **));
53:
54: static int maxmagic = 0;
55:
1.2 deraadt 56: static int apprentice_1 __P((char *, int));
1.1 deraadt 57:
58: int
59: apprentice(fn, check)
1.2 deraadt 60: char *fn; /* list of magic files */
61: int check; /* non-zero? checking-only run. */
62: {
63: char *p, *mfn;
64: int file_err, errs = -1;
65:
66: maxmagic = MAXMAGIS;
67: magic = (struct magic *) calloc(sizeof(struct magic), maxmagic);
68: mfn = malloc(strlen(fn)+1);
69: if (magic == NULL || mfn == NULL) {
1.5 ! mickey 70: warn("malloc");
1.2 deraadt 71: if (check)
72: return -1;
73: else
74: exit(1);
75: }
76: fn = strcpy(mfn, fn);
77:
78: while (fn) {
79: p = strchr(fn, ':');
80: if (p)
81: *p++ = '\0';
82: file_err = apprentice_1(fn, check);
83: if (file_err > errs)
84: errs = file_err;
85: fn = p;
86: }
87: if (errs == -1)
1.5 ! mickey 88: warnx("couldn't find any magic files!");
1.2 deraadt 89: if (!check && errs)
90: exit(1);
91:
92: free(mfn);
93: return errs;
94: }
95:
96: static int
97: apprentice_1(fn, check)
1.1 deraadt 98: char *fn; /* name of magic file */
99: int check; /* non-zero? checking-only run. */
100: {
1.2 deraadt 101: static const char hdr[] =
102: "cont\toffset\ttype\topcode\tmask\tvalue\tdesc";
1.1 deraadt 103: FILE *f;
104: char line[BUFSIZ+1];
105: int errs = 0;
106:
107: f = fopen(fn, "r");
108: if (f==NULL) {
1.2 deraadt 109: if (errno != ENOENT)
1.5 ! mickey 110: warn(fn);
1.2 deraadt 111: return -1;
1.1 deraadt 112: }
113:
114: /* parse it */
115: if (check) /* print silly verbose header for USG compat. */
1.2 deraadt 116: (void) printf("%s\n", hdr);
1.1 deraadt 117:
118: for (lineno = 1;fgets(line, BUFSIZ, f) != NULL; lineno++) {
119: if (line[0]=='#') /* comment, do not parse */
120: continue;
121: if (strlen(line) <= (unsigned)1) /* null line, garbage, etc */
122: continue;
123: line[strlen(line)-1] = '\0'; /* delete newline */
124: if (parse(line, &nmagic, check) != 0)
1.2 deraadt 125: errs = 1;
1.1 deraadt 126: }
127:
128: (void) fclose(f);
1.2 deraadt 129: return errs;
1.1 deraadt 130: }
131:
132: /*
133: * extend the sign bit if the comparison is to be signed
134: */
1.4 millert 135: uint32
1.1 deraadt 136: signextend(m, v)
137: struct magic *m;
1.4 millert 138: uint32 v;
1.1 deraadt 139: {
140: if (!(m->flag & UNSIGNED))
141: switch(m->type) {
142: /*
143: * Do not remove the casts below. They are
144: * vital. When later compared with the data,
145: * the sign extension must have happened.
146: */
147: case BYTE:
148: v = (char) v;
149: break;
150: case SHORT:
151: case BESHORT:
152: case LESHORT:
153: v = (short) v;
154: break;
155: case DATE:
156: case BEDATE:
157: case LEDATE:
158: case LONG:
159: case BELONG:
160: case LELONG:
1.4 millert 161: v = (int32) v;
1.1 deraadt 162: break;
163: case STRING:
164: break;
165: default:
1.5 ! mickey 166: warnx("can't happen: m->type=%d\n", m->type);
1.1 deraadt 167: return -1;
168: }
169: return v;
170: }
171:
172: /*
173: * parse one line from magic file, put into magic[index++] if valid
174: */
175: static int
176: parse(l, ndx, check)
177: char *l;
178: int *ndx, check;
179: {
180: int i = 0, nd = *ndx;
181: struct magic *m;
182: char *t, *s;
183:
184: #define ALLOC_INCR 20
185: if (nd+1 >= maxmagic){
186: maxmagic += ALLOC_INCR;
187: if ((magic = (struct magic *) realloc(magic,
188: sizeof(struct magic) *
189: maxmagic)) == NULL) {
1.5 ! mickey 190: warn("malloc");
1.1 deraadt 191: if (check)
192: return -1;
193: else
194: exit(1);
195: }
196: memset(&magic[*ndx], 0, sizeof(struct magic) * ALLOC_INCR);
197: }
198: m = &magic[*ndx];
199: m->flag = 0;
200: m->cont_level = 0;
201:
202: while (*l == '>') {
203: ++l; /* step over */
204: m->cont_level++;
205: }
206:
207: if (m->cont_level != 0 && *l == '(') {
208: ++l; /* step over */
209: m->flag |= INDIR;
210: }
1.4 millert 211: if (m->cont_level != 0 && *l == '&') {
212: ++l; /* step over */
213: m->flag |= ADD;
214: }
1.1 deraadt 215:
216: /* get offset, then skip over it */
217: m->offset = (int) strtoul(l,&t,0);
218: if (l == t)
1.5 ! mickey 219: warnx("offset %s invalid", l);
1.1 deraadt 220: l = t;
221:
222: if (m->flag & INDIR) {
223: m->in.type = LONG;
224: m->in.offset = 0;
225: /*
226: * read [.lbs][+-]nnnnn)
227: */
228: if (*l == '.') {
229: l++;
230: switch (LOWCASE(*l)) {
231: case 'l':
232: m->in.type = LONG;
233: break;
234: case 'h':
235: case 's':
236: m->in.type = SHORT;
237: break;
238: case 'c':
239: case 'b':
240: m->in.type = BYTE;
241: break;
242: default:
1.5 ! mickey 243: warnx("indirect offset type %c invalid", *l);
1.1 deraadt 244: break;
245: }
246: l++;
247: }
248: s = l;
249: if (*l == '+' || *l == '-') l++;
250: if (isdigit((unsigned char)*l)) {
251: m->in.offset = strtoul(l, &t, 0);
252: if (*s == '-') m->in.offset = - m->in.offset;
253: }
254: else
255: t = l;
256: if (*t++ != ')')
1.5 ! mickey 257: warnx("missing ')' in indirect offset");
1.1 deraadt 258: l = t;
259: }
260:
261:
262: while (isascii((unsigned char)*l) && isdigit((unsigned char)*l))
263: ++l;
264: EATAB;
265:
266: #define NBYTE 4
267: #define NSHORT 5
268: #define NLONG 4
269: #define NSTRING 6
270: #define NDATE 4
271: #define NBESHORT 7
272: #define NBELONG 6
273: #define NBEDATE 6
274: #define NLESHORT 7
275: #define NLELONG 6
276: #define NLEDATE 6
277:
278: if (*l == 'u') {
279: ++l;
280: m->flag |= UNSIGNED;
281: }
282:
283: /* get type, skip it */
284: if (strncmp(l, "byte", NBYTE)==0) {
285: m->type = BYTE;
286: l += NBYTE;
287: } else if (strncmp(l, "short", NSHORT)==0) {
288: m->type = SHORT;
289: l += NSHORT;
290: } else if (strncmp(l, "long", NLONG)==0) {
291: m->type = LONG;
292: l += NLONG;
293: } else if (strncmp(l, "string", NSTRING)==0) {
294: m->type = STRING;
295: l += NSTRING;
296: } else if (strncmp(l, "date", NDATE)==0) {
297: m->type = DATE;
298: l += NDATE;
299: } else if (strncmp(l, "beshort", NBESHORT)==0) {
300: m->type = BESHORT;
301: l += NBESHORT;
302: } else if (strncmp(l, "belong", NBELONG)==0) {
303: m->type = BELONG;
304: l += NBELONG;
305: } else if (strncmp(l, "bedate", NBEDATE)==0) {
306: m->type = BEDATE;
307: l += NBEDATE;
308: } else if (strncmp(l, "leshort", NLESHORT)==0) {
309: m->type = LESHORT;
310: l += NLESHORT;
311: } else if (strncmp(l, "lelong", NLELONG)==0) {
312: m->type = LELONG;
313: l += NLELONG;
314: } else if (strncmp(l, "ledate", NLEDATE)==0) {
315: m->type = LEDATE;
316: l += NLEDATE;
317: } else {
1.5 ! mickey 318: warnx("type %s invalid", l);
1.1 deraadt 319: return -1;
320: }
321: /* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */
322: if (*l == '&') {
323: ++l;
324: m->mask = signextend(m, strtoul(l, &l, 0));
325: eatsize(&l);
326: } else
327: m->mask = ~0L;
328: EATAB;
329:
330: switch (*l) {
331: case '>':
332: case '<':
333: /* Old-style anding: "0 byte &0x80 dynamically linked" */
334: case '&':
335: case '^':
336: case '=':
337: m->reln = *l;
338: ++l;
339: break;
340: case '!':
341: if (m->type != STRING) {
342: m->reln = *l;
343: ++l;
344: break;
345: }
346: /* FALL THROUGH */
347: default:
348: if (*l == 'x' && isascii((unsigned char)l[1]) &&
349: isspace((unsigned char)l[1])) {
350: m->reln = *l;
351: ++l;
352: goto GetDesc; /* Bill The Cat */
353: }
354: m->reln = '=';
355: break;
356: }
357: EATAB;
358:
359: if (getvalue(m, &l))
360: return -1;
361: /*
362: * TODO finish this macro and start using it!
363: * #define offsetcheck {if (offset > HOWMANY-1)
1.5 ! mickey 364: * warnx("offset too big"); }
1.1 deraadt 365: */
366:
367: /*
368: * now get last part - the description
369: */
370: GetDesc:
371: EATAB;
372: if (l[0] == '\b') {
373: ++l;
374: m->nospflag = 1;
375: } else if ((l[0] == '\\') && (l[1] == 'b')) {
376: ++l;
377: ++l;
378: m->nospflag = 1;
379: } else
380: m->nospflag = 0;
381: while ((m->desc[i++] = *l++) != '\0' && i<MAXDESC)
382: /* NULLBODY */;
383:
384: if (check) {
385: mdump(m);
386: }
387: ++(*ndx); /* make room for next */
388: return 0;
389: }
390:
391: /*
392: * Read a numeric value from a pointer, into the value union of a magic
393: * pointer, according to the magic type. Update the string pointer to point
394: * just after the number read. Return 0 for success, non-zero for failure.
395: */
396: static int
397: getvalue(m, p)
398: struct magic *m;
399: char **p;
400: {
401: int slen;
402:
403: if (m->type == STRING) {
404: *p = getstr(*p, m->value.s, sizeof(m->value.s), &slen);
405: m->vallen = slen;
406: } else
407: if (m->reln != 'x') {
408: m->value.l = signextend(m, strtoul(*p, p, 0));
409: eatsize(p);
410: }
411: return 0;
412: }
413:
/*
 * Single hex char to int; -1 if not a hex char.
 * Only the ASCII digits and the letters a-f / A-F are accepted.
 */
static int
hextoint(c)
	int c;
{
	if (c >= '0' && c <= '9')
		return c - '0';
	if (c >= 'a' && c <= 'f')
		return c + 10 - 'a';
	if (c >= 'A' && c <= 'F')
		return c + 10 - 'A';
	return -1;
}

/*
 * Convert a string containing C character escapes.  Stop at an unescaped
 * space or tab.
 * Copy the converted version to "p", returning its length in *slen.
 * Return updated scan pointer as function result.
 *
 * At most plen-1 characters are stored, followed by a NUL; when the
 * input does not fit, "String too long" is reported on stderr and the
 * conversion stops.  Recognised escapes are \n \r \b \t \f \v, a
 * backslash followed by up to three octal digits, and \x followed by up
 * to two hex digits; any other escaped character stands for itself.
 *
 * The returned pointer is just past the terminating white-space
 * character, or ON the terminating NUL when the input ends first (also
 * when it ends in a lone backslash).  It never points beyond the NUL,
 * so the caller can keep scanning safely.
 */
static char *
getstr(s, p, plen, slen)
	char	*s;
	char	*p;
	int	plen, *slen;
{
	char	*origs = s, *origp = p;
	char	*pmax = p + plen - 1;
	int	c;
	int	val;

	while ((c = *s) != '\0') {
		s++;
		if (isspace((unsigned char) c))
			break;
		if (p >= pmax) {
			fprintf(stderr, "String too long: %s\n", origs);
			break;
		}
		if (c != '\\') {
			*p++ = (char)c;
			continue;
		}

		switch (c = *s++) {

		case '\0':
			--s;		/* stay on the terminator */
			goto out;

		default:
			*p++ = (char) c;
			break;

		case 'n':
			*p++ = '\n';
			break;

		case 'r':
			*p++ = '\r';
			break;

		case 'b':
			*p++ = '\b';
			break;

		case 't':
			*p++ = '\t';
			break;

		case 'f':
			*p++ = '\f';
			break;

		case 'v':
			*p++ = '\v';
			break;

		/* \ and up to 3 octal digits */
		case '0':
		case '1':
		case '2':
		case '3':
		case '4':
		case '5':
		case '6':
		case '7':
			val = c - '0';
			c = *s++;  /* try for 2 */
			if (c >= '0' && c <= '7') {
				val = (val<<3) | (c - '0');
				c = *s++;  /* try for 3 */
				if (c >= '0' && c <= '7')
					val = (val<<3) | (c-'0');
				else
					--s;
			} else
				--s;
			*p++ = (char)val;
			break;

		/* \x and up to 2 hex digits */
		case 'x':
			val = 'x';	/* Default if no digits */
			c = hextoint(*s++);	/* Get next char */
			if (c >= 0) {
				val = c;
				c = hextoint(*s++);
				if (c >= 0)
					val = (val << 4) + c;
				else
					--s;
			} else
				--s;
			*p++ = (char)val;
			break;
		}
	}
out:
	*p = '\0';
	*slen = p - origp;
	return s;
}
533:
534:
/*
 * Print a string containing C character escapes.
 *
 * Writes `s' to `fp', printing characters in the range 040..0176 as
 * they are, the control characters \n \r \b \t \f \v as their two
 * character C escapes, and everything else as a backslash followed by
 * three octal digits.
 *
 * When `len' is -1 the string is printed up to its terminating NUL;
 * otherwise exactly `len' bytes are printed (embedded NULs included).
 * The end test is made before a byte is fetched, so no byte beyond the
 * requested length is ever read.
 */
void
showstr(fp, s, len)
	FILE *fp;
	const char *s;
	int len;
{
	char c;
	char esc;

	for (;;) {
		if (len == -1) {
			if (*s == '\0')
				break;
		} else if (len-- == 0)
			break;

		c = *s++;
		if (c >= 040 && c <= 0176) {	/* TODO isprint && !iscntrl */
			(void) fputc(c, fp);
			continue;
		}

		/* Pick the escape letter, or 0 if an octal escape is needed. */
		switch (c) {
		case '\n':
			esc = 'n';
			break;
		case '\r':
			esc = 'r';
			break;
		case '\b':
			esc = 'b';
			break;
		case '\t':
			esc = 't';
			break;
		case '\f':
			esc = 'f';
			break;
		case '\v':
			esc = 'v';
			break;
		default:
			esc = '\0';
			break;
		}

		(void) fputc('\\', fp);
		if (esc != '\0')
			(void) fputc(esc, fp);
		else
			(void) fprintf(fp, "%.3o", c & 0377);
	}
}
593:
/*
 * eatsize(): skip the size suffix that may follow a number [eg. 10UL]
 *
 * An optional 'u' is consumed first, then at most one of the letters
 * l, s, h, b or c; both steps ignore case.  *p is updated to point at
 * the first character that is not part of the suffix.
 */
static void
eatsize(p)
	char **p;
{
	char *cursor = *p;

	/* optional unsigned marker */
	if (tolower((unsigned char) *cursor) == 'u')
		cursor++;

	/* optional width letter */
	switch (tolower((unsigned char) *cursor)) {
	case 'l':	/* long */
	case 's':	/* short */
	case 'h':	/* short */
	case 'b':	/* char/byte */
	case 'c':	/* char/byte */
		cursor++;
		break;
	default:
		break;
	}

	*p = cursor;
}