Annotation of src/usr.bin/file/apprentice.c, Revision 1.17
1.17 ! deraadt 1: /* $OpenBSD: apprentice.c,v 1.16 2003/03/11 21:26:26 ian Exp $ */
1.4 millert 2:
1.1 deraadt 3: /*
4: * apprentice - make one pass through /etc/magic, learning its secrets.
5: *
1.16 ian 6: * Copyright (c) Ian F. Darwin 1986-1995.
7: * Software written by Ian F. Darwin and others;
8: * maintained 1995-present by Christos Zoulas and others.
9: *
10: * Redistribution and use in source and binary forms, with or without
11: * modification, are permitted provided that the following conditions
12: * are met:
13: * 1. Redistributions of source code must retain the above copyright
14: * notice immediately at the beginning of the file, without modification,
15: * this list of conditions, and the following disclaimer.
16: * 2. Redistributions in binary form must reproduce the above copyright
17: * notice, this list of conditions and the following disclaimer in the
18: * documentation and/or other materials provided with the distribution.
19: * 3. All advertising materials mentioning features or use of this software
20: * must display the following acknowledgement:
21: * This product includes software developed by Ian F. Darwin and others.
22: * 4. The name of the author may not be used to endorse or promote products
23: * derived from this software without specific prior written permission.
24: *
25: * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
26: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28: * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
29: * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35: * SUCH DAMAGE.
1.1 deraadt 36: */
37:
38: #include <stdio.h>
39: #include <stdlib.h>
40: #include <string.h>
41: #include <ctype.h>
1.2 deraadt 42: #include <errno.h>
1.5 mickey 43: #include <err.h>
1.1 deraadt 44: #include "file.h"
45:
#ifndef lint
/* Version-control identification string embedded in the object file. */
static const char *moduleid = "$OpenBSD: apprentice.c,v 1.16 2003/03/11 21:26:26 ian Exp $";
#endif /* lint */

/*
 * EATAB - advance the scan pointer `l' (a char * that must be in scope at
 * the point of use) past any run of 7-bit ASCII whitespace.
 *
 * Wrapped in do/while(0) so that "EATAB;" is exactly one statement and can
 * be used as the body of an unbraced if/else.  The range test is the
 * definition of isascii(), spelled out because isascii() is obsolescent.
 */
#define	EATAB	do {							\
		while (((unsigned char) *l) < 0x80 &&			\
		    isspace((unsigned char) *l))			\
			++l;						\
	} while (0)

/*
 * LOWCASE - lower-case form of character `l' if it is an upper-case
 * letter, otherwise `l' unchanged.  The argument is evaluated more than
 * once, so it must not have side effects.
 */
#define	LOWCASE(l)	(isupper((unsigned char) (l)) ?			\
			    tolower((unsigned char) (l)) : (l))


/* Helpers private to this file. */
static int	 getvalue(struct magic *m, char **p);
static int	 hextoint(int c);
static char	*getstr(char *s, char *p, int plen, int *slen);
static int	 parse(char *l, int *ndx, int check);
static void	 eatsize(char **p);

/* Number of entries the magic[] array currently has room for. */
static int maxmagic = 0;
/* How many entries to add the next time magic[] has to grow. */
static int alloc_incr = 256;

static int	 apprentice_1(char *fn, int check);
1.1 deraadt 66:
67: int
68: apprentice(fn, check)
1.2 deraadt 69: char *fn; /* list of magic files */
70: int check; /* non-zero? checking-only run. */
71: {
72: char *p, *mfn;
73: int file_err, errs = -1;
1.17 ! deraadt 74: size_t len;
1.2 deraadt 75:
76: maxmagic = MAXMAGIS;
1.15 aaron 77: magic = (struct magic *) calloc(maxmagic, sizeof(struct magic));
1.17 ! deraadt 78: len = strlen(fn)+1;
! 79: mfn = malloc(len);
1.2 deraadt 80: if (magic == NULL || mfn == NULL) {
1.5 mickey 81: warn("malloc");
1.2 deraadt 82: if (check)
83: return -1;
84: else
85: exit(1);
86: }
1.17 ! deraadt 87: strlcpy(mfn, fn, len);
! 88: fn = mfn;
! 89:
1.2 deraadt 90: while (fn) {
91: p = strchr(fn, ':');
92: if (p)
93: *p++ = '\0';
94: file_err = apprentice_1(fn, check);
95: if (file_err > errs)
96: errs = file_err;
97: fn = p;
98: }
99: if (errs == -1)
1.5 mickey 100: warnx("couldn't find any magic files!");
1.2 deraadt 101: if (!check && errs)
102: exit(1);
103:
104: free(mfn);
105: return errs;
106: }
107:
108: static int
109: apprentice_1(fn, check)
1.1 deraadt 110: char *fn; /* name of magic file */
111: int check; /* non-zero? checking-only run. */
112: {
1.2 deraadt 113: static const char hdr[] =
114: "cont\toffset\ttype\topcode\tmask\tvalue\tdesc";
1.1 deraadt 115: FILE *f;
116: char line[BUFSIZ+1];
117: int errs = 0;
118:
119: f = fopen(fn, "r");
120: if (f==NULL) {
1.2 deraadt 121: if (errno != ENOENT)
1.10 millert 122: warn("%s", fn);
1.2 deraadt 123: return -1;
1.1 deraadt 124: }
125:
126: /* parse it */
127: if (check) /* print silly verbose header for USG compat. */
1.2 deraadt 128: (void) printf("%s\n", hdr);
1.1 deraadt 129:
130: for (lineno = 1;fgets(line, BUFSIZ, f) != NULL; lineno++) {
131: if (line[0]=='#') /* comment, do not parse */
132: continue;
133: if (strlen(line) <= (unsigned)1) /* null line, garbage, etc */
134: continue;
135: line[strlen(line)-1] = '\0'; /* delete newline */
136: if (parse(line, &nmagic, check) != 0)
1.2 deraadt 137: errs = 1;
1.1 deraadt 138: }
139:
140: (void) fclose(f);
1.2 deraadt 141: return errs;
1.1 deraadt 142: }
143:
144: /*
145: * extend the sign bit if the comparison is to be signed
146: */
1.14 itojun 147: uint32_t
1.1 deraadt 148: signextend(m, v)
149: struct magic *m;
1.14 itojun 150: uint32_t v;
1.1 deraadt 151: {
152: if (!(m->flag & UNSIGNED))
153: switch(m->type) {
154: /*
155: * Do not remove the casts below. They are
156: * vital. When later compared with the data,
157: * the sign extension must have happened.
158: */
159: case BYTE:
160: v = (char) v;
161: break;
162: case SHORT:
163: case BESHORT:
164: case LESHORT:
165: v = (short) v;
166: break;
167: case DATE:
168: case BEDATE:
169: case LEDATE:
170: case LONG:
171: case BELONG:
172: case LELONG:
1.14 itojun 173: v = (int32_t) v;
1.1 deraadt 174: break;
175: case STRING:
176: break;
177: default:
1.13 mpech 178: warnx("can't happen: m->type=%d", m->type);
1.1 deraadt 179: return -1;
180: }
181: return v;
182: }
183:
184: /*
185: * parse one line from magic file, put into magic[index++] if valid
186: */
187: static int
188: parse(l, ndx, check)
189: char *l;
190: int *ndx, check;
191: {
192: int i = 0, nd = *ndx;
193: struct magic *m;
194: char *t, *s;
195:
196: if (nd+1 >= maxmagic){
1.6 deraadt 197: struct magic *mtmp;
198:
1.8 ian 199: maxmagic += alloc_incr;
1.6 deraadt 200: if ((mtmp = (struct magic *) realloc(magic,
1.1 deraadt 201: sizeof(struct magic) *
202: maxmagic)) == NULL) {
1.5 mickey 203: warn("malloc");
1.6 deraadt 204: if (check) {
205: if (magic)
206: free(magic);
1.1 deraadt 207: return -1;
1.6 deraadt 208: } else
1.1 deraadt 209: exit(1);
210: }
1.7 deraadt 211: magic = mtmp;
1.8 ian 212: memset(&magic[*ndx], 0, sizeof(struct magic) * alloc_incr);
213: alloc_incr *= 2;
1.1 deraadt 214: }
215: m = &magic[*ndx];
216: m->flag = 0;
217: m->cont_level = 0;
218:
219: while (*l == '>') {
220: ++l; /* step over */
221: m->cont_level++;
222: }
223:
224: if (m->cont_level != 0 && *l == '(') {
225: ++l; /* step over */
226: m->flag |= INDIR;
227: }
1.4 millert 228: if (m->cont_level != 0 && *l == '&') {
229: ++l; /* step over */
230: m->flag |= ADD;
231: }
1.1 deraadt 232:
233: /* get offset, then skip over it */
234: m->offset = (int) strtoul(l,&t,0);
235: if (l == t)
1.5 mickey 236: warnx("offset %s invalid", l);
1.1 deraadt 237: l = t;
238:
239: if (m->flag & INDIR) {
240: m->in.type = LONG;
241: m->in.offset = 0;
242: /*
243: * read [.lbs][+-]nnnnn)
244: */
245: if (*l == '.') {
246: l++;
247: switch (LOWCASE(*l)) {
248: case 'l':
249: m->in.type = LONG;
250: break;
251: case 'h':
252: case 's':
253: m->in.type = SHORT;
254: break;
255: case 'c':
256: case 'b':
257: m->in.type = BYTE;
258: break;
259: default:
1.5 mickey 260: warnx("indirect offset type %c invalid", *l);
1.1 deraadt 261: break;
262: }
263: l++;
264: }
265: s = l;
266: if (*l == '+' || *l == '-') l++;
267: if (isdigit((unsigned char)*l)) {
268: m->in.offset = strtoul(l, &t, 0);
269: if (*s == '-') m->in.offset = - m->in.offset;
270: }
271: else
272: t = l;
273: if (*t++ != ')')
1.5 mickey 274: warnx("missing ')' in indirect offset");
1.1 deraadt 275: l = t;
276: }
277:
278:
279: while (isascii((unsigned char)*l) && isdigit((unsigned char)*l))
280: ++l;
281: EATAB;
282:
283: #define NBYTE 4
284: #define NSHORT 5
285: #define NLONG 4
286: #define NSTRING 6
287: #define NDATE 4
288: #define NBESHORT 7
289: #define NBELONG 6
290: #define NBEDATE 6
291: #define NLESHORT 7
292: #define NLELONG 6
293: #define NLEDATE 6
294:
295: if (*l == 'u') {
296: ++l;
297: m->flag |= UNSIGNED;
298: }
299:
300: /* get type, skip it */
301: if (strncmp(l, "byte", NBYTE)==0) {
302: m->type = BYTE;
303: l += NBYTE;
304: } else if (strncmp(l, "short", NSHORT)==0) {
305: m->type = SHORT;
306: l += NSHORT;
307: } else if (strncmp(l, "long", NLONG)==0) {
308: m->type = LONG;
309: l += NLONG;
310: } else if (strncmp(l, "string", NSTRING)==0) {
311: m->type = STRING;
312: l += NSTRING;
313: } else if (strncmp(l, "date", NDATE)==0) {
314: m->type = DATE;
315: l += NDATE;
316: } else if (strncmp(l, "beshort", NBESHORT)==0) {
317: m->type = BESHORT;
318: l += NBESHORT;
319: } else if (strncmp(l, "belong", NBELONG)==0) {
320: m->type = BELONG;
321: l += NBELONG;
322: } else if (strncmp(l, "bedate", NBEDATE)==0) {
323: m->type = BEDATE;
324: l += NBEDATE;
325: } else if (strncmp(l, "leshort", NLESHORT)==0) {
326: m->type = LESHORT;
327: l += NLESHORT;
328: } else if (strncmp(l, "lelong", NLELONG)==0) {
329: m->type = LELONG;
330: l += NLELONG;
331: } else if (strncmp(l, "ledate", NLEDATE)==0) {
332: m->type = LEDATE;
333: l += NLEDATE;
334: } else {
1.5 mickey 335: warnx("type %s invalid", l);
1.1 deraadt 336: return -1;
337: }
338: /* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */
339: if (*l == '&') {
340: ++l;
341: m->mask = signextend(m, strtoul(l, &l, 0));
342: eatsize(&l);
343: } else
344: m->mask = ~0L;
345: EATAB;
346:
347: switch (*l) {
348: case '>':
349: case '<':
350: /* Old-style anding: "0 byte &0x80 dynamically linked" */
351: case '&':
352: case '^':
353: case '=':
354: m->reln = *l;
355: ++l;
356: break;
357: case '!':
358: if (m->type != STRING) {
359: m->reln = *l;
360: ++l;
361: break;
362: }
363: /* FALL THROUGH */
364: default:
365: if (*l == 'x' && isascii((unsigned char)l[1]) &&
366: isspace((unsigned char)l[1])) {
367: m->reln = *l;
368: ++l;
369: goto GetDesc; /* Bill The Cat */
370: }
371: m->reln = '=';
372: break;
373: }
374: EATAB;
375:
376: if (getvalue(m, &l))
377: return -1;
378: /*
379: * TODO finish this macro and start using it!
380: * #define offsetcheck {if (offset > HOWMANY-1)
1.5 mickey 381: * warnx("offset too big"); }
1.1 deraadt 382: */
383:
384: /*
385: * now get last part - the description
386: */
387: GetDesc:
388: EATAB;
389: if (l[0] == '\b') {
390: ++l;
391: m->nospflag = 1;
392: } else if ((l[0] == '\\') && (l[1] == 'b')) {
393: ++l;
394: ++l;
395: m->nospflag = 1;
396: } else
397: m->nospflag = 0;
398: while ((m->desc[i++] = *l++) != '\0' && i<MAXDESC)
399: /* NULLBODY */;
400:
401: if (check) {
402: mdump(m);
403: }
404: ++(*ndx); /* make room for next */
405: return 0;
406: }
407:
408: /*
409: * Read a numeric value from a pointer, into the value union of a magic
410: * pointer, according to the magic type. Update the string pointer to point
411: * just after the number read. Return 0 for success, non-zero for failure.
412: */
413: static int
414: getvalue(m, p)
415: struct magic *m;
416: char **p;
417: {
418: int slen;
419:
420: if (m->type == STRING) {
421: *p = getstr(*p, m->value.s, sizeof(m->value.s), &slen);
422: m->vallen = slen;
423: } else
424: if (m->reln != 'x') {
425: m->value.l = signextend(m, strtoul(*p, p, 0));
426: eatsize(p);
427: }
428: return 0;
429: }
430:
/*
 * hextoint - value of a single hexadecimal digit character, or -1 if `c'
 * is not one of 0-9, a-f, A-F.
 */
static int
hextoint(int c)
{
	if (c >= '0' && c <= '9')
		return c - '0';
	if (c >= 'a' && c <= 'f')
		return c + 10 - 'a';
	if (c >= 'A' && c <= 'F')
		return c + 10 - 'A';
	return -1;
}

/*
 * getstr - convert a string containing C character escapes.
 *
 *	s	text to scan
 *	p	destination buffer
 *	plen	size of the destination buffer in bytes
 *	slen	out: number of bytes stored in `p', not counting the NUL
 *
 * Conversion stops at the first unescaped whitespace character (which is
 * consumed), at the end of the input, or when the destination is full (a
 * "String too long" message is then written to stderr).  Recognised
 * escapes are \n \r \b \t \f \v, a backslash followed by up to three octal
 * digits, and \x followed by up to two hex digits (a bare \x yields 'x');
 * a backslash before any other character yields that character.
 *
 * The destination is always NUL-terminated.  Returns the updated scan
 * pointer; when the input ran out it points at the terminating NUL, never
 * beyond it.
 */
static char *
getstr(char *s, char *p, int plen, int *slen)
{
	char *origs = s, *origp = p;
	char *pmax = p + plen - 1;	/* last byte, reserved for the NUL */
	int c, val;

	for (;;) {
		c = *s;
		if (c == '\0')
			break;			/* leave s on the terminator */
		s++;
		if (isspace((unsigned char) c))
			break;
		if (p >= pmax) {
			fprintf(stderr, "String too long: %s\n", origs);
			break;
		}
		if (c != '\\') {
			*p++ = (char) c;
			continue;
		}

		/* Escape sequence: look at the character after the '\'. */
		c = *s;
		if (c == '\0')
			break;			/* lone trailing backslash */
		s++;
		switch (c) {
		case 'n':
			*p++ = '\n';
			break;
		case 'r':
			*p++ = '\r';
			break;
		case 'b':
			*p++ = '\b';
			break;
		case 't':
			*p++ = '\t';
			break;
		case 'f':
			*p++ = '\f';
			break;
		case 'v':
			*p++ = '\v';
			break;

		/* \ and up to 3 octal digits */
		case '0':
		case '1':
		case '2':
		case '3':
		case '4':
		case '5':
		case '6':
		case '7':
			val = c - '0';
			if (*s >= '0' && *s <= '7') {		/* try for 2 */
				val = (val << 3) | (*s++ - '0');
				if (*s >= '0' && *s <= '7')	/* try for 3 */
					val = (val << 3) | (*s++ - '0');
			}
			*p++ = (char) val;
			break;

		/* \x and up to 2 hex digits */
		case 'x':
			val = 'x';		/* Default if no digits */
			c = hextoint(*s);
			if (c >= 0) {
				s++;
				val = c;
				c = hextoint(*s);
				if (c >= 0) {
					s++;
					val = (val << 4) + c;
				}
			}
			*p++ = (char) val;
			break;

		default:
			*p++ = (char) c;
			break;
		}
	}
	*p = '\0';
	*slen = (int)(p - origp);
	return s;
}
550:
551:
/*
 * showstr - print a string, rendering non-printable bytes as C escapes.
 *
 *	fp	stream to write to
 *	s	bytes to print
 *	len	number of bytes to print, or -1 to print up to (not
 *		including) the first NUL; any negative value is treated
 *		like -1
 *
 * Bytes in the range 040..0176 are written as they are.  Newline, carriage
 * return, backspace, tab, form feed and vertical tab are written as \n \r
 * \b \t \f \v; every other byte is written as a backslash followed by
 * three octal digits.
 */
void
showstr(FILE *fp, const char *s, int len)
{
	char c, esc;

	for (;;) {
		/*
		 * Test the count before fetching, so that the byte after
		 * the last counted one is never read.
		 */
		if (len < 0) {
			c = *s++;
			if (c == '\0')
				break;
		} else {
			if (len-- == 0)
				break;
			c = *s++;
		}

		if (c >= 040 && c <= 0176) {	/* TODO isprint && !iscntrl */
			(void) fputc(c, fp);
			continue;
		}

		switch (c) {
		case '\n':
			esc = 'n';
			break;
		case '\r':
			esc = 'r';
			break;
		case '\b':
			esc = 'b';
			break;
		case '\t':
			esc = 't';
			break;
		case '\f':
			esc = 'f';
			break;
		case '\v':
			esc = 'v';
			break;
		default:
			esc = '\0';		/* no mnemonic: use octal */
			break;
		}

		(void) fputc('\\', fp);
		if (esc != '\0')
			(void) fputc(esc, fp);
		else
			(void) fprintf(fp, "%.3o", (unsigned int)(c & 0377));
	}
}
610:
/*
 * eatsize - skip the size suffix that may follow a number [eg. 10UL].
 *
 * At *p, an optional 'u' followed by at most one of 'l', 's', 'h', 'b' or
 * 'c' is consumed, each in either letter case.  *p is left pointing at the
 * first character that is not part of the suffix.
 */
static void
eatsize(char **p)
{
	char *cur = *p;
	int ch;

	/* optional unsigned marker */
	if (tolower((unsigned char) *cur) == 'u')
		cur++;

	/* optional width letter: long, short, short, byte, char */
	ch = tolower((unsigned char) *cur);
	if (ch != '\0' && strchr("lshbc", ch) != NULL)
		cur++;

	*p = cur;
}