Annotation of src/usr.bin/file/apprentice.c, Revision 1.3
1.3 ! deraadt 1: /* $OpenBSD: apprentice.c,v 1.2 1995/12/14 03:30:01 deraadt Exp $ */
1.1 deraadt 2: /*
3: * apprentice - make one pass through /etc/magic, learning its secrets.
4: *
5: * Copyright (c) Ian F. Darwin, 1987.
6: * Written by Ian F. Darwin.
7: *
8: * This software is not subject to any license of the American Telephone
9: * and Telegraph Company or of the Regents of the University of California.
10: *
11: * Permission is granted to anyone to use this software for any purpose on
12: * any computer system, and to alter it and redistribute it freely, subject
13: * to the following restrictions:
14: *
15: * 1. The author is not responsible for the consequences of use of this
16: * software, no matter how awful, even if they arise from flaws in it.
17: *
18: * 2. The origin of this software must not be misrepresented, either by
19: * explicit claim or by omission. Since few users ever read sources,
20: * credits must appear in the documentation.
21: *
22: * 3. Altered versions must be plainly marked as such, and must not be
23: * misrepresented as being the original software. Since few users
24: * ever read sources, credits must appear in the documentation.
25: *
26: * 4. This notice may not be removed or altered.
27: */
28:
29: #include <stdio.h>
30: #include <stdlib.h>
31: #include <string.h>
32: #include <ctype.h>
1.2 deraadt 33: #include <errno.h>
1.1 deraadt 34: #include "file.h"
35:
36: #ifndef lint
1.3 ! deraadt 37: static char *moduleid = "$OpenBSD$";
1.1 deraadt 38: #endif /* lint */
39:
/*
 * EATAB: advance the local scan pointer `l' past any run of ASCII
 * whitespace.  The body is wrapped in do { } while (0) so that "EATAB;"
 * is exactly one statement and remains safe in an unbraced if/else.
 */
#define	EATAB	do { \
		while (isascii((unsigned char) *l) && \
		    isspace((unsigned char) *l)) \
			++l; \
	} while (0)

/*
 * LOWCASE: the lower-case form of an upper-case character; any other
 * value is handed back untouched.  The argument is evaluated more than
 * once, so it must be free of side effects.
 */
#define	LOWCASE(l)	(isupper((unsigned char) (l)) ? \
			tolower((unsigned char) (l)) : (l))
44:
45:
46: static int getvalue __P((struct magic *, char **));
47: static int hextoint __P((int));
48: static char *getstr __P((char *, char *, int, int *));
49: static int parse __P((char *, int *, int));
50: static void eatsize __P((char **));
51:
52: static int maxmagic = 0;
53:
1.2 deraadt 54: static int apprentice_1 __P((char *, int));
1.1 deraadt 55:
56: int
57: apprentice(fn, check)
1.2 deraadt 58: char *fn; /* list of magic files */
59: int check; /* non-zero? checking-only run. */
60: {
61: char *p, *mfn;
62: int file_err, errs = -1;
63:
64: maxmagic = MAXMAGIS;
65: magic = (struct magic *) calloc(sizeof(struct magic), maxmagic);
66: mfn = malloc(strlen(fn)+1);
67: if (magic == NULL || mfn == NULL) {
68: (void) fprintf(stderr, "%s: Out of memory.\n", progname);
69: if (check)
70: return -1;
71: else
72: exit(1);
73: }
74: fn = strcpy(mfn, fn);
75:
76: while (fn) {
77: p = strchr(fn, ':');
78: if (p)
79: *p++ = '\0';
80: file_err = apprentice_1(fn, check);
81: if (file_err > errs)
82: errs = file_err;
83: fn = p;
84: }
85: if (errs == -1)
86: (void) fprintf(stderr, "%s: couldn't find any magic files!\n",
87: progname);
88: if (!check && errs)
89: exit(1);
90:
91: free(mfn);
92: return errs;
93: }
94:
95: static int
96: apprentice_1(fn, check)
1.1 deraadt 97: char *fn; /* name of magic file */
98: int check; /* non-zero? checking-only run. */
99: {
1.2 deraadt 100: static const char hdr[] =
101: "cont\toffset\ttype\topcode\tmask\tvalue\tdesc";
1.1 deraadt 102: FILE *f;
103: char line[BUFSIZ+1];
104: int errs = 0;
105:
106: f = fopen(fn, "r");
107: if (f==NULL) {
1.2 deraadt 108: if (errno != ENOENT)
109: (void) fprintf(stderr,
110: "%s: can't read magic file %s (%s)\n",
111: progname, fn, strerror(errno));
112: return -1;
1.1 deraadt 113: }
114:
115: /* parse it */
116: if (check) /* print silly verbose header for USG compat. */
1.2 deraadt 117: (void) printf("%s\n", hdr);
1.1 deraadt 118:
119: for (lineno = 1;fgets(line, BUFSIZ, f) != NULL; lineno++) {
120: if (line[0]=='#') /* comment, do not parse */
121: continue;
122: if (strlen(line) <= (unsigned)1) /* null line, garbage, etc */
123: continue;
124: line[strlen(line)-1] = '\0'; /* delete newline */
125: if (parse(line, &nmagic, check) != 0)
1.2 deraadt 126: errs = 1;
1.1 deraadt 127: }
128:
129: (void) fclose(f);
1.2 deraadt 130: return errs;
1.1 deraadt 131: }
132:
133: /*
134: * extend the sign bit if the comparison is to be signed
135: */
136: unsigned long
137: signextend(m, v)
138: struct magic *m;
139: unsigned long v;
140: {
141: if (!(m->flag & UNSIGNED))
142: switch(m->type) {
143: /*
144: * Do not remove the casts below. They are
145: * vital. When later compared with the data,
146: * the sign extension must have happened.
147: */
148: case BYTE:
149: v = (char) v;
150: break;
151: case SHORT:
152: case BESHORT:
153: case LESHORT:
154: v = (short) v;
155: break;
156: case DATE:
157: case BEDATE:
158: case LEDATE:
159: case LONG:
160: case BELONG:
161: case LELONG:
162: v = (long) v;
163: break;
164: case STRING:
165: break;
166: default:
167: magwarn("can't happen: m->type=%d\n",
168: m->type);
169: return -1;
170: }
171: return v;
172: }
173:
174: /*
175: * parse one line from magic file, put into magic[index++] if valid
176: */
177: static int
178: parse(l, ndx, check)
179: char *l;
180: int *ndx, check;
181: {
182: int i = 0, nd = *ndx;
183: struct magic *m;
184: char *t, *s;
185:
186: #define ALLOC_INCR 20
187: if (nd+1 >= maxmagic){
188: maxmagic += ALLOC_INCR;
189: if ((magic = (struct magic *) realloc(magic,
190: sizeof(struct magic) *
191: maxmagic)) == NULL) {
192: (void) fprintf(stderr, "%s: Out of memory.\n", progname);
193: if (check)
194: return -1;
195: else
196: exit(1);
197: }
198: memset(&magic[*ndx], 0, sizeof(struct magic) * ALLOC_INCR);
199: }
200: m = &magic[*ndx];
201: m->flag = 0;
202: m->cont_level = 0;
203:
204: while (*l == '>') {
205: ++l; /* step over */
206: m->cont_level++;
207: }
208:
209: if (m->cont_level != 0 && *l == '(') {
210: ++l; /* step over */
211: m->flag |= INDIR;
212: }
213:
214: /* get offset, then skip over it */
215: m->offset = (int) strtoul(l,&t,0);
216: if (l == t)
217: magwarn("offset %s invalid", l);
218: l = t;
219:
220: if (m->flag & INDIR) {
221: m->in.type = LONG;
222: m->in.offset = 0;
223: /*
224: * read [.lbs][+-]nnnnn)
225: */
226: if (*l == '.') {
227: l++;
228: switch (LOWCASE(*l)) {
229: case 'l':
230: m->in.type = LONG;
231: break;
232: case 'h':
233: case 's':
234: m->in.type = SHORT;
235: break;
236: case 'c':
237: case 'b':
238: m->in.type = BYTE;
239: break;
240: default:
241: magwarn("indirect offset type %c invalid", *l);
242: break;
243: }
244: l++;
245: }
246: s = l;
247: if (*l == '+' || *l == '-') l++;
248: if (isdigit((unsigned char)*l)) {
249: m->in.offset = strtoul(l, &t, 0);
250: if (*s == '-') m->in.offset = - m->in.offset;
251: }
252: else
253: t = l;
254: if (*t++ != ')')
255: magwarn("missing ')' in indirect offset");
256: l = t;
257: }
258:
259:
260: while (isascii((unsigned char)*l) && isdigit((unsigned char)*l))
261: ++l;
262: EATAB;
263:
264: #define NBYTE 4
265: #define NSHORT 5
266: #define NLONG 4
267: #define NSTRING 6
268: #define NDATE 4
269: #define NBESHORT 7
270: #define NBELONG 6
271: #define NBEDATE 6
272: #define NLESHORT 7
273: #define NLELONG 6
274: #define NLEDATE 6
275:
276: if (*l == 'u') {
277: ++l;
278: m->flag |= UNSIGNED;
279: }
280:
281: /* get type, skip it */
282: if (strncmp(l, "byte", NBYTE)==0) {
283: m->type = BYTE;
284: l += NBYTE;
285: } else if (strncmp(l, "short", NSHORT)==0) {
286: m->type = SHORT;
287: l += NSHORT;
288: } else if (strncmp(l, "long", NLONG)==0) {
289: m->type = LONG;
290: l += NLONG;
291: } else if (strncmp(l, "string", NSTRING)==0) {
292: m->type = STRING;
293: l += NSTRING;
294: } else if (strncmp(l, "date", NDATE)==0) {
295: m->type = DATE;
296: l += NDATE;
297: } else if (strncmp(l, "beshort", NBESHORT)==0) {
298: m->type = BESHORT;
299: l += NBESHORT;
300: } else if (strncmp(l, "belong", NBELONG)==0) {
301: m->type = BELONG;
302: l += NBELONG;
303: } else if (strncmp(l, "bedate", NBEDATE)==0) {
304: m->type = BEDATE;
305: l += NBEDATE;
306: } else if (strncmp(l, "leshort", NLESHORT)==0) {
307: m->type = LESHORT;
308: l += NLESHORT;
309: } else if (strncmp(l, "lelong", NLELONG)==0) {
310: m->type = LELONG;
311: l += NLELONG;
312: } else if (strncmp(l, "ledate", NLEDATE)==0) {
313: m->type = LEDATE;
314: l += NLEDATE;
315: } else {
316: magwarn("type %s invalid", l);
317: return -1;
318: }
319: /* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */
320: if (*l == '&') {
321: ++l;
322: m->mask = signextend(m, strtoul(l, &l, 0));
323: eatsize(&l);
324: } else
325: m->mask = ~0L;
326: EATAB;
327:
328: switch (*l) {
329: case '>':
330: case '<':
331: /* Old-style anding: "0 byte &0x80 dynamically linked" */
332: case '&':
333: case '^':
334: case '=':
335: m->reln = *l;
336: ++l;
337: break;
338: case '!':
339: if (m->type != STRING) {
340: m->reln = *l;
341: ++l;
342: break;
343: }
344: /* FALL THROUGH */
345: default:
346: if (*l == 'x' && isascii((unsigned char)l[1]) &&
347: isspace((unsigned char)l[1])) {
348: m->reln = *l;
349: ++l;
350: goto GetDesc; /* Bill The Cat */
351: }
352: m->reln = '=';
353: break;
354: }
355: EATAB;
356:
357: if (getvalue(m, &l))
358: return -1;
359: /*
360: * TODO finish this macro and start using it!
361: * #define offsetcheck {if (offset > HOWMANY-1)
362: * magwarn("offset too big"); }
363: */
364:
365: /*
366: * now get last part - the description
367: */
368: GetDesc:
369: EATAB;
370: if (l[0] == '\b') {
371: ++l;
372: m->nospflag = 1;
373: } else if ((l[0] == '\\') && (l[1] == 'b')) {
374: ++l;
375: ++l;
376: m->nospflag = 1;
377: } else
378: m->nospflag = 0;
379: while ((m->desc[i++] = *l++) != '\0' && i<MAXDESC)
380: /* NULLBODY */;
381:
382: if (check) {
383: mdump(m);
384: }
385: ++(*ndx); /* make room for next */
386: return 0;
387: }
388:
389: /*
390: * Read a numeric value from a pointer, into the value union of a magic
391: * pointer, according to the magic type. Update the string pointer to point
392: * just after the number read. Return 0 for success, non-zero for failure.
393: */
394: static int
395: getvalue(m, p)
396: struct magic *m;
397: char **p;
398: {
399: int slen;
400:
401: if (m->type == STRING) {
402: *p = getstr(*p, m->value.s, sizeof(m->value.s), &slen);
403: m->vallen = slen;
404: } else
405: if (m->reln != 'x') {
406: m->value.l = signextend(m, strtoul(*p, p, 0));
407: eatsize(p);
408: }
409: return 0;
410: }
411:
/*
 * hextoint - value of a single hexadecimal digit; -1 if c is not one.
 *
 * The test and the arithmetic both use c itself, so values outside the
 * digit and letter ranges can never yield a bogus result.
 */
static int
hextoint(c)
	int c;
{
	if (c >= '0' && c <= '9')
		return c - '0';
	if (c >= 'a' && c <= 'f')
		return c + 10 - 'a';
	if (c >= 'A' && c <= 'F')
		return c + 10 - 'A';
	return -1;
}

/*
 * getstr - convert a string containing C character escapes.
 *
 *	s	text to convert
 *	p	destination buffer
 *	plen	size of the destination buffer
 *	slen	receives the length of the converted string
 *
 * Conversion stops at an unescaped whitespace character, at the end of
 * the text, or when the destination is full (a "String too long" message
 * goes to stderr).  Recognised escapes are \n \r \b \t \f \v, a backslash
 * with up to three octal digits and \x with up to three hex digits; any
 * other escaped character stands for itself.  The result in p is always
 * NUL-terminated.
 *
 * Returns the updated scan pointer: just past the whitespace or the
 * character that did not fit, or at the terminating NUL when the text
 * ran out (also after a dangling backslash).  It never points beyond
 * the terminator.
 */
static char *
getstr(s, p, plen, slen)
	register char	*s;
	register char	*p;
	int	plen, *slen;
{
	char	*origs = s, *origp = p;
	char	*pmax = p + plen - 1;
	register int	c;
	register int	val;

	for (;;) {
		c = *s;
		if (c == '\0')
			break;		/* stay on the terminator */
		s++;
		if (isspace((unsigned char) c))
			break;
		if (p >= pmax) {
			fprintf(stderr, "String too long: %s\n", origs);
			break;
		}
		if (c != '\\') {
			*p++ = (char) c;
			continue;
		}

		/* Escape sequence: look at what follows the backslash. */
		c = *s;
		if (c == '\0')
			break;		/* dangling backslash */
		s++;
		switch (c) {

		default:
			*p++ = (char) c;
			break;

		case 'n':
			*p++ = '\n';
			break;

		case 'r':
			*p++ = '\r';
			break;

		case 'b':
			*p++ = '\b';
			break;

		case 't':
			*p++ = '\t';
			break;

		case 'f':
			*p++ = '\f';
			break;

		case 'v':
			*p++ = '\v';
			break;

		/* \ and up to 3 octal digits */
		case '0':
		case '1':
		case '2':
		case '3':
		case '4':
		case '5':
		case '6':
		case '7':
			val = c - '0';
			c = *s++;	/* try for 2 */
			if (c >= '0' && c <= '7') {
				val = (val << 3) | (c - '0');
				c = *s++;	/* try for 3 */
				if (c >= '0' && c <= '7')
					val = (val << 3) | (c - '0');
				else
					--s;
			}
			else
				--s;
			*p++ = (char) val;
			break;

		/* \x and up to 3 hex digits */
		case 'x':
			val = 'x';	/* Default if no digits */
			c = hextoint(*s++);	/* Get next char */
			if (c >= 0) {
				val = c;
				c = hextoint(*s++);
				if (c >= 0) {
					val = (val << 4) + c;
					c = hextoint(*s++);
					if (c >= 0) {
						val = (val << 4) + c;
					} else
						--s;
				} else
					--s;
			} else
				--s;
			*p++ = (char) val;
			break;
		}
	}
	*p = '\0';
	*slen = p - origp;
	return s;
}
536:
537:
/*
 * showstr - print a string, writing unprintable bytes as C escapes.
 *
 *	fp	stream to write to
 *	s	bytes to print
 *	len	number of bytes to print, or -1 to print up to (but not
 *		including) the first NUL
 *
 * Bytes in the range 040..0176 are written as they are.  Newline,
 * carriage return, backspace, tab, form feed and vertical tab appear as
 * \n \r \b \t \f \v; everything else as a backslash and three octal
 * digits.  With an explicit length, no byte beyond s[len-1] is read.
 */
void
showstr(fp, s, len)
	FILE *fp;
	const char *s;
	int len;
{
	register char	c;

	for (;;) {
		if (len == -1) {
			c = *s++;
			if (c == '\0')
				break;
		}
		else {
			/* Check the count first so s[len] is never read. */
			if (len-- == 0)
				break;
			c = *s++;
		}
		if(c >= 040 && c <= 0176)	/* TODO isprint && !iscntrl */
			(void) fputc(c, fp);
		else {
			(void) fputc('\\', fp);
			switch (c) {

			case '\n':
				(void) fputc('n', fp);
				break;

			case '\r':
				(void) fputc('r', fp);
				break;

			case '\b':
				(void) fputc('b', fp);
				break;

			case '\t':
				(void) fputc('t', fp);
				break;

			case '\f':
				(void) fputc('f', fp);
				break;

			case '\v':
				(void) fputc('v', fp);
				break;

			default:
				(void) fprintf(fp, "%.3o", c & 0377);
				break;
			}
		}
	}
}
596:
/*
 * eatsize - skip the size suffix that may follow a number (e.g. 10UL).
 *
 *	p	address of the scan pointer; moved past the suffix
 *
 * At most one 'u' and then at most one of 'l', 's', 'h', 'b' or 'c' are
 * consumed, in either letter case.  Anything else is left where it is.
 */
static void
eatsize(p)
	char **p;
{
	char *cursor = *p;
	int suffix;

	/* optional unsigned marker */
	if (LOWCASE(*cursor) == 'u')
		++cursor;

	/* optional width letter: long, short (s/h) or char/byte (b/c) */
	suffix = LOWCASE(*cursor);
	if (suffix == 'l' || suffix == 's' || suffix == 'h' ||
	    suffix == 'b' || suffix == 'c')
		++cursor;

	*p = cursor;
}