Annotation of src/usr.bin/file/apprentice.c, Revision 1.1.1.1
1.1 deraadt 1: /*
2: * apprentice - make one pass through /etc/magic, learning its secrets.
3: *
4: * Copyright (c) Ian F. Darwin, 1987.
5: * Written by Ian F. Darwin.
6: *
7: * This software is not subject to any license of the American Telephone
8: * and Telegraph Company or of the Regents of the University of California.
9: *
10: * Permission is granted to anyone to use this software for any purpose on
11: * any computer system, and to alter it and redistribute it freely, subject
12: * to the following restrictions:
13: *
14: * 1. The author is not responsible for the consequences of use of this
15: * software, no matter how awful, even if they arise from flaws in it.
16: *
17: * 2. The origin of this software must not be misrepresented, either by
18: * explicit claim or by omission. Since few users ever read sources,
19: * credits must appear in the documentation.
20: *
21: * 3. Altered versions must be plainly marked as such, and must not be
22: * misrepresented as being the original software. Since few users
23: * ever read sources, credits must appear in the documentation.
24: *
25: * 4. This notice may not be removed or altered.
26: */
27:
28: #include <stdio.h>
29: #include <stdlib.h>
30: #include <string.h>
31: #include <ctype.h>
32: #include "file.h"
33:
34: #ifndef lint
35: static char *moduleid =
36: "@(#)$Id: apprentice.c,v 1.9 1995/05/21 00:13:24 christos Exp $";
37: #endif /* lint */
38:
/*
 * EATAB: advance the scan pointer `l' (a char * that must be in scope at
 * the point of use) past any run of ASCII whitespace.  Wrapped in
 * do { } while (0) so that "EATAB;" is a single statement and stays safe
 * inside an unbraced if/else.
 */
#define	EATAB	do { \
			while (isascii((unsigned char) *l) && \
			    isspace((unsigned char) *l)) \
				++l; \
		} while (0)

/*
 * LOWCASE: lower-case form of an upper-case letter, any other value is
 * returned unchanged.  The argument is evaluated more than once, so it
 * must not have side effects.
 */
#define	LOWCASE(l)	(isupper((unsigned char) (l)) ? \
			tolower((unsigned char) (l)) : (l))
43:
44:
45: static int getvalue __P((struct magic *, char **));
46: static int hextoint __P((int));
47: static char *getstr __P((char *, char *, int, int *));
48: static int parse __P((char *, int *, int));
49: static void eatsize __P((char **));
50:
51: static int maxmagic = 0;
52:
53:
54: int
55: apprentice(fn, check)
56: char *fn; /* name of magic file */
57: int check; /* non-zero? checking-only run. */
58: {
59: FILE *f;
60: char line[BUFSIZ+1];
61: int errs = 0;
62:
63: f = fopen(fn, "r");
64: if (f==NULL) {
65: (void) fprintf(stderr, "%s: can't read magic file %s\n",
66: progname, fn);
67: if (check)
68: return -1;
69: else
70: exit(1);
71: }
72:
73: maxmagic = MAXMAGIS;
74: if ((magic = (struct magic *) calloc(sizeof(struct magic), maxmagic))
75: == NULL) {
76: (void) fprintf(stderr, "%s: Out of memory.\n", progname);
77: if (check)
78: return -1;
79: else
80: exit(1);
81: }
82:
83: /* parse it */
84: if (check) /* print silly verbose header for USG compat. */
85: (void) printf("cont\toffset\ttype\topcode\tmask\tvalue\tdesc\n");
86:
87: for (lineno = 1;fgets(line, BUFSIZ, f) != NULL; lineno++) {
88: if (line[0]=='#') /* comment, do not parse */
89: continue;
90: if (strlen(line) <= (unsigned)1) /* null line, garbage, etc */
91: continue;
92: line[strlen(line)-1] = '\0'; /* delete newline */
93: if (parse(line, &nmagic, check) != 0)
94: ++errs;
95: }
96:
97: (void) fclose(f);
98: return errs ? -1 : 0;
99: }
100:
101: /*
102: * extend the sign bit if the comparison is to be signed
103: */
104: unsigned long
105: signextend(m, v)
106: struct magic *m;
107: unsigned long v;
108: {
109: if (!(m->flag & UNSIGNED))
110: switch(m->type) {
111: /*
112: * Do not remove the casts below. They are
113: * vital. When later compared with the data,
114: * the sign extension must have happened.
115: */
116: case BYTE:
117: v = (char) v;
118: break;
119: case SHORT:
120: case BESHORT:
121: case LESHORT:
122: v = (short) v;
123: break;
124: case DATE:
125: case BEDATE:
126: case LEDATE:
127: case LONG:
128: case BELONG:
129: case LELONG:
130: v = (long) v;
131: break;
132: case STRING:
133: break;
134: default:
135: magwarn("can't happen: m->type=%d\n",
136: m->type);
137: return -1;
138: }
139: return v;
140: }
141:
142: /*
143: * parse one line from magic file, put into magic[index++] if valid
144: */
145: static int
146: parse(l, ndx, check)
147: char *l;
148: int *ndx, check;
149: {
150: int i = 0, nd = *ndx;
151: struct magic *m;
152: char *t, *s;
153:
154: #define ALLOC_INCR 20
155: if (nd+1 >= maxmagic){
156: maxmagic += ALLOC_INCR;
157: if ((magic = (struct magic *) realloc(magic,
158: sizeof(struct magic) *
159: maxmagic)) == NULL) {
160: (void) fprintf(stderr, "%s: Out of memory.\n", progname);
161: if (check)
162: return -1;
163: else
164: exit(1);
165: }
166: memset(&magic[*ndx], 0, sizeof(struct magic) * ALLOC_INCR);
167: }
168: m = &magic[*ndx];
169: m->flag = 0;
170: m->cont_level = 0;
171:
172: while (*l == '>') {
173: ++l; /* step over */
174: m->cont_level++;
175: }
176:
177: if (m->cont_level != 0 && *l == '(') {
178: ++l; /* step over */
179: m->flag |= INDIR;
180: }
181:
182: /* get offset, then skip over it */
183: m->offset = (int) strtoul(l,&t,0);
184: if (l == t)
185: magwarn("offset %s invalid", l);
186: l = t;
187:
188: if (m->flag & INDIR) {
189: m->in.type = LONG;
190: m->in.offset = 0;
191: /*
192: * read [.lbs][+-]nnnnn)
193: */
194: if (*l == '.') {
195: l++;
196: switch (LOWCASE(*l)) {
197: case 'l':
198: m->in.type = LONG;
199: break;
200: case 'h':
201: case 's':
202: m->in.type = SHORT;
203: break;
204: case 'c':
205: case 'b':
206: m->in.type = BYTE;
207: break;
208: default:
209: magwarn("indirect offset type %c invalid", *l);
210: break;
211: }
212: l++;
213: }
214: s = l;
215: if (*l == '+' || *l == '-') l++;
216: if (isdigit((unsigned char)*l)) {
217: m->in.offset = strtoul(l, &t, 0);
218: if (*s == '-') m->in.offset = - m->in.offset;
219: }
220: else
221: t = l;
222: if (*t++ != ')')
223: magwarn("missing ')' in indirect offset");
224: l = t;
225: }
226:
227:
228: while (isascii((unsigned char)*l) && isdigit((unsigned char)*l))
229: ++l;
230: EATAB;
231:
232: #define NBYTE 4
233: #define NSHORT 5
234: #define NLONG 4
235: #define NSTRING 6
236: #define NDATE 4
237: #define NBESHORT 7
238: #define NBELONG 6
239: #define NBEDATE 6
240: #define NLESHORT 7
241: #define NLELONG 6
242: #define NLEDATE 6
243:
244: if (*l == 'u') {
245: ++l;
246: m->flag |= UNSIGNED;
247: }
248:
249: /* get type, skip it */
250: if (strncmp(l, "byte", NBYTE)==0) {
251: m->type = BYTE;
252: l += NBYTE;
253: } else if (strncmp(l, "short", NSHORT)==0) {
254: m->type = SHORT;
255: l += NSHORT;
256: } else if (strncmp(l, "long", NLONG)==0) {
257: m->type = LONG;
258: l += NLONG;
259: } else if (strncmp(l, "string", NSTRING)==0) {
260: m->type = STRING;
261: l += NSTRING;
262: } else if (strncmp(l, "date", NDATE)==0) {
263: m->type = DATE;
264: l += NDATE;
265: } else if (strncmp(l, "beshort", NBESHORT)==0) {
266: m->type = BESHORT;
267: l += NBESHORT;
268: } else if (strncmp(l, "belong", NBELONG)==0) {
269: m->type = BELONG;
270: l += NBELONG;
271: } else if (strncmp(l, "bedate", NBEDATE)==0) {
272: m->type = BEDATE;
273: l += NBEDATE;
274: } else if (strncmp(l, "leshort", NLESHORT)==0) {
275: m->type = LESHORT;
276: l += NLESHORT;
277: } else if (strncmp(l, "lelong", NLELONG)==0) {
278: m->type = LELONG;
279: l += NLELONG;
280: } else if (strncmp(l, "ledate", NLEDATE)==0) {
281: m->type = LEDATE;
282: l += NLEDATE;
283: } else {
284: magwarn("type %s invalid", l);
285: return -1;
286: }
287: /* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */
288: if (*l == '&') {
289: ++l;
290: m->mask = signextend(m, strtoul(l, &l, 0));
291: eatsize(&l);
292: } else
293: m->mask = ~0L;
294: EATAB;
295:
296: switch (*l) {
297: case '>':
298: case '<':
299: /* Old-style anding: "0 byte &0x80 dynamically linked" */
300: case '&':
301: case '^':
302: case '=':
303: m->reln = *l;
304: ++l;
305: break;
306: case '!':
307: if (m->type != STRING) {
308: m->reln = *l;
309: ++l;
310: break;
311: }
312: /* FALL THROUGH */
313: default:
314: if (*l == 'x' && isascii((unsigned char)l[1]) &&
315: isspace((unsigned char)l[1])) {
316: m->reln = *l;
317: ++l;
318: goto GetDesc; /* Bill The Cat */
319: }
320: m->reln = '=';
321: break;
322: }
323: EATAB;
324:
325: if (getvalue(m, &l))
326: return -1;
327: /*
328: * TODO finish this macro and start using it!
329: * #define offsetcheck {if (offset > HOWMANY-1)
330: * magwarn("offset too big"); }
331: */
332:
333: /*
334: * now get last part - the description
335: */
336: GetDesc:
337: EATAB;
338: if (l[0] == '\b') {
339: ++l;
340: m->nospflag = 1;
341: } else if ((l[0] == '\\') && (l[1] == 'b')) {
342: ++l;
343: ++l;
344: m->nospflag = 1;
345: } else
346: m->nospflag = 0;
347: while ((m->desc[i++] = *l++) != '\0' && i<MAXDESC)
348: /* NULLBODY */;
349:
350: if (check) {
351: mdump(m);
352: }
353: ++(*ndx); /* make room for next */
354: return 0;
355: }
356:
357: /*
358: * Read a numeric value from a pointer, into the value union of a magic
359: * pointer, according to the magic type. Update the string pointer to point
360: * just after the number read. Return 0 for success, non-zero for failure.
361: */
362: static int
363: getvalue(m, p)
364: struct magic *m;
365: char **p;
366: {
367: int slen;
368:
369: if (m->type == STRING) {
370: *p = getstr(*p, m->value.s, sizeof(m->value.s), &slen);
371: m->vallen = slen;
372: } else
373: if (m->reln != 'x') {
374: m->value.l = signextend(m, strtoul(*p, p, 0));
375: eatsize(p);
376: }
377: return 0;
378: }
379:
/*
 * hextoint - single hex char to int; -1 if not a hex char.
 *
 * The argument is reduced to unsigned char before the digit test, so any
 * value outside '0'-'9', 'a'-'f' and 'A'-'F' falls through to -1.
 */
static int
hextoint(c)
	int c;
{
	if (isdigit((unsigned char) c))
		return c - '0';
	if ((c >= 'a') && (c <= 'f'))
		return c + 10 - 'a';
	if ((c >= 'A') && (c <= 'F'))
		return c + 10 - 'A';
	return -1;
}

/*
 * getstr - convert a string containing C character escapes.
 *
 * s:     text to scan; conversion stops at an unescaped whitespace
 *        character (which is consumed) or at the end of the string.
 * p:     output buffer of plen bytes; the result is NUL terminated.
 * slen:  receives the number of bytes stored, excluding the terminator.
 *
 * Recognised escapes: \n \r \b \t \f \v, \ followed by up to 3 octal
 * digits, \x followed by up to 3 hex digits (the value is truncated to a
 * char; "\x" without digits yields 'x'), and \c for any other character
 * c, which yields c itself.  A backslash at the very end is dropped.
 *
 * When the output buffer is full a message is written to stderr and the
 * conversion stops.
 *
 * Returns the updated scan pointer.  It never points beyond the
 * terminating NUL of s, so the caller can keep scanning from it.
 */
static char *
getstr(s, p, plen, slen)
	register char *s;
	register char *p;
	int plen, *slen;
{
	char *origs = s, *origp = p;
	char *pmax;
	register int c;
	register int val;
	int digit, ndigits;

	if (plen <= 0) {	/* no room even for the terminator */
		*slen = 0;
		return s;
	}
	pmax = p + plen - 1;	/* last byte is reserved for the NUL */

	for (;;) {
		c = *s;
		if (c == '\0')
			break;		/* stay on the terminator */
		s++;
		if (isspace((unsigned char) c))
			break;
		if (p >= pmax) {
			fprintf(stderr, "String too long: %s\n", origs);
			break;
		}
		if (c != '\\') {
			*p++ = (char) c;
			continue;
		}

		/* escape sequence: look at the character after the '\' */
		c = *s;
		if (c == '\0')
			break;		/* dangling backslash */
		s++;

		switch (c) {
		case 'n':
			*p++ = '\n';
			break;
		case 'r':
			*p++ = '\r';
			break;
		case 'b':
			*p++ = '\b';
			break;
		case 't':
			*p++ = '\t';
			break;
		case 'f':
			*p++ = '\f';
			break;
		case 'v':
			*p++ = '\v';
			break;

		/* \ and up to 3 octal digits */
		case '0':
		case '1':
		case '2':
		case '3':
		case '4':
		case '5':
		case '6':
		case '7':
			val = c - '0';
			for (ndigits = 1;
			    ndigits < 3 && *s >= '0' && *s <= '7';
			    ndigits++)
				val = (val << 3) | (*s++ - '0');
			*p++ = (char) val;
			break;

		/* \x and up to 3 hex digits */
		case 'x':
			val = 'x';	/* Default if no digits */
			for (ndigits = 0;
			    ndigits < 3 && (digit = hextoint(*s)) >= 0;
			    ndigits++, s++)
				val = (ndigits == 0) ? digit :
				    (val << 4) + digit;
			*p++ = (char) val;
			break;

		default:
			*p++ = (char) c;
			break;
		}
	}

	*p = '\0';
	*slen = (int) (p - origp);
	return s;
}
504:
505:
/*
 * showstr - print a string, rendering non-printing bytes as C escapes.
 *
 * fp:   stream to write to.
 * s:    bytes to print.
 * len:  number of bytes to print, or -1 to print up to (not including)
 *       the first NUL.  Any other negative length prints nothing.
 *
 * Bytes in the range 040..0176 are written as they are.  Newline,
 * carriage return, backspace, tab, form feed and vertical tab are
 * written as \n \r \b \t \f \v; every other byte is written as a
 * backslash followed by three octal digits.
 */
void
showstr(fp, s, len)
	FILE *fp;
	const char *s;
	int len;
{
	register char c;

	for (;;) {
		/*
		 * Decide whether to stop before touching *s, so that a
		 * buffer of exactly len bytes is never read past its end.
		 */
		if (len == -1) {
			if (*s == '\0')
				break;
		} else {
			if (len <= 0)
				break;
			len--;
		}
		c = *s++;

		if (c >= 040 && c <= 0176) {	/* TODO isprint && !iscntrl */
			(void) fputc(c, fp);
			continue;
		}

		(void) fputc('\\', fp);
		switch (c) {
		case '\n':
			(void) fputc('n', fp);
			break;
		case '\r':
			(void) fputc('r', fp);
			break;
		case '\b':
			(void) fputc('b', fp);
			break;
		case '\t':
			(void) fputc('t', fp);
			break;
		case '\f':
			(void) fputc('f', fp);
			break;
		case '\v':
			(void) fputc('v', fp);
			break;
		default:
			/* mask so that a negative char prints as one byte */
			(void) fprintf(fp, "%.3o", (unsigned int) (c & 0377));
			break;
		}
	}
}
564:
/*
 * eatsize - step over the size suffix that may follow a number
 * (e.g. the "UL" in 10UL).
 *
 * An optional 'u' is skipped first, then at most one of 'l' (long),
 * 's' or 'h' (short), 'b' or 'c' (char/byte).  Both letters are accepted
 * in either case.  *p is left pointing at the first character that is
 * not part of the suffix.
 */
static void
eatsize(p)
	char **p;
{
	char *cur = *p;

	if (*cur == 'u' || *cur == 'U')
		cur++;

	switch (*cur) {
	case 'l': case 'L':	/* long */
	case 's': case 'S':	/* short */
	case 'h': case 'H':	/* short */
	case 'b': case 'B':	/* char/byte */
	case 'c': case 'C':	/* char/byte */
		cur++;
		break;
	default:
		break;
	}

	*p = cur;
}