Annotation of src/usr.bin/file/apprentice.c, Revision 1.23
1.23 ! ray 1: /* $OpenBSD: apprentice.c,v 1.22 2006/04/04 14:17:01 pedro Exp $ */
1.1 deraadt 2: /*
1.16 ian 3: * Copyright (c) Ian F. Darwin 1986-1995.
4: * Software written by Ian F. Darwin and others;
5: * maintained 1995-present by Christos Zoulas and others.
6: *
7: * Redistribution and use in source and binary forms, with or without
8: * modification, are permitted provided that the following conditions
9: * are met:
10: * 1. Redistributions of source code must retain the above copyright
11: * notice immediately at the beginning of the file, without modification,
12: * this list of conditions, and the following disclaimer.
13: * 2. Redistributions in binary form must reproduce the above copyright
14: * notice, this list of conditions and the following disclaimer in the
15: * documentation and/or other materials provided with the distribution.
16: *
17: * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20: * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
21: * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27: * SUCH DAMAGE.
1.1 deraadt 28: */
1.19 tedu 29: /*
30: * apprentice - make one pass through /etc/magic, learning its secrets.
31: */
1.1 deraadt 32:
1.19 tedu 33: #include "file.h"
34: #include "magic.h"
1.1 deraadt 35: #include <stdlib.h>
1.19 tedu 36: #ifdef HAVE_UNISTD_H
37: #include <unistd.h>
38: #endif
1.1 deraadt 39: #include <string.h>
40: #include <ctype.h>
1.19 tedu 41: #include <fcntl.h>
42: #include <sys/stat.h>
43: #include <sys/param.h>
44: #ifdef QUICK
45: #include <sys/mman.h>
46: #endif
1.1 deraadt 47:
48: #ifndef lint
1.23 ! ray 49: FILE_RCSID("@(#)$Id: apprentice.c,v 1.22 2006/04/04 14:17:01 pedro Exp $")
1.1 deraadt 50: #endif /* lint */
51:
52: #define EATAB {while (isascii((unsigned char) *l) && \
53: isspace((unsigned char) *l)) ++l;}
54: #define LOWCASE(l) (isupper((unsigned char) (l)) ? \
55: tolower((unsigned char) (l)) : (l))
1.19 tedu 56: /*
57: * Work around a bug in headers on Digital Unix.
58: * At least confirmed for: OSF1 V4.0 878
59: */
60: #if defined(__osf__) && defined(__DECC)
61: #ifdef MAP_FAILED
62: #undef MAP_FAILED
63: #endif
64: #endif
65:
66: #ifndef MAP_FAILED
67: #define MAP_FAILED (void *) -1
68: #endif
69:
70: #ifndef MAP_FILE
71: #define MAP_FILE 0
72: #endif
73:
74: #ifndef MAXPATHLEN
75: #define MAXPATHLEN 1024
76: #endif
77:
78: private int getvalue(struct magic_set *ms, struct magic *, char **);
79: private int hextoint(int);
80: private char *getstr(struct magic_set *, char *, char *, int, int *);
81: private int parse(struct magic_set *, struct magic **, uint32_t *, char *, int);
82: private void eatsize(char **);
83: private int apprentice_1(struct magic_set *, const char *, int, struct mlist *);
84: private int apprentice_file(struct magic_set *, struct magic **, uint32_t *,
85: const char *, int);
86: private void byteswap(struct magic *, uint32_t);
87: private void bs1(struct magic *);
88: private uint16_t swap2(uint16_t);
89: private uint32_t swap4(uint32_t);
90: private char *mkdbname(const char *, char *, size_t);
91: private int apprentice_map(struct magic_set *, struct magic **, uint32_t *,
92: const char *);
93: private int apprentice_compile(struct magic_set *, struct magic **, uint32_t *,
94: const char *);
95:
96: private size_t maxmagic = 0;
97: private size_t magicsize = sizeof(struct magic);
98:
99: #ifdef COMPILE_ONLY
100: const char *magicfile;
101: char *progname;
102: int lineno;
103:
104: int main(int, char *[]);
105:
106: int
107: main(int argc, char *argv[])
108: {
109: int ret;
110:
111: if ((progname = strrchr(argv[0], '/')) != NULL)
112: progname++;
113: else
114: progname = argv[0];
115:
116: if (argc != 2) {
117: (void)fprintf(stderr, "usage: %s file\n", progname);
118: exit(1);
119: }
120: magicfile = argv[1];
121:
122: exit(file_apprentice(magicfile, COMPILE, MAGIC_CHECK) == -1 ? 1 : 0);
123: }
124: #endif /* COMPILE_ONLY */
125:
126:
127: /*
128: * Handle one file.
129: */
130: private int
131: apprentice_1(struct magic_set *ms, const char *fn, int action,
132: struct mlist *mlist)
133: {
134: struct magic *magic = NULL;
135: uint32_t nmagic = 0;
136: struct mlist *ml;
137: int rv = -1;
138: int mapped;
139:
140: if (magicsize != FILE_MAGICSIZE) {
141: file_error(ms, 0, "magic element size %lu != %lu",
142: (unsigned long)sizeof(*magic),
143: (unsigned long)FILE_MAGICSIZE);
144: return -1;
145: }
1.1 deraadt 146:
1.19 tedu 147: if (action == FILE_COMPILE) {
148: rv = apprentice_file(ms, &magic, &nmagic, fn, action);
149: if (rv != 0)
150: return -1;
151: rv = apprentice_compile(ms, &magic, &nmagic, fn);
152: free(magic);
153: return rv;
154: }
155: #ifndef COMPILE_ONLY
156: if ((rv = apprentice_map(ms, &magic, &nmagic, fn)) == -1) {
157: if (ms->flags & MAGIC_CHECK)
158: file_magwarn("using regular magic file `%s'", fn);
159: rv = apprentice_file(ms, &magic, &nmagic, fn, action);
160: if (rv != 0)
161: return -1;
162: mapped = 0;
163: }
1.1 deraadt 164:
1.19 tedu 165: if (rv == -1)
166: return rv;
167: mapped = rv;
168:
169: if (magic == NULL || nmagic == 0) {
170: file_delmagic(magic, mapped, nmagic);
171: return -1;
172: }
1.1 deraadt 173:
1.19 tedu 174: if ((ml = malloc(sizeof(*ml))) == NULL) {
175: file_delmagic(magic, mapped, nmagic);
176: file_oomem(ms);
177: return -1;
178: }
1.1 deraadt 179:
1.19 tedu 180: ml->magic = magic;
181: ml->nmagic = nmagic;
182: ml->mapped = mapped;
183:
184: mlist->prev->next = ml;
185: ml->prev = mlist->prev;
186: ml->next = mlist;
187: mlist->prev = ml;
1.1 deraadt 188:
1.19 tedu 189: return 0;
190: #endif /* COMPILE_ONLY */
191: }
192:
193: protected void
194: file_delmagic(struct magic *p, int type, size_t entries)
195: {
196: if (p == NULL)
197: return;
198: switch (type) {
199: case 2:
200: p--;
201: (void)munmap((void *)p, sizeof(*p) * (entries + 1));
202: break;
203: case 1:
204: p--;
205: case 0:
206: free(p);
207: break;
208: default:
209: abort();
210: }
211: }
212:
213:
214: /* const char *fn: list of magic files */
215: protected struct mlist *
216: file_apprentice(struct magic_set *ms, const char *fn, int action)
1.2 deraadt 217: {
1.19 tedu 218: char *p, *mfn, *afn = NULL;
1.2 deraadt 219: int file_err, errs = -1;
1.19 tedu 220: struct mlist *mlist;
1.2 deraadt 221:
1.19 tedu 222: if (fn == NULL)
223: fn = getenv("MAGIC");
224: if (fn == NULL)
225: fn = MAGIC;
226:
227: if ((fn = mfn = strdup(fn)) == NULL) {
228: file_oomem(ms);
229: return NULL;
230: }
231:
232: if ((mlist = malloc(sizeof(*mlist))) == NULL) {
233: free(mfn);
234: file_oomem(ms);
235: return NULL;
1.2 deraadt 236: }
1.19 tedu 237: mlist->next = mlist->prev = mlist;
1.17 deraadt 238:
1.2 deraadt 239: while (fn) {
1.19 tedu 240: p = strchr(fn, PATHSEP);
1.2 deraadt 241: if (p)
242: *p++ = '\0';
1.19 tedu 243: if (*fn == '\0')
244: break;
245: if (ms->flags & MAGIC_MIME) {
246: size_t len = strlen(fn) + 5 + 1;
247: if ((afn = malloc(len)) == NULL) {
248: free(mfn);
249: free(mlist);
250: file_oomem(ms);
251: return NULL;
252: }
253: (void)strlcpy(afn, fn, len);
254: (void)strlcat(afn, ".mime", len);
255: fn = afn;
256: }
257: file_err = apprentice_1(ms, fn, action, mlist);
1.2 deraadt 258: if (file_err > errs)
259: errs = file_err;
1.19 tedu 260: if (afn) {
261: free(afn);
262: afn = NULL;
263: }
1.2 deraadt 264: fn = p;
265: }
1.19 tedu 266: if (errs == -1) {
267: free(mfn);
268: free(mlist);
269: mlist = NULL;
270: file_error(ms, 0, "could not find any magic files!");
271: return NULL;
272: }
1.2 deraadt 273: free(mfn);
1.19 tedu 274: return mlist;
1.2 deraadt 275: }
276:
1.19 tedu 277: /*
278: * parse from a file
279: * const char *fn: name of magic file
280: */
281: private int
282: apprentice_file(struct magic_set *ms, struct magic **magicp, uint32_t *nmagicp,
283: const char *fn, int action)
1.1 deraadt 284: {
1.19 tedu 285: private const char hdr[] =
1.2 deraadt 286: "cont\toffset\ttype\topcode\tmask\tvalue\tdesc";
1.1 deraadt 287: FILE *f;
1.20 otto 288: char line[BUFSIZ];
1.19 tedu 289: int lineno;
1.1 deraadt 290: int errs = 0;
291:
292: f = fopen(fn, "r");
1.19 tedu 293: if (f == NULL) {
1.2 deraadt 294: if (errno != ENOENT)
1.19 tedu 295: file_error(ms, errno, "cannot read magic file `%s'",
296: fn);
1.2 deraadt 297: return -1;
1.1 deraadt 298: }
299:
1.19 tedu 300: maxmagic = MAXMAGIS;
301: *magicp = (struct magic *) calloc(maxmagic, sizeof(struct magic));
302: if (*magicp == NULL) {
303: (void)fclose(f);
304: file_oomem(ms);
305: return -1;
306: }
307:
308: /* print silly verbose header for USG compat. */
309: if (action == FILE_CHECK)
310: (void)fprintf(stderr, "%s\n", hdr);
311:
1.1 deraadt 312: /* parse it */
1.20 otto 313: for (lineno = 1; fgets(line, sizeof(line), f) != NULL; lineno++) {
1.23 ! ray 314: char *p;
! 315:
1.1 deraadt 316: if (line[0]=='#') /* comment, do not parse */
317: continue;
318: if (strlen(line) <= (unsigned)1) /* null line, garbage, etc */
319: continue;
1.23 ! ray 320: /* delete newline */
! 321: if ((p = strchr(line, '\n')) != NULL)
! 322: *p = '\0';
1.19 tedu 323: if (parse(ms, magicp, nmagicp, line, action) != 0)
1.2 deraadt 324: errs = 1;
1.1 deraadt 325: }
326:
1.19 tedu 327: (void)fclose(f);
328: if (errs) {
329: free(*magicp);
330: *magicp = NULL;
331: *nmagicp = 0;
332: }
1.2 deraadt 333: return errs;
1.1 deraadt 334: }
335:
336: /*
337: * extend the sign bit if the comparison is to be signed
338: */
1.19 tedu 339: protected uint32_t
340: file_signextend(struct magic_set *ms, struct magic *m, uint32_t v)
1.1 deraadt 341: {
342: if (!(m->flag & UNSIGNED))
343: switch(m->type) {
344: /*
345: * Do not remove the casts below. They are
346: * vital. When later compared with the data,
347: * the sign extension must have happened.
348: */
1.19 tedu 349: case FILE_BYTE:
1.1 deraadt 350: v = (char) v;
351: break;
1.19 tedu 352: case FILE_SHORT:
353: case FILE_BESHORT:
354: case FILE_LESHORT:
1.1 deraadt 355: v = (short) v;
356: break;
1.19 tedu 357: case FILE_DATE:
358: case FILE_BEDATE:
359: case FILE_LEDATE:
360: case FILE_LDATE:
361: case FILE_BELDATE:
362: case FILE_LELDATE:
363: case FILE_LONG:
364: case FILE_BELONG:
365: case FILE_LELONG:
1.14 itojun 366: v = (int32_t) v;
1.1 deraadt 367: break;
1.19 tedu 368: case FILE_STRING:
369: case FILE_PSTRING:
370: break;
371: case FILE_REGEX:
1.1 deraadt 372: break;
373: default:
1.19 tedu 374: if (ms->flags & MAGIC_CHECK)
375: file_magwarn("cannot happen: m->type=%d\n",
376: m->type);
377: return ~0U;
1.1 deraadt 378: }
379: return v;
380: }
381:
382: /*
383: * parse one line from magic file, put into magic[index++] if valid
384: */
1.19 tedu 385: private int
386: parse(struct magic_set *ms, struct magic **magicp, uint32_t *nmagicp, char *l,
387: int action)
1.1 deraadt 388: {
389: struct magic *m;
1.19 tedu 390: char *t;
391: private const char *fops = FILE_OPS;
392: uint32_t val;
393:
394: #define ALLOC_INCR 200
395: if (*nmagicp + 1 >= maxmagic){
396: maxmagic += ALLOC_INCR;
397: if ((m = (struct magic *) realloc(*magicp,
398: sizeof(struct magic) * maxmagic)) == NULL) {
399: file_oomem(ms);
400: if (*magicp)
401: free(*magicp);
1.1 deraadt 402: return -1;
1.19 tedu 403: }
404: *magicp = m;
405: memset(&(*magicp)[*nmagicp], 0, sizeof(struct magic)
406: * ALLOC_INCR);
1.1 deraadt 407: }
1.19 tedu 408: m = &(*magicp)[*nmagicp];
1.1 deraadt 409: m->flag = 0;
410: m->cont_level = 0;
411:
412: while (*l == '>') {
413: ++l; /* step over */
414: m->cont_level++;
415: }
416:
417: if (m->cont_level != 0 && *l == '(') {
418: ++l; /* step over */
419: m->flag |= INDIR;
420: }
1.4 millert 421: if (m->cont_level != 0 && *l == '&') {
422: ++l; /* step over */
1.19 tedu 423: m->flag |= OFFADD;
1.4 millert 424: }
1.1 deraadt 425:
426: /* get offset, then skip over it */
1.19 tedu 427: m->offset = (uint32_t)strtoul(l, &t, 0);
1.1 deraadt 428: if (l == t)
1.19 tedu 429: if (ms->flags & MAGIC_CHECK)
430: file_magwarn("offset %s invalid", l);
1.1 deraadt 431: l = t;
432:
433: if (m->flag & INDIR) {
1.19 tedu 434: m->in_type = FILE_LONG;
435: m->in_offset = 0;
1.1 deraadt 436: /*
437: * read [.lbs][+-]nnnnn)
438: */
439: if (*l == '.') {
440: l++;
1.19 tedu 441: switch (*l) {
1.1 deraadt 442: case 'l':
1.19 tedu 443: m->in_type = FILE_LELONG;
444: break;
445: case 'L':
446: m->in_type = FILE_BELONG;
1.1 deraadt 447: break;
448: case 'h':
449: case 's':
1.19 tedu 450: m->in_type = FILE_LESHORT;
451: break;
452: case 'H':
453: case 'S':
454: m->in_type = FILE_BESHORT;
1.1 deraadt 455: break;
456: case 'c':
457: case 'b':
1.19 tedu 458: case 'C':
459: case 'B':
460: m->in_type = FILE_BYTE;
1.1 deraadt 461: break;
462: default:
1.19 tedu 463: if (ms->flags & MAGIC_CHECK)
464: file_magwarn(
465: "indirect offset type %c invalid",
466: *l);
1.1 deraadt 467: break;
468: }
469: l++;
470: }
1.19 tedu 471: if (*l == '~') {
472: m->in_op = FILE_OPINVERSE;
473: l++;
474: }
475: switch (*l) {
476: case '&':
477: m->in_op |= FILE_OPAND;
478: l++;
479: break;
480: case '|':
481: m->in_op |= FILE_OPOR;
482: l++;
483: break;
484: case '^':
485: m->in_op |= FILE_OPXOR;
486: l++;
487: break;
488: case '+':
489: m->in_op |= FILE_OPADD;
490: l++;
491: break;
492: case '-':
493: m->in_op |= FILE_OPMINUS;
494: l++;
495: break;
496: case '*':
497: m->in_op |= FILE_OPMULTIPLY;
498: l++;
499: break;
500: case '/':
501: m->in_op |= FILE_OPDIVIDE;
502: l++;
503: break;
504: case '%':
505: m->in_op |= FILE_OPMODULO;
506: l++;
507: break;
1.1 deraadt 508: }
1.19 tedu 509: if (isdigit((unsigned char)*l))
510: m->in_offset = (uint32_t)strtoul(l, &t, 0);
1.1 deraadt 511: else
512: t = l;
513: if (*t++ != ')')
1.19 tedu 514: if (ms->flags & MAGIC_CHECK)
515: file_magwarn("missing ')' in indirect offset");
1.1 deraadt 516: l = t;
517: }
518:
519:
520: while (isascii((unsigned char)*l) && isdigit((unsigned char)*l))
521: ++l;
522: EATAB;
523:
524: #define NBYTE 4
525: #define NSHORT 5
526: #define NLONG 4
527: #define NSTRING 6
528: #define NDATE 4
529: #define NBESHORT 7
530: #define NBELONG 6
531: #define NBEDATE 6
532: #define NLESHORT 7
533: #define NLELONG 6
534: #define NLEDATE 6
1.19 tedu 535: #define NPSTRING 7
536: #define NLDATE 5
537: #define NBELDATE 7
538: #define NLELDATE 7
539: #define NREGEX 5
1.1 deraadt 540:
541: if (*l == 'u') {
542: ++l;
543: m->flag |= UNSIGNED;
544: }
545:
546: /* get type, skip it */
1.19 tedu 547: if (strncmp(l, "char", NBYTE)==0) { /* HP/UX compat */
548: m->type = FILE_BYTE;
549: l += NBYTE;
550: } else if (strncmp(l, "byte", NBYTE)==0) {
551: m->type = FILE_BYTE;
1.1 deraadt 552: l += NBYTE;
553: } else if (strncmp(l, "short", NSHORT)==0) {
1.19 tedu 554: m->type = FILE_SHORT;
1.1 deraadt 555: l += NSHORT;
556: } else if (strncmp(l, "long", NLONG)==0) {
1.19 tedu 557: m->type = FILE_LONG;
1.1 deraadt 558: l += NLONG;
559: } else if (strncmp(l, "string", NSTRING)==0) {
1.19 tedu 560: m->type = FILE_STRING;
1.1 deraadt 561: l += NSTRING;
562: } else if (strncmp(l, "date", NDATE)==0) {
1.19 tedu 563: m->type = FILE_DATE;
1.1 deraadt 564: l += NDATE;
565: } else if (strncmp(l, "beshort", NBESHORT)==0) {
1.19 tedu 566: m->type = FILE_BESHORT;
1.1 deraadt 567: l += NBESHORT;
568: } else if (strncmp(l, "belong", NBELONG)==0) {
1.19 tedu 569: m->type = FILE_BELONG;
1.1 deraadt 570: l += NBELONG;
571: } else if (strncmp(l, "bedate", NBEDATE)==0) {
1.19 tedu 572: m->type = FILE_BEDATE;
1.1 deraadt 573: l += NBEDATE;
574: } else if (strncmp(l, "leshort", NLESHORT)==0) {
1.19 tedu 575: m->type = FILE_LESHORT;
1.1 deraadt 576: l += NLESHORT;
577: } else if (strncmp(l, "lelong", NLELONG)==0) {
1.19 tedu 578: m->type = FILE_LELONG;
1.1 deraadt 579: l += NLELONG;
580: } else if (strncmp(l, "ledate", NLEDATE)==0) {
1.19 tedu 581: m->type = FILE_LEDATE;
1.1 deraadt 582: l += NLEDATE;
1.19 tedu 583: } else if (strncmp(l, "pstring", NPSTRING)==0) {
584: m->type = FILE_PSTRING;
585: l += NPSTRING;
586: } else if (strncmp(l, "ldate", NLDATE)==0) {
587: m->type = FILE_LDATE;
588: l += NLDATE;
589: } else if (strncmp(l, "beldate", NBELDATE)==0) {
590: m->type = FILE_BELDATE;
591: l += NBELDATE;
592: } else if (strncmp(l, "leldate", NLELDATE)==0) {
593: m->type = FILE_LELDATE;
594: l += NLELDATE;
595: } else if (strncmp(l, "regex", NREGEX)==0) {
596: m->type = FILE_REGEX;
597: l += sizeof("regex");
1.1 deraadt 598: } else {
1.19 tedu 599: if (ms->flags & MAGIC_CHECK)
600: file_magwarn("type %s invalid", l);
1.1 deraadt 601: return -1;
602: }
603: /* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */
1.19 tedu 604: /* New and improved: ~ & | ^ + - * / % -- exciting, isn't it? */
605: if (*l == '~') {
606: if (FILE_STRING != m->type && FILE_PSTRING != m->type)
607: m->mask_op = FILE_OPINVERSE;
1.1 deraadt 608: ++l;
1.19 tedu 609: }
610: if ((t = strchr(fops, *l)) != NULL) {
611: uint32_t op = (uint32_t)(t - fops);
612: if (op != FILE_OPDIVIDE ||
613: (FILE_STRING != m->type && FILE_PSTRING != m->type)) {
614: ++l;
615: m->mask_op |= op;
616: val = (uint32_t)strtoul(l, &l, 0);
617: m->mask = file_signextend(ms, m, val);
618: eatsize(&l);
619: } else {
620: m->mask = 0L;
621: while (!isspace((unsigned char)*++l)) {
622: switch (*l) {
623: case CHAR_IGNORE_LOWERCASE:
624: m->mask |= STRING_IGNORE_LOWERCASE;
625: break;
626: case CHAR_COMPACT_BLANK:
627: m->mask |= STRING_COMPACT_BLANK;
628: break;
629: case CHAR_COMPACT_OPTIONAL_BLANK:
630: m->mask |=
631: STRING_COMPACT_OPTIONAL_BLANK;
632: break;
633: default:
634: if (ms->flags & MAGIC_CHECK)
635: file_magwarn(
636: "string extension %c invalid",
637: *l);
638: return -1;
639: }
640: }
641: }
642: }
643: /*
644: * We used to set mask to all 1's here, instead let's just not do
645: * anything if mask = 0 (unless you have a better idea)
646: */
1.1 deraadt 647: EATAB;
648:
649: switch (*l) {
650: case '>':
651: case '<':
652: /* Old-style anding: "0 byte &0x80 dynamically linked" */
653: case '&':
654: case '^':
655: case '=':
656: m->reln = *l;
657: ++l;
1.19 tedu 658: if (*l == '=') {
659: /* HP compat: ignore &= etc. */
660: ++l;
661: }
1.1 deraadt 662: break;
663: case '!':
1.19 tedu 664: if (m->type != FILE_STRING && m->type != FILE_PSTRING) {
1.1 deraadt 665: m->reln = *l;
666: ++l;
667: break;
668: }
1.19 tedu 669: /*FALLTHROUGH*/
1.1 deraadt 670: default:
671: if (*l == 'x' && isascii((unsigned char)l[1]) &&
672: isspace((unsigned char)l[1])) {
673: m->reln = *l;
674: ++l;
675: goto GetDesc; /* Bill The Cat */
676: }
677: m->reln = '=';
678: break;
679: }
680: EATAB;
681:
1.19 tedu 682: if (getvalue(ms, m, &l))
1.1 deraadt 683: return -1;
684: /*
685: * TODO finish this macro and start using it!
686: * #define offsetcheck {if (offset > HOWMANY-1)
1.19 tedu 687: * magwarn("offset too big"); }
1.1 deraadt 688: */
689:
690: /*
691: * now get last part - the description
692: */
693: GetDesc:
694: EATAB;
695: if (l[0] == '\b') {
696: ++l;
697: m->nospflag = 1;
698: } else if ((l[0] == '\\') && (l[1] == 'b')) {
699: ++l;
700: ++l;
701: m->nospflag = 1;
702: } else
703: m->nospflag = 0;
1.21 pedro 704:
705: strlcpy(m->desc, l, sizeof(m->desc));
1.1 deraadt 706:
1.19 tedu 707: #ifndef COMPILE_ONLY
708: if (action == FILE_CHECK) {
709: file_mdump(m);
1.1 deraadt 710: }
1.19 tedu 711: #endif
712: ++(*nmagicp); /* make room for next */
1.1 deraadt 713: return 0;
714: }
715:
716: /*
717: * Read a numeric value from a pointer, into the value union of a magic
718: * pointer, according to the magic type. Update the string pointer to point
719: * just after the number read. Return 0 for success, non-zero for failure.
720: */
1.19 tedu 721: private int
722: getvalue(struct magic_set *ms, struct magic *m, char **p)
1.1 deraadt 723: {
724: int slen;
725:
1.19 tedu 726: switch (m->type) {
727: case FILE_STRING:
728: case FILE_PSTRING:
729: case FILE_REGEX:
730: *p = getstr(ms, *p, m->value.s, sizeof(m->value.s), &slen);
731: if (*p == NULL) {
732: if (ms->flags & MAGIC_CHECK)
733: file_magwarn("cannot get string from `%s'",
734: m->value.s);
735: return -1;
736: }
1.1 deraadt 737: m->vallen = slen;
1.19 tedu 738: return 0;
739: default:
1.1 deraadt 740: if (m->reln != 'x') {
1.19 tedu 741: m->value.l = file_signextend(ms, m,
742: (uint32_t)strtoul(*p, p, 0));
1.1 deraadt 743: eatsize(p);
744: }
1.19 tedu 745: return 0;
746: }
1.1 deraadt 747: }
748:
749: /*
750: * Convert a string containing C character escapes. Stop at an unescaped
751: * space or tab.
752: * Copy the converted version to "p", returning its length in *slen.
753: * Return updated scan pointer as function result.
754: */
1.19 tedu 755: private char *
756: getstr(struct magic_set *ms, char *s, char *p, int plen, int *slen)
1.1 deraadt 757: {
758: char *origs = s, *origp = p;
759: char *pmax = p + plen - 1;
1.11 mpech 760: int c;
761: int val;
1.1 deraadt 762:
763: while ((c = *s++) != '\0') {
764: if (isspace((unsigned char) c))
765: break;
766: if (p >= pmax) {
1.19 tedu 767: file_error(ms, 0, "string too long: `%s'", origs);
768: return NULL;
1.1 deraadt 769: }
770: if(c == '\\') {
771: switch(c = *s++) {
772:
773: case '\0':
774: goto out;
775:
776: default:
777: *p++ = (char) c;
778: break;
779:
780: case 'n':
781: *p++ = '\n';
782: break;
783:
784: case 'r':
785: *p++ = '\r';
786: break;
787:
788: case 'b':
789: *p++ = '\b';
790: break;
791:
792: case 't':
793: *p++ = '\t';
794: break;
795:
796: case 'f':
797: *p++ = '\f';
798: break;
799:
800: case 'v':
801: *p++ = '\v';
802: break;
803:
804: /* \ and up to 3 octal digits */
805: case '0':
806: case '1':
807: case '2':
808: case '3':
809: case '4':
810: case '5':
811: case '6':
812: case '7':
813: val = c - '0';
814: c = *s++; /* try for 2 */
815: if(c >= '0' && c <= '7') {
816: val = (val<<3) | (c - '0');
817: c = *s++; /* try for 3 */
818: if(c >= '0' && c <= '7')
819: val = (val<<3) | (c-'0');
820: else
821: --s;
822: }
823: else
824: --s;
825: *p++ = (char)val;
826: break;
827:
1.4 millert 828: /* \x and up to 2 hex digits */
1.1 deraadt 829: case 'x':
830: val = 'x'; /* Default if no digits */
831: c = hextoint(*s++); /* Get next char */
832: if (c >= 0) {
833: val = c;
834: c = hextoint(*s++);
1.4 millert 835: if (c >= 0)
1.1 deraadt 836: val = (val << 4) + c;
1.4 millert 837: else
1.1 deraadt 838: --s;
839: } else
840: --s;
841: *p++ = (char)val;
842: break;
843: }
844: } else
845: *p++ = (char)c;
846: }
847: out:
848: *p = '\0';
849: *slen = p - origp;
850: return s;
851: }
852:
853:
854: /* Single hex char to int; -1 if not a hex char. */
1.19 tedu 855: private int
856: hextoint(int c)
857: {
858: if (!isascii((unsigned char) c))
859: return -1;
860: if (isdigit((unsigned char) c))
861: return c - '0';
862: if ((c >= 'a')&&(c <= 'f'))
863: return c + 10 - 'a';
864: if (( c>= 'A')&&(c <= 'F'))
865: return c + 10 - 'A';
866: return -1;
1.1 deraadt 867: }
868:
869:
870: /*
871: * Print a string containing C character escapes.
872: */
1.19 tedu 873: protected void
874: file_showstr(FILE *fp, const char *s, size_t len)
1.1 deraadt 875: {
1.11 mpech 876: char c;
1.1 deraadt 877:
878: for (;;) {
879: c = *s++;
1.19 tedu 880: if (len == ~0U) {
1.1 deraadt 881: if (c == '\0')
882: break;
883: }
884: else {
885: if (len-- == 0)
886: break;
887: }
888: if(c >= 040 && c <= 0176) /* TODO isprint && !iscntrl */
889: (void) fputc(c, fp);
890: else {
891: (void) fputc('\\', fp);
892: switch (c) {
893:
894: case '\n':
895: (void) fputc('n', fp);
896: break;
897:
898: case '\r':
899: (void) fputc('r', fp);
900: break;
901:
902: case '\b':
903: (void) fputc('b', fp);
904: break;
905:
906: case '\t':
907: (void) fputc('t', fp);
908: break;
909:
910: case '\f':
911: (void) fputc('f', fp);
912: break;
913:
914: case '\v':
915: (void) fputc('v', fp);
916: break;
917:
918: default:
919: (void) fprintf(fp, "%.3o", c & 0377);
920: break;
921: }
922: }
923: }
924: }
925:
926: /*
927: * eatsize(): Eat the size spec from a number [eg. 10UL]
928: */
1.19 tedu 929: private void
930: eatsize(char **p)
1.1 deraadt 931: {
932: char *l = *p;
933:
934: if (LOWCASE(*l) == 'u')
935: l++;
936:
937: switch (LOWCASE(*l)) {
938: case 'l': /* long */
939: case 's': /* short */
940: case 'h': /* short */
941: case 'b': /* char/byte */
942: case 'c': /* char/byte */
943: l++;
944: /*FALLTHROUGH*/
945: default:
946: break;
947: }
948:
949: *p = l;
1.19 tedu 950: }
951:
952: /*
953: * handle a compiled file.
954: */
955: private int
956: apprentice_map(struct magic_set *ms, struct magic **magicp, uint32_t *nmagicp,
957: const char *fn)
958: {
959: int fd;
960: struct stat st;
961: uint32_t *ptr;
962: uint32_t version;
963: int needsbyteswap;
964: char buf[MAXPATHLEN];
965: char *dbname = mkdbname(fn, buf, sizeof(buf));
966: void *mm = NULL;
967:
968: if (dbname == NULL)
969: return -1;
970:
971: if ((fd = open(dbname, O_RDONLY)) == -1)
972: return -1;
973:
974: if (fstat(fd, &st) == -1) {
975: file_error(ms, errno, "cannot stat `%s'", dbname);
976: goto error;
977: }
978: if (st.st_size < 16) {
979: file_error(ms, 0, "file `%s' is too small", dbname);
980: goto error;
981: }
982:
983: #ifdef QUICK
984: if ((mm = mmap(0, (size_t)st.st_size, PROT_READ|PROT_WRITE,
985: MAP_PRIVATE|MAP_FILE, fd, (off_t)0)) == MAP_FAILED) {
986: file_error(ms, errno, "cannot map `%s'", dbname);
987: goto error;
988: }
989: #define RET 2
990: #else
991: if ((mm = malloc((size_t)st.st_size)) == NULL) {
992: file_oomem(ms);
993: goto error;
994: }
995: if (read(fd, mm, (size_t)st.st_size) != (size_t)st.st_size) {
996: file_badread(ms);
997: goto error;
998: }
999: #define RET 1
1000: #endif
1001: *magicp = mm;
1002: (void)close(fd);
1003: fd = -1;
1004: ptr = (uint32_t *)(void *)*magicp;
1005: if (*ptr != MAGICNO) {
1006: if (swap4(*ptr) != MAGICNO) {
1007: file_error(ms, 0, "bad magic in `%s'");
1008: goto error;
1009: }
1010: needsbyteswap = 1;
1011: } else
1012: needsbyteswap = 0;
1013: if (needsbyteswap)
1014: version = swap4(ptr[1]);
1015: else
1016: version = ptr[1];
1017: if (version != VERSIONNO) {
1018: file_error(ms, 0, "version mismatch (%d != %d) in `%s'",
1019: version, VERSIONNO, dbname);
1020: goto error;
1021: }
1022: *nmagicp = (uint32_t)(st.st_size / sizeof(struct magic)) - 1;
1023: (*magicp)++;
1024: if (needsbyteswap)
1025: byteswap(*magicp, *nmagicp);
1026: return RET;
1027:
1028: error:
1029: if (fd != -1)
1030: (void)close(fd);
1031: if (mm) {
1032: #ifdef QUICK
1033: (void)munmap((void *)mm, (size_t)st.st_size);
1034: #else
1035: free(mm);
1036: #endif
1037: } else {
1038: *magicp = NULL;
1039: *nmagicp = 0;
1040: }
1041: return -1;
1042: }
1043:
1044: private const uint32_t ar[] = {
1045: MAGICNO, VERSIONNO
1046: };
1047: /*
1048: * handle an mmaped file.
1049: */
1050: private int
1051: apprentice_compile(struct magic_set *ms, struct magic **magicp,
1052: uint32_t *nmagicp, const char *fn)
1053: {
1054: int fd;
1055: char buf[MAXPATHLEN];
1056: char *dbname = mkdbname(fn, buf, sizeof(buf));
1057:
1058: if (dbname == NULL)
1059: return -1;
1060:
1061: if ((fd = open(dbname, O_WRONLY|O_CREAT|O_TRUNC, 0644)) == -1) {
1062: file_error(ms, errno, "cannot open `%s'", dbname);
1063: return -1;
1064: }
1065:
1066: if (write(fd, ar, sizeof(ar)) != (ssize_t)sizeof(ar)) {
1067: file_error(ms, errno, "error writing `%s'", dbname);
1068: return -1;
1069: }
1070:
1071: if (lseek(fd, (off_t)sizeof(struct magic), SEEK_SET)
1072: != sizeof(struct magic)) {
1073: file_error(ms, errno, "error seeking `%s'", dbname);
1074: return -1;
1075: }
1076:
1077: if (write(fd, *magicp, (sizeof(struct magic) * *nmagicp))
1078: != (ssize_t)(sizeof(struct magic) * *nmagicp)) {
1079: file_error(ms, errno, "error writing `%s'", dbname);
1080: return -1;
1081: }
1082:
1083: (void)close(fd);
1084: return 0;
1085: }
1086:
1087: private const char ext[] = ".mgc";
1088: /*
1089: * make a dbname
1090: */
1091: private char *
1092: mkdbname(const char *fn, char *buf, size_t bufsiz)
1093: {
1094: #ifdef notdef
1095: const char *p;
1096: if ((p = strrchr(fn, '/')) != NULL)
1097: fn = ++p;
1098: #endif
1099: (void)snprintf(buf, bufsiz, "%s%s", fn, ext);
1100: return buf;
1101: }
1102:
1103: /*
1104: * Byteswap an mmap'ed file if needed
1105: */
1106: private void
1107: byteswap(struct magic *magic, uint32_t nmagic)
1108: {
1109: uint32_t i;
1110: for (i = 0; i < nmagic; i++)
1111: bs1(&magic[i]);
1112: }
1113:
1114: /*
1115: * swap a short
1116: */
1117: private uint16_t
1118: swap2(uint16_t sv)
1119: {
1120: uint16_t rv;
1121: uint8_t *s = (uint8_t *)(void *)&sv;
1122: uint8_t *d = (uint8_t *)(void *)&rv;
1123: d[0] = s[1];
1124: d[1] = s[0];
1125: return rv;
1126: }
1127:
1128: /*
1129: * swap an int
1130: */
1131: private uint32_t
1132: swap4(uint32_t sv)
1133: {
1134: uint32_t rv;
1135: uint8_t *s = (uint8_t *)(void *)&sv;
1136: uint8_t *d = (uint8_t *)(void *)&rv;
1137: d[0] = s[3];
1138: d[1] = s[2];
1139: d[2] = s[1];
1140: d[3] = s[0];
1141: return rv;
1142: }
1143:
1144: /*
1145: * byteswap a single magic entry
1146: */
1147: private void
1148: bs1(struct magic *m)
1149: {
1150: m->cont_level = swap2(m->cont_level);
1151: m->offset = swap4((uint32_t)m->offset);
1152: m->in_offset = swap4((uint32_t)m->in_offset);
1153: if (m->type != FILE_STRING)
1154: m->value.l = swap4(m->value.l);
1155: m->mask = swap4(m->mask);
1.1 deraadt 1156: }