Annotation of src/usr.bin/file/magic-load.c, Revision 1.3
1.3 ! nicm 1: /* $OpenBSD: magic-load.c,v 1.2 2015/04/24 16:45:32 nicm Exp $ */
1.1 nicm 2:
3: /*
4: * Copyright (c) 2015 Nicholas Marriott <nicm@openbsd.org>
5: *
6: * Permission to use, copy, modify, and distribute this software for any
7: * purpose with or without fee is hereby granted, provided that the above
8: * copyright notice and this permission notice appear in all copies.
9: *
10: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14: * WHATSOEVER RESULTING FROM LOSS OF MIND, USE, DATA OR PROFITS, WHETHER
15: * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
16: * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17: */
18:
19: #include <sys/types.h>
20:
21: #include <ctype.h>
22: #include <errno.h>
23: #include <limits.h>
24: #include <regex.h>
25: #include <stdarg.h>
26: #include <stdio.h>
27: #include <stdlib.h>
28: #include <string.h>
29:
30: #include "magic.h"
31: #include "xmalloc.h"
32:
/*
 * Convert an octal digit character to its numeric value.
 * Returns 0-7 for '0'-'7' and -1 for any other character.
 */
static int
magic_odigit(u_char c)
{
	return ((c < '0' || c > '7') ? -1 : c - '0');
}
40:
/*
 * Convert a hexadecimal digit character (either case) to its numeric value.
 * Returns 0-15, or -1 if c is not a hexadecimal digit.
 */
static int
magic_xdigit(u_char c)
{
	int	value = -1;

	if (c >= 'a' && c <= 'f')
		value = c - 'a' + 10;
	else if (c >= 'A' && c <= 'F')
		value = c - 'A' + 10;
	else if (c >= '0' && c <= '9')
		value = c - '0';
	return (value);
}
52:
53: static void
54: magic_mark_text(struct magic_line *ml, int text)
55: {
56: do {
57: ml->text = text;
58: ml = ml->parent;
59: } while (ml != NULL);
60: }
61:
/*
 * Compile the extended regular expression p into re (no subexpression
 * reporting). On failure a warning mentioning name is issued against ml.
 * Returns 0 on success and -1 if the pattern does not compile.
 */
static int
magic_make_pattern(struct magic_line *ml, const char *name, regex_t *re,
    const char *p)
{
	char	msg[256];
	int	rc;

	rc = regcomp(re, p, REG_EXTENDED|REG_NOSUB);
	if (rc == 0)
		return (0);

	regerror(rc, re, msg, sizeof msg);
	magic_warn(ml, "bad %s pattern: %s", name, msg);
	return (-1);
}
77:
78: static int
79: magic_set_result(struct magic_line *ml, const char *s)
80: {
81: const char *fmt;
82: const char *endfmt;
83: const char *cp;
84: regex_t *re = NULL;
85: regmatch_t pmatch;
86: size_t fmtlen;
87:
88: while (isspace((u_char)*s))
89: s++;
90: if (*s == '\0') {
91: ml->result = NULL;
92: return (0);
93: }
94: ml->result = xstrdup(s);
95:
96: fmt = NULL;
97: for (cp = s; *cp != '\0'; cp++) {
98: if (cp[0] == '%' && cp[1] != '%') {
99: if (fmt != NULL) {
100: magic_warn(ml, "multiple formats");
101: return (-1);
102: }
103: fmt = cp;
104: }
105: }
106: if (fmt == NULL)
107: return (0);
108: fmt++;
109:
110: for (endfmt = fmt; *endfmt != '\0'; endfmt++) {
111: if (strchr("diouxXeEfFgGsc", *endfmt) != NULL)
112: break;
113: }
114: if (*endfmt == '\0') {
115: magic_warn(ml, "unterminated format");
116: return (-1);
117: }
118: fmtlen = endfmt + 1 - fmt;
119: if (fmtlen > 32) {
120: magic_warn(ml, "format too long");
121: return (-1);
122: }
123:
124: if (*endfmt == 's') {
125: switch (ml->type) {
126: case MAGIC_TYPE_DATE:
127: case MAGIC_TYPE_LDATE:
128: case MAGIC_TYPE_UDATE:
129: case MAGIC_TYPE_ULDATE:
130: case MAGIC_TYPE_BEDATE:
131: case MAGIC_TYPE_BELDATE:
132: case MAGIC_TYPE_UBEDATE:
133: case MAGIC_TYPE_UBELDATE:
134: case MAGIC_TYPE_QDATE:
135: case MAGIC_TYPE_QLDATE:
136: case MAGIC_TYPE_UQDATE:
137: case MAGIC_TYPE_UQLDATE:
138: case MAGIC_TYPE_BEQDATE:
139: case MAGIC_TYPE_BEQLDATE:
140: case MAGIC_TYPE_UBEQDATE:
141: case MAGIC_TYPE_UBEQLDATE:
142: case MAGIC_TYPE_LEQDATE:
143: case MAGIC_TYPE_LEQLDATE:
144: case MAGIC_TYPE_ULEQDATE:
145: case MAGIC_TYPE_ULEQLDATE:
146: case MAGIC_TYPE_LEDATE:
147: case MAGIC_TYPE_LELDATE:
148: case MAGIC_TYPE_ULEDATE:
149: case MAGIC_TYPE_ULELDATE:
150: case MAGIC_TYPE_MEDATE:
151: case MAGIC_TYPE_MELDATE:
152: case MAGIC_TYPE_STRING:
153: case MAGIC_TYPE_PSTRING:
154: case MAGIC_TYPE_BESTRING16:
155: case MAGIC_TYPE_LESTRING16:
156: case MAGIC_TYPE_REGEX:
157: case MAGIC_TYPE_SEARCH:
158: break;
159: default:
160: ml->stringify = 1;
161: break;
162: }
163: }
164:
165: if (!ml->root->compiled) {
166: /*
167: * XXX %ld (and %lu and so on) is invalid on 64-bit platforms
168: * with byte, short, long. We get lucky because our first and
169: * only argument ends up in a register. Accept it for now.
170: */
171: if (magic_make_pattern(ml, "short", &ml->root->format_short,
172: "^-?[0-9]*(\\.[0-9]*)?(c|(l|h|hh)?[iduxX])$") != 0)
173: return (-1);
174: if (magic_make_pattern(ml, "long", &ml->root->format_long,
175: "^-?[0-9]*(\\.[0-9]*)?(c|(l|h|hh)?[iduxX])$") != 0)
176: return (-1);
177: if (magic_make_pattern(ml, "quad", &ml->root->format_quad,
178: "^-?[0-9]*(\\.[0-9]*)?ll[iduxX]$") != 0)
179: return (-1);
180: if (magic_make_pattern(ml, "float", &ml->root->format_float,
181: "^-?[0-9]*(\\.[0-9]*)?[eEfFgG]$") != 0)
182: return (-1);
183: if (magic_make_pattern(ml, "string", &ml->root->format_string,
184: "^-?[0-9]*(\\.[0-9]*)?s$") != 0)
185: return (-1);
186: ml->root->compiled = 1;
187: }
188:
189: if (ml->stringify)
190: re = &ml->root->format_string;
191: else {
192: switch (ml->type) {
193: case MAGIC_TYPE_NONE:
194: case MAGIC_TYPE_DEFAULT:
195: return (0); /* don't use result */
196: case MAGIC_TYPE_BYTE:
197: case MAGIC_TYPE_UBYTE:
198: case MAGIC_TYPE_SHORT:
199: case MAGIC_TYPE_USHORT:
200: case MAGIC_TYPE_BESHORT:
201: case MAGIC_TYPE_UBESHORT:
202: case MAGIC_TYPE_LESHORT:
203: case MAGIC_TYPE_ULESHORT:
204: re = &ml->root->format_short;
205: break;
206: case MAGIC_TYPE_LONG:
207: case MAGIC_TYPE_ULONG:
208: case MAGIC_TYPE_BELONG:
209: case MAGIC_TYPE_UBELONG:
210: case MAGIC_TYPE_LELONG:
211: case MAGIC_TYPE_ULELONG:
212: case MAGIC_TYPE_MELONG:
213: re = &ml->root->format_long;
214: break;
215: case MAGIC_TYPE_QUAD:
216: case MAGIC_TYPE_UQUAD:
217: case MAGIC_TYPE_BEQUAD:
218: case MAGIC_TYPE_UBEQUAD:
219: case MAGIC_TYPE_LEQUAD:
220: case MAGIC_TYPE_ULEQUAD:
221: re = &ml->root->format_quad;
222: break;
223: case MAGIC_TYPE_FLOAT:
224: case MAGIC_TYPE_BEFLOAT:
225: case MAGIC_TYPE_LEFLOAT:
226: case MAGIC_TYPE_DOUBLE:
227: case MAGIC_TYPE_BEDOUBLE:
228: case MAGIC_TYPE_LEDOUBLE:
229: re = &ml->root->format_float;
230: break;
231: case MAGIC_TYPE_DATE:
232: case MAGIC_TYPE_LDATE:
233: case MAGIC_TYPE_UDATE:
234: case MAGIC_TYPE_ULDATE:
235: case MAGIC_TYPE_BEDATE:
236: case MAGIC_TYPE_BELDATE:
237: case MAGIC_TYPE_UBEDATE:
238: case MAGIC_TYPE_UBELDATE:
239: case MAGIC_TYPE_QDATE:
240: case MAGIC_TYPE_QLDATE:
241: case MAGIC_TYPE_UQDATE:
242: case MAGIC_TYPE_UQLDATE:
243: case MAGIC_TYPE_BEQDATE:
244: case MAGIC_TYPE_BEQLDATE:
245: case MAGIC_TYPE_UBEQDATE:
246: case MAGIC_TYPE_UBEQLDATE:
247: case MAGIC_TYPE_LEQDATE:
248: case MAGIC_TYPE_LEQLDATE:
249: case MAGIC_TYPE_ULEQDATE:
250: case MAGIC_TYPE_ULEQLDATE:
251: case MAGIC_TYPE_LEDATE:
252: case MAGIC_TYPE_LELDATE:
253: case MAGIC_TYPE_ULEDATE:
254: case MAGIC_TYPE_ULELDATE:
255: case MAGIC_TYPE_MEDATE:
256: case MAGIC_TYPE_MELDATE:
257: case MAGIC_TYPE_STRING:
258: case MAGIC_TYPE_PSTRING:
259: case MAGIC_TYPE_REGEX:
260: case MAGIC_TYPE_SEARCH:
261: re = &ml->root->format_string;
262: break;
263: case MAGIC_TYPE_BESTRING16:
264: case MAGIC_TYPE_LESTRING16:
265: magic_warn(ml, "unsupported type %s", ml->type_string);
266: return (-1);
267: }
268: }
269:
270: pmatch.rm_so = 0;
271: pmatch.rm_eo = fmtlen;
272: if (regexec(re, fmt, 1, &pmatch, REG_STARTEND) != 0) {
273: magic_warn(ml, "bad format for %s: %%%.*s", ml->type_string,
274: (int)fmtlen, fmt);
275: return (-1);
276: }
277:
278: return (0);
279: }
280:
281: static u_int
282: magic_get_strength(struct magic_line *ml)
283: {
284: int n;
285: size_t size;
286:
287: if (ml->test_not || ml->test_operator == 'x')
288: return (1);
289:
290: n = 20;
291: switch (ml->type) {
292: case MAGIC_TYPE_NONE:
293: case MAGIC_TYPE_DEFAULT:
294: return (0);
295: case MAGIC_TYPE_BYTE:
296: case MAGIC_TYPE_UBYTE:
297: n += 1 * MAGIC_STRENGTH_MULTIPLIER;
298: break;
299: case MAGIC_TYPE_SHORT:
300: case MAGIC_TYPE_USHORT:
301: case MAGIC_TYPE_BESHORT:
302: case MAGIC_TYPE_UBESHORT:
303: case MAGIC_TYPE_LESHORT:
304: case MAGIC_TYPE_ULESHORT:
305: n += 2 * MAGIC_STRENGTH_MULTIPLIER;
306: break;
307: case MAGIC_TYPE_LONG:
308: case MAGIC_TYPE_ULONG:
309: case MAGIC_TYPE_FLOAT:
310: case MAGIC_TYPE_DATE:
311: case MAGIC_TYPE_LDATE:
312: case MAGIC_TYPE_UDATE:
313: case MAGIC_TYPE_ULDATE:
314: case MAGIC_TYPE_BELONG:
315: case MAGIC_TYPE_UBELONG:
316: case MAGIC_TYPE_BEFLOAT:
317: case MAGIC_TYPE_BEDATE:
318: case MAGIC_TYPE_BELDATE:
319: case MAGIC_TYPE_UBEDATE:
320: case MAGIC_TYPE_UBELDATE:
321: n += 4 * MAGIC_STRENGTH_MULTIPLIER;
322: break;
323: case MAGIC_TYPE_QUAD:
324: case MAGIC_TYPE_UQUAD:
325: case MAGIC_TYPE_DOUBLE:
326: case MAGIC_TYPE_QDATE:
327: case MAGIC_TYPE_QLDATE:
328: case MAGIC_TYPE_UQDATE:
329: case MAGIC_TYPE_UQLDATE:
330: case MAGIC_TYPE_BEQUAD:
331: case MAGIC_TYPE_UBEQUAD:
332: case MAGIC_TYPE_BEDOUBLE:
333: case MAGIC_TYPE_BEQDATE:
334: case MAGIC_TYPE_BEQLDATE:
335: case MAGIC_TYPE_UBEQDATE:
336: case MAGIC_TYPE_UBEQLDATE:
337: case MAGIC_TYPE_LEQUAD:
338: case MAGIC_TYPE_ULEQUAD:
339: case MAGIC_TYPE_LEDOUBLE:
340: case MAGIC_TYPE_LEQDATE:
341: case MAGIC_TYPE_LEQLDATE:
342: case MAGIC_TYPE_ULEQDATE:
343: case MAGIC_TYPE_ULEQLDATE:
344: case MAGIC_TYPE_LELONG:
345: case MAGIC_TYPE_ULELONG:
346: case MAGIC_TYPE_LEFLOAT:
347: case MAGIC_TYPE_LEDATE:
348: case MAGIC_TYPE_LELDATE:
349: case MAGIC_TYPE_ULEDATE:
350: case MAGIC_TYPE_ULELDATE:
351: case MAGIC_TYPE_MELONG:
352: case MAGIC_TYPE_MEDATE:
353: case MAGIC_TYPE_MELDATE:
354: n += 8 * MAGIC_STRENGTH_MULTIPLIER;
355: break;
356: case MAGIC_TYPE_STRING:
357: case MAGIC_TYPE_PSTRING:
358: n += ml->test_string_size * MAGIC_STRENGTH_MULTIPLIER;
359: break;
360: case MAGIC_TYPE_BESTRING16:
361: case MAGIC_TYPE_LESTRING16:
362: n += ml->test_string_size * MAGIC_STRENGTH_MULTIPLIER / 2;
363: break;
364: case MAGIC_TYPE_REGEX:
365: case MAGIC_TYPE_SEARCH:
366: size = MAGIC_STRENGTH_MULTIPLIER / ml->test_string_size;
367: if (size < 1)
368: size = 1;
369: n += ml->test_string_size * size;
370: break;
371: }
372: switch (ml->test_operator) {
373: case '=':
374: n += MAGIC_STRENGTH_MULTIPLIER;
375: break;
376: case '<':
377: case '>':
378: case '[':
379: case ']':
380: n -= 2 * MAGIC_STRENGTH_MULTIPLIER;
381: break;
382: case '^':
383: case '&':
384: n -= MAGIC_STRENGTH_MULTIPLIER;
385: break;
386: }
387: return (n <= 0 ? 1 : n);
388: }
389:
/*
 * Copy one whitespace-delimited token from *line into out, decoding
 * backslash escapes: "\ " (space), one to three octal digits, "\x" with
 * one or two hex digits, and the C escapes \a \b \t \f \n \r. Any other
 * escaped character is copied as itself.
 *
 * On success out is NUL-terminated, *outlen is the decoded length (NUL
 * excluded), *line is advanced to the character that ended the token,
 * and 0 is returned. Returns -1, leaving *line and *outlen untouched, if
 * a backslash ends the line or "\x" is not followed by a hex digit.
 *
 * The decoded text is never longer than the input token, so an out
 * buffer of strlen(*line) + 1 bytes is sufficient.
 */
static int
magic_get_string(char **line, char *out, size_t *outlen)
{
	char	*base = out;
	char	*in = *line;
	char	 esc;
	int	 hi, mid, lo;

	while (*in != '\0' && !isspace((u_char)*in)) {
		if (*in != '\\') {
			*out++ = *in++;
			continue;
		}

		esc = *++in;
		if (esc == '\0')	/* end of line */
			return (-1);

		if (esc >= '0' && esc <= '7') {
			/* Up to three octal digits, the first being esc. */
			hi = magic_odigit(in[0]);
			mid = magic_odigit(in[1]);
			lo = (in[1] != '\0') ? magic_odigit(in[2]) : -1;

			if (mid == -1)
				*out++ = hi;
			else if (lo == -1) {
				*out++ = mid | (hi << 3);
				in++;
			} else {
				*out++ = lo | (mid << 3) | (hi << 6);
				in += 2;
			}
		} else if (esc == 'x') {
			/* One or two hex digits must follow the 'x'. */
			hi = magic_xdigit(in[1]);
			if (hi == -1)
				return (-1);
			lo = (in[1] != '\0') ? magic_xdigit(in[2]) : -1;

			if (lo != -1) {
				*out++ = lo | (hi << 4);
				in += 2;
			} else {
				*out++ = hi;
				in++;
			}
		} else {
			/* Named escapes; anything else stands for itself. */
			switch (esc) {
			case 'a':
				esc = '\a';
				break;
			case 'b':
				esc = '\b';
				break;
			case 't':
				esc = '\t';
				break;
			case 'f':
				esc = '\f';
				break;
			case 'n':
				esc = '\n';
				break;
			case 'r':
				esc = '\r';
				break;
			default:
				break;
			}
			*out++ = esc;
		}
		in++;
	}
	*out = '\0';
	*outlen = out - base;

	*line = in;
	return (0);
}
495:
496: static int
497: magic_parse_offset(struct magic_line *ml, char **line)
498: {
499: char *copy, *s, *cp, *endptr;
500:
501: while (isspace((u_char)**line))
502: (*line)++;
503: copy = s = cp = xmalloc(strlen(*line) + 1);
504: while (**line != '\0' && !isspace((u_char)**line))
505: *cp++ = *(*line)++;
506: *cp = '\0';
507:
508: ml->offset = 0;
509: ml->offset_relative = 0;
510:
511: ml->indirect_type = ' ';
512: ml->indirect_relative = 0;
513: ml->indirect_offset = 0;
514: ml->indirect_operator = ' ';
515: ml->indirect_operand = 0;
516:
517: if (*s == '&') {
518: ml->offset_relative = 1;
519: s++;
520: }
521:
522: if (*s != '(') {
523: endptr = magic_strtoll(s, &ml->offset);
524: if (endptr == NULL || *endptr != '\0') {
525: magic_warn(ml, "missing closing bracket");
526: goto fail;
527: }
528: if (ml->offset < 0 && !ml->offset_relative) {
529: magic_warn(ml, "negative absolute offset");
530: goto fail;
531: }
532: goto done;
533: }
534: s++;
535:
536: if (*s == '&') {
537: ml->indirect_relative = 1;
538: s++;
539: }
540:
541: endptr = magic_strtoll(s, &ml->indirect_offset);
542: if (endptr == NULL) {
543: magic_warn(ml, "can't parse offset");
544: goto fail;
545: }
546: s = endptr;
547: if (*s == ')')
548: goto done;
549:
550: if (*s == '.') {
551: s++;
552: if (strchr("bslBSL", *s) == NULL) {
553: magic_warn(ml, "unknown offset type");
554: goto fail;
555: }
556: ml->indirect_type = *s;
557: s++;
558: if (*s == ')')
559: goto done;
560: }
561:
562: if (strchr("+-*", *s) == NULL) {
563: magic_warn(ml, "unknown offset operator");
564: goto fail;
565: }
566: ml->indirect_operator = *s;
567: s++;
568: if (*s == ')')
569: goto done;
570:
571: if (*s == '(') {
572: s++;
573: endptr = magic_strtoll(s, &ml->indirect_operand);
574: if (endptr == NULL || *endptr != ')') {
575: magic_warn(ml, "missing closing bracket");
576: goto fail;
577: }
578: if (*++endptr != ')') {
579: magic_warn(ml, "missing closing bracket");
580: goto fail;
581: }
582: } else {
583: endptr = magic_strtoll(s, &ml->indirect_operand);
584: if (endptr == NULL || *endptr != ')') {
585: magic_warn(ml, "missing closing bracket");
586: goto fail;
587: }
588: }
589:
590: done:
591: free(copy);
592: return (0);
593:
594: fail:
595: free(copy);
596: return (-1);
597: }
598:
599: static int
600: magic_parse_type(struct magic_line *ml, char **line)
601: {
602: char *copy, *s, *cp, *endptr;
603:
604: while (isspace((u_char)**line))
605: (*line)++;
606: copy = s = cp = xmalloc(strlen(*line) + 1);
607: while (**line != '\0' && !isspace((u_char)**line))
608: *cp++ = *(*line)++;
609: *cp = '\0';
610:
611: ml->type = MAGIC_TYPE_NONE;
612: ml->type_string = xstrdup(s);
613:
614: ml->type_operator = ' ';
615: ml->type_operand = 0;
616:
617: if (strncmp(s, "string", (sizeof "string") - 1) == 0) {
618: ml->type = MAGIC_TYPE_STRING;
619: magic_mark_text(ml, 0);
620: goto done;
621: }
622: if (strncmp(s, "search", (sizeof "search") - 1) == 0) {
623: ml->type = MAGIC_TYPE_SEARCH;
624: goto done;
625: }
626: if (strncmp(s, "regex", (sizeof "regex") - 1) == 0) {
627: ml->type = MAGIC_TYPE_REGEX;
628: goto done;
629: }
630:
631: cp = &s[strcspn(s, "-&")];
632: if (*cp != '\0') {
633: ml->type_operator = *cp;
634: endptr = magic_strtoull(cp + 1, &ml->type_operand);
635: if (endptr == NULL || *endptr != '\0') {
636: magic_warn(ml, "can't parse operand");
637: goto fail;
638: }
639: *cp = '\0';
640: }
641:
642: if (strcmp(s, "byte") == 0)
643: ml->type = MAGIC_TYPE_BYTE;
644: else if (strcmp(s, "short") == 0)
645: ml->type = MAGIC_TYPE_SHORT;
646: else if (strcmp(s, "long") == 0)
647: ml->type = MAGIC_TYPE_LONG;
648: else if (strcmp(s, "quad") == 0)
649: ml->type = MAGIC_TYPE_QUAD;
650: else if (strcmp(s, "ubyte") == 0)
651: ml->type = MAGIC_TYPE_UBYTE;
652: else if (strcmp(s, "ushort") == 0)
653: ml->type = MAGIC_TYPE_USHORT;
654: else if (strcmp(s, "ulong") == 0)
655: ml->type = MAGIC_TYPE_ULONG;
656: else if (strcmp(s, "uquad") == 0)
657: ml->type = MAGIC_TYPE_UQUAD;
658: else if (strcmp(s, "float") == 0)
659: ml->type = MAGIC_TYPE_FLOAT;
660: else if (strcmp(s, "double") == 0)
661: ml->type = MAGIC_TYPE_DOUBLE;
662: else if (strcmp(s, "pstring") == 0)
663: ml->type = MAGIC_TYPE_PSTRING;
664: else if (strcmp(s, "date") == 0)
665: ml->type = MAGIC_TYPE_DATE;
666: else if (strcmp(s, "qdate") == 0)
667: ml->type = MAGIC_TYPE_QDATE;
668: else if (strcmp(s, "ldate") == 0)
669: ml->type = MAGIC_TYPE_LDATE;
670: else if (strcmp(s, "qldate") == 0)
671: ml->type = MAGIC_TYPE_QLDATE;
672: else if (strcmp(s, "udate") == 0)
673: ml->type = MAGIC_TYPE_UDATE;
674: else if (strcmp(s, "uqdate") == 0)
675: ml->type = MAGIC_TYPE_UQDATE;
676: else if (strcmp(s, "uldate") == 0)
677: ml->type = MAGIC_TYPE_ULDATE;
678: else if (strcmp(s, "uqldate") == 0)
679: ml->type = MAGIC_TYPE_UQLDATE;
680: else if (strcmp(s, "beshort") == 0)
681: ml->type = MAGIC_TYPE_BESHORT;
682: else if (strcmp(s, "belong") == 0)
683: ml->type = MAGIC_TYPE_BELONG;
684: else if (strcmp(s, "bequad") == 0)
685: ml->type = MAGIC_TYPE_BEQUAD;
686: else if (strcmp(s, "ubeshort") == 0)
687: ml->type = MAGIC_TYPE_UBESHORT;
688: else if (strcmp(s, "ubelong") == 0)
689: ml->type = MAGIC_TYPE_UBELONG;
690: else if (strcmp(s, "ubequad") == 0)
691: ml->type = MAGIC_TYPE_UBEQUAD;
692: else if (strcmp(s, "befloat") == 0)
693: ml->type = MAGIC_TYPE_BEFLOAT;
694: else if (strcmp(s, "bedouble") == 0)
695: ml->type = MAGIC_TYPE_BEDOUBLE;
696: else if (strcmp(s, "bedate") == 0)
697: ml->type = MAGIC_TYPE_BEDATE;
698: else if (strcmp(s, "beqdate") == 0)
699: ml->type = MAGIC_TYPE_BEQDATE;
700: else if (strcmp(s, "beldate") == 0)
701: ml->type = MAGIC_TYPE_BELDATE;
702: else if (strcmp(s, "beqldate") == 0)
703: ml->type = MAGIC_TYPE_BEQLDATE;
704: else if (strcmp(s, "ubedate") == 0)
705: ml->type = MAGIC_TYPE_UBEDATE;
706: else if (strcmp(s, "ubeqdate") == 0)
707: ml->type = MAGIC_TYPE_UBEQDATE;
708: else if (strcmp(s, "ubeldate") == 0)
709: ml->type = MAGIC_TYPE_UBELDATE;
710: else if (strcmp(s, "ubeqldate") == 0)
711: ml->type = MAGIC_TYPE_UBEQLDATE;
712: else if (strcmp(s, "bestring16") == 0)
713: ml->type = MAGIC_TYPE_BESTRING16;
714: else if (strcmp(s, "leshort") == 0)
715: ml->type = MAGIC_TYPE_LESHORT;
716: else if (strcmp(s, "lelong") == 0)
717: ml->type = MAGIC_TYPE_LELONG;
718: else if (strcmp(s, "lequad") == 0)
719: ml->type = MAGIC_TYPE_LEQUAD;
720: else if (strcmp(s, "uleshort") == 0)
721: ml->type = MAGIC_TYPE_ULESHORT;
722: else if (strcmp(s, "ulelong") == 0)
723: ml->type = MAGIC_TYPE_ULELONG;
724: else if (strcmp(s, "ulequad") == 0)
725: ml->type = MAGIC_TYPE_ULEQUAD;
726: else if (strcmp(s, "lefloat") == 0)
727: ml->type = MAGIC_TYPE_LEFLOAT;
728: else if (strcmp(s, "ledouble") == 0)
729: ml->type = MAGIC_TYPE_LEDOUBLE;
730: else if (strcmp(s, "ledate") == 0)
731: ml->type = MAGIC_TYPE_LEDATE;
732: else if (strcmp(s, "leqdate") == 0)
733: ml->type = MAGIC_TYPE_LEQDATE;
734: else if (strcmp(s, "leldate") == 0)
735: ml->type = MAGIC_TYPE_LELDATE;
736: else if (strcmp(s, "leqldate") == 0)
737: ml->type = MAGIC_TYPE_LEQLDATE;
738: else if (strcmp(s, "uledate") == 0)
739: ml->type = MAGIC_TYPE_ULEDATE;
740: else if (strcmp(s, "uleqdate") == 0)
741: ml->type = MAGIC_TYPE_ULEQDATE;
742: else if (strcmp(s, "uleldate") == 0)
743: ml->type = MAGIC_TYPE_ULELDATE;
744: else if (strcmp(s, "uleqldate") == 0)
745: ml->type = MAGIC_TYPE_ULEQLDATE;
746: else if (strcmp(s, "lestring16") == 0)
747: ml->type = MAGIC_TYPE_LESTRING16;
748: else if (strcmp(s, "melong") == 0)
749: ml->type = MAGIC_TYPE_MELONG;
750: else if (strcmp(s, "medate") == 0)
751: ml->type = MAGIC_TYPE_MEDATE;
752: else if (strcmp(s, "meldate") == 0)
753: ml->type = MAGIC_TYPE_MELDATE;
754: else if (strcmp(s, "default") == 0)
755: ml->type = MAGIC_TYPE_DEFAULT;
756: else {
757: magic_warn(ml, "unknown type");
758: goto fail;
759: }
760: magic_mark_text(ml, 0);
761:
762: done:
763: free(copy);
764: return (0);
765:
766: fail:
767: free(copy);
768: return (-1);
769: }
770:
771: static int
772: magic_parse_value(struct magic_line *ml, char **line)
773: {
774: char *copy, *s, *cp, *endptr;
775: size_t slen;
776:
777: while (isspace((u_char)**line))
778: (*line)++;
779:
780: ml->test_operator = '=';
781: ml->test_not = 0;
782: ml->test_string = NULL;
783: ml->test_string_size = 0;
784: ml->test_unsigned = 0;
785: ml->test_signed = 0;
786:
787: s = *line;
788: if (s[0] == 'x' && (s[1] == '\0' || isspace((u_char)s[1]))) {
789: (*line)++;
790: ml->test_operator = 'x';
791: return (0);
792: }
793:
794: if (**line == '!') {
795: ml->test_not = 1;
796: (*line)++;
797: }
798:
799: switch (ml->type) {
800: case MAGIC_TYPE_STRING:
801: case MAGIC_TYPE_PSTRING:
802: case MAGIC_TYPE_SEARCH:
803: if (**line == '>' || **line == '<' || **line == '=') {
804: ml->test_operator = **line;
805: (*line)++;
806: }
807: /* FALLTHROUGH */
808: case MAGIC_TYPE_REGEX:
809: copy = s = xmalloc(strlen(*line) + 1);
810: if (magic_get_string(line, s, &slen) != 0) {
811: magic_warn(ml, "can't parse string");
812: goto fail;
813: }
814: ml->test_string_size = slen;
815: ml->test_string = s;
816: return (0); /* do not free */
817: default:
818: break;
819: }
820:
821: copy = s = cp = xmalloc(strlen(*line) + 1);
822: if ((*line)[0] == '=' && (*line)[1] == ' ') {
823: /*
824: * Extra spaces such as "byte&7 = 0" are accepted, which is
825: * annoying. But it seems to be only for =, so special case it.
826: */
827: *cp++ = '=';
828: (*line) += 2;
829: }
830: while (**line != '\0' && !isspace((u_char)**line))
831: *cp++ = *(*line)++;
832: *cp = '\0';
833:
834: if (*s == '\0')
835: goto done;
836:
837: if (s[0] == '<' && s[1] == '=') {
838: ml->test_operator = '[';
839: s += 2;
840: } else if (s[0] == '>' && s[1] == '=') {
841: ml->test_operator = ']';
842: s += 2;
843: } else if (strchr("=<>&^", *s) != NULL) {
844: ml->test_operator = *s;
845: s++;
846: }
847:
848: if (*ml->type_string == 'u')
849: endptr = magic_strtoull(s, &ml->test_unsigned);
850: else
851: endptr = magic_strtoll(s, &ml->test_signed);
852: if (endptr == NULL || *endptr != '\0') {
853: magic_warn(ml, "can't parse number");
854: goto fail;
855: }
856:
857: done:
858: free(copy);
859: return (0);
860:
861: fail:
862: free(copy);
863: return (-1);
864: }
865:
866: static void
867: magic_free_line(struct magic_line *ml)
868: {
1.2 nicm 869: free((void *)ml->type_string);
1.1 nicm 870:
1.2 nicm 871: free((void *)ml->mimetype);
872: free((void *)ml->result);
1.1 nicm 873:
874: free(ml);
875: }
876:
877: int
878: magic_compare(struct magic_line *ml1, struct magic_line *ml2)
879: {
880: if (ml1->strength < ml2->strength)
881: return (1);
882: if (ml1->strength > ml2->strength)
883: return (-1);
884:
885: /*
886: * The original file depends on the (undefined!) qsort(3) behaviour
887: * when the strength is equal. This is impossible to reproduce with an
888: * RB tree so just use the line number and hope for the best.
889: */
890: if (ml1->line < ml2->line)
891: return (-1);
892: if (ml1->line > ml2->line)
893: return (1);
894:
895: return (0);
896: }
897: RB_GENERATE(magic_tree, magic_line, node, magic_compare);
898:
899: static void
900: magic_set_mimetype(struct magic *m, u_int at, struct magic_line *ml, char *line)
901: {
902: char *mimetype, *cp;
903:
904: mimetype = line + (sizeof "!:mime") - 1;
905: while (isspace((u_char)*mimetype))
906: mimetype++;
907:
908: cp = strchr(mimetype, '#');
909: if (cp != NULL)
910: *cp = '\0';
911:
912: if (*mimetype != '\0') {
913: cp = mimetype + strlen(mimetype) - 1;
914: while (cp != mimetype && isspace((u_char)*cp))
915: *cp-- = '\0';
916: }
917:
918: cp = mimetype;
919: while (*cp != '\0') {
920: if (!isalnum((u_char)*cp) && strchr("/-.+", *cp) == NULL)
921: break;
922: cp++;
923: }
924: if (*mimetype == '\0' || *cp != '\0') {
925: fprintf(stderr, "%s:%u: invalid MIME type: %s\n", m->path, at,
926: mimetype);
927: return;
928: }
929: if (ml == NULL) {
930: fprintf(stderr, "%s:%u: stray MIME type: %s\n", m->path, at,
931: mimetype);
932: return;
933: }
934: ml->mimetype = xstrdup(mimetype);
935: }
936:
937: struct magic *
938: magic_load(FILE *f, const char *path, int warnings)
939: {
940: struct magic *m;
941: struct magic_line *ml = NULL, *parent, *parent0;
942: char *line, *tmp;
943: size_t size;
944: u_int at, level, n, i;
945:
946: m = xcalloc(1, sizeof *m);
947: m->path = xstrdup(path);
948: m->warnings = warnings;
949: RB_INIT(&m->tree);
950:
951: parent = NULL;
952: parent0 = NULL;
953: level = 0;
954:
955: at = 0;
956: tmp = NULL;
957: while ((line = fgetln(f, &size))) {
958: if (line[size - 1] == '\n')
959: line[size - 1] = '\0';
960: else {
961: tmp = xmalloc(size + 1);
962: memcpy(tmp, line, size);
963: tmp[size] = '\0';
964: line = tmp;
965: }
966: at++;
967:
968: while (isspace((u_char)*line))
969: line++;
970: if (*line == '\0' || *line == '#')
971: continue;
972:
973: if (strncmp (line, "!:mime", (sizeof "!:mime") - 1) == 0) {
974: magic_set_mimetype(m, at, ml, line);
975: continue;
976: }
977:
978: n = 0;
979: for (; *line == '>'; line++)
980: n++;
981:
982: ml = xcalloc(1, sizeof *ml);
983: ml->root = m;
984: ml->line = at;
985: ml->type = MAGIC_TYPE_NONE;
986: TAILQ_INIT(&ml->children);
987: ml->text = 1;
988:
989: if (n == level + 1) {
990: parent = parent0;
991: } else if (n < level) {
992: for (i = n; i < level && parent != NULL; i++)
993: parent = parent->parent;
994: } else if (n != level) {
995: magic_warn(ml, "level skipped (%u->%u)", level, n);
996: free(ml);
997: continue;
998: }
999: ml->parent = parent;
1000: level = n;
1001:
1002: if (magic_parse_offset(ml, &line) != 0 ||
1003: magic_parse_type(ml, &line) != 0 ||
1004: magic_parse_value(ml, &line) != 0 ||
1005: magic_set_result(ml, line) != 0) {
1006: magic_free_line(ml);
1007: ml = NULL;
1008: continue;
1009: }
1010:
1011: ml->strength = magic_get_strength(ml);
1012: if (ml->parent == NULL)
1013: RB_INSERT(magic_tree, &m->tree, ml);
1014: else
1015: TAILQ_INSERT_TAIL(&ml->parent->children, ml, entry);
1016: parent0 = ml;
1017: }
1018: free(tmp);
1019:
1020: fclose(f);
1021: return (m);
1022: }