Annotation of src/usr.bin/file/magic-load.c, Revision 1.26
1.26 ! brynet 1: /* $OpenBSD: magic-load.c,v 1.25 2017/07/01 14:34:29 brynet Exp $ */
1.1 nicm 2:
3: /*
4: * Copyright (c) 2015 Nicholas Marriott <nicm@openbsd.org>
5: *
6: * Permission to use, copy, modify, and distribute this software for any
7: * purpose with or without fee is hereby granted, provided that the above
8: * copyright notice and this permission notice appear in all copies.
9: *
10: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14: * WHATSOEVER RESULTING FROM LOSS OF MIND, USE, DATA OR PROFITS, WHETHER
15: * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
16: * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17: */
18:
19: #include <sys/types.h>
20:
21: #include <ctype.h>
1.26 ! brynet 22: #include <err.h>
1.1 nicm 23: #include <errno.h>
24: #include <limits.h>
25: #include <regex.h>
26: #include <stdarg.h>
27: #include <stdio.h>
28: #include <stdlib.h>
29: #include <string.h>
30:
31: #include "magic.h"
32: #include "xmalloc.h"
33:
/*
 * Return the numeric value of the octal digit character c, or -1 if c is
 * not in the range '0'..'7'.
 */
static int
magic_odigit(u_char c)
{
	return ((c < '0' || c > '7') ? -1 : c - '0');
}
41:
/*
 * Return the numeric value (0-15) of the hexadecimal digit character c,
 * accepting both upper and lower case, or -1 if c is not a hex digit.
 */
static int
magic_xdigit(u_char c)
{
	int	value = -1;

	if (c >= '0' && c <= '9')
		value = c - '0';
	else if (c >= 'a' && c <= 'f')
		value = c - 'a' + 10;
	else if (c >= 'A' && c <= 'F')
		value = c - 'A' + 10;
	return (value);
}
53:
54: static void
55: magic_mark_text(struct magic_line *ml, int text)
56: {
57: do {
58: ml->text = text;
59: ml = ml->parent;
60: } while (ml != NULL);
61: }
62:
/*
 * Compile the extended regular expression p into re (no subexpression
 * reporting). On failure a warning naming the pattern is emitted against
 * ml and -1 is returned; on success 0 is returned.
 */
static int
magic_make_pattern(struct magic_line *ml, const char *name, regex_t *re,
    const char *p)
{
	char	msg[256];
	int	rc;

	rc = regcomp(re, p, REG_EXTENDED|REG_NOSUB);
	if (rc == 0)
		return (0);

	regerror(rc, re, msg, sizeof msg);
	magic_warn(ml, "bad %s pattern: %s", name, msg);
	return (-1);
}
78:
79: static int
80: magic_set_result(struct magic_line *ml, const char *s)
81: {
1.24 nicm 82: const char *fmt, *endfmt, *cp;
1.1 nicm 83: regex_t *re = NULL;
84: regmatch_t pmatch;
85: size_t fmtlen;
86:
87: while (isspace((u_char)*s))
88: s++;
89: if (*s == '\0') {
90: ml->result = NULL;
91: return (0);
92: }
93: ml->result = xstrdup(s);
94:
95: fmt = NULL;
96: for (cp = s; *cp != '\0'; cp++) {
97: if (cp[0] == '%' && cp[1] != '%') {
98: if (fmt != NULL) {
99: magic_warn(ml, "multiple formats");
100: return (-1);
101: }
102: fmt = cp;
103: }
104: }
105: if (fmt == NULL)
106: return (0);
107: fmt++;
108:
109: for (endfmt = fmt; *endfmt != '\0'; endfmt++) {
110: if (strchr("diouxXeEfFgGsc", *endfmt) != NULL)
111: break;
112: }
113: if (*endfmt == '\0') {
114: magic_warn(ml, "unterminated format");
115: return (-1);
116: }
117: fmtlen = endfmt + 1 - fmt;
118: if (fmtlen > 32) {
119: magic_warn(ml, "format too long");
120: return (-1);
121: }
122:
123: if (*endfmt == 's') {
124: switch (ml->type) {
125: case MAGIC_TYPE_DATE:
126: case MAGIC_TYPE_LDATE:
127: case MAGIC_TYPE_UDATE:
128: case MAGIC_TYPE_ULDATE:
129: case MAGIC_TYPE_BEDATE:
130: case MAGIC_TYPE_BELDATE:
131: case MAGIC_TYPE_UBEDATE:
132: case MAGIC_TYPE_UBELDATE:
133: case MAGIC_TYPE_QDATE:
134: case MAGIC_TYPE_QLDATE:
135: case MAGIC_TYPE_UQDATE:
136: case MAGIC_TYPE_UQLDATE:
137: case MAGIC_TYPE_BEQDATE:
138: case MAGIC_TYPE_BEQLDATE:
139: case MAGIC_TYPE_UBEQDATE:
140: case MAGIC_TYPE_UBEQLDATE:
141: case MAGIC_TYPE_LEQDATE:
142: case MAGIC_TYPE_LEQLDATE:
143: case MAGIC_TYPE_ULEQDATE:
144: case MAGIC_TYPE_ULEQLDATE:
145: case MAGIC_TYPE_LEDATE:
146: case MAGIC_TYPE_LELDATE:
147: case MAGIC_TYPE_ULEDATE:
148: case MAGIC_TYPE_ULELDATE:
149: case MAGIC_TYPE_MEDATE:
150: case MAGIC_TYPE_MELDATE:
151: case MAGIC_TYPE_STRING:
152: case MAGIC_TYPE_PSTRING:
153: case MAGIC_TYPE_BESTRING16:
154: case MAGIC_TYPE_LESTRING16:
155: case MAGIC_TYPE_REGEX:
156: case MAGIC_TYPE_SEARCH:
157: break;
158: default:
159: ml->stringify = 1;
160: break;
161: }
162: }
163:
164: if (!ml->root->compiled) {
165: /*
166: * XXX %ld (and %lu and so on) is invalid on 64-bit platforms
167: * with byte, short, long. We get lucky because our first and
168: * only argument ends up in a register. Accept it for now.
169: */
170: if (magic_make_pattern(ml, "short", &ml->root->format_short,
171: "^-?[0-9]*(\\.[0-9]*)?(c|(l|h|hh)?[iduxX])$") != 0)
172: return (-1);
173: if (magic_make_pattern(ml, "long", &ml->root->format_long,
174: "^-?[0-9]*(\\.[0-9]*)?(c|(l|h|hh)?[iduxX])$") != 0)
175: return (-1);
176: if (magic_make_pattern(ml, "quad", &ml->root->format_quad,
177: "^-?[0-9]*(\\.[0-9]*)?ll[iduxX]$") != 0)
178: return (-1);
179: if (magic_make_pattern(ml, "float", &ml->root->format_float,
180: "^-?[0-9]*(\\.[0-9]*)?[eEfFgG]$") != 0)
181: return (-1);
182: if (magic_make_pattern(ml, "string", &ml->root->format_string,
183: "^-?[0-9]*(\\.[0-9]*)?s$") != 0)
184: return (-1);
185: ml->root->compiled = 1;
186: }
187:
188: if (ml->stringify)
189: re = &ml->root->format_string;
190: else {
191: switch (ml->type) {
192: case MAGIC_TYPE_NONE:
1.17 nicm 193: case MAGIC_TYPE_BESTRING16:
194: case MAGIC_TYPE_LESTRING16:
1.22 nicm 195: case MAGIC_TYPE_NAME:
196: case MAGIC_TYPE_USE:
1.1 nicm 197: return (0); /* don't use result */
198: case MAGIC_TYPE_BYTE:
199: case MAGIC_TYPE_UBYTE:
200: case MAGIC_TYPE_SHORT:
201: case MAGIC_TYPE_USHORT:
202: case MAGIC_TYPE_BESHORT:
203: case MAGIC_TYPE_UBESHORT:
204: case MAGIC_TYPE_LESHORT:
205: case MAGIC_TYPE_ULESHORT:
206: re = &ml->root->format_short;
207: break;
208: case MAGIC_TYPE_LONG:
209: case MAGIC_TYPE_ULONG:
210: case MAGIC_TYPE_BELONG:
211: case MAGIC_TYPE_UBELONG:
212: case MAGIC_TYPE_LELONG:
213: case MAGIC_TYPE_ULELONG:
214: case MAGIC_TYPE_MELONG:
215: re = &ml->root->format_long;
216: break;
217: case MAGIC_TYPE_QUAD:
218: case MAGIC_TYPE_UQUAD:
219: case MAGIC_TYPE_BEQUAD:
220: case MAGIC_TYPE_UBEQUAD:
221: case MAGIC_TYPE_LEQUAD:
222: case MAGIC_TYPE_ULEQUAD:
223: re = &ml->root->format_quad;
224: break;
225: case MAGIC_TYPE_FLOAT:
226: case MAGIC_TYPE_BEFLOAT:
227: case MAGIC_TYPE_LEFLOAT:
228: case MAGIC_TYPE_DOUBLE:
229: case MAGIC_TYPE_BEDOUBLE:
230: case MAGIC_TYPE_LEDOUBLE:
231: re = &ml->root->format_float;
232: break;
233: case MAGIC_TYPE_DATE:
234: case MAGIC_TYPE_LDATE:
235: case MAGIC_TYPE_UDATE:
236: case MAGIC_TYPE_ULDATE:
237: case MAGIC_TYPE_BEDATE:
238: case MAGIC_TYPE_BELDATE:
239: case MAGIC_TYPE_UBEDATE:
240: case MAGIC_TYPE_UBELDATE:
241: case MAGIC_TYPE_QDATE:
242: case MAGIC_TYPE_QLDATE:
243: case MAGIC_TYPE_UQDATE:
244: case MAGIC_TYPE_UQLDATE:
245: case MAGIC_TYPE_BEQDATE:
246: case MAGIC_TYPE_BEQLDATE:
247: case MAGIC_TYPE_UBEQDATE:
248: case MAGIC_TYPE_UBEQLDATE:
249: case MAGIC_TYPE_LEQDATE:
250: case MAGIC_TYPE_LEQLDATE:
251: case MAGIC_TYPE_ULEQDATE:
252: case MAGIC_TYPE_ULEQLDATE:
253: case MAGIC_TYPE_LEDATE:
254: case MAGIC_TYPE_LELDATE:
255: case MAGIC_TYPE_ULEDATE:
256: case MAGIC_TYPE_ULELDATE:
257: case MAGIC_TYPE_MEDATE:
258: case MAGIC_TYPE_MELDATE:
259: case MAGIC_TYPE_STRING:
260: case MAGIC_TYPE_PSTRING:
261: case MAGIC_TYPE_REGEX:
262: case MAGIC_TYPE_SEARCH:
1.20 nicm 263: case MAGIC_TYPE_DEFAULT:
264: case MAGIC_TYPE_CLEAR:
1.1 nicm 265: re = &ml->root->format_string;
266: break;
267: }
268: }
269:
270: pmatch.rm_so = 0;
271: pmatch.rm_eo = fmtlen;
272: if (regexec(re, fmt, 1, &pmatch, REG_STARTEND) != 0) {
273: magic_warn(ml, "bad format for %s: %%%.*s", ml->type_string,
274: (int)fmtlen, fmt);
275: return (-1);
276: }
277:
278: return (0);
279: }
280:
281: static u_int
282: magic_get_strength(struct magic_line *ml)
283: {
284: int n;
285: size_t size;
286:
1.13 nicm 287: if (ml->type == MAGIC_TYPE_NONE)
288: return (0);
289:
1.18 nicm 290: if (ml->test_not || ml->test_operator == 'x') {
291: n = 1;
292: goto skip;
293: }
1.1 nicm 294:
1.5 nicm 295: n = 2 * MAGIC_STRENGTH_MULTIPLIER;
1.1 nicm 296: switch (ml->type) {
297: case MAGIC_TYPE_NONE:
298: case MAGIC_TYPE_DEFAULT:
299: return (0);
1.20 nicm 300: case MAGIC_TYPE_CLEAR:
1.22 nicm 301: case MAGIC_TYPE_NAME:
302: case MAGIC_TYPE_USE:
1.20 nicm 303: break;
1.1 nicm 304: case MAGIC_TYPE_BYTE:
305: case MAGIC_TYPE_UBYTE:
306: n += 1 * MAGIC_STRENGTH_MULTIPLIER;
307: break;
308: case MAGIC_TYPE_SHORT:
309: case MAGIC_TYPE_USHORT:
310: case MAGIC_TYPE_BESHORT:
311: case MAGIC_TYPE_UBESHORT:
312: case MAGIC_TYPE_LESHORT:
313: case MAGIC_TYPE_ULESHORT:
314: n += 2 * MAGIC_STRENGTH_MULTIPLIER;
315: break;
316: case MAGIC_TYPE_LONG:
317: case MAGIC_TYPE_ULONG:
318: case MAGIC_TYPE_FLOAT:
319: case MAGIC_TYPE_DATE:
320: case MAGIC_TYPE_LDATE:
321: case MAGIC_TYPE_UDATE:
322: case MAGIC_TYPE_ULDATE:
323: case MAGIC_TYPE_BELONG:
324: case MAGIC_TYPE_UBELONG:
325: case MAGIC_TYPE_BEFLOAT:
326: case MAGIC_TYPE_BEDATE:
327: case MAGIC_TYPE_BELDATE:
328: case MAGIC_TYPE_UBEDATE:
329: case MAGIC_TYPE_UBELDATE:
330: n += 4 * MAGIC_STRENGTH_MULTIPLIER;
331: break;
332: case MAGIC_TYPE_QUAD:
333: case MAGIC_TYPE_UQUAD:
334: case MAGIC_TYPE_DOUBLE:
335: case MAGIC_TYPE_QDATE:
336: case MAGIC_TYPE_QLDATE:
337: case MAGIC_TYPE_UQDATE:
338: case MAGIC_TYPE_UQLDATE:
339: case MAGIC_TYPE_BEQUAD:
340: case MAGIC_TYPE_UBEQUAD:
341: case MAGIC_TYPE_BEDOUBLE:
342: case MAGIC_TYPE_BEQDATE:
343: case MAGIC_TYPE_BEQLDATE:
344: case MAGIC_TYPE_UBEQDATE:
345: case MAGIC_TYPE_UBEQLDATE:
346: case MAGIC_TYPE_LEQUAD:
347: case MAGIC_TYPE_ULEQUAD:
348: case MAGIC_TYPE_LEDOUBLE:
349: case MAGIC_TYPE_LEQDATE:
350: case MAGIC_TYPE_LEQLDATE:
351: case MAGIC_TYPE_ULEQDATE:
352: case MAGIC_TYPE_ULEQLDATE:
353: case MAGIC_TYPE_LELONG:
354: case MAGIC_TYPE_ULELONG:
355: case MAGIC_TYPE_LEFLOAT:
356: case MAGIC_TYPE_LEDATE:
357: case MAGIC_TYPE_LELDATE:
358: case MAGIC_TYPE_ULEDATE:
359: case MAGIC_TYPE_ULELDATE:
360: case MAGIC_TYPE_MELONG:
361: case MAGIC_TYPE_MEDATE:
362: case MAGIC_TYPE_MELDATE:
363: n += 8 * MAGIC_STRENGTH_MULTIPLIER;
364: break;
365: case MAGIC_TYPE_STRING:
366: case MAGIC_TYPE_PSTRING:
367: n += ml->test_string_size * MAGIC_STRENGTH_MULTIPLIER;
368: break;
369: case MAGIC_TYPE_BESTRING16:
370: case MAGIC_TYPE_LESTRING16:
371: n += ml->test_string_size * MAGIC_STRENGTH_MULTIPLIER / 2;
372: break;
373: case MAGIC_TYPE_REGEX:
374: case MAGIC_TYPE_SEARCH:
375: size = MAGIC_STRENGTH_MULTIPLIER / ml->test_string_size;
376: if (size < 1)
377: size = 1;
378: n += ml->test_string_size * size;
379: break;
380: }
381: switch (ml->test_operator) {
382: case '=':
383: n += MAGIC_STRENGTH_MULTIPLIER;
384: break;
385: case '<':
386: case '>':
387: case '[':
388: case ']':
389: n -= 2 * MAGIC_STRENGTH_MULTIPLIER;
390: break;
391: case '^':
392: case '&':
393: n -= MAGIC_STRENGTH_MULTIPLIER;
394: break;
395: }
1.18 nicm 396:
397: skip:
398: switch (ml->strength_operator) {
399: case '+':
400: n += ml->strength_value;
401: break;
402: case '-':
403: n -= ml->strength_value;
404: break;
405: case '*':
406: n *= ml->strength_value;
407: break;
408: case '/':
409: n /= ml->strength_value;
410: break;
411: }
1.1 nicm 412: return (n <= 0 ? 1 : n);
413: }
414:
415: static int
416: magic_get_string(char **line, char *out, size_t *outlen)
417: {
418: char *start, *cp, c;
419: int d0, d1, d2;
420:
421: start = out;
422: for (cp = *line; *cp != '\0' && !isspace((u_char)*cp); cp++) {
423: if (*cp != '\\') {
424: *out++ = *cp;
425: continue;
426: }
427:
428: switch (c = *++cp) {
1.3 nicm 429: case '\0': /* end of line */
430: return (-1);
1.1 nicm 431: case ' ':
432: *out++ = ' ';
433: break;
434: case '0':
435: case '1':
436: case '2':
437: case '3':
438: case '4':
439: case '5':
440: case '6':
441: case '7':
442: d0 = magic_odigit(cp[0]);
443: if (cp[0] != '\0')
444: d1 = magic_odigit(cp[1]);
445: else
446: d1 = -1;
447: if (cp[0] != '\0' && cp[1] != '\0')
448: d2 = magic_odigit(cp[2]);
449: else
450: d2 = -1;
451:
452: if (d0 != -1 && d1 != -1 && d2 != -1) {
453: *out = d2 | (d1 << 3) | (d0 << 6);
454: cp += 2;
455: } else if (d0 != -1 && d1 != -1) {
456: *out = d1 | (d0 << 3);
457: cp++;
458: } else if (d0 != -1)
459: *out = d0;
460: else
461: return (-1);
462: out++;
463: break;
464: case 'x':
465: d0 = magic_xdigit(cp[1]);
466: if (cp[1] != '\0')
467: d1 = magic_xdigit(cp[2]);
468: else
469: d1 = -1;
470:
471: if (d0 != -1 && d1 != -1) {
472: *out = d1 | (d0 << 4);
473: cp += 2;
474: } else if (d0 != -1) {
475: *out = d0;
476: cp++;
477: } else
478: return (-1);
479: out++;
480:
481: break;
482: case 'a':
483: *out++ = '\a';
484: break;
485: case 'b':
486: *out++ = '\b';
487: break;
488: case 't':
489: *out++ = '\t';
490: break;
491: case 'f':
492: *out++ = '\f';
493: break;
494: case 'n':
495: *out++ = '\n';
496: break;
497: case 'r':
498: *out++ = '\r';
499: break;
500: case '\\':
501: *out++ = '\\';
502: break;
503: case '\'':
504: *out++ = '\'';
505: break;
506: case '\"':
507: *out++ = '\"';
508: break;
509: default:
510: *out++ = c;
511: break;
512: }
513: }
514: *out = '\0';
515: *outlen = out - start;
516:
517: *line = cp;
518: return (0);
519: }
520:
521: static int
522: magic_parse_offset(struct magic_line *ml, char **line)
523: {
524: char *copy, *s, *cp, *endptr;
525:
526: while (isspace((u_char)**line))
527: (*line)++;
528: copy = s = cp = xmalloc(strlen(*line) + 1);
529: while (**line != '\0' && !isspace((u_char)**line))
530: *cp++ = *(*line)++;
531: *cp = '\0';
532:
533: ml->offset = 0;
534: ml->offset_relative = 0;
535:
536: ml->indirect_type = ' ';
537: ml->indirect_relative = 0;
538: ml->indirect_offset = 0;
539: ml->indirect_operator = ' ';
540: ml->indirect_operand = 0;
541:
542: if (*s == '&') {
543: ml->offset_relative = 1;
544: s++;
545: }
546:
547: if (*s != '(') {
548: endptr = magic_strtoll(s, &ml->offset);
549: if (endptr == NULL || *endptr != '\0') {
550: magic_warn(ml, "missing closing bracket");
551: goto fail;
552: }
553: if (ml->offset < 0 && !ml->offset_relative) {
554: magic_warn(ml, "negative absolute offset");
555: goto fail;
556: }
557: goto done;
558: }
559: s++;
560:
561: if (*s == '&') {
562: ml->indirect_relative = 1;
563: s++;
564: }
565:
566: endptr = magic_strtoll(s, &ml->indirect_offset);
567: if (endptr == NULL) {
1.8 nicm 568: magic_warn(ml, "can't parse offset: %s", s);
1.1 nicm 569: goto fail;
570: }
571: s = endptr;
572: if (*s == ')')
573: goto done;
574:
575: if (*s == '.') {
576: s++;
1.6 tobias 577: if (*s == '\0' || strchr("bslBSL", *s) == NULL) {
1.8 nicm 578: magic_warn(ml, "unknown offset type: %c", *s);
1.1 nicm 579: goto fail;
580: }
581: ml->indirect_type = *s;
582: s++;
583: if (*s == ')')
584: goto done;
585: }
586:
1.6 tobias 587: if (*s == '\0' || strchr("+-*", *s) == NULL) {
1.8 nicm 588: magic_warn(ml, "unknown offset operator: %c", *s);
1.1 nicm 589: goto fail;
590: }
591: ml->indirect_operator = *s;
592: s++;
593: if (*s == ')')
594: goto done;
595:
596: if (*s == '(') {
597: s++;
598: endptr = magic_strtoll(s, &ml->indirect_operand);
599: if (endptr == NULL || *endptr != ')') {
600: magic_warn(ml, "missing closing bracket");
601: goto fail;
602: }
603: if (*++endptr != ')') {
604: magic_warn(ml, "missing closing bracket");
605: goto fail;
606: }
607: } else {
608: endptr = magic_strtoll(s, &ml->indirect_operand);
609: if (endptr == NULL || *endptr != ')') {
610: magic_warn(ml, "missing closing bracket");
611: goto fail;
612: }
613: }
614:
615: done:
616: free(copy);
617: return (0);
618:
619: fail:
620: free(copy);
621: return (-1);
622: }
623:
624: static int
625: magic_parse_type(struct magic_line *ml, char **line)
626: {
627: char *copy, *s, *cp, *endptr;
628:
629: while (isspace((u_char)**line))
630: (*line)++;
631: copy = s = cp = xmalloc(strlen(*line) + 1);
632: while (**line != '\0' && !isspace((u_char)**line))
633: *cp++ = *(*line)++;
634: *cp = '\0';
635:
636: ml->type = MAGIC_TYPE_NONE;
637: ml->type_operator = ' ';
638: ml->type_operand = 0;
639:
1.23 nicm 640: if (strcmp(s, "name") == 0) {
1.22 nicm 641: ml->type = MAGIC_TYPE_NAME;
642: ml->type_string = xstrdup(s);
643: goto done;
644: }
1.23 nicm 645: if (strcmp(s, "use") == 0) {
1.22 nicm 646: ml->type = MAGIC_TYPE_USE;
647: ml->type_string = xstrdup(s);
648: goto done;
649: }
650:
1.16 nicm 651: if (strncmp(s, "string", (sizeof "string") - 1) == 0 ||
652: strncmp(s, "ustring", (sizeof "ustring") - 1) == 0) {
653: if (*s == 'u')
654: ml->type_string = xstrdup(s + 1);
655: else
656: ml->type_string = xstrdup(s);
1.1 nicm 657: ml->type = MAGIC_TYPE_STRING;
658: magic_mark_text(ml, 0);
659: goto done;
660: }
1.16 nicm 661: if (strncmp(s, "pstring", (sizeof "pstring") - 1) == 0 ||
662: strncmp(s, "upstring", (sizeof "upstring") - 1) == 0) {
663: if (*s == 'u')
664: ml->type_string = xstrdup(s + 1);
665: else
666: ml->type_string = xstrdup(s);
667: ml->type = MAGIC_TYPE_PSTRING;
668: magic_mark_text(ml, 0);
669: goto done;
670: }
671: if (strncmp(s, "search", (sizeof "search") - 1) == 0 ||
672: strncmp(s, "usearch", (sizeof "usearch") - 1) == 0) {
673: if (*s == 'u')
674: ml->type_string = xstrdup(s + 1);
675: else
676: ml->type_string = xstrdup(s);
1.1 nicm 677: ml->type = MAGIC_TYPE_SEARCH;
678: goto done;
679: }
1.16 nicm 680: if (strncmp(s, "regex", (sizeof "regex") - 1) == 0 ||
681: strncmp(s, "uregex", (sizeof "uregex") - 1) == 0) {
682: if (*s == 'u')
683: ml->type_string = xstrdup(s + 1);
684: else
685: ml->type_string = xstrdup(s);
1.1 nicm 686: ml->type = MAGIC_TYPE_REGEX;
687: goto done;
688: }
1.16 nicm 689: ml->type_string = xstrdup(s);
1.1 nicm 690:
1.12 nicm 691: cp = &s[strcspn(s, "+-&/%*")];
1.1 nicm 692: if (*cp != '\0') {
693: ml->type_operator = *cp;
694: endptr = magic_strtoull(cp + 1, &ml->type_operand);
695: if (endptr == NULL || *endptr != '\0') {
1.8 nicm 696: magic_warn(ml, "can't parse operand: %s", cp + 1);
1.1 nicm 697: goto fail;
698: }
699: *cp = '\0';
700: }
701:
702: if (strcmp(s, "byte") == 0)
703: ml->type = MAGIC_TYPE_BYTE;
704: else if (strcmp(s, "short") == 0)
705: ml->type = MAGIC_TYPE_SHORT;
706: else if (strcmp(s, "long") == 0)
707: ml->type = MAGIC_TYPE_LONG;
708: else if (strcmp(s, "quad") == 0)
709: ml->type = MAGIC_TYPE_QUAD;
710: else if (strcmp(s, "ubyte") == 0)
711: ml->type = MAGIC_TYPE_UBYTE;
712: else if (strcmp(s, "ushort") == 0)
713: ml->type = MAGIC_TYPE_USHORT;
714: else if (strcmp(s, "ulong") == 0)
715: ml->type = MAGIC_TYPE_ULONG;
716: else if (strcmp(s, "uquad") == 0)
717: ml->type = MAGIC_TYPE_UQUAD;
1.16 nicm 718: else if (strcmp(s, "float") == 0 || strcmp(s, "ufloat") == 0)
1.1 nicm 719: ml->type = MAGIC_TYPE_FLOAT;
1.16 nicm 720: else if (strcmp(s, "double") == 0 || strcmp(s, "udouble") == 0)
1.1 nicm 721: ml->type = MAGIC_TYPE_DOUBLE;
722: else if (strcmp(s, "date") == 0)
723: ml->type = MAGIC_TYPE_DATE;
724: else if (strcmp(s, "qdate") == 0)
725: ml->type = MAGIC_TYPE_QDATE;
726: else if (strcmp(s, "ldate") == 0)
727: ml->type = MAGIC_TYPE_LDATE;
728: else if (strcmp(s, "qldate") == 0)
729: ml->type = MAGIC_TYPE_QLDATE;
730: else if (strcmp(s, "udate") == 0)
731: ml->type = MAGIC_TYPE_UDATE;
732: else if (strcmp(s, "uqdate") == 0)
733: ml->type = MAGIC_TYPE_UQDATE;
734: else if (strcmp(s, "uldate") == 0)
735: ml->type = MAGIC_TYPE_ULDATE;
736: else if (strcmp(s, "uqldate") == 0)
737: ml->type = MAGIC_TYPE_UQLDATE;
738: else if (strcmp(s, "beshort") == 0)
739: ml->type = MAGIC_TYPE_BESHORT;
740: else if (strcmp(s, "belong") == 0)
741: ml->type = MAGIC_TYPE_BELONG;
742: else if (strcmp(s, "bequad") == 0)
743: ml->type = MAGIC_TYPE_BEQUAD;
744: else if (strcmp(s, "ubeshort") == 0)
745: ml->type = MAGIC_TYPE_UBESHORT;
746: else if (strcmp(s, "ubelong") == 0)
747: ml->type = MAGIC_TYPE_UBELONG;
748: else if (strcmp(s, "ubequad") == 0)
749: ml->type = MAGIC_TYPE_UBEQUAD;
1.16 nicm 750: else if (strcmp(s, "befloat") == 0 || strcmp(s, "ubefloat") == 0)
1.1 nicm 751: ml->type = MAGIC_TYPE_BEFLOAT;
1.16 nicm 752: else if (strcmp(s, "bedouble") == 0 || strcmp(s, "ubedouble") == 0)
1.1 nicm 753: ml->type = MAGIC_TYPE_BEDOUBLE;
754: else if (strcmp(s, "bedate") == 0)
755: ml->type = MAGIC_TYPE_BEDATE;
756: else if (strcmp(s, "beqdate") == 0)
757: ml->type = MAGIC_TYPE_BEQDATE;
758: else if (strcmp(s, "beldate") == 0)
759: ml->type = MAGIC_TYPE_BELDATE;
760: else if (strcmp(s, "beqldate") == 0)
761: ml->type = MAGIC_TYPE_BEQLDATE;
762: else if (strcmp(s, "ubedate") == 0)
763: ml->type = MAGIC_TYPE_UBEDATE;
764: else if (strcmp(s, "ubeqdate") == 0)
765: ml->type = MAGIC_TYPE_UBEQDATE;
766: else if (strcmp(s, "ubeldate") == 0)
767: ml->type = MAGIC_TYPE_UBELDATE;
768: else if (strcmp(s, "ubeqldate") == 0)
769: ml->type = MAGIC_TYPE_UBEQLDATE;
1.16 nicm 770: else if (strcmp(s, "bestring16") == 0 || strcmp(s, "ubestring16") == 0)
1.1 nicm 771: ml->type = MAGIC_TYPE_BESTRING16;
772: else if (strcmp(s, "leshort") == 0)
773: ml->type = MAGIC_TYPE_LESHORT;
774: else if (strcmp(s, "lelong") == 0)
775: ml->type = MAGIC_TYPE_LELONG;
776: else if (strcmp(s, "lequad") == 0)
777: ml->type = MAGIC_TYPE_LEQUAD;
778: else if (strcmp(s, "uleshort") == 0)
779: ml->type = MAGIC_TYPE_ULESHORT;
780: else if (strcmp(s, "ulelong") == 0)
781: ml->type = MAGIC_TYPE_ULELONG;
782: else if (strcmp(s, "ulequad") == 0)
783: ml->type = MAGIC_TYPE_ULEQUAD;
1.16 nicm 784: else if (strcmp(s, "lefloat") == 0 || strcmp(s, "ulefloat") == 0)
1.1 nicm 785: ml->type = MAGIC_TYPE_LEFLOAT;
1.16 nicm 786: else if (strcmp(s, "ledouble") == 0 || strcmp(s, "uledouble") == 0)
1.1 nicm 787: ml->type = MAGIC_TYPE_LEDOUBLE;
788: else if (strcmp(s, "ledate") == 0)
789: ml->type = MAGIC_TYPE_LEDATE;
790: else if (strcmp(s, "leqdate") == 0)
791: ml->type = MAGIC_TYPE_LEQDATE;
792: else if (strcmp(s, "leldate") == 0)
793: ml->type = MAGIC_TYPE_LELDATE;
794: else if (strcmp(s, "leqldate") == 0)
795: ml->type = MAGIC_TYPE_LEQLDATE;
796: else if (strcmp(s, "uledate") == 0)
797: ml->type = MAGIC_TYPE_ULEDATE;
798: else if (strcmp(s, "uleqdate") == 0)
799: ml->type = MAGIC_TYPE_ULEQDATE;
800: else if (strcmp(s, "uleldate") == 0)
801: ml->type = MAGIC_TYPE_ULELDATE;
802: else if (strcmp(s, "uleqldate") == 0)
803: ml->type = MAGIC_TYPE_ULEQLDATE;
1.16 nicm 804: else if (strcmp(s, "lestring16") == 0 || strcmp(s, "ulestring16") == 0)
1.1 nicm 805: ml->type = MAGIC_TYPE_LESTRING16;
1.16 nicm 806: else if (strcmp(s, "melong") == 0 || strcmp(s, "umelong") == 0)
1.1 nicm 807: ml->type = MAGIC_TYPE_MELONG;
1.16 nicm 808: else if (strcmp(s, "medate") == 0 || strcmp(s, "umedate") == 0)
1.1 nicm 809: ml->type = MAGIC_TYPE_MEDATE;
1.16 nicm 810: else if (strcmp(s, "meldate") == 0 || strcmp(s, "umeldate") == 0)
1.1 nicm 811: ml->type = MAGIC_TYPE_MELDATE;
1.16 nicm 812: else if (strcmp(s, "default") == 0 || strcmp(s, "udefault") == 0)
1.1 nicm 813: ml->type = MAGIC_TYPE_DEFAULT;
1.20 nicm 814: else if (strcmp(s, "clear") == 0 || strcmp(s, "uclear") == 0)
815: ml->type = MAGIC_TYPE_CLEAR;
1.1 nicm 816: else {
1.8 nicm 817: magic_warn(ml, "unknown type: %s", s);
1.1 nicm 818: goto fail;
819: }
820: magic_mark_text(ml, 0);
821:
822: done:
823: free(copy);
824: return (0);
825:
826: fail:
827: free(copy);
828: return (-1);
829: }
830:
831: static int
832: magic_parse_value(struct magic_line *ml, char **line)
833: {
834: char *copy, *s, *cp, *endptr;
835: size_t slen;
1.10 nicm 836: uint64_t u;
1.1 nicm 837:
838: while (isspace((u_char)**line))
839: (*line)++;
840:
841: ml->test_operator = '=';
842: ml->test_not = 0;
843: ml->test_string = NULL;
844: ml->test_string_size = 0;
845: ml->test_unsigned = 0;
846: ml->test_signed = 0;
847:
1.9 nicm 848: if (**line == '\0')
849: return (0);
850:
1.1 nicm 851: s = *line;
852: if (s[0] == 'x' && (s[1] == '\0' || isspace((u_char)s[1]))) {
853: (*line)++;
1.20 nicm 854: ml->test_operator = 'x';
855: return (0);
856: }
857:
858: if (ml->type == MAGIC_TYPE_DEFAULT || ml->type == MAGIC_TYPE_CLEAR) {
859: magic_warn(ml, "test specified for default or clear");
1.1 nicm 860: ml->test_operator = 'x';
861: return (0);
862: }
863:
864: if (**line == '!') {
865: ml->test_not = 1;
866: (*line)++;
867: }
868:
869: switch (ml->type) {
1.22 nicm 870: case MAGIC_TYPE_NAME:
871: case MAGIC_TYPE_USE:
872: copy = s = xmalloc(strlen(*line) + 1);
873: if (magic_get_string(line, s, &slen) != 0 || slen == 0) {
874: magic_warn(ml, "can't parse string");
875: goto fail;
876: }
877: if (slen == 0 || *s == '\0' || strcmp(s, "^") == 0) {
878: magic_warn(ml, "invalid name");
879: goto fail;
880: }
881: ml->name = s;
882: return (0); /* do not free */
1.1 nicm 883: case MAGIC_TYPE_STRING:
884: case MAGIC_TYPE_PSTRING:
885: case MAGIC_TYPE_SEARCH:
886: if (**line == '>' || **line == '<' || **line == '=') {
887: ml->test_operator = **line;
888: (*line)++;
889: }
890: /* FALLTHROUGH */
891: case MAGIC_TYPE_REGEX:
1.21 nicm 892: if (**line == '=')
893: (*line)++;
1.1 nicm 894: copy = s = xmalloc(strlen(*line) + 1);
895: if (magic_get_string(line, s, &slen) != 0) {
896: magic_warn(ml, "can't parse string");
897: goto fail;
898: }
899: ml->test_string_size = slen;
900: ml->test_string = s;
901: return (0); /* do not free */
902: default:
903: break;
904: }
905:
1.9 nicm 906: while (isspace((u_char)**line))
907: (*line)++;
908: if ((*line)[0] == '<' && (*line)[1] == '=') {
909: ml->test_operator = '[';
910: (*line) += 2;
911: } else if ((*line)[0] == '>' && (*line)[1] == '=') {
912: ml->test_operator = ']';
1.1 nicm 913: (*line) += 2;
1.19 tobias 914: } else if (**line != '\0' && strchr("=<>&^", **line) != NULL) {
1.9 nicm 915: ml->test_operator = **line;
916: (*line)++;
1.1 nicm 917: }
1.9 nicm 918:
919: while (isspace((u_char)**line))
920: (*line)++;
921: copy = cp = xmalloc(strlen(*line) + 1);
1.1 nicm 922: while (**line != '\0' && !isspace((u_char)**line))
923: *cp++ = *(*line)++;
924: *cp = '\0';
925:
1.11 nicm 926: switch (ml->type) {
927: case MAGIC_TYPE_FLOAT:
928: case MAGIC_TYPE_DOUBLE:
929: case MAGIC_TYPE_BEFLOAT:
930: case MAGIC_TYPE_BEDOUBLE:
931: case MAGIC_TYPE_LEFLOAT:
932: case MAGIC_TYPE_LEDOUBLE:
933: errno = 0;
934: ml->test_double = strtod(copy, &endptr);
935: if (errno == ERANGE)
936: endptr = NULL;
937: break;
938: default:
939: if (*ml->type_string == 'u')
940: endptr = magic_strtoull(copy, &ml->test_unsigned);
941: else {
942: endptr = magic_strtoll(copy, &ml->test_signed);
943: if (endptr == NULL || *endptr != '\0') {
944: /*
945: * If we can't parse this as a signed number,
946: * try as unsigned instead.
947: */
948: endptr = magic_strtoull(copy, &u);
949: if (endptr != NULL && *endptr == '\0')
950: ml->test_signed = (int64_t)u;
951: }
1.10 nicm 952: }
1.11 nicm 953: break;
1.10 nicm 954: }
1.1 nicm 955: if (endptr == NULL || *endptr != '\0') {
1.9 nicm 956: magic_warn(ml, "can't parse number: %s", copy);
1.1 nicm 957: goto fail;
958: }
959:
960: free(copy);
961: return (0);
962:
963: fail:
964: free(copy);
965: return (-1);
966: }
967:
968: int
969: magic_compare(struct magic_line *ml1, struct magic_line *ml2)
970: {
971: if (ml1->strength < ml2->strength)
972: return (1);
973: if (ml1->strength > ml2->strength)
974: return (-1);
975:
976: /*
977: * The original file depends on the (undefined!) qsort(3) behaviour
978: * when the strength is equal. This is impossible to reproduce with an
979: * RB tree so just use the line number and hope for the best.
980: */
981: if (ml1->line < ml2->line)
982: return (-1);
983: if (ml1->line > ml2->line)
984: return (1);
985:
986: return (0);
987: }
988: RB_GENERATE(magic_tree, magic_line, node, magic_compare);
989:
1.22 nicm 990: int
991: magic_named_compare(struct magic_line *ml1, struct magic_line *ml2)
992: {
993: return (strcmp(ml1->name, ml2->name));
994: }
995: RB_GENERATE(magic_named_tree, magic_line, node, magic_named_compare);
996:
1.1 nicm 997: static void
1.18 nicm 998: magic_adjust_strength(struct magic *m, u_int at, struct magic_line *ml,
999: char *line)
1000: {
1001: char *cp, *s;
1002: int64_t value;
1003:
1004: cp = line + (sizeof "!:strength") - 1;
1005: while (isspace((u_char)*cp))
1006: cp++;
1007: s = cp;
1008:
1009: cp = strchr(s, '#');
1010: if (cp != NULL)
1011: *cp = '\0';
1012: cp = s;
1013:
1.19 tobias 1014: if (*s == '\0' || strchr("+-*/", *s) == NULL) {
1.18 nicm 1015: magic_warnm(m, at, "invalid strength operator: %s", s);
1016: return;
1017: }
1018: ml->strength_operator = *cp++;
1019:
1020: while (isspace((u_char)*cp))
1021: cp++;
1022: cp = magic_strtoll(cp, &value);
1023: while (cp != NULL && isspace((u_char)*cp))
1024: cp++;
1025: if (cp == NULL || *cp != '\0' || value < 0 || value > 255) {
1026: magic_warnm(m, at, "invalid strength value: %s", s);
1027: return;
1028: }
1029: ml->strength_value = value;
1030: }
1031:
1032: static void
1.1 nicm 1033: magic_set_mimetype(struct magic *m, u_int at, struct magic_line *ml, char *line)
1034: {
1035: char *mimetype, *cp;
1036:
1037: mimetype = line + (sizeof "!:mime") - 1;
1038: while (isspace((u_char)*mimetype))
1039: mimetype++;
1040:
1041: cp = strchr(mimetype, '#');
1042: if (cp != NULL)
1043: *cp = '\0';
1044:
1045: if (*mimetype != '\0') {
1046: cp = mimetype + strlen(mimetype) - 1;
1047: while (cp != mimetype && isspace((u_char)*cp))
1048: *cp-- = '\0';
1049: }
1050:
1051: cp = mimetype;
1052: while (*cp != '\0') {
1053: if (!isalnum((u_char)*cp) && strchr("/-.+", *cp) == NULL)
1054: break;
1055: cp++;
1056: }
1057: if (*mimetype == '\0' || *cp != '\0') {
1.7 nicm 1058: magic_warnm(m, at, "invalid MIME type: %s", mimetype);
1.1 nicm 1059: return;
1060: }
1061: if (ml == NULL) {
1.7 nicm 1062: magic_warnm(m, at, "stray MIME type: %s", mimetype);
1.1 nicm 1063: return;
1064: }
1065: ml->mimetype = xstrdup(mimetype);
1066: }
1067:
1068: struct magic *
1069: magic_load(FILE *f, const char *path, int warnings)
1070: {
1071: struct magic *m;
1072: struct magic_line *ml = NULL, *parent, *parent0;
1073: char *line, *tmp;
1074: size_t size;
1.26 ! brynet 1075: ssize_t slen;
1.1 nicm 1076: u_int at, level, n, i;
1077:
1078: m = xcalloc(1, sizeof *m);
1079: m->path = xstrdup(path);
1080: m->warnings = warnings;
1081: RB_INIT(&m->tree);
1082:
1083: parent = NULL;
1084: parent0 = NULL;
1085: level = 0;
1086:
1087: at = 0;
1088: tmp = NULL;
1.26 ! brynet 1089: size = 0;
! 1090: while ((slen = getline(&tmp, &size, f)) != -1) {
! 1091: line = tmp;
! 1092: if (line[slen - 1] == '\n')
! 1093: line[slen - 1] = '\0';
! 1094:
1.1 nicm 1095: at++;
1096:
1097: while (isspace((u_char)*line))
1098: line++;
1099: if (*line == '\0' || *line == '#')
1100: continue;
1101:
1.15 nicm 1102: if (strncmp (line, "!:mime", 6) == 0) {
1.1 nicm 1103: magic_set_mimetype(m, at, ml, line);
1.18 nicm 1104: continue;
1105: }
1106: if (strncmp (line, "!:strength", 10) == 0) {
1107: magic_adjust_strength(m, at, ml, line);
1.15 nicm 1108: continue;
1109: }
1110: if (strncmp (line, "!:", 2) == 0) {
1111: for (i = 0; i < 64 && line[i] != '\0'; i++) {
1112: if (isspace((u_char)line[i]))
1113: break;
1114: }
1115: magic_warnm(m, at, "%.*s not supported", i, line);
1.1 nicm 1116: continue;
1117: }
1118:
1119: n = 0;
1120: for (; *line == '>'; line++)
1121: n++;
1122:
1123: ml = xcalloc(1, sizeof *ml);
1124: ml->root = m;
1125: ml->line = at;
1126: ml->type = MAGIC_TYPE_NONE;
1127: TAILQ_INIT(&ml->children);
1128: ml->text = 1;
1129:
1.13 nicm 1130: /*
1131: * At this point n is the level we want, level is the current
1132: * level. parent0 is the last line at the same level and parent
1133: * is the last line at the previous level.
1134: */
1.1 nicm 1135: if (n == level + 1) {
1136: parent = parent0;
1137: } else if (n < level) {
1138: for (i = n; i < level && parent != NULL; i++)
1139: parent = parent->parent;
1140: } else if (n != level) {
1141: magic_warn(ml, "level skipped (%u->%u)", level, n);
1142: free(ml);
1143: continue;
1144: }
1145: ml->parent = parent;
1146: level = n;
1147:
1148: if (magic_parse_offset(ml, &line) != 0 ||
1149: magic_parse_type(ml, &line) != 0 ||
1150: magic_parse_value(ml, &line) != 0 ||
1151: magic_set_result(ml, line) != 0) {
1.13 nicm 1152: /*
1153: * An invalid line still needs to appear in the tree in
1154: * case it has any children.
1155: */
1156: ml->type = MAGIC_TYPE_NONE;
1.1 nicm 1157: }
1158:
1159: ml->strength = magic_get_strength(ml);
1.22 nicm 1160: if (ml->parent == NULL) {
1161: if (ml->name != NULL)
1162: RB_INSERT(magic_named_tree, &m->named, ml);
1163: else
1164: RB_INSERT(magic_tree, &m->tree, ml);
1165: } else
1.1 nicm 1166: TAILQ_INSERT_TAIL(&ml->parent->children, ml, entry);
1167: parent0 = ml;
1168: }
1169: free(tmp);
1.26 ! brynet 1170: if (ferror(f))
! 1171: err(1, "%s", path);
1.1 nicm 1172:
1173: return (m);
1174: }