Annotation of src/usr.bin/file/magic-load.c, Revision 1.22
1.22 ! nicm 1: /* $OpenBSD: magic-load.c,v 1.21 2016/05/01 10:34:30 nicm Exp $ */
1.1 nicm 2:
3: /*
4: * Copyright (c) 2015 Nicholas Marriott <nicm@openbsd.org>
5: *
6: * Permission to use, copy, modify, and distribute this software for any
7: * purpose with or without fee is hereby granted, provided that the above
8: * copyright notice and this permission notice appear in all copies.
9: *
10: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14: * WHATSOEVER RESULTING FROM LOSS OF MIND, USE, DATA OR PROFITS, WHETHER
15: * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
16: * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17: */
18:
19: #include <sys/types.h>
20:
21: #include <ctype.h>
22: #include <errno.h>
23: #include <limits.h>
24: #include <regex.h>
25: #include <stdarg.h>
26: #include <stdio.h>
27: #include <stdlib.h>
28: #include <string.h>
29:
30: #include "magic.h"
31: #include "xmalloc.h"
32:
/*
 * Convert an octal digit character to its numeric value.
 * Returns 0-7 for '0'-'7' and -1 for any other character.
 */
static int
magic_odigit(u_char c)
{
	return ((c < '0' || c > '7') ? -1 : c - '0');
}
40:
/*
 * Convert a hexadecimal digit character (either case) to its numeric value.
 * Returns 0-15 for a valid digit and -1 for any other character.
 */
static int
magic_xdigit(u_char c)
{
	int	value = -1;

	if (c >= 'A' && c <= 'F')
		value = c - 'A' + 10;
	else if (c >= 'a' && c <= 'f')
		value = c - 'a' + 10;
	else if (c >= '0' && c <= '9')
		value = c - '0';
	return (value);
}
52:
53: static void
54: magic_mark_text(struct magic_line *ml, int text)
55: {
56: do {
57: ml->text = text;
58: ml = ml->parent;
59: } while (ml != NULL);
60: }
61:
/*
 * Compile the extended regular expression p into re (without subexpression
 * reporting). On failure a warning naming the pattern is issued against ml.
 * Returns 0 on success and -1 on failure.
 */
static int
magic_make_pattern(struct magic_line *ml, const char *name, regex_t *re,
    const char *p)
{
	char	msg[256];
	int	rc;

	rc = regcomp(re, p, REG_EXTENDED|REG_NOSUB);
	if (rc == 0)
		return (0);

	regerror(rc, re, msg, sizeof msg);
	magic_warn(ml, "bad %s pattern: %s", name, msg);
	return (-1);
}
77:
78: static int
79: magic_set_result(struct magic_line *ml, const char *s)
80: {
81: const char *fmt;
82: const char *endfmt;
83: const char *cp;
84: regex_t *re = NULL;
85: regmatch_t pmatch;
86: size_t fmtlen;
87:
88: while (isspace((u_char)*s))
89: s++;
90: if (*s == '\0') {
91: ml->result = NULL;
92: return (0);
93: }
94: ml->result = xstrdup(s);
95:
96: fmt = NULL;
97: for (cp = s; *cp != '\0'; cp++) {
98: if (cp[0] == '%' && cp[1] != '%') {
99: if (fmt != NULL) {
100: magic_warn(ml, "multiple formats");
101: return (-1);
102: }
103: fmt = cp;
104: }
105: }
106: if (fmt == NULL)
107: return (0);
108: fmt++;
109:
110: for (endfmt = fmt; *endfmt != '\0'; endfmt++) {
111: if (strchr("diouxXeEfFgGsc", *endfmt) != NULL)
112: break;
113: }
114: if (*endfmt == '\0') {
115: magic_warn(ml, "unterminated format");
116: return (-1);
117: }
118: fmtlen = endfmt + 1 - fmt;
119: if (fmtlen > 32) {
120: magic_warn(ml, "format too long");
121: return (-1);
122: }
123:
124: if (*endfmt == 's') {
125: switch (ml->type) {
126: case MAGIC_TYPE_DATE:
127: case MAGIC_TYPE_LDATE:
128: case MAGIC_TYPE_UDATE:
129: case MAGIC_TYPE_ULDATE:
130: case MAGIC_TYPE_BEDATE:
131: case MAGIC_TYPE_BELDATE:
132: case MAGIC_TYPE_UBEDATE:
133: case MAGIC_TYPE_UBELDATE:
134: case MAGIC_TYPE_QDATE:
135: case MAGIC_TYPE_QLDATE:
136: case MAGIC_TYPE_UQDATE:
137: case MAGIC_TYPE_UQLDATE:
138: case MAGIC_TYPE_BEQDATE:
139: case MAGIC_TYPE_BEQLDATE:
140: case MAGIC_TYPE_UBEQDATE:
141: case MAGIC_TYPE_UBEQLDATE:
142: case MAGIC_TYPE_LEQDATE:
143: case MAGIC_TYPE_LEQLDATE:
144: case MAGIC_TYPE_ULEQDATE:
145: case MAGIC_TYPE_ULEQLDATE:
146: case MAGIC_TYPE_LEDATE:
147: case MAGIC_TYPE_LELDATE:
148: case MAGIC_TYPE_ULEDATE:
149: case MAGIC_TYPE_ULELDATE:
150: case MAGIC_TYPE_MEDATE:
151: case MAGIC_TYPE_MELDATE:
152: case MAGIC_TYPE_STRING:
153: case MAGIC_TYPE_PSTRING:
154: case MAGIC_TYPE_BESTRING16:
155: case MAGIC_TYPE_LESTRING16:
156: case MAGIC_TYPE_REGEX:
157: case MAGIC_TYPE_SEARCH:
158: break;
159: default:
160: ml->stringify = 1;
161: break;
162: }
163: }
164:
165: if (!ml->root->compiled) {
166: /*
167: * XXX %ld (and %lu and so on) is invalid on 64-bit platforms
168: * with byte, short, long. We get lucky because our first and
169: * only argument ends up in a register. Accept it for now.
170: */
171: if (magic_make_pattern(ml, "short", &ml->root->format_short,
172: "^-?[0-9]*(\\.[0-9]*)?(c|(l|h|hh)?[iduxX])$") != 0)
173: return (-1);
174: if (magic_make_pattern(ml, "long", &ml->root->format_long,
175: "^-?[0-9]*(\\.[0-9]*)?(c|(l|h|hh)?[iduxX])$") != 0)
176: return (-1);
177: if (magic_make_pattern(ml, "quad", &ml->root->format_quad,
178: "^-?[0-9]*(\\.[0-9]*)?ll[iduxX]$") != 0)
179: return (-1);
180: if (magic_make_pattern(ml, "float", &ml->root->format_float,
181: "^-?[0-9]*(\\.[0-9]*)?[eEfFgG]$") != 0)
182: return (-1);
183: if (magic_make_pattern(ml, "string", &ml->root->format_string,
184: "^-?[0-9]*(\\.[0-9]*)?s$") != 0)
185: return (-1);
186: ml->root->compiled = 1;
187: }
188:
189: if (ml->stringify)
190: re = &ml->root->format_string;
191: else {
192: switch (ml->type) {
193: case MAGIC_TYPE_NONE:
1.17 nicm 194: case MAGIC_TYPE_BESTRING16:
195: case MAGIC_TYPE_LESTRING16:
1.22 ! nicm 196: case MAGIC_TYPE_NAME:
! 197: case MAGIC_TYPE_USE:
1.1 nicm 198: return (0); /* don't use result */
199: case MAGIC_TYPE_BYTE:
200: case MAGIC_TYPE_UBYTE:
201: case MAGIC_TYPE_SHORT:
202: case MAGIC_TYPE_USHORT:
203: case MAGIC_TYPE_BESHORT:
204: case MAGIC_TYPE_UBESHORT:
205: case MAGIC_TYPE_LESHORT:
206: case MAGIC_TYPE_ULESHORT:
207: re = &ml->root->format_short;
208: break;
209: case MAGIC_TYPE_LONG:
210: case MAGIC_TYPE_ULONG:
211: case MAGIC_TYPE_BELONG:
212: case MAGIC_TYPE_UBELONG:
213: case MAGIC_TYPE_LELONG:
214: case MAGIC_TYPE_ULELONG:
215: case MAGIC_TYPE_MELONG:
216: re = &ml->root->format_long;
217: break;
218: case MAGIC_TYPE_QUAD:
219: case MAGIC_TYPE_UQUAD:
220: case MAGIC_TYPE_BEQUAD:
221: case MAGIC_TYPE_UBEQUAD:
222: case MAGIC_TYPE_LEQUAD:
223: case MAGIC_TYPE_ULEQUAD:
224: re = &ml->root->format_quad;
225: break;
226: case MAGIC_TYPE_FLOAT:
227: case MAGIC_TYPE_BEFLOAT:
228: case MAGIC_TYPE_LEFLOAT:
229: case MAGIC_TYPE_DOUBLE:
230: case MAGIC_TYPE_BEDOUBLE:
231: case MAGIC_TYPE_LEDOUBLE:
232: re = &ml->root->format_float;
233: break;
234: case MAGIC_TYPE_DATE:
235: case MAGIC_TYPE_LDATE:
236: case MAGIC_TYPE_UDATE:
237: case MAGIC_TYPE_ULDATE:
238: case MAGIC_TYPE_BEDATE:
239: case MAGIC_TYPE_BELDATE:
240: case MAGIC_TYPE_UBEDATE:
241: case MAGIC_TYPE_UBELDATE:
242: case MAGIC_TYPE_QDATE:
243: case MAGIC_TYPE_QLDATE:
244: case MAGIC_TYPE_UQDATE:
245: case MAGIC_TYPE_UQLDATE:
246: case MAGIC_TYPE_BEQDATE:
247: case MAGIC_TYPE_BEQLDATE:
248: case MAGIC_TYPE_UBEQDATE:
249: case MAGIC_TYPE_UBEQLDATE:
250: case MAGIC_TYPE_LEQDATE:
251: case MAGIC_TYPE_LEQLDATE:
252: case MAGIC_TYPE_ULEQDATE:
253: case MAGIC_TYPE_ULEQLDATE:
254: case MAGIC_TYPE_LEDATE:
255: case MAGIC_TYPE_LELDATE:
256: case MAGIC_TYPE_ULEDATE:
257: case MAGIC_TYPE_ULELDATE:
258: case MAGIC_TYPE_MEDATE:
259: case MAGIC_TYPE_MELDATE:
260: case MAGIC_TYPE_STRING:
261: case MAGIC_TYPE_PSTRING:
262: case MAGIC_TYPE_REGEX:
263: case MAGIC_TYPE_SEARCH:
1.20 nicm 264: case MAGIC_TYPE_DEFAULT:
265: case MAGIC_TYPE_CLEAR:
1.1 nicm 266: re = &ml->root->format_string;
267: break;
268: }
269: }
270:
271: pmatch.rm_so = 0;
272: pmatch.rm_eo = fmtlen;
273: if (regexec(re, fmt, 1, &pmatch, REG_STARTEND) != 0) {
274: magic_warn(ml, "bad format for %s: %%%.*s", ml->type_string,
275: (int)fmtlen, fmt);
276: return (-1);
277: }
278:
279: return (0);
280: }
281:
282: static u_int
283: magic_get_strength(struct magic_line *ml)
284: {
285: int n;
286: size_t size;
287:
1.13 nicm 288: if (ml->type == MAGIC_TYPE_NONE)
289: return (0);
290:
1.18 nicm 291: if (ml->test_not || ml->test_operator == 'x') {
292: n = 1;
293: goto skip;
294: }
1.1 nicm 295:
1.5 nicm 296: n = 2 * MAGIC_STRENGTH_MULTIPLIER;
1.1 nicm 297: switch (ml->type) {
298: case MAGIC_TYPE_NONE:
299: case MAGIC_TYPE_DEFAULT:
300: return (0);
1.20 nicm 301: case MAGIC_TYPE_CLEAR:
1.22 ! nicm 302: case MAGIC_TYPE_NAME:
! 303: case MAGIC_TYPE_USE:
1.20 nicm 304: break;
1.1 nicm 305: case MAGIC_TYPE_BYTE:
306: case MAGIC_TYPE_UBYTE:
307: n += 1 * MAGIC_STRENGTH_MULTIPLIER;
308: break;
309: case MAGIC_TYPE_SHORT:
310: case MAGIC_TYPE_USHORT:
311: case MAGIC_TYPE_BESHORT:
312: case MAGIC_TYPE_UBESHORT:
313: case MAGIC_TYPE_LESHORT:
314: case MAGIC_TYPE_ULESHORT:
315: n += 2 * MAGIC_STRENGTH_MULTIPLIER;
316: break;
317: case MAGIC_TYPE_LONG:
318: case MAGIC_TYPE_ULONG:
319: case MAGIC_TYPE_FLOAT:
320: case MAGIC_TYPE_DATE:
321: case MAGIC_TYPE_LDATE:
322: case MAGIC_TYPE_UDATE:
323: case MAGIC_TYPE_ULDATE:
324: case MAGIC_TYPE_BELONG:
325: case MAGIC_TYPE_UBELONG:
326: case MAGIC_TYPE_BEFLOAT:
327: case MAGIC_TYPE_BEDATE:
328: case MAGIC_TYPE_BELDATE:
329: case MAGIC_TYPE_UBEDATE:
330: case MAGIC_TYPE_UBELDATE:
331: n += 4 * MAGIC_STRENGTH_MULTIPLIER;
332: break;
333: case MAGIC_TYPE_QUAD:
334: case MAGIC_TYPE_UQUAD:
335: case MAGIC_TYPE_DOUBLE:
336: case MAGIC_TYPE_QDATE:
337: case MAGIC_TYPE_QLDATE:
338: case MAGIC_TYPE_UQDATE:
339: case MAGIC_TYPE_UQLDATE:
340: case MAGIC_TYPE_BEQUAD:
341: case MAGIC_TYPE_UBEQUAD:
342: case MAGIC_TYPE_BEDOUBLE:
343: case MAGIC_TYPE_BEQDATE:
344: case MAGIC_TYPE_BEQLDATE:
345: case MAGIC_TYPE_UBEQDATE:
346: case MAGIC_TYPE_UBEQLDATE:
347: case MAGIC_TYPE_LEQUAD:
348: case MAGIC_TYPE_ULEQUAD:
349: case MAGIC_TYPE_LEDOUBLE:
350: case MAGIC_TYPE_LEQDATE:
351: case MAGIC_TYPE_LEQLDATE:
352: case MAGIC_TYPE_ULEQDATE:
353: case MAGIC_TYPE_ULEQLDATE:
354: case MAGIC_TYPE_LELONG:
355: case MAGIC_TYPE_ULELONG:
356: case MAGIC_TYPE_LEFLOAT:
357: case MAGIC_TYPE_LEDATE:
358: case MAGIC_TYPE_LELDATE:
359: case MAGIC_TYPE_ULEDATE:
360: case MAGIC_TYPE_ULELDATE:
361: case MAGIC_TYPE_MELONG:
362: case MAGIC_TYPE_MEDATE:
363: case MAGIC_TYPE_MELDATE:
364: n += 8 * MAGIC_STRENGTH_MULTIPLIER;
365: break;
366: case MAGIC_TYPE_STRING:
367: case MAGIC_TYPE_PSTRING:
368: n += ml->test_string_size * MAGIC_STRENGTH_MULTIPLIER;
369: break;
370: case MAGIC_TYPE_BESTRING16:
371: case MAGIC_TYPE_LESTRING16:
372: n += ml->test_string_size * MAGIC_STRENGTH_MULTIPLIER / 2;
373: break;
374: case MAGIC_TYPE_REGEX:
375: case MAGIC_TYPE_SEARCH:
376: size = MAGIC_STRENGTH_MULTIPLIER / ml->test_string_size;
377: if (size < 1)
378: size = 1;
379: n += ml->test_string_size * size;
380: break;
381: }
382: switch (ml->test_operator) {
383: case '=':
384: n += MAGIC_STRENGTH_MULTIPLIER;
385: break;
386: case '<':
387: case '>':
388: case '[':
389: case ']':
390: n -= 2 * MAGIC_STRENGTH_MULTIPLIER;
391: break;
392: case '^':
393: case '&':
394: n -= MAGIC_STRENGTH_MULTIPLIER;
395: break;
396: }
1.18 nicm 397:
398: skip:
399: switch (ml->strength_operator) {
400: case '+':
401: n += ml->strength_value;
402: break;
403: case '-':
404: n -= ml->strength_value;
405: break;
406: case '*':
407: n *= ml->strength_value;
408: break;
409: case '/':
410: n /= ml->strength_value;
411: break;
412: }
1.1 nicm 413: return (n <= 0 ? 1 : n);
414: }
415:
416: static int
417: magic_get_string(char **line, char *out, size_t *outlen)
418: {
419: char *start, *cp, c;
420: int d0, d1, d2;
421:
422: start = out;
423: for (cp = *line; *cp != '\0' && !isspace((u_char)*cp); cp++) {
424: if (*cp != '\\') {
425: *out++ = *cp;
426: continue;
427: }
428:
429: switch (c = *++cp) {
1.3 nicm 430: case '\0': /* end of line */
431: return (-1);
1.1 nicm 432: case ' ':
433: *out++ = ' ';
434: break;
435: case '0':
436: case '1':
437: case '2':
438: case '3':
439: case '4':
440: case '5':
441: case '6':
442: case '7':
443: d0 = magic_odigit(cp[0]);
444: if (cp[0] != '\0')
445: d1 = magic_odigit(cp[1]);
446: else
447: d1 = -1;
448: if (cp[0] != '\0' && cp[1] != '\0')
449: d2 = magic_odigit(cp[2]);
450: else
451: d2 = -1;
452:
453: if (d0 != -1 && d1 != -1 && d2 != -1) {
454: *out = d2 | (d1 << 3) | (d0 << 6);
455: cp += 2;
456: } else if (d0 != -1 && d1 != -1) {
457: *out = d1 | (d0 << 3);
458: cp++;
459: } else if (d0 != -1)
460: *out = d0;
461: else
462: return (-1);
463: out++;
464: break;
465: case 'x':
466: d0 = magic_xdigit(cp[1]);
467: if (cp[1] != '\0')
468: d1 = magic_xdigit(cp[2]);
469: else
470: d1 = -1;
471:
472: if (d0 != -1 && d1 != -1) {
473: *out = d1 | (d0 << 4);
474: cp += 2;
475: } else if (d0 != -1) {
476: *out = d0;
477: cp++;
478: } else
479: return (-1);
480: out++;
481:
482: break;
483: case 'a':
484: *out++ = '\a';
485: break;
486: case 'b':
487: *out++ = '\b';
488: break;
489: case 't':
490: *out++ = '\t';
491: break;
492: case 'f':
493: *out++ = '\f';
494: break;
495: case 'n':
496: *out++ = '\n';
497: break;
498: case 'r':
499: *out++ = '\r';
500: break;
501: case '\\':
502: *out++ = '\\';
503: break;
504: case '\'':
505: *out++ = '\'';
506: break;
507: case '\"':
508: *out++ = '\"';
509: break;
510: default:
511: *out++ = c;
512: break;
513: }
514: }
515: *out = '\0';
516: *outlen = out - start;
517:
518: *line = cp;
519: return (0);
520: }
521:
522: static int
523: magic_parse_offset(struct magic_line *ml, char **line)
524: {
525: char *copy, *s, *cp, *endptr;
526:
527: while (isspace((u_char)**line))
528: (*line)++;
529: copy = s = cp = xmalloc(strlen(*line) + 1);
530: while (**line != '\0' && !isspace((u_char)**line))
531: *cp++ = *(*line)++;
532: *cp = '\0';
533:
534: ml->offset = 0;
535: ml->offset_relative = 0;
536:
537: ml->indirect_type = ' ';
538: ml->indirect_relative = 0;
539: ml->indirect_offset = 0;
540: ml->indirect_operator = ' ';
541: ml->indirect_operand = 0;
542:
543: if (*s == '&') {
544: ml->offset_relative = 1;
545: s++;
546: }
547:
548: if (*s != '(') {
549: endptr = magic_strtoll(s, &ml->offset);
550: if (endptr == NULL || *endptr != '\0') {
551: magic_warn(ml, "missing closing bracket");
552: goto fail;
553: }
554: if (ml->offset < 0 && !ml->offset_relative) {
555: magic_warn(ml, "negative absolute offset");
556: goto fail;
557: }
558: goto done;
559: }
560: s++;
561:
562: if (*s == '&') {
563: ml->indirect_relative = 1;
564: s++;
565: }
566:
567: endptr = magic_strtoll(s, &ml->indirect_offset);
568: if (endptr == NULL) {
1.8 nicm 569: magic_warn(ml, "can't parse offset: %s", s);
1.1 nicm 570: goto fail;
571: }
572: s = endptr;
573: if (*s == ')')
574: goto done;
575:
576: if (*s == '.') {
577: s++;
1.6 tobias 578: if (*s == '\0' || strchr("bslBSL", *s) == NULL) {
1.8 nicm 579: magic_warn(ml, "unknown offset type: %c", *s);
1.1 nicm 580: goto fail;
581: }
582: ml->indirect_type = *s;
583: s++;
584: if (*s == ')')
585: goto done;
586: }
587:
1.6 tobias 588: if (*s == '\0' || strchr("+-*", *s) == NULL) {
1.8 nicm 589: magic_warn(ml, "unknown offset operator: %c", *s);
1.1 nicm 590: goto fail;
591: }
592: ml->indirect_operator = *s;
593: s++;
594: if (*s == ')')
595: goto done;
596:
597: if (*s == '(') {
598: s++;
599: endptr = magic_strtoll(s, &ml->indirect_operand);
600: if (endptr == NULL || *endptr != ')') {
601: magic_warn(ml, "missing closing bracket");
602: goto fail;
603: }
604: if (*++endptr != ')') {
605: magic_warn(ml, "missing closing bracket");
606: goto fail;
607: }
608: } else {
609: endptr = magic_strtoll(s, &ml->indirect_operand);
610: if (endptr == NULL || *endptr != ')') {
611: magic_warn(ml, "missing closing bracket");
612: goto fail;
613: }
614: }
615:
616: done:
617: free(copy);
618: return (0);
619:
620: fail:
621: free(copy);
622: return (-1);
623: }
624:
625: static int
626: magic_parse_type(struct magic_line *ml, char **line)
627: {
628: char *copy, *s, *cp, *endptr;
629:
630: while (isspace((u_char)**line))
631: (*line)++;
632: copy = s = cp = xmalloc(strlen(*line) + 1);
633: while (**line != '\0' && !isspace((u_char)**line))
634: *cp++ = *(*line)++;
635: *cp = '\0';
636:
637: ml->type = MAGIC_TYPE_NONE;
638: ml->type_operator = ' ';
639: ml->type_operand = 0;
640:
1.22 ! nicm 641: if (strncmp(s, "name", (sizeof "name") - 1) == 0) {
! 642: ml->type = MAGIC_TYPE_NAME;
! 643: ml->type_string = xstrdup(s);
! 644: goto done;
! 645: }
! 646: if (strncmp(s, "use", (sizeof "use") - 1) == 0) {
! 647: ml->type = MAGIC_TYPE_USE;
! 648: ml->type_string = xstrdup(s);
! 649: goto done;
! 650: }
! 651:
1.16 nicm 652: if (strncmp(s, "string", (sizeof "string") - 1) == 0 ||
653: strncmp(s, "ustring", (sizeof "ustring") - 1) == 0) {
654: if (*s == 'u')
655: ml->type_string = xstrdup(s + 1);
656: else
657: ml->type_string = xstrdup(s);
1.1 nicm 658: ml->type = MAGIC_TYPE_STRING;
659: magic_mark_text(ml, 0);
660: goto done;
661: }
1.16 nicm 662: if (strncmp(s, "pstring", (sizeof "pstring") - 1) == 0 ||
663: strncmp(s, "upstring", (sizeof "upstring") - 1) == 0) {
664: if (*s == 'u')
665: ml->type_string = xstrdup(s + 1);
666: else
667: ml->type_string = xstrdup(s);
668: ml->type = MAGIC_TYPE_PSTRING;
669: magic_mark_text(ml, 0);
670: goto done;
671: }
672: if (strncmp(s, "search", (sizeof "search") - 1) == 0 ||
673: strncmp(s, "usearch", (sizeof "usearch") - 1) == 0) {
674: if (*s == 'u')
675: ml->type_string = xstrdup(s + 1);
676: else
677: ml->type_string = xstrdup(s);
1.1 nicm 678: ml->type = MAGIC_TYPE_SEARCH;
679: goto done;
680: }
1.16 nicm 681: if (strncmp(s, "regex", (sizeof "regex") - 1) == 0 ||
682: strncmp(s, "uregex", (sizeof "uregex") - 1) == 0) {
683: if (*s == 'u')
684: ml->type_string = xstrdup(s + 1);
685: else
686: ml->type_string = xstrdup(s);
1.1 nicm 687: ml->type = MAGIC_TYPE_REGEX;
688: goto done;
689: }
1.16 nicm 690: ml->type_string = xstrdup(s);
1.1 nicm 691:
1.12 nicm 692: cp = &s[strcspn(s, "+-&/%*")];
1.1 nicm 693: if (*cp != '\0') {
694: ml->type_operator = *cp;
695: endptr = magic_strtoull(cp + 1, &ml->type_operand);
696: if (endptr == NULL || *endptr != '\0') {
1.8 nicm 697: magic_warn(ml, "can't parse operand: %s", cp + 1);
1.1 nicm 698: goto fail;
699: }
700: *cp = '\0';
701: }
702:
703: if (strcmp(s, "byte") == 0)
704: ml->type = MAGIC_TYPE_BYTE;
705: else if (strcmp(s, "short") == 0)
706: ml->type = MAGIC_TYPE_SHORT;
707: else if (strcmp(s, "long") == 0)
708: ml->type = MAGIC_TYPE_LONG;
709: else if (strcmp(s, "quad") == 0)
710: ml->type = MAGIC_TYPE_QUAD;
711: else if (strcmp(s, "ubyte") == 0)
712: ml->type = MAGIC_TYPE_UBYTE;
713: else if (strcmp(s, "ushort") == 0)
714: ml->type = MAGIC_TYPE_USHORT;
715: else if (strcmp(s, "ulong") == 0)
716: ml->type = MAGIC_TYPE_ULONG;
717: else if (strcmp(s, "uquad") == 0)
718: ml->type = MAGIC_TYPE_UQUAD;
1.16 nicm 719: else if (strcmp(s, "float") == 0 || strcmp(s, "ufloat") == 0)
1.1 nicm 720: ml->type = MAGIC_TYPE_FLOAT;
1.16 nicm 721: else if (strcmp(s, "double") == 0 || strcmp(s, "udouble") == 0)
1.1 nicm 722: ml->type = MAGIC_TYPE_DOUBLE;
723: else if (strcmp(s, "date") == 0)
724: ml->type = MAGIC_TYPE_DATE;
725: else if (strcmp(s, "qdate") == 0)
726: ml->type = MAGIC_TYPE_QDATE;
727: else if (strcmp(s, "ldate") == 0)
728: ml->type = MAGIC_TYPE_LDATE;
729: else if (strcmp(s, "qldate") == 0)
730: ml->type = MAGIC_TYPE_QLDATE;
731: else if (strcmp(s, "udate") == 0)
732: ml->type = MAGIC_TYPE_UDATE;
733: else if (strcmp(s, "uqdate") == 0)
734: ml->type = MAGIC_TYPE_UQDATE;
735: else if (strcmp(s, "uldate") == 0)
736: ml->type = MAGIC_TYPE_ULDATE;
737: else if (strcmp(s, "uqldate") == 0)
738: ml->type = MAGIC_TYPE_UQLDATE;
739: else if (strcmp(s, "beshort") == 0)
740: ml->type = MAGIC_TYPE_BESHORT;
741: else if (strcmp(s, "belong") == 0)
742: ml->type = MAGIC_TYPE_BELONG;
743: else if (strcmp(s, "bequad") == 0)
744: ml->type = MAGIC_TYPE_BEQUAD;
745: else if (strcmp(s, "ubeshort") == 0)
746: ml->type = MAGIC_TYPE_UBESHORT;
747: else if (strcmp(s, "ubelong") == 0)
748: ml->type = MAGIC_TYPE_UBELONG;
749: else if (strcmp(s, "ubequad") == 0)
750: ml->type = MAGIC_TYPE_UBEQUAD;
1.16 nicm 751: else if (strcmp(s, "befloat") == 0 || strcmp(s, "ubefloat") == 0)
1.1 nicm 752: ml->type = MAGIC_TYPE_BEFLOAT;
1.16 nicm 753: else if (strcmp(s, "bedouble") == 0 || strcmp(s, "ubedouble") == 0)
1.1 nicm 754: ml->type = MAGIC_TYPE_BEDOUBLE;
755: else if (strcmp(s, "bedate") == 0)
756: ml->type = MAGIC_TYPE_BEDATE;
757: else if (strcmp(s, "beqdate") == 0)
758: ml->type = MAGIC_TYPE_BEQDATE;
759: else if (strcmp(s, "beldate") == 0)
760: ml->type = MAGIC_TYPE_BELDATE;
761: else if (strcmp(s, "beqldate") == 0)
762: ml->type = MAGIC_TYPE_BEQLDATE;
763: else if (strcmp(s, "ubedate") == 0)
764: ml->type = MAGIC_TYPE_UBEDATE;
765: else if (strcmp(s, "ubeqdate") == 0)
766: ml->type = MAGIC_TYPE_UBEQDATE;
767: else if (strcmp(s, "ubeldate") == 0)
768: ml->type = MAGIC_TYPE_UBELDATE;
769: else if (strcmp(s, "ubeqldate") == 0)
770: ml->type = MAGIC_TYPE_UBEQLDATE;
1.16 nicm 771: else if (strcmp(s, "bestring16") == 0 || strcmp(s, "ubestring16") == 0)
1.1 nicm 772: ml->type = MAGIC_TYPE_BESTRING16;
773: else if (strcmp(s, "leshort") == 0)
774: ml->type = MAGIC_TYPE_LESHORT;
775: else if (strcmp(s, "lelong") == 0)
776: ml->type = MAGIC_TYPE_LELONG;
777: else if (strcmp(s, "lequad") == 0)
778: ml->type = MAGIC_TYPE_LEQUAD;
779: else if (strcmp(s, "uleshort") == 0)
780: ml->type = MAGIC_TYPE_ULESHORT;
781: else if (strcmp(s, "ulelong") == 0)
782: ml->type = MAGIC_TYPE_ULELONG;
783: else if (strcmp(s, "ulequad") == 0)
784: ml->type = MAGIC_TYPE_ULEQUAD;
1.16 nicm 785: else if (strcmp(s, "lefloat") == 0 || strcmp(s, "ulefloat") == 0)
1.1 nicm 786: ml->type = MAGIC_TYPE_LEFLOAT;
1.16 nicm 787: else if (strcmp(s, "ledouble") == 0 || strcmp(s, "uledouble") == 0)
1.1 nicm 788: ml->type = MAGIC_TYPE_LEDOUBLE;
789: else if (strcmp(s, "ledate") == 0)
790: ml->type = MAGIC_TYPE_LEDATE;
791: else if (strcmp(s, "leqdate") == 0)
792: ml->type = MAGIC_TYPE_LEQDATE;
793: else if (strcmp(s, "leldate") == 0)
794: ml->type = MAGIC_TYPE_LELDATE;
795: else if (strcmp(s, "leqldate") == 0)
796: ml->type = MAGIC_TYPE_LEQLDATE;
797: else if (strcmp(s, "uledate") == 0)
798: ml->type = MAGIC_TYPE_ULEDATE;
799: else if (strcmp(s, "uleqdate") == 0)
800: ml->type = MAGIC_TYPE_ULEQDATE;
801: else if (strcmp(s, "uleldate") == 0)
802: ml->type = MAGIC_TYPE_ULELDATE;
803: else if (strcmp(s, "uleqldate") == 0)
804: ml->type = MAGIC_TYPE_ULEQLDATE;
1.16 nicm 805: else if (strcmp(s, "lestring16") == 0 || strcmp(s, "ulestring16") == 0)
1.1 nicm 806: ml->type = MAGIC_TYPE_LESTRING16;
1.16 nicm 807: else if (strcmp(s, "melong") == 0 || strcmp(s, "umelong") == 0)
1.1 nicm 808: ml->type = MAGIC_TYPE_MELONG;
1.16 nicm 809: else if (strcmp(s, "medate") == 0 || strcmp(s, "umedate") == 0)
1.1 nicm 810: ml->type = MAGIC_TYPE_MEDATE;
1.16 nicm 811: else if (strcmp(s, "meldate") == 0 || strcmp(s, "umeldate") == 0)
1.1 nicm 812: ml->type = MAGIC_TYPE_MELDATE;
1.16 nicm 813: else if (strcmp(s, "default") == 0 || strcmp(s, "udefault") == 0)
1.1 nicm 814: ml->type = MAGIC_TYPE_DEFAULT;
1.20 nicm 815: else if (strcmp(s, "clear") == 0 || strcmp(s, "uclear") == 0)
816: ml->type = MAGIC_TYPE_CLEAR;
1.1 nicm 817: else {
1.8 nicm 818: magic_warn(ml, "unknown type: %s", s);
1.1 nicm 819: goto fail;
820: }
821: magic_mark_text(ml, 0);
822:
823: done:
824: free(copy);
825: return (0);
826:
827: fail:
828: free(copy);
829: return (-1);
830: }
831:
832: static int
833: magic_parse_value(struct magic_line *ml, char **line)
834: {
835: char *copy, *s, *cp, *endptr;
836: size_t slen;
1.10 nicm 837: uint64_t u;
1.1 nicm 838:
839: while (isspace((u_char)**line))
840: (*line)++;
841:
842: ml->test_operator = '=';
843: ml->test_not = 0;
844: ml->test_string = NULL;
845: ml->test_string_size = 0;
846: ml->test_unsigned = 0;
847: ml->test_signed = 0;
848:
1.9 nicm 849: if (**line == '\0')
850: return (0);
851:
1.1 nicm 852: s = *line;
853: if (s[0] == 'x' && (s[1] == '\0' || isspace((u_char)s[1]))) {
854: (*line)++;
1.20 nicm 855: ml->test_operator = 'x';
856: return (0);
857: }
858:
859: if (ml->type == MAGIC_TYPE_DEFAULT || ml->type == MAGIC_TYPE_CLEAR) {
860: magic_warn(ml, "test specified for default or clear");
1.1 nicm 861: ml->test_operator = 'x';
862: return (0);
863: }
864:
865: if (**line == '!') {
866: ml->test_not = 1;
867: (*line)++;
868: }
869:
870: switch (ml->type) {
1.22 ! nicm 871: case MAGIC_TYPE_NAME:
! 872: case MAGIC_TYPE_USE:
! 873: copy = s = xmalloc(strlen(*line) + 1);
! 874: if (magic_get_string(line, s, &slen) != 0 || slen == 0) {
! 875: magic_warn(ml, "can't parse string");
! 876: goto fail;
! 877: }
! 878: if (slen == 0 || *s == '\0' || strcmp(s, "^") == 0) {
! 879: magic_warn(ml, "invalid name");
! 880: goto fail;
! 881: }
! 882: ml->name = s;
! 883: return (0); /* do not free */
1.1 nicm 884: case MAGIC_TYPE_STRING:
885: case MAGIC_TYPE_PSTRING:
886: case MAGIC_TYPE_SEARCH:
887: if (**line == '>' || **line == '<' || **line == '=') {
888: ml->test_operator = **line;
889: (*line)++;
890: }
891: /* FALLTHROUGH */
892: case MAGIC_TYPE_REGEX:
1.21 nicm 893: if (**line == '=')
894: (*line)++;
1.1 nicm 895: copy = s = xmalloc(strlen(*line) + 1);
896: if (magic_get_string(line, s, &slen) != 0) {
897: magic_warn(ml, "can't parse string");
898: goto fail;
899: }
900: ml->test_string_size = slen;
901: ml->test_string = s;
902: return (0); /* do not free */
903: default:
904: break;
905: }
906:
1.9 nicm 907: while (isspace((u_char)**line))
908: (*line)++;
909: if ((*line)[0] == '<' && (*line)[1] == '=') {
910: ml->test_operator = '[';
911: (*line) += 2;
912: } else if ((*line)[0] == '>' && (*line)[1] == '=') {
913: ml->test_operator = ']';
1.1 nicm 914: (*line) += 2;
1.19 tobias 915: } else if (**line != '\0' && strchr("=<>&^", **line) != NULL) {
1.9 nicm 916: ml->test_operator = **line;
917: (*line)++;
1.1 nicm 918: }
1.9 nicm 919:
920: while (isspace((u_char)**line))
921: (*line)++;
922: copy = cp = xmalloc(strlen(*line) + 1);
1.1 nicm 923: while (**line != '\0' && !isspace((u_char)**line))
924: *cp++ = *(*line)++;
925: *cp = '\0';
926:
1.11 nicm 927: switch (ml->type) {
928: case MAGIC_TYPE_FLOAT:
929: case MAGIC_TYPE_DOUBLE:
930: case MAGIC_TYPE_BEFLOAT:
931: case MAGIC_TYPE_BEDOUBLE:
932: case MAGIC_TYPE_LEFLOAT:
933: case MAGIC_TYPE_LEDOUBLE:
934: errno = 0;
935: ml->test_double = strtod(copy, &endptr);
936: if (errno == ERANGE)
937: endptr = NULL;
938: break;
939: default:
940: if (*ml->type_string == 'u')
941: endptr = magic_strtoull(copy, &ml->test_unsigned);
942: else {
943: endptr = magic_strtoll(copy, &ml->test_signed);
944: if (endptr == NULL || *endptr != '\0') {
945: /*
946: * If we can't parse this as a signed number,
947: * try as unsigned instead.
948: */
949: endptr = magic_strtoull(copy, &u);
950: if (endptr != NULL && *endptr == '\0')
951: ml->test_signed = (int64_t)u;
952: }
1.10 nicm 953: }
1.11 nicm 954: break;
1.10 nicm 955: }
1.1 nicm 956: if (endptr == NULL || *endptr != '\0') {
1.9 nicm 957: magic_warn(ml, "can't parse number: %s", copy);
1.1 nicm 958: goto fail;
959: }
960:
961: free(copy);
962: return (0);
963:
964: fail:
965: free(copy);
966: return (-1);
967: }
968:
969: int
970: magic_compare(struct magic_line *ml1, struct magic_line *ml2)
971: {
972: if (ml1->strength < ml2->strength)
973: return (1);
974: if (ml1->strength > ml2->strength)
975: return (-1);
976:
977: /*
978: * The original file depends on the (undefined!) qsort(3) behaviour
979: * when the strength is equal. This is impossible to reproduce with an
980: * RB tree so just use the line number and hope for the best.
981: */
982: if (ml1->line < ml2->line)
983: return (-1);
984: if (ml1->line > ml2->line)
985: return (1);
986:
987: return (0);
988: }
989: RB_GENERATE(magic_tree, magic_line, node, magic_compare);
990:
1.22 ! nicm 991: int
! 992: magic_named_compare(struct magic_line *ml1, struct magic_line *ml2)
! 993: {
! 994: return (strcmp(ml1->name, ml2->name));
! 995: }
! 996: RB_GENERATE(magic_named_tree, magic_line, node, magic_named_compare);
! 997:
1.1 nicm 998: static void
1.18 nicm 999: magic_adjust_strength(struct magic *m, u_int at, struct magic_line *ml,
1000: char *line)
1001: {
1002: char *cp, *s;
1003: int64_t value;
1004:
1005: cp = line + (sizeof "!:strength") - 1;
1006: while (isspace((u_char)*cp))
1007: cp++;
1008: s = cp;
1009:
1010: cp = strchr(s, '#');
1011: if (cp != NULL)
1012: *cp = '\0';
1013: cp = s;
1014:
1.19 tobias 1015: if (*s == '\0' || strchr("+-*/", *s) == NULL) {
1.18 nicm 1016: magic_warnm(m, at, "invalid strength operator: %s", s);
1017: return;
1018: }
1019: ml->strength_operator = *cp++;
1020:
1021: while (isspace((u_char)*cp))
1022: cp++;
1023: cp = magic_strtoll(cp, &value);
1024: while (cp != NULL && isspace((u_char)*cp))
1025: cp++;
1026: if (cp == NULL || *cp != '\0' || value < 0 || value > 255) {
1027: magic_warnm(m, at, "invalid strength value: %s", s);
1028: return;
1029: }
1030: ml->strength_value = value;
1031: }
1032:
1033: static void
1.1 nicm 1034: magic_set_mimetype(struct magic *m, u_int at, struct magic_line *ml, char *line)
1035: {
1036: char *mimetype, *cp;
1037:
1038: mimetype = line + (sizeof "!:mime") - 1;
1039: while (isspace((u_char)*mimetype))
1040: mimetype++;
1041:
1042: cp = strchr(mimetype, '#');
1043: if (cp != NULL)
1044: *cp = '\0';
1045:
1046: if (*mimetype != '\0') {
1047: cp = mimetype + strlen(mimetype) - 1;
1048: while (cp != mimetype && isspace((u_char)*cp))
1049: *cp-- = '\0';
1050: }
1051:
1052: cp = mimetype;
1053: while (*cp != '\0') {
1054: if (!isalnum((u_char)*cp) && strchr("/-.+", *cp) == NULL)
1055: break;
1056: cp++;
1057: }
1058: if (*mimetype == '\0' || *cp != '\0') {
1.7 nicm 1059: magic_warnm(m, at, "invalid MIME type: %s", mimetype);
1.1 nicm 1060: return;
1061: }
1062: if (ml == NULL) {
1.7 nicm 1063: magic_warnm(m, at, "stray MIME type: %s", mimetype);
1.1 nicm 1064: return;
1065: }
1066: ml->mimetype = xstrdup(mimetype);
1067: }
1068:
1069: struct magic *
1070: magic_load(FILE *f, const char *path, int warnings)
1071: {
1072: struct magic *m;
1073: struct magic_line *ml = NULL, *parent, *parent0;
1074: char *line, *tmp;
1075: size_t size;
1076: u_int at, level, n, i;
1077:
1078: m = xcalloc(1, sizeof *m);
1079: m->path = xstrdup(path);
1080: m->warnings = warnings;
1081: RB_INIT(&m->tree);
1082:
1083: parent = NULL;
1084: parent0 = NULL;
1085: level = 0;
1086:
1087: at = 0;
1088: tmp = NULL;
1089: while ((line = fgetln(f, &size))) {
1090: if (line[size - 1] == '\n')
1091: line[size - 1] = '\0';
1092: else {
1093: tmp = xmalloc(size + 1);
1094: memcpy(tmp, line, size);
1095: tmp[size] = '\0';
1096: line = tmp;
1097: }
1098: at++;
1099:
1100: while (isspace((u_char)*line))
1101: line++;
1102: if (*line == '\0' || *line == '#')
1103: continue;
1104:
1.15 nicm 1105: if (strncmp (line, "!:mime", 6) == 0) {
1.1 nicm 1106: magic_set_mimetype(m, at, ml, line);
1.18 nicm 1107: continue;
1108: }
1109: if (strncmp (line, "!:strength", 10) == 0) {
1110: magic_adjust_strength(m, at, ml, line);
1.15 nicm 1111: continue;
1112: }
1113: if (strncmp (line, "!:", 2) == 0) {
1114: for (i = 0; i < 64 && line[i] != '\0'; i++) {
1115: if (isspace((u_char)line[i]))
1116: break;
1117: }
1118: magic_warnm(m, at, "%.*s not supported", i, line);
1.1 nicm 1119: continue;
1120: }
1121:
1122: n = 0;
1123: for (; *line == '>'; line++)
1124: n++;
1125:
1126: ml = xcalloc(1, sizeof *ml);
1127: ml->root = m;
1128: ml->line = at;
1129: ml->type = MAGIC_TYPE_NONE;
1130: TAILQ_INIT(&ml->children);
1131: ml->text = 1;
1132:
1.13 nicm 1133: /*
1134: * At this point n is the level we want, level is the current
1135: * level. parent0 is the last line at the same level and parent
1136: * is the last line at the previous level.
1137: */
1.1 nicm 1138: if (n == level + 1) {
1139: parent = parent0;
1140: } else if (n < level) {
1141: for (i = n; i < level && parent != NULL; i++)
1142: parent = parent->parent;
1143: } else if (n != level) {
1144: magic_warn(ml, "level skipped (%u->%u)", level, n);
1145: free(ml);
1146: continue;
1147: }
1148: ml->parent = parent;
1149: level = n;
1150:
1151: if (magic_parse_offset(ml, &line) != 0 ||
1152: magic_parse_type(ml, &line) != 0 ||
1153: magic_parse_value(ml, &line) != 0 ||
1154: magic_set_result(ml, line) != 0) {
1.13 nicm 1155: /*
1156: * An invalid line still needs to appear in the tree in
1157: * case it has any children.
1158: */
1159: ml->type = MAGIC_TYPE_NONE;
1.1 nicm 1160: }
1161:
1162: ml->strength = magic_get_strength(ml);
1.22 ! nicm 1163: if (ml->parent == NULL) {
! 1164: if (ml->name != NULL)
! 1165: RB_INSERT(magic_named_tree, &m->named, ml);
! 1166: else
! 1167: RB_INSERT(magic_tree, &m->tree, ml);
! 1168: } else
1.1 nicm 1169: TAILQ_INSERT_TAIL(&ml->parent->children, ml, entry);
1170: parent0 = ml;
1171: }
1172: free(tmp);
1173:
1174: fclose(f);
1175: return (m);
1176: }