Annotation of src/usr.bin/file/magic-load.c, Revision 1.21
1.21 ! nicm 1: /* $OpenBSD: magic-load.c,v 1.20 2016/05/01 08:48:39 nicm Exp $ */
1.1 nicm 2:
3: /*
4: * Copyright (c) 2015 Nicholas Marriott <nicm@openbsd.org>
5: *
6: * Permission to use, copy, modify, and distribute this software for any
7: * purpose with or without fee is hereby granted, provided that the above
8: * copyright notice and this permission notice appear in all copies.
9: *
10: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14: * WHATSOEVER RESULTING FROM LOSS OF MIND, USE, DATA OR PROFITS, WHETHER
15: * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
16: * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17: */
18:
19: #include <sys/types.h>
20:
21: #include <ctype.h>
22: #include <errno.h>
23: #include <limits.h>
24: #include <regex.h>
25: #include <stdarg.h>
26: #include <stdio.h>
27: #include <stdlib.h>
28: #include <string.h>
29:
30: #include "magic.h"
31: #include "xmalloc.h"
32:
/*
 * Convert an octal digit character to its numeric value. Returns -1 if c is
 * not in the range '0' to '7'.
 */
static int
magic_odigit(u_char c)
{
	return ((c < '0' || c > '7') ? -1 : c - '0');
}
40:
/*
 * Convert a hexadecimal digit character (either case) to its numeric value.
 * Returns -1 if c is not a hexadecimal digit.
 */
static int
magic_xdigit(u_char c)
{
	int	value = -1;

	if (c >= '0' && c <= '9')
		value = c - '0';
	else if (c >= 'a' && c <= 'f')
		value = c - 'a' + 10;
	else if (c >= 'A' && c <= 'F')
		value = c - 'A' + 10;
	return (value);
}
52:
53: static void
54: magic_mark_text(struct magic_line *ml, int text)
55: {
56: do {
57: ml->text = text;
58: ml = ml->parent;
59: } while (ml != NULL);
60: }
61:
/*
 * Compile the extended regular expression p into re. On failure a warning
 * naming the pattern is issued against ml and -1 is returned; on success the
 * return value is 0.
 */
static int
magic_make_pattern(struct magic_line *ml, const char *name, regex_t *re,
    const char *p)
{
	char	msg[256];
	int	rc;

	rc = regcomp(re, p, REG_EXTENDED|REG_NOSUB);
	if (rc == 0)
		return (0);

	regerror(rc, re, msg, sizeof msg);
	magic_warn(ml, "bad %s pattern: %s", name, msg);
	return (-1);
}
77:
78: static int
79: magic_set_result(struct magic_line *ml, const char *s)
80: {
81: const char *fmt;
82: const char *endfmt;
83: const char *cp;
84: regex_t *re = NULL;
85: regmatch_t pmatch;
86: size_t fmtlen;
87:
88: while (isspace((u_char)*s))
89: s++;
90: if (*s == '\0') {
91: ml->result = NULL;
92: return (0);
93: }
94: ml->result = xstrdup(s);
95:
96: fmt = NULL;
97: for (cp = s; *cp != '\0'; cp++) {
98: if (cp[0] == '%' && cp[1] != '%') {
99: if (fmt != NULL) {
100: magic_warn(ml, "multiple formats");
101: return (-1);
102: }
103: fmt = cp;
104: }
105: }
106: if (fmt == NULL)
107: return (0);
108: fmt++;
109:
110: for (endfmt = fmt; *endfmt != '\0'; endfmt++) {
111: if (strchr("diouxXeEfFgGsc", *endfmt) != NULL)
112: break;
113: }
114: if (*endfmt == '\0') {
115: magic_warn(ml, "unterminated format");
116: return (-1);
117: }
118: fmtlen = endfmt + 1 - fmt;
119: if (fmtlen > 32) {
120: magic_warn(ml, "format too long");
121: return (-1);
122: }
123:
124: if (*endfmt == 's') {
125: switch (ml->type) {
126: case MAGIC_TYPE_DATE:
127: case MAGIC_TYPE_LDATE:
128: case MAGIC_TYPE_UDATE:
129: case MAGIC_TYPE_ULDATE:
130: case MAGIC_TYPE_BEDATE:
131: case MAGIC_TYPE_BELDATE:
132: case MAGIC_TYPE_UBEDATE:
133: case MAGIC_TYPE_UBELDATE:
134: case MAGIC_TYPE_QDATE:
135: case MAGIC_TYPE_QLDATE:
136: case MAGIC_TYPE_UQDATE:
137: case MAGIC_TYPE_UQLDATE:
138: case MAGIC_TYPE_BEQDATE:
139: case MAGIC_TYPE_BEQLDATE:
140: case MAGIC_TYPE_UBEQDATE:
141: case MAGIC_TYPE_UBEQLDATE:
142: case MAGIC_TYPE_LEQDATE:
143: case MAGIC_TYPE_LEQLDATE:
144: case MAGIC_TYPE_ULEQDATE:
145: case MAGIC_TYPE_ULEQLDATE:
146: case MAGIC_TYPE_LEDATE:
147: case MAGIC_TYPE_LELDATE:
148: case MAGIC_TYPE_ULEDATE:
149: case MAGIC_TYPE_ULELDATE:
150: case MAGIC_TYPE_MEDATE:
151: case MAGIC_TYPE_MELDATE:
152: case MAGIC_TYPE_STRING:
153: case MAGIC_TYPE_PSTRING:
154: case MAGIC_TYPE_BESTRING16:
155: case MAGIC_TYPE_LESTRING16:
156: case MAGIC_TYPE_REGEX:
157: case MAGIC_TYPE_SEARCH:
158: break;
159: default:
160: ml->stringify = 1;
161: break;
162: }
163: }
164:
165: if (!ml->root->compiled) {
166: /*
167: * XXX %ld (and %lu and so on) is invalid on 64-bit platforms
168: * with byte, short, long. We get lucky because our first and
169: * only argument ends up in a register. Accept it for now.
170: */
171: if (magic_make_pattern(ml, "short", &ml->root->format_short,
172: "^-?[0-9]*(\\.[0-9]*)?(c|(l|h|hh)?[iduxX])$") != 0)
173: return (-1);
174: if (magic_make_pattern(ml, "long", &ml->root->format_long,
175: "^-?[0-9]*(\\.[0-9]*)?(c|(l|h|hh)?[iduxX])$") != 0)
176: return (-1);
177: if (magic_make_pattern(ml, "quad", &ml->root->format_quad,
178: "^-?[0-9]*(\\.[0-9]*)?ll[iduxX]$") != 0)
179: return (-1);
180: if (magic_make_pattern(ml, "float", &ml->root->format_float,
181: "^-?[0-9]*(\\.[0-9]*)?[eEfFgG]$") != 0)
182: return (-1);
183: if (magic_make_pattern(ml, "string", &ml->root->format_string,
184: "^-?[0-9]*(\\.[0-9]*)?s$") != 0)
185: return (-1);
186: ml->root->compiled = 1;
187: }
188:
189: if (ml->stringify)
190: re = &ml->root->format_string;
191: else {
192: switch (ml->type) {
193: case MAGIC_TYPE_NONE:
1.17 nicm 194: case MAGIC_TYPE_BESTRING16:
195: case MAGIC_TYPE_LESTRING16:
1.1 nicm 196: return (0); /* don't use result */
197: case MAGIC_TYPE_BYTE:
198: case MAGIC_TYPE_UBYTE:
199: case MAGIC_TYPE_SHORT:
200: case MAGIC_TYPE_USHORT:
201: case MAGIC_TYPE_BESHORT:
202: case MAGIC_TYPE_UBESHORT:
203: case MAGIC_TYPE_LESHORT:
204: case MAGIC_TYPE_ULESHORT:
205: re = &ml->root->format_short;
206: break;
207: case MAGIC_TYPE_LONG:
208: case MAGIC_TYPE_ULONG:
209: case MAGIC_TYPE_BELONG:
210: case MAGIC_TYPE_UBELONG:
211: case MAGIC_TYPE_LELONG:
212: case MAGIC_TYPE_ULELONG:
213: case MAGIC_TYPE_MELONG:
214: re = &ml->root->format_long;
215: break;
216: case MAGIC_TYPE_QUAD:
217: case MAGIC_TYPE_UQUAD:
218: case MAGIC_TYPE_BEQUAD:
219: case MAGIC_TYPE_UBEQUAD:
220: case MAGIC_TYPE_LEQUAD:
221: case MAGIC_TYPE_ULEQUAD:
222: re = &ml->root->format_quad;
223: break;
224: case MAGIC_TYPE_FLOAT:
225: case MAGIC_TYPE_BEFLOAT:
226: case MAGIC_TYPE_LEFLOAT:
227: case MAGIC_TYPE_DOUBLE:
228: case MAGIC_TYPE_BEDOUBLE:
229: case MAGIC_TYPE_LEDOUBLE:
230: re = &ml->root->format_float;
231: break;
232: case MAGIC_TYPE_DATE:
233: case MAGIC_TYPE_LDATE:
234: case MAGIC_TYPE_UDATE:
235: case MAGIC_TYPE_ULDATE:
236: case MAGIC_TYPE_BEDATE:
237: case MAGIC_TYPE_BELDATE:
238: case MAGIC_TYPE_UBEDATE:
239: case MAGIC_TYPE_UBELDATE:
240: case MAGIC_TYPE_QDATE:
241: case MAGIC_TYPE_QLDATE:
242: case MAGIC_TYPE_UQDATE:
243: case MAGIC_TYPE_UQLDATE:
244: case MAGIC_TYPE_BEQDATE:
245: case MAGIC_TYPE_BEQLDATE:
246: case MAGIC_TYPE_UBEQDATE:
247: case MAGIC_TYPE_UBEQLDATE:
248: case MAGIC_TYPE_LEQDATE:
249: case MAGIC_TYPE_LEQLDATE:
250: case MAGIC_TYPE_ULEQDATE:
251: case MAGIC_TYPE_ULEQLDATE:
252: case MAGIC_TYPE_LEDATE:
253: case MAGIC_TYPE_LELDATE:
254: case MAGIC_TYPE_ULEDATE:
255: case MAGIC_TYPE_ULELDATE:
256: case MAGIC_TYPE_MEDATE:
257: case MAGIC_TYPE_MELDATE:
258: case MAGIC_TYPE_STRING:
259: case MAGIC_TYPE_PSTRING:
260: case MAGIC_TYPE_REGEX:
261: case MAGIC_TYPE_SEARCH:
1.20 nicm 262: case MAGIC_TYPE_DEFAULT:
263: case MAGIC_TYPE_CLEAR:
1.1 nicm 264: re = &ml->root->format_string;
265: break;
266: }
267: }
268:
269: pmatch.rm_so = 0;
270: pmatch.rm_eo = fmtlen;
271: if (regexec(re, fmt, 1, &pmatch, REG_STARTEND) != 0) {
272: magic_warn(ml, "bad format for %s: %%%.*s", ml->type_string,
273: (int)fmtlen, fmt);
274: return (-1);
275: }
276:
277: return (0);
278: }
279:
280: static u_int
281: magic_get_strength(struct magic_line *ml)
282: {
283: int n;
284: size_t size;
285:
1.13 nicm 286: if (ml->type == MAGIC_TYPE_NONE)
287: return (0);
288:
1.18 nicm 289: if (ml->test_not || ml->test_operator == 'x') {
290: n = 1;
291: goto skip;
292: }
1.1 nicm 293:
1.5 nicm 294: n = 2 * MAGIC_STRENGTH_MULTIPLIER;
1.1 nicm 295: switch (ml->type) {
296: case MAGIC_TYPE_NONE:
297: case MAGIC_TYPE_DEFAULT:
298: return (0);
1.20 nicm 299: case MAGIC_TYPE_CLEAR:
300: break;
1.1 nicm 301: case MAGIC_TYPE_BYTE:
302: case MAGIC_TYPE_UBYTE:
303: n += 1 * MAGIC_STRENGTH_MULTIPLIER;
304: break;
305: case MAGIC_TYPE_SHORT:
306: case MAGIC_TYPE_USHORT:
307: case MAGIC_TYPE_BESHORT:
308: case MAGIC_TYPE_UBESHORT:
309: case MAGIC_TYPE_LESHORT:
310: case MAGIC_TYPE_ULESHORT:
311: n += 2 * MAGIC_STRENGTH_MULTIPLIER;
312: break;
313: case MAGIC_TYPE_LONG:
314: case MAGIC_TYPE_ULONG:
315: case MAGIC_TYPE_FLOAT:
316: case MAGIC_TYPE_DATE:
317: case MAGIC_TYPE_LDATE:
318: case MAGIC_TYPE_UDATE:
319: case MAGIC_TYPE_ULDATE:
320: case MAGIC_TYPE_BELONG:
321: case MAGIC_TYPE_UBELONG:
322: case MAGIC_TYPE_BEFLOAT:
323: case MAGIC_TYPE_BEDATE:
324: case MAGIC_TYPE_BELDATE:
325: case MAGIC_TYPE_UBEDATE:
326: case MAGIC_TYPE_UBELDATE:
327: n += 4 * MAGIC_STRENGTH_MULTIPLIER;
328: break;
329: case MAGIC_TYPE_QUAD:
330: case MAGIC_TYPE_UQUAD:
331: case MAGIC_TYPE_DOUBLE:
332: case MAGIC_TYPE_QDATE:
333: case MAGIC_TYPE_QLDATE:
334: case MAGIC_TYPE_UQDATE:
335: case MAGIC_TYPE_UQLDATE:
336: case MAGIC_TYPE_BEQUAD:
337: case MAGIC_TYPE_UBEQUAD:
338: case MAGIC_TYPE_BEDOUBLE:
339: case MAGIC_TYPE_BEQDATE:
340: case MAGIC_TYPE_BEQLDATE:
341: case MAGIC_TYPE_UBEQDATE:
342: case MAGIC_TYPE_UBEQLDATE:
343: case MAGIC_TYPE_LEQUAD:
344: case MAGIC_TYPE_ULEQUAD:
345: case MAGIC_TYPE_LEDOUBLE:
346: case MAGIC_TYPE_LEQDATE:
347: case MAGIC_TYPE_LEQLDATE:
348: case MAGIC_TYPE_ULEQDATE:
349: case MAGIC_TYPE_ULEQLDATE:
350: case MAGIC_TYPE_LELONG:
351: case MAGIC_TYPE_ULELONG:
352: case MAGIC_TYPE_LEFLOAT:
353: case MAGIC_TYPE_LEDATE:
354: case MAGIC_TYPE_LELDATE:
355: case MAGIC_TYPE_ULEDATE:
356: case MAGIC_TYPE_ULELDATE:
357: case MAGIC_TYPE_MELONG:
358: case MAGIC_TYPE_MEDATE:
359: case MAGIC_TYPE_MELDATE:
360: n += 8 * MAGIC_STRENGTH_MULTIPLIER;
361: break;
362: case MAGIC_TYPE_STRING:
363: case MAGIC_TYPE_PSTRING:
364: n += ml->test_string_size * MAGIC_STRENGTH_MULTIPLIER;
365: break;
366: case MAGIC_TYPE_BESTRING16:
367: case MAGIC_TYPE_LESTRING16:
368: n += ml->test_string_size * MAGIC_STRENGTH_MULTIPLIER / 2;
369: break;
370: case MAGIC_TYPE_REGEX:
371: case MAGIC_TYPE_SEARCH:
372: size = MAGIC_STRENGTH_MULTIPLIER / ml->test_string_size;
373: if (size < 1)
374: size = 1;
375: n += ml->test_string_size * size;
376: break;
377: }
378: switch (ml->test_operator) {
379: case '=':
380: n += MAGIC_STRENGTH_MULTIPLIER;
381: break;
382: case '<':
383: case '>':
384: case '[':
385: case ']':
386: n -= 2 * MAGIC_STRENGTH_MULTIPLIER;
387: break;
388: case '^':
389: case '&':
390: n -= MAGIC_STRENGTH_MULTIPLIER;
391: break;
392: }
1.18 nicm 393:
394: skip:
395: switch (ml->strength_operator) {
396: case '+':
397: n += ml->strength_value;
398: break;
399: case '-':
400: n -= ml->strength_value;
401: break;
402: case '*':
403: n *= ml->strength_value;
404: break;
405: case '/':
406: n /= ml->strength_value;
407: break;
408: }
1.1 nicm 409: return (n <= 0 ? 1 : n);
410: }
411:
/*
 * Copy one whitespace-delimited token from *line into out, expanding
 * backslash escapes as it goes:
 *
 *	\ooo	one to three octal digits
 *	\xhh	one or two hexadecimal digits
 *	\a \b \t \f \n \r	the usual control characters
 *	\c	any other character c stands for itself
 *
 * out is NUL terminated and *outlen receives the number of bytes written
 * before the terminator. On success *line is moved to the first character
 * after the token and 0 is returned. A trailing backslash or \x without a
 * hexadecimal digit returns -1 and leaves *line and *outlen alone. out is
 * never written past the length of the input token plus its terminator.
 */
static int
magic_get_string(char **line, char *out, size_t *outlen)
{
	char	*in = *line, *dst = out;
	int	 hi, mid, lo;

	while (*in != '\0' && !isspace((u_char)*in)) {
		if (*in != '\\') {
			*dst++ = *in++;
			continue;
		}

		/* Step onto the character following the backslash. */
		in++;
		switch (*in) {
		case '\0': /* end of line */
			return (-1);
		case '0':
		case '1':
		case '2':
		case '3':
		case '4':
		case '5':
		case '6':
		case '7':
			hi = magic_odigit(in[0]);
			mid = magic_odigit(in[1]);
			/* Only look at a third digit if there is a second. */
			lo = (mid != -1) ? magic_odigit(in[2]) : -1;

			if (mid == -1)
				*dst = hi;
			else if (lo == -1) {
				*dst = mid | (hi << 3);
				in++;
			} else {
				*dst = lo | (mid << 3) | (hi << 6);
				in += 2;
			}
			dst++;
			break;
		case 'x':
			hi = magic_xdigit(in[1]);
			if (hi == -1)
				return (-1);
			lo = magic_xdigit(in[2]);

			if (lo == -1) {
				*dst = hi;
				in++;
			} else {
				*dst = lo | (hi << 4);
				in += 2;
			}
			dst++;
			break;
		case 'a':
			*dst++ = '\a';
			break;
		case 'b':
			*dst++ = '\b';
			break;
		case 't':
			*dst++ = '\t';
			break;
		case 'f':
			*dst++ = '\f';
			break;
		case 'n':
			*dst++ = '\n';
			break;
		case 'r':
			*dst++ = '\r';
			break;
		default:
			/*
			 * Space, backslash, quotes and anything else not
			 * handled above are copied through unchanged.
			 */
			*dst++ = *in;
			break;
		}

		/* Move past the last character consumed by the escape. */
		in++;
	}
	*dst = '\0';
	*outlen = dst - out;

	*line = in;
	return (0);
}
517:
518: static int
519: magic_parse_offset(struct magic_line *ml, char **line)
520: {
521: char *copy, *s, *cp, *endptr;
522:
523: while (isspace((u_char)**line))
524: (*line)++;
525: copy = s = cp = xmalloc(strlen(*line) + 1);
526: while (**line != '\0' && !isspace((u_char)**line))
527: *cp++ = *(*line)++;
528: *cp = '\0';
529:
530: ml->offset = 0;
531: ml->offset_relative = 0;
532:
533: ml->indirect_type = ' ';
534: ml->indirect_relative = 0;
535: ml->indirect_offset = 0;
536: ml->indirect_operator = ' ';
537: ml->indirect_operand = 0;
538:
539: if (*s == '&') {
540: ml->offset_relative = 1;
541: s++;
542: }
543:
544: if (*s != '(') {
545: endptr = magic_strtoll(s, &ml->offset);
546: if (endptr == NULL || *endptr != '\0') {
547: magic_warn(ml, "missing closing bracket");
548: goto fail;
549: }
550: if (ml->offset < 0 && !ml->offset_relative) {
551: magic_warn(ml, "negative absolute offset");
552: goto fail;
553: }
554: goto done;
555: }
556: s++;
557:
558: if (*s == '&') {
559: ml->indirect_relative = 1;
560: s++;
561: }
562:
563: endptr = magic_strtoll(s, &ml->indirect_offset);
564: if (endptr == NULL) {
1.8 nicm 565: magic_warn(ml, "can't parse offset: %s", s);
1.1 nicm 566: goto fail;
567: }
568: s = endptr;
569: if (*s == ')')
570: goto done;
571:
572: if (*s == '.') {
573: s++;
1.6 tobias 574: if (*s == '\0' || strchr("bslBSL", *s) == NULL) {
1.8 nicm 575: magic_warn(ml, "unknown offset type: %c", *s);
1.1 nicm 576: goto fail;
577: }
578: ml->indirect_type = *s;
579: s++;
580: if (*s == ')')
581: goto done;
582: }
583:
1.6 tobias 584: if (*s == '\0' || strchr("+-*", *s) == NULL) {
1.8 nicm 585: magic_warn(ml, "unknown offset operator: %c", *s);
1.1 nicm 586: goto fail;
587: }
588: ml->indirect_operator = *s;
589: s++;
590: if (*s == ')')
591: goto done;
592:
593: if (*s == '(') {
594: s++;
595: endptr = magic_strtoll(s, &ml->indirect_operand);
596: if (endptr == NULL || *endptr != ')') {
597: magic_warn(ml, "missing closing bracket");
598: goto fail;
599: }
600: if (*++endptr != ')') {
601: magic_warn(ml, "missing closing bracket");
602: goto fail;
603: }
604: } else {
605: endptr = magic_strtoll(s, &ml->indirect_operand);
606: if (endptr == NULL || *endptr != ')') {
607: magic_warn(ml, "missing closing bracket");
608: goto fail;
609: }
610: }
611:
612: done:
613: free(copy);
614: return (0);
615:
616: fail:
617: free(copy);
618: return (-1);
619: }
620:
621: static int
622: magic_parse_type(struct magic_line *ml, char **line)
623: {
624: char *copy, *s, *cp, *endptr;
625:
626: while (isspace((u_char)**line))
627: (*line)++;
628: copy = s = cp = xmalloc(strlen(*line) + 1);
629: while (**line != '\0' && !isspace((u_char)**line))
630: *cp++ = *(*line)++;
631: *cp = '\0';
632:
633: ml->type = MAGIC_TYPE_NONE;
634: ml->type_operator = ' ';
635: ml->type_operand = 0;
636:
1.16 nicm 637: if (strncmp(s, "string", (sizeof "string") - 1) == 0 ||
638: strncmp(s, "ustring", (sizeof "ustring") - 1) == 0) {
639: if (*s == 'u')
640: ml->type_string = xstrdup(s + 1);
641: else
642: ml->type_string = xstrdup(s);
1.1 nicm 643: ml->type = MAGIC_TYPE_STRING;
644: magic_mark_text(ml, 0);
645: goto done;
646: }
1.16 nicm 647: if (strncmp(s, "pstring", (sizeof "pstring") - 1) == 0 ||
648: strncmp(s, "upstring", (sizeof "upstring") - 1) == 0) {
649: if (*s == 'u')
650: ml->type_string = xstrdup(s + 1);
651: else
652: ml->type_string = xstrdup(s);
653: ml->type = MAGIC_TYPE_PSTRING;
654: magic_mark_text(ml, 0);
655: goto done;
656: }
657: if (strncmp(s, "search", (sizeof "search") - 1) == 0 ||
658: strncmp(s, "usearch", (sizeof "usearch") - 1) == 0) {
659: if (*s == 'u')
660: ml->type_string = xstrdup(s + 1);
661: else
662: ml->type_string = xstrdup(s);
1.1 nicm 663: ml->type = MAGIC_TYPE_SEARCH;
664: goto done;
665: }
1.16 nicm 666: if (strncmp(s, "regex", (sizeof "regex") - 1) == 0 ||
667: strncmp(s, "uregex", (sizeof "uregex") - 1) == 0) {
668: if (*s == 'u')
669: ml->type_string = xstrdup(s + 1);
670: else
671: ml->type_string = xstrdup(s);
1.1 nicm 672: ml->type = MAGIC_TYPE_REGEX;
673: goto done;
674: }
1.16 nicm 675: ml->type_string = xstrdup(s);
1.1 nicm 676:
1.12 nicm 677: cp = &s[strcspn(s, "+-&/%*")];
1.1 nicm 678: if (*cp != '\0') {
679: ml->type_operator = *cp;
680: endptr = magic_strtoull(cp + 1, &ml->type_operand);
681: if (endptr == NULL || *endptr != '\0') {
1.8 nicm 682: magic_warn(ml, "can't parse operand: %s", cp + 1);
1.1 nicm 683: goto fail;
684: }
685: *cp = '\0';
686: }
687:
688: if (strcmp(s, "byte") == 0)
689: ml->type = MAGIC_TYPE_BYTE;
690: else if (strcmp(s, "short") == 0)
691: ml->type = MAGIC_TYPE_SHORT;
692: else if (strcmp(s, "long") == 0)
693: ml->type = MAGIC_TYPE_LONG;
694: else if (strcmp(s, "quad") == 0)
695: ml->type = MAGIC_TYPE_QUAD;
696: else if (strcmp(s, "ubyte") == 0)
697: ml->type = MAGIC_TYPE_UBYTE;
698: else if (strcmp(s, "ushort") == 0)
699: ml->type = MAGIC_TYPE_USHORT;
700: else if (strcmp(s, "ulong") == 0)
701: ml->type = MAGIC_TYPE_ULONG;
702: else if (strcmp(s, "uquad") == 0)
703: ml->type = MAGIC_TYPE_UQUAD;
1.16 nicm 704: else if (strcmp(s, "float") == 0 || strcmp(s, "ufloat") == 0)
1.1 nicm 705: ml->type = MAGIC_TYPE_FLOAT;
1.16 nicm 706: else if (strcmp(s, "double") == 0 || strcmp(s, "udouble") == 0)
1.1 nicm 707: ml->type = MAGIC_TYPE_DOUBLE;
708: else if (strcmp(s, "date") == 0)
709: ml->type = MAGIC_TYPE_DATE;
710: else if (strcmp(s, "qdate") == 0)
711: ml->type = MAGIC_TYPE_QDATE;
712: else if (strcmp(s, "ldate") == 0)
713: ml->type = MAGIC_TYPE_LDATE;
714: else if (strcmp(s, "qldate") == 0)
715: ml->type = MAGIC_TYPE_QLDATE;
716: else if (strcmp(s, "udate") == 0)
717: ml->type = MAGIC_TYPE_UDATE;
718: else if (strcmp(s, "uqdate") == 0)
719: ml->type = MAGIC_TYPE_UQDATE;
720: else if (strcmp(s, "uldate") == 0)
721: ml->type = MAGIC_TYPE_ULDATE;
722: else if (strcmp(s, "uqldate") == 0)
723: ml->type = MAGIC_TYPE_UQLDATE;
724: else if (strcmp(s, "beshort") == 0)
725: ml->type = MAGIC_TYPE_BESHORT;
726: else if (strcmp(s, "belong") == 0)
727: ml->type = MAGIC_TYPE_BELONG;
728: else if (strcmp(s, "bequad") == 0)
729: ml->type = MAGIC_TYPE_BEQUAD;
730: else if (strcmp(s, "ubeshort") == 0)
731: ml->type = MAGIC_TYPE_UBESHORT;
732: else if (strcmp(s, "ubelong") == 0)
733: ml->type = MAGIC_TYPE_UBELONG;
734: else if (strcmp(s, "ubequad") == 0)
735: ml->type = MAGIC_TYPE_UBEQUAD;
1.16 nicm 736: else if (strcmp(s, "befloat") == 0 || strcmp(s, "ubefloat") == 0)
1.1 nicm 737: ml->type = MAGIC_TYPE_BEFLOAT;
1.16 nicm 738: else if (strcmp(s, "bedouble") == 0 || strcmp(s, "ubedouble") == 0)
1.1 nicm 739: ml->type = MAGIC_TYPE_BEDOUBLE;
740: else if (strcmp(s, "bedate") == 0)
741: ml->type = MAGIC_TYPE_BEDATE;
742: else if (strcmp(s, "beqdate") == 0)
743: ml->type = MAGIC_TYPE_BEQDATE;
744: else if (strcmp(s, "beldate") == 0)
745: ml->type = MAGIC_TYPE_BELDATE;
746: else if (strcmp(s, "beqldate") == 0)
747: ml->type = MAGIC_TYPE_BEQLDATE;
748: else if (strcmp(s, "ubedate") == 0)
749: ml->type = MAGIC_TYPE_UBEDATE;
750: else if (strcmp(s, "ubeqdate") == 0)
751: ml->type = MAGIC_TYPE_UBEQDATE;
752: else if (strcmp(s, "ubeldate") == 0)
753: ml->type = MAGIC_TYPE_UBELDATE;
754: else if (strcmp(s, "ubeqldate") == 0)
755: ml->type = MAGIC_TYPE_UBEQLDATE;
1.16 nicm 756: else if (strcmp(s, "bestring16") == 0 || strcmp(s, "ubestring16") == 0)
1.1 nicm 757: ml->type = MAGIC_TYPE_BESTRING16;
758: else if (strcmp(s, "leshort") == 0)
759: ml->type = MAGIC_TYPE_LESHORT;
760: else if (strcmp(s, "lelong") == 0)
761: ml->type = MAGIC_TYPE_LELONG;
762: else if (strcmp(s, "lequad") == 0)
763: ml->type = MAGIC_TYPE_LEQUAD;
764: else if (strcmp(s, "uleshort") == 0)
765: ml->type = MAGIC_TYPE_ULESHORT;
766: else if (strcmp(s, "ulelong") == 0)
767: ml->type = MAGIC_TYPE_ULELONG;
768: else if (strcmp(s, "ulequad") == 0)
769: ml->type = MAGIC_TYPE_ULEQUAD;
1.16 nicm 770: else if (strcmp(s, "lefloat") == 0 || strcmp(s, "ulefloat") == 0)
1.1 nicm 771: ml->type = MAGIC_TYPE_LEFLOAT;
1.16 nicm 772: else if (strcmp(s, "ledouble") == 0 || strcmp(s, "uledouble") == 0)
1.1 nicm 773: ml->type = MAGIC_TYPE_LEDOUBLE;
774: else if (strcmp(s, "ledate") == 0)
775: ml->type = MAGIC_TYPE_LEDATE;
776: else if (strcmp(s, "leqdate") == 0)
777: ml->type = MAGIC_TYPE_LEQDATE;
778: else if (strcmp(s, "leldate") == 0)
779: ml->type = MAGIC_TYPE_LELDATE;
780: else if (strcmp(s, "leqldate") == 0)
781: ml->type = MAGIC_TYPE_LEQLDATE;
782: else if (strcmp(s, "uledate") == 0)
783: ml->type = MAGIC_TYPE_ULEDATE;
784: else if (strcmp(s, "uleqdate") == 0)
785: ml->type = MAGIC_TYPE_ULEQDATE;
786: else if (strcmp(s, "uleldate") == 0)
787: ml->type = MAGIC_TYPE_ULELDATE;
788: else if (strcmp(s, "uleqldate") == 0)
789: ml->type = MAGIC_TYPE_ULEQLDATE;
1.16 nicm 790: else if (strcmp(s, "lestring16") == 0 || strcmp(s, "ulestring16") == 0)
1.1 nicm 791: ml->type = MAGIC_TYPE_LESTRING16;
1.16 nicm 792: else if (strcmp(s, "melong") == 0 || strcmp(s, "umelong") == 0)
1.1 nicm 793: ml->type = MAGIC_TYPE_MELONG;
1.16 nicm 794: else if (strcmp(s, "medate") == 0 || strcmp(s, "umedate") == 0)
1.1 nicm 795: ml->type = MAGIC_TYPE_MEDATE;
1.16 nicm 796: else if (strcmp(s, "meldate") == 0 || strcmp(s, "umeldate") == 0)
1.1 nicm 797: ml->type = MAGIC_TYPE_MELDATE;
1.16 nicm 798: else if (strcmp(s, "default") == 0 || strcmp(s, "udefault") == 0)
1.1 nicm 799: ml->type = MAGIC_TYPE_DEFAULT;
1.20 nicm 800: else if (strcmp(s, "clear") == 0 || strcmp(s, "uclear") == 0)
801: ml->type = MAGIC_TYPE_CLEAR;
1.1 nicm 802: else {
1.8 nicm 803: magic_warn(ml, "unknown type: %s", s);
1.1 nicm 804: goto fail;
805: }
806: magic_mark_text(ml, 0);
807:
808: done:
809: free(copy);
810: return (0);
811:
812: fail:
813: free(copy);
814: return (-1);
815: }
816:
817: static int
818: magic_parse_value(struct magic_line *ml, char **line)
819: {
820: char *copy, *s, *cp, *endptr;
821: size_t slen;
1.10 nicm 822: uint64_t u;
1.1 nicm 823:
824: while (isspace((u_char)**line))
825: (*line)++;
826:
827: ml->test_operator = '=';
828: ml->test_not = 0;
829: ml->test_string = NULL;
830: ml->test_string_size = 0;
831: ml->test_unsigned = 0;
832: ml->test_signed = 0;
833:
1.9 nicm 834: if (**line == '\0')
835: return (0);
836:
1.1 nicm 837: s = *line;
838: if (s[0] == 'x' && (s[1] == '\0' || isspace((u_char)s[1]))) {
839: (*line)++;
1.20 nicm 840: ml->test_operator = 'x';
841: return (0);
842: }
843:
844: if (ml->type == MAGIC_TYPE_DEFAULT || ml->type == MAGIC_TYPE_CLEAR) {
845: magic_warn(ml, "test specified for default or clear");
1.1 nicm 846: ml->test_operator = 'x';
847: return (0);
848: }
849:
850: if (**line == '!') {
851: ml->test_not = 1;
852: (*line)++;
853: }
854:
855: switch (ml->type) {
856: case MAGIC_TYPE_STRING:
857: case MAGIC_TYPE_PSTRING:
858: case MAGIC_TYPE_SEARCH:
859: if (**line == '>' || **line == '<' || **line == '=') {
860: ml->test_operator = **line;
861: (*line)++;
862: }
863: /* FALLTHROUGH */
864: case MAGIC_TYPE_REGEX:
1.21 ! nicm 865: if (**line == '=')
! 866: (*line)++;
1.1 nicm 867: copy = s = xmalloc(strlen(*line) + 1);
868: if (magic_get_string(line, s, &slen) != 0) {
869: magic_warn(ml, "can't parse string");
870: goto fail;
871: }
872: ml->test_string_size = slen;
873: ml->test_string = s;
874: return (0); /* do not free */
875: default:
876: break;
877: }
878:
1.9 nicm 879: while (isspace((u_char)**line))
880: (*line)++;
881: if ((*line)[0] == '<' && (*line)[1] == '=') {
882: ml->test_operator = '[';
883: (*line) += 2;
884: } else if ((*line)[0] == '>' && (*line)[1] == '=') {
885: ml->test_operator = ']';
1.1 nicm 886: (*line) += 2;
1.19 tobias 887: } else if (**line != '\0' && strchr("=<>&^", **line) != NULL) {
1.9 nicm 888: ml->test_operator = **line;
889: (*line)++;
1.1 nicm 890: }
1.9 nicm 891:
892: while (isspace((u_char)**line))
893: (*line)++;
894: copy = cp = xmalloc(strlen(*line) + 1);
1.1 nicm 895: while (**line != '\0' && !isspace((u_char)**line))
896: *cp++ = *(*line)++;
897: *cp = '\0';
898:
1.11 nicm 899: switch (ml->type) {
900: case MAGIC_TYPE_FLOAT:
901: case MAGIC_TYPE_DOUBLE:
902: case MAGIC_TYPE_BEFLOAT:
903: case MAGIC_TYPE_BEDOUBLE:
904: case MAGIC_TYPE_LEFLOAT:
905: case MAGIC_TYPE_LEDOUBLE:
906: errno = 0;
907: ml->test_double = strtod(copy, &endptr);
908: if (errno == ERANGE)
909: endptr = NULL;
910: break;
911: default:
912: if (*ml->type_string == 'u')
913: endptr = magic_strtoull(copy, &ml->test_unsigned);
914: else {
915: endptr = magic_strtoll(copy, &ml->test_signed);
916: if (endptr == NULL || *endptr != '\0') {
917: /*
918: * If we can't parse this as a signed number,
919: * try as unsigned instead.
920: */
921: endptr = magic_strtoull(copy, &u);
922: if (endptr != NULL && *endptr == '\0')
923: ml->test_signed = (int64_t)u;
924: }
1.10 nicm 925: }
1.11 nicm 926: break;
1.10 nicm 927: }
1.1 nicm 928: if (endptr == NULL || *endptr != '\0') {
1.9 nicm 929: magic_warn(ml, "can't parse number: %s", copy);
1.1 nicm 930: goto fail;
931: }
932:
933: free(copy);
934: return (0);
935:
936: fail:
937: free(copy);
938: return (-1);
939: }
940:
941: int
942: magic_compare(struct magic_line *ml1, struct magic_line *ml2)
943: {
944: if (ml1->strength < ml2->strength)
945: return (1);
946: if (ml1->strength > ml2->strength)
947: return (-1);
948:
949: /*
950: * The original file depends on the (undefined!) qsort(3) behaviour
951: * when the strength is equal. This is impossible to reproduce with an
952: * RB tree so just use the line number and hope for the best.
953: */
954: if (ml1->line < ml2->line)
955: return (-1);
956: if (ml1->line > ml2->line)
957: return (1);
958:
959: return (0);
960: }
961: RB_GENERATE(magic_tree, magic_line, node, magic_compare);
962:
963: static void
1.18 nicm 964: magic_adjust_strength(struct magic *m, u_int at, struct magic_line *ml,
965: char *line)
966: {
967: char *cp, *s;
968: int64_t value;
969:
970: cp = line + (sizeof "!:strength") - 1;
971: while (isspace((u_char)*cp))
972: cp++;
973: s = cp;
974:
975: cp = strchr(s, '#');
976: if (cp != NULL)
977: *cp = '\0';
978: cp = s;
979:
1.19 tobias 980: if (*s == '\0' || strchr("+-*/", *s) == NULL) {
1.18 nicm 981: magic_warnm(m, at, "invalid strength operator: %s", s);
982: return;
983: }
984: ml->strength_operator = *cp++;
985:
986: while (isspace((u_char)*cp))
987: cp++;
988: cp = magic_strtoll(cp, &value);
989: while (cp != NULL && isspace((u_char)*cp))
990: cp++;
991: if (cp == NULL || *cp != '\0' || value < 0 || value > 255) {
992: magic_warnm(m, at, "invalid strength value: %s", s);
993: return;
994: }
995: ml->strength_value = value;
996: }
997:
998: static void
1.1 nicm 999: magic_set_mimetype(struct magic *m, u_int at, struct magic_line *ml, char *line)
1000: {
1001: char *mimetype, *cp;
1002:
1003: mimetype = line + (sizeof "!:mime") - 1;
1004: while (isspace((u_char)*mimetype))
1005: mimetype++;
1006:
1007: cp = strchr(mimetype, '#');
1008: if (cp != NULL)
1009: *cp = '\0';
1010:
1011: if (*mimetype != '\0') {
1012: cp = mimetype + strlen(mimetype) - 1;
1013: while (cp != mimetype && isspace((u_char)*cp))
1014: *cp-- = '\0';
1015: }
1016:
1017: cp = mimetype;
1018: while (*cp != '\0') {
1019: if (!isalnum((u_char)*cp) && strchr("/-.+", *cp) == NULL)
1020: break;
1021: cp++;
1022: }
1023: if (*mimetype == '\0' || *cp != '\0') {
1.7 nicm 1024: magic_warnm(m, at, "invalid MIME type: %s", mimetype);
1.1 nicm 1025: return;
1026: }
1027: if (ml == NULL) {
1.7 nicm 1028: magic_warnm(m, at, "stray MIME type: %s", mimetype);
1.1 nicm 1029: return;
1030: }
1031: ml->mimetype = xstrdup(mimetype);
1032: }
1033:
1034: struct magic *
1035: magic_load(FILE *f, const char *path, int warnings)
1036: {
1037: struct magic *m;
1038: struct magic_line *ml = NULL, *parent, *parent0;
1039: char *line, *tmp;
1040: size_t size;
1041: u_int at, level, n, i;
1042:
1043: m = xcalloc(1, sizeof *m);
1044: m->path = xstrdup(path);
1045: m->warnings = warnings;
1046: RB_INIT(&m->tree);
1047:
1048: parent = NULL;
1049: parent0 = NULL;
1050: level = 0;
1051:
1052: at = 0;
1053: tmp = NULL;
1054: while ((line = fgetln(f, &size))) {
1055: if (line[size - 1] == '\n')
1056: line[size - 1] = '\0';
1057: else {
1058: tmp = xmalloc(size + 1);
1059: memcpy(tmp, line, size);
1060: tmp[size] = '\0';
1061: line = tmp;
1062: }
1063: at++;
1064:
1065: while (isspace((u_char)*line))
1066: line++;
1067: if (*line == '\0' || *line == '#')
1068: continue;
1069:
1.15 nicm 1070: if (strncmp (line, "!:mime", 6) == 0) {
1.1 nicm 1071: magic_set_mimetype(m, at, ml, line);
1.18 nicm 1072: continue;
1073: }
1074: if (strncmp (line, "!:strength", 10) == 0) {
1075: magic_adjust_strength(m, at, ml, line);
1.15 nicm 1076: continue;
1077: }
1078: if (strncmp (line, "!:", 2) == 0) {
1079: for (i = 0; i < 64 && line[i] != '\0'; i++) {
1080: if (isspace((u_char)line[i]))
1081: break;
1082: }
1083: magic_warnm(m, at, "%.*s not supported", i, line);
1.1 nicm 1084: continue;
1085: }
1086:
1087: n = 0;
1088: for (; *line == '>'; line++)
1089: n++;
1090:
1091: ml = xcalloc(1, sizeof *ml);
1092: ml->root = m;
1093: ml->line = at;
1094: ml->type = MAGIC_TYPE_NONE;
1095: TAILQ_INIT(&ml->children);
1096: ml->text = 1;
1097:
1.13 nicm 1098: /*
1099: * At this point n is the level we want, level is the current
1100: * level. parent0 is the last line at the same level and parent
1101: * is the last line at the previous level.
1102: */
1.1 nicm 1103: if (n == level + 1) {
1104: parent = parent0;
1105: } else if (n < level) {
1106: for (i = n; i < level && parent != NULL; i++)
1107: parent = parent->parent;
1108: } else if (n != level) {
1109: magic_warn(ml, "level skipped (%u->%u)", level, n);
1110: free(ml);
1111: continue;
1112: }
1113: ml->parent = parent;
1114: level = n;
1115:
1116: if (magic_parse_offset(ml, &line) != 0 ||
1117: magic_parse_type(ml, &line) != 0 ||
1118: magic_parse_value(ml, &line) != 0 ||
1119: magic_set_result(ml, line) != 0) {
1.13 nicm 1120: /*
1121: * An invalid line still needs to appear in the tree in
1122: * case it has any children.
1123: */
1124: ml->type = MAGIC_TYPE_NONE;
1.1 nicm 1125: }
1126:
1127: ml->strength = magic_get_strength(ml);
1128: if (ml->parent == NULL)
1129: RB_INSERT(magic_tree, &m->tree, ml);
1130: else
1131: TAILQ_INSERT_TAIL(&ml->parent->children, ml, entry);
1132: parent0 = ml;
1133: }
1134: free(tmp);
1135:
1136: fclose(f);
1137: return (m);
1138: }