Annotation of src/usr.bin/file/magic-load.c, Revision 1.17
1.17 ! nicm 1: /* $OpenBSD: magic-load.c,v 1.16 2015/08/11 22:48:09 nicm Exp $ */
1.1 nicm 2:
3: /*
4: * Copyright (c) 2015 Nicholas Marriott <nicm@openbsd.org>
5: *
6: * Permission to use, copy, modify, and distribute this software for any
7: * purpose with or without fee is hereby granted, provided that the above
8: * copyright notice and this permission notice appear in all copies.
9: *
10: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14: * WHATSOEVER RESULTING FROM LOSS OF MIND, USE, DATA OR PROFITS, WHETHER
15: * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
16: * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17: */
18:
19: #include <sys/types.h>
20:
21: #include <ctype.h>
22: #include <errno.h>
23: #include <limits.h>
24: #include <regex.h>
25: #include <stdarg.h>
26: #include <stdio.h>
27: #include <stdlib.h>
28: #include <string.h>
29:
30: #include "magic.h"
31: #include "xmalloc.h"
32:
/*
 * Convert an octal digit character to its numeric value.
 * Returns 0-7 for '0'-'7' and -1 for any other character.
 */
static int
magic_odigit(u_char c)
{
	return ((c < '0' || c > '7') ? -1 : c - '0');
}
40:
/*
 * Convert a hexadecimal digit character (either case) to its numeric value.
 * Returns 0-15 for a valid digit and -1 for any other character.
 */
static int
magic_xdigit(u_char c)
{
	int	value = -1;

	if (c >= 'a' && c <= 'f')
		value = c - 'a' + 10;
	else if (c >= 'A' && c <= 'F')
		value = c - 'A' + 10;
	else if (c >= '0' && c <= '9')
		value = c - '0';
	return (value);
}
52:
53: static void
54: magic_mark_text(struct magic_line *ml, int text)
55: {
56: do {
57: ml->text = text;
58: ml = ml->parent;
59: } while (ml != NULL);
60: }
61:
/*
 * Compile the extended regular expression p into re (without subexpression
 * reporting). On failure a warning naming the pattern is issued against ml
 * and -1 is returned; on success 0 is returned.
 */
static int
magic_make_pattern(struct magic_line *ml, const char *name, regex_t *re,
    const char *p)
{
	char	msg[256];
	int	rc;

	rc = regcomp(re, p, REG_EXTENDED|REG_NOSUB);
	if (rc == 0)
		return (0);

	regerror(rc, re, msg, sizeof msg);
	magic_warn(ml, "bad %s pattern: %s", name, msg);
	return (-1);
}
77:
78: static int
79: magic_set_result(struct magic_line *ml, const char *s)
80: {
81: const char *fmt;
82: const char *endfmt;
83: const char *cp;
84: regex_t *re = NULL;
85: regmatch_t pmatch;
86: size_t fmtlen;
87:
88: while (isspace((u_char)*s))
89: s++;
90: if (*s == '\0') {
91: ml->result = NULL;
92: return (0);
93: }
94: ml->result = xstrdup(s);
95:
96: fmt = NULL;
97: for (cp = s; *cp != '\0'; cp++) {
98: if (cp[0] == '%' && cp[1] != '%') {
99: if (fmt != NULL) {
100: magic_warn(ml, "multiple formats");
101: return (-1);
102: }
103: fmt = cp;
104: }
105: }
106: if (fmt == NULL)
107: return (0);
108: fmt++;
109:
110: for (endfmt = fmt; *endfmt != '\0'; endfmt++) {
111: if (strchr("diouxXeEfFgGsc", *endfmt) != NULL)
112: break;
113: }
114: if (*endfmt == '\0') {
115: magic_warn(ml, "unterminated format");
116: return (-1);
117: }
118: fmtlen = endfmt + 1 - fmt;
119: if (fmtlen > 32) {
120: magic_warn(ml, "format too long");
121: return (-1);
122: }
123:
124: if (*endfmt == 's') {
125: switch (ml->type) {
126: case MAGIC_TYPE_DATE:
127: case MAGIC_TYPE_LDATE:
128: case MAGIC_TYPE_UDATE:
129: case MAGIC_TYPE_ULDATE:
130: case MAGIC_TYPE_BEDATE:
131: case MAGIC_TYPE_BELDATE:
132: case MAGIC_TYPE_UBEDATE:
133: case MAGIC_TYPE_UBELDATE:
134: case MAGIC_TYPE_QDATE:
135: case MAGIC_TYPE_QLDATE:
136: case MAGIC_TYPE_UQDATE:
137: case MAGIC_TYPE_UQLDATE:
138: case MAGIC_TYPE_BEQDATE:
139: case MAGIC_TYPE_BEQLDATE:
140: case MAGIC_TYPE_UBEQDATE:
141: case MAGIC_TYPE_UBEQLDATE:
142: case MAGIC_TYPE_LEQDATE:
143: case MAGIC_TYPE_LEQLDATE:
144: case MAGIC_TYPE_ULEQDATE:
145: case MAGIC_TYPE_ULEQLDATE:
146: case MAGIC_TYPE_LEDATE:
147: case MAGIC_TYPE_LELDATE:
148: case MAGIC_TYPE_ULEDATE:
149: case MAGIC_TYPE_ULELDATE:
150: case MAGIC_TYPE_MEDATE:
151: case MAGIC_TYPE_MELDATE:
152: case MAGIC_TYPE_STRING:
153: case MAGIC_TYPE_PSTRING:
154: case MAGIC_TYPE_BESTRING16:
155: case MAGIC_TYPE_LESTRING16:
156: case MAGIC_TYPE_REGEX:
157: case MAGIC_TYPE_SEARCH:
158: break;
159: default:
160: ml->stringify = 1;
161: break;
162: }
163: }
164:
165: if (!ml->root->compiled) {
166: /*
167: * XXX %ld (and %lu and so on) is invalid on 64-bit platforms
168: * with byte, short, long. We get lucky because our first and
169: * only argument ends up in a register. Accept it for now.
170: */
171: if (magic_make_pattern(ml, "short", &ml->root->format_short,
172: "^-?[0-9]*(\\.[0-9]*)?(c|(l|h|hh)?[iduxX])$") != 0)
173: return (-1);
174: if (magic_make_pattern(ml, "long", &ml->root->format_long,
175: "^-?[0-9]*(\\.[0-9]*)?(c|(l|h|hh)?[iduxX])$") != 0)
176: return (-1);
177: if (magic_make_pattern(ml, "quad", &ml->root->format_quad,
178: "^-?[0-9]*(\\.[0-9]*)?ll[iduxX]$") != 0)
179: return (-1);
180: if (magic_make_pattern(ml, "float", &ml->root->format_float,
181: "^-?[0-9]*(\\.[0-9]*)?[eEfFgG]$") != 0)
182: return (-1);
183: if (magic_make_pattern(ml, "string", &ml->root->format_string,
184: "^-?[0-9]*(\\.[0-9]*)?s$") != 0)
185: return (-1);
186: ml->root->compiled = 1;
187: }
188:
189: if (ml->stringify)
190: re = &ml->root->format_string;
191: else {
192: switch (ml->type) {
193: case MAGIC_TYPE_NONE:
194: case MAGIC_TYPE_DEFAULT:
1.17 ! nicm 195: case MAGIC_TYPE_BESTRING16:
! 196: case MAGIC_TYPE_LESTRING16:
1.1 nicm 197: return (0); /* don't use result */
198: case MAGIC_TYPE_BYTE:
199: case MAGIC_TYPE_UBYTE:
200: case MAGIC_TYPE_SHORT:
201: case MAGIC_TYPE_USHORT:
202: case MAGIC_TYPE_BESHORT:
203: case MAGIC_TYPE_UBESHORT:
204: case MAGIC_TYPE_LESHORT:
205: case MAGIC_TYPE_ULESHORT:
206: re = &ml->root->format_short;
207: break;
208: case MAGIC_TYPE_LONG:
209: case MAGIC_TYPE_ULONG:
210: case MAGIC_TYPE_BELONG:
211: case MAGIC_TYPE_UBELONG:
212: case MAGIC_TYPE_LELONG:
213: case MAGIC_TYPE_ULELONG:
214: case MAGIC_TYPE_MELONG:
215: re = &ml->root->format_long;
216: break;
217: case MAGIC_TYPE_QUAD:
218: case MAGIC_TYPE_UQUAD:
219: case MAGIC_TYPE_BEQUAD:
220: case MAGIC_TYPE_UBEQUAD:
221: case MAGIC_TYPE_LEQUAD:
222: case MAGIC_TYPE_ULEQUAD:
223: re = &ml->root->format_quad;
224: break;
225: case MAGIC_TYPE_FLOAT:
226: case MAGIC_TYPE_BEFLOAT:
227: case MAGIC_TYPE_LEFLOAT:
228: case MAGIC_TYPE_DOUBLE:
229: case MAGIC_TYPE_BEDOUBLE:
230: case MAGIC_TYPE_LEDOUBLE:
231: re = &ml->root->format_float;
232: break;
233: case MAGIC_TYPE_DATE:
234: case MAGIC_TYPE_LDATE:
235: case MAGIC_TYPE_UDATE:
236: case MAGIC_TYPE_ULDATE:
237: case MAGIC_TYPE_BEDATE:
238: case MAGIC_TYPE_BELDATE:
239: case MAGIC_TYPE_UBEDATE:
240: case MAGIC_TYPE_UBELDATE:
241: case MAGIC_TYPE_QDATE:
242: case MAGIC_TYPE_QLDATE:
243: case MAGIC_TYPE_UQDATE:
244: case MAGIC_TYPE_UQLDATE:
245: case MAGIC_TYPE_BEQDATE:
246: case MAGIC_TYPE_BEQLDATE:
247: case MAGIC_TYPE_UBEQDATE:
248: case MAGIC_TYPE_UBEQLDATE:
249: case MAGIC_TYPE_LEQDATE:
250: case MAGIC_TYPE_LEQLDATE:
251: case MAGIC_TYPE_ULEQDATE:
252: case MAGIC_TYPE_ULEQLDATE:
253: case MAGIC_TYPE_LEDATE:
254: case MAGIC_TYPE_LELDATE:
255: case MAGIC_TYPE_ULEDATE:
256: case MAGIC_TYPE_ULELDATE:
257: case MAGIC_TYPE_MEDATE:
258: case MAGIC_TYPE_MELDATE:
259: case MAGIC_TYPE_STRING:
260: case MAGIC_TYPE_PSTRING:
261: case MAGIC_TYPE_REGEX:
262: case MAGIC_TYPE_SEARCH:
263: re = &ml->root->format_string;
264: break;
265: }
266: }
267:
268: pmatch.rm_so = 0;
269: pmatch.rm_eo = fmtlen;
270: if (regexec(re, fmt, 1, &pmatch, REG_STARTEND) != 0) {
271: magic_warn(ml, "bad format for %s: %%%.*s", ml->type_string,
272: (int)fmtlen, fmt);
273: return (-1);
274: }
275:
276: return (0);
277: }
278:
279: static u_int
280: magic_get_strength(struct magic_line *ml)
281: {
282: int n;
283: size_t size;
284:
1.13 nicm 285: if (ml->type == MAGIC_TYPE_NONE)
286: return (0);
287:
1.1 nicm 288: if (ml->test_not || ml->test_operator == 'x')
289: return (1);
290:
1.5 nicm 291: n = 2 * MAGIC_STRENGTH_MULTIPLIER;
1.1 nicm 292: switch (ml->type) {
293: case MAGIC_TYPE_NONE:
294: case MAGIC_TYPE_DEFAULT:
295: return (0);
296: case MAGIC_TYPE_BYTE:
297: case MAGIC_TYPE_UBYTE:
298: n += 1 * MAGIC_STRENGTH_MULTIPLIER;
299: break;
300: case MAGIC_TYPE_SHORT:
301: case MAGIC_TYPE_USHORT:
302: case MAGIC_TYPE_BESHORT:
303: case MAGIC_TYPE_UBESHORT:
304: case MAGIC_TYPE_LESHORT:
305: case MAGIC_TYPE_ULESHORT:
306: n += 2 * MAGIC_STRENGTH_MULTIPLIER;
307: break;
308: case MAGIC_TYPE_LONG:
309: case MAGIC_TYPE_ULONG:
310: case MAGIC_TYPE_FLOAT:
311: case MAGIC_TYPE_DATE:
312: case MAGIC_TYPE_LDATE:
313: case MAGIC_TYPE_UDATE:
314: case MAGIC_TYPE_ULDATE:
315: case MAGIC_TYPE_BELONG:
316: case MAGIC_TYPE_UBELONG:
317: case MAGIC_TYPE_BEFLOAT:
318: case MAGIC_TYPE_BEDATE:
319: case MAGIC_TYPE_BELDATE:
320: case MAGIC_TYPE_UBEDATE:
321: case MAGIC_TYPE_UBELDATE:
322: n += 4 * MAGIC_STRENGTH_MULTIPLIER;
323: break;
324: case MAGIC_TYPE_QUAD:
325: case MAGIC_TYPE_UQUAD:
326: case MAGIC_TYPE_DOUBLE:
327: case MAGIC_TYPE_QDATE:
328: case MAGIC_TYPE_QLDATE:
329: case MAGIC_TYPE_UQDATE:
330: case MAGIC_TYPE_UQLDATE:
331: case MAGIC_TYPE_BEQUAD:
332: case MAGIC_TYPE_UBEQUAD:
333: case MAGIC_TYPE_BEDOUBLE:
334: case MAGIC_TYPE_BEQDATE:
335: case MAGIC_TYPE_BEQLDATE:
336: case MAGIC_TYPE_UBEQDATE:
337: case MAGIC_TYPE_UBEQLDATE:
338: case MAGIC_TYPE_LEQUAD:
339: case MAGIC_TYPE_ULEQUAD:
340: case MAGIC_TYPE_LEDOUBLE:
341: case MAGIC_TYPE_LEQDATE:
342: case MAGIC_TYPE_LEQLDATE:
343: case MAGIC_TYPE_ULEQDATE:
344: case MAGIC_TYPE_ULEQLDATE:
345: case MAGIC_TYPE_LELONG:
346: case MAGIC_TYPE_ULELONG:
347: case MAGIC_TYPE_LEFLOAT:
348: case MAGIC_TYPE_LEDATE:
349: case MAGIC_TYPE_LELDATE:
350: case MAGIC_TYPE_ULEDATE:
351: case MAGIC_TYPE_ULELDATE:
352: case MAGIC_TYPE_MELONG:
353: case MAGIC_TYPE_MEDATE:
354: case MAGIC_TYPE_MELDATE:
355: n += 8 * MAGIC_STRENGTH_MULTIPLIER;
356: break;
357: case MAGIC_TYPE_STRING:
358: case MAGIC_TYPE_PSTRING:
359: n += ml->test_string_size * MAGIC_STRENGTH_MULTIPLIER;
360: break;
361: case MAGIC_TYPE_BESTRING16:
362: case MAGIC_TYPE_LESTRING16:
363: n += ml->test_string_size * MAGIC_STRENGTH_MULTIPLIER / 2;
364: break;
365: case MAGIC_TYPE_REGEX:
366: case MAGIC_TYPE_SEARCH:
367: size = MAGIC_STRENGTH_MULTIPLIER / ml->test_string_size;
368: if (size < 1)
369: size = 1;
370: n += ml->test_string_size * size;
371: break;
372: }
373: switch (ml->test_operator) {
374: case '=':
375: n += MAGIC_STRENGTH_MULTIPLIER;
376: break;
377: case '<':
378: case '>':
379: case '[':
380: case ']':
381: n -= 2 * MAGIC_STRENGTH_MULTIPLIER;
382: break;
383: case '^':
384: case '&':
385: n -= MAGIC_STRENGTH_MULTIPLIER;
386: break;
387: }
388: return (n <= 0 ? 1 : n);
389: }
390:
391: static int
392: magic_get_string(char **line, char *out, size_t *outlen)
393: {
394: char *start, *cp, c;
395: int d0, d1, d2;
396:
397: start = out;
398: for (cp = *line; *cp != '\0' && !isspace((u_char)*cp); cp++) {
399: if (*cp != '\\') {
400: *out++ = *cp;
401: continue;
402: }
403:
404: switch (c = *++cp) {
1.3 nicm 405: case '\0': /* end of line */
406: return (-1);
1.1 nicm 407: case ' ':
408: *out++ = ' ';
409: break;
410: case '0':
411: case '1':
412: case '2':
413: case '3':
414: case '4':
415: case '5':
416: case '6':
417: case '7':
418: d0 = magic_odigit(cp[0]);
419: if (cp[0] != '\0')
420: d1 = magic_odigit(cp[1]);
421: else
422: d1 = -1;
423: if (cp[0] != '\0' && cp[1] != '\0')
424: d2 = magic_odigit(cp[2]);
425: else
426: d2 = -1;
427:
428: if (d0 != -1 && d1 != -1 && d2 != -1) {
429: *out = d2 | (d1 << 3) | (d0 << 6);
430: cp += 2;
431: } else if (d0 != -1 && d1 != -1) {
432: *out = d1 | (d0 << 3);
433: cp++;
434: } else if (d0 != -1)
435: *out = d0;
436: else
437: return (-1);
438: out++;
439: break;
440: case 'x':
441: d0 = magic_xdigit(cp[1]);
442: if (cp[1] != '\0')
443: d1 = magic_xdigit(cp[2]);
444: else
445: d1 = -1;
446:
447: if (d0 != -1 && d1 != -1) {
448: *out = d1 | (d0 << 4);
449: cp += 2;
450: } else if (d0 != -1) {
451: *out = d0;
452: cp++;
453: } else
454: return (-1);
455: out++;
456:
457: break;
458: case 'a':
459: *out++ = '\a';
460: break;
461: case 'b':
462: *out++ = '\b';
463: break;
464: case 't':
465: *out++ = '\t';
466: break;
467: case 'f':
468: *out++ = '\f';
469: break;
470: case 'n':
471: *out++ = '\n';
472: break;
473: case 'r':
474: *out++ = '\r';
475: break;
476: case '\\':
477: *out++ = '\\';
478: break;
479: case '\'':
480: *out++ = '\'';
481: break;
482: case '\"':
483: *out++ = '\"';
484: break;
485: default:
486: *out++ = c;
487: break;
488: }
489: }
490: *out = '\0';
491: *outlen = out - start;
492:
493: *line = cp;
494: return (0);
495: }
496:
497: static int
498: magic_parse_offset(struct magic_line *ml, char **line)
499: {
500: char *copy, *s, *cp, *endptr;
501:
502: while (isspace((u_char)**line))
503: (*line)++;
504: copy = s = cp = xmalloc(strlen(*line) + 1);
505: while (**line != '\0' && !isspace((u_char)**line))
506: *cp++ = *(*line)++;
507: *cp = '\0';
508:
509: ml->offset = 0;
510: ml->offset_relative = 0;
511:
512: ml->indirect_type = ' ';
513: ml->indirect_relative = 0;
514: ml->indirect_offset = 0;
515: ml->indirect_operator = ' ';
516: ml->indirect_operand = 0;
517:
518: if (*s == '&') {
519: ml->offset_relative = 1;
520: s++;
521: }
522:
523: if (*s != '(') {
524: endptr = magic_strtoll(s, &ml->offset);
525: if (endptr == NULL || *endptr != '\0') {
526: magic_warn(ml, "missing closing bracket");
527: goto fail;
528: }
529: if (ml->offset < 0 && !ml->offset_relative) {
530: magic_warn(ml, "negative absolute offset");
531: goto fail;
532: }
533: goto done;
534: }
535: s++;
536:
537: if (*s == '&') {
538: ml->indirect_relative = 1;
539: s++;
540: }
541:
542: endptr = magic_strtoll(s, &ml->indirect_offset);
543: if (endptr == NULL) {
1.8 nicm 544: magic_warn(ml, "can't parse offset: %s", s);
1.1 nicm 545: goto fail;
546: }
547: s = endptr;
548: if (*s == ')')
549: goto done;
550:
551: if (*s == '.') {
552: s++;
1.6 tobias 553: if (*s == '\0' || strchr("bslBSL", *s) == NULL) {
1.8 nicm 554: magic_warn(ml, "unknown offset type: %c", *s);
1.1 nicm 555: goto fail;
556: }
557: ml->indirect_type = *s;
558: s++;
559: if (*s == ')')
560: goto done;
561: }
562:
1.6 tobias 563: if (*s == '\0' || strchr("+-*", *s) == NULL) {
1.8 nicm 564: magic_warn(ml, "unknown offset operator: %c", *s);
1.1 nicm 565: goto fail;
566: }
567: ml->indirect_operator = *s;
568: s++;
569: if (*s == ')')
570: goto done;
571:
572: if (*s == '(') {
573: s++;
574: endptr = magic_strtoll(s, &ml->indirect_operand);
575: if (endptr == NULL || *endptr != ')') {
576: magic_warn(ml, "missing closing bracket");
577: goto fail;
578: }
579: if (*++endptr != ')') {
580: magic_warn(ml, "missing closing bracket");
581: goto fail;
582: }
583: } else {
584: endptr = magic_strtoll(s, &ml->indirect_operand);
585: if (endptr == NULL || *endptr != ')') {
586: magic_warn(ml, "missing closing bracket");
587: goto fail;
588: }
589: }
590:
591: done:
592: free(copy);
593: return (0);
594:
595: fail:
596: free(copy);
597: return (-1);
598: }
599:
600: static int
601: magic_parse_type(struct magic_line *ml, char **line)
602: {
603: char *copy, *s, *cp, *endptr;
604:
605: while (isspace((u_char)**line))
606: (*line)++;
607: copy = s = cp = xmalloc(strlen(*line) + 1);
608: while (**line != '\0' && !isspace((u_char)**line))
609: *cp++ = *(*line)++;
610: *cp = '\0';
611:
612: ml->type = MAGIC_TYPE_NONE;
613: ml->type_operator = ' ';
614: ml->type_operand = 0;
615:
1.16 nicm 616: if (strncmp(s, "string", (sizeof "string") - 1) == 0 ||
617: strncmp(s, "ustring", (sizeof "ustring") - 1) == 0) {
618: if (*s == 'u')
619: ml->type_string = xstrdup(s + 1);
620: else
621: ml->type_string = xstrdup(s);
1.1 nicm 622: ml->type = MAGIC_TYPE_STRING;
623: magic_mark_text(ml, 0);
624: goto done;
625: }
1.16 nicm 626: if (strncmp(s, "pstring", (sizeof "pstring") - 1) == 0 ||
627: strncmp(s, "upstring", (sizeof "upstring") - 1) == 0) {
628: if (*s == 'u')
629: ml->type_string = xstrdup(s + 1);
630: else
631: ml->type_string = xstrdup(s);
632: ml->type = MAGIC_TYPE_PSTRING;
633: magic_mark_text(ml, 0);
634: goto done;
635: }
636: if (strncmp(s, "search", (sizeof "search") - 1) == 0 ||
637: strncmp(s, "usearch", (sizeof "usearch") - 1) == 0) {
638: if (*s == 'u')
639: ml->type_string = xstrdup(s + 1);
640: else
641: ml->type_string = xstrdup(s);
1.1 nicm 642: ml->type = MAGIC_TYPE_SEARCH;
643: goto done;
644: }
1.16 nicm 645: if (strncmp(s, "regex", (sizeof "regex") - 1) == 0 ||
646: strncmp(s, "uregex", (sizeof "uregex") - 1) == 0) {
647: if (*s == 'u')
648: ml->type_string = xstrdup(s + 1);
649: else
650: ml->type_string = xstrdup(s);
1.1 nicm 651: ml->type = MAGIC_TYPE_REGEX;
652: goto done;
653: }
1.16 nicm 654: ml->type_string = xstrdup(s);
1.1 nicm 655:
1.12 nicm 656: cp = &s[strcspn(s, "+-&/%*")];
1.1 nicm 657: if (*cp != '\0') {
658: ml->type_operator = *cp;
659: endptr = magic_strtoull(cp + 1, &ml->type_operand);
660: if (endptr == NULL || *endptr != '\0') {
1.8 nicm 661: magic_warn(ml, "can't parse operand: %s", cp + 1);
1.1 nicm 662: goto fail;
663: }
664: *cp = '\0';
665: }
666:
667: if (strcmp(s, "byte") == 0)
668: ml->type = MAGIC_TYPE_BYTE;
669: else if (strcmp(s, "short") == 0)
670: ml->type = MAGIC_TYPE_SHORT;
671: else if (strcmp(s, "long") == 0)
672: ml->type = MAGIC_TYPE_LONG;
673: else if (strcmp(s, "quad") == 0)
674: ml->type = MAGIC_TYPE_QUAD;
675: else if (strcmp(s, "ubyte") == 0)
676: ml->type = MAGIC_TYPE_UBYTE;
677: else if (strcmp(s, "ushort") == 0)
678: ml->type = MAGIC_TYPE_USHORT;
679: else if (strcmp(s, "ulong") == 0)
680: ml->type = MAGIC_TYPE_ULONG;
681: else if (strcmp(s, "uquad") == 0)
682: ml->type = MAGIC_TYPE_UQUAD;
1.16 nicm 683: else if (strcmp(s, "float") == 0 || strcmp(s, "ufloat") == 0)
1.1 nicm 684: ml->type = MAGIC_TYPE_FLOAT;
1.16 nicm 685: else if (strcmp(s, "double") == 0 || strcmp(s, "udouble") == 0)
1.1 nicm 686: ml->type = MAGIC_TYPE_DOUBLE;
687: else if (strcmp(s, "date") == 0)
688: ml->type = MAGIC_TYPE_DATE;
689: else if (strcmp(s, "qdate") == 0)
690: ml->type = MAGIC_TYPE_QDATE;
691: else if (strcmp(s, "ldate") == 0)
692: ml->type = MAGIC_TYPE_LDATE;
693: else if (strcmp(s, "qldate") == 0)
694: ml->type = MAGIC_TYPE_QLDATE;
695: else if (strcmp(s, "udate") == 0)
696: ml->type = MAGIC_TYPE_UDATE;
697: else if (strcmp(s, "uqdate") == 0)
698: ml->type = MAGIC_TYPE_UQDATE;
699: else if (strcmp(s, "uldate") == 0)
700: ml->type = MAGIC_TYPE_ULDATE;
701: else if (strcmp(s, "uqldate") == 0)
702: ml->type = MAGIC_TYPE_UQLDATE;
703: else if (strcmp(s, "beshort") == 0)
704: ml->type = MAGIC_TYPE_BESHORT;
705: else if (strcmp(s, "belong") == 0)
706: ml->type = MAGIC_TYPE_BELONG;
707: else if (strcmp(s, "bequad") == 0)
708: ml->type = MAGIC_TYPE_BEQUAD;
709: else if (strcmp(s, "ubeshort") == 0)
710: ml->type = MAGIC_TYPE_UBESHORT;
711: else if (strcmp(s, "ubelong") == 0)
712: ml->type = MAGIC_TYPE_UBELONG;
713: else if (strcmp(s, "ubequad") == 0)
714: ml->type = MAGIC_TYPE_UBEQUAD;
1.16 nicm 715: else if (strcmp(s, "befloat") == 0 || strcmp(s, "ubefloat") == 0)
1.1 nicm 716: ml->type = MAGIC_TYPE_BEFLOAT;
1.16 nicm 717: else if (strcmp(s, "bedouble") == 0 || strcmp(s, "ubedouble") == 0)
1.1 nicm 718: ml->type = MAGIC_TYPE_BEDOUBLE;
719: else if (strcmp(s, "bedate") == 0)
720: ml->type = MAGIC_TYPE_BEDATE;
721: else if (strcmp(s, "beqdate") == 0)
722: ml->type = MAGIC_TYPE_BEQDATE;
723: else if (strcmp(s, "beldate") == 0)
724: ml->type = MAGIC_TYPE_BELDATE;
725: else if (strcmp(s, "beqldate") == 0)
726: ml->type = MAGIC_TYPE_BEQLDATE;
727: else if (strcmp(s, "ubedate") == 0)
728: ml->type = MAGIC_TYPE_UBEDATE;
729: else if (strcmp(s, "ubeqdate") == 0)
730: ml->type = MAGIC_TYPE_UBEQDATE;
731: else if (strcmp(s, "ubeldate") == 0)
732: ml->type = MAGIC_TYPE_UBELDATE;
733: else if (strcmp(s, "ubeqldate") == 0)
734: ml->type = MAGIC_TYPE_UBEQLDATE;
1.16 nicm 735: else if (strcmp(s, "bestring16") == 0 || strcmp(s, "ubestring16") == 0)
1.1 nicm 736: ml->type = MAGIC_TYPE_BESTRING16;
737: else if (strcmp(s, "leshort") == 0)
738: ml->type = MAGIC_TYPE_LESHORT;
739: else if (strcmp(s, "lelong") == 0)
740: ml->type = MAGIC_TYPE_LELONG;
741: else if (strcmp(s, "lequad") == 0)
742: ml->type = MAGIC_TYPE_LEQUAD;
743: else if (strcmp(s, "uleshort") == 0)
744: ml->type = MAGIC_TYPE_ULESHORT;
745: else if (strcmp(s, "ulelong") == 0)
746: ml->type = MAGIC_TYPE_ULELONG;
747: else if (strcmp(s, "ulequad") == 0)
748: ml->type = MAGIC_TYPE_ULEQUAD;
1.16 nicm 749: else if (strcmp(s, "lefloat") == 0 || strcmp(s, "ulefloat") == 0)
1.1 nicm 750: ml->type = MAGIC_TYPE_LEFLOAT;
1.16 nicm 751: else if (strcmp(s, "ledouble") == 0 || strcmp(s, "uledouble") == 0)
1.1 nicm 752: ml->type = MAGIC_TYPE_LEDOUBLE;
753: else if (strcmp(s, "ledate") == 0)
754: ml->type = MAGIC_TYPE_LEDATE;
755: else if (strcmp(s, "leqdate") == 0)
756: ml->type = MAGIC_TYPE_LEQDATE;
757: else if (strcmp(s, "leldate") == 0)
758: ml->type = MAGIC_TYPE_LELDATE;
759: else if (strcmp(s, "leqldate") == 0)
760: ml->type = MAGIC_TYPE_LEQLDATE;
761: else if (strcmp(s, "uledate") == 0)
762: ml->type = MAGIC_TYPE_ULEDATE;
763: else if (strcmp(s, "uleqdate") == 0)
764: ml->type = MAGIC_TYPE_ULEQDATE;
765: else if (strcmp(s, "uleldate") == 0)
766: ml->type = MAGIC_TYPE_ULELDATE;
767: else if (strcmp(s, "uleqldate") == 0)
768: ml->type = MAGIC_TYPE_ULEQLDATE;
1.16 nicm 769: else if (strcmp(s, "lestring16") == 0 || strcmp(s, "ulestring16") == 0)
1.1 nicm 770: ml->type = MAGIC_TYPE_LESTRING16;
1.16 nicm 771: else if (strcmp(s, "melong") == 0 || strcmp(s, "umelong") == 0)
1.1 nicm 772: ml->type = MAGIC_TYPE_MELONG;
1.16 nicm 773: else if (strcmp(s, "medate") == 0 || strcmp(s, "umedate") == 0)
1.1 nicm 774: ml->type = MAGIC_TYPE_MEDATE;
1.16 nicm 775: else if (strcmp(s, "meldate") == 0 || strcmp(s, "umeldate") == 0)
1.1 nicm 776: ml->type = MAGIC_TYPE_MELDATE;
1.16 nicm 777: else if (strcmp(s, "default") == 0 || strcmp(s, "udefault") == 0)
1.1 nicm 778: ml->type = MAGIC_TYPE_DEFAULT;
779: else {
1.8 nicm 780: magic_warn(ml, "unknown type: %s", s);
1.1 nicm 781: goto fail;
782: }
783: magic_mark_text(ml, 0);
784:
785: done:
786: free(copy);
787: return (0);
788:
789: fail:
790: free(copy);
791: return (-1);
792: }
793:
794: static int
795: magic_parse_value(struct magic_line *ml, char **line)
796: {
797: char *copy, *s, *cp, *endptr;
798: size_t slen;
1.10 nicm 799: uint64_t u;
1.1 nicm 800:
801: while (isspace((u_char)**line))
802: (*line)++;
803:
804: ml->test_operator = '=';
805: ml->test_not = 0;
806: ml->test_string = NULL;
807: ml->test_string_size = 0;
808: ml->test_unsigned = 0;
809: ml->test_signed = 0;
810:
1.9 nicm 811: if (**line == '\0')
812: return (0);
813:
1.1 nicm 814: s = *line;
815: if (s[0] == 'x' && (s[1] == '\0' || isspace((u_char)s[1]))) {
816: (*line)++;
817: ml->test_operator = 'x';
818: return (0);
819: }
820:
821: if (**line == '!') {
822: ml->test_not = 1;
823: (*line)++;
824: }
825:
826: switch (ml->type) {
827: case MAGIC_TYPE_STRING:
828: case MAGIC_TYPE_PSTRING:
829: case MAGIC_TYPE_SEARCH:
830: if (**line == '>' || **line == '<' || **line == '=') {
831: ml->test_operator = **line;
832: (*line)++;
833: }
834: /* FALLTHROUGH */
835: case MAGIC_TYPE_REGEX:
836: copy = s = xmalloc(strlen(*line) + 1);
837: if (magic_get_string(line, s, &slen) != 0) {
838: magic_warn(ml, "can't parse string");
839: goto fail;
840: }
841: ml->test_string_size = slen;
842: ml->test_string = s;
843: return (0); /* do not free */
844: default:
845: break;
846: }
847:
1.9 nicm 848: while (isspace((u_char)**line))
849: (*line)++;
850: if ((*line)[0] == '<' && (*line)[1] == '=') {
851: ml->test_operator = '[';
852: (*line) += 2;
853: } else if ((*line)[0] == '>' && (*line)[1] == '=') {
854: ml->test_operator = ']';
1.1 nicm 855: (*line) += 2;
1.9 nicm 856: } else if (strchr("=<>&^", **line) != NULL) {
857: ml->test_operator = **line;
858: (*line)++;
1.1 nicm 859: }
1.9 nicm 860:
861: while (isspace((u_char)**line))
862: (*line)++;
863: copy = cp = xmalloc(strlen(*line) + 1);
1.1 nicm 864: while (**line != '\0' && !isspace((u_char)**line))
865: *cp++ = *(*line)++;
866: *cp = '\0';
867:
1.11 nicm 868: switch (ml->type) {
869: case MAGIC_TYPE_FLOAT:
870: case MAGIC_TYPE_DOUBLE:
871: case MAGIC_TYPE_BEFLOAT:
872: case MAGIC_TYPE_BEDOUBLE:
873: case MAGIC_TYPE_LEFLOAT:
874: case MAGIC_TYPE_LEDOUBLE:
875: errno = 0;
876: ml->test_double = strtod(copy, &endptr);
877: if (errno == ERANGE)
878: endptr = NULL;
879: break;
880: default:
881: if (*ml->type_string == 'u')
882: endptr = magic_strtoull(copy, &ml->test_unsigned);
883: else {
884: endptr = magic_strtoll(copy, &ml->test_signed);
885: if (endptr == NULL || *endptr != '\0') {
886: /*
887: * If we can't parse this as a signed number,
888: * try as unsigned instead.
889: */
890: endptr = magic_strtoull(copy, &u);
891: if (endptr != NULL && *endptr == '\0')
892: ml->test_signed = (int64_t)u;
893: }
1.10 nicm 894: }
1.11 nicm 895: break;
1.10 nicm 896: }
1.1 nicm 897: if (endptr == NULL || *endptr != '\0') {
1.9 nicm 898: magic_warn(ml, "can't parse number: %s", copy);
1.1 nicm 899: goto fail;
900: }
901:
902: free(copy);
903: return (0);
904:
905: fail:
906: free(copy);
907: return (-1);
908: }
909:
910: int
911: magic_compare(struct magic_line *ml1, struct magic_line *ml2)
912: {
913: if (ml1->strength < ml2->strength)
914: return (1);
915: if (ml1->strength > ml2->strength)
916: return (-1);
917:
918: /*
919: * The original file depends on the (undefined!) qsort(3) behaviour
920: * when the strength is equal. This is impossible to reproduce with an
921: * RB tree so just use the line number and hope for the best.
922: */
923: if (ml1->line < ml2->line)
924: return (-1);
925: if (ml1->line > ml2->line)
926: return (1);
927:
928: return (0);
929: }
930: RB_GENERATE(magic_tree, magic_line, node, magic_compare);
931:
932: static void
933: magic_set_mimetype(struct magic *m, u_int at, struct magic_line *ml, char *line)
934: {
935: char *mimetype, *cp;
936:
937: mimetype = line + (sizeof "!:mime") - 1;
938: while (isspace((u_char)*mimetype))
939: mimetype++;
940:
941: cp = strchr(mimetype, '#');
942: if (cp != NULL)
943: *cp = '\0';
944:
945: if (*mimetype != '\0') {
946: cp = mimetype + strlen(mimetype) - 1;
947: while (cp != mimetype && isspace((u_char)*cp))
948: *cp-- = '\0';
949: }
950:
951: cp = mimetype;
952: while (*cp != '\0') {
953: if (!isalnum((u_char)*cp) && strchr("/-.+", *cp) == NULL)
954: break;
955: cp++;
956: }
957: if (*mimetype == '\0' || *cp != '\0') {
1.7 nicm 958: magic_warnm(m, at, "invalid MIME type: %s", mimetype);
1.1 nicm 959: return;
960: }
961: if (ml == NULL) {
1.7 nicm 962: magic_warnm(m, at, "stray MIME type: %s", mimetype);
1.1 nicm 963: return;
964: }
965: ml->mimetype = xstrdup(mimetype);
966: }
967:
968: struct magic *
969: magic_load(FILE *f, const char *path, int warnings)
970: {
971: struct magic *m;
972: struct magic_line *ml = NULL, *parent, *parent0;
973: char *line, *tmp;
974: size_t size;
975: u_int at, level, n, i;
976:
977: m = xcalloc(1, sizeof *m);
978: m->path = xstrdup(path);
979: m->warnings = warnings;
980: RB_INIT(&m->tree);
981:
982: parent = NULL;
983: parent0 = NULL;
984: level = 0;
985:
986: at = 0;
987: tmp = NULL;
988: while ((line = fgetln(f, &size))) {
989: if (line[size - 1] == '\n')
990: line[size - 1] = '\0';
991: else {
992: tmp = xmalloc(size + 1);
993: memcpy(tmp, line, size);
994: tmp[size] = '\0';
995: line = tmp;
996: }
997: at++;
998:
999: while (isspace((u_char)*line))
1000: line++;
1001: if (*line == '\0' || *line == '#')
1002: continue;
1003:
1.15 nicm 1004: if (strncmp (line, "!:mime", 6) == 0) {
1.1 nicm 1005: magic_set_mimetype(m, at, ml, line);
1.15 nicm 1006: continue;
1007: }
1008: if (strncmp (line, "!:", 2) == 0) {
1009: for (i = 0; i < 64 && line[i] != '\0'; i++) {
1010: if (isspace((u_char)line[i]))
1011: break;
1012: }
1013: magic_warnm(m, at, "%.*s not supported", i, line);
1.1 nicm 1014: continue;
1015: }
1016:
1017: n = 0;
1018: for (; *line == '>'; line++)
1019: n++;
1020:
1021: ml = xcalloc(1, sizeof *ml);
1022: ml->root = m;
1023: ml->line = at;
1024: ml->type = MAGIC_TYPE_NONE;
1025: TAILQ_INIT(&ml->children);
1026: ml->text = 1;
1027:
1.13 nicm 1028: /*
1029: * At this point n is the level we want, level is the current
1030: * level. parent0 is the last line at the same level and parent
1031: * is the last line at the previous level.
1032: */
1.1 nicm 1033: if (n == level + 1) {
1034: parent = parent0;
1035: } else if (n < level) {
1036: for (i = n; i < level && parent != NULL; i++)
1037: parent = parent->parent;
1038: } else if (n != level) {
1039: magic_warn(ml, "level skipped (%u->%u)", level, n);
1040: free(ml);
1041: continue;
1042: }
1043: ml->parent = parent;
1044: level = n;
1045:
1046: if (magic_parse_offset(ml, &line) != 0 ||
1047: magic_parse_type(ml, &line) != 0 ||
1048: magic_parse_value(ml, &line) != 0 ||
1049: magic_set_result(ml, line) != 0) {
1.13 nicm 1050: /*
1051: * An invalid line still needs to appear in the tree in
1052: * case it has any children.
1053: */
1054: ml->type = MAGIC_TYPE_NONE;
1.1 nicm 1055: }
1056:
1057: ml->strength = magic_get_strength(ml);
1058: if (ml->parent == NULL)
1059: RB_INSERT(magic_tree, &m->tree, ml);
1060: else
1061: TAILQ_INSERT_TAIL(&ml->parent->children, ml, entry);
1062: parent0 = ml;
1063: }
1064: free(tmp);
1065:
1066: fclose(f);
1067: return (m);
1068: }