Annotation of src/usr.bin/file/magic-load.c, Revision 1.14
1.14 ! nicm 1: /* $OpenBSD: magic-load.c,v 1.13 2015/08/11 22:23:51 nicm Exp $ */
1.1 nicm 2:
3: /*
4: * Copyright (c) 2015 Nicholas Marriott <nicm@openbsd.org>
5: *
6: * Permission to use, copy, modify, and distribute this software for any
7: * purpose with or without fee is hereby granted, provided that the above
8: * copyright notice and this permission notice appear in all copies.
9: *
10: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14: * WHATSOEVER RESULTING FROM LOSS OF MIND, USE, DATA OR PROFITS, WHETHER
15: * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
16: * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17: */
18:
19: #include <sys/types.h>
20:
21: #include <ctype.h>
22: #include <errno.h>
23: #include <limits.h>
24: #include <regex.h>
25: #include <stdarg.h>
26: #include <stdio.h>
27: #include <stdlib.h>
28: #include <string.h>
29:
30: #include "magic.h"
31: #include "xmalloc.h"
32:
/*
 * Convert an octal digit character to its numeric value.
 * Returns 0-7 for '0'-'7' and -1 for any other character.
 */
static int
magic_odigit(u_char c)
{
	return ((c < '0' || c > '7') ? -1 : c - '0');
}
40:
/*
 * Convert a hexadecimal digit character (either case) to its numeric value.
 * Returns 0-15 for a valid digit and -1 for any other character.
 */
static int
magic_xdigit(u_char c)
{
	int	value = -1;

	if (c >= '0' && c <= '9')
		value = c - '0';
	else if (c >= 'a' && c <= 'f')
		value = c - 'a' + 10;
	else if (c >= 'A' && c <= 'F')
		value = c - 'A' + 10;
	return (value);
}
52:
53: static void
54: magic_mark_text(struct magic_line *ml, int text)
55: {
56: do {
57: ml->text = text;
58: ml = ml->parent;
59: } while (ml != NULL);
60: }
61:
/*
 * Compile the extended regular expression p into re (without subexpression
 * reporting). On failure a warning naming the pattern is issued against ml
 * and -1 is returned; on success 0 is returned.
 */
static int
magic_make_pattern(struct magic_line *ml, const char *name, regex_t *re,
    const char *p)
{
	char	message[256];
	int	rc;

	rc = regcomp(re, p, REG_EXTENDED|REG_NOSUB);
	if (rc == 0)
		return (0);

	regerror(rc, re, message, sizeof message);
	magic_warn(ml, "bad %s pattern: %s", name, message);
	return (-1);
}
77:
78: static int
79: magic_set_result(struct magic_line *ml, const char *s)
80: {
81: const char *fmt;
82: const char *endfmt;
83: const char *cp;
84: regex_t *re = NULL;
85: regmatch_t pmatch;
86: size_t fmtlen;
87:
88: while (isspace((u_char)*s))
89: s++;
90: if (*s == '\0') {
91: ml->result = NULL;
92: return (0);
93: }
94: ml->result = xstrdup(s);
95:
96: fmt = NULL;
97: for (cp = s; *cp != '\0'; cp++) {
98: if (cp[0] == '%' && cp[1] != '%') {
99: if (fmt != NULL) {
100: magic_warn(ml, "multiple formats");
101: return (-1);
102: }
103: fmt = cp;
104: }
105: }
106: if (fmt == NULL)
107: return (0);
108: fmt++;
109:
110: for (endfmt = fmt; *endfmt != '\0'; endfmt++) {
111: if (strchr("diouxXeEfFgGsc", *endfmt) != NULL)
112: break;
113: }
114: if (*endfmt == '\0') {
115: magic_warn(ml, "unterminated format");
116: return (-1);
117: }
118: fmtlen = endfmt + 1 - fmt;
119: if (fmtlen > 32) {
120: magic_warn(ml, "format too long");
121: return (-1);
122: }
123:
124: if (*endfmt == 's') {
125: switch (ml->type) {
126: case MAGIC_TYPE_DATE:
127: case MAGIC_TYPE_LDATE:
128: case MAGIC_TYPE_UDATE:
129: case MAGIC_TYPE_ULDATE:
130: case MAGIC_TYPE_BEDATE:
131: case MAGIC_TYPE_BELDATE:
132: case MAGIC_TYPE_UBEDATE:
133: case MAGIC_TYPE_UBELDATE:
134: case MAGIC_TYPE_QDATE:
135: case MAGIC_TYPE_QLDATE:
136: case MAGIC_TYPE_UQDATE:
137: case MAGIC_TYPE_UQLDATE:
138: case MAGIC_TYPE_BEQDATE:
139: case MAGIC_TYPE_BEQLDATE:
140: case MAGIC_TYPE_UBEQDATE:
141: case MAGIC_TYPE_UBEQLDATE:
142: case MAGIC_TYPE_LEQDATE:
143: case MAGIC_TYPE_LEQLDATE:
144: case MAGIC_TYPE_ULEQDATE:
145: case MAGIC_TYPE_ULEQLDATE:
146: case MAGIC_TYPE_LEDATE:
147: case MAGIC_TYPE_LELDATE:
148: case MAGIC_TYPE_ULEDATE:
149: case MAGIC_TYPE_ULELDATE:
150: case MAGIC_TYPE_MEDATE:
151: case MAGIC_TYPE_MELDATE:
152: case MAGIC_TYPE_STRING:
153: case MAGIC_TYPE_PSTRING:
154: case MAGIC_TYPE_BESTRING16:
155: case MAGIC_TYPE_LESTRING16:
156: case MAGIC_TYPE_REGEX:
157: case MAGIC_TYPE_SEARCH:
158: break;
159: default:
160: ml->stringify = 1;
161: break;
162: }
163: }
164:
165: if (!ml->root->compiled) {
166: /*
167: * XXX %ld (and %lu and so on) is invalid on 64-bit platforms
168: * with byte, short, long. We get lucky because our first and
169: * only argument ends up in a register. Accept it for now.
170: */
171: if (magic_make_pattern(ml, "short", &ml->root->format_short,
172: "^-?[0-9]*(\\.[0-9]*)?(c|(l|h|hh)?[iduxX])$") != 0)
173: return (-1);
174: if (magic_make_pattern(ml, "long", &ml->root->format_long,
175: "^-?[0-9]*(\\.[0-9]*)?(c|(l|h|hh)?[iduxX])$") != 0)
176: return (-1);
177: if (magic_make_pattern(ml, "quad", &ml->root->format_quad,
178: "^-?[0-9]*(\\.[0-9]*)?ll[iduxX]$") != 0)
179: return (-1);
180: if (magic_make_pattern(ml, "float", &ml->root->format_float,
181: "^-?[0-9]*(\\.[0-9]*)?[eEfFgG]$") != 0)
182: return (-1);
183: if (magic_make_pattern(ml, "string", &ml->root->format_string,
184: "^-?[0-9]*(\\.[0-9]*)?s$") != 0)
185: return (-1);
186: ml->root->compiled = 1;
187: }
188:
189: if (ml->stringify)
190: re = &ml->root->format_string;
191: else {
192: switch (ml->type) {
193: case MAGIC_TYPE_NONE:
194: case MAGIC_TYPE_DEFAULT:
195: return (0); /* don't use result */
196: case MAGIC_TYPE_BYTE:
197: case MAGIC_TYPE_UBYTE:
198: case MAGIC_TYPE_SHORT:
199: case MAGIC_TYPE_USHORT:
200: case MAGIC_TYPE_BESHORT:
201: case MAGIC_TYPE_UBESHORT:
202: case MAGIC_TYPE_LESHORT:
203: case MAGIC_TYPE_ULESHORT:
204: re = &ml->root->format_short;
205: break;
206: case MAGIC_TYPE_LONG:
207: case MAGIC_TYPE_ULONG:
208: case MAGIC_TYPE_BELONG:
209: case MAGIC_TYPE_UBELONG:
210: case MAGIC_TYPE_LELONG:
211: case MAGIC_TYPE_ULELONG:
212: case MAGIC_TYPE_MELONG:
213: re = &ml->root->format_long;
214: break;
215: case MAGIC_TYPE_QUAD:
216: case MAGIC_TYPE_UQUAD:
217: case MAGIC_TYPE_BEQUAD:
218: case MAGIC_TYPE_UBEQUAD:
219: case MAGIC_TYPE_LEQUAD:
220: case MAGIC_TYPE_ULEQUAD:
221: re = &ml->root->format_quad;
222: break;
223: case MAGIC_TYPE_FLOAT:
224: case MAGIC_TYPE_BEFLOAT:
225: case MAGIC_TYPE_LEFLOAT:
226: case MAGIC_TYPE_DOUBLE:
227: case MAGIC_TYPE_BEDOUBLE:
228: case MAGIC_TYPE_LEDOUBLE:
229: re = &ml->root->format_float;
230: break;
231: case MAGIC_TYPE_DATE:
232: case MAGIC_TYPE_LDATE:
233: case MAGIC_TYPE_UDATE:
234: case MAGIC_TYPE_ULDATE:
235: case MAGIC_TYPE_BEDATE:
236: case MAGIC_TYPE_BELDATE:
237: case MAGIC_TYPE_UBEDATE:
238: case MAGIC_TYPE_UBELDATE:
239: case MAGIC_TYPE_QDATE:
240: case MAGIC_TYPE_QLDATE:
241: case MAGIC_TYPE_UQDATE:
242: case MAGIC_TYPE_UQLDATE:
243: case MAGIC_TYPE_BEQDATE:
244: case MAGIC_TYPE_BEQLDATE:
245: case MAGIC_TYPE_UBEQDATE:
246: case MAGIC_TYPE_UBEQLDATE:
247: case MAGIC_TYPE_LEQDATE:
248: case MAGIC_TYPE_LEQLDATE:
249: case MAGIC_TYPE_ULEQDATE:
250: case MAGIC_TYPE_ULEQLDATE:
251: case MAGIC_TYPE_LEDATE:
252: case MAGIC_TYPE_LELDATE:
253: case MAGIC_TYPE_ULEDATE:
254: case MAGIC_TYPE_ULELDATE:
255: case MAGIC_TYPE_MEDATE:
256: case MAGIC_TYPE_MELDATE:
257: case MAGIC_TYPE_STRING:
258: case MAGIC_TYPE_PSTRING:
259: case MAGIC_TYPE_REGEX:
260: case MAGIC_TYPE_SEARCH:
261: re = &ml->root->format_string;
262: break;
263: case MAGIC_TYPE_BESTRING16:
264: case MAGIC_TYPE_LESTRING16:
1.8 nicm 265: magic_warn(ml, "unsupported type: %s", ml->type_string);
1.1 nicm 266: return (-1);
267: }
268: }
269:
270: pmatch.rm_so = 0;
271: pmatch.rm_eo = fmtlen;
272: if (regexec(re, fmt, 1, &pmatch, REG_STARTEND) != 0) {
273: magic_warn(ml, "bad format for %s: %%%.*s", ml->type_string,
274: (int)fmtlen, fmt);
275: return (-1);
276: }
277:
278: return (0);
279: }
280:
281: static u_int
282: magic_get_strength(struct magic_line *ml)
283: {
284: int n;
285: size_t size;
286:
1.13 nicm 287: if (ml->type == MAGIC_TYPE_NONE)
288: return (0);
289:
1.1 nicm 290: if (ml->test_not || ml->test_operator == 'x')
291: return (1);
292:
1.5 nicm 293: n = 2 * MAGIC_STRENGTH_MULTIPLIER;
1.1 nicm 294: switch (ml->type) {
295: case MAGIC_TYPE_NONE:
296: case MAGIC_TYPE_DEFAULT:
297: return (0);
298: case MAGIC_TYPE_BYTE:
299: case MAGIC_TYPE_UBYTE:
300: n += 1 * MAGIC_STRENGTH_MULTIPLIER;
301: break;
302: case MAGIC_TYPE_SHORT:
303: case MAGIC_TYPE_USHORT:
304: case MAGIC_TYPE_BESHORT:
305: case MAGIC_TYPE_UBESHORT:
306: case MAGIC_TYPE_LESHORT:
307: case MAGIC_TYPE_ULESHORT:
308: n += 2 * MAGIC_STRENGTH_MULTIPLIER;
309: break;
310: case MAGIC_TYPE_LONG:
311: case MAGIC_TYPE_ULONG:
312: case MAGIC_TYPE_FLOAT:
313: case MAGIC_TYPE_DATE:
314: case MAGIC_TYPE_LDATE:
315: case MAGIC_TYPE_UDATE:
316: case MAGIC_TYPE_ULDATE:
317: case MAGIC_TYPE_BELONG:
318: case MAGIC_TYPE_UBELONG:
319: case MAGIC_TYPE_BEFLOAT:
320: case MAGIC_TYPE_BEDATE:
321: case MAGIC_TYPE_BELDATE:
322: case MAGIC_TYPE_UBEDATE:
323: case MAGIC_TYPE_UBELDATE:
324: n += 4 * MAGIC_STRENGTH_MULTIPLIER;
325: break;
326: case MAGIC_TYPE_QUAD:
327: case MAGIC_TYPE_UQUAD:
328: case MAGIC_TYPE_DOUBLE:
329: case MAGIC_TYPE_QDATE:
330: case MAGIC_TYPE_QLDATE:
331: case MAGIC_TYPE_UQDATE:
332: case MAGIC_TYPE_UQLDATE:
333: case MAGIC_TYPE_BEQUAD:
334: case MAGIC_TYPE_UBEQUAD:
335: case MAGIC_TYPE_BEDOUBLE:
336: case MAGIC_TYPE_BEQDATE:
337: case MAGIC_TYPE_BEQLDATE:
338: case MAGIC_TYPE_UBEQDATE:
339: case MAGIC_TYPE_UBEQLDATE:
340: case MAGIC_TYPE_LEQUAD:
341: case MAGIC_TYPE_ULEQUAD:
342: case MAGIC_TYPE_LEDOUBLE:
343: case MAGIC_TYPE_LEQDATE:
344: case MAGIC_TYPE_LEQLDATE:
345: case MAGIC_TYPE_ULEQDATE:
346: case MAGIC_TYPE_ULEQLDATE:
347: case MAGIC_TYPE_LELONG:
348: case MAGIC_TYPE_ULELONG:
349: case MAGIC_TYPE_LEFLOAT:
350: case MAGIC_TYPE_LEDATE:
351: case MAGIC_TYPE_LELDATE:
352: case MAGIC_TYPE_ULEDATE:
353: case MAGIC_TYPE_ULELDATE:
354: case MAGIC_TYPE_MELONG:
355: case MAGIC_TYPE_MEDATE:
356: case MAGIC_TYPE_MELDATE:
357: n += 8 * MAGIC_STRENGTH_MULTIPLIER;
358: break;
359: case MAGIC_TYPE_STRING:
360: case MAGIC_TYPE_PSTRING:
361: n += ml->test_string_size * MAGIC_STRENGTH_MULTIPLIER;
362: break;
363: case MAGIC_TYPE_BESTRING16:
364: case MAGIC_TYPE_LESTRING16:
365: n += ml->test_string_size * MAGIC_STRENGTH_MULTIPLIER / 2;
366: break;
367: case MAGIC_TYPE_REGEX:
368: case MAGIC_TYPE_SEARCH:
369: size = MAGIC_STRENGTH_MULTIPLIER / ml->test_string_size;
370: if (size < 1)
371: size = 1;
372: n += ml->test_string_size * size;
373: break;
374: }
375: switch (ml->test_operator) {
376: case '=':
377: n += MAGIC_STRENGTH_MULTIPLIER;
378: break;
379: case '<':
380: case '>':
381: case '[':
382: case ']':
383: n -= 2 * MAGIC_STRENGTH_MULTIPLIER;
384: break;
385: case '^':
386: case '&':
387: n -= MAGIC_STRENGTH_MULTIPLIER;
388: break;
389: }
390: return (n <= 0 ? 1 : n);
391: }
392:
/*
 * Decode one whitespace-delimited token from *line into out, expanding
 * backslash escapes.
 *
 * Recognised escapes are one to three octal digits, \x followed by one or two
 * hexadecimal digits, and \a \b \t \f \n \r. A backslash before any other
 * character (including a space, a quote or another backslash) yields that
 * character itself. The decoded text is never longer than the input, and
 * out is always NUL-terminated on success; because escapes can produce NUL
 * bytes, the real length is reported through *outlen.
 *
 * Returns 0 and advances *line to the first unconsumed character, or -1
 * (leaving *line untouched) for a trailing backslash or a \x with no
 * hexadecimal digit.
 */
static int
magic_get_string(char **line, char *out, size_t *outlen)
{
	char	*in, *dst;
	int	 digit, value;

	dst = out;
	for (in = *line; *in != '\0' && !isspace((u_char)*in); in++) {
		if (*in != '\\') {
			*dst++ = *in;
			continue;
		}

		/* Step over the backslash and decode what follows. */
		in++;
		switch (*in) {
		case '\0': /* end of line */
			return (-1);
		case '0':
		case '1':
		case '2':
		case '3':
		case '4':
		case '5':
		case '6':
		case '7':
			/* Take up to three octal digits, most significant first. */
			value = magic_odigit(in[0]);
			if ((digit = magic_odigit(in[1])) != -1) {
				value = (value << 3) | digit;
				in++;
				if ((digit = magic_odigit(in[1])) != -1) {
					value = (value << 3) | digit;
					in++;
				}
			}
			*dst++ = value;
			break;
		case 'x':
			/* At least one hexadecimal digit is required. */
			value = magic_xdigit(in[1]);
			if (value == -1)
				return (-1);
			in++;
			if ((digit = magic_xdigit(in[1])) != -1) {
				value = (value << 4) | digit;
				in++;
			}
			*dst++ = value;
			break;
		case 'a':
			*dst++ = '\a';
			break;
		case 'b':
			*dst++ = '\b';
			break;
		case 't':
			*dst++ = '\t';
			break;
		case 'f':
			*dst++ = '\f';
			break;
		case 'n':
			*dst++ = '\n';
			break;
		case 'r':
			*dst++ = '\r';
			break;
		default:
			/* Space, backslash, quotes and anything else: literal. */
			*dst++ = *in;
			break;
		}
	}
	*dst = '\0';
	*outlen = dst - out;

	*line = in;
	return (0);
}
498:
499: static int
500: magic_parse_offset(struct magic_line *ml, char **line)
501: {
502: char *copy, *s, *cp, *endptr;
503:
504: while (isspace((u_char)**line))
505: (*line)++;
506: copy = s = cp = xmalloc(strlen(*line) + 1);
507: while (**line != '\0' && !isspace((u_char)**line))
508: *cp++ = *(*line)++;
509: *cp = '\0';
510:
511: ml->offset = 0;
512: ml->offset_relative = 0;
513:
514: ml->indirect_type = ' ';
515: ml->indirect_relative = 0;
516: ml->indirect_offset = 0;
517: ml->indirect_operator = ' ';
518: ml->indirect_operand = 0;
519:
520: if (*s == '&') {
521: ml->offset_relative = 1;
522: s++;
523: }
524:
525: if (*s != '(') {
526: endptr = magic_strtoll(s, &ml->offset);
527: if (endptr == NULL || *endptr != '\0') {
528: magic_warn(ml, "missing closing bracket");
529: goto fail;
530: }
531: if (ml->offset < 0 && !ml->offset_relative) {
532: magic_warn(ml, "negative absolute offset");
533: goto fail;
534: }
535: goto done;
536: }
537: s++;
538:
539: if (*s == '&') {
540: ml->indirect_relative = 1;
541: s++;
542: }
543:
544: endptr = magic_strtoll(s, &ml->indirect_offset);
545: if (endptr == NULL) {
1.8 nicm 546: magic_warn(ml, "can't parse offset: %s", s);
1.1 nicm 547: goto fail;
548: }
549: s = endptr;
550: if (*s == ')')
551: goto done;
552:
553: if (*s == '.') {
554: s++;
1.6 tobias 555: if (*s == '\0' || strchr("bslBSL", *s) == NULL) {
1.8 nicm 556: magic_warn(ml, "unknown offset type: %c", *s);
1.1 nicm 557: goto fail;
558: }
559: ml->indirect_type = *s;
560: s++;
561: if (*s == ')')
562: goto done;
563: }
564:
1.6 tobias 565: if (*s == '\0' || strchr("+-*", *s) == NULL) {
1.8 nicm 566: magic_warn(ml, "unknown offset operator: %c", *s);
1.1 nicm 567: goto fail;
568: }
569: ml->indirect_operator = *s;
570: s++;
571: if (*s == ')')
572: goto done;
573:
574: if (*s == '(') {
575: s++;
576: endptr = magic_strtoll(s, &ml->indirect_operand);
577: if (endptr == NULL || *endptr != ')') {
578: magic_warn(ml, "missing closing bracket");
579: goto fail;
580: }
581: if (*++endptr != ')') {
582: magic_warn(ml, "missing closing bracket");
583: goto fail;
584: }
585: } else {
586: endptr = magic_strtoll(s, &ml->indirect_operand);
587: if (endptr == NULL || *endptr != ')') {
588: magic_warn(ml, "missing closing bracket");
589: goto fail;
590: }
591: }
592:
593: done:
594: free(copy);
595: return (0);
596:
597: fail:
598: free(copy);
599: return (-1);
600: }
601:
602: static int
603: magic_parse_type(struct magic_line *ml, char **line)
604: {
605: char *copy, *s, *cp, *endptr;
606:
607: while (isspace((u_char)**line))
608: (*line)++;
609: copy = s = cp = xmalloc(strlen(*line) + 1);
610: while (**line != '\0' && !isspace((u_char)**line))
611: *cp++ = *(*line)++;
612: *cp = '\0';
613:
614: ml->type = MAGIC_TYPE_NONE;
615: ml->type_string = xstrdup(s);
616:
617: ml->type_operator = ' ';
618: ml->type_operand = 0;
619:
620: if (strncmp(s, "string", (sizeof "string") - 1) == 0) {
621: ml->type = MAGIC_TYPE_STRING;
622: magic_mark_text(ml, 0);
623: goto done;
624: }
625: if (strncmp(s, "search", (sizeof "search") - 1) == 0) {
626: ml->type = MAGIC_TYPE_SEARCH;
627: goto done;
628: }
629: if (strncmp(s, "regex", (sizeof "regex") - 1) == 0) {
630: ml->type = MAGIC_TYPE_REGEX;
631: goto done;
632: }
633:
1.12 nicm 634: cp = &s[strcspn(s, "+-&/%*")];
1.1 nicm 635: if (*cp != '\0') {
636: ml->type_operator = *cp;
637: endptr = magic_strtoull(cp + 1, &ml->type_operand);
638: if (endptr == NULL || *endptr != '\0') {
1.8 nicm 639: magic_warn(ml, "can't parse operand: %s", cp + 1);
1.1 nicm 640: goto fail;
641: }
642: *cp = '\0';
643: }
644:
645: if (strcmp(s, "byte") == 0)
646: ml->type = MAGIC_TYPE_BYTE;
647: else if (strcmp(s, "short") == 0)
648: ml->type = MAGIC_TYPE_SHORT;
649: else if (strcmp(s, "long") == 0)
650: ml->type = MAGIC_TYPE_LONG;
651: else if (strcmp(s, "quad") == 0)
652: ml->type = MAGIC_TYPE_QUAD;
653: else if (strcmp(s, "ubyte") == 0)
654: ml->type = MAGIC_TYPE_UBYTE;
655: else if (strcmp(s, "ushort") == 0)
656: ml->type = MAGIC_TYPE_USHORT;
657: else if (strcmp(s, "ulong") == 0)
658: ml->type = MAGIC_TYPE_ULONG;
659: else if (strcmp(s, "uquad") == 0)
660: ml->type = MAGIC_TYPE_UQUAD;
661: else if (strcmp(s, "float") == 0)
662: ml->type = MAGIC_TYPE_FLOAT;
663: else if (strcmp(s, "double") == 0)
664: ml->type = MAGIC_TYPE_DOUBLE;
665: else if (strcmp(s, "pstring") == 0)
666: ml->type = MAGIC_TYPE_PSTRING;
667: else if (strcmp(s, "date") == 0)
668: ml->type = MAGIC_TYPE_DATE;
669: else if (strcmp(s, "qdate") == 0)
670: ml->type = MAGIC_TYPE_QDATE;
671: else if (strcmp(s, "ldate") == 0)
672: ml->type = MAGIC_TYPE_LDATE;
673: else if (strcmp(s, "qldate") == 0)
674: ml->type = MAGIC_TYPE_QLDATE;
675: else if (strcmp(s, "udate") == 0)
676: ml->type = MAGIC_TYPE_UDATE;
677: else if (strcmp(s, "uqdate") == 0)
678: ml->type = MAGIC_TYPE_UQDATE;
679: else if (strcmp(s, "uldate") == 0)
680: ml->type = MAGIC_TYPE_ULDATE;
681: else if (strcmp(s, "uqldate") == 0)
682: ml->type = MAGIC_TYPE_UQLDATE;
683: else if (strcmp(s, "beshort") == 0)
684: ml->type = MAGIC_TYPE_BESHORT;
685: else if (strcmp(s, "belong") == 0)
686: ml->type = MAGIC_TYPE_BELONG;
687: else if (strcmp(s, "bequad") == 0)
688: ml->type = MAGIC_TYPE_BEQUAD;
689: else if (strcmp(s, "ubeshort") == 0)
690: ml->type = MAGIC_TYPE_UBESHORT;
691: else if (strcmp(s, "ubelong") == 0)
692: ml->type = MAGIC_TYPE_UBELONG;
693: else if (strcmp(s, "ubequad") == 0)
694: ml->type = MAGIC_TYPE_UBEQUAD;
695: else if (strcmp(s, "befloat") == 0)
696: ml->type = MAGIC_TYPE_BEFLOAT;
697: else if (strcmp(s, "bedouble") == 0)
698: ml->type = MAGIC_TYPE_BEDOUBLE;
699: else if (strcmp(s, "bedate") == 0)
700: ml->type = MAGIC_TYPE_BEDATE;
701: else if (strcmp(s, "beqdate") == 0)
702: ml->type = MAGIC_TYPE_BEQDATE;
703: else if (strcmp(s, "beldate") == 0)
704: ml->type = MAGIC_TYPE_BELDATE;
705: else if (strcmp(s, "beqldate") == 0)
706: ml->type = MAGIC_TYPE_BEQLDATE;
707: else if (strcmp(s, "ubedate") == 0)
708: ml->type = MAGIC_TYPE_UBEDATE;
709: else if (strcmp(s, "ubeqdate") == 0)
710: ml->type = MAGIC_TYPE_UBEQDATE;
711: else if (strcmp(s, "ubeldate") == 0)
712: ml->type = MAGIC_TYPE_UBELDATE;
713: else if (strcmp(s, "ubeqldate") == 0)
714: ml->type = MAGIC_TYPE_UBEQLDATE;
715: else if (strcmp(s, "bestring16") == 0)
716: ml->type = MAGIC_TYPE_BESTRING16;
717: else if (strcmp(s, "leshort") == 0)
718: ml->type = MAGIC_TYPE_LESHORT;
719: else if (strcmp(s, "lelong") == 0)
720: ml->type = MAGIC_TYPE_LELONG;
721: else if (strcmp(s, "lequad") == 0)
722: ml->type = MAGIC_TYPE_LEQUAD;
723: else if (strcmp(s, "uleshort") == 0)
724: ml->type = MAGIC_TYPE_ULESHORT;
725: else if (strcmp(s, "ulelong") == 0)
726: ml->type = MAGIC_TYPE_ULELONG;
727: else if (strcmp(s, "ulequad") == 0)
728: ml->type = MAGIC_TYPE_ULEQUAD;
729: else if (strcmp(s, "lefloat") == 0)
730: ml->type = MAGIC_TYPE_LEFLOAT;
731: else if (strcmp(s, "ledouble") == 0)
732: ml->type = MAGIC_TYPE_LEDOUBLE;
733: else if (strcmp(s, "ledate") == 0)
734: ml->type = MAGIC_TYPE_LEDATE;
735: else if (strcmp(s, "leqdate") == 0)
736: ml->type = MAGIC_TYPE_LEQDATE;
737: else if (strcmp(s, "leldate") == 0)
738: ml->type = MAGIC_TYPE_LELDATE;
739: else if (strcmp(s, "leqldate") == 0)
740: ml->type = MAGIC_TYPE_LEQLDATE;
741: else if (strcmp(s, "uledate") == 0)
742: ml->type = MAGIC_TYPE_ULEDATE;
743: else if (strcmp(s, "uleqdate") == 0)
744: ml->type = MAGIC_TYPE_ULEQDATE;
745: else if (strcmp(s, "uleldate") == 0)
746: ml->type = MAGIC_TYPE_ULELDATE;
747: else if (strcmp(s, "uleqldate") == 0)
748: ml->type = MAGIC_TYPE_ULEQLDATE;
749: else if (strcmp(s, "lestring16") == 0)
750: ml->type = MAGIC_TYPE_LESTRING16;
751: else if (strcmp(s, "melong") == 0)
752: ml->type = MAGIC_TYPE_MELONG;
753: else if (strcmp(s, "medate") == 0)
754: ml->type = MAGIC_TYPE_MEDATE;
755: else if (strcmp(s, "meldate") == 0)
756: ml->type = MAGIC_TYPE_MELDATE;
757: else if (strcmp(s, "default") == 0)
758: ml->type = MAGIC_TYPE_DEFAULT;
759: else {
1.8 nicm 760: magic_warn(ml, "unknown type: %s", s);
1.1 nicm 761: goto fail;
762: }
763: magic_mark_text(ml, 0);
764:
765: done:
766: free(copy);
767: return (0);
768:
769: fail:
770: free(copy);
771: return (-1);
772: }
773:
774: static int
775: magic_parse_value(struct magic_line *ml, char **line)
776: {
777: char *copy, *s, *cp, *endptr;
778: size_t slen;
1.10 nicm 779: uint64_t u;
1.1 nicm 780:
781: while (isspace((u_char)**line))
782: (*line)++;
783:
784: ml->test_operator = '=';
785: ml->test_not = 0;
786: ml->test_string = NULL;
787: ml->test_string_size = 0;
788: ml->test_unsigned = 0;
789: ml->test_signed = 0;
790:
1.9 nicm 791: if (**line == '\0')
792: return (0);
793:
1.1 nicm 794: s = *line;
795: if (s[0] == 'x' && (s[1] == '\0' || isspace((u_char)s[1]))) {
796: (*line)++;
797: ml->test_operator = 'x';
798: return (0);
799: }
800:
801: if (**line == '!') {
802: ml->test_not = 1;
803: (*line)++;
804: }
805:
806: switch (ml->type) {
807: case MAGIC_TYPE_STRING:
808: case MAGIC_TYPE_PSTRING:
809: case MAGIC_TYPE_SEARCH:
810: if (**line == '>' || **line == '<' || **line == '=') {
811: ml->test_operator = **line;
812: (*line)++;
813: }
814: /* FALLTHROUGH */
815: case MAGIC_TYPE_REGEX:
816: copy = s = xmalloc(strlen(*line) + 1);
817: if (magic_get_string(line, s, &slen) != 0) {
818: magic_warn(ml, "can't parse string");
819: goto fail;
820: }
821: ml->test_string_size = slen;
822: ml->test_string = s;
823: return (0); /* do not free */
824: default:
825: break;
826: }
827:
1.9 nicm 828: while (isspace((u_char)**line))
829: (*line)++;
830: if ((*line)[0] == '<' && (*line)[1] == '=') {
831: ml->test_operator = '[';
832: (*line) += 2;
833: } else if ((*line)[0] == '>' && (*line)[1] == '=') {
834: ml->test_operator = ']';
1.1 nicm 835: (*line) += 2;
1.9 nicm 836: } else if (strchr("=<>&^", **line) != NULL) {
837: ml->test_operator = **line;
838: (*line)++;
1.1 nicm 839: }
1.9 nicm 840:
841: while (isspace((u_char)**line))
842: (*line)++;
843: copy = cp = xmalloc(strlen(*line) + 1);
1.1 nicm 844: while (**line != '\0' && !isspace((u_char)**line))
845: *cp++ = *(*line)++;
846: *cp = '\0';
847:
1.11 nicm 848: switch (ml->type) {
849: case MAGIC_TYPE_FLOAT:
850: case MAGIC_TYPE_DOUBLE:
851: case MAGIC_TYPE_BEFLOAT:
852: case MAGIC_TYPE_BEDOUBLE:
853: case MAGIC_TYPE_LEFLOAT:
854: case MAGIC_TYPE_LEDOUBLE:
855: errno = 0;
856: ml->test_double = strtod(copy, &endptr);
857: if (errno == ERANGE)
858: endptr = NULL;
859: break;
860: default:
861: if (*ml->type_string == 'u')
862: endptr = magic_strtoull(copy, &ml->test_unsigned);
863: else {
864: endptr = magic_strtoll(copy, &ml->test_signed);
865: if (endptr == NULL || *endptr != '\0') {
866: /*
867: * If we can't parse this as a signed number,
868: * try as unsigned instead.
869: */
870: endptr = magic_strtoull(copy, &u);
871: if (endptr != NULL && *endptr == '\0')
872: ml->test_signed = (int64_t)u;
873: }
1.10 nicm 874: }
1.11 nicm 875: break;
1.10 nicm 876: }
1.1 nicm 877: if (endptr == NULL || *endptr != '\0') {
1.9 nicm 878: magic_warn(ml, "can't parse number: %s", copy);
1.1 nicm 879: goto fail;
880: }
881:
882: free(copy);
883: return (0);
884:
885: fail:
886: free(copy);
887: return (-1);
888: }
889:
890: int
891: magic_compare(struct magic_line *ml1, struct magic_line *ml2)
892: {
893: if (ml1->strength < ml2->strength)
894: return (1);
895: if (ml1->strength > ml2->strength)
896: return (-1);
897:
898: /*
899: * The original file depends on the (undefined!) qsort(3) behaviour
900: * when the strength is equal. This is impossible to reproduce with an
901: * RB tree so just use the line number and hope for the best.
902: */
903: if (ml1->line < ml2->line)
904: return (-1);
905: if (ml1->line > ml2->line)
906: return (1);
907:
908: return (0);
909: }
910: RB_GENERATE(magic_tree, magic_line, node, magic_compare);
911:
912: static void
913: magic_set_mimetype(struct magic *m, u_int at, struct magic_line *ml, char *line)
914: {
915: char *mimetype, *cp;
916:
917: mimetype = line + (sizeof "!:mime") - 1;
918: while (isspace((u_char)*mimetype))
919: mimetype++;
920:
921: cp = strchr(mimetype, '#');
922: if (cp != NULL)
923: *cp = '\0';
924:
925: if (*mimetype != '\0') {
926: cp = mimetype + strlen(mimetype) - 1;
927: while (cp != mimetype && isspace((u_char)*cp))
928: *cp-- = '\0';
929: }
930:
931: cp = mimetype;
932: while (*cp != '\0') {
933: if (!isalnum((u_char)*cp) && strchr("/-.+", *cp) == NULL)
934: break;
935: cp++;
936: }
937: if (*mimetype == '\0' || *cp != '\0') {
1.7 nicm 938: magic_warnm(m, at, "invalid MIME type: %s", mimetype);
1.1 nicm 939: return;
940: }
941: if (ml == NULL) {
1.7 nicm 942: magic_warnm(m, at, "stray MIME type: %s", mimetype);
1.1 nicm 943: return;
944: }
945: ml->mimetype = xstrdup(mimetype);
946: }
947:
948: struct magic *
949: magic_load(FILE *f, const char *path, int warnings)
950: {
951: struct magic *m;
952: struct magic_line *ml = NULL, *parent, *parent0;
953: char *line, *tmp;
954: size_t size;
955: u_int at, level, n, i;
956:
957: m = xcalloc(1, sizeof *m);
958: m->path = xstrdup(path);
959: m->warnings = warnings;
960: RB_INIT(&m->tree);
961:
962: parent = NULL;
963: parent0 = NULL;
964: level = 0;
965:
966: at = 0;
967: tmp = NULL;
968: while ((line = fgetln(f, &size))) {
969: if (line[size - 1] == '\n')
970: line[size - 1] = '\0';
971: else {
972: tmp = xmalloc(size + 1);
973: memcpy(tmp, line, size);
974: tmp[size] = '\0';
975: line = tmp;
976: }
977: at++;
978:
979: while (isspace((u_char)*line))
980: line++;
981: if (*line == '\0' || *line == '#')
982: continue;
983:
984: if (strncmp (line, "!:mime", (sizeof "!:mime") - 1) == 0) {
985: magic_set_mimetype(m, at, ml, line);
986: continue;
987: }
988:
989: n = 0;
990: for (; *line == '>'; line++)
991: n++;
992:
993: ml = xcalloc(1, sizeof *ml);
994: ml->root = m;
995: ml->line = at;
996: ml->type = MAGIC_TYPE_NONE;
997: TAILQ_INIT(&ml->children);
998: ml->text = 1;
999:
1.13 nicm 1000: /*
1001: * At this point n is the level we want, level is the current
1002: * level. parent0 is the last line at the same level and parent
1003: * is the last line at the previous level.
1004: */
1.1 nicm 1005: if (n == level + 1) {
1006: parent = parent0;
1007: } else if (n < level) {
1008: for (i = n; i < level && parent != NULL; i++)
1009: parent = parent->parent;
1010: } else if (n != level) {
1011: magic_warn(ml, "level skipped (%u->%u)", level, n);
1012: free(ml);
1013: continue;
1014: }
1015: ml->parent = parent;
1016: level = n;
1017:
1018: if (magic_parse_offset(ml, &line) != 0 ||
1019: magic_parse_type(ml, &line) != 0 ||
1020: magic_parse_value(ml, &line) != 0 ||
1021: magic_set_result(ml, line) != 0) {
1.13 nicm 1022: /*
1023: * An invalid line still needs to appear in the tree in
1024: * case it has any children.
1025: */
1026: ml->type = MAGIC_TYPE_NONE;
1.1 nicm 1027: }
1028:
1029: ml->strength = magic_get_strength(ml);
1030: if (ml->parent == NULL)
1031: RB_INSERT(magic_tree, &m->tree, ml);
1032: else
1033: TAILQ_INSERT_TAIL(&ml->parent->children, ml, entry);
1034: parent0 = ml;
1035: }
1036: free(tmp);
1037:
1038: fclose(f);
1039: return (m);
1040: }