Annotation of src/usr.bin/file/magic-load.c, Revision 1.25
1.25 ! brynet 1: /* $OpenBSD: magic-load.c,v 1.24 2017/04/18 14:16:48 nicm Exp $ */
1.1 nicm 2:
3: /*
4: * Copyright (c) 2015 Nicholas Marriott <nicm@openbsd.org>
5: *
6: * Permission to use, copy, modify, and distribute this software for any
7: * purpose with or without fee is hereby granted, provided that the above
8: * copyright notice and this permission notice appear in all copies.
9: *
10: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14: * WHATSOEVER RESULTING FROM LOSS OF MIND, USE, DATA OR PROFITS, WHETHER
15: * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
16: * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17: */
18:
19: #include <sys/types.h>
20:
21: #include <ctype.h>
22: #include <errno.h>
23: #include <limits.h>
24: #include <regex.h>
25: #include <stdarg.h>
26: #include <stdio.h>
27: #include <stdlib.h>
28: #include <string.h>
29:
30: #include "magic.h"
31: #include "xmalloc.h"
32:
/*
 * Convert an octal digit character to its numeric value.
 * Returns 0-7 for '0'-'7' and -1 for any other character.
 */
static int
magic_odigit(u_char c)
{
	return ((c < '0' || c > '7') ? -1 : c - '0');
}
40:
/*
 * Convert a hexadecimal digit character (either case) to its numeric value.
 * Returns 0-15 for a valid digit and -1 for any other character.
 */
static int
magic_xdigit(u_char c)
{
	int	value = -1;

	if (c >= '0' && c <= '9')
		value = c - '0';
	else if (c >= 'a' && c <= 'f')
		value = c - 'a' + 10;
	else if (c >= 'A' && c <= 'F')
		value = c - 'A' + 10;
	return (value);
}
52:
53: static void
54: magic_mark_text(struct magic_line *ml, int text)
55: {
56: do {
57: ml->text = text;
58: ml = ml->parent;
59: } while (ml != NULL);
60: }
61:
/*
 * Compile the extended regular expression p into re (match/no-match only,
 * no subexpression reporting). On failure a warning naming the pattern is
 * issued against ml and -1 is returned; on success 0 is returned.
 */
static int
magic_make_pattern(struct magic_line *ml, const char *name, regex_t *re,
    const char *p)
{
	char	msg[256];
	int	rc;

	rc = regcomp(re, p, REG_EXTENDED|REG_NOSUB);
	if (rc == 0)
		return (0);

	regerror(rc, re, msg, sizeof msg);
	magic_warn(ml, "bad %s pattern: %s", name, msg);
	return (-1);
}
77:
78: static int
79: magic_set_result(struct magic_line *ml, const char *s)
80: {
1.24 nicm 81: const char *fmt, *endfmt, *cp;
1.1 nicm 82: regex_t *re = NULL;
83: regmatch_t pmatch;
84: size_t fmtlen;
85:
86: while (isspace((u_char)*s))
87: s++;
88: if (*s == '\0') {
89: ml->result = NULL;
90: return (0);
91: }
92: ml->result = xstrdup(s);
93:
94: fmt = NULL;
95: for (cp = s; *cp != '\0'; cp++) {
96: if (cp[0] == '%' && cp[1] != '%') {
97: if (fmt != NULL) {
98: magic_warn(ml, "multiple formats");
99: return (-1);
100: }
101: fmt = cp;
102: }
103: }
104: if (fmt == NULL)
105: return (0);
106: fmt++;
107:
108: for (endfmt = fmt; *endfmt != '\0'; endfmt++) {
109: if (strchr("diouxXeEfFgGsc", *endfmt) != NULL)
110: break;
111: }
112: if (*endfmt == '\0') {
113: magic_warn(ml, "unterminated format");
114: return (-1);
115: }
116: fmtlen = endfmt + 1 - fmt;
117: if (fmtlen > 32) {
118: magic_warn(ml, "format too long");
119: return (-1);
120: }
121:
122: if (*endfmt == 's') {
123: switch (ml->type) {
124: case MAGIC_TYPE_DATE:
125: case MAGIC_TYPE_LDATE:
126: case MAGIC_TYPE_UDATE:
127: case MAGIC_TYPE_ULDATE:
128: case MAGIC_TYPE_BEDATE:
129: case MAGIC_TYPE_BELDATE:
130: case MAGIC_TYPE_UBEDATE:
131: case MAGIC_TYPE_UBELDATE:
132: case MAGIC_TYPE_QDATE:
133: case MAGIC_TYPE_QLDATE:
134: case MAGIC_TYPE_UQDATE:
135: case MAGIC_TYPE_UQLDATE:
136: case MAGIC_TYPE_BEQDATE:
137: case MAGIC_TYPE_BEQLDATE:
138: case MAGIC_TYPE_UBEQDATE:
139: case MAGIC_TYPE_UBEQLDATE:
140: case MAGIC_TYPE_LEQDATE:
141: case MAGIC_TYPE_LEQLDATE:
142: case MAGIC_TYPE_ULEQDATE:
143: case MAGIC_TYPE_ULEQLDATE:
144: case MAGIC_TYPE_LEDATE:
145: case MAGIC_TYPE_LELDATE:
146: case MAGIC_TYPE_ULEDATE:
147: case MAGIC_TYPE_ULELDATE:
148: case MAGIC_TYPE_MEDATE:
149: case MAGIC_TYPE_MELDATE:
150: case MAGIC_TYPE_STRING:
151: case MAGIC_TYPE_PSTRING:
152: case MAGIC_TYPE_BESTRING16:
153: case MAGIC_TYPE_LESTRING16:
154: case MAGIC_TYPE_REGEX:
155: case MAGIC_TYPE_SEARCH:
156: break;
157: default:
158: ml->stringify = 1;
159: break;
160: }
161: }
162:
163: if (!ml->root->compiled) {
164: /*
165: * XXX %ld (and %lu and so on) is invalid on 64-bit platforms
166: * with byte, short, long. We get lucky because our first and
167: * only argument ends up in a register. Accept it for now.
168: */
169: if (magic_make_pattern(ml, "short", &ml->root->format_short,
170: "^-?[0-9]*(\\.[0-9]*)?(c|(l|h|hh)?[iduxX])$") != 0)
171: return (-1);
172: if (magic_make_pattern(ml, "long", &ml->root->format_long,
173: "^-?[0-9]*(\\.[0-9]*)?(c|(l|h|hh)?[iduxX])$") != 0)
174: return (-1);
175: if (magic_make_pattern(ml, "quad", &ml->root->format_quad,
176: "^-?[0-9]*(\\.[0-9]*)?ll[iduxX]$") != 0)
177: return (-1);
178: if (magic_make_pattern(ml, "float", &ml->root->format_float,
179: "^-?[0-9]*(\\.[0-9]*)?[eEfFgG]$") != 0)
180: return (-1);
181: if (magic_make_pattern(ml, "string", &ml->root->format_string,
182: "^-?[0-9]*(\\.[0-9]*)?s$") != 0)
183: return (-1);
184: ml->root->compiled = 1;
185: }
186:
187: if (ml->stringify)
188: re = &ml->root->format_string;
189: else {
190: switch (ml->type) {
191: case MAGIC_TYPE_NONE:
1.17 nicm 192: case MAGIC_TYPE_BESTRING16:
193: case MAGIC_TYPE_LESTRING16:
1.22 nicm 194: case MAGIC_TYPE_NAME:
195: case MAGIC_TYPE_USE:
1.1 nicm 196: return (0); /* don't use result */
197: case MAGIC_TYPE_BYTE:
198: case MAGIC_TYPE_UBYTE:
199: case MAGIC_TYPE_SHORT:
200: case MAGIC_TYPE_USHORT:
201: case MAGIC_TYPE_BESHORT:
202: case MAGIC_TYPE_UBESHORT:
203: case MAGIC_TYPE_LESHORT:
204: case MAGIC_TYPE_ULESHORT:
205: re = &ml->root->format_short;
206: break;
207: case MAGIC_TYPE_LONG:
208: case MAGIC_TYPE_ULONG:
209: case MAGIC_TYPE_BELONG:
210: case MAGIC_TYPE_UBELONG:
211: case MAGIC_TYPE_LELONG:
212: case MAGIC_TYPE_ULELONG:
213: case MAGIC_TYPE_MELONG:
214: re = &ml->root->format_long;
215: break;
216: case MAGIC_TYPE_QUAD:
217: case MAGIC_TYPE_UQUAD:
218: case MAGIC_TYPE_BEQUAD:
219: case MAGIC_TYPE_UBEQUAD:
220: case MAGIC_TYPE_LEQUAD:
221: case MAGIC_TYPE_ULEQUAD:
222: re = &ml->root->format_quad;
223: break;
224: case MAGIC_TYPE_FLOAT:
225: case MAGIC_TYPE_BEFLOAT:
226: case MAGIC_TYPE_LEFLOAT:
227: case MAGIC_TYPE_DOUBLE:
228: case MAGIC_TYPE_BEDOUBLE:
229: case MAGIC_TYPE_LEDOUBLE:
230: re = &ml->root->format_float;
231: break;
232: case MAGIC_TYPE_DATE:
233: case MAGIC_TYPE_LDATE:
234: case MAGIC_TYPE_UDATE:
235: case MAGIC_TYPE_ULDATE:
236: case MAGIC_TYPE_BEDATE:
237: case MAGIC_TYPE_BELDATE:
238: case MAGIC_TYPE_UBEDATE:
239: case MAGIC_TYPE_UBELDATE:
240: case MAGIC_TYPE_QDATE:
241: case MAGIC_TYPE_QLDATE:
242: case MAGIC_TYPE_UQDATE:
243: case MAGIC_TYPE_UQLDATE:
244: case MAGIC_TYPE_BEQDATE:
245: case MAGIC_TYPE_BEQLDATE:
246: case MAGIC_TYPE_UBEQDATE:
247: case MAGIC_TYPE_UBEQLDATE:
248: case MAGIC_TYPE_LEQDATE:
249: case MAGIC_TYPE_LEQLDATE:
250: case MAGIC_TYPE_ULEQDATE:
251: case MAGIC_TYPE_ULEQLDATE:
252: case MAGIC_TYPE_LEDATE:
253: case MAGIC_TYPE_LELDATE:
254: case MAGIC_TYPE_ULEDATE:
255: case MAGIC_TYPE_ULELDATE:
256: case MAGIC_TYPE_MEDATE:
257: case MAGIC_TYPE_MELDATE:
258: case MAGIC_TYPE_STRING:
259: case MAGIC_TYPE_PSTRING:
260: case MAGIC_TYPE_REGEX:
261: case MAGIC_TYPE_SEARCH:
1.20 nicm 262: case MAGIC_TYPE_DEFAULT:
263: case MAGIC_TYPE_CLEAR:
1.1 nicm 264: re = &ml->root->format_string;
265: break;
266: }
267: }
268:
269: pmatch.rm_so = 0;
270: pmatch.rm_eo = fmtlen;
271: if (regexec(re, fmt, 1, &pmatch, REG_STARTEND) != 0) {
272: magic_warn(ml, "bad format for %s: %%%.*s", ml->type_string,
273: (int)fmtlen, fmt);
274: return (-1);
275: }
276:
277: return (0);
278: }
279:
280: static u_int
281: magic_get_strength(struct magic_line *ml)
282: {
283: int n;
284: size_t size;
285:
1.13 nicm 286: if (ml->type == MAGIC_TYPE_NONE)
287: return (0);
288:
1.18 nicm 289: if (ml->test_not || ml->test_operator == 'x') {
290: n = 1;
291: goto skip;
292: }
1.1 nicm 293:
1.5 nicm 294: n = 2 * MAGIC_STRENGTH_MULTIPLIER;
1.1 nicm 295: switch (ml->type) {
296: case MAGIC_TYPE_NONE:
297: case MAGIC_TYPE_DEFAULT:
298: return (0);
1.20 nicm 299: case MAGIC_TYPE_CLEAR:
1.22 nicm 300: case MAGIC_TYPE_NAME:
301: case MAGIC_TYPE_USE:
1.20 nicm 302: break;
1.1 nicm 303: case MAGIC_TYPE_BYTE:
304: case MAGIC_TYPE_UBYTE:
305: n += 1 * MAGIC_STRENGTH_MULTIPLIER;
306: break;
307: case MAGIC_TYPE_SHORT:
308: case MAGIC_TYPE_USHORT:
309: case MAGIC_TYPE_BESHORT:
310: case MAGIC_TYPE_UBESHORT:
311: case MAGIC_TYPE_LESHORT:
312: case MAGIC_TYPE_ULESHORT:
313: n += 2 * MAGIC_STRENGTH_MULTIPLIER;
314: break;
315: case MAGIC_TYPE_LONG:
316: case MAGIC_TYPE_ULONG:
317: case MAGIC_TYPE_FLOAT:
318: case MAGIC_TYPE_DATE:
319: case MAGIC_TYPE_LDATE:
320: case MAGIC_TYPE_UDATE:
321: case MAGIC_TYPE_ULDATE:
322: case MAGIC_TYPE_BELONG:
323: case MAGIC_TYPE_UBELONG:
324: case MAGIC_TYPE_BEFLOAT:
325: case MAGIC_TYPE_BEDATE:
326: case MAGIC_TYPE_BELDATE:
327: case MAGIC_TYPE_UBEDATE:
328: case MAGIC_TYPE_UBELDATE:
329: n += 4 * MAGIC_STRENGTH_MULTIPLIER;
330: break;
331: case MAGIC_TYPE_QUAD:
332: case MAGIC_TYPE_UQUAD:
333: case MAGIC_TYPE_DOUBLE:
334: case MAGIC_TYPE_QDATE:
335: case MAGIC_TYPE_QLDATE:
336: case MAGIC_TYPE_UQDATE:
337: case MAGIC_TYPE_UQLDATE:
338: case MAGIC_TYPE_BEQUAD:
339: case MAGIC_TYPE_UBEQUAD:
340: case MAGIC_TYPE_BEDOUBLE:
341: case MAGIC_TYPE_BEQDATE:
342: case MAGIC_TYPE_BEQLDATE:
343: case MAGIC_TYPE_UBEQDATE:
344: case MAGIC_TYPE_UBEQLDATE:
345: case MAGIC_TYPE_LEQUAD:
346: case MAGIC_TYPE_ULEQUAD:
347: case MAGIC_TYPE_LEDOUBLE:
348: case MAGIC_TYPE_LEQDATE:
349: case MAGIC_TYPE_LEQLDATE:
350: case MAGIC_TYPE_ULEQDATE:
351: case MAGIC_TYPE_ULEQLDATE:
352: case MAGIC_TYPE_LELONG:
353: case MAGIC_TYPE_ULELONG:
354: case MAGIC_TYPE_LEFLOAT:
355: case MAGIC_TYPE_LEDATE:
356: case MAGIC_TYPE_LELDATE:
357: case MAGIC_TYPE_ULEDATE:
358: case MAGIC_TYPE_ULELDATE:
359: case MAGIC_TYPE_MELONG:
360: case MAGIC_TYPE_MEDATE:
361: case MAGIC_TYPE_MELDATE:
362: n += 8 * MAGIC_STRENGTH_MULTIPLIER;
363: break;
364: case MAGIC_TYPE_STRING:
365: case MAGIC_TYPE_PSTRING:
366: n += ml->test_string_size * MAGIC_STRENGTH_MULTIPLIER;
367: break;
368: case MAGIC_TYPE_BESTRING16:
369: case MAGIC_TYPE_LESTRING16:
370: n += ml->test_string_size * MAGIC_STRENGTH_MULTIPLIER / 2;
371: break;
372: case MAGIC_TYPE_REGEX:
373: case MAGIC_TYPE_SEARCH:
374: size = MAGIC_STRENGTH_MULTIPLIER / ml->test_string_size;
375: if (size < 1)
376: size = 1;
377: n += ml->test_string_size * size;
378: break;
379: }
380: switch (ml->test_operator) {
381: case '=':
382: n += MAGIC_STRENGTH_MULTIPLIER;
383: break;
384: case '<':
385: case '>':
386: case '[':
387: case ']':
388: n -= 2 * MAGIC_STRENGTH_MULTIPLIER;
389: break;
390: case '^':
391: case '&':
392: n -= MAGIC_STRENGTH_MULTIPLIER;
393: break;
394: }
1.18 nicm 395:
396: skip:
397: switch (ml->strength_operator) {
398: case '+':
399: n += ml->strength_value;
400: break;
401: case '-':
402: n -= ml->strength_value;
403: break;
404: case '*':
405: n *= ml->strength_value;
406: break;
407: case '/':
408: n /= ml->strength_value;
409: break;
410: }
1.1 nicm 411: return (n <= 0 ? 1 : n);
412: }
413:
414: static int
415: magic_get_string(char **line, char *out, size_t *outlen)
416: {
417: char *start, *cp, c;
418: int d0, d1, d2;
419:
420: start = out;
421: for (cp = *line; *cp != '\0' && !isspace((u_char)*cp); cp++) {
422: if (*cp != '\\') {
423: *out++ = *cp;
424: continue;
425: }
426:
427: switch (c = *++cp) {
1.3 nicm 428: case '\0': /* end of line */
429: return (-1);
1.1 nicm 430: case ' ':
431: *out++ = ' ';
432: break;
433: case '0':
434: case '1':
435: case '2':
436: case '3':
437: case '4':
438: case '5':
439: case '6':
440: case '7':
441: d0 = magic_odigit(cp[0]);
442: if (cp[0] != '\0')
443: d1 = magic_odigit(cp[1]);
444: else
445: d1 = -1;
446: if (cp[0] != '\0' && cp[1] != '\0')
447: d2 = magic_odigit(cp[2]);
448: else
449: d2 = -1;
450:
451: if (d0 != -1 && d1 != -1 && d2 != -1) {
452: *out = d2 | (d1 << 3) | (d0 << 6);
453: cp += 2;
454: } else if (d0 != -1 && d1 != -1) {
455: *out = d1 | (d0 << 3);
456: cp++;
457: } else if (d0 != -1)
458: *out = d0;
459: else
460: return (-1);
461: out++;
462: break;
463: case 'x':
464: d0 = magic_xdigit(cp[1]);
465: if (cp[1] != '\0')
466: d1 = magic_xdigit(cp[2]);
467: else
468: d1 = -1;
469:
470: if (d0 != -1 && d1 != -1) {
471: *out = d1 | (d0 << 4);
472: cp += 2;
473: } else if (d0 != -1) {
474: *out = d0;
475: cp++;
476: } else
477: return (-1);
478: out++;
479:
480: break;
481: case 'a':
482: *out++ = '\a';
483: break;
484: case 'b':
485: *out++ = '\b';
486: break;
487: case 't':
488: *out++ = '\t';
489: break;
490: case 'f':
491: *out++ = '\f';
492: break;
493: case 'n':
494: *out++ = '\n';
495: break;
496: case 'r':
497: *out++ = '\r';
498: break;
499: case '\\':
500: *out++ = '\\';
501: break;
502: case '\'':
503: *out++ = '\'';
504: break;
505: case '\"':
506: *out++ = '\"';
507: break;
508: default:
509: *out++ = c;
510: break;
511: }
512: }
513: *out = '\0';
514: *outlen = out - start;
515:
516: *line = cp;
517: return (0);
518: }
519:
520: static int
521: magic_parse_offset(struct magic_line *ml, char **line)
522: {
523: char *copy, *s, *cp, *endptr;
524:
525: while (isspace((u_char)**line))
526: (*line)++;
527: copy = s = cp = xmalloc(strlen(*line) + 1);
528: while (**line != '\0' && !isspace((u_char)**line))
529: *cp++ = *(*line)++;
530: *cp = '\0';
531:
532: ml->offset = 0;
533: ml->offset_relative = 0;
534:
535: ml->indirect_type = ' ';
536: ml->indirect_relative = 0;
537: ml->indirect_offset = 0;
538: ml->indirect_operator = ' ';
539: ml->indirect_operand = 0;
540:
541: if (*s == '&') {
542: ml->offset_relative = 1;
543: s++;
544: }
545:
546: if (*s != '(') {
547: endptr = magic_strtoll(s, &ml->offset);
548: if (endptr == NULL || *endptr != '\0') {
549: magic_warn(ml, "missing closing bracket");
550: goto fail;
551: }
552: if (ml->offset < 0 && !ml->offset_relative) {
553: magic_warn(ml, "negative absolute offset");
554: goto fail;
555: }
556: goto done;
557: }
558: s++;
559:
560: if (*s == '&') {
561: ml->indirect_relative = 1;
562: s++;
563: }
564:
565: endptr = magic_strtoll(s, &ml->indirect_offset);
566: if (endptr == NULL) {
1.8 nicm 567: magic_warn(ml, "can't parse offset: %s", s);
1.1 nicm 568: goto fail;
569: }
570: s = endptr;
571: if (*s == ')')
572: goto done;
573:
574: if (*s == '.') {
575: s++;
1.6 tobias 576: if (*s == '\0' || strchr("bslBSL", *s) == NULL) {
1.8 nicm 577: magic_warn(ml, "unknown offset type: %c", *s);
1.1 nicm 578: goto fail;
579: }
580: ml->indirect_type = *s;
581: s++;
582: if (*s == ')')
583: goto done;
584: }
585:
1.6 tobias 586: if (*s == '\0' || strchr("+-*", *s) == NULL) {
1.8 nicm 587: magic_warn(ml, "unknown offset operator: %c", *s);
1.1 nicm 588: goto fail;
589: }
590: ml->indirect_operator = *s;
591: s++;
592: if (*s == ')')
593: goto done;
594:
595: if (*s == '(') {
596: s++;
597: endptr = magic_strtoll(s, &ml->indirect_operand);
598: if (endptr == NULL || *endptr != ')') {
599: magic_warn(ml, "missing closing bracket");
600: goto fail;
601: }
602: if (*++endptr != ')') {
603: magic_warn(ml, "missing closing bracket");
604: goto fail;
605: }
606: } else {
607: endptr = magic_strtoll(s, &ml->indirect_operand);
608: if (endptr == NULL || *endptr != ')') {
609: magic_warn(ml, "missing closing bracket");
610: goto fail;
611: }
612: }
613:
614: done:
615: free(copy);
616: return (0);
617:
618: fail:
619: free(copy);
620: return (-1);
621: }
622:
623: static int
624: magic_parse_type(struct magic_line *ml, char **line)
625: {
626: char *copy, *s, *cp, *endptr;
627:
628: while (isspace((u_char)**line))
629: (*line)++;
630: copy = s = cp = xmalloc(strlen(*line) + 1);
631: while (**line != '\0' && !isspace((u_char)**line))
632: *cp++ = *(*line)++;
633: *cp = '\0';
634:
635: ml->type = MAGIC_TYPE_NONE;
636: ml->type_operator = ' ';
637: ml->type_operand = 0;
638:
1.23 nicm 639: if (strcmp(s, "name") == 0) {
1.22 nicm 640: ml->type = MAGIC_TYPE_NAME;
641: ml->type_string = xstrdup(s);
642: goto done;
643: }
1.23 nicm 644: if (strcmp(s, "use") == 0) {
1.22 nicm 645: ml->type = MAGIC_TYPE_USE;
646: ml->type_string = xstrdup(s);
647: goto done;
648: }
649:
1.16 nicm 650: if (strncmp(s, "string", (sizeof "string") - 1) == 0 ||
651: strncmp(s, "ustring", (sizeof "ustring") - 1) == 0) {
652: if (*s == 'u')
653: ml->type_string = xstrdup(s + 1);
654: else
655: ml->type_string = xstrdup(s);
1.1 nicm 656: ml->type = MAGIC_TYPE_STRING;
657: magic_mark_text(ml, 0);
658: goto done;
659: }
1.16 nicm 660: if (strncmp(s, "pstring", (sizeof "pstring") - 1) == 0 ||
661: strncmp(s, "upstring", (sizeof "upstring") - 1) == 0) {
662: if (*s == 'u')
663: ml->type_string = xstrdup(s + 1);
664: else
665: ml->type_string = xstrdup(s);
666: ml->type = MAGIC_TYPE_PSTRING;
667: magic_mark_text(ml, 0);
668: goto done;
669: }
670: if (strncmp(s, "search", (sizeof "search") - 1) == 0 ||
671: strncmp(s, "usearch", (sizeof "usearch") - 1) == 0) {
672: if (*s == 'u')
673: ml->type_string = xstrdup(s + 1);
674: else
675: ml->type_string = xstrdup(s);
1.1 nicm 676: ml->type = MAGIC_TYPE_SEARCH;
677: goto done;
678: }
1.16 nicm 679: if (strncmp(s, "regex", (sizeof "regex") - 1) == 0 ||
680: strncmp(s, "uregex", (sizeof "uregex") - 1) == 0) {
681: if (*s == 'u')
682: ml->type_string = xstrdup(s + 1);
683: else
684: ml->type_string = xstrdup(s);
1.1 nicm 685: ml->type = MAGIC_TYPE_REGEX;
686: goto done;
687: }
1.16 nicm 688: ml->type_string = xstrdup(s);
1.1 nicm 689:
1.12 nicm 690: cp = &s[strcspn(s, "+-&/%*")];
1.1 nicm 691: if (*cp != '\0') {
692: ml->type_operator = *cp;
693: endptr = magic_strtoull(cp + 1, &ml->type_operand);
694: if (endptr == NULL || *endptr != '\0') {
1.8 nicm 695: magic_warn(ml, "can't parse operand: %s", cp + 1);
1.1 nicm 696: goto fail;
697: }
698: *cp = '\0';
699: }
700:
701: if (strcmp(s, "byte") == 0)
702: ml->type = MAGIC_TYPE_BYTE;
703: else if (strcmp(s, "short") == 0)
704: ml->type = MAGIC_TYPE_SHORT;
705: else if (strcmp(s, "long") == 0)
706: ml->type = MAGIC_TYPE_LONG;
707: else if (strcmp(s, "quad") == 0)
708: ml->type = MAGIC_TYPE_QUAD;
709: else if (strcmp(s, "ubyte") == 0)
710: ml->type = MAGIC_TYPE_UBYTE;
711: else if (strcmp(s, "ushort") == 0)
712: ml->type = MAGIC_TYPE_USHORT;
713: else if (strcmp(s, "ulong") == 0)
714: ml->type = MAGIC_TYPE_ULONG;
715: else if (strcmp(s, "uquad") == 0)
716: ml->type = MAGIC_TYPE_UQUAD;
1.16 nicm 717: else if (strcmp(s, "float") == 0 || strcmp(s, "ufloat") == 0)
1.1 nicm 718: ml->type = MAGIC_TYPE_FLOAT;
1.16 nicm 719: else if (strcmp(s, "double") == 0 || strcmp(s, "udouble") == 0)
1.1 nicm 720: ml->type = MAGIC_TYPE_DOUBLE;
721: else if (strcmp(s, "date") == 0)
722: ml->type = MAGIC_TYPE_DATE;
723: else if (strcmp(s, "qdate") == 0)
724: ml->type = MAGIC_TYPE_QDATE;
725: else if (strcmp(s, "ldate") == 0)
726: ml->type = MAGIC_TYPE_LDATE;
727: else if (strcmp(s, "qldate") == 0)
728: ml->type = MAGIC_TYPE_QLDATE;
729: else if (strcmp(s, "udate") == 0)
730: ml->type = MAGIC_TYPE_UDATE;
731: else if (strcmp(s, "uqdate") == 0)
732: ml->type = MAGIC_TYPE_UQDATE;
733: else if (strcmp(s, "uldate") == 0)
734: ml->type = MAGIC_TYPE_ULDATE;
735: else if (strcmp(s, "uqldate") == 0)
736: ml->type = MAGIC_TYPE_UQLDATE;
737: else if (strcmp(s, "beshort") == 0)
738: ml->type = MAGIC_TYPE_BESHORT;
739: else if (strcmp(s, "belong") == 0)
740: ml->type = MAGIC_TYPE_BELONG;
741: else if (strcmp(s, "bequad") == 0)
742: ml->type = MAGIC_TYPE_BEQUAD;
743: else if (strcmp(s, "ubeshort") == 0)
744: ml->type = MAGIC_TYPE_UBESHORT;
745: else if (strcmp(s, "ubelong") == 0)
746: ml->type = MAGIC_TYPE_UBELONG;
747: else if (strcmp(s, "ubequad") == 0)
748: ml->type = MAGIC_TYPE_UBEQUAD;
1.16 nicm 749: else if (strcmp(s, "befloat") == 0 || strcmp(s, "ubefloat") == 0)
1.1 nicm 750: ml->type = MAGIC_TYPE_BEFLOAT;
1.16 nicm 751: else if (strcmp(s, "bedouble") == 0 || strcmp(s, "ubedouble") == 0)
1.1 nicm 752: ml->type = MAGIC_TYPE_BEDOUBLE;
753: else if (strcmp(s, "bedate") == 0)
754: ml->type = MAGIC_TYPE_BEDATE;
755: else if (strcmp(s, "beqdate") == 0)
756: ml->type = MAGIC_TYPE_BEQDATE;
757: else if (strcmp(s, "beldate") == 0)
758: ml->type = MAGIC_TYPE_BELDATE;
759: else if (strcmp(s, "beqldate") == 0)
760: ml->type = MAGIC_TYPE_BEQLDATE;
761: else if (strcmp(s, "ubedate") == 0)
762: ml->type = MAGIC_TYPE_UBEDATE;
763: else if (strcmp(s, "ubeqdate") == 0)
764: ml->type = MAGIC_TYPE_UBEQDATE;
765: else if (strcmp(s, "ubeldate") == 0)
766: ml->type = MAGIC_TYPE_UBELDATE;
767: else if (strcmp(s, "ubeqldate") == 0)
768: ml->type = MAGIC_TYPE_UBEQLDATE;
1.16 nicm 769: else if (strcmp(s, "bestring16") == 0 || strcmp(s, "ubestring16") == 0)
1.1 nicm 770: ml->type = MAGIC_TYPE_BESTRING16;
771: else if (strcmp(s, "leshort") == 0)
772: ml->type = MAGIC_TYPE_LESHORT;
773: else if (strcmp(s, "lelong") == 0)
774: ml->type = MAGIC_TYPE_LELONG;
775: else if (strcmp(s, "lequad") == 0)
776: ml->type = MAGIC_TYPE_LEQUAD;
777: else if (strcmp(s, "uleshort") == 0)
778: ml->type = MAGIC_TYPE_ULESHORT;
779: else if (strcmp(s, "ulelong") == 0)
780: ml->type = MAGIC_TYPE_ULELONG;
781: else if (strcmp(s, "ulequad") == 0)
782: ml->type = MAGIC_TYPE_ULEQUAD;
1.16 nicm 783: else if (strcmp(s, "lefloat") == 0 || strcmp(s, "ulefloat") == 0)
1.1 nicm 784: ml->type = MAGIC_TYPE_LEFLOAT;
1.16 nicm 785: else if (strcmp(s, "ledouble") == 0 || strcmp(s, "uledouble") == 0)
1.1 nicm 786: ml->type = MAGIC_TYPE_LEDOUBLE;
787: else if (strcmp(s, "ledate") == 0)
788: ml->type = MAGIC_TYPE_LEDATE;
789: else if (strcmp(s, "leqdate") == 0)
790: ml->type = MAGIC_TYPE_LEQDATE;
791: else if (strcmp(s, "leldate") == 0)
792: ml->type = MAGIC_TYPE_LELDATE;
793: else if (strcmp(s, "leqldate") == 0)
794: ml->type = MAGIC_TYPE_LEQLDATE;
795: else if (strcmp(s, "uledate") == 0)
796: ml->type = MAGIC_TYPE_ULEDATE;
797: else if (strcmp(s, "uleqdate") == 0)
798: ml->type = MAGIC_TYPE_ULEQDATE;
799: else if (strcmp(s, "uleldate") == 0)
800: ml->type = MAGIC_TYPE_ULELDATE;
801: else if (strcmp(s, "uleqldate") == 0)
802: ml->type = MAGIC_TYPE_ULEQLDATE;
1.16 nicm 803: else if (strcmp(s, "lestring16") == 0 || strcmp(s, "ulestring16") == 0)
1.1 nicm 804: ml->type = MAGIC_TYPE_LESTRING16;
1.16 nicm 805: else if (strcmp(s, "melong") == 0 || strcmp(s, "umelong") == 0)
1.1 nicm 806: ml->type = MAGIC_TYPE_MELONG;
1.16 nicm 807: else if (strcmp(s, "medate") == 0 || strcmp(s, "umedate") == 0)
1.1 nicm 808: ml->type = MAGIC_TYPE_MEDATE;
1.16 nicm 809: else if (strcmp(s, "meldate") == 0 || strcmp(s, "umeldate") == 0)
1.1 nicm 810: ml->type = MAGIC_TYPE_MELDATE;
1.16 nicm 811: else if (strcmp(s, "default") == 0 || strcmp(s, "udefault") == 0)
1.1 nicm 812: ml->type = MAGIC_TYPE_DEFAULT;
1.20 nicm 813: else if (strcmp(s, "clear") == 0 || strcmp(s, "uclear") == 0)
814: ml->type = MAGIC_TYPE_CLEAR;
1.1 nicm 815: else {
1.8 nicm 816: magic_warn(ml, "unknown type: %s", s);
1.1 nicm 817: goto fail;
818: }
819: magic_mark_text(ml, 0);
820:
821: done:
822: free(copy);
823: return (0);
824:
825: fail:
826: free(copy);
827: return (-1);
828: }
829:
830: static int
831: magic_parse_value(struct magic_line *ml, char **line)
832: {
833: char *copy, *s, *cp, *endptr;
834: size_t slen;
1.10 nicm 835: uint64_t u;
1.1 nicm 836:
837: while (isspace((u_char)**line))
838: (*line)++;
839:
840: ml->test_operator = '=';
841: ml->test_not = 0;
842: ml->test_string = NULL;
843: ml->test_string_size = 0;
844: ml->test_unsigned = 0;
845: ml->test_signed = 0;
846:
1.9 nicm 847: if (**line == '\0')
848: return (0);
849:
1.1 nicm 850: s = *line;
851: if (s[0] == 'x' && (s[1] == '\0' || isspace((u_char)s[1]))) {
852: (*line)++;
1.20 nicm 853: ml->test_operator = 'x';
854: return (0);
855: }
856:
857: if (ml->type == MAGIC_TYPE_DEFAULT || ml->type == MAGIC_TYPE_CLEAR) {
858: magic_warn(ml, "test specified for default or clear");
1.1 nicm 859: ml->test_operator = 'x';
860: return (0);
861: }
862:
863: if (**line == '!') {
864: ml->test_not = 1;
865: (*line)++;
866: }
867:
868: switch (ml->type) {
1.22 nicm 869: case MAGIC_TYPE_NAME:
870: case MAGIC_TYPE_USE:
871: copy = s = xmalloc(strlen(*line) + 1);
872: if (magic_get_string(line, s, &slen) != 0 || slen == 0) {
873: magic_warn(ml, "can't parse string");
874: goto fail;
875: }
876: if (slen == 0 || *s == '\0' || strcmp(s, "^") == 0) {
877: magic_warn(ml, "invalid name");
878: goto fail;
879: }
880: ml->name = s;
881: return (0); /* do not free */
1.1 nicm 882: case MAGIC_TYPE_STRING:
883: case MAGIC_TYPE_PSTRING:
884: case MAGIC_TYPE_SEARCH:
885: if (**line == '>' || **line == '<' || **line == '=') {
886: ml->test_operator = **line;
887: (*line)++;
888: }
889: /* FALLTHROUGH */
890: case MAGIC_TYPE_REGEX:
1.21 nicm 891: if (**line == '=')
892: (*line)++;
1.1 nicm 893: copy = s = xmalloc(strlen(*line) + 1);
894: if (magic_get_string(line, s, &slen) != 0) {
895: magic_warn(ml, "can't parse string");
896: goto fail;
897: }
898: ml->test_string_size = slen;
899: ml->test_string = s;
900: return (0); /* do not free */
901: default:
902: break;
903: }
904:
1.9 nicm 905: while (isspace((u_char)**line))
906: (*line)++;
907: if ((*line)[0] == '<' && (*line)[1] == '=') {
908: ml->test_operator = '[';
909: (*line) += 2;
910: } else if ((*line)[0] == '>' && (*line)[1] == '=') {
911: ml->test_operator = ']';
1.1 nicm 912: (*line) += 2;
1.19 tobias 913: } else if (**line != '\0' && strchr("=<>&^", **line) != NULL) {
1.9 nicm 914: ml->test_operator = **line;
915: (*line)++;
1.1 nicm 916: }
1.9 nicm 917:
918: while (isspace((u_char)**line))
919: (*line)++;
920: copy = cp = xmalloc(strlen(*line) + 1);
1.1 nicm 921: while (**line != '\0' && !isspace((u_char)**line))
922: *cp++ = *(*line)++;
923: *cp = '\0';
924:
1.11 nicm 925: switch (ml->type) {
926: case MAGIC_TYPE_FLOAT:
927: case MAGIC_TYPE_DOUBLE:
928: case MAGIC_TYPE_BEFLOAT:
929: case MAGIC_TYPE_BEDOUBLE:
930: case MAGIC_TYPE_LEFLOAT:
931: case MAGIC_TYPE_LEDOUBLE:
932: errno = 0;
933: ml->test_double = strtod(copy, &endptr);
934: if (errno == ERANGE)
935: endptr = NULL;
936: break;
937: default:
938: if (*ml->type_string == 'u')
939: endptr = magic_strtoull(copy, &ml->test_unsigned);
940: else {
941: endptr = magic_strtoll(copy, &ml->test_signed);
942: if (endptr == NULL || *endptr != '\0') {
943: /*
944: * If we can't parse this as a signed number,
945: * try as unsigned instead.
946: */
947: endptr = magic_strtoull(copy, &u);
948: if (endptr != NULL && *endptr == '\0')
949: ml->test_signed = (int64_t)u;
950: }
1.10 nicm 951: }
1.11 nicm 952: break;
1.10 nicm 953: }
1.1 nicm 954: if (endptr == NULL || *endptr != '\0') {
1.9 nicm 955: magic_warn(ml, "can't parse number: %s", copy);
1.1 nicm 956: goto fail;
957: }
958:
959: free(copy);
960: return (0);
961:
962: fail:
963: free(copy);
964: return (-1);
965: }
966:
967: int
968: magic_compare(struct magic_line *ml1, struct magic_line *ml2)
969: {
970: if (ml1->strength < ml2->strength)
971: return (1);
972: if (ml1->strength > ml2->strength)
973: return (-1);
974:
975: /*
976: * The original file depends on the (undefined!) qsort(3) behaviour
977: * when the strength is equal. This is impossible to reproduce with an
978: * RB tree so just use the line number and hope for the best.
979: */
980: if (ml1->line < ml2->line)
981: return (-1);
982: if (ml1->line > ml2->line)
983: return (1);
984:
985: return (0);
986: }
987: RB_GENERATE(magic_tree, magic_line, node, magic_compare);
988:
1.22 nicm 989: int
990: magic_named_compare(struct magic_line *ml1, struct magic_line *ml2)
991: {
992: return (strcmp(ml1->name, ml2->name));
993: }
994: RB_GENERATE(magic_named_tree, magic_line, node, magic_named_compare);
995:
1.1 nicm 996: static void
1.18 nicm 997: magic_adjust_strength(struct magic *m, u_int at, struct magic_line *ml,
998: char *line)
999: {
1000: char *cp, *s;
1001: int64_t value;
1002:
1003: cp = line + (sizeof "!:strength") - 1;
1004: while (isspace((u_char)*cp))
1005: cp++;
1006: s = cp;
1007:
1008: cp = strchr(s, '#');
1009: if (cp != NULL)
1010: *cp = '\0';
1011: cp = s;
1012:
1.19 tobias 1013: if (*s == '\0' || strchr("+-*/", *s) == NULL) {
1.18 nicm 1014: magic_warnm(m, at, "invalid strength operator: %s", s);
1015: return;
1016: }
1017: ml->strength_operator = *cp++;
1018:
1019: while (isspace((u_char)*cp))
1020: cp++;
1021: cp = magic_strtoll(cp, &value);
1022: while (cp != NULL && isspace((u_char)*cp))
1023: cp++;
1024: if (cp == NULL || *cp != '\0' || value < 0 || value > 255) {
1025: magic_warnm(m, at, "invalid strength value: %s", s);
1026: return;
1027: }
1028: ml->strength_value = value;
1029: }
1030:
1031: static void
1.1 nicm 1032: magic_set_mimetype(struct magic *m, u_int at, struct magic_line *ml, char *line)
1033: {
1034: char *mimetype, *cp;
1035:
1036: mimetype = line + (sizeof "!:mime") - 1;
1037: while (isspace((u_char)*mimetype))
1038: mimetype++;
1039:
1040: cp = strchr(mimetype, '#');
1041: if (cp != NULL)
1042: *cp = '\0';
1043:
1044: if (*mimetype != '\0') {
1045: cp = mimetype + strlen(mimetype) - 1;
1046: while (cp != mimetype && isspace((u_char)*cp))
1047: *cp-- = '\0';
1048: }
1049:
1050: cp = mimetype;
1051: while (*cp != '\0') {
1052: if (!isalnum((u_char)*cp) && strchr("/-.+", *cp) == NULL)
1053: break;
1054: cp++;
1055: }
1056: if (*mimetype == '\0' || *cp != '\0') {
1.7 nicm 1057: magic_warnm(m, at, "invalid MIME type: %s", mimetype);
1.1 nicm 1058: return;
1059: }
1060: if (ml == NULL) {
1.7 nicm 1061: magic_warnm(m, at, "stray MIME type: %s", mimetype);
1.1 nicm 1062: return;
1063: }
1064: ml->mimetype = xstrdup(mimetype);
1065: }
1066:
1067: struct magic *
1068: magic_load(FILE *f, const char *path, int warnings)
1069: {
1070: struct magic *m;
1071: struct magic_line *ml = NULL, *parent, *parent0;
1072: char *line, *tmp;
1073: size_t size;
1074: u_int at, level, n, i;
1075:
1076: m = xcalloc(1, sizeof *m);
1077: m->path = xstrdup(path);
1078: m->warnings = warnings;
1079: RB_INIT(&m->tree);
1080:
1081: parent = NULL;
1082: parent0 = NULL;
1083: level = 0;
1084:
1085: at = 0;
1086: tmp = NULL;
1087: while ((line = fgetln(f, &size))) {
1088: if (line[size - 1] == '\n')
1089: line[size - 1] = '\0';
1090: else {
1091: tmp = xmalloc(size + 1);
1092: memcpy(tmp, line, size);
1093: tmp[size] = '\0';
1094: line = tmp;
1095: }
1096: at++;
1097:
1098: while (isspace((u_char)*line))
1099: line++;
1100: if (*line == '\0' || *line == '#')
1101: continue;
1102:
1.15 nicm 1103: if (strncmp (line, "!:mime", 6) == 0) {
1.1 nicm 1104: magic_set_mimetype(m, at, ml, line);
1.18 nicm 1105: continue;
1106: }
1107: if (strncmp (line, "!:strength", 10) == 0) {
1108: magic_adjust_strength(m, at, ml, line);
1.15 nicm 1109: continue;
1110: }
1111: if (strncmp (line, "!:", 2) == 0) {
1112: for (i = 0; i < 64 && line[i] != '\0'; i++) {
1113: if (isspace((u_char)line[i]))
1114: break;
1115: }
1116: magic_warnm(m, at, "%.*s not supported", i, line);
1.1 nicm 1117: continue;
1118: }
1119:
1120: n = 0;
1121: for (; *line == '>'; line++)
1122: n++;
1123:
1124: ml = xcalloc(1, sizeof *ml);
1125: ml->root = m;
1126: ml->line = at;
1127: ml->type = MAGIC_TYPE_NONE;
1128: TAILQ_INIT(&ml->children);
1129: ml->text = 1;
1130:
1.13 nicm 1131: /*
1132: * At this point n is the level we want, level is the current
1133: * level. parent0 is the last line at the same level and parent
1134: * is the last line at the previous level.
1135: */
1.1 nicm 1136: if (n == level + 1) {
1137: parent = parent0;
1138: } else if (n < level) {
1139: for (i = n; i < level && parent != NULL; i++)
1140: parent = parent->parent;
1141: } else if (n != level) {
1142: magic_warn(ml, "level skipped (%u->%u)", level, n);
1143: free(ml);
1144: continue;
1145: }
1146: ml->parent = parent;
1147: level = n;
1148:
1149: if (magic_parse_offset(ml, &line) != 0 ||
1150: magic_parse_type(ml, &line) != 0 ||
1151: magic_parse_value(ml, &line) != 0 ||
1152: magic_set_result(ml, line) != 0) {
1.13 nicm 1153: /*
1154: * An invalid line still needs to appear in the tree in
1155: * case it has any children.
1156: */
1157: ml->type = MAGIC_TYPE_NONE;
1.1 nicm 1158: }
1159:
1160: ml->strength = magic_get_strength(ml);
1.22 nicm 1161: if (ml->parent == NULL) {
1162: if (ml->name != NULL)
1163: RB_INSERT(magic_named_tree, &m->named, ml);
1164: else
1165: RB_INSERT(magic_tree, &m->tree, ml);
1166: } else
1.1 nicm 1167: TAILQ_INSERT_TAIL(&ml->parent->children, ml, entry);
1168: parent0 = ml;
1169: }
1170: free(tmp);
1171:
1172: return (m);
1173: }