Annotation of src/usr.bin/file/text.c, Revision 1.1
1.1 ! nicm 1: /* $OpenBSD$ */
! 2:
! 3: /*
! 4: * Copyright (c) 2015 Nicholas Marriott <nicm@openbsd.org>
! 5: *
! 6: * Permission to use, copy, modify, and distribute this software for any
! 7: * purpose with or without fee is hereby granted, provided that the above
! 8: * copyright notice and this permission notice appear in all copies.
! 9: *
! 10: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
! 11: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
! 12: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
! 13: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
! 14: * WHATSOEVER RESULTING FROM LOSS OF MIND, USE, DATA OR PROFITS, WHETHER
! 15: * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
! 16: * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
! 17: */
! 18:
! 19: #include <sys/types.h>
! 20:
! 21: #include <ctype.h>
! 22: #include <string.h>
! 23:
! 24: #include "file.h"
! 25: #include "magic.h"
! 26: #include "xmalloc.h"
! 27:
/*
 * Words whose presence in a text file hints at what kind of text it is.
 *
 * Each row is { word, description, MIME type }. text_try_words() compares
 * column 0 against whitespace-delimited tokens of the input and returns
 * column 1, or column 2 when MAGIC_TEST_MIME is requested. The table is
 * terminated by a row whose entries are all NULL.
 *
 * Both the strings and the pointer slots are const: the table is never
 * modified at run time.
 */
static const char *const text_words[][3] = {
	{ "msgid", "PO (gettext message catalogue)", "text/x-po" },
	{ "dnl", "M4 macro language pre-processor", "text/x-m4" },
	{ "import", "Java program", "text/x-java" },
	{ "\"libhdr\"", "BCPL program", "text/x-bcpl" },
	{ "\"LIBHDR\"", "BCPL program", "text/x-bcpl" },
	{ "//", "C++ program", "text/x-c++" },
	{ "virtual", "C++ program", "text/x-c++" },
	{ "class", "C++ program", "text/x-c++" },
	{ "public:", "C++ program", "text/x-c++" },
	{ "private:", "C++ program", "text/x-c++" },
	{ "/*", "C program", "text/x-c" },
	{ "#include", "C program", "text/x-c" },
	{ "char", "C program", "text/x-c" },
	{ "The", "English", "text/plain" },
	{ "the", "English", "text/plain" },
	{ "double", "C program", "text/x-c" },
	{ "extern", "C program", "text/x-c" },
	{ "float", "C program", "text/x-c" },
	{ "struct", "C program", "text/x-c" },
	{ "union", "C program", "text/x-c" },
	{ "CFLAGS", "make commands", "text/x-makefile" },
	{ "LDFLAGS", "make commands", "text/x-makefile" },
	{ "all:", "make commands", "text/x-makefile" },
	{ ".PRECIOUS", "make commands", "text/x-makefile" },
	{ ".ascii", "assembler program", "text/x-asm" },
	{ ".asciiz", "assembler program", "text/x-asm" },
	{ ".byte", "assembler program", "text/x-asm" },
	{ ".even", "assembler program", "text/x-asm" },
	{ ".globl", "assembler program", "text/x-asm" },
	{ ".text", "assembler program", "text/x-asm" },
	{ "clr", "assembler program", "text/x-asm" },
	{ "(input", "Pascal program", "text/x-pascal" },
	{ "program", "Pascal program", "text/x-pascal" },
	{ "record", "Pascal program", "text/x-pascal" },
	{ "dcl", "PL/1 program", "text/x-pl1" },
	{ "Received:", "mail", "text/x-mail" },
	{ ">From", "mail", "text/x-mail" },
	{ "Return-Path:", "mail", "text/x-mail" },
	{ "Cc:", "mail", "text/x-mail" },
	{ "Newsgroups:", "news", "text/x-news" },
	{ "Path:", "news", "text/x-news" },
	{ "Organization:", "news", "text/x-news" },
	{ "href=", "HTML document", "text/html" },
	{ "HREF=", "HTML document", "text/html" },
	{ "<body", "HTML document", "text/html" },
	{ "<BODY", "HTML document", "text/html" },
	{ "<html", "HTML document", "text/html" },
	{ "<HTML", "HTML document", "text/html" },
	{ "<!--", "HTML document", "text/html" },
	{ NULL, NULL, NULL }
};
! 80:
/*
 * Return 1 if c is an acceptable byte for ASCII text, 0 otherwise.
 *
 * Accepted bytes are the printable range 32..126 plus a fixed set of
 * control characters. NUL and every other control or 8-bit byte are
 * rejected.
 */
static int
text_is_ascii(u_char c)
{
	switch (c) {
	case '\007':	/* BEL */
	case '\010':	/* BS */
	case '\011':	/* HT */
	case '\012':	/* LF */
	case '\014':	/* FF */
	case '\015':	/* CR */
	case '\033':	/* ESC */
		return (1);
	default:
		break;
	}

	/* NUL falls through to here and is rejected by the range test. */
	return (c >= 32 && c <= 126);
}
! 92:
/*
 * Return nonzero if c is acceptable in ISO-8859 text: any byte with value
 * 160 or above, or otherwise anything text_is_ascii() accepts.
 */
static int
text_is_latin1(u_char c)
{
	return (c >= 160 ? 1 : text_is_ascii(c));
}
! 100:
/*
 * Return nonzero if c is acceptable in extended-ASCII text: any byte with
 * value 128 or above, or otherwise anything text_is_ascii() accepts.
 */
static int
text_is_extended(u_char c)
{
	return (c >= 128 ? 1 : text_is_ascii(c));
}
! 108:
/*
 * Apply the predicate f to each of the size bytes starting at base.
 *
 * Returns 0 as soon as f rejects a byte and 1 if every byte is accepted.
 * An empty buffer (size 0) yields 1 without calling f.
 */
static int
text_try_test(const void *base, size_t size, int (*f)(u_char))
{
	const u_char	*p = base;
	size_t		 left;

	for (left = size; left > 0; left--, p++) {
		if (f(*p) == 0)
			return (0);
	}
	return (1);
}
! 121:
! 122: const char *
! 123: text_get_type(const void *base, size_t size)
! 124: {
! 125: if (text_try_test(base, size, text_is_ascii))
! 126: return ("ASCII");
! 127: if (text_try_test(base, size, text_is_latin1))
! 128: return ("ISO-8859");
! 129: if (text_try_test(base, size, text_is_extended))
! 130: return ("Non-ISO extended-ASCII");
! 131: return (NULL);
! 132: }
! 133:
! 134: const char *
! 135: text_try_words(const void *base, size_t size, int flags)
! 136: {
! 137: const char *cp, *end, *next, *word;
! 138: size_t wordlen;
! 139: u_int i;
! 140:
! 141: end = (char*)base + size;
! 142: for (cp = base; cp != end; /* nothing */) {
! 143: while (cp != end && isspace((u_char)*cp))
! 144: cp++;
! 145:
! 146: next = cp;
! 147: while (next != end && !isspace((u_char)*next))
! 148: next++;
! 149:
! 150: for (i = 0; /* nothing */; i++) {
! 151: word = text_words[i][0];
! 152: if (word == NULL)
! 153: break;
! 154: wordlen = strlen(word);
! 155:
! 156: if ((size_t)(next - cp) != wordlen)
! 157: continue;
! 158: if (memcmp(cp, word, wordlen) != 0)
! 159: continue;
! 160: if (flags & MAGIC_TEST_MIME)
! 161: return (text_words[i][2]);
! 162: return (text_words[i][1]);
! 163: }
! 164:
! 165: cp = next;
! 166: }
! 167: return (NULL);
! 168: }