src/usr.bin/file/text.c - annotate

Return to text.c CVS log
Up to [local] / src / usr.bin / file
Annotation of src/usr.bin/file/text.c, Revision 1.1

1.1     ! nicm        1: /* $OpenBSD$ */
        !             2:
        !             3: /*
        !             4:  * Copyright (c) 2015 Nicholas Marriott <nicm@openbsd.org>
        !             5:  *
        !             6:  * Permission to use, copy, modify, and distribute this software for any
        !             7:  * purpose with or without fee is hereby granted, provided that the above
        !             8:  * copyright notice and this permission notice appear in all copies.
        !             9:  *
        !            10:  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
        !            11:  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
        !            12:  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
        !            13:  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
        !            14:  * WHATSOEVER RESULTING FROM LOSS OF MIND, USE, DATA OR PROFITS, WHETHER
        !            15:  * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
        !            16:  * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
        !            17:  */
        !            18:
        !            19: #include <sys/types.h>
        !            20:
        !            21: #include <ctype.h>
        !            22: #include <string.h>
        !            23:
        !            24: #include "file.h"
        !            25: #include "magic.h"
        !            26: #include "xmalloc.h"
        !            27:
        !            28: static const char *text_words[][3] = {
        !            29:        { "msgid", "PO (gettext message catalogue)", "text/x-po" },
        !            30:        { "dnl", "M4 macro language pre-processor", "text/x-m4" },
        !            31:        { "import", "Java program", "text/x-java" },
        !            32:        { "\"libhdr\"", "BCPL program", "text/x-bcpl" },
        !            33:        { "\"LIBHDR\"", "BCPL program", "text/x-bcpl" },
        !            34:        { "//", "C++ program", "text/x-c++" },
        !            35:        { "virtual", "C++ program", "text/x-c++" },
        !            36:        { "class", "C++ program", "text/x-c++" },
        !            37:        { "public:", "C++ program", "text/x-c++" },
        !            38:        { "private:", "C++ program", "text/x-c++" },
        !            39:        { "/*", "C program", "text/x-c" },
        !            40:        { "#include", "C program", "text/x-c" },
        !            41:        { "char", "C program", "text/x-c" },
        !            42:        { "The", "English", "text/plain" },
        !            43:        { "the", "English", "text/plain" },
        !            44:        { "double", "C program", "text/x-c" },
        !            45:        { "extern", "C program", "text/x-c" },
        !            46:        { "float", "C program", "text/x-c" },
        !            47:        { "struct", "C program", "text/x-c" },
        !            48:        { "union", "C program", "text/x-c" },
        !            49:        { "CFLAGS", "make commands", "text/x-makefile" },
        !            50:        { "LDFLAGS", "make commands", "text/x-makefile" },
        !            51:        { "all:", "make commands", "text/x-makefile" },
        !            52:        { ".PRECIOUS", "make commands", "text/x-makefile" },
        !            53:        { ".ascii", "assembler program", "text/x-asm" },
        !            54:        { ".asciiz", "assembler program", "text/x-asm" },
        !            55:        { ".byte", "assembler program", "text/x-asm" },
        !            56:        { ".even", "assembler program", "text/x-asm" },
        !            57:        { ".globl", "assembler program", "text/x-asm" },
        !            58:        { ".text", "assembler program", "text/x-asm" },
        !            59:        { "clr", "assembler program", "text/x-asm" },
        !            60:        { "(input", "Pascal program", "text/x-pascal" },
        !            61:        { "program", "Pascal program", "text/x-pascal" },
        !            62:        { "record", "Pascal program", "text/x-pascal" },
        !            63:        { "dcl", "PL/1 program", "text/x-pl1" },
        !            64:        { "Received:", "mail", "text/x-mail" },
        !            65:        { ">From", "mail", "text/x-mail" },
        !            66:        { "Return-Path:", "mail", "text/x-mail" },
        !            67:        { "Cc:", "mail", "text/x-mail" },
        !            68:        { "Newsgroups:", "news", "text/x-news" },
        !            69:        { "Path:", "news", "text/x-news" },
        !            70:        { "Organization:", "news", "text/x-news" },
        !            71:        { "href=", "HTML document", "text/html" },
        !            72:        { "HREF=", "HTML document", "text/html" },
        !            73:        { "<body", "HTML document", "text/html" },
        !            74:        { "<BODY", "HTML document", "text/html" },
        !            75:        { "<html", "HTML document", "text/html" },
        !            76:        { "<HTML", "HTML document", "text/html" },
        !            77:        { "<!--", "HTML document", "text/html" },
        !            78:        { NULL, NULL, NULL }
        !            79: };
        !            80:
        !            81: static int
        !            82: text_is_ascii(u_char c)
        !            83: {
        !            84:        const char      cc[] = "\007\010\011\012\014\015\033";
        !            85:
        !            86:        if (c == '\0')
        !            87:                return (0);
        !            88:        if (strchr(cc, c) != NULL)
        !            89:                return (1);
        !            90:        return (c > 31 && c < 127);
        !            91: }
        !            92:
        !            93: static int
        !            94: text_is_latin1(u_char c)
        !            95: {
        !            96:        if (c >= 160)
        !            97:                return (1);
        !            98:        return (text_is_ascii(c));
        !            99: }
        !           100:
        !           101: static int
        !           102: text_is_extended(u_char c)
        !           103: {
        !           104:        if (c >= 128)
        !           105:                return (1);
        !           106:        return (text_is_ascii(c));
        !           107: }
        !           108:
        !           109: static int
        !           110: text_try_test(const void *base, size_t size, int (*f)(u_char))
        !           111: {
        !           112:        const u_char    *data = base;
        !           113:        size_t           offset;
        !           114:
        !           115:        for (offset = 0; offset < size; offset++) {
        !           116:                if (!f(data[offset]))
        !           117:                        return (0);
        !           118:        }
        !           119:        return (1);
        !           120: }
        !           121:
        !           122: const char *
        !           123: text_get_type(const void *base, size_t size)
        !           124: {
        !           125:        if (text_try_test(base, size, text_is_ascii))
        !           126:                return ("ASCII");
        !           127:        if (text_try_test(base, size, text_is_latin1))
        !           128:                return ("ISO-8859");
        !           129:        if (text_try_test(base, size, text_is_extended))
        !           130:                return ("Non-ISO extended-ASCII");
        !           131:        return (NULL);
        !           132: }
        !           133:
        !           134: const char *
        !           135: text_try_words(const void *base, size_t size, int flags)
        !           136: {
        !           137:        const char      *cp, *end, *next, *word;
        !           138:        size_t           wordlen;
        !           139:        u_int            i;
        !           140:
        !           141:        end = (char*)base + size;
        !           142:        for (cp = base; cp != end; /* nothing */) {
        !           143:                while (cp != end && isspace((u_char)*cp))
        !           144:                        cp++;
        !           145:
        !           146:                next = cp;
        !           147:                while (next != end && !isspace((u_char)*next))
        !           148:                        next++;
        !           149:
        !           150:                for (i = 0; /* nothing */; i++) {
        !           151:                        word = text_words[i][0];
        !           152:                        if (word == NULL)
        !           153:                                break;
        !           154:                        wordlen = strlen(word);
        !           155:
        !           156:                        if ((size_t)(next - cp) != wordlen)
        !           157:                                continue;
        !           158:                        if (memcmp(cp, word, wordlen) != 0)
        !           159:                                continue;
        !           160:                        if (flags & MAGIC_TEST_MIME)
        !           161:                                return (text_words[i][2]);
        !           162:                        return (text_words[i][1]);
        !           163:                }
        !           164:
        !           165:                cp = next;
        !           166:        }
        !           167:        return (NULL);
        !           168: }