src/usr.bin/file/text.c - annotate

Return to text.c CVS log
Up to [local] / src / usr.bin / file
Annotation of src/usr.bin/file/text.c, Revision 1.2

1.2     ! nicm        1: /* $OpenBSD: text.c,v 1.1 2015/04/24 16:24:11 nicm Exp $ */
1.1       nicm        2:
                      3: /*
                      4:  * Copyright (c) 2015 Nicholas Marriott <nicm@openbsd.org>
                      5:  *
                      6:  * Permission to use, copy, modify, and distribute this software for any
                      7:  * purpose with or without fee is hereby granted, provided that the above
                      8:  * copyright notice and this permission notice appear in all copies.
                      9:  *
                     10:  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
                     11:  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
                     12:  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
                     13:  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
                     14:  * WHATSOEVER RESULTING FROM LOSS OF MIND, USE, DATA OR PROFITS, WHETHER
                     15:  * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
                     16:  * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
                     17:  */
                     18:
                     19: #include <sys/types.h>
                     20:
                     21: #include <ctype.h>
                     22: #include <string.h>
                     23:
                     24: #include "file.h"
                     25: #include "magic.h"
                     26: #include "xmalloc.h"
                     27:
                     28: static const char *text_words[][3] = {
                     29:        { "msgid", "PO (gettext message catalogue)", "text/x-po" },
                     30:        { "dnl", "M4 macro language pre-processor", "text/x-m4" },
                     31:        { "import", "Java program", "text/x-java" },
                     32:        { "\"libhdr\"", "BCPL program", "text/x-bcpl" },
                     33:        { "\"LIBHDR\"", "BCPL program", "text/x-bcpl" },
                     34:        { "//", "C++ program", "text/x-c++" },
                     35:        { "virtual", "C++ program", "text/x-c++" },
                     36:        { "class", "C++ program", "text/x-c++" },
                     37:        { "public:", "C++ program", "text/x-c++" },
                     38:        { "private:", "C++ program", "text/x-c++" },
                     39:        { "/*", "C program", "text/x-c" },
                     40:        { "#include", "C program", "text/x-c" },
                     41:        { "char", "C program", "text/x-c" },
                     42:        { "The", "English", "text/plain" },
                     43:        { "the", "English", "text/plain" },
                     44:        { "double", "C program", "text/x-c" },
                     45:        { "extern", "C program", "text/x-c" },
                     46:        { "float", "C program", "text/x-c" },
                     47:        { "struct", "C program", "text/x-c" },
                     48:        { "union", "C program", "text/x-c" },
                     49:        { "CFLAGS", "make commands", "text/x-makefile" },
                     50:        { "LDFLAGS", "make commands", "text/x-makefile" },
                     51:        { "all:", "make commands", "text/x-makefile" },
                     52:        { ".PRECIOUS", "make commands", "text/x-makefile" },
                     53:        { ".ascii", "assembler program", "text/x-asm" },
                     54:        { ".asciiz", "assembler program", "text/x-asm" },
                     55:        { ".byte", "assembler program", "text/x-asm" },
                     56:        { ".even", "assembler program", "text/x-asm" },
                     57:        { ".globl", "assembler program", "text/x-asm" },
                     58:        { ".text", "assembler program", "text/x-asm" },
                     59:        { "clr", "assembler program", "text/x-asm" },
                     60:        { "(input", "Pascal program", "text/x-pascal" },
                     61:        { "program", "Pascal program", "text/x-pascal" },
                     62:        { "record", "Pascal program", "text/x-pascal" },
                     63:        { "dcl", "PL/1 program", "text/x-pl1" },
                     64:        { "Received:", "mail", "text/x-mail" },
                     65:        { ">From", "mail", "text/x-mail" },
                     66:        { "Return-Path:", "mail", "text/x-mail" },
                     67:        { "Cc:", "mail", "text/x-mail" },
                     68:        { "Newsgroups:", "news", "text/x-news" },
                     69:        { "Path:", "news", "text/x-news" },
                     70:        { "Organization:", "news", "text/x-news" },
                     71:        { "href=", "HTML document", "text/html" },
                     72:        { "HREF=", "HTML document", "text/html" },
                     73:        { "<body", "HTML document", "text/html" },
                     74:        { "<BODY", "HTML document", "text/html" },
                     75:        { "<html", "HTML document", "text/html" },
                     76:        { "<HTML", "HTML document", "text/html" },
                     77:        { "<!--", "HTML document", "text/html" },
                     78:        { NULL, NULL, NULL }
                     79: };
                     80:
                     81: static int
                     82: text_is_ascii(u_char c)
                     83: {
                     84:        const char      cc[] = "\007\010\011\012\014\015\033";
                     85:
                     86:        if (c == '\0')
                     87:                return (0);
                     88:        if (strchr(cc, c) != NULL)
                     89:                return (1);
                     90:        return (c > 31 && c < 127);
                     91: }
                     92:
                     93: static int
                     94: text_is_latin1(u_char c)
                     95: {
                     96:        if (c >= 160)
                     97:                return (1);
                     98:        return (text_is_ascii(c));
                     99: }
                    100:
                    101: static int
                    102: text_is_extended(u_char c)
                    103: {
                    104:        if (c >= 128)
                    105:                return (1);
                    106:        return (text_is_ascii(c));
                    107: }
                    108:
                    109: static int
                    110: text_try_test(const void *base, size_t size, int (*f)(u_char))
                    111: {
                    112:        const u_char    *data = base;
                    113:        size_t           offset;
                    114:
                    115:        for (offset = 0; offset < size; offset++) {
                    116:                if (!f(data[offset]))
                    117:                        return (0);
                    118:        }
                    119:        return (1);
                    120: }
                    121:
                    122: const char *
                    123: text_get_type(const void *base, size_t size)
                    124: {
                    125:        if (text_try_test(base, size, text_is_ascii))
                    126:                return ("ASCII");
                    127:        if (text_try_test(base, size, text_is_latin1))
                    128:                return ("ISO-8859");
                    129:        if (text_try_test(base, size, text_is_extended))
                    130:                return ("Non-ISO extended-ASCII");
                    131:        return (NULL);
                    132: }
                    133:
                    134: const char *
                    135: text_try_words(const void *base, size_t size, int flags)
                    136: {
                    137:        const char      *cp, *end, *next, *word;
                    138:        size_t           wordlen;
                    139:        u_int            i;
                    140:
1.2     ! nicm      141:        end = (char *)base + size;
1.1       nicm      142:        for (cp = base; cp != end; /* nothing */) {
                    143:                while (cp != end && isspace((u_char)*cp))
                    144:                        cp++;
                    145:
                    146:                next = cp;
                    147:                while (next != end && !isspace((u_char)*next))
                    148:                        next++;
                    149:
                    150:                for (i = 0; /* nothing */; i++) {
                    151:                        word = text_words[i][0];
                    152:                        if (word == NULL)
                    153:                                break;
                    154:                        wordlen = strlen(word);
                    155:
                    156:                        if ((size_t)(next - cp) != wordlen)
                    157:                                continue;
                    158:                        if (memcmp(cp, word, wordlen) != 0)
                    159:                                continue;
                    160:                        if (flags & MAGIC_TEST_MIME)
                    161:                                return (text_words[i][2]);
                    162:                        return (text_words[i][1]);
                    163:                }
                    164:
                    165:                cp = next;
                    166:        }
                    167:        return (NULL);
                    168: }