version 1.8, 2004/05/19 02:32:35 |
version 1.9, 2008/05/08 01:40:56 |
|
|
file_ascmagic(struct magic_set *ms, const unsigned char *buf, size_t nbytes) |
file_ascmagic(struct magic_set *ms, const unsigned char *buf, size_t nbytes) |
{ |
{ |
size_t i; |
size_t i; |
unsigned char nbuf[HOWMANY+1]; /* one extra for terminating '\0' */ |
unsigned char *nbuf = NULL; |
unichar ubuf[HOWMANY+1]; /* one extra for terminating '\0' */ |
unichar *ubuf = NULL; |
size_t ulen; |
size_t ulen; |
struct names *p; |
struct names *p; |
|
int rv = -1; |
|
|
const char *code = NULL; |
const char *code = NULL; |
const char *code_mime = NULL; |
const char *code_mime = NULL; |
|
|
|
|
int has_escapes = 0; |
int has_escapes = 0; |
int has_backspace = 0; |
int has_backspace = 0; |
|
int seen_cr = 0; |
|
|
int n_crlf = 0; |
int n_crlf = 0; |
int n_lf = 0; |
int n_lf = 0; |
int n_cr = 0; |
int n_cr = 0; |
int n_nel = 0; |
int n_nel = 0; |
|
|
int last_line_end = -1; |
size_t last_line_end = (size_t)-1; |
int has_long_lines = 0; |
int has_long_lines = 0; |
|
|
/* |
/* |
* Undo the NUL-termination kindly provided by process() |
* Undo the NUL-termination kindly provided by process() |
* but leave at least one byte to look at |
* but leave at least one byte to look at |
*/ |
*/ |
|
|
while (nbytes > 1 && buf[nbytes - 1] == '\0') |
while (nbytes > 1 && buf[nbytes - 1] == '\0') |
nbytes--; |
nbytes--; |
|
|
/* nbuf and ubuf relies on this */ |
if ((nbuf = calloc(1, (nbytes + 1) * sizeof(nbuf[0]))) == NULL) |
if (nbytes > HOWMANY) |
goto done; |
nbytes = HOWMANY; |
if ((ubuf = calloc(1, (nbytes + 1) * sizeof(ubuf[0]))) == NULL) |
|
goto done; |
|
|
/* |
/* |
* Then try to determine whether it's any character code we can |
* Then try to determine whether it's any character code we can |
|
|
type = "character data"; |
type = "character data"; |
code_mime = "ebcdic"; |
code_mime = "ebcdic"; |
} else { |
} else { |
return 0; /* doesn't look like text at all */ |
rv = 0; |
|
goto done; /* doesn't look like text at all */ |
} |
} |
} |
} |
|
|
|
if (nbytes <= 1) { |
|
rv = 0; |
|
goto done; |
|
} |
|
|
/* |
/* |
* for troff, look for . + letter + letter or .\"; |
* for troff, look for . + letter + letter or .\"; |
* this must be done to disambiguate tar archives' ./file |
* this must be done to disambiguate tar archives' ./file |
|
|
* I believe Plan 9 troff allows non-ASCII characters in the names |
* I believe Plan 9 troff allows non-ASCII characters in the names |
* of macros, so this test might possibly fail on such a file. |
* of macros, so this test might possibly fail on such a file. |
*/ |
*/ |
if (*ubuf == '.') { |
if ((ms->flags & MAGIC_NO_CHECK_TROFF) == 0 && *ubuf == '.') { |
unichar *tp = ubuf + 1; |
unichar *tp = ubuf + 1; |
|
|
while (ISSPC(*tp)) |
while (ISSPC(*tp)) |
|
|
} |
} |
} |
} |
|
|
if ((*buf == 'c' || *buf == 'C') && ISSPC(buf[1])) { |
if ((ms->flags & MAGIC_NO_CHECK_FORTRAN) == 0 && |
|
(*buf == 'c' || *buf == 'C') && ISSPC(buf[1])) { |
subtype_mime = "text/fortran"; |
subtype_mime = "text/fortran"; |
subtype = "fortran program"; |
subtype = "fortran program"; |
goto subtype_identified; |
goto subtype_identified; |
|
|
|
|
/* look for tokens from names.h - this is expensive! */ |
/* look for tokens from names.h - this is expensive! */ |
|
|
|
if ((ms->flags & MAGIC_NO_CHECK_TOKENS) != 0) |
|
goto subtype_identified; |
|
|
i = 0; |
i = 0; |
while (i < ulen) { |
while (i < ulen) { |
size_t end; |
size_t end; |
|
|
* Now try to discover other details about the file. |
* Now try to discover other details about the file. |
*/ |
*/ |
for (i = 0; i < ulen; i++) { |
for (i = 0; i < ulen; i++) { |
if (i > last_line_end + MAXLINELEN) |
if (ubuf[i] == '\n') { |
has_long_lines = 1; |
if (seen_cr) |
|
n_crlf++; |
if (ubuf[i] == '\033') |
else |
has_escapes = 1; |
n_lf++; |
if (ubuf[i] == '\b') |
|
has_backspace = 1; |
|
|
|
if (ubuf[i] == '\r' && (i + 1 < ulen && ubuf[i + 1] == '\n')) { |
|
n_crlf++; |
|
last_line_end = i; |
last_line_end = i; |
} |
} else if (seen_cr) |
if (ubuf[i] == '\r' && (i + 1 >= ulen || ubuf[i + 1] != '\n')) { |
|
n_cr++; |
n_cr++; |
|
|
|
seen_cr = (ubuf[i] == '\r'); |
|
if (seen_cr) |
last_line_end = i; |
last_line_end = i; |
} |
|
if (ubuf[i] == '\n' && ((int)i - 1 < 0 || ubuf[i - 1] != '\r')){ |
|
n_lf++; |
|
last_line_end = i; |
|
} |
|
if (ubuf[i] == 0x85) { /* X3.64/ECMA-43 "next line" character */ |
if (ubuf[i] == 0x85) { /* X3.64/ECMA-43 "next line" character */ |
n_nel++; |
n_nel++; |
last_line_end = i; |
last_line_end = i; |
} |
} |
|
|
|
/* If this line is _longer_ than MAXLINELEN, remember it. */ |
|
if (i > last_line_end + MAXLINELEN) |
|
has_long_lines = 1; |
|
|
|
if (ubuf[i] == '\033') |
|
has_escapes = 1; |
|
if (ubuf[i] == '\b') |
|
has_backspace = 1; |
} |
} |
|
|
|
/* Beware, if the data has been truncated, the final CR could have |
|
been followed by a LF. If we have HOWMANY bytes, it indicates |
|
that the data might have been truncated, probably even before |
|
this function was called. */ |
|
if (seen_cr && nbytes < HOWMANY) |
|
n_cr++; |
|
|
if ((ms->flags & MAGIC_MIME)) { |
if ((ms->flags & MAGIC_MIME)) { |
if (subtype_mime) { |
if (subtype_mime) { |
if (file_printf(ms, subtype_mime) == -1) |
if (file_printf(ms, subtype_mime) == -1) |
return -1; |
goto done; |
} else { |
} else { |
if (file_printf(ms, "text/plain") == -1) |
if (file_printf(ms, "text/plain") == -1) |
return -1; |
goto done; |
} |
} |
|
|
if (code_mime) { |
if (code_mime) { |
if (file_printf(ms, "; charset=") == -1) |
if (file_printf(ms, "; charset=") == -1) |
return -1; |
goto done; |
if (file_printf(ms, code_mime) == -1) |
if (file_printf(ms, code_mime) == -1) |
return -1; |
goto done; |
} |
} |
} else { |
} else { |
if (file_printf(ms, code) == -1) |
if (file_printf(ms, code) == -1) |
return -1; |
goto done; |
|
|
if (subtype) { |
if (subtype) { |
if (file_printf(ms, " ") == -1) |
if (file_printf(ms, " ") == -1) |
return -1; |
goto done; |
if (file_printf(ms, subtype) == -1) |
if (file_printf(ms, subtype) == -1) |
return -1; |
goto done; |
} |
} |
|
|
if (file_printf(ms, " ") == -1) |
if (file_printf(ms, " ") == -1) |
return -1; |
goto done; |
if (file_printf(ms, type) == -1) |
if (file_printf(ms, type) == -1) |
return -1; |
goto done; |
|
|
if (has_long_lines) |
if (has_long_lines) |
if (file_printf(ms, ", with very long lines") == -1) |
if (file_printf(ms, ", with very long lines") == -1) |
return -1; |
goto done; |
|
|
/* |
/* |
* Only report line terminators if we find one other than LF, |
* Only report line terminators if we find one other than LF, |
|
|
if ((n_crlf == 0 && n_cr == 0 && n_nel == 0 && n_lf == 0) || |
if ((n_crlf == 0 && n_cr == 0 && n_nel == 0 && n_lf == 0) || |
(n_crlf != 0 || n_cr != 0 || n_nel != 0)) { |
(n_crlf != 0 || n_cr != 0 || n_nel != 0)) { |
if (file_printf(ms, ", with") == -1) |
if (file_printf(ms, ", with") == -1) |
return -1; |
goto done; |
|
|
if (n_crlf == 0 && n_cr == 0 && n_nel == 0 && n_lf == 0) { |
if (n_crlf == 0 && n_cr == 0 && n_nel == 0 && n_lf == 0) { |
if (file_printf(ms, " no") == -1) |
if (file_printf(ms, " no") == -1) |
return -1; |
goto done; |
} else { |
} else { |
if (n_crlf) { |
if (n_crlf) { |
if (file_printf(ms, " CRLF") == -1) |
if (file_printf(ms, " CRLF") == -1) |
return -1; |
goto done; |
if (n_cr || n_lf || n_nel) |
if (n_cr || n_lf || n_nel) |
if (file_printf(ms, ",") == -1) |
if (file_printf(ms, ",") == -1) |
return -1; |
goto done; |
} |
} |
if (n_cr) { |
if (n_cr) { |
if (file_printf(ms, " CR") == -1) |
if (file_printf(ms, " CR") == -1) |
return -1; |
goto done; |
if (n_lf || n_nel) |
if (n_lf || n_nel) |
if (file_printf(ms, ",") == -1) |
if (file_printf(ms, ",") == -1) |
return -1; |
goto done; |
} |
} |
if (n_lf) { |
if (n_lf) { |
if (file_printf(ms, " LF") == -1) |
if (file_printf(ms, " LF") == -1) |
return -1; |
goto done; |
if (n_nel) |
if (n_nel) |
if (file_printf(ms, ",") == -1) |
if (file_printf(ms, ",") == -1) |
return -1; |
goto done; |
} |
} |
if (n_nel) |
if (n_nel) |
if (file_printf(ms, " NEL") == -1) |
if (file_printf(ms, " NEL") == -1) |
return -1; |
goto done; |
} |
} |
|
|
if (file_printf(ms, " line terminators") == -1) |
if (file_printf(ms, " line terminators") == -1) |
return -1; |
goto done; |
} |
} |
|
|
if (has_escapes) |
if (has_escapes) |
if (file_printf(ms, ", with escape sequences") == -1) |
if (file_printf(ms, ", with escape sequences") == -1) |
return -1; |
goto done; |
if (has_backspace) |
if (has_backspace) |
if (file_printf(ms, ", with overstriking") == -1) |
if (file_printf(ms, ", with overstriking") == -1) |
return -1; |
goto done; |
} |
} |
|
rv = 1; |
|
done: |
|
if (nbuf) |
|
free(nbuf); |
|
if (ubuf) |
|
free(ubuf); |
|
|
return 1; |
return rv; |
} |
} |
|
|
private int |
private int |
|
|
looks_ascii(const unsigned char *buf, size_t nbytes, unichar *ubuf, |
looks_ascii(const unsigned char *buf, size_t nbytes, unichar *ubuf, |
size_t *ulen) |
size_t *ulen) |
{ |
{ |
int i; |
size_t i; |
|
|
*ulen = 0; |
*ulen = 0; |
|
|
|
|
private int |
private int |
looks_latin1(const unsigned char *buf, size_t nbytes, unichar *ubuf, size_t *ulen) |
looks_latin1(const unsigned char *buf, size_t nbytes, unichar *ubuf, size_t *ulen) |
{ |
{ |
int i; |
size_t i; |
|
|
*ulen = 0; |
*ulen = 0; |
|
|
|
|
looks_extended(const unsigned char *buf, size_t nbytes, unichar *ubuf, |
looks_extended(const unsigned char *buf, size_t nbytes, unichar *ubuf, |
size_t *ulen) |
size_t *ulen) |
{ |
{ |
int i; |
size_t i; |
|
|
*ulen = 0; |
*ulen = 0; |
|
|
|
|
private int |
private int |
looks_utf8(const unsigned char *buf, size_t nbytes, unichar *ubuf, size_t *ulen) |
looks_utf8(const unsigned char *buf, size_t nbytes, unichar *ubuf, size_t *ulen) |
{ |
{ |
int i, n; |
size_t i; |
|
int n; |
unichar c; |
unichar c; |
int gotone = 0; |
int gotone = 0; |
|
|
|
|
size_t *ulen) |
size_t *ulen) |
{ |
{ |
int bigend; |
int bigend; |
int i; |
size_t i; |
|
|
if (nbytes < 2) |
if (nbytes < 2) |
return 0; |
return 0; |
|
|
private void |
private void |
from_ebcdic(const unsigned char *buf, size_t nbytes, unsigned char *out) |
from_ebcdic(const unsigned char *buf, size_t nbytes, unsigned char *out) |
{ |
{ |
int i; |
size_t i; |
|
|
for (i = 0; i < nbytes; i++) { |
for (i = 0; i < nbytes; i++) { |
out[i] = ebcdic_to_ascii[buf[i]]; |
out[i] = ebcdic_to_ascii[buf[i]]; |