=================================================================== RCS file: /cvsrepo/anoncvs/cvs/src/usr.bin/less/charset.c,v retrieving revision 1.1.1.1 retrieving revision 1.1.1.2 diff -c -r1.1.1.1 -r1.1.1.2 *** src/usr.bin/less/charset.c 1996/09/21 05:39:41 1.1.1.1 --- src/usr.bin/less/charset.c 2003/04/13 18:21:21 1.1.1.2 *************** *** 1,27 **** /* ! * Copyright (c) 1984,1985,1989,1994,1995 Mark Nudelman ! * All rights reserved. * ! * Redistribution and use in source and binary forms, with or without ! * modification, are permitted provided that the following conditions ! * are met: ! * 1. Redistributions of source code must retain the above copyright ! * notice, this list of conditions and the following disclaimer. ! * 2. Redistributions in binary form must reproduce the above copyright ! * notice in the documentation and/or other materials provided with ! * the distribution. * ! * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY ! * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ! * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR ! * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE ! * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR ! * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT ! * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR ! * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, ! * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE ! * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN ! * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ --- 1,11 ---- /* ! * Copyright (C) 1984-2002 Mark Nudelman * ! * You may distribute under the terms of either the GNU General Public ! * License or the Less License, as specified in the README file. * ! * For more information about less, or for information on how to ! * contact the author, see the README file. */ *************** *** 36,57 **** #include #endif /* * Predefined character sets, * selected by the LESSCHARSET environment variable. */ struct charset { char *name; char *desc; } charsets[] = { ! { "ascii", "8bcccbcc18b95.b" }, ! { "latin1", "8bcccbcc18b95.33b." }, ! { "dos", "8bcccbcc12bc5b95.b." }, ! { "koi8-r", "8bcccbcc18b95.b128." }, ! { "next", "8bcccbcc18b95.bb125.bb" }, ! { NULL } }; #define IS_BINARY_CHAR 01 #define IS_CONTROL_CHAR 02 --- 20,56 ---- #include #endif + public int utf_mode = 0; + /* * Predefined character sets, * selected by the LESSCHARSET environment variable. */ struct charset { char *name; + int *p_flag; char *desc; } charsets[] = { ! { "ascii", NULL, "8bcccbcc18b95.b" }, ! { "dos", NULL, "8bcccbcc12bc5b223.b" }, ! { "ebcdic", NULL, "5bc6bcc7bcc41b.9b7.9b5.b..8b6.10b6.b9.7b9.8b8.17b3.3b9.7b9.8b8.6b10.b.b.b." }, ! { "IBM-1047", NULL, "4cbcbc3b9cbccbccbb4c6bcc5b3cbbc4bc4bccbc191.b" }, ! { "iso8859", NULL, "8bcccbcc18b95.33b." }, ! { "koi8-r", NULL, "8bcccbcc18b95.b128." }, ! { "next", NULL, "8bcccbcc18b95.bb125.bb" }, ! { "utf-8", &utf_mode, "8bcccbcc18b." }, ! { NULL, NULL, NULL } }; + struct cs_alias { + char *name; + char *oname; + } cs_aliases[] = { + { "latin1", "iso8859" }, + { "latin9", "iso8859" }, + { NULL, NULL } + }; + #define IS_BINARY_CHAR 01 #define IS_CONTROL_CHAR 02 *************** *** 136,150 **** --- 135,162 ---- register char *name; { register struct charset *p; + register struct cs_alias *a; if (name == NULL || *name == '\0') return (0); + /* First see if the name is an alias. */ + for (a = cs_aliases; a->name != NULL; a++) + { + if (strcmp(name, a->name) == 0) + { + name = a->oname; + break; + } + } + for (p = charsets; p->name != NULL; p++) { if (strcmp(name, p->name) == 0) { ichardef(p->desc); + if (p->p_flag != NULL) + *(p->p_flag) = 1; return (1); } } *************** *** 152,157 **** --- 164,170 ---- error("invalid charset name", NULL_PARG); quit(QUIT_ERROR); /*NOTREACHED*/ + return (0); } #if HAVE_LOCALE *************** *** 163,170 **** { register int c; ! setlocale(LC_CTYPE, ""); ! for (c = 0; c < sizeof(chardef); c++) { if (isprint(c)) chardef[c] = 0; --- 176,183 ---- { register int c; ! setlocale(LC_ALL, ""); ! for (c = 0; c < (int) sizeof(chardef); c++) { if (isprint(c)) chardef[c] = 0; *************** *** 211,245 **** { register char *s; ! s = getenv("LESSBINFMT"); setbinfmt(s); /* * See if environment variable LESSCHARSET is defined. */ ! s = getenv("LESSCHARSET"); if (icharset(s)) return; /* * LESSCHARSET is not defined: try LESSCHARDEF. */ ! s = getenv("LESSCHARDEF"); if (s != NULL && *s != '\0') { ichardef(s); return; } #if HAVE_LOCALE /* * Use setlocale. */ ilocale(); #else /* ! * Default to "ascii". */ ! (void) icharset("ascii"); #endif } /* --- 224,280 ---- { register char *s; ! s = lgetenv("LESSBINFMT"); setbinfmt(s); /* * See if environment variable LESSCHARSET is defined. */ ! s = lgetenv("LESSCHARSET"); if (icharset(s)) return; /* * LESSCHARSET is not defined: try LESSCHARDEF. */ ! s = lgetenv("LESSCHARDEF"); if (s != NULL && *s != '\0') { ichardef(s); return; } + + #if HAVE_STRSTR + /* + * Check whether LC_ALL, LC_CTYPE or LANG look like UTF-8 is used. + */ + if ((s = lgetenv("LC_ALL")) != NULL || + (s = lgetenv("LC_CTYPE")) != NULL || + (s = lgetenv("LANG")) != NULL) + { + if (strstr(s, "UTF-8") != NULL || strstr(s, "utf-8") != NULL) + if (icharset("utf-8")) + return; + } + #endif + #if HAVE_LOCALE /* * Use setlocale. */ ilocale(); #else + #if MSDOS_COMPILER /* ! * Default to "dos". */ ! (void) icharset("dos"); ! #else ! /* ! * Default to "latin1". ! */ ! (void) icharset("latin1"); #endif + #endif } /* *************** *** 247,253 **** */ public int binary_char(c) ! int c; { c &= 0377; return (chardef[c] & IS_BINARY_CHAR); --- 282,288 ---- */ public int binary_char(c) ! unsigned char c; { c &= 0377; return (chardef[c] & IS_BINARY_CHAR); *************** *** 279,286 **** sprintf(buf, "%c", c); else if (c == ESC) sprintf(buf, "ESC"); ! else if (c < 128 && !control_char(c ^ 0100)) ! sprintf(buf, "^%c", c ^ 0100); else sprintf(buf, binfmt, c); return (buf); --- 314,335 ---- sprintf(buf, "%c", c); else if (c == ESC) sprintf(buf, "ESC"); ! #if IS_EBCDIC_HOST ! else if (!binary_char(c) && c < 64) ! sprintf(buf, "^%c", ! /* ! * This array roughly inverts CONTROL() #defined in less.h, ! * and should be kept in sync with CONTROL() and IBM-1047. ! */ ! "@ABC.I.?...KLMNO" ! "PQRS.JH.XY.." ! "\\]^_" ! "......W[.....EFG" ! "..V....D....TU.Z"[c]); ! #else ! else if (c < 128 && !control_char(c ^ 0100)) ! sprintf(buf, "^%c", c ^ 0100); ! #endif else sprintf(buf, binfmt, c); return (buf);