[BACK]Return to charset.c CVS log [TXT][DIR] Up to [local] / src / usr.bin / less

Annotation of src/usr.bin/less/charset.c, Revision 1.5

1.1       etheisen    1: /*
1.5     ! millert     2:  * Copyright (C) 1984-2002  Mark Nudelman
1.1       etheisen    3:  *
1.5     ! millert     4:  * You may distribute under the terms of either the GNU General Public
        !             5:  * License or the Less License, as specified in the README file.
1.1       etheisen    6:  *
1.5     ! millert     7:  * For more information about less, or for information on how to
        !             8:  * contact the author, see the README file.
1.1       etheisen    9:  */
                     10:
                     11:
                     12: /*
                     13:  * Functions to define the character set
                     14:  * and do things specific to the character set.
                     15:  */
                     16:
                     17: #include "less.h"
                     18: #if HAVE_LOCALE
                     19: #include <locale.h>
                     20: #include <ctype.h>
                     21: #endif
                     22:
1.5     ! millert    23: public int utf_mode = 0;
        !            24:
1.1       etheisen   25: /*
                     26:  * Predefined character sets,
                     27:  * selected by the LESSCHARSET environment variable.
                     28:  */
                     29: struct charset {
                     30:        char *name;
1.5     ! millert    31:        int *p_flag;
1.1       etheisen   32:        char *desc;
                     33: } charsets[] = {
1.5     ! millert    34:        { "ascii",      NULL,       "8bcccbcc18b95.b" },
        !            35:        { "dos",        NULL,       "8bcccbcc12bc5b223.b" },
        !            36:        { "ebcdic",     NULL,       "5bc6bcc7bcc41b.9b7.9b5.b..8b6.10b6.b9.7b9.8b8.17b3.3b9.7b9.8b8.6b10.b.b.b." },
        !            37:        { "IBM-1047",   NULL,       "4cbcbc3b9cbccbccbb4c6bcc5b3cbbc4bc4bccbc191.b" },
        !            38:        { "iso8859",    NULL,       "8bcccbcc18b95.33b." },
        !            39:        { "koi8-r",     NULL,       "8bcccbcc18b95.b128." },
        !            40:        { "next",       NULL,       "8bcccbcc18b95.bb125.bb" },
        !            41:        { "utf-8",      &utf_mode,  "8bcccbcc18b." },
        !            42:        { NULL, NULL, NULL }
        !            43: };
        !            44:
        !            45: struct cs_alias {
        !            46:        char *name;
        !            47:        char *oname;
        !            48: } cs_aliases[] = {
        !            49:        { "latin1",     "iso8859" },
        !            50:        { "latin9",     "iso8859" },
        !            51:        { NULL, NULL }
1.1       etheisen   52: };
                     53:
                     54: #define        IS_BINARY_CHAR  01
                     55: #define        IS_CONTROL_CHAR 02
                     56:
                     57: static char chardef[256];
                     58: static char *binfmt = NULL;
                     59: public int binattr = AT_STANDOUT;
                     60:
                     61:
                     62: /*
                     63:  * Define a charset, given a description string.
                     64:  * The string consists of 256 letters,
                     65:  * one for each character in the charset.
                     66:  * If the string is shorter than 256 letters, missing letters
                     67:  * are taken to be identical to the last one.
                     68:  * A decimal number followed by a letter is taken to be a
                     69:  * repetition of the letter.
                     70:  *
                     71:  * Each letter is one of:
                     72:  *     . normal character
                     73:  *     b binary character
                     74:  *     c control character
                     75:  */
                     76:        static void
                     77: ichardef(s)
                     78:        char *s;
                     79: {
1.5     ! millert    80:        register char *cp;
        !            81:        register int n;
        !            82:        register char v;
1.1       etheisen   83:
                     84:        n = 0;
                     85:        v = 0;
                     86:        cp = chardef;
                     87:        while (*s != '\0')
                     88:        {
                     89:                switch (*s++)
                     90:                {
                     91:                case '.':
                     92:                        v = 0;
                     93:                        break;
                     94:                case 'c':
                     95:                        v = IS_CONTROL_CHAR;
                     96:                        break;
                     97:                case 'b':
                     98:                        v = IS_BINARY_CHAR|IS_CONTROL_CHAR;
                     99:                        break;
                    100:
                    101:                case '0': case '1': case '2': case '3': case '4':
                    102:                case '5': case '6': case '7': case '8': case '9':
                    103:                        n = (10 * n) + (s[-1] - '0');
                    104:                        continue;
                    105:
                    106:                default:
                    107:                        error("invalid chardef", NULL_PARG);
                    108:                        quit(QUIT_ERROR);
                    109:                        /*NOTREACHED*/
                    110:                }
                    111:
                    112:                do
                    113:                {
                    114:                        if (cp >= chardef + sizeof(chardef))
                    115:                        {
                    116:                                error("chardef longer than 256", NULL_PARG);
                    117:                                quit(QUIT_ERROR);
                    118:                                /*NOTREACHED*/
                    119:                        }
                    120:                        *cp++ = v;
                    121:                } while (--n > 0);
                    122:                n = 0;
                    123:        }
                    124:
                    125:        while (cp < chardef + sizeof(chardef))
                    126:                *cp++ = v;
                    127: }
                    128:
                    129: /*
                    130:  * Define a charset, given a charset name.
                    131:  * The valid charset names are listed in the "charsets" array.
                    132:  */
                    133:        static int
                    134: icharset(name)
1.5     ! millert   135:        register char *name;
1.1       etheisen  136: {
1.5     ! millert   137:        register struct charset *p;
        !           138:        register struct cs_alias *a;
1.1       etheisen  139:
                    140:        if (name == NULL || *name == '\0')
                    141:                return (0);
                    142:
1.5     ! millert   143:        /* First see if the name is an alias. */
        !           144:        for (a = cs_aliases;  a->name != NULL;  a++)
        !           145:        {
        !           146:                if (strcmp(name, a->name) == 0)
        !           147:                {
        !           148:                        name = a->oname;
        !           149:                        break;
        !           150:                }
        !           151:        }
        !           152:
1.1       etheisen  153:        for (p = charsets;  p->name != NULL;  p++)
                    154:        {
                    155:                if (strcmp(name, p->name) == 0)
                    156:                {
                    157:                        ichardef(p->desc);
1.5     ! millert   158:                        if (p->p_flag != NULL)
        !           159:                                *(p->p_flag) = 1;
1.1       etheisen  160:                        return (1);
                    161:                }
                    162:        }
                    163:
                    164:        error("invalid charset name", NULL_PARG);
                    165:        quit(QUIT_ERROR);
                    166:        /*NOTREACHED*/
1.5     ! millert   167:        return (0);
1.1       etheisen  168: }
                    169:
                    170: #if HAVE_LOCALE
                    171: /*
                    172:  * Define a charset, given a locale name.
                    173:  */
                    174:        static void
                    175: ilocale()
                    176: {
1.5     ! millert   177:        register int c;
1.1       etheisen  178:
1.5     ! millert   179:        setlocale(LC_ALL, "");
        !           180:        for (c = 0;  c < (int) sizeof(chardef);  c++)
1.1       etheisen  181:        {
                    182:                if (isprint(c))
                    183:                        chardef[c] = 0;
                    184:                else if (iscntrl(c))
                    185:                        chardef[c] = IS_CONTROL_CHAR;
                    186:                else
                    187:                        chardef[c] = IS_BINARY_CHAR|IS_CONTROL_CHAR;
                    188:        }
                    189: }
                    190: #endif
                    191:
                    192: /*
                    193:  * Define the printing format for control chars.
                    194:  */
                    195:        public void
                    196: setbinfmt(s)
                    197:        char *s;
                    198: {
                    199:        if (s == NULL || *s == '\0')
                    200:                s = "*s<%X>";
                    201:        /*
                    202:         * Select the attributes if it starts with "*".
                    203:         */
                    204:        if (*s == '*')
                    205:        {
                    206:                switch (s[1])
                    207:                {
                    208:                case 'd':  binattr = AT_BOLD;      break;
                    209:                case 'k':  binattr = AT_BLINK;     break;
                    210:                case 's':  binattr = AT_STANDOUT;  break;
                    211:                case 'u':  binattr = AT_UNDERLINE; break;
                    212:                default:   binattr = AT_NORMAL;    break;
                    213:                }
                    214:                s += 2;
                    215:        }
                    216:        binfmt = s;
                    217: }
                    218:
                    219: /*
                    220:  * Initialize charset data structures.
                    221:  */
                    222:        public void
                    223: init_charset()
                    224: {
1.5     ! millert   225:        register char *s;
1.1       etheisen  226:
1.5     ! millert   227:        s = lgetenv("LESSBINFMT");
1.1       etheisen  228:        setbinfmt(s);
                    229:
                    230:        /*
                    231:         * See if environment variable LESSCHARSET is defined.
                    232:         */
1.5     ! millert   233:        s = lgetenv("LESSCHARSET");
1.1       etheisen  234:        if (icharset(s))
                    235:                return;
                    236:        /*
                    237:         * LESSCHARSET is not defined: try LESSCHARDEF.
                    238:         */
1.5     ! millert   239:        s = lgetenv("LESSCHARDEF");
1.1       etheisen  240:        if (s != NULL && *s != '\0')
                    241:        {
                    242:                ichardef(s);
                    243:                return;
                    244:        }
1.5     ! millert   245:
        !           246: #if HAVE_STRSTR
        !           247:        /*
        !           248:         * Check whether LC_ALL, LC_CTYPE or LANG look like UTF-8 is used.
        !           249:         */
        !           250:        if ((s = lgetenv("LC_ALL")) != NULL ||
        !           251:            (s = lgetenv("LC_CTYPE")) != NULL ||
        !           252:            (s = lgetenv("LANG")) != NULL)
        !           253:        {
        !           254:                if (strstr(s, "UTF-8") != NULL || strstr(s, "utf-8") != NULL)
        !           255:                        if (icharset("utf-8"))
        !           256:                                return;
        !           257:        }
        !           258: #endif
        !           259:
1.1       etheisen  260: #if HAVE_LOCALE
                    261:        /*
                    262:         * Use setlocale.
                    263:         */
                    264:        ilocale();
                    265: #else
1.5     ! millert   266: #if MSDOS_COMPILER
1.1       etheisen  267:        /*
1.5     ! millert   268:         * Default to "dos".
1.1       etheisen  269:         */
1.5     ! millert   270:        (void) icharset("dos");
        !           271: #else
        !           272:        /*
        !           273:         * Default to "latin1".
        !           274:         */
        !           275:        (void) icharset("latin1");
        !           276: #endif
1.1       etheisen  277: #endif
                    278: }
                    279:
                    280: /*
                    281:  * Is a given character a "binary" character?
                    282:  */
                    283:        public int
                    284: binary_char(c)
1.5     ! millert   285:        unsigned char c;
1.1       etheisen  286: {
                    287:        c &= 0377;
                    288:        return (chardef[c] & IS_BINARY_CHAR);
                    289: }
                    290:
                    291: /*
                    292:  * Is a given character a "control" character?
                    293:  */
                    294:        public int
                    295: control_char(c)
                    296:        int c;
                    297: {
                    298:        c &= 0377;
                    299:        return (chardef[c] & IS_CONTROL_CHAR);
                    300: }
                    301:
                    302: /*
                    303:  * Return the printable form of a character.
                    304:  * For example, in the "ascii" charset '\3' is printed as "^C".
                    305:  */
                    306:        public char *
                    307: prchar(c)
                    308:        int c;
                    309: {
                    310:        static char buf[8];
                    311:
                    312:        c &= 0377;
                    313:        if (!control_char(c))
1.5     ! millert   314:                snprintf(buf, sizeof(buf), "%c", c);
1.1       etheisen  315:        else if (c == ESC)
1.5     ! millert   316:                snprintf(buf, sizeof(buf), "ESC");
        !           317: #if IS_EBCDIC_HOST
        !           318:        else if (!binary_char(c) && c < 64)
        !           319:                snprintf(buf, sizeof(buf), "^%c",
        !           320:                /*
        !           321:                 * This array roughly inverts CONTROL() #defined in less.h,
        !           322:                 * and should be kept in sync with CONTROL() and IBM-1047.
        !           323:                 */
        !           324:                "@ABC.I.?...KLMNO"
        !           325:                "PQRS.JH.XY.."
        !           326:                "\\]^_"
        !           327:                "......W[.....EFG"
        !           328:                "..V....D....TU.Z"[c]);
        !           329: #else
        !           330:        else if (c < 128 && !control_char(c ^ 0100))
        !           331:                snprintf(buf, sizeof(buf), "^%c", c ^ 0100);
        !           332: #endif
1.1       etheisen  333:        else
1.5     ! millert   334:                snprintf(buf, sizeof(buf), binfmt, c);
1.1       etheisen  335:        return (buf);
                    336: }