[BACK]Return to charset.c CVS log [TXT][DIR] Up to [local] / src / usr.bin / less

Annotation of src/usr.bin/less/charset.c, Revision 1.1

1.1     ! etheisen    1: /*
        !             2:  * Copyright (c) 1984,1985,1989,1994,1995  Mark Nudelman
        !             3:  * All rights reserved.
        !             4:  *
        !             5:  * Redistribution and use in source and binary forms, with or without
        !             6:  * modification, are permitted provided that the following conditions
        !             7:  * are met:
        !             8:  * 1. Redistributions of source code must retain the above copyright
        !             9:  *    notice, this list of conditions and the following disclaimer.
        !            10:  * 2. Redistributions in binary form must reproduce the above copyright
        !            11:  *    notice in the documentation and/or other materials provided with
        !            12:  *    the distribution.
        !            13:  *
        !            14:  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY
        !            15:  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
        !            16:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
        !            17:  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE
        !            18:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
        !            19:  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
        !            20:  * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
        !            21:  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
        !            22:  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
        !            23:  * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
        !            24:  * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
        !            25:  */
        !            26:
        !            27:
        !            28: /*
        !            29:  * Functions to define the character set
        !            30:  * and do things specific to the character set.
        !            31:  */
        !            32:
#include "less.h"
#include <stdio.h>
#if HAVE_LOCALE
#include <locale.h>
#include <ctype.h>
#endif
        !            38:
        !            39: /*
        !            40:  * Predefined character sets,
        !            41:  * selected by the LESSCHARSET environment variable.
        !            42:  */
        !            43: struct charset {
        !            44:        char *name;
        !            45:        char *desc;
        !            46: } charsets[] = {
        !            47:        { "ascii",      "8bcccbcc18b95.b"               },
        !            48:        { "latin1",     "8bcccbcc18b95.33b."            },
        !            49:        { "dos",        "8bcccbcc12bc5b95.b."           },
        !            50:        { "koi8-r",     "8bcccbcc18b95.b128."           },
        !            51:        { "next",       "8bcccbcc18b95.bb125.bb"        },
        !            52:        { NULL }
        !            53: };
        !            54:
        !            55: #define        IS_BINARY_CHAR  01
        !            56: #define        IS_CONTROL_CHAR 02
        !            57:
        !            58: static char chardef[256];
        !            59: static char *binfmt = NULL;
        !            60: public int binattr = AT_STANDOUT;
        !            61:
        !            62:
        !            63: /*
        !            64:  * Define a charset, given a description string.
        !            65:  * The string consists of 256 letters,
        !            66:  * one for each character in the charset.
        !            67:  * If the string is shorter than 256 letters, missing letters
        !            68:  * are taken to be identical to the last one.
        !            69:  * A decimal number followed by a letter is taken to be a
        !            70:  * repetition of the letter.
        !            71:  *
        !            72:  * Each letter is one of:
        !            73:  *     . normal character
        !            74:  *     b binary character
        !            75:  *     c control character
        !            76:  */
        !            77:        static void
        !            78: ichardef(s)
        !            79:        char *s;
        !            80: {
        !            81:        register char *cp;
        !            82:        register int n;
        !            83:        register char v;
        !            84:
        !            85:        n = 0;
        !            86:        v = 0;
        !            87:        cp = chardef;
        !            88:        while (*s != '\0')
        !            89:        {
        !            90:                switch (*s++)
        !            91:                {
        !            92:                case '.':
        !            93:                        v = 0;
        !            94:                        break;
        !            95:                case 'c':
        !            96:                        v = IS_CONTROL_CHAR;
        !            97:                        break;
        !            98:                case 'b':
        !            99:                        v = IS_BINARY_CHAR|IS_CONTROL_CHAR;
        !           100:                        break;
        !           101:
        !           102:                case '0': case '1': case '2': case '3': case '4':
        !           103:                case '5': case '6': case '7': case '8': case '9':
        !           104:                        n = (10 * n) + (s[-1] - '0');
        !           105:                        continue;
        !           106:
        !           107:                default:
        !           108:                        error("invalid chardef", NULL_PARG);
        !           109:                        quit(QUIT_ERROR);
        !           110:                        /*NOTREACHED*/
        !           111:                }
        !           112:
        !           113:                do
        !           114:                {
        !           115:                        if (cp >= chardef + sizeof(chardef))
        !           116:                        {
        !           117:                                error("chardef longer than 256", NULL_PARG);
        !           118:                                quit(QUIT_ERROR);
        !           119:                                /*NOTREACHED*/
        !           120:                        }
        !           121:                        *cp++ = v;
        !           122:                } while (--n > 0);
        !           123:                n = 0;
        !           124:        }
        !           125:
        !           126:        while (cp < chardef + sizeof(chardef))
        !           127:                *cp++ = v;
        !           128: }
        !           129:
        !           130: /*
        !           131:  * Define a charset, given a charset name.
        !           132:  * The valid charset names are listed in the "charsets" array.
        !           133:  */
        !           134:        static int
        !           135: icharset(name)
        !           136:        register char *name;
        !           137: {
        !           138:        register struct charset *p;
        !           139:
        !           140:        if (name == NULL || *name == '\0')
        !           141:                return (0);
        !           142:
        !           143:        for (p = charsets;  p->name != NULL;  p++)
        !           144:        {
        !           145:                if (strcmp(name, p->name) == 0)
        !           146:                {
        !           147:                        ichardef(p->desc);
        !           148:                        return (1);
        !           149:                }
        !           150:        }
        !           151:
        !           152:        error("invalid charset name", NULL_PARG);
        !           153:        quit(QUIT_ERROR);
        !           154:        /*NOTREACHED*/
        !           155: }
        !           156:
        !           157: #if HAVE_LOCALE
        !           158: /*
        !           159:  * Define a charset, given a locale name.
        !           160:  */
        !           161:        static void
        !           162: ilocale()
        !           163: {
        !           164:        register int c;
        !           165:
        !           166:        setlocale(LC_CTYPE, "");
        !           167:        for (c = 0;  c < sizeof(chardef);  c++)
        !           168:        {
        !           169:                if (isprint(c))
        !           170:                        chardef[c] = 0;
        !           171:                else if (iscntrl(c))
        !           172:                        chardef[c] = IS_CONTROL_CHAR;
        !           173:                else
        !           174:                        chardef[c] = IS_BINARY_CHAR|IS_CONTROL_CHAR;
        !           175:        }
        !           176: }
        !           177: #endif
        !           178:
        !           179: /*
        !           180:  * Define the printing format for control chars.
        !           181:  */
        !           182:        public void
        !           183: setbinfmt(s)
        !           184:        char *s;
        !           185: {
        !           186:        if (s == NULL || *s == '\0')
        !           187:                s = "*s<%X>";
        !           188:        /*
        !           189:         * Select the attributes if it starts with "*".
        !           190:         */
        !           191:        if (*s == '*')
        !           192:        {
        !           193:                switch (s[1])
        !           194:                {
        !           195:                case 'd':  binattr = AT_BOLD;      break;
        !           196:                case 'k':  binattr = AT_BLINK;     break;
        !           197:                case 's':  binattr = AT_STANDOUT;  break;
        !           198:                case 'u':  binattr = AT_UNDERLINE; break;
        !           199:                default:   binattr = AT_NORMAL;    break;
        !           200:                }
        !           201:                s += 2;
        !           202:        }
        !           203:        binfmt = s;
        !           204: }
        !           205:
        !           206: /*
        !           207:  * Initialize charset data structures.
        !           208:  */
        !           209:        public void
        !           210: init_charset()
        !           211: {
        !           212:        register char *s;
        !           213:
        !           214:        s = getenv("LESSBINFMT");
        !           215:        setbinfmt(s);
        !           216:
        !           217:        /*
        !           218:         * See if environment variable LESSCHARSET is defined.
        !           219:         */
        !           220:        s = getenv("LESSCHARSET");
        !           221:        if (icharset(s))
        !           222:                return;
        !           223:        /*
        !           224:         * LESSCHARSET is not defined: try LESSCHARDEF.
        !           225:         */
        !           226:        s = getenv("LESSCHARDEF");
        !           227:        if (s != NULL && *s != '\0')
        !           228:        {
        !           229:                ichardef(s);
        !           230:                return;
        !           231:        }
        !           232: #if HAVE_LOCALE
        !           233:        /*
        !           234:         * Use setlocale.
        !           235:         */
        !           236:        ilocale();
        !           237: #else
        !           238:        /*
        !           239:         * Default to "ascii".
        !           240:         */
        !           241:        (void) icharset("ascii");
        !           242: #endif
        !           243: }
        !           244:
        !           245: /*
        !           246:  * Is a given character a "binary" character?
        !           247:  */
        !           248:        public int
        !           249: binary_char(c)
        !           250:        int c;
        !           251: {
        !           252:        c &= 0377;
        !           253:        return (chardef[c] & IS_BINARY_CHAR);
        !           254: }
        !           255:
        !           256: /*
        !           257:  * Is a given character a "control" character?
        !           258:  */
        !           259:        public int
        !           260: control_char(c)
        !           261:        int c;
        !           262: {
        !           263:        c &= 0377;
        !           264:        return (chardef[c] & IS_CONTROL_CHAR);
        !           265: }
        !           266:
        !           267: /*
        !           268:  * Return the printable form of a character.
        !           269:  * For example, in the "ascii" charset '\3' is printed as "^C".
        !           270:  */
        !           271:        public char *
        !           272: prchar(c)
        !           273:        int c;
        !           274: {
        !           275:        static char buf[8];
        !           276:
        !           277:        c &= 0377;
        !           278:        if (!control_char(c))
        !           279:                sprintf(buf, "%c", c);
        !           280:        else if (c == ESC)
        !           281:                sprintf(buf, "ESC");
        !           282:        else if (c < 128 && !control_char(c ^ 0100))
        !           283:                sprintf(buf, "^%c", c ^ 0100);
        !           284:        else
        !           285:                sprintf(buf, binfmt, c);
        !           286:        return (buf);
        !           287: }