[BACK]Return to charset.c CVS log [TXT][DIR] Up to [local] / src / usr.bin / less

Annotation of src/usr.bin/less/charset.c, Revision 1.3

1.3     ! mpech       1: /*     $OpenBSD: charset.c,v 1.2 2001/01/29 01:58:00 niklas Exp $      */
1.2       niklas      2:
1.1       etheisen    3: /*
                      4:  * Copyright (c) 1984,1985,1989,1994,1995  Mark Nudelman
                      5:  * All rights reserved.
                      6:  *
                      7:  * Redistribution and use in source and binary forms, with or without
                      8:  * modification, are permitted provided that the following conditions
                      9:  * are met:
                     10:  * 1. Redistributions of source code must retain the above copyright
                     11:  *    notice, this list of conditions and the following disclaimer.
                     12:  * 2. Redistributions in binary form must reproduce the above copyright
                     13:  *    notice in the documentation and/or other materials provided with
                     14:  *    the distribution.
                     15:  *
                     16:  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY
                     17:  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     18:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
                     19:  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE
                     20:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
                     21:  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
                     22:  * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
                     23:  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
                     24:  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
                     25:  * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
                     26:  * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
                     27:  */
                     28:
                     29:
                     30: /*
                     31:  * Functions to define the character set
                     32:  * and do things specific to the character set.
                     33:  */
                     34:
                     35: #include "less.h"
                     36: #if HAVE_LOCALE
                     37: #include <locale.h>
                     38: #include <ctype.h>
                     39: #endif
                     40:
                     /*
                      * Built-in character sets, chosen by name through the
                      * LESSCHARSET environment variable.
                      * Each entry pairs a charset name with a description string
                      * in the format decoded by ichardef().
                      * The table ends with an entry whose name is NULL.
                      */
                     struct charset {
                             char *name;
                             char *desc;
                     };

                     struct charset charsets[] = {
                             { "ascii",      "8bcccbcc18b95.b"               },
                             { "latin1",     "8bcccbcc18b95.33b."            },
                             { "dos",        "8bcccbcc12bc5b95.b."           },
                             { "koi8-r",     "8bcccbcc18b95.b128."           },
                             { "next",       "8bcccbcc18b95.bb125.bb"        },
                             { NULL,         NULL                            }
                     };
                     56:
                     57: #define        IS_BINARY_CHAR  01
                     58: #define        IS_CONTROL_CHAR 02
                     59:
                     60: static char chardef[256];
                     61: static char *binfmt = NULL;
                     62: public int binattr = AT_STANDOUT;
                     63:
                     64:
                     65: /*
                     66:  * Define a charset, given a description string.
                     67:  * The string consists of 256 letters,
                     68:  * one for each character in the charset.
                     69:  * If the string is shorter than 256 letters, missing letters
                     70:  * are taken to be identical to the last one.
                     71:  * A decimal number followed by a letter is taken to be a
                     72:  * repetition of the letter.
                     73:  *
                     74:  * Each letter is one of:
                     75:  *     . normal character
                     76:  *     b binary character
                     77:  *     c control character
                     78:  */
                     79:        static void
                     80: ichardef(s)
                     81:        char *s;
                     82: {
1.3     ! mpech      83:        char *cp;
        !            84:        int n;
        !            85:        char v;
1.1       etheisen   86:
                     87:        n = 0;
                     88:        v = 0;
                     89:        cp = chardef;
                     90:        while (*s != '\0')
                     91:        {
                     92:                switch (*s++)
                     93:                {
                     94:                case '.':
                     95:                        v = 0;
                     96:                        break;
                     97:                case 'c':
                     98:                        v = IS_CONTROL_CHAR;
                     99:                        break;
                    100:                case 'b':
                    101:                        v = IS_BINARY_CHAR|IS_CONTROL_CHAR;
                    102:                        break;
                    103:
                    104:                case '0': case '1': case '2': case '3': case '4':
                    105:                case '5': case '6': case '7': case '8': case '9':
                    106:                        n = (10 * n) + (s[-1] - '0');
                    107:                        continue;
                    108:
                    109:                default:
                    110:                        error("invalid chardef", NULL_PARG);
                    111:                        quit(QUIT_ERROR);
                    112:                        /*NOTREACHED*/
                    113:                }
                    114:
                    115:                do
                    116:                {
                    117:                        if (cp >= chardef + sizeof(chardef))
                    118:                        {
                    119:                                error("chardef longer than 256", NULL_PARG);
                    120:                                quit(QUIT_ERROR);
                    121:                                /*NOTREACHED*/
                    122:                        }
                    123:                        *cp++ = v;
                    124:                } while (--n > 0);
                    125:                n = 0;
                    126:        }
                    127:
                    128:        while (cp < chardef + sizeof(chardef))
                    129:                *cp++ = v;
                    130: }
                    131:
                    132: /*
                    133:  * Define a charset, given a charset name.
                    134:  * The valid charset names are listed in the "charsets" array.
                    135:  */
                    136:        static int
                    137: icharset(name)
1.3     ! mpech     138:        char *name;
1.1       etheisen  139: {
1.3     ! mpech     140:        struct charset *p;
1.1       etheisen  141:
                    142:        if (name == NULL || *name == '\0')
                    143:                return (0);
                    144:
                    145:        for (p = charsets;  p->name != NULL;  p++)
                    146:        {
                    147:                if (strcmp(name, p->name) == 0)
                    148:                {
                    149:                        ichardef(p->desc);
                    150:                        return (1);
                    151:                }
                    152:        }
                    153:
                    154:        error("invalid charset name", NULL_PARG);
                    155:        quit(QUIT_ERROR);
                    156:        /*NOTREACHED*/
                    157: }
                    158:
                    #if HAVE_LOCALE
                    /*
                     * Define a charset from the current locale.
                     * After setlocale(LC_CTYPE, ""), each of the 256 character codes
                     * is classified with isprint() and iscntrl(): printable codes are
                     * normal, non-printable control codes are control characters, and
                     * everything else is binary.
                     */
                            static void
                    ilocale()
                    {
                            int code;
                            char flags;

                            setlocale(LC_CTYPE, "");
                            for (code = 0;  code < sizeof(chardef);  code++)
                            {
                                    /* Start from "binary" and relax it if a test matches. */
                                    flags = IS_BINARY_CHAR|IS_CONTROL_CHAR;
                                    if (isprint(code))
                                            flags = 0;
                                    else if (iscntrl(code))
                                            flags = IS_CONTROL_CHAR;
                                    chardef[code] = flags;
                            }
                    }
                    #endif
                    180:
                    181: /*
                    182:  * Define the printing format for control chars.
                    183:  */
                    184:        public void
                    185: setbinfmt(s)
                    186:        char *s;
                    187: {
                    188:        if (s == NULL || *s == '\0')
                    189:                s = "*s<%X>";
                    190:        /*
                    191:         * Select the attributes if it starts with "*".
                    192:         */
                    193:        if (*s == '*')
                    194:        {
                    195:                switch (s[1])
                    196:                {
                    197:                case 'd':  binattr = AT_BOLD;      break;
                    198:                case 'k':  binattr = AT_BLINK;     break;
                    199:                case 's':  binattr = AT_STANDOUT;  break;
                    200:                case 'u':  binattr = AT_UNDERLINE; break;
                    201:                default:   binattr = AT_NORMAL;    break;
                    202:                }
                    203:                s += 2;
                    204:        }
                    205:        binfmt = s;
                    206: }
                    207:
                    208: /*
                    209:  * Initialize charset data structures.
                    210:  */
                    211:        public void
                    212: init_charset()
                    213: {
1.3     ! mpech     214:        char *s;
1.1       etheisen  215:
                    216:        s = getenv("LESSBINFMT");
                    217:        setbinfmt(s);
                    218:
                    219:        /*
                    220:         * See if environment variable LESSCHARSET is defined.
                    221:         */
                    222:        s = getenv("LESSCHARSET");
                    223:        if (icharset(s))
                    224:                return;
                    225:        /*
                    226:         * LESSCHARSET is not defined: try LESSCHARDEF.
                    227:         */
                    228:        s = getenv("LESSCHARDEF");
                    229:        if (s != NULL && *s != '\0')
                    230:        {
                    231:                ichardef(s);
                    232:                return;
                    233:        }
                    234: #if HAVE_LOCALE
                    235:        /*
                    236:         * Use setlocale.
                    237:         */
                    238:        ilocale();
                    239: #else
                    240:        /*
                    241:         * Default to "ascii".
                    242:         */
                    243:        (void) icharset("ascii");
                    244: #endif
                    245: }
                    246:
                    247: /*
                    248:  * Is a given character a "binary" character?
                    249:  */
                    250:        public int
                    251: binary_char(c)
                    252:        int c;
                    253: {
                    254:        c &= 0377;
                    255:        return (chardef[c] & IS_BINARY_CHAR);
                    256: }
                    257:
                    258: /*
                    259:  * Is a given character a "control" character?
                    260:  */
                    261:        public int
                    262: control_char(c)
                    263:        int c;
                    264: {
                    265:        c &= 0377;
                    266:        return (chardef[c] & IS_CONTROL_CHAR);
                    267: }
                    268:
                    269: /*
                    270:  * Return the printable form of a character.
                    271:  * For example, in the "ascii" charset '\3' is printed as "^C".
                    272:  */
                    273:        public char *
                    274: prchar(c)
                    275:        int c;
                    276: {
                    277:        static char buf[8];
                    278:
                    279:        c &= 0377;
                    280:        if (!control_char(c))
                    281:                sprintf(buf, "%c", c);
                    282:        else if (c == ESC)
                    283:                sprintf(buf, "ESC");
                    284:        else if (c < 128 && !control_char(c ^ 0100))
                    285:                sprintf(buf, "^%c", c ^ 0100);
                    286:        else
                    287:                sprintf(buf, binfmt, c);
                    288:        return (buf);
                    289: }