[BACK]Return to charset.c CVS log [TXT][DIR] Up to [local] / src / usr.bin / less

Annotation of src/usr.bin/less/charset.c, Revision 1.1.1.1

1.1       etheisen    1: /*
                      2:  * Copyright (c) 1984,1985,1989,1994,1995  Mark Nudelman
                      3:  * All rights reserved.
                      4:  *
                      5:  * Redistribution and use in source and binary forms, with or without
                      6:  * modification, are permitted provided that the following conditions
                      7:  * are met:
                      8:  * 1. Redistributions of source code must retain the above copyright
                      9:  *    notice, this list of conditions and the following disclaimer.
                     10:  * 2. Redistributions in binary form must reproduce the above copyright
                     11:  *    notice in the documentation and/or other materials provided with
                     12:  *    the distribution.
                     13:  *
                     14:  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY
                     15:  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     16:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
                     17:  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE
                     18:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
                     19:  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
                     20:  * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
                     21:  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
                     22:  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
                     23:  * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
                     24:  * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
                     25:  */
                     26:
                     27:
                     28: /*
                     29:  * Functions to define the character set
                     30:  * and do things specific to the character set.
                     31:  */
                     32:
                     33: #include "less.h"
                     34: #if HAVE_LOCALE
                     35: #include <locale.h>
                     36: #include <ctype.h>
                     37: #endif
                     38:
                     /*
                      * Table of the built-in character sets.
                      * An entry is looked up by name (the LESSCHARSET environment
                      * variable supplies the name); its "desc" member is a description
                      * string in the format parsed by ichardef().
                      * The entry whose name is NULL terminates the table.
                      */
                     struct charset {
                             char *name;     /* charset name to match */
                             char *desc;     /* description string for that charset */
                     };

                     struct charset charsets[] = {
                             { "ascii",  "8bcccbcc18b95.b"        },
                             { "latin1", "8bcccbcc18b95.33b."     },
                             { "dos",    "8bcccbcc12bc5b95.b."    },
                             { "koi8-r", "8bcccbcc18b95.b128."    },
                             { "next",   "8bcccbcc18b95.bb125.bb" },
                             { NULL,     NULL                     }
                     };
                     54:
                     55: #define        IS_BINARY_CHAR  01
                     56: #define        IS_CONTROL_CHAR 02
                     57:
                     58: static char chardef[256];
                     59: static char *binfmt = NULL;
                     60: public int binattr = AT_STANDOUT;
                     61:
                     62:
                     63: /*
                     64:  * Define a charset, given a description string.
                     65:  * The string consists of 256 letters,
                     66:  * one for each character in the charset.
                     67:  * If the string is shorter than 256 letters, missing letters
                     68:  * are taken to be identical to the last one.
                     69:  * A decimal number followed by a letter is taken to be a
                     70:  * repetition of the letter.
                     71:  *
                     72:  * Each letter is one of:
                     73:  *     . normal character
                     74:  *     b binary character
                     75:  *     c control character
                     76:  */
                     77:        static void
                     78: ichardef(s)
                     79:        char *s;
                     80: {
                     81:        register char *cp;
                     82:        register int n;
                     83:        register char v;
                     84:
                     85:        n = 0;
                     86:        v = 0;
                     87:        cp = chardef;
                     88:        while (*s != '\0')
                     89:        {
                     90:                switch (*s++)
                     91:                {
                     92:                case '.':
                     93:                        v = 0;
                     94:                        break;
                     95:                case 'c':
                     96:                        v = IS_CONTROL_CHAR;
                     97:                        break;
                     98:                case 'b':
                     99:                        v = IS_BINARY_CHAR|IS_CONTROL_CHAR;
                    100:                        break;
                    101:
                    102:                case '0': case '1': case '2': case '3': case '4':
                    103:                case '5': case '6': case '7': case '8': case '9':
                    104:                        n = (10 * n) + (s[-1] - '0');
                    105:                        continue;
                    106:
                    107:                default:
                    108:                        error("invalid chardef", NULL_PARG);
                    109:                        quit(QUIT_ERROR);
                    110:                        /*NOTREACHED*/
                    111:                }
                    112:
                    113:                do
                    114:                {
                    115:                        if (cp >= chardef + sizeof(chardef))
                    116:                        {
                    117:                                error("chardef longer than 256", NULL_PARG);
                    118:                                quit(QUIT_ERROR);
                    119:                                /*NOTREACHED*/
                    120:                        }
                    121:                        *cp++ = v;
                    122:                } while (--n > 0);
                    123:                n = 0;
                    124:        }
                    125:
                    126:        while (cp < chardef + sizeof(chardef))
                    127:                *cp++ = v;
                    128: }
                    129:
                    130: /*
                    131:  * Define a charset, given a charset name.
                    132:  * The valid charset names are listed in the "charsets" array.
                    133:  */
                    134:        static int
                    135: icharset(name)
                    136:        register char *name;
                    137: {
                    138:        register struct charset *p;
                    139:
                    140:        if (name == NULL || *name == '\0')
                    141:                return (0);
                    142:
                    143:        for (p = charsets;  p->name != NULL;  p++)
                    144:        {
                    145:                if (strcmp(name, p->name) == 0)
                    146:                {
                    147:                        ichardef(p->desc);
                    148:                        return (1);
                    149:                }
                    150:        }
                    151:
                    152:        error("invalid charset name", NULL_PARG);
                    153:        quit(QUIT_ERROR);
                    154:        /*NOTREACHED*/
                    155: }
                    156:
                    #if HAVE_LOCALE
                    /*
                     * Define the charset from the locale.
                     * Calls setlocale(LC_CTYPE, "") and then classifies every entry of
                     * chardef: codes accepted by isprint() are normal, codes accepted
                     * by iscntrl() are control characters, and everything else is
                     * flagged as both binary and control.
                     */
                    static void
                    ilocale()
                    {
                            int ch;

                            setlocale(LC_CTYPE, "");

                            ch = 0;
                            while (ch < sizeof(chardef))
                            {
                                    char flags;

                                    if (isprint(ch))
                                            flags = 0;
                                    else if (iscntrl(ch))
                                            flags = IS_CONTROL_CHAR;
                                    else
                                            flags = IS_BINARY_CHAR|IS_CONTROL_CHAR;

                                    chardef[ch] = flags;
                                    ch++;
                            }
                    }
                    #endif
                    178:
                    179: /*
                    180:  * Define the printing format for control chars.
                    181:  */
                    182:        public void
                    183: setbinfmt(s)
                    184:        char *s;
                    185: {
                    186:        if (s == NULL || *s == '\0')
                    187:                s = "*s<%X>";
                    188:        /*
                    189:         * Select the attributes if it starts with "*".
                    190:         */
                    191:        if (*s == '*')
                    192:        {
                    193:                switch (s[1])
                    194:                {
                    195:                case 'd':  binattr = AT_BOLD;      break;
                    196:                case 'k':  binattr = AT_BLINK;     break;
                    197:                case 's':  binattr = AT_STANDOUT;  break;
                    198:                case 'u':  binattr = AT_UNDERLINE; break;
                    199:                default:   binattr = AT_NORMAL;    break;
                    200:                }
                    201:                s += 2;
                    202:        }
                    203:        binfmt = s;
                    204: }
                    205:
                    206: /*
                    207:  * Initialize charset data structures.
                    208:  */
                    209:        public void
                    210: init_charset()
                    211: {
                    212:        register char *s;
                    213:
                    214:        s = getenv("LESSBINFMT");
                    215:        setbinfmt(s);
                    216:
                    217:        /*
                    218:         * See if environment variable LESSCHARSET is defined.
                    219:         */
                    220:        s = getenv("LESSCHARSET");
                    221:        if (icharset(s))
                    222:                return;
                    223:        /*
                    224:         * LESSCHARSET is not defined: try LESSCHARDEF.
                    225:         */
                    226:        s = getenv("LESSCHARDEF");
                    227:        if (s != NULL && *s != '\0')
                    228:        {
                    229:                ichardef(s);
                    230:                return;
                    231:        }
                    232: #if HAVE_LOCALE
                    233:        /*
                    234:         * Use setlocale.
                    235:         */
                    236:        ilocale();
                    237: #else
                    238:        /*
                    239:         * Default to "ascii".
                    240:         */
                    241:        (void) icharset("ascii");
                    242: #endif
                    243: }
                    244:
                    245: /*
                    246:  * Is a given character a "binary" character?
                    247:  */
                    248:        public int
                    249: binary_char(c)
                    250:        int c;
                    251: {
                    252:        c &= 0377;
                    253:        return (chardef[c] & IS_BINARY_CHAR);
                    254: }
                    255:
                    256: /*
                    257:  * Is a given character a "control" character?
                    258:  */
                    259:        public int
                    260: control_char(c)
                    261:        int c;
                    262: {
                    263:        c &= 0377;
                    264:        return (chardef[c] & IS_CONTROL_CHAR);
                    265: }
                    266:
                    267: /*
                    268:  * Return the printable form of a character.
                    269:  * For example, in the "ascii" charset '\3' is printed as "^C".
                    270:  */
                    271:        public char *
                    272: prchar(c)
                    273:        int c;
                    274: {
                    275:        static char buf[8];
                    276:
                    277:        c &= 0377;
                    278:        if (!control_char(c))
                    279:                sprintf(buf, "%c", c);
                    280:        else if (c == ESC)
                    281:                sprintf(buf, "ESC");
                    282:        else if (c < 128 && !control_char(c ^ 0100))
                    283:                sprintf(buf, "^%c", c ^ 0100);
                    284:        else
                    285:                sprintf(buf, binfmt, c);
                    286:        return (buf);
                    287: }