Annotation of src/usr.bin/less/charset.c, Revision 1.1
1.1 ! etheisen 1: /*
! 2: * Copyright (c) 1984,1985,1989,1994,1995 Mark Nudelman
! 3: * All rights reserved.
! 4: *
! 5: * Redistribution and use in source and binary forms, with or without
! 6: * modification, are permitted provided that the following conditions
! 7: * are met:
! 8: * 1. Redistributions of source code must retain the above copyright
! 9: * notice, this list of conditions and the following disclaimer.
! 10: * 2. Redistributions in binary form must reproduce the above copyright
! 11: * notice in the documentation and/or other materials provided with
! 12: * the distribution.
! 13: *
! 14: * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY
! 15: * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
! 16: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! 17: * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE
! 18: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! 19: * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
! 20: * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
! 21: * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
! 22: * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
! 23: * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
! 24: * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
! 25: */
! 26:
! 27:
! 28: /*
! 29: * Functions to define the character set
! 30: * and do things specific to the character set.
! 31: */
! 32:
#include "less.h"
#include <stdio.h>
#if HAVE_LOCALE
#include <locale.h>
#include <ctype.h>
#endif
! 38:
/*
 * Table of built-in character sets.
 * Each entry pairs a charset name with a description string in the
 * format understood by ichardef().  The table is terminated by an
 * entry whose name is NULL.
 */
struct charset {
	char *name;	/* charset name, e.g. "latin1" */
	char *desc;	/* chardef description string */
} charsets[] = {
	{ "ascii",	"8bcccbcc18b95.b"		},
	{ "latin1",	"8bcccbcc18b95.33b."		},
	{ "dos",	"8bcccbcc12bc5b95.b."		},
	{ "koi8-r",	"8bcccbcc18b95.b128."		},
	{ "next",	"8bcccbcc18b95.bb125.bb"	},
	{ NULL,		NULL				}
};
! 54:
! 55: #define IS_BINARY_CHAR 01
! 56: #define IS_CONTROL_CHAR 02
! 57:
! 58: static char chardef[256];
! 59: static char *binfmt = NULL;
! 60: public int binattr = AT_STANDOUT;
! 61:
! 62:
! 63: /*
! 64: * Define a charset, given a description string.
! 65: * The string consists of 256 letters,
! 66: * one for each character in the charset.
! 67: * If the string is shorter than 256 letters, missing letters
! 68: * are taken to be identical to the last one.
! 69: * A decimal number followed by a letter is taken to be a
! 70: * repetition of the letter.
! 71: *
! 72: * Each letter is one of:
! 73: * . normal character
! 74: * b binary character
! 75: * c control character
! 76: */
! 77: static void
! 78: ichardef(s)
! 79: char *s;
! 80: {
! 81: register char *cp;
! 82: register int n;
! 83: register char v;
! 84:
! 85: n = 0;
! 86: v = 0;
! 87: cp = chardef;
! 88: while (*s != '\0')
! 89: {
! 90: switch (*s++)
! 91: {
! 92: case '.':
! 93: v = 0;
! 94: break;
! 95: case 'c':
! 96: v = IS_CONTROL_CHAR;
! 97: break;
! 98: case 'b':
! 99: v = IS_BINARY_CHAR|IS_CONTROL_CHAR;
! 100: break;
! 101:
! 102: case '0': case '1': case '2': case '3': case '4':
! 103: case '5': case '6': case '7': case '8': case '9':
! 104: n = (10 * n) + (s[-1] - '0');
! 105: continue;
! 106:
! 107: default:
! 108: error("invalid chardef", NULL_PARG);
! 109: quit(QUIT_ERROR);
! 110: /*NOTREACHED*/
! 111: }
! 112:
! 113: do
! 114: {
! 115: if (cp >= chardef + sizeof(chardef))
! 116: {
! 117: error("chardef longer than 256", NULL_PARG);
! 118: quit(QUIT_ERROR);
! 119: /*NOTREACHED*/
! 120: }
! 121: *cp++ = v;
! 122: } while (--n > 0);
! 123: n = 0;
! 124: }
! 125:
! 126: while (cp < chardef + sizeof(chardef))
! 127: *cp++ = v;
! 128: }
! 129:
! 130: /*
! 131: * Define a charset, given a charset name.
! 132: * The valid charset names are listed in the "charsets" array.
! 133: */
! 134: static int
! 135: icharset(name)
! 136: register char *name;
! 137: {
! 138: register struct charset *p;
! 139:
! 140: if (name == NULL || *name == '\0')
! 141: return (0);
! 142:
! 143: for (p = charsets; p->name != NULL; p++)
! 144: {
! 145: if (strcmp(name, p->name) == 0)
! 146: {
! 147: ichardef(p->desc);
! 148: return (1);
! 149: }
! 150: }
! 151:
! 152: error("invalid charset name", NULL_PARG);
! 153: quit(QUIT_ERROR);
! 154: /*NOTREACHED*/
! 155: }
! 156:
! 157: #if HAVE_LOCALE
! 158: /*
! 159: * Define a charset, given a locale name.
! 160: */
! 161: static void
! 162: ilocale()
! 163: {
! 164: register int c;
! 165:
! 166: setlocale(LC_CTYPE, "");
! 167: for (c = 0; c < sizeof(chardef); c++)
! 168: {
! 169: if (isprint(c))
! 170: chardef[c] = 0;
! 171: else if (iscntrl(c))
! 172: chardef[c] = IS_CONTROL_CHAR;
! 173: else
! 174: chardef[c] = IS_BINARY_CHAR|IS_CONTROL_CHAR;
! 175: }
! 176: }
! 177: #endif
! 178:
! 179: /*
! 180: * Define the printing format for control chars.
! 181: */
! 182: public void
! 183: setbinfmt(s)
! 184: char *s;
! 185: {
! 186: if (s == NULL || *s == '\0')
! 187: s = "*s<%X>";
! 188: /*
! 189: * Select the attributes if it starts with "*".
! 190: */
! 191: if (*s == '*')
! 192: {
! 193: switch (s[1])
! 194: {
! 195: case 'd': binattr = AT_BOLD; break;
! 196: case 'k': binattr = AT_BLINK; break;
! 197: case 's': binattr = AT_STANDOUT; break;
! 198: case 'u': binattr = AT_UNDERLINE; break;
! 199: default: binattr = AT_NORMAL; break;
! 200: }
! 201: s += 2;
! 202: }
! 203: binfmt = s;
! 204: }
! 205:
! 206: /*
! 207: * Initialize charset data structures.
! 208: */
! 209: public void
! 210: init_charset()
! 211: {
! 212: register char *s;
! 213:
! 214: s = getenv("LESSBINFMT");
! 215: setbinfmt(s);
! 216:
! 217: /*
! 218: * See if environment variable LESSCHARSET is defined.
! 219: */
! 220: s = getenv("LESSCHARSET");
! 221: if (icharset(s))
! 222: return;
! 223: /*
! 224: * LESSCHARSET is not defined: try LESSCHARDEF.
! 225: */
! 226: s = getenv("LESSCHARDEF");
! 227: if (s != NULL && *s != '\0')
! 228: {
! 229: ichardef(s);
! 230: return;
! 231: }
! 232: #if HAVE_LOCALE
! 233: /*
! 234: * Use setlocale.
! 235: */
! 236: ilocale();
! 237: #else
! 238: /*
! 239: * Default to "ascii".
! 240: */
! 241: (void) icharset("ascii");
! 242: #endif
! 243: }
! 244:
! 245: /*
! 246: * Is a given character a "binary" character?
! 247: */
! 248: public int
! 249: binary_char(c)
! 250: int c;
! 251: {
! 252: c &= 0377;
! 253: return (chardef[c] & IS_BINARY_CHAR);
! 254: }
! 255:
! 256: /*
! 257: * Is a given character a "control" character?
! 258: */
! 259: public int
! 260: control_char(c)
! 261: int c;
! 262: {
! 263: c &= 0377;
! 264: return (chardef[c] & IS_CONTROL_CHAR);
! 265: }
! 266:
! 267: /*
! 268: * Return the printable form of a character.
! 269: * For example, in the "ascii" charset '\3' is printed as "^C".
! 270: */
! 271: public char *
! 272: prchar(c)
! 273: int c;
! 274: {
! 275: static char buf[8];
! 276:
! 277: c &= 0377;
! 278: if (!control_char(c))
! 279: sprintf(buf, "%c", c);
! 280: else if (c == ESC)
! 281: sprintf(buf, "ESC");
! 282: else if (c < 128 && !control_char(c ^ 0100))
! 283: sprintf(buf, "^%c", c ^ 0100);
! 284: else
! 285: sprintf(buf, binfmt, c);
! 286: return (buf);
! 287: }