Annotation of src/usr.bin/less/charset.c, Revision 1.1.1.1
1.1 etheisen 1: /*
2: * Copyright (c) 1984,1985,1989,1994,1995 Mark Nudelman
3: * All rights reserved.
4: *
5: * Redistribution and use in source and binary forms, with or without
6: * modification, are permitted provided that the following conditions
7: * are met:
8: * 1. Redistributions of source code must retain the above copyright
9: * notice, this list of conditions and the following disclaimer.
10: * 2. Redistributions in binary form must reproduce the above copyright
11: * notice in the documentation and/or other materials provided with
12: * the distribution.
13: *
14: * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY
15: * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17: * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE
18: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
19: * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
20: * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
21: * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
22: * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
23: * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
24: * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25: */
26:
27:
28: /*
29: * Functions to define the character set
30: * and do things specific to the character set.
31: */
32:
#include "less.h"
#include <stdio.h>
#if HAVE_LOCALE
#include <locale.h>
#include <ctype.h>
#endif
38:
/*
 * Built-in character set descriptions.
 * A set is picked by name through the LESSCHARSET environment variable;
 * each "desc" string is written in the notation parsed by ichardef().
 * The table ends with an entry whose name is NULL.
 */
struct charset {
	char *name;	/* name matched against LESSCHARSET */
	char *desc;	/* chardef description string */
};

struct charset charsets[] = {
	{ "ascii",	"8bcccbcc18b95.b" },
	{ "latin1",	"8bcccbcc18b95.33b." },
	{ "dos",	"8bcccbcc12bc5b95.b." },
	{ "koi8-r",	"8bcccbcc18b95.b128." },
	{ "next",	"8bcccbcc18b95.bb125.bb" },
	{ NULL,		NULL }
};
54:
55: #define IS_BINARY_CHAR 01
56: #define IS_CONTROL_CHAR 02
57:
58: static char chardef[256];
59: static char *binfmt = NULL;
60: public int binattr = AT_STANDOUT;
61:
62:
63: /*
64: * Define a charset, given a description string.
65: * The string consists of 256 letters,
66: * one for each character in the charset.
67: * If the string is shorter than 256 letters, missing letters
68: * are taken to be identical to the last one.
69: * A decimal number followed by a letter is taken to be a
70: * repetition of the letter.
71: *
72: * Each letter is one of:
73: * . normal character
74: * b binary character
75: * c control character
76: */
77: static void
78: ichardef(s)
79: char *s;
80: {
81: register char *cp;
82: register int n;
83: register char v;
84:
85: n = 0;
86: v = 0;
87: cp = chardef;
88: while (*s != '\0')
89: {
90: switch (*s++)
91: {
92: case '.':
93: v = 0;
94: break;
95: case 'c':
96: v = IS_CONTROL_CHAR;
97: break;
98: case 'b':
99: v = IS_BINARY_CHAR|IS_CONTROL_CHAR;
100: break;
101:
102: case '0': case '1': case '2': case '3': case '4':
103: case '5': case '6': case '7': case '8': case '9':
104: n = (10 * n) + (s[-1] - '0');
105: continue;
106:
107: default:
108: error("invalid chardef", NULL_PARG);
109: quit(QUIT_ERROR);
110: /*NOTREACHED*/
111: }
112:
113: do
114: {
115: if (cp >= chardef + sizeof(chardef))
116: {
117: error("chardef longer than 256", NULL_PARG);
118: quit(QUIT_ERROR);
119: /*NOTREACHED*/
120: }
121: *cp++ = v;
122: } while (--n > 0);
123: n = 0;
124: }
125:
126: while (cp < chardef + sizeof(chardef))
127: *cp++ = v;
128: }
129:
130: /*
131: * Define a charset, given a charset name.
132: * The valid charset names are listed in the "charsets" array.
133: */
134: static int
135: icharset(name)
136: register char *name;
137: {
138: register struct charset *p;
139:
140: if (name == NULL || *name == '\0')
141: return (0);
142:
143: for (p = charsets; p->name != NULL; p++)
144: {
145: if (strcmp(name, p->name) == 0)
146: {
147: ichardef(p->desc);
148: return (1);
149: }
150: }
151:
152: error("invalid charset name", NULL_PARG);
153: quit(QUIT_ERROR);
154: /*NOTREACHED*/
155: }
156:
#if HAVE_LOCALE
/*
 * Fill in chardef[] from the C library's character classification,
 * after setting LC_CTYPE from the environment.
 * Printable characters are normal, iscntrl() characters are control
 * characters, and everything else is binary.
 */
static void
ilocale()
{
	register int ch;
	register char flags;

	setlocale(LC_CTYPE, "");
	ch = 0;
	while (ch < sizeof(chardef))
	{
		if (isprint(ch))
			flags = 0;
		else if (iscntrl(ch))
			flags = IS_CONTROL_CHAR;
		else
			flags = IS_BINARY_CHAR|IS_CONTROL_CHAR;
		chardef[ch] = flags;
		ch++;
	}
}
#endif
178:
179: /*
180: * Define the printing format for control chars.
181: */
182: public void
183: setbinfmt(s)
184: char *s;
185: {
186: if (s == NULL || *s == '\0')
187: s = "*s<%X>";
188: /*
189: * Select the attributes if it starts with "*".
190: */
191: if (*s == '*')
192: {
193: switch (s[1])
194: {
195: case 'd': binattr = AT_BOLD; break;
196: case 'k': binattr = AT_BLINK; break;
197: case 's': binattr = AT_STANDOUT; break;
198: case 'u': binattr = AT_UNDERLINE; break;
199: default: binattr = AT_NORMAL; break;
200: }
201: s += 2;
202: }
203: binfmt = s;
204: }
205:
206: /*
207: * Initialize charset data structures.
208: */
209: public void
210: init_charset()
211: {
212: register char *s;
213:
214: s = getenv("LESSBINFMT");
215: setbinfmt(s);
216:
217: /*
218: * See if environment variable LESSCHARSET is defined.
219: */
220: s = getenv("LESSCHARSET");
221: if (icharset(s))
222: return;
223: /*
224: * LESSCHARSET is not defined: try LESSCHARDEF.
225: */
226: s = getenv("LESSCHARDEF");
227: if (s != NULL && *s != '\0')
228: {
229: ichardef(s);
230: return;
231: }
232: #if HAVE_LOCALE
233: /*
234: * Use setlocale.
235: */
236: ilocale();
237: #else
238: /*
239: * Default to "ascii".
240: */
241: (void) icharset("ascii");
242: #endif
243: }
244:
245: /*
246: * Is a given character a "binary" character?
247: */
248: public int
249: binary_char(c)
250: int c;
251: {
252: c &= 0377;
253: return (chardef[c] & IS_BINARY_CHAR);
254: }
255:
256: /*
257: * Is a given character a "control" character?
258: */
259: public int
260: control_char(c)
261: int c;
262: {
263: c &= 0377;
264: return (chardef[c] & IS_CONTROL_CHAR);
265: }
266:
267: /*
268: * Return the printable form of a character.
269: * For example, in the "ascii" charset '\3' is printed as "^C".
270: */
271: public char *
272: prchar(c)
273: int c;
274: {
275: static char buf[8];
276:
277: c &= 0377;
278: if (!control_char(c))
279: sprintf(buf, "%c", c);
280: else if (c == ESC)
281: sprintf(buf, "ESC");
282: else if (c < 128 && !control_char(c ^ 0100))
283: sprintf(buf, "^%c", c ^ 0100);
284: else
285: sprintf(buf, binfmt, c);
286: return (buf);
287: }