Annotation of src/usr.bin/less/charset.c, Revision 1.2
1.2 ! niklas 1: /* $OpenBSD$ */
! 2:
1.1 etheisen 3: /*
4: * Copyright (c) 1984,1985,1989,1994,1995 Mark Nudelman
5: * All rights reserved.
6: *
7: * Redistribution and use in source and binary forms, with or without
8: * modification, are permitted provided that the following conditions
9: * are met:
10: * 1. Redistributions of source code must retain the above copyright
11: * notice, this list of conditions and the following disclaimer.
12: * 2. Redistributions in binary form must reproduce the above copyright
13: * notice in the documentation and/or other materials provided with
14: * the distribution.
15: *
16: * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY
17: * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19: * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE
20: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21: * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
22: * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
23: * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
24: * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
25: * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
26: * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27: */
28:
29:
30: /*
31: * Functions to define the character set
32: * and do things specific to the character set.
33: */
34:
35: #include "less.h"
36: #if HAVE_LOCALE
37: #include <locale.h>
38: #include <ctype.h>
39: #endif
40:
/*
 * Table of built-in character sets.
 * An entry is picked by name through the LESSCHARSET environment
 * variable; its "desc" string is handed to ichardef() to fill in
 * the character table.
 * The table is terminated by an entry whose name is NULL.
 */
struct charset {
	char *name;	/* charset name to match */
	char *desc;	/* description string in ichardef() notation */
};

struct charset charsets[] = {
	{ "ascii",	"8bcccbcc18b95.b" },
	{ "latin1",	"8bcccbcc18b95.33b." },
	{ "dos",	"8bcccbcc12bc5b95.b." },
	{ "koi8-r",	"8bcccbcc18b95.b128." },
	{ "next",	"8bcccbcc18b95.bb125.bb" },
	{ NULL,		NULL }
};
56:
/*
 * Flag bits kept for each character in chardef[].
 * binary_char() tests IS_BINARY_CHAR and control_char() tests
 * IS_CONTROL_CHAR; a "binary" character carries both bits.
 */
#define IS_BINARY_CHAR 01
#define IS_CONTROL_CHAR 02

/* One flag byte per character code (0..255), filled in by ichardef() or ilocale(). */
static char chardef[256];
/* printf-style format prchar() uses for characters it cannot show otherwise; NULL until setbinfmt() runs. */
static char *binfmt = NULL;
/* Display attribute chosen by setbinfmt() from the "*x" prefix of the format. */
public int binattr = AT_STANDOUT;
63:
64:
65: /*
66: * Define a charset, given a description string.
67: * The string consists of 256 letters,
68: * one for each character in the charset.
69: * If the string is shorter than 256 letters, missing letters
70: * are taken to be identical to the last one.
71: * A decimal number followed by a letter is taken to be a
72: * repetition of the letter.
73: *
74: * Each letter is one of:
75: * . normal character
76: * b binary character
77: * c control character
78: */
79: static void
80: ichardef(s)
81: char *s;
82: {
83: register char *cp;
84: register int n;
85: register char v;
86:
87: n = 0;
88: v = 0;
89: cp = chardef;
90: while (*s != '\0')
91: {
92: switch (*s++)
93: {
94: case '.':
95: v = 0;
96: break;
97: case 'c':
98: v = IS_CONTROL_CHAR;
99: break;
100: case 'b':
101: v = IS_BINARY_CHAR|IS_CONTROL_CHAR;
102: break;
103:
104: case '0': case '1': case '2': case '3': case '4':
105: case '5': case '6': case '7': case '8': case '9':
106: n = (10 * n) + (s[-1] - '0');
107: continue;
108:
109: default:
110: error("invalid chardef", NULL_PARG);
111: quit(QUIT_ERROR);
112: /*NOTREACHED*/
113: }
114:
115: do
116: {
117: if (cp >= chardef + sizeof(chardef))
118: {
119: error("chardef longer than 256", NULL_PARG);
120: quit(QUIT_ERROR);
121: /*NOTREACHED*/
122: }
123: *cp++ = v;
124: } while (--n > 0);
125: n = 0;
126: }
127:
128: while (cp < chardef + sizeof(chardef))
129: *cp++ = v;
130: }
131:
132: /*
133: * Define a charset, given a charset name.
134: * The valid charset names are listed in the "charsets" array.
135: */
136: static int
137: icharset(name)
138: register char *name;
139: {
140: register struct charset *p;
141:
142: if (name == NULL || *name == '\0')
143: return (0);
144:
145: for (p = charsets; p->name != NULL; p++)
146: {
147: if (strcmp(name, p->name) == 0)
148: {
149: ichardef(p->desc);
150: return (1);
151: }
152: }
153:
154: error("invalid charset name", NULL_PARG);
155: quit(QUIT_ERROR);
156: /*NOTREACHED*/
157: }
158:
#if HAVE_LOCALE
/*
 * Build the character table from the locale.
 * LC_CTYPE is set from the environment, then every character code
 * is classified: printable codes get no flags, codes that are
 * control characters get IS_CONTROL_CHAR, and everything else is
 * marked as binary (both flags).
 */
static void
ilocale()
{
	register int ch;
	char flags;

	setlocale(LC_CTYPE, "");
	for (ch = 0;  ch < sizeof(chardef);  ch++)
	{
		if (isprint(ch))
			flags = 0;
		else if (iscntrl(ch))
			flags = IS_CONTROL_CHAR;
		else
			flags = IS_BINARY_CHAR|IS_CONTROL_CHAR;
		chardef[ch] = flags;
	}
}
#endif
180:
181: /*
182: * Define the printing format for control chars.
183: */
184: public void
185: setbinfmt(s)
186: char *s;
187: {
188: if (s == NULL || *s == '\0')
189: s = "*s<%X>";
190: /*
191: * Select the attributes if it starts with "*".
192: */
193: if (*s == '*')
194: {
195: switch (s[1])
196: {
197: case 'd': binattr = AT_BOLD; break;
198: case 'k': binattr = AT_BLINK; break;
199: case 's': binattr = AT_STANDOUT; break;
200: case 'u': binattr = AT_UNDERLINE; break;
201: default: binattr = AT_NORMAL; break;
202: }
203: s += 2;
204: }
205: binfmt = s;
206: }
207:
208: /*
209: * Initialize charset data structures.
210: */
211: public void
212: init_charset()
213: {
214: register char *s;
215:
216: s = getenv("LESSBINFMT");
217: setbinfmt(s);
218:
219: /*
220: * See if environment variable LESSCHARSET is defined.
221: */
222: s = getenv("LESSCHARSET");
223: if (icharset(s))
224: return;
225: /*
226: * LESSCHARSET is not defined: try LESSCHARDEF.
227: */
228: s = getenv("LESSCHARDEF");
229: if (s != NULL && *s != '\0')
230: {
231: ichardef(s);
232: return;
233: }
234: #if HAVE_LOCALE
235: /*
236: * Use setlocale.
237: */
238: ilocale();
239: #else
240: /*
241: * Default to "ascii".
242: */
243: (void) icharset("ascii");
244: #endif
245: }
246:
247: /*
248: * Is a given character a "binary" character?
249: */
250: public int
251: binary_char(c)
252: int c;
253: {
254: c &= 0377;
255: return (chardef[c] & IS_BINARY_CHAR);
256: }
257:
258: /*
259: * Is a given character a "control" character?
260: */
261: public int
262: control_char(c)
263: int c;
264: {
265: c &= 0377;
266: return (chardef[c] & IS_CONTROL_CHAR);
267: }
268:
269: /*
270: * Return the printable form of a character.
271: * For example, in the "ascii" charset '\3' is printed as "^C".
272: */
273: public char *
274: prchar(c)
275: int c;
276: {
277: static char buf[8];
278:
279: c &= 0377;
280: if (!control_char(c))
281: sprintf(buf, "%c", c);
282: else if (c == ESC)
283: sprintf(buf, "ESC");
284: else if (c < 128 && !control_char(c ^ 0100))
285: sprintf(buf, "^%c", c ^ 0100);
286: else
287: sprintf(buf, binfmt, c);
288: return (buf);
289: }