=================================================================== RCS file: /cvsrepo/anoncvs/cvs/src/usr.bin/less/charset.c,v retrieving revision 1.10 retrieving revision 1.11 diff -u -r1.10 -r1.11 --- src/usr.bin/less/charset.c 2014/04/25 13:38:21 1.10 +++ src/usr.bin/less/charset.c 2015/11/05 22:08:43 1.11 @@ -6,25 +6,25 @@ * * For more information, see the README file. */ +/* + * Modified for use with illumos. + * Copyright 2014 Garrett D'Amore + */ - /* * Functions to define the character set * and do things specific to the character set. */ #include "less.h" -#if HAVE_LOCALE #include #include #include -#endif #include "charset.h" -public int utf_mode = 0; +int utf_mode = 0; -#if !SMALL /* * Predefined character sets, * selected by the LESSCHARSET environment variable. @@ -34,27 +34,29 @@ int *p_flag; char *desc; } charsets[] = { - { "ascii", NULL, "8bcccbcc18b95.b" }, - { "utf-8", &utf_mode, "8bcccbcc18b95.b126.bb" }, - { "iso8859", NULL, "8bcccbcc18b95.33b." }, - { "latin3", NULL, "8bcccbcc18b95.33b5.b8.b15.b4.b12.b18.b12.b." }, - { "arabic", NULL, "8bcccbcc18b95.33b.3b.7b2.13b.3b.b26.5b19.b" }, - { "greek", NULL, "8bcccbcc18b95.33b4.2b4.b3.b35.b44.b" }, - { "greek2005", NULL, "8bcccbcc18b95.33b14.b35.b44.b" }, - { "hebrew", NULL, "8bcccbcc18b95.33b.b29.32b28.2b2.b" }, - { "koi8-r", NULL, "8bcccbcc18b95.b." }, - { "KOI8-T", NULL, "8bcccbcc18b95.b8.b6.b8.b.b.5b7.3b4.b4.b3.b.b.3b." }, - { "georgianps", NULL, "8bcccbcc18b95.3b11.4b12.2b." }, - { "tcvn", NULL, "b..b...bcccbccbbb7.8b95.b48.5b." }, - { "TIS-620", NULL, "8bcccbcc18b95.b.4b.11b7.8b." }, - { "next", NULL, "8bcccbcc18b95.bb125.bb" }, - { "dos", NULL, "8bcccbcc12bc5b95.b." }, - { "windows-1251", NULL, "8bcccbcc12bc5b95.b24.b." }, - { "windows-1252", NULL, "8bcccbcc12bc5b95.b.b11.b.2b12.b." }, - { "windows-1255", NULL, "8bcccbcc12bc5b95.b.b8.b.5b9.b.4b." }, - { "ebcdic", NULL, "5bc6bcc7bcc41b.9b7.9b5.b..8b6.10b6.b9.7b9.8b8.17b3.3b9.7b9.8b8.6b10.b.b.b." }, - { "IBM-1047", NULL, "4cbcbc3b9cbccbccbb4c6bcc5b3cbbc4bc4bccbc191.b" }, + /* BEGIN CSTYLED */ + { "ascii", NULL, "8bcccbcc18b95.b" }, + { "utf-8", &utf_mode, "8bcccbcc18b95.b126.bb" }, + { "iso8859", NULL, "8bcccbcc18b95.33b." }, + { "latin3", NULL, "8bcccbcc18b95.33b5.b8.b15.b4.b12.b18.b12.b." }, + { "arabic", NULL, "8bcccbcc18b95.33b.3b.7b2.13b.3b.b26.5b19.b" }, + { "greek", NULL, "8bcccbcc18b95.33b4.2b4.b3.b35.b44.b" }, + { "greek2005", NULL, "8bcccbcc18b95.33b14.b35.b44.b" }, + { "hebrew", NULL, "8bcccbcc18b95.33b.b29.32b28.2b2.b" }, + { "koi8-r", NULL, "8bcccbcc18b95.b." }, + { "KOI8-T", NULL, "8bcccbcc18b95.b8.b6.b8.b.b.5b7.3b4.b4.b3.b.b.3b." }, + { "georgianps", NULL, "8bcccbcc18b95.3b11.4b12.2b." }, + { "tcvn", NULL, "b..b...bcccbccbbb7.8b95.b48.5b." }, + { "TIS-620", NULL, "8bcccbcc18b95.b.4b.11b7.8b." }, + { "next", NULL, "8bcccbcc18b95.bb125.bb" }, + { "dos", NULL, "8bcccbcc12bc5b95.b." }, + { "windows-1251", NULL, "8bcccbcc12bc5b95.b24.b." }, + { "windows-1252", NULL, "8bcccbcc12bc5b95.b.b11.b.2b12.b." }, + { "windows-1255", NULL, "8bcccbcc12bc5b95.b.b8.b.5b9.b.4b." }, + { "ebcdic", NULL, "5bc6bcc7bcc41b.9b7.9b5.b..8b6.10b6.b9.7b9.8b8.17b3.3b9.7b9.8b8.6b10.b.b.b." }, + { "IBM-1047", NULL, "4cbcbc3b9cbccbccbb4c6bcc5b3cbbc4bc4bccbc191.b" }, { NULL, NULL, NULL } + /* END CSTYLED */ }; /* @@ -67,6 +69,8 @@ { "UTF-8", "utf-8" }, { "ANSI_X3.4-1968", "ascii" }, { "US-ASCII", "ascii" }, + { "646", "ascii" }, + { "C", "ascii" }, { "latin1", "iso8859" }, { "ISO-8859-1", "iso8859" }, { "latin9", "iso8859" }, @@ -111,9 +115,9 @@ #define IS_CONTROL_CHAR 02 static char chardef[256]; -static char *binfmt = NULL; -static char *utfbinfmt = NULL; -public int binattr = AT_STANDOUT; +static const char *binfmt = NULL; +static const char *utfbinfmt = NULL; +int binattr = AT_STANDOUT; /* @@ -122,7 +126,7 @@ * one for each character in the charset. * If the string is shorter than 256 letters, missing letters * are taken to be identical to the last one. - * A decimal number followed by a letter is taken to be a + * A decimal number followed by a letter is taken to be a * repetition of the letter. * * Each letter is one of: @@ -130,21 +134,18 @@ * b binary character * c control character */ - static void -ichardef(s) - char *s; +static void +ichardef(char *s) { - register char *cp; - register int n; - register char v; + char *cp; + int n; + char v; n = 0; v = 0; cp = chardef; - while (*s != '\0') - { - switch (*s++) - { + while (*s != '\0') { + switch (*s++) { case '.': v = 0; break; @@ -166,10 +167,8 @@ /*NOTREACHED*/ } - do - { - if (cp >= chardef + sizeof(chardef)) - { + do { + if (cp >= chardef + sizeof (chardef)) { error("chardef longer than 256", NULL_PARG); quit(QUIT_ERROR); /*NOTREACHED*/ @@ -179,7 +178,7 @@ n = 0; } - while (cp < chardef + sizeof(chardef)) + while (cp < chardef + sizeof (chardef)) *cp++ = v; } @@ -187,31 +186,25 @@ * Define a charset, given a charset name. * The valid charset names are listed in the "charsets" array. */ - static int -icharset(name, no_error) - register char *name; - int no_error; +static int +icharset(char *name, int no_error) { - register struct charset *p; - register struct cs_alias *a; + struct charset *p; + struct cs_alias *a; if (name == NULL || *name == '\0') return (0); /* First see if the name is an alias. */ - for (a = cs_aliases; a->name != NULL; a++) - { - if (strcmp(name, a->name) == 0) - { + for (a = cs_aliases; a->name != NULL; a++) { + if (strcmp(name, a->name) == 0) { name = a->oname; break; } } - for (p = charsets; p->name != NULL; p++) - { - if (strcmp(name, p->name) == 0) - { + for (p = charsets; p->name != NULL; p++) { + if (strcmp(name, p->name) == 0) { ichardef(p->desc); if (p->p_flag != NULL) *(p->p_flag) = 1; @@ -226,17 +219,15 @@ return (0); } -#if HAVE_LOCALE /* * Define a charset, given a locale name. */ - static void -ilocale() +static void +ilocale(void) { - register int c; + int c; - for (c = 0; c < (int) sizeof(chardef); c++) - { + for (c = 0; c < sizeof (chardef); c++) { if (isprint(c)) chardef[c] = 0; else if (iscntrl(c)) @@ -245,51 +236,100 @@ chardef[c] = IS_BINARY_CHAR|IS_CONTROL_CHAR; } } -#endif -/* - * Define the printing format for control (or binary utf) chars. - */ - static void -setbinfmt(s, fmtvarptr, default_fmt) - char *s; - char **fmtvarptr; - char *default_fmt; +static int +checkfmt(const char *s) { - if (s && utf_mode) - { - /* It would be too hard to account for width otherwise. */ - char *t = s; - while (*t) - { - if (*t < ' ' || *t > '~') - { - s = default_fmt; - goto attr; + char c; + int seen = 0; + + /* %[][][.][] */ + + if (*s == '*') { /* skip leading attribute if there */ + s++; + if (strchr("dksu", *s) == NULL) { + return (-1); + } + s++; + } + + while ((c = *s++) != 0) { + if (!isascii(c) || !isprint(c)) { + return (-1); + } + if (c != '%') { + continue; + } + if (*s == '%') { /* % escaped with second % */ + s++; + continue; + } + if (seen) { + return (-1); /* 2nd % format item! */ + } + while (strchr(" '+-0#", *s) != NULL) { /* skip flags */ + s++; + } + while (isdigit(*s)) { /* skip width */ + s++; + } + if (*s == '.') { /* skip precision */ + s++; + while (isdigit(*s)) { + s++; } - t++; } + /* type width specifications, only "l", "h", and "hh" valid */ + if (*s == 'l') { + s++; + } else if (*s == 'h') { + s++; + if (*s == 'h') + s++; + } + + if (strchr("cCdiouxX", *s) == NULL) { + /* bad or evil format character (%s, %n, etc.) */ + return (-1); + } + + seen = 1; } - /* %n is evil */ - if (s == NULL || *s == '\0' || - (*s == '*' && (s[1] == '\0' || s[2] == '\0' || strchr(s + 2, 'n'))) || - (*s != '*' && strchr(s, 'n'))) + return (0); +} + +/* + * Define the printing format for control (or binary utf) chars. + */ +static void +setbinfmt(char *e, const char **fmtvarptr, const char *default_fmt) +{ + const char *s; + + if (((s = lgetenv(e)) == NULL) || (*s == 0)) { s = default_fmt; + goto attr; + } + if (s != NULL && *s != 0) { + if (checkfmt(s) < 0) { + s = default_fmt; + goto attr; + } + } + /* * Select the attributes if it starts with "*". */ - attr: - if (*s == '*') - { - switch (s[1]) - { - case 'd': binattr = AT_BOLD; break; - case 'k': binattr = AT_BLINK; break; - case 's': binattr = AT_STANDOUT; break; +attr: + if (*s == '*') { + switch (s[1]) { + case 'd': binattr = AT_BOLD; break; + case 'k': binattr = AT_BLINK; break; + case 's': binattr = AT_STANDOUT; break; case 'u': binattr = AT_UNDERLINE; break; - default: binattr = AT_NORMAL; break; + default: binattr = AT_NORMAL; break; } s += 2; } @@ -299,8 +339,8 @@ /* * */ - static void -set_charset() +static void +set_charset(void) { char *s; @@ -315,85 +355,44 @@ * LESSCHARSET is not defined: try LESSCHARDEF. */ s = lgetenv("LESSCHARDEF"); - if (s != NULL && *s != '\0') - { + if (s != NULL && *s != '\0') { ichardef(s); return; } -#if HAVE_LOCALE -#ifdef CODESET /* * Try using the codeset name as the charset name. */ s = nl_langinfo(CODESET); if (icharset(s, 1)) return; -#endif -#endif -#if HAVE_STRSTR /* - * Check whether LC_ALL, LC_CTYPE or LANG look like UTF-8 is used. - */ - if ((s = lgetenv("LC_ALL")) != NULL || - (s = lgetenv("LC_CTYPE")) != NULL || - (s = lgetenv("LANG")) != NULL) - { - if ( strstr(s, "UTF-8") != NULL || strstr(s, "utf-8") != NULL - || strstr(s, "UTF8") != NULL || strstr(s, "utf8") != NULL) - if (icharset("utf-8", 1)) - return; - } -#endif - -#if HAVE_LOCALE - /* * Get character definitions from locale functions, * rather than from predefined charset entry. */ ilocale(); -#if MSDOS_COMPILER - /* - * Default to "dos". - */ - (void) icharset("dos", 1); -#else - /* - * Default to "latin1". - */ - (void) icharset("latin1", 1); -#endif -#endif } /* * Initialize charset data structures. */ - public void -init_charset() +void +init_charset(void) { - char *s; - -#if HAVE_LOCALE setlocale(LC_ALL, ""); -#endif set_charset(); - s = lgetenv("LESSBINFMT"); - setbinfmt(s, &binfmt, "*s<%02X>"); - - s = lgetenv("LESSUTFBINFMT"); - setbinfmt(s, &utfbinfmt, ""); + setbinfmt("LESSBINFMT", &binfmt, "*s<%02X>"); + setbinfmt("LESSUTFBINFMT", &utfbinfmt, ""); } /* * Is a given character a "binary" character? */ - public int -binary_char(c) - LWCHAR c; +int +binary_char(LWCHAR c) { if (utf_mode) return (is_ubin_char(c)); @@ -404,9 +403,8 @@ /* * Is a given character a "control" character? */ - public int -control_char(c) - LWCHAR c; +int +control_char(LWCHAR c) { c &= 0377; return (chardef[c] & IS_CONTROL_CHAR); @@ -416,83 +414,62 @@ * Return the printable form of a character. * For example, in the "ascii" charset '\3' is printed as "^C". */ - public char * -prchar(c) - LWCHAR c; +char * +prchar(LWCHAR c) { /* {{ This buffer can be overrun if LESSBINFMT is a long string. }} */ static char buf[32]; c &= 0377; if ((c < 128 || !utf_mode) && !control_char(c)) - SNPRINTF1(buf, sizeof(buf), "%c", (int) c); + (void) snprintf(buf, sizeof (buf), "%c", (int)c); else if (c == ESC) - strlcpy(buf, "ESC", sizeof(buf)); -#if IS_EBCDIC_HOST - else if (!binary_char(c) && c < 64) - SNPRINTF1(buf, sizeof(buf), "^%c", - /* - * This array roughly inverts CONTROL() #defined in less.h, - * and should be kept in sync with CONTROL() and IBM-1047. - */ - "@ABC.I.?...KLMNO" - "PQRS.JH.XY.." - "\\]^_" - "......W[.....EFG" - "..V....D....TU.Z"[c]); -#else - else if (c < 128 && !control_char(c ^ 0100)) - SNPRINTF1(buf, sizeof(buf), "^%c", (int) (c ^ 0100)); -#endif + (void) strlcpy(buf, "ESC", sizeof (buf)); + else if (c < 128 && !control_char(c ^ 0100)) + (void) snprintf(buf, sizeof (buf), "^%c", (int)(c ^ 0100)); else - SNPRINTF1(buf, sizeof(buf), binfmt, c); + (void) snprintf(buf, sizeof (buf), binfmt, c); return (buf); } /* * Return the printable form of a UTF-8 character. */ - public char * -prutfchar(ch) - LWCHAR ch; +char * +prutfchar(LWCHAR ch) { static char buf[32]; - if (ch == ESC) - strlcpy(buf, "ESC", sizeof(buf)); - else if (ch < 128 && control_char(ch)) - { + if (ch == ESC) { + (void) strlcpy(buf, "ESC", sizeof (buf)); + } else if (ch < 128 && control_char(ch)) { if (!control_char(ch ^ 0100)) - SNPRINTF1(buf, sizeof(buf), "^%c", ((char) ch) ^ 0100); + (void) snprintf(buf, sizeof (buf), "^%c", + ((char)ch) ^ 0100); else - SNPRINTF1(buf, sizeof(buf), binfmt, (char) ch); - } else if (is_ubin_char(ch)) - SNPRINTF1(buf, sizeof(buf), utfbinfmt, ch); - else - { + (void) snprintf(buf, sizeof (buf), binfmt, (char)ch); + } else if (is_ubin_char(ch)) { + (void) snprintf(buf, sizeof (buf), utfbinfmt, ch); + } else { int len; - if (ch >= 0x80000000) - { + if (ch >= 0x80000000) { len = 3; ch = 0xFFFD; - } else - { - len = (ch < 0x80) ? 1 - : (ch < 0x800) ? 2 - : (ch < 0x10000) ? 3 - : (ch < 0x200000) ? 4 - : (ch < 0x4000000) ? 5 - : 6; + } else { + len = (ch < 0x80) ? 1 + : (ch < 0x800) ? 2 + : (ch < 0x10000) ? 3 + : (ch < 0x200000) ? 4 + : (ch < 0x4000000) ? 5 + : 6; } buf[len] = '\0'; - if (len == 1) - *buf = (char) ch; - else - { + if (len == 1) { + *buf = (char)ch; + } else { *buf = ((1 << len) - 1) << (8 - len); - while (--len > 0) - { - buf[len] = (char) (0x80 | (ch & 0x3F)); + while (--len > 0) { + buf[len] = (char)(0x80 | (ch & 0x3F)); ch >>= 6; } *buf |= ch; @@ -504,32 +481,30 @@ /* * Get the length of a UTF-8 character in bytes. */ - public int -utf_len(ch) - char ch; +int +utf_len(char ch) { if ((ch & 0x80) == 0) - return 1; + return (1); if ((ch & 0xE0) == 0xC0) - return 2; + return (2); if ((ch & 0xF0) == 0xE0) - return 3; + return (3); if ((ch & 0xF8) == 0xF0) - return 4; + return (4); if ((ch & 0xFC) == 0xF8) - return 5; + return (5); if ((ch & 0xFE) == 0xFC) - return 6; + return (6); /* Invalid UTF-8 encoding. */ - return 1; + return (1); } /* * Is a UTF-8 character well-formed? */ - public int -is_utf8_well_formed(s) - unsigned char *s; +int +is_utf8_well_formed(const char *s) { int i; int len; @@ -537,15 +512,13 @@ if (IS_UTF8_INVALID(s[0])) return (0); - len = utf_len((char) s[0]); + len = utf_len((char)s[0]); if (len == 1) return (1); - if (len == 2) - { - if (s[0] < 0xC2) - return (0); - } else - { + if (len == 2) { + if ((unsigned char)(s[0]) < 0xC2) + return (0); + } else { unsigned char mask; mask = (~((1 << (8-len)) - 1)) & 0xFF; if (s[0] == mask && (s[1] & mask) == 0x80) @@ -561,139 +534,121 @@ /* * Get the value of a UTF-8 character. */ - public LWCHAR -get_wchar(p) - char *p; +LWCHAR +get_wchar(const char *p) { - switch (utf_len(p[0])) - { + switch (utf_len(p[0])) { case 1: default: /* 0xxxxxxx */ return (LWCHAR) - (p[0] & 0xFF); + (p[0] & 0xFF); case 2: /* 110xxxxx 10xxxxxx */ return (LWCHAR) ( - ((p[0] & 0x1F) << 6) | - (p[1] & 0x3F)); + ((p[0] & 0x1F) << 6) | + (p[1] & 0x3F)); case 3: /* 1110xxxx 10xxxxxx 10xxxxxx */ return (LWCHAR) ( - ((p[0] & 0x0F) << 12) | - ((p[1] & 0x3F) << 6) | - (p[2] & 0x3F)); + ((p[0] & 0x0F) << 12) | + ((p[1] & 0x3F) << 6) | + (p[2] & 0x3F)); case 4: /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */ return (LWCHAR) ( - ((p[0] & 0x07) << 18) | - ((p[1] & 0x3F) << 12) | - ((p[2] & 0x3F) << 6) | - (p[3] & 0x3F)); + ((p[0] & 0x07) << 18) | + ((p[1] & 0x3F) << 12) | + ((p[2] & 0x3F) << 6) | + (p[3] & 0x3F)); case 5: /* 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx */ return (LWCHAR) ( - ((p[0] & 0x03) << 24) | - ((p[1] & 0x3F) << 18) | - ((p[2] & 0x3F) << 12) | - ((p[3] & 0x3F) << 6) | - (p[4] & 0x3F)); + ((p[0] & 0x03) << 24) | + ((p[1] & 0x3F) << 18) | + ((p[2] & 0x3F) << 12) | + ((p[3] & 0x3F) << 6) | + (p[4] & 0x3F)); case 6: /* 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx */ return (LWCHAR) ( - ((p[0] & 0x01) << 30) | - ((p[1] & 0x3F) << 24) | - ((p[2] & 0x3F) << 18) | - ((p[3] & 0x3F) << 12) | - ((p[4] & 0x3F) << 6) | - (p[5] & 0x3F)); + ((p[0] & 0x01) << 30) | + ((p[1] & 0x3F) << 24) | + ((p[2] & 0x3F) << 18) | + ((p[3] & 0x3F) << 12) | + ((p[4] & 0x3F) << 6) | + (p[5] & 0x3F)); } } /* * Store a character into a UTF-8 string. */ - public void -put_wchar(pp, ch) - char **pp; - LWCHAR ch; +void +put_wchar(char **pp, LWCHAR ch) { - if (!utf_mode || ch < 0x80) - { + if (!utf_mode || ch < 0x80) { /* 0xxxxxxx */ - *(*pp)++ = (char) ch; - } else if (ch < 0x800) - { + *(*pp)++ = (char)ch; + } else if (ch < 0x800) { /* 110xxxxx 10xxxxxx */ - *(*pp)++ = (char) (0xC0 | ((ch >> 6) & 0x1F)); - *(*pp)++ = (char) (0x80 | (ch & 0x3F)); - } else if (ch < 0x10000) - { + *(*pp)++ = (char)(0xC0 | ((ch >> 6) & 0x1F)); + *(*pp)++ = (char)(0x80 | (ch & 0x3F)); + } else if (ch < 0x10000) { /* 1110xxxx 10xxxxxx 10xxxxxx */ - *(*pp)++ = (char) (0xE0 | ((ch >> 12) & 0x0F)); - *(*pp)++ = (char) (0x80 | ((ch >> 6) & 0x3F)); - *(*pp)++ = (char) (0x80 | (ch & 0x3F)); - } else if (ch < 0x200000) - { + *(*pp)++ = (char)(0xE0 | ((ch >> 12) & 0x0F)); + *(*pp)++ = (char)(0x80 | ((ch >> 6) & 0x3F)); + *(*pp)++ = (char)(0x80 | (ch & 0x3F)); + } else if (ch < 0x200000) { /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */ - *(*pp)++ = (char) (0xF0 | ((ch >> 18) & 0x07)); - *(*pp)++ = (char) (0x80 | ((ch >> 12) & 0x3F)); - *(*pp)++ = (char) (0x80 | ((ch >> 6) & 0x3F)); - *(*pp)++ = (char) (0x80 | (ch & 0x3F)); - } else if (ch < 0x4000000) - { + *(*pp)++ = (char)(0xF0 | ((ch >> 18) & 0x07)); + *(*pp)++ = (char)(0x80 | ((ch >> 12) & 0x3F)); + *(*pp)++ = (char)(0x80 | ((ch >> 6) & 0x3F)); + *(*pp)++ = (char)(0x80 | (ch & 0x3F)); + } else if (ch < 0x4000000) { /* 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx */ - *(*pp)++ = (char) (0xF0 | ((ch >> 24) & 0x03)); - *(*pp)++ = (char) (0x80 | ((ch >> 18) & 0x3F)); - *(*pp)++ = (char) (0x80 | ((ch >> 12) & 0x3F)); - *(*pp)++ = (char) (0x80 | ((ch >> 6) & 0x3F)); - *(*pp)++ = (char) (0x80 | (ch & 0x3F)); - } else - { + *(*pp)++ = (char)(0xF0 | ((ch >> 24) & 0x03)); + *(*pp)++ = (char)(0x80 | ((ch >> 18) & 0x3F)); + *(*pp)++ = (char)(0x80 | ((ch >> 12) & 0x3F)); + *(*pp)++ = (char)(0x80 | ((ch >> 6) & 0x3F)); + *(*pp)++ = (char)(0x80 | (ch & 0x3F)); + } else { /* 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx */ - *(*pp)++ = (char) (0xF0 | ((ch >> 30) & 0x01)); - *(*pp)++ = (char) (0x80 | ((ch >> 24) & 0x3F)); - *(*pp)++ = (char) (0x80 | ((ch >> 18) & 0x3F)); - *(*pp)++ = (char) (0x80 | ((ch >> 12) & 0x3F)); - *(*pp)++ = (char) (0x80 | ((ch >> 6) & 0x3F)); - *(*pp)++ = (char) (0x80 | (ch & 0x3F)); + *(*pp)++ = (char)(0xF0 | ((ch >> 30) & 0x01)); + *(*pp)++ = (char)(0x80 | ((ch >> 24) & 0x3F)); + *(*pp)++ = (char)(0x80 | ((ch >> 18) & 0x3F)); + *(*pp)++ = (char)(0x80 | ((ch >> 12) & 0x3F)); + *(*pp)++ = (char)(0x80 | ((ch >> 6) & 0x3F)); + *(*pp)++ = (char)(0x80 | (ch & 0x3F)); } } /* * Step forward or backward one character in a string. */ - public LWCHAR -step_char(pp, dir, limit) - char **pp; - signed int dir; - char *limit; +LWCHAR +step_char(char **pp, int dir, char *limit) { LWCHAR ch; int len; char *p = *pp; - if (!utf_mode) - { + if (!utf_mode) { /* It's easy if chars are one byte. */ if (dir > 0) ch = (LWCHAR) ((p < limit) ? *p++ : 0); else ch = (LWCHAR) ((p > limit) ? *--p : 0); - } else if (dir > 0) - { + } else if (dir > 0) { len = utf_len(*p); - if (p + len > limit) - { + if (p + len > limit) { ch = 0; p = limit; - } else - { + } else { ch = get_wchar(p); p += len; } - } else - { + } else { while (p > limit && IS_UTF8_TRAIL(p[-1])) p--; if (p > limit) @@ -702,7 +657,7 @@ ch = 0; } *pp = p; - return ch; + return (ch); } /* @@ -796,7 +751,7 @@ * Special pairs, not ranges. */ static struct wchar_range comb_table[] = { - {0x0644,0x0622}, {0x0644,0x0623}, {0x0644,0x0625}, {0x0644,0x0627}, + {0x0644, 0x0622}, {0x0644, 0x0623}, {0x0644, 0x0625}, {0x0644, 0x0627}, }; /* @@ -813,10 +768,10 @@ * dated 2005-11-30T00:58:48Z */ static struct wchar_range ubin_table[] = { - { 0x0000, 0x0007} /* Cc */, - { 0x000B, 0x000C} /* Cc */, - { 0x000E, 0x001A} /* Cc */, - { 0x001C, 0x001F} /* Cc */, + { 0x0000, 0x0007} /* Cc */, + { 0x000B, 0x000C} /* Cc */, + { 0x000E, 0x001A} /* Cc */, + { 0x001C, 0x001F} /* Cc */, { 0x007F, 0x009F} /* Cc */, #if 0 { 0x00AD, 0x00AD} /* Cf */, @@ -1066,9 +1021,9 @@ { 0xE0080, 0xE00FF} /* Cn */, { 0xE01F0, 0xEFFFF} /* Cn */, { 0xF0000, 0xFFFFD} /* Co */, { 0xFFFFE, 0xFFFFF} /* Cn */, - {0x100000,0x10FFFD} /* Co */, - {0x10FFFE,0x10FFFF} /* Cn */, - {0x110000,0x7FFFFFFF} /* ISO 10646?? */ + { 0x100000, 0x10FFFD} /* Co */, + { 0x10FFFE, 0x10FFFF} /* Cn */, + { 0x110000, 0x7FFFFFFF} /* ISO 10646?? */ }; /* @@ -1092,62 +1047,58 @@ { 0x20000, 0x2FFFD} /* W */, { 0x30000, 0x3FFFD} /* W */, }; - static int -is_in_table(ch, table, tsize) - LWCHAR ch; - struct wchar_range table[]; - int tsize; +static int +is_in_table(LWCHAR ch, struct wchar_range table[], int tsize) { int hi; int lo; /* Binary search in the table. */ if (ch < table[0].first) - return 0; + return (0); lo = 0; hi = tsize - 1; - while (lo <= hi) - { + while (lo <= hi) { int mid = (lo + hi) / 2; if (ch > table[mid].last) lo = mid + 1; else if (ch < table[mid].first) hi = mid - 1; else - return 1; + return (1); } - return 0; + return (0); } /* * Is a character a UTF-8 composing character? * If a composing character follows any char, the two combine into one glyph. */ - public int -is_composing_char(ch) - LWCHAR ch; +int +is_composing_char(LWCHAR ch) { - return is_in_table(ch, comp_table, (sizeof(comp_table) / sizeof(*comp_table))); + return (is_in_table(ch, comp_table, + (sizeof (comp_table) / sizeof (*comp_table)))); } /* * Should this UTF-8 character be treated as binary? */ - public int -is_ubin_char(ch) - LWCHAR ch; +int +is_ubin_char(LWCHAR ch) { - return is_in_table(ch, ubin_table, (sizeof(ubin_table) / sizeof(*ubin_table))); + return (is_in_table(ch, ubin_table, + (sizeof (ubin_table) / sizeof (*ubin_table)))); } /* * Is this a double width UTF-8 character? */ - public int -is_wide_char(ch) - LWCHAR ch; +int +is_wide_char(LWCHAR ch) { - return is_in_table(ch, wide_table, (sizeof(wide_table) / sizeof(*wide_table))); + return (is_in_table(ch, wide_table, + (sizeof (wide_table) / sizeof (*wide_table)))); } /* @@ -1155,95 +1106,15 @@ * A combining char acts like an ordinary char, but if it follows * a specific char (not any char), the two combine into one glyph. */ - public int -is_combining_char(ch1, ch2) - LWCHAR ch1; - LWCHAR ch2; +int +is_combining_char(LWCHAR ch1, LWCHAR ch2) { /* The table is small; use linear search. */ int i; - for (i = 0; i < sizeof(comb_table)/sizeof(*comb_table); i++) - { + for (i = 0; i < sizeof (comb_table) / sizeof (*comb_table); i++) { if (ch1 == comb_table[i].first && ch2 == comb_table[i].last) - return 1; + return (1); } - return 0; + return (0); } - -#else /* !SMALL */ - -public int binattr = AT_STANDOUT; - - public void -init_charset() -{ - return; -} - -/* - * Is a given character a "binary" character? - */ - public int -binary_char(c) - LWCHAR c; -{ - return (!isprint(c) && !isspace(c)); -} - -/* - * Is a given character a "control" character? - */ - public int -control_char(c) - LWCHAR c; -{ - return (iscntrl(c)); -} - -/* - * Return the printable form of a character. - * For example, in the "ascii" charset '\3' is printed as "^C". - */ - public char * -prchar(c) - LWCHAR c; -{ - static char buf[8]; - - c &= 0377; - if (!iscntrl(c)) - snprintf(buf, sizeof(buf), "%c", c); - else if (c == ESC) - strlcpy(buf, "ESC", sizeof(buf)); - else if (c < 128 && !iscntrl(c ^ 0100)) - snprintf(buf, sizeof(buf), "^%c", c ^ 0100); - else - snprintf(buf, sizeof(buf), "*s<%X>", c); - return (buf); -} - -/* - * Step forward or backward one character in a string. - */ - public LWCHAR -step_char(pp, dir, limit) - char **pp; - signed int dir; - char *limit; -{ - LWCHAR ch; - int len; - char *p = *pp; - - /* It's easy if chars are one byte. */ - if (dir > 0) - ch = (LWCHAR) ((p < limit) ? *p++ : 0); - else - ch = (LWCHAR) ((p > limit) ? *--p : 0); - - *pp = p; - return ch; -} - -#endif /* !SMALL */