Annotation of src/usr.bin/lex/ccl.c, Revision 1.8
1.8 ! tedu 1: /* $OpenBSD: ccl.c,v 1.7 2015/11/19 19:43:40 tedu Exp $ */
1.2 deraadt 2:
1.1 deraadt 3: /* ccl - routines for character classes */
4:
1.7 tedu 5: /* Copyright (c) 1990 The Regents of the University of California. */
6: /* All rights reserved. */
7:
8: /* This code is derived from software contributed to Berkeley by */
9: /* Vern Paxson. */
1.1 deraadt 10:
1.7 tedu 11: /* The United States Government has rights in this work pursuant */
12: /* to contract no. DE-AC03-76SF00098 between the United States */
1.8 ! tedu 13: /* Department of Energy and the University of California. */
1.7 tedu 14:
15: /* This file is part of flex. */
16:
17: /* Redistribution and use in source and binary forms, with or without */
18: /* modification, are permitted provided that the following conditions */
19: /* are met: */
20:
21: /* 1. Redistributions of source code must retain the above copyright */
22: /* notice, this list of conditions and the following disclaimer. */
23: /* 2. Redistributions in binary form must reproduce the above copyright */
24: /* notice, this list of conditions and the following disclaimer in the */
25: /* documentation and/or other materials provided with the distribution. */
26:
27: /* Neither the name of the University nor the names of its contributors */
28: /* may be used to endorse or promote products derived from this software */
29: /* without specific prior written permission. */
30:
31: /* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */
32: /* IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */
33: /* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
34: /* PURPOSE. */
1.1 deraadt 35:
36: #include "flexdef.h"
37:
1.7 tedu 38: /* return true if the chr is in the ccl. Takes negation into account. */
39: static bool
1.8 ! tedu 40: ccl_contains(const int cclp, const int ch)
1.7 tedu 41: {
1.8 ! tedu 42: int ind, len, i;
1.7 tedu 43:
44: len = ccllen[cclp];
45: ind = cclmap[cclp];
46:
47: for (i = 0; i < len; ++i)
48: if (ccltbl[ind + i] == ch)
49: return !cclng[cclp];
50:
1.8 ! tedu 51: return cclng[cclp];
1.7 tedu 52: }
53:
54:
1.1 deraadt 55: /* ccladd - add a single character to a ccl */
56:
1.8 ! tedu 57: void
! 58: ccladd(cclp, ch)
! 59: int cclp;
! 60: int ch;
1.7 tedu 61: {
1.8 ! tedu 62: int ind, len, newpos, i;
1.1 deraadt 63:
1.8 ! tedu 64: check_char(ch);
1.1 deraadt 65:
66: len = ccllen[cclp];
67: ind = cclmap[cclp];
68:
69: /* check to see if the character is already in the ccl */
70:
1.7 tedu 71: for (i = 0; i < len; ++i)
72: if (ccltbl[ind + i] == ch)
1.1 deraadt 73: return;
74:
1.7 tedu 75: /* mark newlines */
76: if (ch == nlch)
77: ccl_has_nl[cclp] = true;
78:
1.1 deraadt 79: newpos = ind + len;
80:
1.7 tedu 81: if (newpos >= current_max_ccl_tbl_size) {
1.1 deraadt 82: current_max_ccl_tbl_size += MAX_CCL_TBL_SIZE_INCREMENT;
83:
84: ++num_reallocs;
85:
1.8 ! tedu 86: ccltbl = reallocate_Character_array(ccltbl,
! 87: current_max_ccl_tbl_size);
1.7 tedu 88: }
1.1 deraadt 89: ccllen[cclp] = len + 1;
90: ccltbl[newpos] = ch;
1.7 tedu 91: }
92:
93: /* dump_cclp - same thing as list_character_set, but for cclps. */
94:
1.8 ! tedu 95: static void
! 96: dump_cclp(FILE * file, int cclp)
1.7 tedu 97: {
98: int i;
99:
1.8 ! tedu 100: putc('[', file);
1.7 tedu 101:
102: for (i = 0; i < csize; ++i) {
1.8 ! tedu 103: if (ccl_contains(cclp, i)) {
1.7 tedu 104: int start_char = i;
105:
1.8 ! tedu 106: putc(' ', file);
1.7 tedu 107:
1.8 ! tedu 108: fputs(readable_form(i), file);
1.7 tedu 109:
1.8 ! tedu 110: while (++i < csize && ccl_contains(cclp, i));
1.7 tedu 111:
112: if (i - 1 > start_char)
113: /* this was a run */
1.8 ! tedu 114: fprintf(file, "-%s",
! 115: readable_form(i - 1));
1.7 tedu 116:
1.8 ! tedu 117: putc(' ', file);
1.7 tedu 118: }
1.1 deraadt 119: }
120:
1.8 ! tedu 121: putc(']', file);
1.7 tedu 122: }
123:
124:
125:
126: /* ccl_set_diff - create a new ccl as the set difference of the two given ccls. */
127: int
1.8 ! tedu 128: ccl_set_diff(int a, int b)
1.7 tedu 129: {
1.8 ! tedu 130: int d, ch;
1.7 tedu 131:
1.8 ! tedu 132: /* create new class */
! 133: d = cclinit();
1.7 tedu 134:
1.8 ! tedu 135: /*
! 136: * In order to handle negation, we spin through all possible chars,
! 137: * addding each char in a that is not in b. (This could be O(n^2),
! 138: * but n is small and bounded.)
! 139: */
! 140: for (ch = 0; ch < csize; ++ch)
! 141: if (ccl_contains(a, ch) && !ccl_contains(b, ch))
! 142: ccladd(d, ch);
! 143:
! 144: /* debug */
! 145: if (0) {
! 146: fprintf(stderr, "ccl_set_diff (");
! 147: fprintf(stderr, "\n ");
! 148: dump_cclp(stderr, a);
! 149: fprintf(stderr, "\n ");
! 150: dump_cclp(stderr, b);
! 151: fprintf(stderr, "\n ");
! 152: dump_cclp(stderr, d);
! 153: fprintf(stderr, "\n)\n");
! 154: }
! 155: return d;
1.7 tedu 156: }
157:
158: /* ccl_set_union - create a new ccl as the set union of the two given ccls. */
159: int
1.8 ! tedu 160: ccl_set_union(int a, int b)
1.7 tedu 161: {
1.8 ! tedu 162: int d, i;
1.7 tedu 163:
1.8 ! tedu 164: /* create new class */
! 165: d = cclinit();
1.7 tedu 166:
1.8 ! tedu 167: /* Add all of a */
! 168: for (i = 0; i < ccllen[a]; ++i)
! 169: ccladd(d, ccltbl[cclmap[a] + i]);
! 170:
! 171: /* Add all of b */
! 172: for (i = 0; i < ccllen[b]; ++i)
! 173: ccladd(d, ccltbl[cclmap[b] + i]);
! 174:
! 175: /* debug */
! 176: if (0) {
! 177: fprintf(stderr, "ccl_set_union (%d + %d = %d", a, b, d);
! 178: fprintf(stderr, "\n ");
! 179: dump_cclp(stderr, a);
! 180: fprintf(stderr, "\n ");
! 181: dump_cclp(stderr, b);
! 182: fprintf(stderr, "\n ");
! 183: dump_cclp(stderr, d);
! 184: fprintf(stderr, "\n)\n");
! 185: }
! 186: return d;
1.7 tedu 187: }
188:
1.1 deraadt 189:
190: /* cclinit - return an empty ccl */
191:
1.8 ! tedu 192: int
! 193: cclinit()
1.7 tedu 194: {
195: if (++lastccl >= current_maxccls) {
1.1 deraadt 196: current_maxccls += MAX_CCLS_INCREMENT;
197:
198: ++num_reallocs;
199:
1.7 tedu 200: cclmap =
1.8 ! tedu 201: reallocate_integer_array(cclmap, current_maxccls);
1.7 tedu 202: ccllen =
1.8 ! tedu 203: reallocate_integer_array(ccllen, current_maxccls);
! 204: cclng = reallocate_integer_array(cclng, current_maxccls);
1.7 tedu 205: ccl_has_nl =
1.8 ! tedu 206: reallocate_bool_array(ccl_has_nl,
! 207: current_maxccls);
1.7 tedu 208: }
209: if (lastccl == 1)
1.1 deraadt 210: /* we're making the first ccl */
211: cclmap[lastccl] = 0;
212:
213: else
1.8 ! tedu 214: /*
! 215: * The new pointer is just past the end of the last ccl.
! 216: * Since the cclmap points to the \first/ character of a ccl,
! 217: * adding the length of the ccl to the cclmap pointer will
! 218: * produce a cursor to the first free space.
1.1 deraadt 219: */
1.7 tedu 220: cclmap[lastccl] =
1.8 ! tedu 221: cclmap[lastccl - 1] + ccllen[lastccl - 1];
1.1 deraadt 222:
223: ccllen[lastccl] = 0;
224: cclng[lastccl] = 0; /* ccl's start out life un-negated */
1.7 tedu 225: ccl_has_nl[lastccl] = false;
1.1 deraadt 226:
227: return lastccl;
1.7 tedu 228: }
1.1 deraadt 229:
230:
231: /* cclnegate - negate the given ccl */
232:
1.8 ! tedu 233: void
! 234: cclnegate(cclp)
! 235: int cclp;
1.7 tedu 236: {
1.1 deraadt 237: cclng[cclp] = 1;
1.7 tedu 238: ccl_has_nl[cclp] = !ccl_has_nl[cclp];
239: }
1.1 deraadt 240:
241:
242: /* list_character_set - list the members of a set of characters in CCL form
243: *
244: * Writes to the given file a character-class representation of those
245: * characters present in the given CCL. A character is present if it
246: * has a non-zero value in the cset array.
247: */
248:
1.8 ! tedu 249: void
! 250: list_character_set(file, cset)
! 251: FILE *file;
! 252: int cset[];
1.7 tedu 253: {
1.5 mpech 254: int i;
1.1 deraadt 255:
1.8 ! tedu 256: putc('[', file);
1.1 deraadt 257:
1.7 tedu 258: for (i = 0; i < csize; ++i) {
259: if (cset[i]) {
1.5 mpech 260: int start_char = i;
1.1 deraadt 261:
1.8 ! tedu 262: putc(' ', file);
1.1 deraadt 263:
1.8 ! tedu 264: fputs(readable_form(i), file);
1.1 deraadt 265:
1.8 ! tedu 266: while (++i < csize && cset[i]);
1.1 deraadt 267:
1.7 tedu 268: if (i - 1 > start_char)
1.1 deraadt 269: /* this was a run */
1.8 ! tedu 270: fprintf(file, "-%s",
! 271: readable_form(i - 1));
1.1 deraadt 272:
1.8 ! tedu 273: putc(' ', file);
1.1 deraadt 274: }
1.7 tedu 275: }
276:
1.8 ! tedu 277: putc(']', file);
1.7 tedu 278: }
1.1 deraadt 279:
1.7 tedu 280: /** Determines if the range [c1-c2] is unambiguous in a case-insensitive
281: * scanner. Specifically, if a lowercase or uppercase character, x, is in the
282: * range [c1-c2], then we require that UPPERCASE(x) and LOWERCASE(x) must also
283: * be in the range. If not, then this range is ambiguous, and the function
284: * returns false. For example, [@-_] spans [a-z] but not [A-Z]. Beware that
285: * [a-z] will be labeled ambiguous because it does not include [A-Z].
286: *
287: * @param c1 the lower end of the range
288: * @param c2 the upper end of the range
289: * @return true if [c1-c2] is not ambiguous for a caseless scanner.
290: */
1.8 ! tedu 291: bool
! 292: range_covers_case(int c1, int c2)
1.7 tedu 293: {
1.8 ! tedu 294: int i, o;
1.7 tedu 295:
296: for (i = c1; i <= c2; i++) {
1.8 ! tedu 297: if (has_case(i)) {
! 298: o = reverse_case(i);
1.7 tedu 299: if (o < c1 || c2 < o)
300: return false;
301: }
1.1 deraadt 302: }
1.7 tedu 303: return true;
304: }
305:
306: /** Reverse the case of a character, if possible.
307: * @return c if case-reversal does not apply.
308: */
1.8 ! tedu 309: int
! 310: reverse_case(int c)
1.7 tedu 311: {
1.8 ! tedu 312: return isupper(c) ? tolower(c) : (islower(c) ? toupper(c) : c);
1.7 tedu 313: }
314:
315: /** Return true if c is uppercase or lowercase. */
1.8 ! tedu 316: bool
! 317: has_case(int c)
1.7 tedu 318: {
1.8 ! tedu 319: return (isupper(c) || islower(c)) ? true : false;
1.7 tedu 320: }