[BACK]Return to ccl.c CVS log [TXT][DIR] Up to [local] / src / usr.bin / lex

Annotation of src/usr.bin/lex/ccl.c, Revision 1.8

1.8     ! tedu        1: /*     $OpenBSD: ccl.c,v 1.7 2015/11/19 19:43:40 tedu Exp $    */
1.2       deraadt     2:
1.1       deraadt     3: /* ccl - routines for character classes */
                      4:
1.7       tedu        5: /*  Copyright (c) 1990 The Regents of the University of California. */
                      6: /*  All rights reserved. */
                      7:
                      8: /*  This code is derived from software contributed to Berkeley by */
                      9: /*  Vern Paxson. */
1.1       deraadt    10:
1.7       tedu       11: /*  The United States Government has rights in this work pursuant */
                     12: /*  to contract no. DE-AC03-76SF00098 between the United States */
1.8     ! tedu       13:  /* Department of Energy and the University of California. */
1.7       tedu       14:
                     15: /*  This file is part of flex. */
                     16:
                     17: /*  Redistribution and use in source and binary forms, with or without */
                     18: /*  modification, are permitted provided that the following conditions */
                     19: /*  are met: */
                     20:
                     21: /*  1. Redistributions of source code must retain the above copyright */
                     22: /*     notice, this list of conditions and the following disclaimer. */
                     23: /*  2. Redistributions in binary form must reproduce the above copyright */
                     24: /*     notice, this list of conditions and the following disclaimer in the */
                     25: /*     documentation and/or other materials provided with the distribution. */
                     26:
                     27: /*  Neither the name of the University nor the names of its contributors */
                     28: /*  may be used to endorse or promote products derived from this software */
                     29: /*  without specific prior written permission. */
                     30:
                     31: /*  THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */
                     32: /*  IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */
                     33: /*  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
                     34: /*  PURPOSE. */
1.1       deraadt    35:
                     36: #include "flexdef.h"
                     37:
1.7       tedu       38: /* return true if the chr is in the ccl. Takes negation into account. */
                     39: static bool
1.8     ! tedu       40: ccl_contains(const int cclp, const int ch)
1.7       tedu       41: {
1.8     ! tedu       42:        int ind, len, i;
1.7       tedu       43:
                     44:        len = ccllen[cclp];
                     45:        ind = cclmap[cclp];
                     46:
                     47:        for (i = 0; i < len; ++i)
                     48:                if (ccltbl[ind + i] == ch)
                     49:                        return !cclng[cclp];
                     50:
1.8     ! tedu       51:        return cclng[cclp];
1.7       tedu       52: }
                     53:
                     54:
1.1       deraadt    55: /* ccladd - add a single character to a ccl */
                     56:
1.8     ! tedu       57: void
        !            58: ccladd(cclp, ch)
        !            59:        int cclp;
        !            60:        int ch;
1.7       tedu       61: {
1.8     ! tedu       62:        int ind, len, newpos, i;
1.1       deraadt    63:
1.8     ! tedu       64:        check_char(ch);
1.1       deraadt    65:
                     66:        len = ccllen[cclp];
                     67:        ind = cclmap[cclp];
                     68:
                     69:        /* check to see if the character is already in the ccl */
                     70:
1.7       tedu       71:        for (i = 0; i < len; ++i)
                     72:                if (ccltbl[ind + i] == ch)
1.1       deraadt    73:                        return;
                     74:
1.7       tedu       75:        /* mark newlines */
                     76:        if (ch == nlch)
                     77:                ccl_has_nl[cclp] = true;
                     78:
1.1       deraadt    79:        newpos = ind + len;
                     80:
1.7       tedu       81:        if (newpos >= current_max_ccl_tbl_size) {
1.1       deraadt    82:                current_max_ccl_tbl_size += MAX_CCL_TBL_SIZE_INCREMENT;
                     83:
                     84:                ++num_reallocs;
                     85:
1.8     ! tedu       86:                ccltbl = reallocate_Character_array(ccltbl,
        !            87:                    current_max_ccl_tbl_size);
1.7       tedu       88:        }
1.1       deraadt    89:        ccllen[cclp] = len + 1;
                     90:        ccltbl[newpos] = ch;
1.7       tedu       91: }
                     92:
                     93: /* dump_cclp - same thing as list_character_set, but for cclps.  */
                     94:
1.8     ! tedu       95: static void
        !            96: dump_cclp(FILE * file, int cclp)
1.7       tedu       97: {
                     98:        int i;
                     99:
1.8     ! tedu      100:        putc('[', file);
1.7       tedu      101:
                    102:        for (i = 0; i < csize; ++i) {
1.8     ! tedu      103:                if (ccl_contains(cclp, i)) {
1.7       tedu      104:                        int start_char = i;
                    105:
1.8     ! tedu      106:                        putc(' ', file);
1.7       tedu      107:
1.8     ! tedu      108:                        fputs(readable_form(i), file);
1.7       tedu      109:
1.8     ! tedu      110:                        while (++i < csize && ccl_contains(cclp, i));
1.7       tedu      111:
                    112:                        if (i - 1 > start_char)
                    113:                                /* this was a run */
1.8     ! tedu      114:                                fprintf(file, "-%s",
        !           115:                                    readable_form(i - 1));
1.7       tedu      116:
1.8     ! tedu      117:                        putc(' ', file);
1.7       tedu      118:                }
1.1       deraadt   119:        }
                    120:
1.8     ! tedu      121:        putc(']', file);
1.7       tedu      122: }
                    123:
                    124:
                    125:
                    126: /* ccl_set_diff - create a new ccl as the set difference of the two given ccls. */
                    127: int
1.8     ! tedu      128: ccl_set_diff(int a, int b)
1.7       tedu      129: {
1.8     ! tedu      130:        int d, ch;
1.7       tedu      131:
1.8     ! tedu      132:        /* create new class  */
        !           133:        d = cclinit();
1.7       tedu      134:
1.8     ! tedu      135:        /*
        !           136:         * In order to handle negation, we spin through all possible chars,
        !           137:         * addding each char in a that is not in b. (This could be O(n^2),
        !           138:         * but n is small and bounded.)
        !           139:         */
        !           140:        for (ch = 0; ch < csize; ++ch)
        !           141:                if (ccl_contains(a, ch) && !ccl_contains(b, ch))
        !           142:                        ccladd(d, ch);
        !           143:
        !           144:        /* debug */
        !           145:        if (0) {
        !           146:                fprintf(stderr, "ccl_set_diff (");
        !           147:                fprintf(stderr, "\n    ");
        !           148:                dump_cclp(stderr, a);
        !           149:                fprintf(stderr, "\n    ");
        !           150:                dump_cclp(stderr, b);
        !           151:                fprintf(stderr, "\n    ");
        !           152:                dump_cclp(stderr, d);
        !           153:                fprintf(stderr, "\n)\n");
        !           154:        }
        !           155:        return d;
1.7       tedu      156: }
                    157:
                    158: /* ccl_set_union - create a new ccl as the set union of the two given ccls. */
                    159: int
1.8     ! tedu      160: ccl_set_union(int a, int b)
1.7       tedu      161: {
1.8     ! tedu      162:        int d, i;
1.7       tedu      163:
1.8     ! tedu      164:        /* create new class  */
        !           165:        d = cclinit();
1.7       tedu      166:
1.8     ! tedu      167:        /* Add all of a */
        !           168:        for (i = 0; i < ccllen[a]; ++i)
        !           169:                ccladd(d, ccltbl[cclmap[a] + i]);
        !           170:
        !           171:        /* Add all of b */
        !           172:        for (i = 0; i < ccllen[b]; ++i)
        !           173:                ccladd(d, ccltbl[cclmap[b] + i]);
        !           174:
        !           175:        /* debug */
        !           176:        if (0) {
        !           177:                fprintf(stderr, "ccl_set_union (%d + %d = %d", a, b, d);
        !           178:                fprintf(stderr, "\n    ");
        !           179:                dump_cclp(stderr, a);
        !           180:                fprintf(stderr, "\n    ");
        !           181:                dump_cclp(stderr, b);
        !           182:                fprintf(stderr, "\n    ");
        !           183:                dump_cclp(stderr, d);
        !           184:                fprintf(stderr, "\n)\n");
        !           185:        }
        !           186:        return d;
1.7       tedu      187: }
                    188:
1.1       deraadt   189:
                    190: /* cclinit - return an empty ccl */
                    191:
1.8     ! tedu      192: int
        !           193: cclinit()
1.7       tedu      194: {
                    195:        if (++lastccl >= current_maxccls) {
1.1       deraadt   196:                current_maxccls += MAX_CCLS_INCREMENT;
                    197:
                    198:                ++num_reallocs;
                    199:
1.7       tedu      200:                cclmap =
1.8     ! tedu      201:                    reallocate_integer_array(cclmap, current_maxccls);
1.7       tedu      202:                ccllen =
1.8     ! tedu      203:                    reallocate_integer_array(ccllen, current_maxccls);
        !           204:                cclng = reallocate_integer_array(cclng, current_maxccls);
1.7       tedu      205:                ccl_has_nl =
1.8     ! tedu      206:                    reallocate_bool_array(ccl_has_nl,
        !           207:                    current_maxccls);
1.7       tedu      208:        }
                    209:        if (lastccl == 1)
1.1       deraadt   210:                /* we're making the first ccl */
                    211:                cclmap[lastccl] = 0;
                    212:
                    213:        else
1.8     ! tedu      214:                /*
        !           215:                 * The new pointer is just past the end of the last ccl.
        !           216:                 * Since the cclmap points to the \first/ character of a ccl,
        !           217:                 * adding the length of the ccl to the cclmap pointer will
        !           218:                 * produce a cursor to the first free space.
1.1       deraadt   219:                 */
1.7       tedu      220:                cclmap[lastccl] =
1.8     ! tedu      221:                    cclmap[lastccl - 1] + ccllen[lastccl - 1];
1.1       deraadt   222:
                    223:        ccllen[lastccl] = 0;
                    224:        cclng[lastccl] = 0;     /* ccl's start out life un-negated */
1.7       tedu      225:        ccl_has_nl[lastccl] = false;
1.1       deraadt   226:
                    227:        return lastccl;
1.7       tedu      228: }
1.1       deraadt   229:
                    230:
                    231: /* cclnegate - negate the given ccl */
                    232:
1.8     ! tedu      233: void
        !           234: cclnegate(cclp)
        !           235:        int cclp;
1.7       tedu      236: {
1.1       deraadt   237:        cclng[cclp] = 1;
1.7       tedu      238:        ccl_has_nl[cclp] = !ccl_has_nl[cclp];
                    239: }
1.1       deraadt   240:
                    241:
                    242: /* list_character_set - list the members of a set of characters in CCL form
                    243:  *
                    244:  * Writes to the given file a character-class representation of those
                    245:  * characters present in the given CCL.  A character is present if it
                    246:  * has a non-zero value in the cset array.
                    247:  */
                    248:
1.8     ! tedu      249: void
        !           250: list_character_set(file, cset)
        !           251:        FILE *file;
        !           252:        int cset[];
1.7       tedu      253: {
1.5       mpech     254:        int i;
1.1       deraadt   255:
1.8     ! tedu      256:        putc('[', file);
1.1       deraadt   257:
1.7       tedu      258:        for (i = 0; i < csize; ++i) {
                    259:                if (cset[i]) {
1.5       mpech     260:                        int start_char = i;
1.1       deraadt   261:
1.8     ! tedu      262:                        putc(' ', file);
1.1       deraadt   263:
1.8     ! tedu      264:                        fputs(readable_form(i), file);
1.1       deraadt   265:
1.8     ! tedu      266:                        while (++i < csize && cset[i]);
1.1       deraadt   267:
1.7       tedu      268:                        if (i - 1 > start_char)
1.1       deraadt   269:                                /* this was a run */
1.8     ! tedu      270:                                fprintf(file, "-%s",
        !           271:                                    readable_form(i - 1));
1.1       deraadt   272:
1.8     ! tedu      273:                        putc(' ', file);
1.1       deraadt   274:                }
1.7       tedu      275:        }
                    276:
1.8     ! tedu      277:        putc(']', file);
1.7       tedu      278: }
1.1       deraadt   279:
1.7       tedu      280: /** Determines if the range [c1-c2] is unambiguous in a case-insensitive
                    281:  * scanner.  Specifically, if a lowercase or uppercase character, x, is in the
                    282:  * range [c1-c2], then we require that UPPERCASE(x) and LOWERCASE(x) must also
                    283:  * be in the range. If not, then this range is ambiguous, and the function
                    284:  * returns false.  For example, [@-_] spans [a-z] but not [A-Z].  Beware that
                    285:  * [a-z] will be labeled ambiguous because it does not include [A-Z].
                    286:  *
                    287:  * @param c1 the lower end of the range
                    288:  * @param c2 the upper end of the range
                    289:  * @return true if [c1-c2] is not ambiguous for a caseless scanner.
                    290:  */
1.8     ! tedu      291: bool
        !           292: range_covers_case(int c1, int c2)
1.7       tedu      293: {
1.8     ! tedu      294:        int i, o;
1.7       tedu      295:
                    296:        for (i = c1; i <= c2; i++) {
1.8     ! tedu      297:                if (has_case(i)) {
        !           298:                        o = reverse_case(i);
1.7       tedu      299:                        if (o < c1 || c2 < o)
                    300:                                return false;
                    301:                }
1.1       deraadt   302:        }
1.7       tedu      303:        return true;
                    304: }
                    305:
                    306: /** Reverse the case of a character, if possible.
                    307:  * @return c if case-reversal does not apply.
                    308:  */
1.8     ! tedu      309: int
        !           310: reverse_case(int c)
1.7       tedu      311: {
1.8     ! tedu      312:        return isupper(c) ? tolower(c) : (islower(c) ? toupper(c) : c);
1.7       tedu      313: }
                    314:
                    315: /** Return true if c is uppercase or lowercase. */
1.8     ! tedu      316: bool
        !           317: has_case(int c)
1.7       tedu      318: {
1.8     ! tedu      319:        return (isupper(c) || islower(c)) ? true : false;
1.7       tedu      320: }