=================================================================== RCS file: /cvsrepo/anoncvs/cvs/src/usr.bin/lex/ccl.c,v retrieving revision 1.6 retrieving revision 1.7 diff -c -r1.6 -r1.7 *** src/usr.bin/lex/ccl.c 2003/06/04 17:34:44 1.6 --- src/usr.bin/lex/ccl.c 2015/11/19 19:43:40 1.7 *************** *** 1,94 **** ! /* $OpenBSD: ccl.c,v 1.6 2003/06/04 17:34:44 millert Exp $ */ /* ccl - routines for character classes */ ! /*- ! * Copyright (c) 1990 The Regents of the University of California. ! * All rights reserved. ! * ! * This code is derived from software contributed to Berkeley by ! * Vern Paxson. ! * ! * The United States Government has rights in this work pursuant ! * to contract no. DE-AC03-76SF00098 between the United States ! * Department of Energy and the University of California. ! * ! * Redistribution and use in source and binary forms, with or without ! * modification, are permitted provided that the following conditions ! * are met: ! * ! * 1. Redistributions of source code must retain the above copyright ! * notice, this list of conditions and the following disclaimer. ! * 2. Redistributions in binary form must reproduce the above copyright ! * notice, this list of conditions and the following disclaimer in the ! * documentation and/or other materials provided with the distribution. ! * ! * Neither the name of the University nor the names of its contributors ! * may be used to endorse or promote products derived from this software ! * without specific prior written permission. ! * ! * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR ! * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED ! * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR ! * PURPOSE. ! */ ! /* $Header: /cvsrepo/anoncvs/cvs/src/usr.bin/lex/ccl.c,v 1.6 2003/06/04 17:34:44 millert Exp $ */ #include "flexdef.h" /* ccladd - add a single character to a ccl */ ! void ccladd( cclp, ch ) ! int cclp; ! int ch; ! { ! int ind, len, newpos, i; ! check_char( ch ); len = ccllen[cclp]; ind = cclmap[cclp]; /* check to see if the character is already in the ccl */ ! for ( i = 0; i < len; ++i ) ! if ( ccltbl[ind + i] == ch ) return; newpos = ind + len; ! if ( newpos >= current_max_ccl_tbl_size ) ! { current_max_ccl_tbl_size += MAX_CCL_TBL_SIZE_INCREMENT; ++num_reallocs; ! ccltbl = reallocate_Character_array( ccltbl, ! current_max_ccl_tbl_size ); ! } ccllen[cclp] = len + 1; ccltbl[newpos] = ch; } /* cclinit - return an empty ccl */ ! int cclinit() ! { ! if ( ++lastccl >= current_maxccls ) ! { current_maxccls += MAX_CCLS_INCREMENT; ++num_reallocs; ! cclmap = reallocate_integer_array( cclmap, current_maxccls ); ! ccllen = reallocate_integer_array( ccllen, current_maxccls ); ! cclng = reallocate_integer_array( cclng, current_maxccls ); ! } ! if ( lastccl == 1 ) /* we're making the first ccl */ cclmap[lastccl] = 0; --- 1,210 ---- ! /* $OpenBSD: ccl.c,v 1.7 2015/11/19 19:43:40 tedu Exp $ */ /* ccl - routines for character classes */ ! /* Copyright (c) 1990 The Regents of the University of California. */ ! /* All rights reserved. */ ! /* This code is derived from software contributed to Berkeley by */ ! /* Vern Paxson. */ + /* The United States Government has rights in this work pursuant */ + /* to contract no. DE-AC03-76SF00098 between the United States */ + /* Department of Energy and the University of California. */ + + /* This file is part of flex. */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + + /* Neither the name of the University nor the names of its contributors */ + /* may be used to endorse or promote products derived from this software */ + /* without specific prior written permission. */ + + /* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */ + /* IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */ + /* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE. */ + #include "flexdef.h" + /* return true if the chr is in the ccl. Takes negation into account. */ + static bool + ccl_contains (const int cclp, const int ch) + { + int ind, len, i; + + len = ccllen[cclp]; + ind = cclmap[cclp]; + + for (i = 0; i < len; ++i) + if (ccltbl[ind + i] == ch) + return !cclng[cclp]; + + return cclng[cclp]; + } + + /* ccladd - add a single character to a ccl */ ! void ccladd (cclp, ch) ! int cclp; ! int ch; ! { ! int ind, len, newpos, i; ! check_char (ch); len = ccllen[cclp]; ind = cclmap[cclp]; /* check to see if the character is already in the ccl */ ! for (i = 0; i < len; ++i) ! if (ccltbl[ind + i] == ch) return; + /* mark newlines */ + if (ch == nlch) + ccl_has_nl[cclp] = true; + newpos = ind + len; ! if (newpos >= current_max_ccl_tbl_size) { current_max_ccl_tbl_size += MAX_CCL_TBL_SIZE_INCREMENT; ++num_reallocs; ! ccltbl = reallocate_Character_array (ccltbl, ! current_max_ccl_tbl_size); ! } ccllen[cclp] = len + 1; ccltbl[newpos] = ch; + } + + /* dump_cclp - same thing as list_character_set, but for cclps. */ + + static void dump_cclp (FILE* file, int cclp) + { + int i; + + putc ('[', file); + + for (i = 0; i < csize; ++i) { + if (ccl_contains(cclp, i)){ + int start_char = i; + + putc (' ', file); + + fputs (readable_form (i), file); + + while (++i < csize && ccl_contains(cclp,i)) ; + + if (i - 1 > start_char) + /* this was a run */ + fprintf (file, "-%s", + readable_form (i - 1)); + + putc (' ', file); + } } + putc (']', file); + } + + + /* ccl_set_diff - create a new ccl as the set difference of the two given ccls. */ + int + ccl_set_diff (int a, int b) + { + int d, ch; + + /* create new class */ + d = cclinit(); + + /* In order to handle negation, we spin through all possible chars, + * addding each char in a that is not in b. + * (This could be O(n^2), but n is small and bounded.) + */ + for ( ch = 0; ch < csize; ++ch ) + if (ccl_contains (a, ch) && !ccl_contains(b, ch)) + ccladd (d, ch); + + /* debug */ + if (0){ + fprintf(stderr, "ccl_set_diff ("); + fprintf(stderr, "\n "); + dump_cclp (stderr, a); + fprintf(stderr, "\n "); + dump_cclp (stderr, b); + fprintf(stderr, "\n "); + dump_cclp (stderr, d); + fprintf(stderr, "\n)\n"); + } + return d; + } + + /* ccl_set_union - create a new ccl as the set union of the two given ccls. */ + int + ccl_set_union (int a, int b) + { + int d, i; + + /* create new class */ + d = cclinit(); + + /* Add all of a */ + for (i = 0; i < ccllen[a]; ++i) + ccladd (d, ccltbl[cclmap[a] + i]); + + /* Add all of b */ + for (i = 0; i < ccllen[b]; ++i) + ccladd (d, ccltbl[cclmap[b] + i]); + + /* debug */ + if (0){ + fprintf(stderr, "ccl_set_union (%d + %d = %d", a, b, d); + fprintf(stderr, "\n "); + dump_cclp (stderr, a); + fprintf(stderr, "\n "); + dump_cclp (stderr, b); + fprintf(stderr, "\n "); + dump_cclp (stderr, d); + fprintf(stderr, "\n)\n"); + } + return d; + } + + /* cclinit - return an empty ccl */ ! int cclinit () ! { ! if (++lastccl >= current_maxccls) { current_maxccls += MAX_CCLS_INCREMENT; ++num_reallocs; ! cclmap = ! reallocate_integer_array (cclmap, current_maxccls); ! ccllen = ! reallocate_integer_array (ccllen, current_maxccls); ! cclng = reallocate_integer_array (cclng, current_maxccls); ! ccl_has_nl = ! reallocate_bool_array (ccl_has_nl, ! current_maxccls); ! } ! if (lastccl == 1) /* we're making the first ccl */ cclmap[lastccl] = 0; *************** *** 98,119 **** * ccl, adding the length of the ccl to the cclmap pointer * will produce a cursor to the first free space. */ ! cclmap[lastccl] = cclmap[lastccl - 1] + ccllen[lastccl - 1]; ccllen[lastccl] = 0; cclng[lastccl] = 0; /* ccl's start out life un-negated */ return lastccl; ! } /* cclnegate - negate the given ccl */ ! void cclnegate( cclp ) ! int cclp; ! { cclng[cclp] = 1; ! } /* list_character_set - list the members of a set of characters in CCL form --- 214,238 ---- * ccl, adding the length of the ccl to the cclmap pointer * will produce a cursor to the first free space. */ ! cclmap[lastccl] = ! cclmap[lastccl - 1] + ccllen[lastccl - 1]; ccllen[lastccl] = 0; cclng[lastccl] = 0; /* ccl's start out life un-negated */ + ccl_has_nl[lastccl] = false; return lastccl; ! } /* cclnegate - negate the given ccl */ ! void cclnegate (cclp) ! int cclp; ! { cclng[cclp] = 1; ! ccl_has_nl[cclp] = !ccl_has_nl[cclp]; ! } /* list_character_set - list the members of a set of characters in CCL form *************** *** 123,156 **** * has a non-zero value in the cset array. */ ! void list_character_set( file, cset ) ! FILE *file; ! int cset[]; ! { int i; ! putc( '[', file ); ! for ( i = 0; i < csize; ++i ) ! { ! if ( cset[i] ) ! { int start_char = i; ! putc( ' ', file ); ! fputs( readable_form( i ), file ); ! while ( ++i < csize && cset[i] ) ! ; ! if ( i - 1 > start_char ) /* this was a run */ ! fprintf( file, "-%s", readable_form( i - 1 ) ); ! putc( ' ', file ); ! } } ! putc( ']', file ); } --- 242,312 ---- * has a non-zero value in the cset array. */ ! void list_character_set (file, cset) ! FILE *file; ! int cset[]; ! { int i; ! putc ('[', file); ! for (i = 0; i < csize; ++i) { ! if (cset[i]) { int start_char = i; ! putc (' ', file); ! fputs (readable_form (i), file); ! while (++i < csize && cset[i]) ; ! if (i - 1 > start_char) /* this was a run */ ! fprintf (file, "-%s", ! readable_form (i - 1)); ! putc (' ', file); } + } ! putc (']', file); ! } ! ! /** Determines if the range [c1-c2] is unambiguous in a case-insensitive ! * scanner. Specifically, if a lowercase or uppercase character, x, is in the ! * range [c1-c2], then we require that UPPERCASE(x) and LOWERCASE(x) must also ! * be in the range. If not, then this range is ambiguous, and the function ! * returns false. For example, [@-_] spans [a-z] but not [A-Z]. Beware that ! * [a-z] will be labeled ambiguous because it does not include [A-Z]. ! * ! * @param c1 the lower end of the range ! * @param c2 the upper end of the range ! * @return true if [c1-c2] is not ambiguous for a caseless scanner. ! */ ! bool range_covers_case (int c1, int c2) ! { ! int i, o; ! ! for (i = c1; i <= c2; i++) { ! if (has_case (i)) { ! o = reverse_case (i); ! if (o < c1 || c2 < o) ! return false; ! } } + return true; + } + + /** Reverse the case of a character, if possible. + * @return c if case-reversal does not apply. + */ + int reverse_case (int c) + { + return isupper (c) ? tolower (c) : (islower (c) ? toupper (c) : c); + } + + /** Return true if c is uppercase or lowercase. */ + bool has_case (int c) + { + return (isupper (c) || islower (c)) ? true : false; + }