version 1.6, 2003/06/04 17:34:44 |
version 1.7, 2015/11/19 19:43:40 |
|
|
|
|
/* ccl - routines for character classes */ |
/* ccl - routines for character classes */ |
|
|
/*- |
/* Copyright (c) 1990 The Regents of the University of California. */ |
* Copyright (c) 1990 The Regents of the University of California. |
/* All rights reserved. */ |
* All rights reserved. |
|
* |
|
* This code is derived from software contributed to Berkeley by |
|
* Vern Paxson. |
|
* |
|
* The United States Government has rights in this work pursuant |
|
* to contract no. DE-AC03-76SF00098 between the United States |
|
* Department of Energy and the University of California. |
|
* |
|
* Redistribution and use in source and binary forms, with or without |
|
* modification, are permitted provided that the following conditions |
|
* are met: |
|
* |
|
* 1. Redistributions of source code must retain the above copyright |
|
* notice, this list of conditions and the following disclaimer. |
|
* 2. Redistributions in binary form must reproduce the above copyright |
|
* notice, this list of conditions and the following disclaimer in the |
|
* documentation and/or other materials provided with the distribution. |
|
* |
|
* Neither the name of the University nor the names of its contributors |
|
* may be used to endorse or promote products derived from this software |
|
* without specific prior written permission. |
|
* |
|
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR |
|
* IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED |
|
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
|
* PURPOSE. |
|
*/ |
|
|
|
/* $Header$ */ |
/* This code is derived from software contributed to Berkeley by */ |
|
/* Vern Paxson. */ |
|
|
|
/* The United States Government has rights in this work pursuant */ |
|
/* to contract no. DE-AC03-76SF00098 between the United States */ |
|
/* Department of Energy and the University of California. */ |
|
|
|
/* This file is part of flex. */ |
|
|
|
/* Redistribution and use in source and binary forms, with or without */ |
|
/* modification, are permitted provided that the following conditions */ |
|
/* are met: */ |
|
|
|
/* 1. Redistributions of source code must retain the above copyright */ |
|
/* notice, this list of conditions and the following disclaimer. */ |
|
/* 2. Redistributions in binary form must reproduce the above copyright */ |
|
/* notice, this list of conditions and the following disclaimer in the */ |
|
/* documentation and/or other materials provided with the distribution. */ |
|
|
|
/* Neither the name of the University nor the names of its contributors */ |
|
/* may be used to endorse or promote products derived from this software */ |
|
/* without specific prior written permission. */ |
|
|
|
/* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */ |
|
/* IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */ |
|
/* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ |
|
/* PURPOSE. */ |
|
|
#include "flexdef.h" |
#include "flexdef.h" |
|
|
|
/* return true if the chr is in the ccl. Takes negation into account. */ |
|
static bool |
|
ccl_contains (const int cclp, const int ch) |
|
{ |
|
int ind, len, i; |
|
|
|
len = ccllen[cclp]; |
|
ind = cclmap[cclp]; |
|
|
|
for (i = 0; i < len; ++i) |
|
if (ccltbl[ind + i] == ch) |
|
return !cclng[cclp]; |
|
|
|
return cclng[cclp]; |
|
} |
|
|
|
|
/* ccladd - add a single character to a ccl */ |
/* ccladd - add a single character to a ccl */ |
|
|
void ccladd( cclp, ch ) |
void ccladd (cclp, ch) |
int cclp; |
int cclp; |
int ch; |
int ch; |
{ |
{ |
int ind, len, newpos, i; |
int ind, len, newpos, i; |
|
|
check_char( ch ); |
check_char (ch); |
|
|
len = ccllen[cclp]; |
len = ccllen[cclp]; |
ind = cclmap[cclp]; |
ind = cclmap[cclp]; |
|
|
/* check to see if the character is already in the ccl */ |
/* check to see if the character is already in the ccl */ |
|
|
for ( i = 0; i < len; ++i ) |
for (i = 0; i < len; ++i) |
if ( ccltbl[ind + i] == ch ) |
if (ccltbl[ind + i] == ch) |
return; |
return; |
|
|
|
/* mark newlines */ |
|
if (ch == nlch) |
|
ccl_has_nl[cclp] = true; |
|
|
newpos = ind + len; |
newpos = ind + len; |
|
|
if ( newpos >= current_max_ccl_tbl_size ) |
if (newpos >= current_max_ccl_tbl_size) { |
{ |
|
current_max_ccl_tbl_size += MAX_CCL_TBL_SIZE_INCREMENT; |
current_max_ccl_tbl_size += MAX_CCL_TBL_SIZE_INCREMENT; |
|
|
++num_reallocs; |
++num_reallocs; |
|
|
ccltbl = reallocate_Character_array( ccltbl, |
ccltbl = reallocate_Character_array (ccltbl, |
current_max_ccl_tbl_size ); |
current_max_ccl_tbl_size); |
} |
} |
|
|
ccllen[cclp] = len + 1; |
ccllen[cclp] = len + 1; |
ccltbl[newpos] = ch; |
ccltbl[newpos] = ch; |
|
} |
|
|
|
/* dump_cclp - same thing as list_character_set, but for cclps. */ |
|
|
|
static void dump_cclp (FILE* file, int cclp) |
|
{ |
|
int i; |
|
|
|
putc ('[', file); |
|
|
|
for (i = 0; i < csize; ++i) { |
|
if (ccl_contains(cclp, i)){ |
|
int start_char = i; |
|
|
|
putc (' ', file); |
|
|
|
fputs (readable_form (i), file); |
|
|
|
while (++i < csize && ccl_contains(cclp,i)) ; |
|
|
|
if (i - 1 > start_char) |
|
/* this was a run */ |
|
fprintf (file, "-%s", |
|
readable_form (i - 1)); |
|
|
|
putc (' ', file); |
|
} |
} |
} |
|
|
|
putc (']', file); |
|
} |
|
|
|
|
|
|
|
/* ccl_set_diff - create a new ccl as the set difference of the two given ccls. */ |
|
int |
|
ccl_set_diff (int a, int b) |
|
{ |
|
int d, ch; |
|
|
|
/* create new class */ |
|
d = cclinit(); |
|
|
|
/* In order to handle negation, we spin through all possible chars, |
|
* addding each char in a that is not in b. |
|
* (This could be O(n^2), but n is small and bounded.) |
|
*/ |
|
for ( ch = 0; ch < csize; ++ch ) |
|
if (ccl_contains (a, ch) && !ccl_contains(b, ch)) |
|
ccladd (d, ch); |
|
|
|
/* debug */ |
|
if (0){ |
|
fprintf(stderr, "ccl_set_diff ("); |
|
fprintf(stderr, "\n "); |
|
dump_cclp (stderr, a); |
|
fprintf(stderr, "\n "); |
|
dump_cclp (stderr, b); |
|
fprintf(stderr, "\n "); |
|
dump_cclp (stderr, d); |
|
fprintf(stderr, "\n)\n"); |
|
} |
|
return d; |
|
} |
|
|
|
/* ccl_set_union - create a new ccl as the set union of the two given ccls. */ |
|
int |
|
ccl_set_union (int a, int b) |
|
{ |
|
int d, i; |
|
|
|
/* create new class */ |
|
d = cclinit(); |
|
|
|
/* Add all of a */ |
|
for (i = 0; i < ccllen[a]; ++i) |
|
ccladd (d, ccltbl[cclmap[a] + i]); |
|
|
|
/* Add all of b */ |
|
for (i = 0; i < ccllen[b]; ++i) |
|
ccladd (d, ccltbl[cclmap[b] + i]); |
|
|
|
/* debug */ |
|
if (0){ |
|
fprintf(stderr, "ccl_set_union (%d + %d = %d", a, b, d); |
|
fprintf(stderr, "\n "); |
|
dump_cclp (stderr, a); |
|
fprintf(stderr, "\n "); |
|
dump_cclp (stderr, b); |
|
fprintf(stderr, "\n "); |
|
dump_cclp (stderr, d); |
|
fprintf(stderr, "\n)\n"); |
|
} |
|
return d; |
|
} |
|
|
|
|
/* cclinit - return an empty ccl */ |
/* cclinit - return an empty ccl */ |
|
|
int cclinit() |
int cclinit () |
{ |
{ |
if ( ++lastccl >= current_maxccls ) |
if (++lastccl >= current_maxccls) { |
{ |
|
current_maxccls += MAX_CCLS_INCREMENT; |
current_maxccls += MAX_CCLS_INCREMENT; |
|
|
++num_reallocs; |
++num_reallocs; |
|
|
cclmap = reallocate_integer_array( cclmap, current_maxccls ); |
cclmap = |
ccllen = reallocate_integer_array( ccllen, current_maxccls ); |
reallocate_integer_array (cclmap, current_maxccls); |
cclng = reallocate_integer_array( cclng, current_maxccls ); |
ccllen = |
} |
reallocate_integer_array (ccllen, current_maxccls); |
|
cclng = reallocate_integer_array (cclng, current_maxccls); |
|
ccl_has_nl = |
|
reallocate_bool_array (ccl_has_nl, |
|
current_maxccls); |
|
} |
|
|
if ( lastccl == 1 ) |
if (lastccl == 1) |
/* we're making the first ccl */ |
/* we're making the first ccl */ |
cclmap[lastccl] = 0; |
cclmap[lastccl] = 0; |
|
|
|
|
* ccl, adding the length of the ccl to the cclmap pointer |
* ccl, adding the length of the ccl to the cclmap pointer |
* will produce a cursor to the first free space. |
* will produce a cursor to the first free space. |
*/ |
*/ |
cclmap[lastccl] = cclmap[lastccl - 1] + ccllen[lastccl - 1]; |
cclmap[lastccl] = |
|
cclmap[lastccl - 1] + ccllen[lastccl - 1]; |
|
|
ccllen[lastccl] = 0; |
ccllen[lastccl] = 0; |
cclng[lastccl] = 0; /* ccl's start out life un-negated */ |
cclng[lastccl] = 0; /* ccl's start out life un-negated */ |
|
ccl_has_nl[lastccl] = false; |
|
|
return lastccl; |
return lastccl; |
} |
} |
|
|
|
|
/* cclnegate - negate the given ccl */ |
/* cclnegate - negate the given ccl */ |
|
|
void cclnegate( cclp ) |
void cclnegate (cclp) |
int cclp; |
int cclp; |
{ |
{ |
cclng[cclp] = 1; |
cclng[cclp] = 1; |
} |
ccl_has_nl[cclp] = !ccl_has_nl[cclp]; |
|
} |
|
|
|
|
/* list_character_set - list the members of a set of characters in CCL form |
/* list_character_set - list the members of a set of characters in CCL form |
|
|
* has a non-zero value in the cset array. |
* has a non-zero value in the cset array. |
*/ |
*/ |
|
|
void list_character_set( file, cset ) |
void list_character_set (file, cset) |
FILE *file; |
FILE *file; |
int cset[]; |
int cset[]; |
{ |
{ |
int i; |
int i; |
|
|
putc( '[', file ); |
putc ('[', file); |
|
|
for ( i = 0; i < csize; ++i ) |
for (i = 0; i < csize; ++i) { |
{ |
if (cset[i]) { |
if ( cset[i] ) |
|
{ |
|
int start_char = i; |
int start_char = i; |
|
|
putc( ' ', file ); |
putc (' ', file); |
|
|
fputs( readable_form( i ), file ); |
fputs (readable_form (i), file); |
|
|
while ( ++i < csize && cset[i] ) |
while (++i < csize && cset[i]) ; |
; |
|
|
|
if ( i - 1 > start_char ) |
if (i - 1 > start_char) |
/* this was a run */ |
/* this was a run */ |
fprintf( file, "-%s", readable_form( i - 1 ) ); |
fprintf (file, "-%s", |
|
readable_form (i - 1)); |
|
|
putc( ' ', file ); |
putc (' ', file); |
} |
|
} |
} |
|
} |
|
|
putc( ']', file ); |
putc (']', file); |
|
} |
|
|
|
/** Determines if the range [c1-c2] is unambiguous in a case-insensitive |
|
* scanner. Specifically, if a lowercase or uppercase character, x, is in the |
|
* range [c1-c2], then we require that UPPERCASE(x) and LOWERCASE(x) must also |
|
* be in the range. If not, then this range is ambiguous, and the function |
|
* returns false. For example, [@-_] spans [a-z] but not [A-Z]. Beware that |
|
* [a-z] will be labeled ambiguous because it does not include [A-Z]. |
|
* |
|
* @param c1 the lower end of the range |
|
* @param c2 the upper end of the range |
|
* @return true if [c1-c2] is not ambiguous for a caseless scanner. |
|
*/ |
|
bool range_covers_case (int c1, int c2) |
|
{ |
|
int i, o; |
|
|
|
for (i = c1; i <= c2; i++) { |
|
if (has_case (i)) { |
|
o = reverse_case (i); |
|
if (o < c1 || c2 < o) |
|
return false; |
|
} |
} |
} |
|
return true; |
|
} |
|
|
|
/** Reverse the case of a character, if possible. |
|
* @return c if case-reversal does not apply. |
|
*/ |
|
int reverse_case (int c) |
|
{ |
|
return isupper (c) ? tolower (c) : (islower (c) ? toupper (c) : c); |
|
} |
|
|
|
/** Return true if c is uppercase or lowercase. */ |
|
bool has_case (int c) |
|
{ |
|
return (isupper (c) || islower (c)) ? true : false; |
|
} |