Annotation of src/usr.bin/lex/flexdef.h, Revision 1.8
1.8 ! deraadt 1: /* $OpenBSD: flexdef.h,v 1.7 2004/02/03 21:20:17 espie Exp $ */
1.2 deraadt 2:
1.1 deraadt 3: /* flexdef - definitions file for flex */
4:
5: /*-
6: * Copyright (c) 1990 The Regents of the University of California.
7: * All rights reserved.
8: *
9: * This code is derived from software contributed to Berkeley by
10: * Vern Paxson.
11: *
12: * The United States Government has rights in this work pursuant
13: * to contract no. DE-AC03-76SF00098 between the United States
14: * Department of Energy and the University of California.
15: *
1.5 deraadt 16: * Redistribution and use in source and binary forms, with or without
1.6 millert 17: * modification, are permitted provided that the following conditions
18: * are met:
19: *
20: * 1. Redistributions of source code must retain the above copyright
21: * notice, this list of conditions and the following disclaimer.
22: * 2. Redistributions in binary form must reproduce the above copyright
23: * notice, this list of conditions and the following disclaimer in the
24: * documentation and/or other materials provided with the distribution.
25: *
26: * Neither the name of the University nor the names of its contributors
27: * may be used to endorse or promote products derived from this software
28: * without specific prior written permission.
29: *
30: * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
31: * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
32: * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
33: * PURPOSE.
1.1 deraadt 34: */
35:
1.8 ! deraadt 36: /* @(#) $Header: /cvs/src/usr.bin/lex/flexdef.h,v 1.7 2004/02/03 21:20:17 espie Exp $ (LBL) */
1.1 deraadt 37:
38: #include <stdio.h>
39: #include <ctype.h>
40:
41: #include "config.h"
42:
43: #ifdef __TURBOC__
44: #define HAVE_STRING_H 1
45: #define MS_DOS 1
46: #ifndef __STDC__
47: #define __STDC__ 1
48: #endif
49: #pragma warn -pro
50: #pragma warn -rch
51: #pragma warn -use
52: #pragma warn -aus
53: #pragma warn -par
54: #pragma warn -pia
55: #endif
56:
57: #ifdef HAVE_STRING_H
58: #include <string.h>
59: #else
60: #include <strings.h>
61: #endif
62:
63: #ifdef HAVE_SYS_TYPES_H
64: #include <sys/types.h>
65: #endif
66:
1.7 espie 67: #ifdef STDC_HEADERS
68: #include <stdlib.h>
1.8 ! deraadt 69: #include <unistd.h>
1.7 espie 70: #else
1.1 deraadt 71: #ifdef HAVE_MALLOC_H
72: #include <malloc.h>
73: #endif
1.7 espie 74: #endif
1.1 deraadt 75:
76:
77: /* As an aid for the internationalization patch to flex, which
78: * is maintained outside this distribution for copyright reasons.
79: */
80: #define _(String) (String)
81:
82: /* Always be prepared to generate an 8-bit scanner. */
83: #define CSIZE 256
84: #define Char unsigned char
85:
86: /* Size of input alphabet - should be size of ASCII set. */
87: #ifndef DEFAULT_CSIZE
88: #define DEFAULT_CSIZE 128
89: #endif
90:
91: #ifndef PROTO
1.4 mickey 92: #ifdef __STDC__
1.1 deraadt 93: #define PROTO(proto) proto
94: #else
95: #define PROTO(proto) ()
96: #endif
97: #endif
98:
99: #ifdef VMS
100: #ifndef __VMS_POSIX
101: #define unlink remove
102: #define SHORT_FILE_NAMES
103: #endif
104: #endif
105:
106: #ifdef MS_DOS
107: #define SHORT_FILE_NAMES
108: #endif
109:
110:
111: /* Maximum line length we'll have to deal with. */
112: #define MAXLINE 2048
113:
114: #ifndef MIN
115: #define MIN(x,y) ((x) < (y) ? (x) : (y))
116: #endif
117: #ifndef MAX
118: #define MAX(x,y) ((x) > (y) ? (x) : (y))
119: #endif
120: #ifndef ABS
121: #define ABS(x) ((x) < 0 ? -(x) : (x))
122: #endif
123:
124:
125: /* ANSI C does not guarantee that isascii() is defined */
126: #ifndef isascii
127: #define isascii(c) ((c) <= 0177)
128: #endif
129:
130:
131: #define true 1
132: #define false 0
133: #define unspecified -1
134:
135:
136: /* Special chk[] values marking the slots taken by end-of-buffer and action
137: * numbers.
138: */
139: #define EOB_POSITION -1
140: #define ACTION_POSITION -2
141:
142: /* Number of data items per line for -f output. */
143: #define NUMDATAITEMS 10
144:
145: /* Number of lines of data in -f output before inserting a blank line for
146: * readability.
147: */
148: #define NUMDATALINES 10
149:
150: /* transition_struct_out() definitions. */
151: #define TRANS_STRUCT_PRINT_LENGTH 14
152:
153: /* Returns true if an nfa state has an epsilon out-transition slot
154: * that can be used. This definition is currently not used.
155: */
156: #define FREE_EPSILON(state) \
157: (transchar[state] == SYM_EPSILON && \
158: trans2[state] == NO_TRANSITION && \
159: finalst[state] != state)
160:
161: /* Returns true if an nfa state has an epsilon out-transition character
162: * and both slots are free. Only trans1[] is tested; NOTE(review): presumably trans2[] is never in use while trans1[] is free - confirm.
163: */
164: #define SUPER_FREE_EPSILON(state) \
165: (transchar[state] == SYM_EPSILON && \
166: trans1[state] == NO_TRANSITION)
167:
168: /* Maximum number of NFA states that can comprise a DFA state. It's real
169: * big because if there's a lot of rules, the initial state will have a
170: * huge epsilon closure.
171: */
172: #define INITIAL_MAX_DFA_SIZE 750
173: #define MAX_DFA_SIZE_INCREMENT 750
174:
175:
176: /* A note on the following masks. They are used to mark accepting numbers
177: * as being special. As such, they implicitly limit the number of accepting
178: * numbers (i.e., rules) because if there are too many rules the rule numbers
179: * will overload the mask bits. Fortunately, this limit is \large/ (0x2000 ==
180: * 8192) so unlikely to actually cause any problems. A check is made in
181: * new_rule() to ensure that this limit is not reached.
182: */
183:
184: /* Mask to mark a trailing context accepting number. */
185: #define YY_TRAILING_MASK 0x2000
186:
187: /* Mask to mark the accepting number of the "head" of a trailing context
188: * rule.
189: */
190: #define YY_TRAILING_HEAD_MASK 0x4000
191:
192: /* Maximum number of rules, as outlined in the above note. */
193: #define MAX_RULE (YY_TRAILING_MASK - 1)
194:
195:
196: /* NIL must be 0. If not, its special meaning when making equivalence classes
197: * (it marks the representative of a given e.c.) will be unidentifiable.
198: */
199: #define NIL 0
200:
201: #define JAM -1 /* to mark a missing DFA transition */
202: #define NO_TRANSITION NIL
203: #define UNIQUE -1 /* marks a symbol as an e.c. representative */
204: #define INFINITY -1 /* for x{5,} constructions */
205:
206: #define INITIAL_MAX_CCLS 100 /* max number of unique character classes */
207: #define MAX_CCLS_INCREMENT 100
208:
209: /* Size of table holding members of character classes. */
210: #define INITIAL_MAX_CCL_TBL_SIZE 500
211: #define MAX_CCL_TBL_SIZE_INCREMENT 250
212:
213: #define INITIAL_MAX_RULES 100 /* default maximum number of rules */
214: #define MAX_RULES_INCREMENT 100
215:
216: #define INITIAL_MNS 2000 /* default maximum number of nfa states */
217: #define MNS_INCREMENT 1000 /* amount to bump above by if it's not enough */
218:
219: #define INITIAL_MAX_DFAS 1000 /* default maximum number of dfa states */
220: #define MAX_DFAS_INCREMENT 1000
221:
222: #define JAMSTATE -32766 /* marks a reference to the state that always jams */
223:
224: /* Maximum number of NFA states. */
225: #define MAXIMUM_MNS 31999
226:
227: /* Enough so that if it's subtracted from an NFA state number, the result
228: * is guaranteed to be negative.
229: */
230: #define MARKER_DIFFERENCE (MAXIMUM_MNS+2)
231:
232: /* Maximum number of nxt/chk pairs for non-templates. */
233: #define INITIAL_MAX_XPAIRS 2000
234: #define MAX_XPAIRS_INCREMENT 2000
235:
236: /* Maximum number of nxt/chk pairs needed for templates. */
237: #define INITIAL_MAX_TEMPLATE_XPAIRS 2500
238: #define MAX_TEMPLATE_XPAIRS_INCREMENT 2500
239:
240: #define SYM_EPSILON (CSIZE + 1) /* to mark transitions on the symbol epsilon */
241:
242: #define INITIAL_MAX_SCS 40 /* maximum number of start conditions */
243: #define MAX_SCS_INCREMENT 40 /* amount to bump by if it's not enough */
244:
245: #define ONE_STACK_SIZE 500 /* stack of states with only one out-transition */
246: #define SAME_TRANS -1 /* transition is the same as "default" entry for state */
247:
248: /* The following percentages are used to tune table compression:
249:
250: * The percentage the number of out-transitions a state must be of the
251: * number of equivalence classes in order to be considered for table
252: * compaction by using protos.
253: */
254: #define PROTO_SIZE_PERCENTAGE 15
255:
256: /* The percentage the number of homogeneous out-transitions of a state
257: * must be of the number of total out-transitions of the state in order
258: * that the state's transition table is first compared with a potential
259: * template of the most common out-transition instead of with the first
260: * proto in the proto queue.
261: */
262: #define CHECK_COM_PERCENTAGE 50
263:
264: /* The percentage the number of differences between a state's transition
265: * table and the proto it was first compared with must be of the total
266: * number of out-transitions of the state in order to keep the first
267: * proto as a good match and not search any further.
268: */
269: #define FIRST_MATCH_DIFF_PERCENTAGE 10
270:
271: /* The percentage the number of differences between a state's transition
272: * table and the most similar proto must be of the state's total number
273: * of out-transitions to use the proto as an acceptable close match.
274: */
275: #define ACCEPTABLE_DIFF_PERCENTAGE 50
276:
277: /* The percentage the number of homogeneous out-transitions of a state
278: * must be of the number of total out-transitions of the state in order
279: * to consider making a template from the state.
280: */
281: #define TEMPLATE_SAME_PERCENTAGE 60
282:
283: /* The percentage the number of differences between a state's transition
284: * table and the most similar proto must be of the state's total number
285: * of out-transitions to create a new proto from the state.
286: */
287: #define NEW_PROTO_DIFF_PERCENTAGE 20
288:
289: /* The percentage the total number of out-transitions of a state must be
290: * of the number of equivalence classes in order to consider trying to
291: * fit the transition table into "holes" inside the nxt/chk table.
292: */
293: #define INTERIOR_FIT_PERCENTAGE 15
294:
295: /* Size of region set aside to cache the complete transition table of
296: * protos on the proto queue to enable quick comparisons.
297: */
298: #define PROT_SAVE_SIZE 2000
299:
300: #define MSP 50 /* maximum number of saved protos (protos on the proto queue) */
301:
302: /* Maximum number of out-transitions a state can have that we'll rummage
303: * around through the interior of the internal fast table looking for a
304: * spot for it.
305: */
306: #define MAX_XTIONS_FULL_INTERIOR_FIT 4
307:
308: /* Maximum number of rules which will be reported as being associated
309: * with a DFA state.
310: */
311: #define MAX_ASSOC_RULES 100
312:
313: /* Number that, if used to subscript an array, has a good chance of producing
314: * an error; should be small enough to fit into a short.
315: */
316: #define BAD_SUBSCRIPT -32767
317:
318: /* Absolute value of largest number that can be stored in a short, with a
319: * bit of slop thrown in for general paranoia.
320: */
321: #define MAX_SHORT 32700
322:
323:
324: /* Declarations for global variables. */
325:
326: /* Variables for symbol tables:
327: * sctbl - start-condition symbol table
328: * ndtbl - name-definition symbol table
329: * ccltab - character class text symbol table
330: */
331:
332: struct hash_entry
333: {
334: struct hash_entry *prev, *next;
335: char *name;
336: char *str_val;
337: int int_val;
338: } ;
339:
340: typedef struct hash_entry **hash_table;
341:
342: #define NAME_TABLE_HASH_SIZE 101
343: #define START_COND_HASH_SIZE 101
344: #define CCL_HASH_SIZE 101
345:
346: extern struct hash_entry *ndtbl[NAME_TABLE_HASH_SIZE];
347: extern struct hash_entry *sctbl[START_COND_HASH_SIZE];
348: extern struct hash_entry *ccltab[CCL_HASH_SIZE];
349:
350:
351: /* Variables for flags:
352: * printstats - if true (-v), dump statistics
353: * syntaxerror - true if a syntax error has been found
354: * eofseen - true if we've seen an eof in the input file
355: * ddebug - if true (-d), make a "debug" scanner
356: * trace - if true (-T), trace processing
357: * nowarn - if true (-w), do not generate warnings
358: * spprdflt - if true (-s), suppress the default rule
359: * interactive - if true (-I), generate an interactive scanner
360: * caseins - if true (-i), generate a case-insensitive scanner
361: * lex_compat - if true (-l), maximize compatibility with AT&T lex
362: * do_yylineno - if true, generate code to maintain yylineno
363: * useecs - if true (-Ce flag), use equivalence classes
364: * fulltbl - if true (-Cf flag), don't compress the DFA state table
365: * usemecs - if true (-Cm flag), use meta-equivalence classes
366: * fullspd - if true (-F flag), use Jacobson method of table representation
367: * gen_line_dirs - if true (i.e., no -L flag), generate #line directives
368: * performance_report - if > 0 (i.e., -p flag), generate a report relating
369: * to scanner performance; if > 1 (-p -p), report on minor performance
370: * problems, too
371: * backing_up_report - if true (i.e., -b flag), generate "lex.backup" file
372: * listing backing-up states
373: * C_plus_plus - if true (i.e., -+ flag), generate a C++ scanner class;
374: * otherwise, a standard C scanner
375: * long_align - if true (-Ca flag), favor long-word alignment.
376: * use_read - if true (-f, -F, or -Cr) then use read() for scanner input;
377: * otherwise, use fread().
378: * yytext_is_array - if true (i.e., %array directive), then declare
379: * yytext as an array instead of a character pointer. Nice and inefficient.
380: * do_yywrap - do yywrap() processing on EOF. If false, EOF treated as
381: * "no more files".
382: * csize - size of character set for the scanner we're generating;
383: * 128 for 7-bit chars and 256 for 8-bit
384: * yymore_used - if true, yymore() is used in input rules
385: * reject - if true, generate back-up tables for REJECT macro
386: * real_reject - if true, scanner really uses REJECT (as opposed to just
387: * having "reject" set for variable trailing context)
388: * continued_action - true if this rule's action is to "fall through" to
389: * the next rule's action (i.e., the '|' action)
390: * in_rule - true if we're inside an individual rule, false if not.
391: * yymore_really_used - whether to treat yymore() as really used, regardless
392: * of what we think based on references to it in the user's actions.
393: * reject_really_used - same for REJECT
394: */
395:
396: extern int printstats, syntaxerror, eofseen, ddebug, trace, nowarn, spprdflt;
397: extern int interactive, caseins, lex_compat, do_yylineno;
398: extern int useecs, fulltbl, usemecs, fullspd;
399: extern int gen_line_dirs, performance_report, backing_up_report;
400: extern int C_plus_plus, long_align, use_read, yytext_is_array, do_yywrap;
401: extern int csize;
402: extern int yymore_used, reject, real_reject, continued_action, in_rule;
403:
404: extern int yymore_really_used, reject_really_used;
405:
406:
407: /* Variables used in the flex input routines:
408: * datapos - characters on current output line
409: * dataline - number of contiguous lines of data in current data
410: * statement. Used to generate readable -f output
411: * linenum - current input line number
412: * out_linenum - current output line number
413: * skelfile - the skeleton file
414: * skel - compiled-in skeleton array
415: * skel_ind - index into "skel" array, if skelfile is nil
416: * yyin - input file
417: * backing_up_file - file to summarize backing-up states to
418: * infilename - name of input file
419: * outfilename - name of output file
420: * did_outfilename - whether outfilename was explicitly set
421: * prefix - the prefix used for externally visible names ("yy" by default)
422: * yyclass - yyFlexLexer subclass to use for YY_DECL
423: * do_stdinit - whether to initialize yyin/yyout to stdin/stdout
424: * use_stdout - the -t flag
425: * input_files - array holding names of input files
426: * num_input_files - size of input_files array
427: * program_name - name with which program was invoked
428: *
429: * action_array - array to hold the rule actions
430: * action_size - size of action_array
431: * defs1_offset - index where the user's section 1 definitions start
432: * in action_array
433: * prolog_offset - index where the prolog starts in action_array
434: * action_offset - index where the non-prolog starts in action_array
435: * action_index - index where the next action should go, with respect
436: * to "action_array"
437: */
438:
439: extern int datapos, dataline, linenum, out_linenum;
440: extern FILE *skelfile, *yyin, *backing_up_file;
441: extern const char *skel[];
442: extern int skel_ind;
443: extern char *infilename, *outfilename;
444: extern int did_outfilename;
445: extern char *prefix, *yyclass;
446: extern int do_stdinit, use_stdout;
447: extern char **input_files;
448: extern int num_input_files;
449: extern char *program_name;
450:
451: extern char *action_array;
452: extern int action_size;
453: extern int defs1_offset, prolog_offset, action_offset, action_index;
454:
455:
456: /* Variables for stack of states having only one out-transition:
457: * onestate - state number
458: * onesym - transition symbol
459: * onenext - target state
460: * onedef - default base entry
461: * onesp - stack pointer
462: */
463:
464: extern int onestate[ONE_STACK_SIZE], onesym[ONE_STACK_SIZE];
465: extern int onenext[ONE_STACK_SIZE], onedef[ONE_STACK_SIZE], onesp;
466:
467:
468: /* Variables for nfa machine data:
469: * current_mns - current maximum on number of NFA states
470: * num_rules - number of the last accepting state; also is number of
471: * rules created so far
472: * num_eof_rules - number of <<EOF>> rules
473: * default_rule - number of the default rule
474: * current_max_rules - current maximum number of rules
475: * lastnfa - last nfa state number created
476: * firstst - physically the first state of a fragment
477: * lastst - last physical state of fragment
478: * finalst - last logical state of fragment
479: * transchar - transition character
480: * trans1 - transition state
481: * trans2 - 2nd transition state for epsilons
482: * accptnum - accepting number
483: * assoc_rule - rule associated with this NFA state (or 0 if none)
484: * state_type - a STATE_xxx type identifying whether the state is part
485: * of a normal rule, the leading state in a trailing context
486: * rule (i.e., the state which marks the transition from
487: * recognizing the text-to-be-matched to the beginning of
488: * the trailing context), or a subsequent state in a trailing
489: * context rule
490: * rule_type - a RULE_xxx type identifying whether this is a ho-hum
491: * normal rule or one which has variable head & trailing
492: * context
493: * rule_linenum - line number associated with rule
494: * rule_useful - true if we've determined that the rule can be matched
495: */
496:
497: extern int current_mns, current_max_rules;
498: extern int num_rules, num_eof_rules, default_rule, lastnfa;
499: extern int *firstst, *lastst, *finalst, *transchar, *trans1, *trans2;
500: extern int *accptnum, *assoc_rule, *state_type;
501: extern int *rule_type, *rule_linenum, *rule_useful;
502:
503: /* Different types of states; values are useful as masks, as well, for
504: * routines like check_trailing_context().
505: */
506: #define STATE_NORMAL 0x1
507: #define STATE_TRAILING_CONTEXT 0x2
508:
509: /* Global holding current type of state we're making. */
510:
511: extern int current_state_type;
512:
513: /* Different types of rules. */
514: #define RULE_NORMAL 0
515: #define RULE_VARIABLE 1
516:
517: /* True if the input rules include a rule with both variable-length head
518: * and trailing context, false otherwise.
519: */
520: extern int variable_trailing_context_rules;
521:
522:
523: /* Variables for protos:
524: * numtemps - number of templates created
525: * numprots - number of protos created
526: * protprev - backlink to a more-recently used proto
527: * protnext - forward link to a less-recently used proto
528: * prottbl - base/def table entry for proto
529: * protcomst - common state of proto
530: * firstprot - number of the most recently used proto
531: * lastprot - number of the least recently used proto
532: * protsave - contains the entire state array for protos
533: */
534:
535: extern int numtemps, numprots, protprev[MSP], protnext[MSP], prottbl[MSP];
536: extern int protcomst[MSP], firstprot, lastprot, protsave[PROT_SAVE_SIZE];
537:
538:
539: /* Variables for managing equivalence classes:
540: * numecs - number of equivalence classes
541: * nextecm - forward link of Equivalence Class members
542: * ecgroup - class number or backward link of EC members
543: * nummecs - number of meta-equivalence classes (used to compress
544: * templates)
545: * tecfwd - forward link of meta-equivalence class members
546: * tecbck - backward link of MEC's
547: */
548:
549: /* Reserve enough room in the equivalence class arrays so that we
550: * can use the CSIZE'th element to hold equivalence class information
551: * for the NUL character. Later we'll move this information into
552: * the 0th element.
553: */
554: extern int numecs, nextecm[CSIZE + 1], ecgroup[CSIZE + 1], nummecs;
555:
556: /* Meta-equivalence classes are indexed starting at 1, so it's possible
557: * that they will require positions from 1 .. CSIZE, i.e., CSIZE + 1
558: * slots total (since the arrays are 0-based). nextecm[] and ecgroup[]
559: * don't require the extra position since they're indexed from 1 .. CSIZE - 1.
560: */
561: extern int tecfwd[CSIZE + 1], tecbck[CSIZE + 1];
562:
563:
564: /* Variables for start conditions:
565: * lastsc - last start condition created
566: * current_max_scs - current limit on number of start conditions
567: * scset - set of rules active in start condition
568: * scbol - set of rules active only at the beginning of line in a s.c.
569: * scxclu - true if start condition is exclusive
570: * sceof - true if start condition has EOF rule
571: * scname - start condition name
572: */
573:
574: extern int lastsc, *scset, *scbol, *scxclu, *sceof;
575: extern int current_max_scs;
576: extern char **scname;
577:
578:
579: /* Variables for dfa machine data:
580: * current_max_dfa_size - current maximum number of NFA states in DFA
581: * current_max_xpairs - current maximum number of non-template xtion pairs
582: * current_max_template_xpairs - current maximum number of template pairs
583: * current_max_dfas - current maximum number of DFA states
584: * lastdfa - last dfa state number created
585: * nxt - state to enter upon reading character
586: * chk - check value to see if "nxt" applies
587: * tnxt - internal nxt table for templates
588: * base - offset into "nxt" for given state
589: * def - where to go if "chk" disallows "nxt" entry
590: * nultrans - NUL transition for each state
591: * NUL_ec - equivalence class of the NUL character
592: * tblend - last "nxt/chk" table entry being used
593: * firstfree - first empty entry in "nxt/chk" table
594: * dss - nfa state set for each dfa
595: * dfasiz - size of nfa state set for each dfa
596: * dfaacc - accepting set for each dfa state (if using REJECT), or accepting
597: * number, if not
598: * accsiz - size of accepting set for each dfa state
599: * dhash - dfa state hash value
600: * numas - number of DFA accepting states created; note that this
601: * is not necessarily the same value as num_rules, which is the analogous
602: * value for the NFA
603: * numsnpairs - number of state/nextstate transition pairs
604: * jambase - position in base/def where the default jam table starts
605: * jamstate - state number corresponding to "jam" state
606: * end_of_buffer_state - end-of-buffer dfa state number
607: */
608:
609: extern int current_max_dfa_size, current_max_xpairs;
610: extern int current_max_template_xpairs, current_max_dfas;
611: extern int lastdfa, *nxt, *chk, *tnxt;
612: extern int *base, *def, *nultrans, NUL_ec, tblend, firstfree, **dss, *dfasiz;
613: extern union dfaacc_union
614: {
615: int *dfaacc_set;
616: int dfaacc_state;
617: } *dfaacc;
618: extern int *accsiz, *dhash, numas;
619: extern int numsnpairs, jambase, jamstate;
620: extern int end_of_buffer_state;
621:
622: /* Variables for ccl information:
623: * lastccl - ccl index of the last created ccl
624: * current_maxccls - current limit on the maximum number of unique ccl's
625: * cclmap - maps a ccl index to its set pointer
626: * ccllen - gives the length of a ccl
627: * cclng - true for a given ccl if the ccl is negated
628: * cclreuse - counts how many times a ccl is re-used
629: * current_max_ccl_tbl_size - current limit on number of characters needed
630: * to represent the unique ccl's
631: * ccltbl - holds the characters in each ccl - indexed by cclmap
632: */
633:
634: extern int lastccl, *cclmap, *ccllen, *cclng, cclreuse;
635: extern int current_maxccls, current_max_ccl_tbl_size;
636: extern Char *ccltbl;
637:
638:
639: /* Variables for miscellaneous information:
640: * nmstr - last NAME scanned by the scanner
641: * sectnum - section number currently being parsed
642: * nummt - number of empty nxt/chk table entries
643: * hshcol - number of hash collisions detected by snstods
644: * dfaeql - number of times a newly created dfa was equal to an old one
645: * numeps - number of epsilon NFA states created
646: * eps2 - number of epsilon states which have 2 out-transitions
647: * num_reallocs - number of times it was necessary to realloc() a group
648: * of arrays
649: * tmpuses - number of DFA states that chain to templates
650: * totnst - total number of NFA states used to make DFA states
651: * peakpairs - peak number of transition pairs we had to store internally
652: * numuniq - number of unique transitions
653: * numdup - number of duplicate transitions
654: * hshsave - number of hash collisions saved by checking number of states
655: * num_backing_up - number of DFA states requiring backing up
656: * bol_needed - whether scanner needs beginning-of-line recognition
657: */
658:
659: extern char nmstr[MAXLINE];
660: extern int sectnum, nummt, hshcol, dfaeql, numeps, eps2, num_reallocs;
661: extern int tmpuses, totnst, peakpairs, numuniq, numdup, hshsave;
662: extern int num_backing_up, bol_needed;
663:
664: void *allocate_array PROTO((int, size_t));
665: void *reallocate_array PROTO((void*, int, size_t));
666:
667: void *flex_alloc PROTO((size_t));
668: void *flex_realloc PROTO((void*, size_t));
669: void flex_free PROTO((void*));
670:
671: #define allocate_integer_array(size) \
672: (int *) allocate_array( size, sizeof( int ) )
673:
674: #define reallocate_integer_array(array,size) \
675: (int *) reallocate_array( (void *) array, size, sizeof( int ) )
676:
677: #define allocate_int_ptr_array(size) \
678: (int **) allocate_array( size, sizeof( int * ) )
679:
680: #define allocate_char_ptr_array(size) \
681: (char **) allocate_array( size, sizeof( char * ) )
682:
683: #define allocate_dfaacc_union(size) \
684: (union dfaacc_union *) \
685: allocate_array( size, sizeof( union dfaacc_union ) )
686:
687: #define reallocate_int_ptr_array(array,size) \
688: (int **) reallocate_array( (void *) array, size, sizeof( int * ) )
689:
690: #define reallocate_char_ptr_array(array,size) \
691: (char **) reallocate_array( (void *) array, size, sizeof( char * ) )
692:
693: #define reallocate_dfaacc_union(array, size) \
694: (union dfaacc_union *) \
695: reallocate_array( (void *) array, size, sizeof( union dfaacc_union ) )
696:
697: #define allocate_character_array(size) \
698: (char *) allocate_array( size, sizeof( char ) )
699:
700: #define reallocate_character_array(array,size) \
701: (char *) reallocate_array( (void *) array, size, sizeof( char ) )
702:
703: #define allocate_Character_array(size) \
704: (Char *) allocate_array( size, sizeof( Char ) )
705:
706: #define reallocate_Character_array(array,size) \
707: (Char *) reallocate_array( (void *) array, size, sizeof( Char ) )
708:
709:
710: /* Used to communicate between scanner and parser. The type should really
711: * be YYSTYPE, but we can't easily get our hands on it.
712: */
713: extern int yylval;
714:
715:
716: /* External functions that are cross-referenced among the flex source files. */
717:
718:
719: /* from file ccl.c */
720:
721: extern void ccladd PROTO((int, int)); /* add a single character to a ccl */
722: extern int cclinit PROTO((void)); /* make an empty ccl */
723: extern void cclnegate PROTO((int)); /* negate a ccl */
724:
725: /* List the members of a set of characters in CCL form. */
726: extern void list_character_set PROTO((FILE*, int[]));
727:
728:
729: /* from file dfa.c */
730:
731: /* Check a DFA state for backing up. */
732: extern void check_for_backing_up PROTO((int, int[]));
733:
734: /* Check to see if NFA state set constitutes "dangerous" trailing context. */
735: extern void check_trailing_context PROTO((int*, int, int*, int));
736:
737: /* Construct the epsilon closure of a set of ndfa states. */
738: extern int *epsclosure PROTO((int*, int*, int[], int*, int*));
739:
740: /* Increase the maximum number of dfas. */
741: extern void increase_max_dfas PROTO((void));
742:
743: extern void ntod PROTO((void)); /* convert a ndfa to a dfa */
744:
745: /* Converts a set of ndfa states into a dfa state. */
746: extern int snstods PROTO((int[], int, int[], int, int, int*));
747:
748:
749: /* from file ecs.c */
750:
751: /* Convert character classes to set of equivalence classes. */
752: extern void ccl2ecl PROTO((void));
753:
754: /* Associate equivalence class numbers with class members. */
755: extern int cre8ecs PROTO((int[], int[], int));
756:
757: /* Update equivalence classes based on character class transitions. */
758: extern void mkeccl PROTO((Char[], int, int[], int[], int, int));
759:
760: /* Create equivalence class for single character. */
761: extern void mkechar PROTO((int, int[], int[]));
762:
763:
764: /* from file gen.c */
765:
766: extern void do_indent PROTO((void)); /* indent to the current level */
767:
768: /* Generate the code to keep backing-up information. */
769: extern void gen_backing_up PROTO((void));
770:
771: /* Generate the code to perform the backing up. */
772: extern void gen_bu_action PROTO((void));
773:
774: /* Generate full speed compressed transition table. */
775: extern void genctbl PROTO((void));
776:
777: /* Generate the code to find the action number. */
778: extern void gen_find_action PROTO((void));
779:
780: extern void genftbl PROTO((void)); /* generate full transition table */
781:
782: /* Generate the code to find the next compressed-table state. */
783: extern void gen_next_compressed_state PROTO((char*));
784:
785: /* Generate the code to find the next match. */
786: extern void gen_next_match PROTO((void));
787:
788: /* Generate the code to find the next state. */
789: extern void gen_next_state PROTO((int));
790:
791: /* Generate the code to make a NUL transition. */
792: extern void gen_NUL_trans PROTO((void));
793:
794: /* Generate the code to find the start state. */
795: extern void gen_start_state PROTO((void));
796:
797: /* Generate data statements for the transition tables. */
798: extern void gentabs PROTO((void));
799:
800: /* Write out a formatted string at the current indentation level. */
801: extern void indent_put2s PROTO((char[], char[]));
802:
803: /* Write out a string + newline at the current indentation level. */
804: extern void indent_puts PROTO((char[]));
805:
806: extern void make_tables PROTO((void)); /* generate transition tables */
807:
808:
809: /* from file main.c */
810:
811: extern void check_options PROTO((void));
812: extern void flexend PROTO((int));
813: extern void usage PROTO((void));
814:
815:
816: /* from file misc.c */
817:
818: /* Add a #define to the action file; the prototype names its arguments defname and value. */
819: extern void action_define PROTO(( char *defname, int value ));
820:
821: /* Add the given text (new_text) to the stored actions. */
822: extern void add_action PROTO(( char *new_text ));
823:
824: /* True (returned as an int) if a string is all lower case. */
825: extern int all_lower PROTO((register char *));
826:
827: /* True (returned as an int) if a string is all upper case. */
828: extern int all_upper PROTO((register char *));
829:
830: /* Bubble sort an integer array.  NOTE(review): the int argument is presumably the element count -- confirm in misc.c. */
831: extern void bubble PROTO((int [], int));
832:
833: /* Check a character to make sure it's in the expected range. */
834: extern void check_char PROTO((int c));
835:
836: /* Convert an upper-case letter to lower case; takes an int and returns a Char. */
837: extern Char clower PROTO((int));
838:
839: /* Returns a dynamically allocated copy of a string. */
840: extern char *copy_string PROTO((register const char *));
841:
842: /* Returns a dynamically allocated copy of a (potentially) unsigned string. */
843: extern Char *copy_unsigned_string PROTO((register Char *));
844:
845: /* Shell sort a character array. */
846: extern void cshell PROTO((Char [], int, int));
847:
848: /* Finish up a block of data declarations. */
849: extern void dataend PROTO((void));
850:
851: /* Flush generated data statements. */
852: extern void dataflush PROTO((void));
853:
854: /* Report an error message and terminate. */
855: extern void flexerror PROTO((const char[]));
856:
857: /* Report a fatal error message and terminate. */
858: extern void flexfatal PROTO((const char[]));
859:
860: /* Convert a hexadecimal digit string to an integer value. */
861: extern int htoi PROTO((Char[]));
862:
863: /* Report an error message formatted with one integer argument. */
864: extern void lerrif PROTO((const char[], int));
865:
866: /* Report an error message formatted with one string argument. */
867: extern void lerrsf PROTO((const char[], const char[]));
868:
869: /* Spit out a "#line" directive; takes a FILE * and an int. */
870: extern void line_directive_out PROTO((FILE*, int));
871:
872: /* Mark the current position in the action array as the end of the section 1
873: * user defs.
874: */
875: extern void mark_defs1 PROTO((void));
876:
877: /* Mark the current position in the action array as the end of the prolog. */
878: extern void mark_prolog PROTO((void));
879:
880: /* Generate a data statement for a two-dimensional array. */
881: extern void mk2data PROTO((int));
882:
883: extern void mkdata PROTO((int)); /* generate a data statement */
884:
885: /* Return the integer represented by a string of digits. */
886: extern int myctoi PROTO((char []));
887:
888: /* Return the character corresponding to an escape sequence. */
889: extern Char myesc PROTO((Char[]));
890:
891: /* Convert an octal digit string to an integer value. */
892: extern int otoi PROTO((Char [] ));
893:
894: /* Output a (possibly-formatted) string to the generated scanner.  The variants below differ in the extra arguments (int, unsigned int or strings) that their prototypes take. */
895: extern void out PROTO((const char []));
896: extern void out_dec PROTO((const char [], int));
897: extern void out_dec2 PROTO((const char [], int, int));
898: extern void out_hex PROTO((const char [], unsigned int));
899: extern void out_line_count PROTO((const char []));
900: extern void out_str PROTO((const char [], const char []));
901: extern void out_str3
902: PROTO((const char [], const char [], const char [], const char []));
903: extern void out_str_dec PROTO((const char [], const char [], int));
904: extern void outc PROTO((int));
905: extern void outn PROTO((const char []));
906:
907: /* Return a printable version of the given character, which might be
908: * 8-bit.
909: */
910: extern char *readable_form PROTO((int));
911:
912: /* Write out one section of the skeleton file. */
913: extern void skelout PROTO((void));
914:
915: /* Output a yy_trans_info structure. */
916: extern void transition_struct_out PROTO((int, int));
917:
918: /* Only needed when using certain broken versions of bison to build parse.c. */
919: extern void *yy_flex_xmalloc PROTO(( int ));
920:
921: /* Set a region of memory to 0.  NOTE(review): the size_t argument is presumably the region's length in bytes -- confirm in misc.c. */
922: extern void zero_out PROTO((char *, size_t));
923:
924:
925: /* from file nfa.c: routines that build and combine NFA machines */
926:
927: /* Add an accepting state to a machine. */
928: extern void add_accept PROTO((int, int));
929:
930: /* Make a given number of copies of a singleton machine. */
931: extern int copysingl PROTO((int, int));
932:
933: /* Debugging routine to write out an nfa. */
934: extern void dumpnfa PROTO((int));
935:
936: /* Finish up the processing for a rule. */
937: extern void finish_rule PROTO((int, int, int, int));
938:
939: /* Connect two machines together; returns an int (NOTE(review): presumably identifying the combined machine -- confirm in nfa.c). */
940: extern int link_machines PROTO((int, int));
941:
942: /* Mark each "beginning" state in a machine as being a "normal" (i.e.,
943: * not trailing context associated) state.
944: */
945: extern void mark_beginning_as_normal PROTO((register int));
946:
947: /* Make a machine that branches to two machines. */
948: extern int mkbranch PROTO((int, int));
949:
950: extern int mkclos PROTO((int)); /* convert a machine into a closure */
951: extern int mkopt PROTO((int)); /* make a machine optional */
952:
953: /* Make a machine that matches either one of two machines. */
954: extern int mkor PROTO((int, int));
955:
956: /* Convert a machine into a positive closure. */
957: extern int mkposcl PROTO((int));
958:
959: extern int mkrep PROTO((int, int, int)); /* make a replicated machine */
960:
961: /* Create a state with a transition on a given symbol. */
962: extern int mkstate PROTO((int));
963:
964: extern void new_rule PROTO((void)); /* initialize for a new rule */
965:
966:
967: /* from file parse.y */
968:
969: /* Build the "<<EOF>>" action for the active start conditions. */
970: extern void build_eof_action PROTO((void));
971:
972: /* Write out a message formatted with one string, pinpointing its location. */
973: extern void format_pinpoint_message PROTO((char[], char[]));
974:
975: /* Write out a message, pinpointing its location. */
976: extern void pinpoint_message PROTO((char[]));
977:
978: /* Write out a warning, pinpointing it at the given line. */
979: extern void line_warning PROTO(( char[], int ));
980:
981: /* Write out a message, pinpointing it at the given line. */
982: extern void line_pinpoint PROTO(( char[], int ));
983:
984: /* Report a formatted syntax error. */
985: extern void format_synerr PROTO((char [], char[]));
986: extern void synerr PROTO((char [])); /* report a syntax error */
987: extern void format_warn PROTO((char [], char[])); /* formatted counterpart of warn(), by analogy with format_synerr()/synerr() */
988: extern void warn PROTO((char [])); /* report a warning */
989: extern void yyerror PROTO((char [])); /* report a parse error */
990: extern int yyparse PROTO((void)); /* the YACC parser */
991:
992:
993: /* from file scan.l */
994:
995: /* The Flex-generated scanner for flex. */
996: extern int flexscan PROTO((void));
997:
998: /* Open the given file (if NULL, stdin) for scanning; the file is passed as a char *. */
999: extern void set_input_file PROTO((char*));
1000:
1001: /* Wrap up a file in the lexical analyzer; returns an int. */
1002: extern int yywrap PROTO((void));
1003:
1004:
1005: /* from file sym.c: symbol, character class, name definition and start condition tables */
1006:
1007: /* Add a symbol and its definitions to a symbol table (passed as a hash_table). */
1008: extern int addsym PROTO((register char[], char*, int, hash_table, int));
1009:
1010: /* Save the text of a character class. */
1011: extern void cclinstal PROTO ((Char [], int));
1012:
1013: /* Look up the number associated with a character class. */
1014: extern int ccllookup PROTO((Char []));
1015:
1016: /* Find a symbol in a symbol table; returns a struct hash_entry pointer. */
1017: extern struct hash_entry *findsym PROTO((register char[], hash_table, int ));
1018:
1019: extern void ndinstal PROTO((char[], Char[])); /* install a name definition */
1020: extern Char *ndlookup PROTO((char[])); /* look up a name definition */
1021:
1022: /* Increase the maximum number of start conditions (SC's). */
1023: extern void scextend PROTO((void));
1024: extern void scinstal PROTO((char[], int)); /* make a start condition */
1025:
1026: /* Look up the number associated with a start condition. */
1027: extern int sclookup PROTO((char[]));
1028:
1029:
1030: /* from file tblcmp.c: building and compressing the transition tables */
1031:
1032: /* Build table entries for a dfa state. */
1033: extern void bldtbl PROTO((int[], int, int, int, int));
1034:
1035: extern void cmptmps PROTO((void)); /* compress template table entries */
1036: extern void expand_nxt_chk PROTO((void)); /* increase nxt/chk arrays */
1037: /* Find a space in the table for a state to be placed. */
1038: extern int find_table_space PROTO((int*, int));
1039: extern void inittbl PROTO((void)); /* initialize transition tables */
1040: /* Make the default, "jam" table entries. */
1041: extern void mkdeftbl PROTO((void));
1042:
1043: /* Create table entries for a state (or state fragment) which has
1044: * only one out-transition.
1045: */
1046: extern void mk1tbl PROTO((int, int, int, int));
1047:
1048: /* Place a state into the full-speed transition table. */
1049: extern void place_state PROTO((int*, int, int));
1050:
1051: /* Save states with only one out-transition to be processed later. */
1052: extern void stack1 PROTO((int, int, int, int));
1053:
1054:
1055: /* from file yylex.c */
1056:
1057: extern int yylex PROTO((void)); /* NOTE(review): presumably the token-level scanner called by yyparse() -- confirm in yylex.c */