Annotation of src/usr.bin/lex/dfa.c, Revision 1.4
1.4 ! deraadt 1: /* $OpenBSD: dfa.c,v 1.3 1996/07/13 22:21:57 millert Exp $ */
1.2 deraadt 2:
1.1 deraadt 3: /* dfa - DFA construction routines */
4:
5: /*-
6: * Copyright (c) 1990 The Regents of the University of California.
7: * All rights reserved.
8: *
9: * This code is derived from software contributed to Berkeley by
10: * Vern Paxson.
11: *
12: * The United States Government has rights in this work pursuant
13: * to contract no. DE-AC03-76SF00098 between the United States
14: * Department of Energy and the University of California.
15: *
1.4 ! deraadt 16: * Redistribution and use in source and binary forms, with or without
! 17: * modification, are permitted provided that: (1) source distributions
! 18: * retain this entire copyright notice and comment, and (2) distributions
! 19: * including binaries display the following acknowledgement: ``This product
! 20: * includes software developed by the University of California, Berkeley
! 21: * and its contributors'' in the documentation or other materials provided
! 22: * with the distribution and in all advertising materials mentioning
! 23: * features or use of this software. Neither the name of the University nor
! 24: * the names of its contributors may be used to endorse or promote products
! 25: * derived from this software without specific prior written permission.
1.1 deraadt 26: * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
27: * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
28: * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
29: */
30:
1.4 ! deraadt 31: /* $Header: /cvs/src/usr.bin/lex/dfa.c,v 1.3 1996/07/13 22:21:57 millert Exp $ */
1.1 deraadt 32:
33: #include "flexdef.h"
34:
35:
36: /* declare functions that have forward references */
37:
38: void dump_associated_rules PROTO((FILE*, int));
39: void dump_transitions PROTO((FILE*, int[]));
40: void sympartition PROTO((int[], int, int[], int[]));
41: int symfollowset PROTO((int[], int, int, int[]));
42:
43:
44: /* check_for_backing_up - check a DFA state for backing up
45: *
46: * synopsis
47: * void check_for_backing_up( int ds, int state[numecs] );
48: *
49: * ds is the number of the state to check and state[] is its out-transitions,
50: * indexed by equivalence class.
51: */
52:
53: void check_for_backing_up( ds, state )
54: int ds;
55: int state[];
56: {
57: if ( (reject && ! dfaacc[ds].dfaacc_set) ||
58: (! reject && ! dfaacc[ds].dfaacc_state) )
59: { /* state is non-accepting */
60: ++num_backing_up;
61:
62: if ( backing_up_report )
63: {
64: fprintf( backing_up_file,
65: _( "State #%d is non-accepting -\n" ), ds );
66:
67: /* identify the state */
68: dump_associated_rules( backing_up_file, ds );
69:
70: /* Now identify it further using the out- and
71: * jam-transitions.
72: */
73: dump_transitions( backing_up_file, state );
74:
75: putc( '\n', backing_up_file );
76: }
77: }
78: }
79:
80:
81: /* check_trailing_context - check to see if NFA state set constitutes
82: * "dangerous" trailing context
83: *
84: * synopsis
85: * void check_trailing_context( int nfa_states[num_states+1], int num_states,
86: * int accset[nacc+1], int nacc );
87: *
88: * NOTES
89: * Trailing context is "dangerous" if both the head and the trailing
90: * part are of variable size \and/ there's a DFA state which contains
91: * both an accepting state for the head part of the rule and NFA states
92: * which occur after the beginning of the trailing context.
93: *
94: * When such a rule is matched, it's impossible to tell if having been
95: * in the DFA state indicates the beginning of the trailing context or
96: * further-along scanning of the pattern. In these cases, a warning
97: * message is issued.
98: *
99: * nfa_states[1 .. num_states] is the list of NFA states in the DFA.
100: * accset[1 .. nacc] is the list of accepting numbers for the DFA state.
101: */
102:
103: void check_trailing_context( nfa_states, num_states, accset, nacc )
104: int *nfa_states, num_states;
105: int *accset;
106: int nacc;
107: {
108: register int i, j;
109:
110: for ( i = 1; i <= num_states; ++i )
111: {
112: int ns = nfa_states[i];
113: register int type = state_type[ns];
114: register int ar = assoc_rule[ns];
115:
116: if ( type == STATE_NORMAL || rule_type[ar] != RULE_VARIABLE )
117: { /* do nothing */
118: }
119:
120: else if ( type == STATE_TRAILING_CONTEXT )
121: {
122: /* Potential trouble. Scan set of accepting numbers
123: * for the one marking the end of the "head". We
124: * assume that this looping will be fairly cheap
125: * since it's rare that an accepting number set
126: * is large.
127: */
128: for ( j = 1; j <= nacc; ++j )
129: if ( accset[j] & YY_TRAILING_HEAD_MASK )
130: {
131: line_warning(
132: _( "dangerous trailing context" ),
133: rule_linenum[ar] );
134: return;
135: }
136: }
137: }
138: }
139:
140:
141: /* dump_associated_rules - list the rules associated with a DFA state
142: *
143: * Goes through the set of NFA states associated with the DFA and
144: * extracts the first MAX_ASSOC_RULES unique rules, sorts them,
145: * and writes a report to the given file.
146: */
147:
148: void dump_associated_rules( file, ds )
149: FILE *file;
150: int ds;
151: {
152: register int i, j;
153: register int num_associated_rules = 0;
154: int rule_set[MAX_ASSOC_RULES + 1];
155: int *dset = dss[ds];
156: int size = dfasiz[ds];
157:
158: for ( i = 1; i <= size; ++i )
159: {
160: register int rule_num = rule_linenum[assoc_rule[dset[i]]];
161:
162: for ( j = 1; j <= num_associated_rules; ++j )
163: if ( rule_num == rule_set[j] )
164: break;
165:
166: if ( j > num_associated_rules )
167: { /* new rule */
168: if ( num_associated_rules < MAX_ASSOC_RULES )
169: rule_set[++num_associated_rules] = rule_num;
170: }
171: }
172:
173: bubble( rule_set, num_associated_rules );
174:
175: fprintf( file, _( " associated rule line numbers:" ) );
176:
177: for ( i = 1; i <= num_associated_rules; ++i )
178: {
179: if ( i % 8 == 1 )
180: putc( '\n', file );
181:
182: fprintf( file, "\t%d", rule_set[i] );
183: }
184:
185: putc( '\n', file );
186: }
187:
188:
189: /* dump_transitions - list the transitions associated with a DFA state
190: *
191: * synopsis
192: * dump_transitions( FILE *file, int state[numecs] );
193: *
194: * Goes through the set of out-transitions and lists them in human-readable
195: * form (i.e., not as equivalence classes); also lists jam transitions
196: * (i.e., all those which are not out-transitions, plus EOF). The dump
197: * is done to the given file.
198: */
199:
200: void dump_transitions( file, state )
201: FILE *file;
202: int state[];
203: {
204: register int i, ec;
205: int out_char_set[CSIZE];
206:
207: for ( i = 0; i < csize; ++i )
208: {
209: ec = ABS( ecgroup[i] );
210: out_char_set[i] = state[ec];
211: }
212:
213: fprintf( file, _( " out-transitions: " ) );
214:
215: list_character_set( file, out_char_set );
216:
217: /* now invert the members of the set to get the jam transitions */
218: for ( i = 0; i < csize; ++i )
219: out_char_set[i] = ! out_char_set[i];
220:
221: fprintf( file, _( "\n jam-transitions: EOF " ) );
222:
223: list_character_set( file, out_char_set );
224:
225: putc( '\n', file );
226: }
227:
228:
229: /* epsclosure - construct the epsilon closure of a set of ndfa states
230: *
231: * synopsis
232: * int *epsclosure( int t[num_states], int *numstates_addr,
233: * int accset[num_rules+1], int *nacc_addr,
234: * int *hashval_addr );
235: *
236: * NOTES
237: * The epsilon closure is the set of all states reachable by an arbitrary
238: * number of epsilon transitions, which themselves do not have epsilon
239: * transitions going out, unioned with the set of states which have non-null
240: * accepting numbers. t is an array of size numstates of nfa state numbers.
241: * Upon return, t holds the epsilon closure and *numstates_addr is updated.
242: * accset holds a list of the accepting numbers, and the size of accset is
243: * given by *nacc_addr. t may be subjected to reallocation if it is not
244: * large enough to hold the epsilon closure.
245: *
246: * hashval is the hash value for the dfa corresponding to the state set.
247: */
248:
249: int *epsclosure( t, ns_addr, accset, nacc_addr, hv_addr )
250: int *t, *ns_addr, accset[], *nacc_addr, *hv_addr;
251: {
252: register int stkpos, ns, tsp;
253: int numstates = *ns_addr, nacc, hashval, transsym, nfaccnum;
254: int stkend, nstate;
255: static int did_stk_init = false, *stk;
256:
257: #define MARK_STATE(state) \
258: trans1[state] = trans1[state] - MARKER_DIFFERENCE;
259:
260: #define IS_MARKED(state) (trans1[state] < 0)
261:
262: #define UNMARK_STATE(state) \
263: trans1[state] = trans1[state] + MARKER_DIFFERENCE;
264:
265: #define CHECK_ACCEPT(state) \
266: { \
267: nfaccnum = accptnum[state]; \
268: if ( nfaccnum != NIL ) \
269: accset[++nacc] = nfaccnum; \
270: }
271:
272: #define DO_REALLOCATION \
273: { \
274: current_max_dfa_size += MAX_DFA_SIZE_INCREMENT; \
275: ++num_reallocs; \
276: t = reallocate_integer_array( t, current_max_dfa_size ); \
277: stk = reallocate_integer_array( stk, current_max_dfa_size ); \
278: } \
279:
280: #define PUT_ON_STACK(state) \
281: { \
282: if ( ++stkend >= current_max_dfa_size ) \
283: DO_REALLOCATION \
284: stk[stkend] = state; \
285: MARK_STATE(state) \
286: }
287:
288: #define ADD_STATE(state) \
289: { \
290: if ( ++numstates >= current_max_dfa_size ) \
291: DO_REALLOCATION \
292: t[numstates] = state; \
293: hashval += state; \
294: }
295:
296: #define STACK_STATE(state) \
297: { \
298: PUT_ON_STACK(state) \
299: CHECK_ACCEPT(state) \
300: if ( nfaccnum != NIL || transchar[state] != SYM_EPSILON ) \
301: ADD_STATE(state) \
302: }
303:
304:
305: if ( ! did_stk_init )
306: {
307: stk = allocate_integer_array( current_max_dfa_size );
308: did_stk_init = true;
309: }
310:
311: nacc = stkend = hashval = 0;
312:
313: for ( nstate = 1; nstate <= numstates; ++nstate )
314: {
315: ns = t[nstate];
316:
317: /* The state could be marked if we've already pushed it onto
318: * the stack.
319: */
320: if ( ! IS_MARKED(ns) )
321: {
322: PUT_ON_STACK(ns)
323: CHECK_ACCEPT(ns)
324: hashval += ns;
325: }
326: }
327:
328: for ( stkpos = 1; stkpos <= stkend; ++stkpos )
329: {
330: ns = stk[stkpos];
331: transsym = transchar[ns];
332:
333: if ( transsym == SYM_EPSILON )
334: {
335: tsp = trans1[ns] + MARKER_DIFFERENCE;
336:
337: if ( tsp != NO_TRANSITION )
338: {
339: if ( ! IS_MARKED(tsp) )
340: STACK_STATE(tsp)
341:
342: tsp = trans2[ns];
343:
344: if ( tsp != NO_TRANSITION && ! IS_MARKED(tsp) )
345: STACK_STATE(tsp)
346: }
347: }
348: }
349:
350: /* Clear out "visit" markers. */
351:
352: for ( stkpos = 1; stkpos <= stkend; ++stkpos )
353: {
354: if ( IS_MARKED(stk[stkpos]) )
355: UNMARK_STATE(stk[stkpos])
356: else
357: flexfatal(
358: _( "consistency check failed in epsclosure()" ) );
359: }
360:
361: *ns_addr = numstates;
362: *hv_addr = hashval;
363: *nacc_addr = nacc;
364:
365: return t;
366: }
367:
368:
369: /* increase_max_dfas - increase the maximum number of DFAs */
370:
371: void increase_max_dfas()
372: {
373: current_max_dfas += MAX_DFAS_INCREMENT;
374:
375: ++num_reallocs;
376:
377: base = reallocate_integer_array( base, current_max_dfas );
378: def = reallocate_integer_array( def, current_max_dfas );
379: dfasiz = reallocate_integer_array( dfasiz, current_max_dfas );
380: accsiz = reallocate_integer_array( accsiz, current_max_dfas );
381: dhash = reallocate_integer_array( dhash, current_max_dfas );
382: dss = reallocate_int_ptr_array( dss, current_max_dfas );
383: dfaacc = reallocate_dfaacc_union( dfaacc, current_max_dfas );
384:
385: if ( nultrans )
386: nultrans =
387: reallocate_integer_array( nultrans, current_max_dfas );
388: }
389:
390:
391: /* ntod - convert an ndfa to a dfa
392: *
393: * Creates the dfa corresponding to the ndfa we've constructed. The
394: * dfa starts out in state #1.
395: */
396:
397: void ntod()
398: {
399: int *accset, ds, nacc, newds;
400: int sym, hashval, numstates, dsize;
401: int num_full_table_rows; /* used only for -f */
402: int *nset, *dset;
403: int targptr, totaltrans, i, comstate, comfreq, targ;
404: int symlist[CSIZE + 1];
405: int num_start_states;
406: int todo_head, todo_next;
407:
408: /* Note that the following are indexed by *equivalence classes*
409: * and not by characters. Since equivalence classes are indexed
410: * beginning with 1, even if the scanner accepts NUL's, this
411: * means that (since every character is potentially in its own
412: * equivalence class) these arrays must have room for indices
413: * from 1 to CSIZE, so their size must be CSIZE + 1.
414: */
415: int duplist[CSIZE + 1], state[CSIZE + 1];
416: int targfreq[CSIZE + 1], targstate[CSIZE + 1];
417:
418: accset = allocate_integer_array( num_rules + 1 );
419: nset = allocate_integer_array( current_max_dfa_size );
420:
421: /* The "todo" queue is represented by the head, which is the DFA
422: * state currently being processed, and the "next", which is the
423: * next DFA state number available (not in use). We depend on the
424: * fact that snstods() returns DFA's \in increasing order/, and thus
425: * need only know the bounds of the dfas to be processed.
426: */
427: todo_head = todo_next = 0;
428:
429: for ( i = 0; i <= csize; ++i )
430: {
431: duplist[i] = NIL;
432: symlist[i] = false;
433: }
434:
435: for ( i = 0; i <= num_rules; ++i )
436: accset[i] = NIL;
437:
438: if ( trace )
439: {
440: dumpnfa( scset[1] );
441: fputs( _( "\n\nDFA Dump:\n\n" ), stderr );
442: }
443:
444: inittbl();
445:
446: /* Check to see whether we should build a separate table for
447: * transitions on NUL characters. We don't do this for full-speed
448: * (-F) scanners, since for them we don't have a simple state
449: * number lying around with which to index the table. We also
450: * don't bother doing it for scanners unless (1) NUL is in its own
451: * equivalence class (indicated by a positive value of
452: * ecgroup[NUL]), (2) NUL's equivalence class is the last
453: * equivalence class, and (3) the number of equivalence classes is
454: * the same as the number of characters. This latter case comes
455: * about when useecs is false or when it's true but every character
456: * still manages to land in its own class (unlikely, but it's
457: * cheap to check for). If all these things are true then the
458: * character code needed to represent NUL's equivalence class for
459: * indexing the tables is going to take one more bit than the
460: * number of characters, and therefore we won't be assured of
461: * being able to fit it into a YY_CHAR variable. This rules out
462: * storing the transitions in a compressed table, since the code
463: * for interpreting them uses a YY_CHAR variable (perhaps it
464: * should just use an integer, though; this is worth pondering ...
465: * ###).
466: *
467: * Finally, for full tables, we want the number of entries in the
468: * table to be a power of two so the array references go fast (it
469: * will just take a shift to compute the major index). If
470: * encoding NUL's transitions in the table will spoil this, we
471: * give it its own table (note that this will be the case if we're
472: * not using equivalence classes).
473: */
474:
475: /* Note that the test for ecgroup[0] == numecs below accomplishes
476: * both (1) and (2) above
477: */
478: if ( ! fullspd && ecgroup[0] == numecs )
479: {
480: /* NUL is alone in its equivalence class, which is the
481: * last one.
482: */
483: int use_NUL_table = (numecs == csize);
484:
485: if ( fulltbl && ! use_NUL_table )
486: {
487: /* We still may want to use the table if numecs
488: * is a power of 2.
489: */
490: int power_of_two;
491:
492: for ( power_of_two = 1; power_of_two <= csize;
493: power_of_two *= 2 )
494: if ( numecs == power_of_two )
495: {
496: use_NUL_table = true;
497: break;
498: }
499: }
500:
501: if ( use_NUL_table )
502: nultrans = allocate_integer_array( current_max_dfas );
503:
504: /* From now on, nultrans != nil indicates that we're
505: * saving null transitions for later, separate encoding.
506: */
507: }
508:
509:
510: if ( fullspd )
511: {
512: for ( i = 0; i <= numecs; ++i )
513: state[i] = 0;
514:
515: place_state( state, 0, 0 );
516: dfaacc[0].dfaacc_state = 0;
517: }
518:
519: else if ( fulltbl )
520: {
521: if ( nultrans )
522: /* We won't be including NUL's transitions in the
523: * table, so build it for entries from 0 .. numecs - 1.
524: */
525: num_full_table_rows = numecs;
526:
527: else
528: /* Take into account the fact that we'll be including
529: * the NUL entries in the transition table. Build it
530: * from 0 .. numecs.
531: */
532: num_full_table_rows = numecs + 1;
533:
534: /* Unless -Ca, declare it "short" because it's a real
535: * long-shot that that won't be large enough.
536: */
537: out_str_dec( "static yyconst %s yy_nxt[][%d] =\n {\n",
538: /* '}' so vi doesn't get too confused */
539: long_align ? "long" : "short", num_full_table_rows );
540:
541: outn( " {" );
542:
543: /* Generate 0 entries for state #0. */
544: for ( i = 0; i < num_full_table_rows; ++i )
545: mk2data( 0 );
546:
547: dataflush();
548: outn( " },\n" );
549: }
550:
551: /* Create the first states. */
552:
553: num_start_states = lastsc * 2;
554:
555: for ( i = 1; i <= num_start_states; ++i )
556: {
557: numstates = 1;
558:
559: /* For each start condition, make one state for the case when
560: * we're at the beginning of the line (the '^' operator) and
561: * one for the case when we're not.
562: */
563: if ( i % 2 == 1 )
564: nset[numstates] = scset[(i / 2) + 1];
565: else
566: nset[numstates] =
567: mkbranch( scbol[i / 2], scset[i / 2] );
568:
569: nset = epsclosure( nset, &numstates, accset, &nacc, &hashval );
570:
571: if ( snstods( nset, numstates, accset, nacc, hashval, &ds ) )
572: {
573: numas += nacc;
574: totnst += numstates;
575: ++todo_next;
576:
577: if ( variable_trailing_context_rules && nacc > 0 )
578: check_trailing_context( nset, numstates,
579: accset, nacc );
580: }
581: }
582:
583: if ( ! fullspd )
584: {
585: if ( ! snstods( nset, 0, accset, 0, 0, &end_of_buffer_state ) )
586: flexfatal(
587: _( "could not create unique end-of-buffer state" ) );
588:
589: ++numas;
590: ++num_start_states;
591: ++todo_next;
592: }
593:
594: while ( todo_head < todo_next )
595: {
596: targptr = 0;
597: totaltrans = 0;
598:
599: for ( i = 1; i <= numecs; ++i )
600: state[i] = 0;
601:
602: ds = ++todo_head;
603:
604: dset = dss[ds];
605: dsize = dfasiz[ds];
606:
607: if ( trace )
608: fprintf( stderr, _( "state # %d:\n" ), ds );
609:
610: sympartition( dset, dsize, symlist, duplist );
611:
612: for ( sym = 1; sym <= numecs; ++sym )
613: {
614: if ( symlist[sym] )
615: {
616: symlist[sym] = 0;
617:
618: if ( duplist[sym] == NIL )
619: {
620: /* Symbol has unique out-transitions. */
621: numstates = symfollowset( dset, dsize,
622: sym, nset );
623: nset = epsclosure( nset, &numstates,
624: accset, &nacc, &hashval );
625:
626: if ( snstods( nset, numstates, accset,
627: nacc, hashval, &newds ) )
628: {
629: totnst = totnst + numstates;
630: ++todo_next;
631: numas += nacc;
632:
633: if (
634: variable_trailing_context_rules &&
635: nacc > 0 )
636: check_trailing_context(
637: nset, numstates,
638: accset, nacc );
639: }
640:
641: state[sym] = newds;
642:
643: if ( trace )
644: fprintf( stderr, "\t%d\t%d\n",
645: sym, newds );
646:
647: targfreq[++targptr] = 1;
648: targstate[targptr] = newds;
649: ++numuniq;
650: }
651:
652: else
653: {
654: /* sym's equivalence class has the same
655: * transitions as duplist(sym)'s
656: * equivalence class.
657: */
658: targ = state[duplist[sym]];
659: state[sym] = targ;
660:
661: if ( trace )
662: fprintf( stderr, "\t%d\t%d\n",
663: sym, targ );
664:
665: /* Update frequency count for
666: * destination state.
667: */
668:
669: i = 0;
670: while ( targstate[++i] != targ )
671: ;
672:
673: ++targfreq[i];
674: ++numdup;
675: }
676:
677: ++totaltrans;
678: duplist[sym] = NIL;
679: }
680: }
681:
682: if ( caseins && ! useecs )
683: {
684: register int j;
685:
686: for ( i = 'A', j = 'a'; i <= 'Z'; ++i, ++j )
687: {
688: if ( state[i] == 0 && state[j] != 0 )
689: /* We're adding a transition. */
690: ++totaltrans;
691:
692: else if ( state[i] != 0 && state[j] == 0 )
693: /* We're taking away a transition. */
694: --totaltrans;
695:
696: state[i] = state[j];
697: }
698: }
699:
700: numsnpairs += totaltrans;
701:
702: if ( ds > num_start_states )
703: check_for_backing_up( ds, state );
704:
705: if ( nultrans )
706: {
707: nultrans[ds] = state[NUL_ec];
708: state[NUL_ec] = 0; /* remove transition */
709: }
710:
711: if ( fulltbl )
712: {
713: outn( " {" );
714:
715: /* Supply array's 0-element. */
716: if ( ds == end_of_buffer_state )
717: mk2data( -end_of_buffer_state );
718: else
719: mk2data( end_of_buffer_state );
720:
721: for ( i = 1; i < num_full_table_rows; ++i )
722: /* Jams are marked by negative of state
723: * number.
724: */
725: mk2data( state[i] ? state[i] : -ds );
726:
727: dataflush();
728: outn( " },\n" );
729: }
730:
731: else if ( fullspd )
732: place_state( state, ds, totaltrans );
733:
734: else if ( ds == end_of_buffer_state )
735: /* Special case this state to make sure it does what
736: * it's supposed to, i.e., jam on end-of-buffer.
737: */
738: stack1( ds, 0, 0, JAMSTATE );
739:
740: else /* normal, compressed state */
741: {
742: /* Determine which destination state is the most
743: * common, and how many transitions to it there are.
744: */
745:
746: comfreq = 0;
747: comstate = 0;
748:
749: for ( i = 1; i <= targptr; ++i )
750: if ( targfreq[i] > comfreq )
751: {
752: comfreq = targfreq[i];
753: comstate = targstate[i];
754: }
755:
756: bldtbl( state, ds, totaltrans, comstate, comfreq );
757: }
758: }
759:
760: if ( fulltbl )
761: dataend();
762:
763: else if ( ! fullspd )
764: {
765: cmptmps(); /* create compressed template entries */
766:
767: /* Create tables for all the states with only one
768: * out-transition.
769: */
770: while ( onesp > 0 )
771: {
772: mk1tbl( onestate[onesp], onesym[onesp], onenext[onesp],
773: onedef[onesp] );
774: --onesp;
775: }
776:
777: mkdeftbl();
778: }
779:
780: flex_free( (void *) accset );
781: flex_free( (void *) nset );
782: }
783:
784:
785: /* snstods - converts a set of ndfa states into a dfa state
786: *
787: * synopsis
788: * is_new_state = snstods( int sns[numstates], int numstates,
789: * int accset[num_rules+1], int nacc,
790: * int hashval, int *newds_addr );
791: *
792: * On return, the dfa state number is in newds.
793: */
794:
795: int snstods( sns, numstates, accset, nacc, hashval, newds_addr )
796: int sns[], numstates, accset[], nacc, hashval, *newds_addr;
797: {
798: int didsort = 0;
799: register int i, j;
800: int newds, *oldsns;
801:
802: for ( i = 1; i <= lastdfa; ++i )
803: if ( hashval == dhash[i] )
804: {
805: if ( numstates == dfasiz[i] )
806: {
807: oldsns = dss[i];
808:
809: if ( ! didsort )
810: {
811: /* We sort the states in sns so we
812: * can compare it to oldsns quickly.
813: * We use bubble because there probably
814: * aren't very many states.
815: */
816: bubble( sns, numstates );
817: didsort = 1;
818: }
819:
820: for ( j = 1; j <= numstates; ++j )
821: if ( sns[j] != oldsns[j] )
822: break;
823:
824: if ( j > numstates )
825: {
826: ++dfaeql;
827: *newds_addr = i;
828: return 0;
829: }
830:
831: ++hshcol;
832: }
833:
834: else
835: ++hshsave;
836: }
837:
838: /* Make a new dfa. */
839:
840: if ( ++lastdfa >= current_max_dfas )
841: increase_max_dfas();
842:
843: newds = lastdfa;
844:
845: dss[newds] = allocate_integer_array( numstates + 1 );
846:
847: /* If we haven't already sorted the states in sns, we do so now,
848: * so that future comparisons with it can be made quickly.
849: */
850:
851: if ( ! didsort )
852: bubble( sns, numstates );
853:
854: for ( i = 1; i <= numstates; ++i )
855: dss[newds][i] = sns[i];
856:
857: dfasiz[newds] = numstates;
858: dhash[newds] = hashval;
859:
860: if ( nacc == 0 )
861: {
862: if ( reject )
863: dfaacc[newds].dfaacc_set = (int *) 0;
864: else
865: dfaacc[newds].dfaacc_state = 0;
866:
867: accsiz[newds] = 0;
868: }
869:
870: else if ( reject )
871: {
872: /* We sort the accepting set in increasing order so the
873: * disambiguating rule that the first rule listed is considered
874: * match in the event of ties will work. We use a bubble
875: * sort since the list is probably quite small.
876: */
877:
878: bubble( accset, nacc );
879:
880: dfaacc[newds].dfaacc_set = allocate_integer_array( nacc + 1 );
881:
882: /* Save the accepting set for later */
883: for ( i = 1; i <= nacc; ++i )
884: {
885: dfaacc[newds].dfaacc_set[i] = accset[i];
886:
887: if ( accset[i] <= num_rules )
888: /* Who knows, perhaps a REJECT can yield
889: * this rule.
890: */
891: rule_useful[accset[i]] = true;
892: }
893:
894: accsiz[newds] = nacc;
895: }
896:
897: else
898: {
899: /* Find lowest numbered rule so the disambiguating rule
900: * will work.
901: */
902: j = num_rules + 1;
903:
904: for ( i = 1; i <= nacc; ++i )
905: if ( accset[i] < j )
906: j = accset[i];
907:
908: dfaacc[newds].dfaacc_state = j;
909:
910: if ( j <= num_rules )
911: rule_useful[j] = true;
912: }
913:
914: *newds_addr = newds;
915:
916: return 1;
917: }
918:
919:
920: /* symfollowset - follow the symbol transitions one step
921: *
922: * synopsis
923: * numstates = symfollowset( int ds[current_max_dfa_size], int dsize,
924: * int transsym, int nset[current_max_dfa_size] );
925: */
926:
927: int symfollowset( ds, dsize, transsym, nset )
928: int ds[], dsize, transsym, nset[];
929: {
930: int ns, tsp, sym, i, j, lenccl, ch, numstates, ccllist;
931:
932: numstates = 0;
933:
934: for ( i = 1; i <= dsize; ++i )
935: { /* for each nfa state ns in the state set of ds */
936: ns = ds[i];
937: sym = transchar[ns];
938: tsp = trans1[ns];
939:
940: if ( sym < 0 )
941: { /* it's a character class */
942: sym = -sym;
943: ccllist = cclmap[sym];
944: lenccl = ccllen[sym];
945:
946: if ( cclng[sym] )
947: {
948: for ( j = 0; j < lenccl; ++j )
949: {
950: /* Loop through negated character
951: * class.
952: */
953: ch = ccltbl[ccllist + j];
954:
955: if ( ch == 0 )
956: ch = NUL_ec;
957:
958: if ( ch > transsym )
959: /* Transsym isn't in negated
960: * ccl.
961: */
962: break;
963:
964: else if ( ch == transsym )
965: /* next 2 */ goto bottom;
966: }
967:
968: /* Didn't find transsym in ccl. */
969: nset[++numstates] = tsp;
970: }
971:
972: else
973: for ( j = 0; j < lenccl; ++j )
974: {
975: ch = ccltbl[ccllist + j];
976:
977: if ( ch == 0 )
978: ch = NUL_ec;
979:
980: if ( ch > transsym )
981: break;
982: else if ( ch == transsym )
983: {
984: nset[++numstates] = tsp;
985: break;
986: }
987: }
988: }
989:
990: else if ( sym >= 'A' && sym <= 'Z' && caseins )
991: flexfatal(
992: _( "consistency check failed in symfollowset" ) );
993:
994: else if ( sym == SYM_EPSILON )
995: { /* do nothing */
996: }
997:
998: else if ( ABS( ecgroup[sym] ) == transsym )
999: nset[++numstates] = tsp;
1000:
1001: bottom: ;
1002: }
1003:
1004: return numstates;
1005: }
1006:
1007:
1008: /* sympartition - partition characters with same out-transitions
1009: *
1010: * synopsis
1011: * sympartition( int ds[current_max_dfa_size], int numstates,
1012: * int symlist[numecs], int duplist[numecs] );
1013: */
1014:
1015: void sympartition( ds, numstates, symlist, duplist )
1016: int ds[], numstates;
1017: int symlist[], duplist[];
1018: {
1019: int tch, i, j, k, ns, dupfwd[CSIZE + 1], lenccl, cclp, ich;
1020:
1021: /* Partitioning is done by creating equivalence classes for those
1022: * characters which have out-transitions from the given state. Thus
1023: * we are really creating equivalence classes of equivalence classes.
1024: */
1025:
1026: for ( i = 1; i <= numecs; ++i )
1027: { /* initialize equivalence class list */
1028: duplist[i] = i - 1;
1029: dupfwd[i] = i + 1;
1030: }
1031:
1032: duplist[1] = NIL;
1033: dupfwd[numecs] = NIL;
1034:
1035: for ( i = 1; i <= numstates; ++i )
1036: {
1037: ns = ds[i];
1038: tch = transchar[ns];
1039:
1040: if ( tch != SYM_EPSILON )
1041: {
1042: if ( tch < -lastccl || tch >= csize )
1043: {
1044: flexfatal(
1045: _( "bad transition character detected in sympartition()" ) );
1046: }
1047:
1048: if ( tch >= 0 )
1049: { /* character transition */
1050: int ec = ecgroup[tch];
1051:
1052: mkechar( ec, dupfwd, duplist );
1053: symlist[ec] = 1;
1054: }
1055:
1056: else
1057: { /* character class */
1058: tch = -tch;
1059:
1060: lenccl = ccllen[tch];
1061: cclp = cclmap[tch];
1062: mkeccl( ccltbl + cclp, lenccl, dupfwd,
1063: duplist, numecs, NUL_ec );
1064:
1065: if ( cclng[tch] )
1066: {
1067: j = 0;
1068:
1069: for ( k = 0; k < lenccl; ++k )
1070: {
1071: ich = ccltbl[cclp + k];
1072:
1073: if ( ich == 0 )
1074: ich = NUL_ec;
1075:
1076: for ( ++j; j < ich; ++j )
1077: symlist[j] = 1;
1078: }
1079:
1080: for ( ++j; j <= numecs; ++j )
1081: symlist[j] = 1;
1082: }
1083:
1084: else
1085: for ( k = 0; k < lenccl; ++k )
1086: {
1087: ich = ccltbl[cclp + k];
1088:
1089: if ( ich == 0 )
1090: ich = NUL_ec;
1091:
1092: symlist[ich] = 1;
1093: }
1094: }
1095: }
1096: }
1097: }