Annotation of src/usr.bin/yacc/lalr.c, Revision 1.7
1.7 ! millert 1: /* $OpenBSD: lalr.c,v 1.6 2002/02/16 21:28:00 millert Exp $ */
1.2 deraadt 2: /* $NetBSD: lalr.c,v 1.4 1996/03/19 03:21:33 jtc Exp $ */
3:
4: /*
5: * Copyright (c) 1989 The Regents of the University of California.
6: * All rights reserved.
7: *
8: * This code is derived from software contributed to Berkeley by
9: * Robert Paul Corbett.
10: *
11: * Redistribution and use in source and binary forms, with or without
12: * modification, are permitted provided that the following conditions
13: * are met:
14: * 1. Redistributions of source code must retain the above copyright
15: * notice, this list of conditions and the following disclaimer.
16: * 2. Redistributions in binary form must reproduce the above copyright
17: * notice, this list of conditions and the following disclaimer in the
18: * documentation and/or other materials provided with the distribution.
1.7 ! millert 19: * 3. Neither the name of the University nor the names of its contributors
1.2 deraadt 20: * may be used to endorse or promote products derived from this software
21: * without specific prior written permission.
22: *
23: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33: * SUCH DAMAGE.
34: */
35:
1.1 deraadt 36: #ifndef lint
1.2 deraadt 37: #if 0
38: static char sccsid[] = "@(#)lalr.c 5.3 (Berkeley) 6/1/90";
39: #else
1.7 ! millert 40: static char rcsid[] = "$OpenBSD: lalr.c,v 1.6 2002/02/16 21:28:00 millert Exp $";
1.2 deraadt 41: #endif
1.1 deraadt 42: #endif /* not lint */
43:
44: #include "defs.h"
45:
46: typedef
47: struct shorts
48: {
49: struct shorts *next;
50: short value;
51: }
52: shorts;
53:
54: int tokensetsize;
55: short *lookaheads;
56: short *LAruleno;
57: unsigned *LA;
58: short *accessing_symbol;
59: core **state_table;
60: shifts **shift_table;
61: reductions **reduction_table;
62: short *goto_map;
63: short *from_state;
64: short *to_state;
65:
66: short **transpose();
1.6 millert 67: void set_state_table(void);
68: void set_accessing_symbol(void);
69: void set_shift_table(void);
70: void set_reduction_table(void);
71: void set_maxrhs(void);
72: void initialize_LA(void);
73: void set_goto_map(void);
74: void initialize_F(void);
75: void build_relations(void);
76: void compute_FOLLOWS(void);
77: void compute_lookaheads(void);
78: int map_goto(int, int);
79: void digraph(short **);
80: void add_lookback_edge(int, int, int);
81: void traverse(int);
1.1 deraadt 82:
83: static int infinity;
84: static int maxrhs;
85: static int ngotos;
86: static unsigned *F;
87: static short **includes;
88: static shorts **lookback;
89: static short **R;
90: static short *INDEX;
91: static short *VERTICES;
92: static int top;
93:
1.4 pvalchev 94: void
1.1 deraadt 95: lalr()
96: {
97: tokensetsize = WORDSIZE(ntokens);
98:
99: set_state_table();
100: set_accessing_symbol();
101: set_shift_table();
102: set_reduction_table();
103: set_maxrhs();
104: initialize_LA();
105: set_goto_map();
106: initialize_F();
107: build_relations();
108: compute_FOLLOWS();
109: compute_lookaheads();
110: }
111:
112:
1.4 pvalchev 113: void
1.1 deraadt 114: set_state_table()
115: {
1.5 mpech 116: core *sp;
1.1 deraadt 117:
118: state_table = NEW2(nstates, core *);
119: for (sp = first_state; sp; sp = sp->next)
120: state_table[sp->number] = sp;
121: }
122:
123:
1.4 pvalchev 124: void
1.1 deraadt 125: set_accessing_symbol()
126: {
1.5 mpech 127: core *sp;
1.1 deraadt 128:
129: accessing_symbol = NEW2(nstates, short);
130: for (sp = first_state; sp; sp = sp->next)
131: accessing_symbol[sp->number] = sp->accessing_symbol;
132: }
133:
134:
1.4 pvalchev 135: void
1.1 deraadt 136: set_shift_table()
137: {
1.5 mpech 138: shifts *sp;
1.1 deraadt 139:
140: shift_table = NEW2(nstates, shifts *);
141: for (sp = first_shift; sp; sp = sp->next)
142: shift_table[sp->number] = sp;
143: }
144:
145:
1.4 pvalchev 146: void
1.1 deraadt 147: set_reduction_table()
148: {
1.5 mpech 149: reductions *rp;
1.1 deraadt 150:
151: reduction_table = NEW2(nstates, reductions *);
152: for (rp = first_reduction; rp; rp = rp->next)
153: reduction_table[rp->number] = rp;
154: }
155:
156:
1.4 pvalchev 157: void
1.1 deraadt 158: set_maxrhs()
159: {
1.5 mpech 160: short *itemp;
161: short *item_end;
162: int length;
163: int max;
1.1 deraadt 164:
165: length = 0;
166: max = 0;
167: item_end = ritem + nitems;
168: for (itemp = ritem; itemp < item_end; itemp++)
169: {
170: if (*itemp >= 0)
171: {
172: length++;
173: }
174: else
175: {
176: if (length > max) max = length;
177: length = 0;
178: }
179: }
180:
181: maxrhs = max;
182: }
183:
184:
1.4 pvalchev 185: void
1.1 deraadt 186: initialize_LA()
187: {
1.5 mpech 188: int i, j, k;
189: reductions *rp;
1.1 deraadt 190:
191: lookaheads = NEW2(nstates + 1, short);
192:
193: k = 0;
194: for (i = 0; i < nstates; i++)
195: {
196: lookaheads[i] = k;
197: rp = reduction_table[i];
198: if (rp)
199: k += rp->nreds;
200: }
201: lookaheads[nstates] = k;
202:
203: LA = NEW2(k * tokensetsize, unsigned);
204: LAruleno = NEW2(k, short);
205: lookback = NEW2(k, shorts *);
206:
207: k = 0;
208: for (i = 0; i < nstates; i++)
209: {
210: rp = reduction_table[i];
211: if (rp)
212: {
213: for (j = 0; j < rp->nreds; j++)
214: {
215: LAruleno[k] = rp->rules[j];
216: k++;
217: }
218: }
219: }
220: }
221:
1.4 pvalchev 222: void
1.1 deraadt 223: set_goto_map()
224: {
1.5 mpech 225: shifts *sp;
226: int i;
227: int symbol;
228: int k;
229: short *temp_map;
230: int state2;
231: int state1;
1.1 deraadt 232:
233: goto_map = NEW2(nvars + 1, short) - ntokens;
234: temp_map = NEW2(nvars + 1, short) - ntokens;
235:
236: ngotos = 0;
237: for (sp = first_shift; sp; sp = sp->next)
238: {
239: for (i = sp->nshifts - 1; i >= 0; i--)
240: {
241: symbol = accessing_symbol[sp->shift[i]];
242:
243: if (ISTOKEN(symbol)) break;
244:
245: if (ngotos == MAXSHORT)
246: fatal("too many gotos");
247:
248: ngotos++;
249: goto_map[symbol]++;
250: }
251: }
252:
253: k = 0;
254: for (i = ntokens; i < nsyms; i++)
255: {
256: temp_map[i] = k;
257: k += goto_map[i];
258: }
259:
260: for (i = ntokens; i < nsyms; i++)
261: goto_map[i] = temp_map[i];
262:
263: goto_map[nsyms] = ngotos;
264: temp_map[nsyms] = ngotos;
265:
266: from_state = NEW2(ngotos, short);
267: to_state = NEW2(ngotos, short);
268:
269: for (sp = first_shift; sp; sp = sp->next)
270: {
271: state1 = sp->number;
272: for (i = sp->nshifts - 1; i >= 0; i--)
273: {
274: state2 = sp->shift[i];
275: symbol = accessing_symbol[state2];
276:
277: if (ISTOKEN(symbol)) break;
278:
279: k = temp_map[symbol]++;
280: from_state[k] = state1;
281: to_state[k] = state2;
282: }
283: }
284:
285: FREE(temp_map + ntokens);
286: }
287:
288:
289:
290: /* Map_goto maps a state/symbol pair into its numeric representation. */
291:
292: int
293: map_goto(state, symbol)
294: int state;
295: int symbol;
296: {
1.5 mpech 297: int high;
298: int low;
299: int middle;
300: int s;
1.1 deraadt 301:
302: low = goto_map[symbol];
303: high = goto_map[symbol + 1];
304:
305: for (;;)
306: {
307: assert(low <= high);
308: middle = (low + high) >> 1;
309: s = from_state[middle];
310: if (s == state)
311: return (middle);
312: else if (s < state)
313: low = middle + 1;
314: else
315: high = middle - 1;
316: }
317: }
318:
319:
1.4 pvalchev 320: void
1.1 deraadt 321: initialize_F()
322: {
1.5 mpech 323: int i;
324: int j;
325: int k;
326: shifts *sp;
327: short *edge;
328: unsigned *rowp;
329: short *rp;
330: short **reads;
331: int nedges;
332: int stateno;
333: int symbol;
334: int nwords;
1.1 deraadt 335:
336: nwords = ngotos * tokensetsize;
337: F = NEW2(nwords, unsigned);
338:
339: reads = NEW2(ngotos, short *);
340: edge = NEW2(ngotos + 1, short);
341: nedges = 0;
342:
343: rowp = F;
344: for (i = 0; i < ngotos; i++)
345: {
346: stateno = to_state[i];
347: sp = shift_table[stateno];
348:
349: if (sp)
350: {
351: k = sp->nshifts;
352:
353: for (j = 0; j < k; j++)
354: {
355: symbol = accessing_symbol[sp->shift[j]];
356: if (ISVAR(symbol))
357: break;
358: SETBIT(rowp, symbol);
359: }
360:
361: for (; j < k; j++)
362: {
363: symbol = accessing_symbol[sp->shift[j]];
364: if (nullable[symbol])
365: edge[nedges++] = map_goto(stateno, symbol);
366: }
367:
368: if (nedges)
369: {
370: reads[i] = rp = NEW2(nedges + 1, short);
371:
372: for (j = 0; j < nedges; j++)
373: rp[j] = edge[j];
374:
375: rp[nedges] = -1;
376: nedges = 0;
377: }
378: }
379:
380: rowp += tokensetsize;
381: }
382:
383: SETBIT(F, 0);
384: digraph(reads);
385:
386: for (i = 0; i < ngotos; i++)
387: {
388: if (reads[i])
389: FREE(reads[i]);
390: }
391:
392: FREE(reads);
393: FREE(edge);
394: }
395:
396:
1.4 pvalchev 397: void
1.1 deraadt 398: build_relations()
399: {
1.5 mpech 400: int i;
401: int j;
402: int k;
403: short *rulep;
404: short *rp;
405: shifts *sp;
406: int length;
407: int nedges;
408: int done;
409: int state1;
410: int stateno;
411: int symbol1;
412: int symbol2;
413: short *shortp;
414: short *edge;
415: short *states;
416: short **new_includes;
1.1 deraadt 417:
418: includes = NEW2(ngotos, short *);
419: edge = NEW2(ngotos + 1, short);
420: states = NEW2(maxrhs + 1, short);
421:
422: for (i = 0; i < ngotos; i++)
423: {
424: nedges = 0;
425: state1 = from_state[i];
426: symbol1 = accessing_symbol[to_state[i]];
427:
428: for (rulep = derives[symbol1]; *rulep >= 0; rulep++)
429: {
430: length = 1;
431: states[0] = state1;
432: stateno = state1;
433:
434: for (rp = ritem + rrhs[*rulep]; *rp >= 0; rp++)
435: {
436: symbol2 = *rp;
437: sp = shift_table[stateno];
438: k = sp->nshifts;
439:
440: for (j = 0; j < k; j++)
441: {
442: stateno = sp->shift[j];
443: if (accessing_symbol[stateno] == symbol2) break;
444: }
445:
446: states[length++] = stateno;
447: }
448:
449: add_lookback_edge(stateno, *rulep, i);
450:
451: length--;
452: done = 0;
453: while (!done)
454: {
455: done = 1;
456: rp--;
457: if (ISVAR(*rp))
458: {
459: stateno = states[--length];
460: edge[nedges++] = map_goto(stateno, *rp);
461: if (nullable[*rp] && length > 0) done = 0;
462: }
463: }
464: }
465:
466: if (nedges)
467: {
468: includes[i] = shortp = NEW2(nedges + 1, short);
469: for (j = 0; j < nedges; j++)
470: shortp[j] = edge[j];
471: shortp[nedges] = -1;
472: }
473: }
474:
475: new_includes = transpose(includes, ngotos);
476:
477: for (i = 0; i < ngotos; i++)
478: if (includes[i])
479: FREE(includes[i]);
480:
481: FREE(includes);
482:
483: includes = new_includes;
484:
485: FREE(edge);
486: FREE(states);
487: }
488:
1.4 pvalchev 489: void
1.1 deraadt 490: add_lookback_edge(stateno, ruleno, gotono)
491: int stateno, ruleno, gotono;
492: {
1.5 mpech 493: int i, k;
494: int found;
495: shorts *sp;
1.1 deraadt 496:
497: i = lookaheads[stateno];
498: k = lookaheads[stateno + 1];
499: found = 0;
500: while (!found && i < k)
501: {
502: if (LAruleno[i] == ruleno)
503: found = 1;
504: else
505: ++i;
506: }
507: assert(found);
508:
509: sp = NEW(shorts);
510: sp->next = lookback[i];
511: sp->value = gotono;
512: lookback[i] = sp;
513: }
514:
515:
516:
517: short **
518: transpose(R, n)
519: short **R;
520: int n;
521: {
1.5 mpech 522: short **new_R;
523: short **temp_R;
524: short *nedges;
525: short *sp;
526: int i;
527: int k;
1.1 deraadt 528:
529: nedges = NEW2(n, short);
530:
531: for (i = 0; i < n; i++)
532: {
533: sp = R[i];
534: if (sp)
535: {
536: while (*sp >= 0)
537: nedges[*sp++]++;
538: }
539: }
540:
541: new_R = NEW2(n, short *);
542: temp_R = NEW2(n, short *);
543:
544: for (i = 0; i < n; i++)
545: {
546: k = nedges[i];
547: if (k > 0)
548: {
549: sp = NEW2(k + 1, short);
550: new_R[i] = sp;
551: temp_R[i] = sp;
552: sp[k] = -1;
553: }
554: }
555:
556: FREE(nedges);
557:
558: for (i = 0; i < n; i++)
559: {
560: sp = R[i];
561: if (sp)
562: {
563: while (*sp >= 0)
564: *temp_R[*sp++]++ = i;
565: }
566: }
567:
568: FREE(temp_R);
569:
570: return (new_R);
571: }
572:
573:
1.4 pvalchev 574: void
1.1 deraadt 575: compute_FOLLOWS()
576: {
577: digraph(includes);
578: }
579:
1.4 pvalchev 580: void
1.1 deraadt 581: compute_lookaheads()
582: {
1.5 mpech 583: int i, n;
584: unsigned *fp1, *fp2, *fp3;
585: shorts *sp, *next;
586: unsigned *rowp;
1.1 deraadt 587:
588: rowp = LA;
589: n = lookaheads[nstates];
590: for (i = 0; i < n; i++)
591: {
592: fp3 = rowp + tokensetsize;
593: for (sp = lookback[i]; sp; sp = sp->next)
594: {
595: fp1 = rowp;
596: fp2 = F + tokensetsize * sp->value;
597: while (fp1 < fp3)
598: *fp1++ |= *fp2++;
599: }
600: rowp = fp3;
601: }
602:
603: for (i = 0; i < n; i++)
604: for (sp = lookback[i]; sp; sp = next)
605: {
606: next = sp->next;
607: FREE(sp);
608: }
609:
610: FREE(lookback);
611: FREE(F);
612: }
613:
1.4 pvalchev 614: void
1.1 deraadt 615: digraph(relation)
616: short **relation;
617: {
1.5 mpech 618: int i;
1.1 deraadt 619:
620: infinity = ngotos + 2;
621: INDEX = NEW2(ngotos + 1, short);
622: VERTICES = NEW2(ngotos + 1, short);
623: top = 0;
624:
625: R = relation;
626:
627: for (i = 0; i < ngotos; i++)
628: INDEX[i] = 0;
629:
630: for (i = 0; i < ngotos; i++)
631: {
632: if (INDEX[i] == 0 && R[i])
633: traverse(i);
634: }
635:
636: FREE(INDEX);
637: FREE(VERTICES);
638: }
639:
640:
1.4 pvalchev 641: void
1.1 deraadt 642: traverse(i)
1.5 mpech 643: int i;
1.1 deraadt 644: {
1.5 mpech 645: unsigned *fp1;
646: unsigned *fp2;
647: unsigned *fp3;
648: int j;
649: short *rp;
1.1 deraadt 650:
651: int height;
652: unsigned *base;
653:
654: VERTICES[++top] = i;
655: INDEX[i] = height = top;
656:
657: base = F + i * tokensetsize;
658: fp3 = base + tokensetsize;
659:
660: rp = R[i];
661: if (rp)
662: {
663: while ((j = *rp++) >= 0)
664: {
665: if (INDEX[j] == 0)
666: traverse(j);
667:
668: if (INDEX[i] > INDEX[j])
669: INDEX[i] = INDEX[j];
670:
671: fp1 = base;
672: fp2 = F + j * tokensetsize;
673:
674: while (fp1 < fp3)
675: *fp1++ |= *fp2++;
676: }
677: }
678:
679: if (INDEX[i] == height)
680: {
681: for (;;)
682: {
683: j = VERTICES[top--];
684: INDEX[j] = infinity;
685:
686: if (i == j)
687: break;
688:
689: fp1 = base;
690: fp2 = F + j * tokensetsize;
691:
692: while (fp1 < fp3)
693: *fp2++ = *fp1++;
694: }
695: }
696: }