Annotation of src/usr.bin/yacc/reader.c, Revision 1.2
1.1 deraadt 1: #ifndef lint
1.2 ! etheisen 2: static char rcsid[] = "$Id: reader.c,v 1.1.1.1 1995/10/18 08:47:06 deraadt Exp $";
1.1 deraadt 3: #endif /* not lint */
4:
5: #include "defs.h"
6:
7: /* The line size must be a positive integer. One hundred was chosen */
8: /* because few lines in Yacc input grammars exceed 100 characters. */
9: /* Note that if a line exceeds LINESIZE characters, the line buffer */
10: /* will be expanded to accommodate it. */
11:
12: #define LINESIZE 100
13:
/* Scratch character buffer filled by cachec(): cinc is the index of the */
/* next free slot and cache_size is the number of bytes allocated. */
14: char *cache;
15: int cinc, cache_size;
16:
/* Table of distinct <tag> names seen by get_tag(): ntags entries are in */
/* use out of tagmax allocated slots. */
17: int ntags, tagmax;
18: char **tag_table;
19:
/* saw_eof is set by get_line() once the input is exhausted; unionized is */
/* set by copy_union() after the first %union has been copied. */
20: char saw_eof, unionized;
/* line is the current input line (terminated by '\n', not NUL) and cptr */
/* is the scan position inside it; linesize is the allocated length. */
21: char *cptr, *line;
22: int linesize;
23:
/* goal is the start symbol recorded by declare_start()/advance_to_start(). */
24: bucket *goal;
/* prec is bumped by declare_tokens() for each non-%token declaration. */
25: int prec;
/* gensym numbers the "$$n" symbols created by insert_empty_rule(). */
26: int gensym;
/* Nonzero when the most recent item copied into the current rule was an */
/* action (set by copy_action(), cleared by add_symbol()/end_rule()). */
27: char last_was_action;
28:
/* pitem holds the right-hand-side items of all rules, each rule's items */
/* followed by a null entry; maxitems is its allocated capacity. */
29: int maxitems;
30: bucket **pitem;
31:
/* plhs[r] is the left-hand-side symbol of rule r; maxrules is the */
/* allocated capacity (shared with rprec and rassoc). */
32: int maxrules;
33: bucket **plhs;
34:
/* Single allocation into which pack_names() copies every symbol name. */
35: int name_pool_size;
36: char *name_pool;
37:
/* printf format for the #line directives written to the output files. */
38: char line_format[] = "#line %d \"%s\"\n";
39:
40:
41: cachec(c)
42: int c;
43: {
44: assert(cinc >= 0);
45: if (cinc >= cache_size)
46: {
47: cache_size += 256;
48: cache = REALLOC(cache, cache_size);
49: if (cache == 0) no_space();
50: }
51: cache[cinc] = c;
52: ++cinc;
53: }
54:
55:
56: get_line()
57: {
58: register FILE *f = input_file;
59: register int c;
60: register int i;
61:
62: if (saw_eof || (c = getc(f)) == EOF)
63: {
64: if (line) { FREE(line); line = 0; }
65: cptr = 0;
66: saw_eof = 1;
67: return;
68: }
69:
70: if (line == 0 || linesize != (LINESIZE + 1))
71: {
72: if (line) FREE(line);
73: linesize = LINESIZE + 1;
74: line = MALLOC(linesize);
75: if (line == 0) no_space();
76: }
77:
78: i = 0;
79: ++lineno;
80: for (;;)
81: {
82: line[i] = c;
83: if (c == '\n') { cptr = line; return; }
84: if (++i >= linesize)
85: {
86: linesize += LINESIZE;
87: line = REALLOC(line, linesize);
88: if (line == 0) no_space();
89: }
90: c = getc(f);
91: if (c == EOF)
92: {
93: line[i] = '\n';
94: saw_eof = 1;
95: cptr = line;
96: return;
97: }
98: }
99: }
100:
101:
102: char *
103: dup_line()
104: {
105: register char *p, *s, *t;
106:
107: if (line == 0) return (0);
108: s = line;
109: while (*s != '\n') ++s;
110: p = MALLOC(s - line + 1);
111: if (p == 0) no_space();
112:
113: s = line;
114: t = p;
115: while ((*t++ = *s++) != '\n') continue;
116: return (p);
117: }
118:
119:
120: skip_comment()
121: {
122: register char *s;
123:
124: int st_lineno = lineno;
125: char *st_line = dup_line();
126: char *st_cptr = st_line + (cptr - line);
127:
128: s = cptr + 2;
129: for (;;)
130: {
131: if (*s == '*' && s[1] == '/')
132: {
133: cptr = s + 2;
134: FREE(st_line);
135: return;
136: }
137: if (*s == '\n')
138: {
139: get_line();
140: if (line == 0)
141: unterminated_comment(st_lineno, st_line, st_cptr);
142: s = cptr;
143: }
144: else
145: ++s;
146: }
147: }
148:
149:
150: int
151: nextc()
152: {
153: register char *s;
154:
155: if (line == 0)
156: {
157: get_line();
158: if (line == 0)
159: return (EOF);
160: }
161:
162: s = cptr;
163: for (;;)
164: {
165: switch (*s)
166: {
167: case '\n':
168: get_line();
169: if (line == 0) return (EOF);
170: s = cptr;
171: break;
172:
173: case ' ':
174: case '\t':
175: case '\f':
176: case '\r':
177: case '\v':
178: case ',':
179: case ';':
180: ++s;
181: break;
182:
183: case '\\':
184: cptr = s;
185: return ('%');
186:
187: case '/':
188: if (s[1] == '*')
189: {
190: cptr = s;
191: skip_comment();
192: s = cptr;
193: break;
194: }
195: else if (s[1] == '/')
196: {
197: get_line();
198: if (line == 0) return (EOF);
199: s = cptr;
200: break;
201: }
202: /* fall through */
203:
204: default:
205: cptr = s;
206: return (*s);
207: }
208: }
209: }
210:
211:
212: int
213: keyword()
214: {
215: register int c;
216: char *t_cptr = cptr;
217:
218: c = *++cptr;
219: if (isalpha(c))
220: {
221: cinc = 0;
222: for (;;)
223: {
224: if (isalpha(c))
225: {
226: if (isupper(c)) c = tolower(c);
227: cachec(c);
228: }
229: else if (isdigit(c) || c == '_' || c == '.' || c == '$')
230: cachec(c);
231: else
232: break;
233: c = *++cptr;
234: }
235: cachec(NUL);
236:
237: if (strcmp(cache, "token") == 0 || strcmp(cache, "term") == 0)
238: return (TOKEN);
239: if (strcmp(cache, "type") == 0)
240: return (TYPE);
241: if (strcmp(cache, "left") == 0)
242: return (LEFT);
243: if (strcmp(cache, "right") == 0)
244: return (RIGHT);
245: if (strcmp(cache, "nonassoc") == 0 || strcmp(cache, "binary") == 0)
246: return (NONASSOC);
247: if (strcmp(cache, "start") == 0)
248: return (START);
249: if (strcmp(cache, "union") == 0)
250: return (UNION);
251: if (strcmp(cache, "ident") == 0)
252: return (IDENT);
1.2 ! etheisen 253: if (strcmp(cache, "expect") == 0)
! 254: return (EXPECT);
1.1 deraadt 255: }
256: else
257: {
258: ++cptr;
259: if (c == '{')
260: return (TEXT);
261: if (c == '%' || c == '\\')
262: return (MARK);
263: if (c == '<')
264: return (LEFT);
265: if (c == '>')
266: return (RIGHT);
267: if (c == '0')
268: return (TOKEN);
269: if (c == '2')
270: return (NONASSOC);
271: }
272: syntax_error(lineno, line, t_cptr);
273: /*NOTREACHED*/
274: }
275:
276:
277: copy_ident()
278: {
279: register int c;
280: register FILE *f = output_file;
281:
282: c = nextc();
283: if (c == EOF) unexpected_EOF();
284: if (c != '"') syntax_error(lineno, line, cptr);
285: ++outline;
286: fprintf(f, "#ident \"");
287: for (;;)
288: {
289: c = *++cptr;
290: if (c == '\n')
291: {
292: fprintf(f, "\"\n");
293: return;
294: }
295: putc(c, f);
296: if (c == '"')
297: {
298: putc('\n', f);
299: ++cptr;
300: return;
301: }
302: }
303: }
304:
305:
/*
 * Copy text from the input to text_file until a '%' or '\' immediately
 * followed by '}' is found outside any string or comment; cptr is left
 * just past that terminator.  A #line directive is written first unless
 * lflag is set.  Quoted strings, character constants and block comments
 * are copied through verbatim; "//" comments are rewritten as block
 * comments.  The unterminated_text/_string/_comment reporters are called
 * when the input (or, for strings, the line) ends too early.
 */
306: copy_text()
307: {
308: register int c;
309: int quote;
310: register FILE *f = text_file;
/* Nonzero while the last thing written did not end a line. */
311: int need_newline = 0;
/* Where the text started, kept for the unterminated_text() report. */
/* The -2 presumably steps back over the opening "%{" -- TODO confirm. */
312: int t_lineno = lineno;
313: char *t_line = dup_line();
314: char *t_cptr = t_line + (cptr - line - 2);
315:
/* Nothing else on the opening line: start copying from the next one. */
316: if (*cptr == '\n')
317: {
318: get_line();
319: if (line == 0)
320: unterminated_text(t_lineno, t_line, t_cptr);
321: }
322: if (!lflag) fprintf(f, line_format, lineno, input_file_name);
323:
324: loop:
325: c = *cptr++;
326: switch (c)
327: {
328: case '\n':
329: next_line:
330: putc('\n', f);
331: need_newline = 0;
332: get_line();
333: if (line) goto loop;
334: unterminated_text(t_lineno, t_line, t_cptr);
335:
/* String or character constant: copy up to the matching quote. */
336: case '\'':
337: case '"':
338: {
339: int s_lineno = lineno;
340: char *s_line = dup_line();
341: char *s_cptr = s_line + (cptr - line - 1);
342:
343: quote = c;
344: putc(c, f);
345: for (;;)
346: {
347: c = *cptr++;
348: putc(c, f);
349: if (c == quote)
350: {
351: need_newline = 1;
352: FREE(s_line);
353: goto loop;
354: }
355: if (c == '\n')
356: unterminated_string(s_lineno, s_line, s_cptr);
/* A backslash escapes the next character; backslash-newline */
/* continues the string on the following input line. */
357: if (c == '\\')
358: {
359: c = *cptr++;
360: putc(c, f);
361: if (c == '\n')
362: {
363: get_line();
364: if (line == 0)
365: unterminated_string(s_lineno, s_line, s_cptr);
366: }
367: }
368: }
369: }
370:
371: case '/':
372: putc(c, f);
373: need_newline = 1;
374: c = *cptr;
/* "//" comment: emit it as a block comment.  A '*' that is followed by */
/* '/' is written as "* " so it cannot end the generated comment early. */
375: if (c == '/')
376: {
377: putc('*', f);
378: while ((c = *++cptr) != '\n')
379: {
380: if (c == '*' && cptr[1] == '/')
381: fprintf(f, "* ");
382: else
383: putc(c, f);
384: }
385: fprintf(f, "*/");
386: goto next_line;
387: }
/* Block comment: copy through to the closing delimiter. */
388: if (c == '*')
389: {
390: int c_lineno = lineno;
391: char *c_line = dup_line();
392: char *c_cptr = c_line + (cptr - line - 1);
393:
394: putc('*', f);
395: ++cptr;
396: for (;;)
397: {
398: c = *cptr++;
399: putc(c, f);
400: if (c == '*' && *cptr == '/')
401: {
402: putc('/', f);
403: ++cptr;
404: FREE(c_line);
405: goto loop;
406: }
407: if (c == '\n')
408: {
409: get_line();
410: if (line == 0)
411: unterminated_comment(c_lineno, c_line, c_cptr);
412: }
413: }
414: }
415: need_newline = 1;
416: goto loop;
417:
/* '%' or '\' followed by '}' ends the text block. */
418: case '%':
419: case '\\':
420: if (*cptr == '}')
421: {
422: if (need_newline) putc('\n', f);
423: ++cptr;
424: FREE(t_line);
425: return;
426: }
427: /* fall through */
428:
429: default:
430: putc(c, f);
431: need_newline = 1;
432: goto loop;
433: }
434: }
435:
436:
/*
 * Copy the body of a %union declaration to text_file as
 * "typedef union { ... } YYSTYPE;", mirroring the typedef and body to
 * union_file when dflag is set.  Copying stops at the '}' that brings the
 * brace depth back to zero.  A second %union is reported through
 * over_unionized().  Strings, character constants and comments are copied
 * without counting the braces inside them; "//" comments are rewritten as
 * block comments.
 */
437: copy_union()
438: {
439: register int c;
440: int quote;
441: int depth;
/* Where the declaration started, kept for unterminated_union(). */
/* The -6 presumably steps back over the "%union" keyword -- TODO confirm. */
442: int u_lineno = lineno;
443: char *u_line = dup_line();
444: char *u_cptr = u_line + (cptr - line - 6);
445:
446: if (unionized) over_unionized(cptr - 6);
447: unionized = 1;
448:
449: if (!lflag)
450: fprintf(text_file, line_format, lineno, input_file_name);
451:
452: fprintf(text_file, "typedef union");
453: if (dflag) fprintf(union_file, "typedef union");
454:
455: depth = 0;
/* Every character read here is echoed before it is examined. */
456: loop:
457: c = *cptr++;
458: putc(c, text_file);
459: if (dflag) putc(c, union_file);
460: switch (c)
461: {
462: case '\n':
463: next_line:
464: get_line();
465: if (line == 0) unterminated_union(u_lineno, u_line, u_cptr);
466: goto loop;
467:
468: case '{':
469: ++depth;
470: goto loop;
471:
/* The brace that closes the outermost level ends the union. */
472: case '}':
473: if (--depth == 0)
474: {
475: fprintf(text_file, " YYSTYPE;\n");
476: FREE(u_line);
477: return;
478: }
479: goto loop;
480:
/* String or character constant: copy up to the matching quote. */
481: case '\'':
482: case '"':
483: {
484: int s_lineno = lineno;
485: char *s_line = dup_line();
486: char *s_cptr = s_line + (cptr - line - 1);
487:
488: quote = c;
489: for (;;)
490: {
491: c = *cptr++;
492: putc(c, text_file);
493: if (dflag) putc(c, union_file);
494: if (c == quote)
495: {
496: FREE(s_line);
497: goto loop;
498: }
499: if (c == '\n')
500: unterminated_string(s_lineno, s_line, s_cptr);
/* A backslash escapes the next character; backslash-newline */
/* continues the string on the following input line. */
501: if (c == '\\')
502: {
503: c = *cptr++;
504: putc(c, text_file);
505: if (dflag) putc(c, union_file);
506: if (c == '\n')
507: {
508: get_line();
509: if (line == 0)
510: unterminated_string(s_lineno, s_line, s_cptr);
511: }
512: }
513: }
514: }
515:
516: case '/':
517: c = *cptr;
/* "//" comment: emit it as a block comment (the first '/' was already */
/* echoed above).  A '*' followed by '/' is written as "* ". */
518: if (c == '/')
519: {
520: putc('*', text_file);
521: if (dflag) putc('*', union_file);
522: while ((c = *++cptr) != '\n')
523: {
524: if (c == '*' && cptr[1] == '/')
525: {
526: fprintf(text_file, "* ");
527: if (dflag) fprintf(union_file, "* ");
528: }
529: else
530: {
531: putc(c, text_file);
532: if (dflag) putc(c, union_file);
533: }
534: }
535: fprintf(text_file, "*/\n");
536: if (dflag) fprintf(union_file, "*/\n");
537: goto next_line;
538: }
/* Block comment: copy through to the closing delimiter. */
539: if (c == '*')
540: {
541: int c_lineno = lineno;
542: char *c_line = dup_line();
543: char *c_cptr = c_line + (cptr - line - 1);
544:
545: putc('*', text_file);
546: if (dflag) putc('*', union_file);
547: ++cptr;
548: for (;;)
549: {
550: c = *cptr++;
551: putc(c, text_file);
552: if (dflag) putc(c, union_file);
553: if (c == '*' && *cptr == '/')
554: {
555: putc('/', text_file);
556: if (dflag) putc('/', union_file);
557: ++cptr;
558: FREE(c_line);
559: goto loop;
560: }
561: if (c == '\n')
562: {
563: get_line();
564: if (line == 0)
565: unterminated_comment(c_lineno, c_line, c_cptr);
566: }
567: }
568: }
569: goto loop;
570:
571: default:
572: goto loop;
573: }
574: }
575:
576:
/*
 * Return the numeric value (0..15) of the hexadecimal digit c, accepting
 * both upper- and lower-case letters, or -1 if c is not a hex digit.
 */
int
hexval(c)
int c;
{
    int digit = -1;

    if ('0' <= c && c <= '9')
        digit = c - '0';
    else if ('A' <= c && c <= 'F')
        digit = 10 + (c - 'A');
    else if ('a' <= c && c <= 'f')
        digit = 10 + (c - 'a');

    return (digit);
}
589:
590:
/*
 * Read the quoted literal that starts at cptr (the opening quote is
 * *cptr), decode its escape sequences, and return the symbol-table bucket
 * for it.  The bucket is looked up under a printable, quoted and escaped
 * spelling of the literal and is marked as a terminal; a one-character
 * literal whose value is still UNDEFINED is given that character's code
 * as its value.  cptr is left after the closing quote.
 */
591: bucket *
592: get_literal()
593: {
594: register int c, quote;
595: register int i;
596: register int n;
597: register char *s;
598: register bucket *bp;
/* Start position, kept for the unterminated_string() report. */
599: int s_lineno = lineno;
600: char *s_line = dup_line();
601: char *s_cptr = s_line + (cptr - line);
602:
603: quote = *cptr++;
604: cinc = 0;
/* Pass 1: collect the decoded characters of the literal in cache. */
605: for (;;)
606: {
607: c = *cptr++;
608: if (c == quote) break;
609: if (c == '\n') unterminated_string(s_lineno, s_line, s_cptr);
610: if (c == '\\')
611: {
/* Position of the backslash, for illegal_character(). */
612: char *c_cptr = cptr - 1;
613:
614: c = *cptr++;
615: switch (c)
616: {
/* Backslash-newline: continue on the next line, storing nothing. */
617: case '\n':
618: get_line();
619: if (line == 0) unterminated_string(s_lineno, s_line, s_cptr);
620: continue;
621:
/* Octal escape of one to three digits. */
622: case '0': case '1': case '2': case '3':
623: case '4': case '5': case '6': case '7':
624: n = c - '0';
625: c = *cptr;
626: if (IS_OCTAL(c))
627: {
628: n = (n << 3) + (c - '0');
629: c = *++cptr;
630: if (IS_OCTAL(c))
631: {
632: n = (n << 3) + (c - '0');
633: ++cptr;
634: }
635: }
636: if (n > MAXCHAR) illegal_character(c_cptr);
637: c = n;
638: break;
639:
/* Hex escape: at least one hex digit, then as many as follow. */
640: case 'x':
641: c = *cptr++;
642: n = hexval(c);
643: if (n < 0 || n >= 16)
644: illegal_character(c_cptr);
645: for (;;)
646: {
647: c = *cptr;
648: i = hexval(c);
649: if (i < 0 || i >= 16) break;
650: ++cptr;
651: n = (n << 4) + i;
652: if (n > MAXCHAR) illegal_character(c_cptr);
653: }
654: c = n;
655: break;
656:
/* Named escapes; any other escaped character stands for itself. */
657: case 'a': c = 7; break;
658: case 'b': c = '\b'; break;
659: case 'f': c = '\f'; break;
660: case 'n': c = '\n'; break;
661: case 'r': c = '\r'; break;
662: case 't': c = '\t'; break;
663: case 'v': c = '\v'; break;
664: }
665: }
666: cachec(c);
667: }
668: FREE(s_line);
669:
/* Save the n decoded bytes in s so that cache can be reused. */
670: n = cinc;
671: s = MALLOC(n);
672: if (s == 0) no_space();
673:
674: for (i = 0; i < n; ++i)
675: s[i] = cache[i];
676:
/* Pass 2: build the symbol name in cache.  One-character literals are */
/* wrapped in single quotes, all others in double quotes. */
677: cinc = 0;
678: if (n == 1)
679: cachec('\'');
680: else
681: cachec('"');
682:
683: for (i = 0; i < n; ++i)
684: {
685: c = ((unsigned char *)s)[i];
/* Escape backslashes and the quote character chosen above. */
686: if (c == '\\' || c == cache[0])
687: {
688: cachec('\\');
689: cachec(c);
690: }
691: else if (isprint(c))
692: cachec(c);
693: else
694: {
/* Non-printable: use a named escape, else three octal digits. */
695: cachec('\\');
696: switch (c)
697: {
698: case 7: cachec('a'); break;
699: case '\b': cachec('b'); break;
700: case '\f': cachec('f'); break;
701: case '\n': cachec('n'); break;
702: case '\r': cachec('r'); break;
703: case '\t': cachec('t'); break;
704: case '\v': cachec('v'); break;
705: default:
706: cachec(((c >> 6) & 7) + '0');
707: cachec(((c >> 3) & 7) + '0');
708: cachec((c & 7) + '0');
709: break;
710: }
711: }
712: }
713:
714: if (n == 1)
715: cachec('\'');
716: else
717: cachec('"');
718:
719: cachec(NUL);
720: bp = lookup(cache);
721: bp->class = TERM;
722: if (n == 1 && bp->value == UNDEFINED)
723: bp->value = *(unsigned char *)s;
724: FREE(s);
725:
726: return (bp);
727: }
728:
729:
/*
 * Return 1 if name is one of the symbol names reserved for the parser
 * generator itself, 0 otherwise.  The reserved names are ".", "$accept",
 * "$end", and "$$" followed by one or more decimal digits.
 *
 * The characters are converted to unsigned char before being handed to
 * isdigit(), because passing a negative plain-char value to a <ctype.h>
 * function is undefined behaviour.
 */
int
is_reserved(name)
char *name;
{
    char *s;

    if (strcmp(name, ".") == 0 ||
            strcmp(name, "$accept") == 0 ||
            strcmp(name, "$end") == 0)
        return (1);

    if (name[0] == '$' && name[1] == '$' && isdigit((unsigned char) name[2]))
    {
        s = name + 3;
        while (isdigit((unsigned char) *s)) ++s;
        if (*s == '\0') return (1);
    }

    return (0);
}
750:
751:
752: bucket *
753: get_name()
754: {
755: register int c;
756:
757: cinc = 0;
758: for (c = *cptr; IS_IDENT(c); c = *++cptr)
759: cachec(c);
760: cachec(NUL);
761:
762: if (is_reserved(cache)) used_reserved(cache);
763:
764: return (lookup(cache));
765: }
766:
767:
768: int
769: get_number()
770: {
771: register int c;
772: register int n;
773:
774: n = 0;
775: for (c = *cptr; isdigit(c); c = *++cptr)
776: n = 10*n + (c - '0');
777:
778: return (n);
779: }
780:
781:
782: char *
783: get_tag()
784: {
785: register int c;
786: register int i;
787: register char *s;
788: int t_lineno = lineno;
789: char *t_line = dup_line();
790: char *t_cptr = t_line + (cptr - line);
791:
792: ++cptr;
793: c = nextc();
794: if (c == EOF) unexpected_EOF();
795: if (!isalpha(c) && c != '_' && c != '$')
796: illegal_tag(t_lineno, t_line, t_cptr);
797:
798: cinc = 0;
799: do { cachec(c); c = *++cptr; } while (IS_IDENT(c));
800: cachec(NUL);
801:
802: c = nextc();
803: if (c == EOF) unexpected_EOF();
804: if (c != '>')
805: illegal_tag(t_lineno, t_line, t_cptr);
806: ++cptr;
807:
808: for (i = 0; i < ntags; ++i)
809: {
810: if (strcmp(cache, tag_table[i]) == 0)
811: return (tag_table[i]);
812: }
813:
814: if (ntags >= tagmax)
815: {
816: tagmax += 16;
817: tag_table = (char **)
818: (tag_table ? REALLOC(tag_table, tagmax*sizeof(char *))
819: : MALLOC(tagmax*sizeof(char *)));
820: if (tag_table == 0) no_space();
821: }
822:
823: s = MALLOC(cinc);
824: if (s == 0) no_space();
825: strcpy(s, cache);
826: tag_table[ntags] = s;
827: ++ntags;
828: FREE(t_line);
829: return (s);
830: }
831:
832:
833: declare_tokens(assoc)
834: int assoc;
835: {
836: register int c;
837: register bucket *bp;
838: int value;
839: char *tag = 0;
840:
841: if (assoc != TOKEN) ++prec;
842:
843: c = nextc();
844: if (c == EOF) unexpected_EOF();
845: if (c == '<')
846: {
847: tag = get_tag();
848: c = nextc();
849: if (c == EOF) unexpected_EOF();
850: }
851:
852: for (;;)
853: {
854: if (isalpha(c) || c == '_' || c == '.' || c == '$')
855: bp = get_name();
856: else if (c == '\'' || c == '"')
857: bp = get_literal();
858: else
859: return;
860:
861: if (bp == goal) tokenized_start(bp->name);
862: bp->class = TERM;
863:
864: if (tag)
865: {
866: if (bp->tag && tag != bp->tag)
867: retyped_warning(bp->name);
868: bp->tag = tag;
869: }
870:
871: if (assoc != TOKEN)
872: {
873: if (bp->prec && prec != bp->prec)
874: reprec_warning(bp->name);
875: bp->assoc = assoc;
876: bp->prec = prec;
877: }
878:
879: c = nextc();
880: if (c == EOF) unexpected_EOF();
881: value = UNDEFINED;
882: if (isdigit(c))
883: {
884: value = get_number();
885: if (bp->value != UNDEFINED && value != bp->value)
886: revalued_warning(bp->name);
887: bp->value = value;
888: c = nextc();
889: if (c == EOF) unexpected_EOF();
890: }
891: }
892: }
893:
894:
1.2 ! etheisen 895: /*
! 896: * %expect requires special handling
! 897: * as it really isn't part of the yacc
! 898: * grammar only a flag for yacc proper.
! 899: */
! 900: declare_expect(assoc)
! 901: int assoc;
! 902: {
! 903: register int c;
! 904:
! 905: if (assoc != EXPECT) ++prec;
! 906:
! 907: /*
! 908: * Stay away from nextc - doesn't
! 909: * detect EOL and will read to EOF.
! 910: */
! 911: c = *++cptr;
! 912: if (c == EOF) unexpected_EOF();
! 913:
! 914: for(;;)
! 915: {
! 916: if (isdigit(c))
! 917: {
! 918: SRexpect = get_number();
! 919: break;
! 920: }
! 921: /*
! 922: * Looking for number before EOL.
! 923: * Spaces, tabs, and numbers are ok,
! 924: * words, punc., etc. are syntax errors.
! 925: */
! 926: else if (c == '\n' || isalpha(c) || !isspace(c))
! 927: {
! 928: syntax_error(lineno, line, cptr);
! 929: }
! 930: else
! 931: {
! 932: c = *++cptr;
! 933: if (c == EOF) unexpected_EOF();
! 934: }
! 935: }
! 936: }
! 937:
! 938:
1.1 deraadt 939: declare_types()
940: {
941: register int c;
942: register bucket *bp;
943: char *tag;
944:
945: c = nextc();
946: if (c == EOF) unexpected_EOF();
947: if (c != '<') syntax_error(lineno, line, cptr);
948: tag = get_tag();
949:
950: for (;;)
951: {
952: c = nextc();
953: if (isalpha(c) || c == '_' || c == '.' || c == '$')
954: bp = get_name();
955: else if (c == '\'' || c == '"')
956: bp = get_literal();
957: else
958: return;
959:
960: if (bp->tag && tag != bp->tag)
961: retyped_warning(bp->name);
962: bp->tag = tag;
963: }
964: }
965:
966:
967: declare_start()
968: {
969: register int c;
970: register bucket *bp;
971:
972: c = nextc();
973: if (c == EOF) unexpected_EOF();
974: if (!isalpha(c) && c != '_' && c != '.' && c != '$')
975: syntax_error(lineno, line, cptr);
976: bp = get_name();
977: if (bp->class == TERM)
978: terminal_start(bp->name);
979: if (goal && goal != bp)
980: restarted_warning();
981: goal = bp;
982: }
983:
984:
985: read_declarations()
986: {
987: register int c, k;
988:
989: cache_size = 256;
990: cache = MALLOC(cache_size);
991: if (cache == 0) no_space();
992:
993: for (;;)
994: {
995: c = nextc();
996: if (c == EOF) unexpected_EOF();
997: if (c != '%') syntax_error(lineno, line, cptr);
998: switch (k = keyword())
999: {
1000: case MARK:
1001: return;
1002:
1003: case IDENT:
1004: copy_ident();
1005: break;
1006:
1007: case TEXT:
1008: copy_text();
1009: break;
1010:
1011: case UNION:
1012: copy_union();
1013: break;
1014:
1015: case TOKEN:
1016: case LEFT:
1017: case RIGHT:
1018: case NONASSOC:
1019: declare_tokens(k);
1020: break;
1.2 ! etheisen 1021:
! 1022: case EXPECT:
! 1023: declare_expect(k);
! 1024: break;
1.1 deraadt 1025:
1026: case TYPE:
1027: declare_types();
1028: break;
1029:
1030: case START:
1031: declare_start();
1032: break;
1033: }
1034: }
1035: }
1036:
1037:
1038: initialize_grammar()
1039: {
1040: nitems = 4;
1041: maxitems = 300;
1042: pitem = (bucket **) MALLOC(maxitems*sizeof(bucket *));
1043: if (pitem == 0) no_space();
1044: pitem[0] = 0;
1045: pitem[1] = 0;
1046: pitem[2] = 0;
1047: pitem[3] = 0;
1048:
1049: nrules = 3;
1050: maxrules = 100;
1051: plhs = (bucket **) MALLOC(maxrules*sizeof(bucket *));
1052: if (plhs == 0) no_space();
1053: plhs[0] = 0;
1054: plhs[1] = 0;
1055: plhs[2] = 0;
1056: rprec = (short *) MALLOC(maxrules*sizeof(short));
1057: if (rprec == 0) no_space();
1058: rprec[0] = 0;
1059: rprec[1] = 0;
1060: rprec[2] = 0;
1061: rassoc = (char *) MALLOC(maxrules*sizeof(char));
1062: if (rassoc == 0) no_space();
1063: rassoc[0] = TOKEN;
1064: rassoc[1] = TOKEN;
1065: rassoc[2] = TOKEN;
1066: }
1067:
1068:
1069: expand_items()
1070: {
1071: maxitems += 300;
1072: pitem = (bucket **) REALLOC(pitem, maxitems*sizeof(bucket *));
1073: if (pitem == 0) no_space();
1074: }
1075:
1076:
1077: expand_rules()
1078: {
1079: maxrules += 100;
1080: plhs = (bucket **) REALLOC(plhs, maxrules*sizeof(bucket *));
1081: if (plhs == 0) no_space();
1082: rprec = (short *) REALLOC(rprec, maxrules*sizeof(short));
1083: if (rprec == 0) no_space();
1084: rassoc = (char *) REALLOC(rassoc, maxrules*sizeof(char));
1085: if (rassoc == 0) no_space();
1086: }
1087:
1088:
1089: advance_to_start()
1090: {
1091: register int c;
1092: register bucket *bp;
1093: char *s_cptr;
1094: int s_lineno;
1095:
1096: for (;;)
1097: {
1098: c = nextc();
1099: if (c != '%') break;
1100: s_cptr = cptr;
1101: switch (keyword())
1102: {
1103: case MARK:
1104: no_grammar();
1105:
1106: case TEXT:
1107: copy_text();
1108: break;
1109:
1110: case START:
1111: declare_start();
1112: break;
1113:
1114: default:
1115: syntax_error(lineno, line, s_cptr);
1116: }
1117: }
1118:
1119: c = nextc();
1120: if (!isalpha(c) && c != '_' && c != '.' && c != '_')
1121: syntax_error(lineno, line, cptr);
1122: bp = get_name();
1123: if (goal == 0)
1124: {
1125: if (bp->class == TERM)
1126: terminal_start(bp->name);
1127: goal = bp;
1128: }
1129:
1130: s_lineno = lineno;
1131: c = nextc();
1132: if (c == EOF) unexpected_EOF();
1133: if (c != ':') syntax_error(lineno, line, cptr);
1134: start_rule(bp, s_lineno);
1135: ++cptr;
1136: }
1137:
1138:
1139: start_rule(bp, s_lineno)
1140: register bucket *bp;
1141: int s_lineno;
1142: {
1143: if (bp->class == TERM)
1144: terminal_lhs(s_lineno);
1145: bp->class = NONTERM;
1146: if (nrules >= maxrules)
1147: expand_rules();
1148: plhs[nrules] = bp;
1149: rprec[nrules] = UNDEFINED;
1150: rassoc[nrules] = TOKEN;
1151: }
1152:
1153:
1154: end_rule()
1155: {
1156: register int i;
1157:
1158: if (!last_was_action && plhs[nrules]->tag)
1159: {
1160: for (i = nitems - 1; pitem[i]; --i) continue;
1161: if (pitem[i+1] == 0 || pitem[i+1]->tag != plhs[nrules]->tag)
1162: default_action_warning();
1163: }
1164:
1165: last_was_action = 0;
1166: if (nitems >= maxitems) expand_items();
1167: pitem[nitems] = 0;
1168: ++nitems;
1169: ++nrules;
1170: }
1171:
1172:
1173: insert_empty_rule()
1174: {
1175: register bucket *bp, **bpp;
1176:
1177: assert(cache);
1178: sprintf(cache, "$$%d", ++gensym);
1179: bp = make_bucket(cache);
1180: last_symbol->next = bp;
1181: last_symbol = bp;
1182: bp->tag = plhs[nrules]->tag;
1183: bp->class = NONTERM;
1184:
1185: if ((nitems += 2) > maxitems)
1186: expand_items();
1187: bpp = pitem + nitems - 1;
1188: *bpp-- = bp;
1189: while (bpp[0] = bpp[-1]) --bpp;
1190:
1191: if (++nrules >= maxrules)
1192: expand_rules();
1193: plhs[nrules] = plhs[nrules-1];
1194: plhs[nrules-1] = bp;
1195: rprec[nrules] = rprec[nrules-1];
1196: rprec[nrules-1] = 0;
1197: rassoc[nrules] = rassoc[nrules-1];
1198: rassoc[nrules-1] = TOKEN;
1199: }
1200:
1201:
1202: add_symbol()
1203: {
1204: register int c;
1205: register bucket *bp;
1206: int s_lineno = lineno;
1207:
1208: c = *cptr;
1209: if (c == '\'' || c == '"')
1210: bp = get_literal();
1211: else
1212: bp = get_name();
1213:
1214: c = nextc();
1215: if (c == ':')
1216: {
1217: end_rule();
1218: start_rule(bp, s_lineno);
1219: ++cptr;
1220: return;
1221: }
1222:
1223: if (last_was_action)
1224: insert_empty_rule();
1225: last_was_action = 0;
1226:
1227: if (++nitems > maxitems)
1228: expand_items();
1229: pitem[nitems-1] = bp;
1230: }
1231:
1232:
1233: copy_action()
1234: {
1235: register int c;
1236: register int i, n;
1237: int depth;
1238: int quote;
1239: char *tag;
1240: register FILE *f = action_file;
1241: int a_lineno = lineno;
1242: char *a_line = dup_line();
1243: char *a_cptr = a_line + (cptr - line);
1244:
1245: if (last_was_action)
1246: insert_empty_rule();
1247: last_was_action = 1;
1248:
1249: fprintf(f, "case %d:\n", nrules - 2);
1250: if (!lflag)
1251: fprintf(f, line_format, lineno, input_file_name);
1252: if (*cptr == '=') ++cptr;
1253:
1254: n = 0;
1255: for (i = nitems - 1; pitem[i]; --i) ++n;
1256:
1257: depth = 0;
1258: loop:
1259: c = *cptr;
1260: if (c == '$')
1261: {
1262: if (cptr[1] == '<')
1263: {
1264: int d_lineno = lineno;
1265: char *d_line = dup_line();
1266: char *d_cptr = d_line + (cptr - line);
1267:
1268: ++cptr;
1269: tag = get_tag();
1270: c = *cptr;
1271: if (c == '$')
1272: {
1273: fprintf(f, "yyval.%s", tag);
1274: ++cptr;
1275: FREE(d_line);
1276: goto loop;
1277: }
1278: else if (isdigit(c))
1279: {
1280: i = get_number();
1281: if (i > n) dollar_warning(d_lineno, i);
1282: fprintf(f, "yyvsp[%d].%s", i - n, tag);
1283: FREE(d_line);
1284: goto loop;
1285: }
1286: else if (c == '-' && isdigit(cptr[1]))
1287: {
1288: ++cptr;
1289: i = -get_number() - n;
1290: fprintf(f, "yyvsp[%d].%s", i, tag);
1291: FREE(d_line);
1292: goto loop;
1293: }
1294: else
1295: dollar_error(d_lineno, d_line, d_cptr);
1296: }
1297: else if (cptr[1] == '$')
1298: {
1299: if (ntags)
1300: {
1301: tag = plhs[nrules]->tag;
1302: if (tag == 0) untyped_lhs();
1303: fprintf(f, "yyval.%s", tag);
1304: }
1305: else
1306: fprintf(f, "yyval");
1307: cptr += 2;
1308: goto loop;
1309: }
1310: else if (isdigit(cptr[1]))
1311: {
1312: ++cptr;
1313: i = get_number();
1314: if (ntags)
1315: {
1316: if (i <= 0 || i > n)
1317: unknown_rhs(i);
1318: tag = pitem[nitems + i - n - 1]->tag;
1319: if (tag == 0) untyped_rhs(i, pitem[nitems + i - n - 1]->name);
1320: fprintf(f, "yyvsp[%d].%s", i - n, tag);
1321: }
1322: else
1323: {
1324: if (i > n)
1325: dollar_warning(lineno, i);
1326: fprintf(f, "yyvsp[%d]", i - n);
1327: }
1328: goto loop;
1329: }
1330: else if (cptr[1] == '-')
1331: {
1332: cptr += 2;
1333: i = get_number();
1334: if (ntags)
1335: unknown_rhs(-i);
1336: fprintf(f, "yyvsp[%d]", -i - n);
1337: goto loop;
1338: }
1339: }
1340: if (isalpha(c) || c == '_' || c == '$')
1341: {
1342: do
1343: {
1344: putc(c, f);
1345: c = *++cptr;
1346: } while (isalnum(c) || c == '_' || c == '$');
1347: goto loop;
1348: }
1349: putc(c, f);
1350: ++cptr;
1351: switch (c)
1352: {
1353: case '\n':
1354: next_line:
1355: get_line();
1356: if (line) goto loop;
1357: unterminated_action(a_lineno, a_line, a_cptr);
1358:
1359: case ';':
1360: if (depth > 0) goto loop;
1361: fprintf(f, "\nbreak;\n");
1362: return;
1363:
1364: case '{':
1365: ++depth;
1366: goto loop;
1367:
1368: case '}':
1369: if (--depth > 0) goto loop;
1370: fprintf(f, "\nbreak;\n");
1371: return;
1372:
1373: case '\'':
1374: case '"':
1375: {
1376: int s_lineno = lineno;
1377: char *s_line = dup_line();
1378: char *s_cptr = s_line + (cptr - line - 1);
1379:
1380: quote = c;
1381: for (;;)
1382: {
1383: c = *cptr++;
1384: putc(c, f);
1385: if (c == quote)
1386: {
1387: FREE(s_line);
1388: goto loop;
1389: }
1390: if (c == '\n')
1391: unterminated_string(s_lineno, s_line, s_cptr);
1392: if (c == '\\')
1393: {
1394: c = *cptr++;
1395: putc(c, f);
1396: if (c == '\n')
1397: {
1398: get_line();
1399: if (line == 0)
1400: unterminated_string(s_lineno, s_line, s_cptr);
1401: }
1402: }
1403: }
1404: }
1405:
1406: case '/':
1407: c = *cptr;
1408: if (c == '/')
1409: {
1410: putc('*', f);
1411: while ((c = *++cptr) != '\n')
1412: {
1413: if (c == '*' && cptr[1] == '/')
1414: fprintf(f, "* ");
1415: else
1416: putc(c, f);
1417: }
1418: fprintf(f, "*/\n");
1419: goto next_line;
1420: }
1421: if (c == '*')
1422: {
1423: int c_lineno = lineno;
1424: char *c_line = dup_line();
1425: char *c_cptr = c_line + (cptr - line - 1);
1426:
1427: putc('*', f);
1428: ++cptr;
1429: for (;;)
1430: {
1431: c = *cptr++;
1432: putc(c, f);
1433: if (c == '*' && *cptr == '/')
1434: {
1435: putc('/', f);
1436: ++cptr;
1437: FREE(c_line);
1438: goto loop;
1439: }
1440: if (c == '\n')
1441: {
1442: get_line();
1443: if (line == 0)
1444: unterminated_comment(c_lineno, c_line, c_cptr);
1445: }
1446: }
1447: }
1448: goto loop;
1449:
1450: default:
1451: goto loop;
1452: }
1453: }
1454:
1455:
1456: int
1457: mark_symbol()
1458: {
1459: register int c;
1460: register bucket *bp;
1461:
1462: c = cptr[1];
1463: if (c == '%' || c == '\\')
1464: {
1465: cptr += 2;
1466: return (1);
1467: }
1468:
1469: if (c == '=')
1470: cptr += 2;
1471: else if ((c == 'p' || c == 'P') &&
1472: ((c = cptr[2]) == 'r' || c == 'R') &&
1473: ((c = cptr[3]) == 'e' || c == 'E') &&
1474: ((c = cptr[4]) == 'c' || c == 'C') &&
1475: ((c = cptr[5], !IS_IDENT(c))))
1476: cptr += 5;
1477: else
1478: syntax_error(lineno, line, cptr);
1479:
1480: c = nextc();
1481: if (isalpha(c) || c == '_' || c == '.' || c == '$')
1482: bp = get_name();
1483: else if (c == '\'' || c == '"')
1484: bp = get_literal();
1485: else
1486: {
1487: syntax_error(lineno, line, cptr);
1488: /*NOTREACHED*/
1489: }
1490:
1491: if (rprec[nrules] != UNDEFINED && bp->prec != rprec[nrules])
1492: prec_redeclared();
1493:
1494: rprec[nrules] = bp->prec;
1495: rassoc[nrules] = bp->assoc;
1496: return (0);
1497: }
1498:
1499:
1500: read_grammar()
1501: {
1502: register int c;
1503:
1504: initialize_grammar();
1505: advance_to_start();
1506:
1507: for (;;)
1508: {
1509: c = nextc();
1510: if (c == EOF) break;
1511: if (isalpha(c) || c == '_' || c == '.' || c == '$' || c == '\'' ||
1512: c == '"')
1513: add_symbol();
1514: else if (c == '{' || c == '=')
1515: copy_action();
1516: else if (c == '|')
1517: {
1518: end_rule();
1519: start_rule(plhs[nrules-1], 0);
1520: ++cptr;
1521: }
1522: else if (c == '%')
1523: {
1524: if (mark_symbol()) break;
1525: }
1526: else
1527: syntax_error(lineno, line, cptr);
1528: }
1529: end_rule();
1530: }
1531:
1532:
1533: free_tags()
1534: {
1535: register int i;
1536:
1537: if (tag_table == 0) return;
1538:
1539: for (i = 0; i < ntags; ++i)
1540: {
1541: assert(tag_table[i]);
1542: FREE(tag_table[i]);
1543: }
1544: FREE(tag_table);
1545: }
1546:
1547:
1548: pack_names()
1549: {
1550: register bucket *bp;
1551: register char *p, *s, *t;
1552:
1553: name_pool_size = 13; /* 13 == sizeof("$end") + sizeof("$accept") */
1554: for (bp = first_symbol; bp; bp = bp->next)
1555: name_pool_size += strlen(bp->name) + 1;
1556: name_pool = MALLOC(name_pool_size);
1557: if (name_pool == 0) no_space();
1558:
1559: strcpy(name_pool, "$accept");
1560: strcpy(name_pool+8, "$end");
1561: t = name_pool + 13;
1562: for (bp = first_symbol; bp; bp = bp->next)
1563: {
1564: p = t;
1565: s = bp->name;
1566: while (*t++ = *s++) continue;
1567: FREE(bp->name);
1568: bp->name = p;
1569: }
1570: }
1571:
1572:
1573: check_symbols()
1574: {
1575: register bucket *bp;
1576:
1577: if (goal->class == UNKNOWN)
1578: undefined_goal(goal->name);
1579:
1580: for (bp = first_symbol; bp; bp = bp->next)
1581: {
1582: if (bp->class == UNKNOWN)
1583: {
1584: undefined_symbol_warning(bp->name);
1585: bp->class = TERM;
1586: }
1587: }
1588: }
1589:
1590:
1591: pack_symbols()
1592: {
1593: register bucket *bp;
1594: register bucket **v;
1595: register int i, j, k, n;
1596:
1597: nsyms = 2;
1598: ntokens = 1;
1599: for (bp = first_symbol; bp; bp = bp->next)
1600: {
1601: ++nsyms;
1602: if (bp->class == TERM) ++ntokens;
1603: }
1604: start_symbol = ntokens;
1605: nvars = nsyms - ntokens;
1606:
1607: symbol_name = (char **) MALLOC(nsyms*sizeof(char *));
1608: if (symbol_name == 0) no_space();
1609: symbol_value = (short *) MALLOC(nsyms*sizeof(short));
1610: if (symbol_value == 0) no_space();
1611: symbol_prec = (short *) MALLOC(nsyms*sizeof(short));
1612: if (symbol_prec == 0) no_space();
1613: symbol_assoc = MALLOC(nsyms);
1614: if (symbol_assoc == 0) no_space();
1615:
1616: v = (bucket **) MALLOC(nsyms*sizeof(bucket *));
1617: if (v == 0) no_space();
1618:
1619: v[0] = 0;
1620: v[start_symbol] = 0;
1621:
1622: i = 1;
1623: j = start_symbol + 1;
1624: for (bp = first_symbol; bp; bp = bp->next)
1625: {
1626: if (bp->class == TERM)
1627: v[i++] = bp;
1628: else
1629: v[j++] = bp;
1630: }
1631: assert(i == ntokens && j == nsyms);
1632:
1633: for (i = 1; i < ntokens; ++i)
1634: v[i]->index = i;
1635:
1636: goal->index = start_symbol + 1;
1637: k = start_symbol + 2;
1638: while (++i < nsyms)
1639: if (v[i] != goal)
1640: {
1641: v[i]->index = k;
1642: ++k;
1643: }
1644:
1645: goal->value = 0;
1646: k = 1;
1647: for (i = start_symbol + 1; i < nsyms; ++i)
1648: {
1649: if (v[i] != goal)
1650: {
1651: v[i]->value = k;
1652: ++k;
1653: }
1654: }
1655:
1656: k = 0;
1657: for (i = 1; i < ntokens; ++i)
1658: {
1659: n = v[i]->value;
1660: if (n > 256)
1661: {
1662: for (j = k++; j > 0 && symbol_value[j-1] > n; --j)
1663: symbol_value[j] = symbol_value[j-1];
1664: symbol_value[j] = n;
1665: }
1666: }
1667:
1668: if (v[1]->value == UNDEFINED)
1669: v[1]->value = 256;
1670:
1671: j = 0;
1672: n = 257;
1673: for (i = 2; i < ntokens; ++i)
1674: {
1675: if (v[i]->value == UNDEFINED)
1676: {
1677: while (j < k && n == symbol_value[j])
1678: {
1679: while (++j < k && n == symbol_value[j]) continue;
1680: ++n;
1681: }
1682: v[i]->value = n;
1683: ++n;
1684: }
1685: }
1686:
1687: symbol_name[0] = name_pool + 8;
1688: symbol_value[0] = 0;
1689: symbol_prec[0] = 0;
1690: symbol_assoc[0] = TOKEN;
1691: for (i = 1; i < ntokens; ++i)
1692: {
1693: symbol_name[i] = v[i]->name;
1694: symbol_value[i] = v[i]->value;
1695: symbol_prec[i] = v[i]->prec;
1696: symbol_assoc[i] = v[i]->assoc;
1697: }
1698: symbol_name[start_symbol] = name_pool;
1699: symbol_value[start_symbol] = -1;
1700: symbol_prec[start_symbol] = 0;
1701: symbol_assoc[start_symbol] = TOKEN;
1702: for (++i; i < nsyms; ++i)
1703: {
1704: k = v[i]->index;
1705: symbol_name[k] = v[i]->name;
1706: symbol_value[k] = v[i]->value;
1707: symbol_prec[k] = v[i]->prec;
1708: symbol_assoc[k] = v[i]->assoc;
1709: }
1710:
1711: FREE(v);
1712: }
1713:
1714:
1715: pack_grammar()
1716: {
1717: register int i, j;
1718: int assoc, prec;
1719:
1720: ritem = (short *) MALLOC(nitems*sizeof(short));
1721: if (ritem == 0) no_space();
1722: rlhs = (short *) MALLOC(nrules*sizeof(short));
1723: if (rlhs == 0) no_space();
1724: rrhs = (short *) MALLOC((nrules+1)*sizeof(short));
1725: if (rrhs == 0) no_space();
1726: rprec = (short *) REALLOC(rprec, nrules*sizeof(short));
1727: if (rprec == 0) no_space();
1728: rassoc = REALLOC(rassoc, nrules);
1729: if (rassoc == 0) no_space();
1730:
1731: ritem[0] = -1;
1732: ritem[1] = goal->index;
1733: ritem[2] = 0;
1734: ritem[3] = -2;
1735: rlhs[0] = 0;
1736: rlhs[1] = 0;
1737: rlhs[2] = start_symbol;
1738: rrhs[0] = 0;
1739: rrhs[1] = 0;
1740: rrhs[2] = 1;
1741:
1742: j = 4;
1743: for (i = 3; i < nrules; ++i)
1744: {
1745: rlhs[i] = plhs[i]->index;
1746: rrhs[i] = j;
1747: assoc = TOKEN;
1748: prec = 0;
1749: while (pitem[j])
1750: {
1751: ritem[j] = pitem[j]->index;
1752: if (pitem[j]->class == TERM)
1753: {
1754: prec = pitem[j]->prec;
1755: assoc = pitem[j]->assoc;
1756: }
1757: ++j;
1758: }
1759: ritem[j] = -i;
1760: ++j;
1761: if (rprec[i] == UNDEFINED)
1762: {
1763: rprec[i] = prec;
1764: rassoc[i] = assoc;
1765: }
1766: }
1767: rrhs[i] = j;
1768:
1769: FREE(plhs);
1770: FREE(pitem);
1771: }
1772:
1773:
1774: print_grammar()
1775: {
1776: register int i, j, k;
1777: int spacing;
1778: register FILE *f = verbose_file;
1779:
1780: if (!vflag) return;
1781:
1782: k = 1;
1783: for (i = 2; i < nrules; ++i)
1784: {
1785: if (rlhs[i] != rlhs[i-1])
1786: {
1787: if (i != 2) fprintf(f, "\n");
1788: fprintf(f, "%4d %s :", i - 2, symbol_name[rlhs[i]]);
1789: spacing = strlen(symbol_name[rlhs[i]]) + 1;
1790: }
1791: else
1792: {
1793: fprintf(f, "%4d ", i - 2);
1794: j = spacing;
1795: while (--j >= 0) putc(' ', f);
1796: putc('|', f);
1797: }
1798:
1799: while (ritem[k] >= 0)
1800: {
1801: fprintf(f, " %s", symbol_name[ritem[k]]);
1802: ++k;
1803: }
1804: ++k;
1805: putc('\n', f);
1806: }
1807: }
1808:
1809:
1810: reader()
1811: {
1812: write_section(banner);
1813: create_symbol_table();
1814: read_declarations();
1815: read_grammar();
1816: free_symbol_table();
1817: free_tags();
1818: pack_names();
1819: check_symbols();
1820: pack_symbols();
1821: pack_grammar();
1822: free_symbols();
1823: print_grammar();
1824: }