Annotation of src/usr.bin/yacc/reader.c, Revision 1.1.1.1
1.1 deraadt 1: #ifndef lint
2: static char rcsid[] = "$Id: reader.c,v 1.3 1993/08/02 17:56:49 mycroft Exp $";
3: #endif /* not lint */
4:
5: #include "defs.h"
6:
7: /* The line size must be a positive integer. One hundred was chosen */
8: /* because few lines in Yacc input grammars exceed 100 characters. */
9: /* Note that if a line exceeds LINESIZE characters, the line buffer */
10: /* will be expanded to accommodate it. */
11:
12: #define LINESIZE 100
13:
14: char *cache;
15: int cinc, cache_size;
16:
17: int ntags, tagmax;
18: char **tag_table;
19:
20: char saw_eof, unionized;
21: char *cptr, *line;
22: int linesize;
23:
24: bucket *goal;
25: int prec;
26: int gensym;
27: char last_was_action;
28:
29: int maxitems;
30: bucket **pitem;
31:
32: int maxrules;
33: bucket **plhs;
34:
35: int name_pool_size;
36: char *name_pool;
37:
38: char line_format[] = "#line %d \"%s\"\n";
39:
40:
41: cachec(c)
42: int c;
43: {
44: assert(cinc >= 0);
45: if (cinc >= cache_size)
46: {
47: cache_size += 256;
48: cache = REALLOC(cache, cache_size);
49: if (cache == 0) no_space();
50: }
51: cache[cinc] = c;
52: ++cinc;
53: }
54:
55:
56: get_line()
57: {
58: register FILE *f = input_file;
59: register int c;
60: register int i;
61:
62: if (saw_eof || (c = getc(f)) == EOF)
63: {
64: if (line) { FREE(line); line = 0; }
65: cptr = 0;
66: saw_eof = 1;
67: return;
68: }
69:
70: if (line == 0 || linesize != (LINESIZE + 1))
71: {
72: if (line) FREE(line);
73: linesize = LINESIZE + 1;
74: line = MALLOC(linesize);
75: if (line == 0) no_space();
76: }
77:
78: i = 0;
79: ++lineno;
80: for (;;)
81: {
82: line[i] = c;
83: if (c == '\n') { cptr = line; return; }
84: if (++i >= linesize)
85: {
86: linesize += LINESIZE;
87: line = REALLOC(line, linesize);
88: if (line == 0) no_space();
89: }
90: c = getc(f);
91: if (c == EOF)
92: {
93: line[i] = '\n';
94: saw_eof = 1;
95: cptr = line;
96: return;
97: }
98: }
99: }
100:
101:
102: char *
103: dup_line()
104: {
105: register char *p, *s, *t;
106:
107: if (line == 0) return (0);
108: s = line;
109: while (*s != '\n') ++s;
110: p = MALLOC(s - line + 1);
111: if (p == 0) no_space();
112:
113: s = line;
114: t = p;
115: while ((*t++ = *s++) != '\n') continue;
116: return (p);
117: }
118:
119:
120: skip_comment()
121: {
122: register char *s;
123:
124: int st_lineno = lineno;
125: char *st_line = dup_line();
126: char *st_cptr = st_line + (cptr - line);
127:
128: s = cptr + 2;
129: for (;;)
130: {
131: if (*s == '*' && s[1] == '/')
132: {
133: cptr = s + 2;
134: FREE(st_line);
135: return;
136: }
137: if (*s == '\n')
138: {
139: get_line();
140: if (line == 0)
141: unterminated_comment(st_lineno, st_line, st_cptr);
142: s = cptr;
143: }
144: else
145: ++s;
146: }
147: }
148:
149:
150: int
151: nextc()
152: {
153: register char *s;
154:
155: if (line == 0)
156: {
157: get_line();
158: if (line == 0)
159: return (EOF);
160: }
161:
162: s = cptr;
163: for (;;)
164: {
165: switch (*s)
166: {
167: case '\n':
168: get_line();
169: if (line == 0) return (EOF);
170: s = cptr;
171: break;
172:
173: case ' ':
174: case '\t':
175: case '\f':
176: case '\r':
177: case '\v':
178: case ',':
179: case ';':
180: ++s;
181: break;
182:
183: case '\\':
184: cptr = s;
185: return ('%');
186:
187: case '/':
188: if (s[1] == '*')
189: {
190: cptr = s;
191: skip_comment();
192: s = cptr;
193: break;
194: }
195: else if (s[1] == '/')
196: {
197: get_line();
198: if (line == 0) return (EOF);
199: s = cptr;
200: break;
201: }
202: /* fall through */
203:
204: default:
205: cptr = s;
206: return (*s);
207: }
208: }
209: }
210:
211:
212: int
213: keyword()
214: {
215: register int c;
216: char *t_cptr = cptr;
217:
218: c = *++cptr;
219: if (isalpha(c))
220: {
221: cinc = 0;
222: for (;;)
223: {
224: if (isalpha(c))
225: {
226: if (isupper(c)) c = tolower(c);
227: cachec(c);
228: }
229: else if (isdigit(c) || c == '_' || c == '.' || c == '$')
230: cachec(c);
231: else
232: break;
233: c = *++cptr;
234: }
235: cachec(NUL);
236:
237: if (strcmp(cache, "token") == 0 || strcmp(cache, "term") == 0)
238: return (TOKEN);
239: if (strcmp(cache, "type") == 0)
240: return (TYPE);
241: if (strcmp(cache, "left") == 0)
242: return (LEFT);
243: if (strcmp(cache, "right") == 0)
244: return (RIGHT);
245: if (strcmp(cache, "nonassoc") == 0 || strcmp(cache, "binary") == 0)
246: return (NONASSOC);
247: if (strcmp(cache, "start") == 0)
248: return (START);
249: if (strcmp(cache, "union") == 0)
250: return (UNION);
251: if (strcmp(cache, "ident") == 0)
252: return (IDENT);
253: }
254: else
255: {
256: ++cptr;
257: if (c == '{')
258: return (TEXT);
259: if (c == '%' || c == '\\')
260: return (MARK);
261: if (c == '<')
262: return (LEFT);
263: if (c == '>')
264: return (RIGHT);
265: if (c == '0')
266: return (TOKEN);
267: if (c == '2')
268: return (NONASSOC);
269: }
270: syntax_error(lineno, line, t_cptr);
271: /*NOTREACHED*/
272: }
273:
274:
275: copy_ident()
276: {
277: register int c;
278: register FILE *f = output_file;
279:
280: c = nextc();
281: if (c == EOF) unexpected_EOF();
282: if (c != '"') syntax_error(lineno, line, cptr);
283: ++outline;
284: fprintf(f, "#ident \"");
285: for (;;)
286: {
287: c = *++cptr;
288: if (c == '\n')
289: {
290: fprintf(f, "\"\n");
291: return;
292: }
293: putc(c, f);
294: if (c == '"')
295: {
296: putc('\n', f);
297: ++cptr;
298: return;
299: }
300: }
301: }
302:
303:
304: copy_text()
305: {
306: register int c;
307: int quote;
308: register FILE *f = text_file;
309: int need_newline = 0;
310: int t_lineno = lineno;
311: char *t_line = dup_line();
312: char *t_cptr = t_line + (cptr - line - 2);
313:
314: if (*cptr == '\n')
315: {
316: get_line();
317: if (line == 0)
318: unterminated_text(t_lineno, t_line, t_cptr);
319: }
320: if (!lflag) fprintf(f, line_format, lineno, input_file_name);
321:
322: loop:
323: c = *cptr++;
324: switch (c)
325: {
326: case '\n':
327: next_line:
328: putc('\n', f);
329: need_newline = 0;
330: get_line();
331: if (line) goto loop;
332: unterminated_text(t_lineno, t_line, t_cptr);
333:
334: case '\'':
335: case '"':
336: {
337: int s_lineno = lineno;
338: char *s_line = dup_line();
339: char *s_cptr = s_line + (cptr - line - 1);
340:
341: quote = c;
342: putc(c, f);
343: for (;;)
344: {
345: c = *cptr++;
346: putc(c, f);
347: if (c == quote)
348: {
349: need_newline = 1;
350: FREE(s_line);
351: goto loop;
352: }
353: if (c == '\n')
354: unterminated_string(s_lineno, s_line, s_cptr);
355: if (c == '\\')
356: {
357: c = *cptr++;
358: putc(c, f);
359: if (c == '\n')
360: {
361: get_line();
362: if (line == 0)
363: unterminated_string(s_lineno, s_line, s_cptr);
364: }
365: }
366: }
367: }
368:
369: case '/':
370: putc(c, f);
371: need_newline = 1;
372: c = *cptr;
373: if (c == '/')
374: {
375: putc('*', f);
376: while ((c = *++cptr) != '\n')
377: {
378: if (c == '*' && cptr[1] == '/')
379: fprintf(f, "* ");
380: else
381: putc(c, f);
382: }
383: fprintf(f, "*/");
384: goto next_line;
385: }
386: if (c == '*')
387: {
388: int c_lineno = lineno;
389: char *c_line = dup_line();
390: char *c_cptr = c_line + (cptr - line - 1);
391:
392: putc('*', f);
393: ++cptr;
394: for (;;)
395: {
396: c = *cptr++;
397: putc(c, f);
398: if (c == '*' && *cptr == '/')
399: {
400: putc('/', f);
401: ++cptr;
402: FREE(c_line);
403: goto loop;
404: }
405: if (c == '\n')
406: {
407: get_line();
408: if (line == 0)
409: unterminated_comment(c_lineno, c_line, c_cptr);
410: }
411: }
412: }
413: need_newline = 1;
414: goto loop;
415:
416: case '%':
417: case '\\':
418: if (*cptr == '}')
419: {
420: if (need_newline) putc('\n', f);
421: ++cptr;
422: FREE(t_line);
423: return;
424: }
425: /* fall through */
426:
427: default:
428: putc(c, f);
429: need_newline = 1;
430: goto loop;
431: }
432: }
433:
434:
435: copy_union()
436: {
437: register int c;
438: int quote;
439: int depth;
440: int u_lineno = lineno;
441: char *u_line = dup_line();
442: char *u_cptr = u_line + (cptr - line - 6);
443:
444: if (unionized) over_unionized(cptr - 6);
445: unionized = 1;
446:
447: if (!lflag)
448: fprintf(text_file, line_format, lineno, input_file_name);
449:
450: fprintf(text_file, "typedef union");
451: if (dflag) fprintf(union_file, "typedef union");
452:
453: depth = 0;
454: loop:
455: c = *cptr++;
456: putc(c, text_file);
457: if (dflag) putc(c, union_file);
458: switch (c)
459: {
460: case '\n':
461: next_line:
462: get_line();
463: if (line == 0) unterminated_union(u_lineno, u_line, u_cptr);
464: goto loop;
465:
466: case '{':
467: ++depth;
468: goto loop;
469:
470: case '}':
471: if (--depth == 0)
472: {
473: fprintf(text_file, " YYSTYPE;\n");
474: FREE(u_line);
475: return;
476: }
477: goto loop;
478:
479: case '\'':
480: case '"':
481: {
482: int s_lineno = lineno;
483: char *s_line = dup_line();
484: char *s_cptr = s_line + (cptr - line - 1);
485:
486: quote = c;
487: for (;;)
488: {
489: c = *cptr++;
490: putc(c, text_file);
491: if (dflag) putc(c, union_file);
492: if (c == quote)
493: {
494: FREE(s_line);
495: goto loop;
496: }
497: if (c == '\n')
498: unterminated_string(s_lineno, s_line, s_cptr);
499: if (c == '\\')
500: {
501: c = *cptr++;
502: putc(c, text_file);
503: if (dflag) putc(c, union_file);
504: if (c == '\n')
505: {
506: get_line();
507: if (line == 0)
508: unterminated_string(s_lineno, s_line, s_cptr);
509: }
510: }
511: }
512: }
513:
514: case '/':
515: c = *cptr;
516: if (c == '/')
517: {
518: putc('*', text_file);
519: if (dflag) putc('*', union_file);
520: while ((c = *++cptr) != '\n')
521: {
522: if (c == '*' && cptr[1] == '/')
523: {
524: fprintf(text_file, "* ");
525: if (dflag) fprintf(union_file, "* ");
526: }
527: else
528: {
529: putc(c, text_file);
530: if (dflag) putc(c, union_file);
531: }
532: }
533: fprintf(text_file, "*/\n");
534: if (dflag) fprintf(union_file, "*/\n");
535: goto next_line;
536: }
537: if (c == '*')
538: {
539: int c_lineno = lineno;
540: char *c_line = dup_line();
541: char *c_cptr = c_line + (cptr - line - 1);
542:
543: putc('*', text_file);
544: if (dflag) putc('*', union_file);
545: ++cptr;
546: for (;;)
547: {
548: c = *cptr++;
549: putc(c, text_file);
550: if (dflag) putc(c, union_file);
551: if (c == '*' && *cptr == '/')
552: {
553: putc('/', text_file);
554: if (dflag) putc('/', union_file);
555: ++cptr;
556: FREE(c_line);
557: goto loop;
558: }
559: if (c == '\n')
560: {
561: get_line();
562: if (line == 0)
563: unterminated_comment(c_lineno, c_line, c_cptr);
564: }
565: }
566: }
567: goto loop;
568:
569: default:
570: goto loop;
571: }
572: }
573:
574:
/*
 * Return the numeric value (0-15) of the hexadecimal digit c, accepting
 * both upper- and lower-case letters, or -1 if c is not a hex digit.
 */
int
hexval(c)
int c;
{
    int digit = -1;

    if ('0' <= c && c <= '9')
        digit = c - '0';
    else if ('A' <= c && c <= 'F')
        digit = 10 + (c - 'A');
    else if ('a' <= c && c <= 'f')
        digit = 10 + (c - 'a');

    return (digit);
}
587:
588:
589: bucket *
590: get_literal()
591: {
592: register int c, quote;
593: register int i;
594: register int n;
595: register char *s;
596: register bucket *bp;
597: int s_lineno = lineno;
598: char *s_line = dup_line();
599: char *s_cptr = s_line + (cptr - line);
600:
601: quote = *cptr++;
602: cinc = 0;
603: for (;;)
604: {
605: c = *cptr++;
606: if (c == quote) break;
607: if (c == '\n') unterminated_string(s_lineno, s_line, s_cptr);
608: if (c == '\\')
609: {
610: char *c_cptr = cptr - 1;
611:
612: c = *cptr++;
613: switch (c)
614: {
615: case '\n':
616: get_line();
617: if (line == 0) unterminated_string(s_lineno, s_line, s_cptr);
618: continue;
619:
620: case '0': case '1': case '2': case '3':
621: case '4': case '5': case '6': case '7':
622: n = c - '0';
623: c = *cptr;
624: if (IS_OCTAL(c))
625: {
626: n = (n << 3) + (c - '0');
627: c = *++cptr;
628: if (IS_OCTAL(c))
629: {
630: n = (n << 3) + (c - '0');
631: ++cptr;
632: }
633: }
634: if (n > MAXCHAR) illegal_character(c_cptr);
635: c = n;
636: break;
637:
638: case 'x':
639: c = *cptr++;
640: n = hexval(c);
641: if (n < 0 || n >= 16)
642: illegal_character(c_cptr);
643: for (;;)
644: {
645: c = *cptr;
646: i = hexval(c);
647: if (i < 0 || i >= 16) break;
648: ++cptr;
649: n = (n << 4) + i;
650: if (n > MAXCHAR) illegal_character(c_cptr);
651: }
652: c = n;
653: break;
654:
655: case 'a': c = 7; break;
656: case 'b': c = '\b'; break;
657: case 'f': c = '\f'; break;
658: case 'n': c = '\n'; break;
659: case 'r': c = '\r'; break;
660: case 't': c = '\t'; break;
661: case 'v': c = '\v'; break;
662: }
663: }
664: cachec(c);
665: }
666: FREE(s_line);
667:
668: n = cinc;
669: s = MALLOC(n);
670: if (s == 0) no_space();
671:
672: for (i = 0; i < n; ++i)
673: s[i] = cache[i];
674:
675: cinc = 0;
676: if (n == 1)
677: cachec('\'');
678: else
679: cachec('"');
680:
681: for (i = 0; i < n; ++i)
682: {
683: c = ((unsigned char *)s)[i];
684: if (c == '\\' || c == cache[0])
685: {
686: cachec('\\');
687: cachec(c);
688: }
689: else if (isprint(c))
690: cachec(c);
691: else
692: {
693: cachec('\\');
694: switch (c)
695: {
696: case 7: cachec('a'); break;
697: case '\b': cachec('b'); break;
698: case '\f': cachec('f'); break;
699: case '\n': cachec('n'); break;
700: case '\r': cachec('r'); break;
701: case '\t': cachec('t'); break;
702: case '\v': cachec('v'); break;
703: default:
704: cachec(((c >> 6) & 7) + '0');
705: cachec(((c >> 3) & 7) + '0');
706: cachec((c & 7) + '0');
707: break;
708: }
709: }
710: }
711:
712: if (n == 1)
713: cachec('\'');
714: else
715: cachec('"');
716:
717: cachec(NUL);
718: bp = lookup(cache);
719: bp->class = TERM;
720: if (n == 1 && bp->value == UNDEFINED)
721: bp->value = *(unsigned char *)s;
722: FREE(s);
723:
724: return (bp);
725: }
726:
727:
/*
 * Return 1 if name is reserved for yacc's own use, 0 otherwise.
 *
 * The reserved names are ".", "$accept", "$end", and any name made of
 * "$$" followed by one or more decimal digits and nothing else.
 */
int
is_reserved(name)
char *name;
{
    char *s;

    if (strcmp(name, ".") == 0 ||
        strcmp(name, "$accept") == 0 ||
        strcmp(name, "$end") == 0)
        return (1);

    /*
     * The <ctype.h> classifiers require an argument representable as
     * unsigned char (or EOF); plain char may be negative, so cast first.
     */
    if (name[0] == '$' && name[1] == '$' &&
        isdigit((unsigned char) name[2]))
    {
        s = name + 3;
        while (isdigit((unsigned char) *s))
            ++s;
        if (*s == '\0')
            return (1);
    }

    return (0);
}
748:
749:
750: bucket *
751: get_name()
752: {
753: register int c;
754:
755: cinc = 0;
756: for (c = *cptr; IS_IDENT(c); c = *++cptr)
757: cachec(c);
758: cachec(NUL);
759:
760: if (is_reserved(cache)) used_reserved(cache);
761:
762: return (lookup(cache));
763: }
764:
765:
766: int
767: get_number()
768: {
769: register int c;
770: register int n;
771:
772: n = 0;
773: for (c = *cptr; isdigit(c); c = *++cptr)
774: n = 10*n + (c - '0');
775:
776: return (n);
777: }
778:
779:
780: char *
781: get_tag()
782: {
783: register int c;
784: register int i;
785: register char *s;
786: int t_lineno = lineno;
787: char *t_line = dup_line();
788: char *t_cptr = t_line + (cptr - line);
789:
790: ++cptr;
791: c = nextc();
792: if (c == EOF) unexpected_EOF();
793: if (!isalpha(c) && c != '_' && c != '$')
794: illegal_tag(t_lineno, t_line, t_cptr);
795:
796: cinc = 0;
797: do { cachec(c); c = *++cptr; } while (IS_IDENT(c));
798: cachec(NUL);
799:
800: c = nextc();
801: if (c == EOF) unexpected_EOF();
802: if (c != '>')
803: illegal_tag(t_lineno, t_line, t_cptr);
804: ++cptr;
805:
806: for (i = 0; i < ntags; ++i)
807: {
808: if (strcmp(cache, tag_table[i]) == 0)
809: return (tag_table[i]);
810: }
811:
812: if (ntags >= tagmax)
813: {
814: tagmax += 16;
815: tag_table = (char **)
816: (tag_table ? REALLOC(tag_table, tagmax*sizeof(char *))
817: : MALLOC(tagmax*sizeof(char *)));
818: if (tag_table == 0) no_space();
819: }
820:
821: s = MALLOC(cinc);
822: if (s == 0) no_space();
823: strcpy(s, cache);
824: tag_table[ntags] = s;
825: ++ntags;
826: FREE(t_line);
827: return (s);
828: }
829:
830:
831: declare_tokens(assoc)
832: int assoc;
833: {
834: register int c;
835: register bucket *bp;
836: int value;
837: char *tag = 0;
838:
839: if (assoc != TOKEN) ++prec;
840:
841: c = nextc();
842: if (c == EOF) unexpected_EOF();
843: if (c == '<')
844: {
845: tag = get_tag();
846: c = nextc();
847: if (c == EOF) unexpected_EOF();
848: }
849:
850: for (;;)
851: {
852: if (isalpha(c) || c == '_' || c == '.' || c == '$')
853: bp = get_name();
854: else if (c == '\'' || c == '"')
855: bp = get_literal();
856: else
857: return;
858:
859: if (bp == goal) tokenized_start(bp->name);
860: bp->class = TERM;
861:
862: if (tag)
863: {
864: if (bp->tag && tag != bp->tag)
865: retyped_warning(bp->name);
866: bp->tag = tag;
867: }
868:
869: if (assoc != TOKEN)
870: {
871: if (bp->prec && prec != bp->prec)
872: reprec_warning(bp->name);
873: bp->assoc = assoc;
874: bp->prec = prec;
875: }
876:
877: c = nextc();
878: if (c == EOF) unexpected_EOF();
879: value = UNDEFINED;
880: if (isdigit(c))
881: {
882: value = get_number();
883: if (bp->value != UNDEFINED && value != bp->value)
884: revalued_warning(bp->name);
885: bp->value = value;
886: c = nextc();
887: if (c == EOF) unexpected_EOF();
888: }
889: }
890: }
891:
892:
893: declare_types()
894: {
895: register int c;
896: register bucket *bp;
897: char *tag;
898:
899: c = nextc();
900: if (c == EOF) unexpected_EOF();
901: if (c != '<') syntax_error(lineno, line, cptr);
902: tag = get_tag();
903:
904: for (;;)
905: {
906: c = nextc();
907: if (isalpha(c) || c == '_' || c == '.' || c == '$')
908: bp = get_name();
909: else if (c == '\'' || c == '"')
910: bp = get_literal();
911: else
912: return;
913:
914: if (bp->tag && tag != bp->tag)
915: retyped_warning(bp->name);
916: bp->tag = tag;
917: }
918: }
919:
920:
921: declare_start()
922: {
923: register int c;
924: register bucket *bp;
925:
926: c = nextc();
927: if (c == EOF) unexpected_EOF();
928: if (!isalpha(c) && c != '_' && c != '.' && c != '$')
929: syntax_error(lineno, line, cptr);
930: bp = get_name();
931: if (bp->class == TERM)
932: terminal_start(bp->name);
933: if (goal && goal != bp)
934: restarted_warning();
935: goal = bp;
936: }
937:
938:
939: read_declarations()
940: {
941: register int c, k;
942:
943: cache_size = 256;
944: cache = MALLOC(cache_size);
945: if (cache == 0) no_space();
946:
947: for (;;)
948: {
949: c = nextc();
950: if (c == EOF) unexpected_EOF();
951: if (c != '%') syntax_error(lineno, line, cptr);
952: switch (k = keyword())
953: {
954: case MARK:
955: return;
956:
957: case IDENT:
958: copy_ident();
959: break;
960:
961: case TEXT:
962: copy_text();
963: break;
964:
965: case UNION:
966: copy_union();
967: break;
968:
969: case TOKEN:
970: case LEFT:
971: case RIGHT:
972: case NONASSOC:
973: declare_tokens(k);
974: break;
975:
976: case TYPE:
977: declare_types();
978: break;
979:
980: case START:
981: declare_start();
982: break;
983: }
984: }
985: }
986:
987:
988: initialize_grammar()
989: {
990: nitems = 4;
991: maxitems = 300;
992: pitem = (bucket **) MALLOC(maxitems*sizeof(bucket *));
993: if (pitem == 0) no_space();
994: pitem[0] = 0;
995: pitem[1] = 0;
996: pitem[2] = 0;
997: pitem[3] = 0;
998:
999: nrules = 3;
1000: maxrules = 100;
1001: plhs = (bucket **) MALLOC(maxrules*sizeof(bucket *));
1002: if (plhs == 0) no_space();
1003: plhs[0] = 0;
1004: plhs[1] = 0;
1005: plhs[2] = 0;
1006: rprec = (short *) MALLOC(maxrules*sizeof(short));
1007: if (rprec == 0) no_space();
1008: rprec[0] = 0;
1009: rprec[1] = 0;
1010: rprec[2] = 0;
1011: rassoc = (char *) MALLOC(maxrules*sizeof(char));
1012: if (rassoc == 0) no_space();
1013: rassoc[0] = TOKEN;
1014: rassoc[1] = TOKEN;
1015: rassoc[2] = TOKEN;
1016: }
1017:
1018:
1019: expand_items()
1020: {
1021: maxitems += 300;
1022: pitem = (bucket **) REALLOC(pitem, maxitems*sizeof(bucket *));
1023: if (pitem == 0) no_space();
1024: }
1025:
1026:
1027: expand_rules()
1028: {
1029: maxrules += 100;
1030: plhs = (bucket **) REALLOC(plhs, maxrules*sizeof(bucket *));
1031: if (plhs == 0) no_space();
1032: rprec = (short *) REALLOC(rprec, maxrules*sizeof(short));
1033: if (rprec == 0) no_space();
1034: rassoc = (char *) REALLOC(rassoc, maxrules*sizeof(char));
1035: if (rassoc == 0) no_space();
1036: }
1037:
1038:
1039: advance_to_start()
1040: {
1041: register int c;
1042: register bucket *bp;
1043: char *s_cptr;
1044: int s_lineno;
1045:
1046: for (;;)
1047: {
1048: c = nextc();
1049: if (c != '%') break;
1050: s_cptr = cptr;
1051: switch (keyword())
1052: {
1053: case MARK:
1054: no_grammar();
1055:
1056: case TEXT:
1057: copy_text();
1058: break;
1059:
1060: case START:
1061: declare_start();
1062: break;
1063:
1064: default:
1065: syntax_error(lineno, line, s_cptr);
1066: }
1067: }
1068:
1069: c = nextc();
1070: if (!isalpha(c) && c != '_' && c != '.' && c != '_')
1071: syntax_error(lineno, line, cptr);
1072: bp = get_name();
1073: if (goal == 0)
1074: {
1075: if (bp->class == TERM)
1076: terminal_start(bp->name);
1077: goal = bp;
1078: }
1079:
1080: s_lineno = lineno;
1081: c = nextc();
1082: if (c == EOF) unexpected_EOF();
1083: if (c != ':') syntax_error(lineno, line, cptr);
1084: start_rule(bp, s_lineno);
1085: ++cptr;
1086: }
1087:
1088:
1089: start_rule(bp, s_lineno)
1090: register bucket *bp;
1091: int s_lineno;
1092: {
1093: if (bp->class == TERM)
1094: terminal_lhs(s_lineno);
1095: bp->class = NONTERM;
1096: if (nrules >= maxrules)
1097: expand_rules();
1098: plhs[nrules] = bp;
1099: rprec[nrules] = UNDEFINED;
1100: rassoc[nrules] = TOKEN;
1101: }
1102:
1103:
1104: end_rule()
1105: {
1106: register int i;
1107:
1108: if (!last_was_action && plhs[nrules]->tag)
1109: {
1110: for (i = nitems - 1; pitem[i]; --i) continue;
1111: if (pitem[i+1] == 0 || pitem[i+1]->tag != plhs[nrules]->tag)
1112: default_action_warning();
1113: }
1114:
1115: last_was_action = 0;
1116: if (nitems >= maxitems) expand_items();
1117: pitem[nitems] = 0;
1118: ++nitems;
1119: ++nrules;
1120: }
1121:
1122:
1123: insert_empty_rule()
1124: {
1125: register bucket *bp, **bpp;
1126:
1127: assert(cache);
1128: sprintf(cache, "$$%d", ++gensym);
1129: bp = make_bucket(cache);
1130: last_symbol->next = bp;
1131: last_symbol = bp;
1132: bp->tag = plhs[nrules]->tag;
1133: bp->class = NONTERM;
1134:
1135: if ((nitems += 2) > maxitems)
1136: expand_items();
1137: bpp = pitem + nitems - 1;
1138: *bpp-- = bp;
1139: while (bpp[0] = bpp[-1]) --bpp;
1140:
1141: if (++nrules >= maxrules)
1142: expand_rules();
1143: plhs[nrules] = plhs[nrules-1];
1144: plhs[nrules-1] = bp;
1145: rprec[nrules] = rprec[nrules-1];
1146: rprec[nrules-1] = 0;
1147: rassoc[nrules] = rassoc[nrules-1];
1148: rassoc[nrules-1] = TOKEN;
1149: }
1150:
1151:
1152: add_symbol()
1153: {
1154: register int c;
1155: register bucket *bp;
1156: int s_lineno = lineno;
1157:
1158: c = *cptr;
1159: if (c == '\'' || c == '"')
1160: bp = get_literal();
1161: else
1162: bp = get_name();
1163:
1164: c = nextc();
1165: if (c == ':')
1166: {
1167: end_rule();
1168: start_rule(bp, s_lineno);
1169: ++cptr;
1170: return;
1171: }
1172:
1173: if (last_was_action)
1174: insert_empty_rule();
1175: last_was_action = 0;
1176:
1177: if (++nitems > maxitems)
1178: expand_items();
1179: pitem[nitems-1] = bp;
1180: }
1181:
1182:
1183: copy_action()
1184: {
1185: register int c;
1186: register int i, n;
1187: int depth;
1188: int quote;
1189: char *tag;
1190: register FILE *f = action_file;
1191: int a_lineno = lineno;
1192: char *a_line = dup_line();
1193: char *a_cptr = a_line + (cptr - line);
1194:
1195: if (last_was_action)
1196: insert_empty_rule();
1197: last_was_action = 1;
1198:
1199: fprintf(f, "case %d:\n", nrules - 2);
1200: if (!lflag)
1201: fprintf(f, line_format, lineno, input_file_name);
1202: if (*cptr == '=') ++cptr;
1203:
1204: n = 0;
1205: for (i = nitems - 1; pitem[i]; --i) ++n;
1206:
1207: depth = 0;
1208: loop:
1209: c = *cptr;
1210: if (c == '$')
1211: {
1212: if (cptr[1] == '<')
1213: {
1214: int d_lineno = lineno;
1215: char *d_line = dup_line();
1216: char *d_cptr = d_line + (cptr - line);
1217:
1218: ++cptr;
1219: tag = get_tag();
1220: c = *cptr;
1221: if (c == '$')
1222: {
1223: fprintf(f, "yyval.%s", tag);
1224: ++cptr;
1225: FREE(d_line);
1226: goto loop;
1227: }
1228: else if (isdigit(c))
1229: {
1230: i = get_number();
1231: if (i > n) dollar_warning(d_lineno, i);
1232: fprintf(f, "yyvsp[%d].%s", i - n, tag);
1233: FREE(d_line);
1234: goto loop;
1235: }
1236: else if (c == '-' && isdigit(cptr[1]))
1237: {
1238: ++cptr;
1239: i = -get_number() - n;
1240: fprintf(f, "yyvsp[%d].%s", i, tag);
1241: FREE(d_line);
1242: goto loop;
1243: }
1244: else
1245: dollar_error(d_lineno, d_line, d_cptr);
1246: }
1247: else if (cptr[1] == '$')
1248: {
1249: if (ntags)
1250: {
1251: tag = plhs[nrules]->tag;
1252: if (tag == 0) untyped_lhs();
1253: fprintf(f, "yyval.%s", tag);
1254: }
1255: else
1256: fprintf(f, "yyval");
1257: cptr += 2;
1258: goto loop;
1259: }
1260: else if (isdigit(cptr[1]))
1261: {
1262: ++cptr;
1263: i = get_number();
1264: if (ntags)
1265: {
1266: if (i <= 0 || i > n)
1267: unknown_rhs(i);
1268: tag = pitem[nitems + i - n - 1]->tag;
1269: if (tag == 0) untyped_rhs(i, pitem[nitems + i - n - 1]->name);
1270: fprintf(f, "yyvsp[%d].%s", i - n, tag);
1271: }
1272: else
1273: {
1274: if (i > n)
1275: dollar_warning(lineno, i);
1276: fprintf(f, "yyvsp[%d]", i - n);
1277: }
1278: goto loop;
1279: }
1280: else if (cptr[1] == '-')
1281: {
1282: cptr += 2;
1283: i = get_number();
1284: if (ntags)
1285: unknown_rhs(-i);
1286: fprintf(f, "yyvsp[%d]", -i - n);
1287: goto loop;
1288: }
1289: }
1290: if (isalpha(c) || c == '_' || c == '$')
1291: {
1292: do
1293: {
1294: putc(c, f);
1295: c = *++cptr;
1296: } while (isalnum(c) || c == '_' || c == '$');
1297: goto loop;
1298: }
1299: putc(c, f);
1300: ++cptr;
1301: switch (c)
1302: {
1303: case '\n':
1304: next_line:
1305: get_line();
1306: if (line) goto loop;
1307: unterminated_action(a_lineno, a_line, a_cptr);
1308:
1309: case ';':
1310: if (depth > 0) goto loop;
1311: fprintf(f, "\nbreak;\n");
1312: return;
1313:
1314: case '{':
1315: ++depth;
1316: goto loop;
1317:
1318: case '}':
1319: if (--depth > 0) goto loop;
1320: fprintf(f, "\nbreak;\n");
1321: return;
1322:
1323: case '\'':
1324: case '"':
1325: {
1326: int s_lineno = lineno;
1327: char *s_line = dup_line();
1328: char *s_cptr = s_line + (cptr - line - 1);
1329:
1330: quote = c;
1331: for (;;)
1332: {
1333: c = *cptr++;
1334: putc(c, f);
1335: if (c == quote)
1336: {
1337: FREE(s_line);
1338: goto loop;
1339: }
1340: if (c == '\n')
1341: unterminated_string(s_lineno, s_line, s_cptr);
1342: if (c == '\\')
1343: {
1344: c = *cptr++;
1345: putc(c, f);
1346: if (c == '\n')
1347: {
1348: get_line();
1349: if (line == 0)
1350: unterminated_string(s_lineno, s_line, s_cptr);
1351: }
1352: }
1353: }
1354: }
1355:
1356: case '/':
1357: c = *cptr;
1358: if (c == '/')
1359: {
1360: putc('*', f);
1361: while ((c = *++cptr) != '\n')
1362: {
1363: if (c == '*' && cptr[1] == '/')
1364: fprintf(f, "* ");
1365: else
1366: putc(c, f);
1367: }
1368: fprintf(f, "*/\n");
1369: goto next_line;
1370: }
1371: if (c == '*')
1372: {
1373: int c_lineno = lineno;
1374: char *c_line = dup_line();
1375: char *c_cptr = c_line + (cptr - line - 1);
1376:
1377: putc('*', f);
1378: ++cptr;
1379: for (;;)
1380: {
1381: c = *cptr++;
1382: putc(c, f);
1383: if (c == '*' && *cptr == '/')
1384: {
1385: putc('/', f);
1386: ++cptr;
1387: FREE(c_line);
1388: goto loop;
1389: }
1390: if (c == '\n')
1391: {
1392: get_line();
1393: if (line == 0)
1394: unterminated_comment(c_lineno, c_line, c_cptr);
1395: }
1396: }
1397: }
1398: goto loop;
1399:
1400: default:
1401: goto loop;
1402: }
1403: }
1404:
1405:
1406: int
1407: mark_symbol()
1408: {
1409: register int c;
1410: register bucket *bp;
1411:
1412: c = cptr[1];
1413: if (c == '%' || c == '\\')
1414: {
1415: cptr += 2;
1416: return (1);
1417: }
1418:
1419: if (c == '=')
1420: cptr += 2;
1421: else if ((c == 'p' || c == 'P') &&
1422: ((c = cptr[2]) == 'r' || c == 'R') &&
1423: ((c = cptr[3]) == 'e' || c == 'E') &&
1424: ((c = cptr[4]) == 'c' || c == 'C') &&
1425: ((c = cptr[5], !IS_IDENT(c))))
1426: cptr += 5;
1427: else
1428: syntax_error(lineno, line, cptr);
1429:
1430: c = nextc();
1431: if (isalpha(c) || c == '_' || c == '.' || c == '$')
1432: bp = get_name();
1433: else if (c == '\'' || c == '"')
1434: bp = get_literal();
1435: else
1436: {
1437: syntax_error(lineno, line, cptr);
1438: /*NOTREACHED*/
1439: }
1440:
1441: if (rprec[nrules] != UNDEFINED && bp->prec != rprec[nrules])
1442: prec_redeclared();
1443:
1444: rprec[nrules] = bp->prec;
1445: rassoc[nrules] = bp->assoc;
1446: return (0);
1447: }
1448:
1449:
1450: read_grammar()
1451: {
1452: register int c;
1453:
1454: initialize_grammar();
1455: advance_to_start();
1456:
1457: for (;;)
1458: {
1459: c = nextc();
1460: if (c == EOF) break;
1461: if (isalpha(c) || c == '_' || c == '.' || c == '$' || c == '\'' ||
1462: c == '"')
1463: add_symbol();
1464: else if (c == '{' || c == '=')
1465: copy_action();
1466: else if (c == '|')
1467: {
1468: end_rule();
1469: start_rule(plhs[nrules-1], 0);
1470: ++cptr;
1471: }
1472: else if (c == '%')
1473: {
1474: if (mark_symbol()) break;
1475: }
1476: else
1477: syntax_error(lineno, line, cptr);
1478: }
1479: end_rule();
1480: }
1481:
1482:
1483: free_tags()
1484: {
1485: register int i;
1486:
1487: if (tag_table == 0) return;
1488:
1489: for (i = 0; i < ntags; ++i)
1490: {
1491: assert(tag_table[i]);
1492: FREE(tag_table[i]);
1493: }
1494: FREE(tag_table);
1495: }
1496:
1497:
1498: pack_names()
1499: {
1500: register bucket *bp;
1501: register char *p, *s, *t;
1502:
1503: name_pool_size = 13; /* 13 == sizeof("$end") + sizeof("$accept") */
1504: for (bp = first_symbol; bp; bp = bp->next)
1505: name_pool_size += strlen(bp->name) + 1;
1506: name_pool = MALLOC(name_pool_size);
1507: if (name_pool == 0) no_space();
1508:
1509: strcpy(name_pool, "$accept");
1510: strcpy(name_pool+8, "$end");
1511: t = name_pool + 13;
1512: for (bp = first_symbol; bp; bp = bp->next)
1513: {
1514: p = t;
1515: s = bp->name;
1516: while (*t++ = *s++) continue;
1517: FREE(bp->name);
1518: bp->name = p;
1519: }
1520: }
1521:
1522:
1523: check_symbols()
1524: {
1525: register bucket *bp;
1526:
1527: if (goal->class == UNKNOWN)
1528: undefined_goal(goal->name);
1529:
1530: for (bp = first_symbol; bp; bp = bp->next)
1531: {
1532: if (bp->class == UNKNOWN)
1533: {
1534: undefined_symbol_warning(bp->name);
1535: bp->class = TERM;
1536: }
1537: }
1538: }
1539:
1540:
1541: pack_symbols()
1542: {
1543: register bucket *bp;
1544: register bucket **v;
1545: register int i, j, k, n;
1546:
1547: nsyms = 2;
1548: ntokens = 1;
1549: for (bp = first_symbol; bp; bp = bp->next)
1550: {
1551: ++nsyms;
1552: if (bp->class == TERM) ++ntokens;
1553: }
1554: start_symbol = ntokens;
1555: nvars = nsyms - ntokens;
1556:
1557: symbol_name = (char **) MALLOC(nsyms*sizeof(char *));
1558: if (symbol_name == 0) no_space();
1559: symbol_value = (short *) MALLOC(nsyms*sizeof(short));
1560: if (symbol_value == 0) no_space();
1561: symbol_prec = (short *) MALLOC(nsyms*sizeof(short));
1562: if (symbol_prec == 0) no_space();
1563: symbol_assoc = MALLOC(nsyms);
1564: if (symbol_assoc == 0) no_space();
1565:
1566: v = (bucket **) MALLOC(nsyms*sizeof(bucket *));
1567: if (v == 0) no_space();
1568:
1569: v[0] = 0;
1570: v[start_symbol] = 0;
1571:
1572: i = 1;
1573: j = start_symbol + 1;
1574: for (bp = first_symbol; bp; bp = bp->next)
1575: {
1576: if (bp->class == TERM)
1577: v[i++] = bp;
1578: else
1579: v[j++] = bp;
1580: }
1581: assert(i == ntokens && j == nsyms);
1582:
1583: for (i = 1; i < ntokens; ++i)
1584: v[i]->index = i;
1585:
1586: goal->index = start_symbol + 1;
1587: k = start_symbol + 2;
1588: while (++i < nsyms)
1589: if (v[i] != goal)
1590: {
1591: v[i]->index = k;
1592: ++k;
1593: }
1594:
1595: goal->value = 0;
1596: k = 1;
1597: for (i = start_symbol + 1; i < nsyms; ++i)
1598: {
1599: if (v[i] != goal)
1600: {
1601: v[i]->value = k;
1602: ++k;
1603: }
1604: }
1605:
1606: k = 0;
1607: for (i = 1; i < ntokens; ++i)
1608: {
1609: n = v[i]->value;
1610: if (n > 256)
1611: {
1612: for (j = k++; j > 0 && symbol_value[j-1] > n; --j)
1613: symbol_value[j] = symbol_value[j-1];
1614: symbol_value[j] = n;
1615: }
1616: }
1617:
1618: if (v[1]->value == UNDEFINED)
1619: v[1]->value = 256;
1620:
1621: j = 0;
1622: n = 257;
1623: for (i = 2; i < ntokens; ++i)
1624: {
1625: if (v[i]->value == UNDEFINED)
1626: {
1627: while (j < k && n == symbol_value[j])
1628: {
1629: while (++j < k && n == symbol_value[j]) continue;
1630: ++n;
1631: }
1632: v[i]->value = n;
1633: ++n;
1634: }
1635: }
1636:
1637: symbol_name[0] = name_pool + 8;
1638: symbol_value[0] = 0;
1639: symbol_prec[0] = 0;
1640: symbol_assoc[0] = TOKEN;
1641: for (i = 1; i < ntokens; ++i)
1642: {
1643: symbol_name[i] = v[i]->name;
1644: symbol_value[i] = v[i]->value;
1645: symbol_prec[i] = v[i]->prec;
1646: symbol_assoc[i] = v[i]->assoc;
1647: }
1648: symbol_name[start_symbol] = name_pool;
1649: symbol_value[start_symbol] = -1;
1650: symbol_prec[start_symbol] = 0;
1651: symbol_assoc[start_symbol] = TOKEN;
1652: for (++i; i < nsyms; ++i)
1653: {
1654: k = v[i]->index;
1655: symbol_name[k] = v[i]->name;
1656: symbol_value[k] = v[i]->value;
1657: symbol_prec[k] = v[i]->prec;
1658: symbol_assoc[k] = v[i]->assoc;
1659: }
1660:
1661: FREE(v);
1662: }
1663:
1664:
1665: pack_grammar()
1666: {
1667: register int i, j;
1668: int assoc, prec;
1669:
1670: ritem = (short *) MALLOC(nitems*sizeof(short));
1671: if (ritem == 0) no_space();
1672: rlhs = (short *) MALLOC(nrules*sizeof(short));
1673: if (rlhs == 0) no_space();
1674: rrhs = (short *) MALLOC((nrules+1)*sizeof(short));
1675: if (rrhs == 0) no_space();
1676: rprec = (short *) REALLOC(rprec, nrules*sizeof(short));
1677: if (rprec == 0) no_space();
1678: rassoc = REALLOC(rassoc, nrules);
1679: if (rassoc == 0) no_space();
1680:
1681: ritem[0] = -1;
1682: ritem[1] = goal->index;
1683: ritem[2] = 0;
1684: ritem[3] = -2;
1685: rlhs[0] = 0;
1686: rlhs[1] = 0;
1687: rlhs[2] = start_symbol;
1688: rrhs[0] = 0;
1689: rrhs[1] = 0;
1690: rrhs[2] = 1;
1691:
1692: j = 4;
1693: for (i = 3; i < nrules; ++i)
1694: {
1695: rlhs[i] = plhs[i]->index;
1696: rrhs[i] = j;
1697: assoc = TOKEN;
1698: prec = 0;
1699: while (pitem[j])
1700: {
1701: ritem[j] = pitem[j]->index;
1702: if (pitem[j]->class == TERM)
1703: {
1704: prec = pitem[j]->prec;
1705: assoc = pitem[j]->assoc;
1706: }
1707: ++j;
1708: }
1709: ritem[j] = -i;
1710: ++j;
1711: if (rprec[i] == UNDEFINED)
1712: {
1713: rprec[i] = prec;
1714: rassoc[i] = assoc;
1715: }
1716: }
1717: rrhs[i] = j;
1718:
1719: FREE(plhs);
1720: FREE(pitem);
1721: }
1722:
1723:
1724: print_grammar()
1725: {
1726: register int i, j, k;
1727: int spacing;
1728: register FILE *f = verbose_file;
1729:
1730: if (!vflag) return;
1731:
1732: k = 1;
1733: for (i = 2; i < nrules; ++i)
1734: {
1735: if (rlhs[i] != rlhs[i-1])
1736: {
1737: if (i != 2) fprintf(f, "\n");
1738: fprintf(f, "%4d %s :", i - 2, symbol_name[rlhs[i]]);
1739: spacing = strlen(symbol_name[rlhs[i]]) + 1;
1740: }
1741: else
1742: {
1743: fprintf(f, "%4d ", i - 2);
1744: j = spacing;
1745: while (--j >= 0) putc(' ', f);
1746: putc('|', f);
1747: }
1748:
1749: while (ritem[k] >= 0)
1750: {
1751: fprintf(f, " %s", symbol_name[ritem[k]]);
1752: ++k;
1753: }
1754: ++k;
1755: putc('\n', f);
1756: }
1757: }
1758:
1759:
1760: reader()
1761: {
1762: write_section(banner);
1763: create_symbol_table();
1764: read_declarations();
1765: read_grammar();
1766: free_symbol_table();
1767: free_tags();
1768: pack_names();
1769: check_symbols();
1770: pack_symbols();
1771: pack_grammar();
1772: free_symbols();
1773: print_grammar();
1774: }