Annotation of src/usr.bin/bc/bc.y, Revision 1.10
1.1 otto 1: %{
1.10 ! otto 2: /* $OpenBSD: bc.y,v 1.9 2003/09/29 03:24:27 otto Exp $ */
1.1 otto 3:
4: /*
5: * Copyright (c) 2003, Otto Moerbeek <otto@drijf.net>
6: *
7: * Permission to use, copy, modify, and distribute this software for any
8: * purpose with or without fee is hereby granted, provided that the above
9: * copyright notice and this permission notice appear in all copies.
10: *
11: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18: */
19:
20: /*
21: * This implementation of bc(1) uses concepts from the original 4.4
22: * BSD bc(1). The code itself is a complete rewrite, based on the
23: * Posix defined bc(1) grammar. Other differences include type safe
24: * usage of pointers to build the tree of emitted code, typed yacc
25: * rule values, dynamic allocation of all data structures and a
26: * completely rewritten lexical analyzer using lex(1).
27: *
28: * Some effort has been made to make sure that the generated code is
29: * the same as the code generated by the older version, to provide
30: * easy regression testing.
31: */
32:
33: #ifndef lint
1.10 ! otto 34: static const char rcsid[] = "$OpenBSD: bc.y,v 1.9 2003/09/29 03:24:27 otto Exp $";
1.1 otto 35: #endif /* not lint */
36:
37: #include <ctype.h>
38: #include <err.h>
39: #include <limits.h>
40: #include <signal.h>
41: #include <stdarg.h>
42: #include <stdbool.h>
43: #include <string.h>
44: #include <unistd.h>
45:
46: #include "extern.h"
47: #include "pathnames.h"
48:
49: #define END_NODE ((ssize_t) -1)
50: #define CONST_STRING ((ssize_t) -2)
51: #define ALLOC_STRING ((ssize_t) -3)
52:
53: struct tree {
54: ssize_t index;
55: union {
56: char *astr;
57: const char *cstr;
58: } u;
59: };
60:
61: int yyparse(void);
62: int yywrap(void);
63:
64: static void grow(void);
65: static ssize_t cs(const char *);
66: static ssize_t as(const char *);
67: static ssize_t node(ssize_t, ...);
68: static void emit(ssize_t);
69: static void emit_macro(int, ssize_t);
70: static void free_tree(void);
71: static ssize_t numnode(int);
72: static void add_par(ssize_t);
73: static void add_local(ssize_t);
74: static void warning(const char *);
75: static void init(void);
76: static __dead void usage(void);
77:
78: static size_t instr_sz = 0;
79: static struct tree *instructions = NULL;
1.7 otto 80: static ssize_t current = 0;
1.1 otto 81: static int macro_char = '0';
82: static int reset_macro_char = '0';
83: static int nesting = 0;
84: static int breakstack[16];
85: static int breaksp = 0;
86: static ssize_t prologue;
87: static ssize_t epilogue;
88: static char str_table[UCHAR_MAX][2];
89: static int sargc;
90: static char **sargv;
91: static char *filename;
92: static bool do_fork = true;
93:
94: extern char *__progname;
95:
96: #define BREAKSTACK_SZ (sizeof(breakstack)/sizeof(breakstack[0]))
97:
98: /* These values are 4.4BSD dc compatible */
99: #define FUNC_CHAR 0x01
100: #define ARRAY_CHAR 0xa1
101:
102: #define LETTER_NODE(str) (cs(str_table[(int)str[0]]))
103: #define ARRAY_NODE(str) (cs(str_table[(int)str[0] - 'a' + ARRAY_CHAR]))
104: #define FUNCTION_NODE(str) (cs(str_table[(int)str[0] - 'a' + FUNC_CHAR]))
105:
106: %}
107:
108: %start program
109:
110: %union {
111: ssize_t node;
112: struct lvalue lvalue;
113: const char *str;
114: }
115:
1.9 otto 116: %token COMMA SEMICOLON LPAR RPAR LBRACE RBRACE LBRACKET RBRACKET DOT
117: %token NEWLINE
1.1 otto 118: %token <str> LETTER NUMBER STRING
119: %token DEFINE BREAK QUIT LENGTH
120: %token RETURN FOR IF WHILE SQRT
121: %token SCALE IBASE OBASE AUTO
122:
123: %nonassoc EQUALS LESS_EQ GREATER_EQ UNEQUALS LESS GREATER
124: %right <str> ASSIGN_OP
125: %left PLUS MINUS
126: %left MULTIPLY DIVIDE REMAINDER
127: %left EXPONENT
128: %nonassoc UMINUS
129: %nonassoc INCR DECR
130:
131: %type <lvalue> named_expression
132: %type <node> argument_list
133: %type <node> alloc_macro
134: %type <node> expression
135: %type <node> function
136: %type <node> function_header
137: %type <node> input_item
138: %type <node> opt_argument_list
139: %type <node> opt_statement
140: %type <node> relational_expression
141: %type <node> return_expression
142: %type <node> semicolon_list
143: %type <node> statement
144: %type <node> statement_list
145:
146: %%
147:
148: program : /* empty */
149: {
150: putchar('q');
151: fflush(stdout);
152: exit(0);
153: }
154: | input_item program
155: ;
156:
157: input_item : semicolon_list NEWLINE
158: {
159: emit($1);
160: macro_char = reset_macro_char;
161: putchar('\n');
162: free_tree();
163: }
164: | function
165: {
166: putchar('\n');
167: free_tree();
168: }
1.8 otto 169: | error NEWLINE
1.1 otto 170: {
1.8 otto 171: yyerrok;
1.1 otto 172: }
173: ;
174:
175: semicolon_list : /* empty */
176: {
177: $$ = cs("");
178: }
179: | statement
180: | semicolon_list SEMICOLON statement
181: {
182: $$ = node($1, $3, END_NODE);
183: }
184: | semicolon_list SEMICOLON
185: ;
186:
187: statement_list : /* empty */
188: {
189: $$ = cs("");
190: }
191: | statement
192: | statement_list NEWLINE
193: | statement_list NEWLINE statement
194: {
195: $$ = node($1, $3, END_NODE);
196: }
197: | statement_list SEMICOLON
198: | statement_list SEMICOLON statement
199: {
200: $$ = node($1, $3, END_NODE);
201: }
202: ;
203:
204:
205: opt_statement : /* empty */
206: {
207: $$ = cs("");
208: }
209: | statement
210: ;
211:
212: statement : expression
213: {
214: $$ = node($1, cs("ps."), END_NODE);
215: }
216: | named_expression ASSIGN_OP expression
217: {
218: $$ = node($3, cs($2), $1.store, END_NODE);
219: }
220: | STRING
221: {
222: $$ = node(cs("["), as($1),
223: cs("]P"), END_NODE);
224: }
225: | BREAK
226: {
227: if (breaksp == 0) {
228: warning("break not in for or while");
229: YYERROR;
230: } else {
231: $$ = node(
232: numnode(nesting -
233: breakstack[breaksp-1]),
234: cs("Q"), END_NODE);
235: }
236: }
237: | QUIT
238: {
239: putchar('q');
240: fflush(stdout);
241: exit(0);
242: }
243: | RETURN
244: {
245: if (nesting == 0) {
1.8 otto 246: warning("return must be in a function");
1.1 otto 247: YYERROR;
248: }
249: $$ = node(cs("0"), epilogue,
250: numnode(nesting), cs("Q"), END_NODE);
251: }
252: | RETURN LPAR return_expression RPAR
253: {
254: if (nesting == 0) {
1.8 otto 255: warning("return must be in a function");
1.1 otto 256: YYERROR;
257: }
258: $$ = $3;
259: }
260: | FOR LPAR alloc_macro expression SEMICOLON
261: relational_expression SEMICOLON
262: expression RPAR opt_statement pop_nesting
263: {
264: int n = node($10, $8, cs("s."), $6, $3,
265: END_NODE);
266: emit_macro($3, n);
267: $$ = node($4, cs("s."), $6, $3, cs(" "),
268: END_NODE);
269: }
270: | IF LPAR alloc_macro pop_nesting relational_expression RPAR
271: opt_statement
272: {
273: emit_macro($3, $7);
274: $$ = node($5, $3, cs(" "), END_NODE);
275: }
276: | WHILE LPAR alloc_macro relational_expression RPAR
277: opt_statement pop_nesting
278: {
279: int n = node($6, $4, $3, END_NODE);
280: emit_macro($3, n);
281: $$ = node($4, $3, cs(" "), END_NODE);
282: }
283: | LBRACE statement_list RBRACE
284: {
285: $$ = $2;
286: }
287: ;
288:
289: alloc_macro : /* empty */
290: {
291: $$ = cs(str_table[macro_char]);
292: macro_char++;
293: /* Do not use [, \ and ] */
294: if (macro_char == '[')
295: macro_char += 3;
296: /* skip letters */
297: else if (macro_char == 'a')
298: macro_char = '{';
299: else if (macro_char == ARRAY_CHAR)
300: macro_char += 26;
301: else if (macro_char == 256)
302: fatal("program too big");
303: if (breaksp == BREAKSTACK_SZ)
304: fatal("nesting too deep");
305: breakstack[breaksp++] = nesting++;
306: }
307: pop_nesting : /* empty */
308: {
309: breaksp--;
310: }
311:
312: function : function_header opt_parameter_list RPAR
313: LBRACE NEWLINE opt_auto_define_list
314: statement_list RBRACE
315: {
316: int n = node(prologue, $7, epilogue,
317: cs("0"), numnode(nesting),
318: cs("Q"), END_NODE);
319: emit_macro($1, n);
320: reset_macro_char = macro_char;
321: nesting = 0;
322: breaksp = 0;
323: }
324: ;
325:
326: function_header : DEFINE LETTER LPAR
327: {
328: $$ = FUNCTION_NODE($2);
329: prologue = cs("");
330: epilogue = cs("");
331: nesting = 1;
332: breaksp = 0;
333: breakstack[breaksp] = 0;
334: }
335:
336: opt_parameter_list
337: : /* empty */
338: | parameter_list
339: ;
340:
341:
342: parameter_list : LETTER
343: {
344: add_par(LETTER_NODE($1));
345: }
346: | LETTER LBRACKET RBRACKET
347: {
348: add_par(ARRAY_NODE($1));
349: }
350: | parameter_list COMMA LETTER
351: {
352: add_par(LETTER_NODE($3));
353: }
354: | parameter_list COMMA LETTER LBRACKET RBRACKET
355: {
356: add_par(ARRAY_NODE($3));
357: }
358: ;
359:
360:
361:
362: opt_auto_define_list
363: : /* empty */
364: | AUTO define_list NEWLINE
365: | AUTO define_list SEMICOLON
366: ;
367:
368:
369: define_list : LETTER
370: {
371: add_local(LETTER_NODE($1));
372: }
373: | LETTER LBRACKET RBRACKET
374: {
375: add_local(ARRAY_NODE($1));
376: }
377: | define_list COMMA LETTER
378: {
379: add_local(LETTER_NODE($3));
380: }
381: | define_list COMMA LETTER LBRACKET RBRACKET
382: {
383: add_local(ARRAY_NODE($3));
384: }
385: ;
386:
387:
388: opt_argument_list
389: : /* empty */
390: {
391: $$ = cs("");
392: }
393: | argument_list
394: ;
395:
396:
397: argument_list : expression
398: | argument_list COMMA expression
399: {
400: $$ = node($1, $3, END_NODE);
401: }
402: | argument_list COMMA LETTER LBRACKET RBRACKET
403: {
404: $$ = node($1, cs("l"), ARRAY_NODE($3),
1.3 deraadt 405: END_NODE);
1.1 otto 406: }
407: ;
408:
409:
410: relational_expression
411: : expression
412: {
413: $$ = node($1, cs(" 0!="), END_NODE);
414: }
415: | expression EQUALS expression
416: {
417: $$ = node($1, $3, cs("="), END_NODE);
418: }
419: | expression UNEQUALS expression
420: {
421: $$ = node($1, $3, cs("!="), END_NODE);
422: }
423: | expression LESS expression
424: {
425: $$ = node($1, $3, cs(">"), END_NODE);
426: }
427: | expression LESS_EQ expression
428: {
429: $$ = node($1, $3, cs("!<"), END_NODE);
430: }
431: | expression GREATER expression
432: {
433: $$ = node($1, $3, cs("<"), END_NODE);
434: }
435: | expression GREATER_EQ expression
436: {
437: $$ = node($1, $3, cs("!>"), END_NODE);
438: }
439: ;
440:
441:
442: return_expression
443: : /* empty */
444: {
445: $$ = node(cs("0"), epilogue,
446: numnode(nesting), cs("Q"), END_NODE);
447: }
448: | expression
449: {
450: $$ = node($1, epilogue,
451: numnode(nesting), cs("Q"), END_NODE);
452: }
453: ;
454:
455:
456: expression : named_expression
457: {
458: $$ = node($1.load, END_NODE);
1.9 otto 459: }
460: | DOT {
461: $$ = node(cs("l."), END_NODE);
1.1 otto 462: }
463: | NUMBER
464: {
465: $$ = node(cs(" "), as($1), END_NODE);
466: }
467: | LPAR expression RPAR
468: {
469: $$ = $2;
470: }
471: | LETTER LPAR opt_argument_list RPAR
472: {
473: $$ = node($3, cs("l"),
474: FUNCTION_NODE($1), cs("x"),
475: END_NODE);
476: }
477: | MINUS expression %prec UMINUS
478: {
479: $$ = node(cs(" 0"), $2, cs("-"),
480: END_NODE);
481: }
482: | expression PLUS expression
483: {
484: $$ = node($1, $3, cs("+"), END_NODE);
485: }
486: | expression MINUS expression
487: {
488: $$ = node($1, $3, cs("-"), END_NODE);
489: }
490: | expression MULTIPLY expression
491: {
492: $$ = node($1, $3, cs("*"), END_NODE);
493: }
494: | expression DIVIDE expression
495: {
496: $$ = node($1, $3, cs("/"), END_NODE);
497: }
498: | expression REMAINDER expression
499: {
500: $$ = node($1, $3, cs("%"), END_NODE);
501: }
502: | expression EXPONENT expression
503: {
504: $$ = node($1, $3, cs("^"), END_NODE);
505: }
506: | INCR named_expression
507: {
508: $$ = node($2.load, cs("1+d"), $2.store,
509: END_NODE);
510: }
511: | DECR named_expression
512: {
513: $$ = node($2.load, cs("1-d"),
514: $2.store, END_NODE);
515: }
516: | named_expression INCR
517: {
518: $$ = node($1.load, cs("d1+"),
519: $1.store, END_NODE);
520: }
521: | named_expression DECR
522: {
523: $$ = node($1.load, cs("d1-"),
524: $1.store, END_NODE);
525: }
526: | named_expression ASSIGN_OP expression
527: {
528: $$ = node($3, cs($2), cs("d"),
529: $1.store, END_NODE);
530: }
531: | LENGTH LPAR expression RPAR
532: {
533: $$ = node($3, cs("Z"), END_NODE);
534: }
535: | SQRT LPAR expression RPAR
536: {
537: $$ = node($3, cs("v"), END_NODE);
538: }
539: | SCALE LPAR expression RPAR
540: {
541: $$ = node($3, cs("X"), END_NODE);
542: }
543: ;
544:
545: named_expression
546: : LETTER
547: {
548: $$.load = node(cs("l"), LETTER_NODE($1),
549: END_NODE);
550: $$.store = node(cs("s"), LETTER_NODE($1),
551: END_NODE);
552: }
553: | LETTER LBRACKET expression RBRACKET
554: {
555: $$.load = node($3, cs(";"),
556: ARRAY_NODE($1), END_NODE);
557: $$.store = node($3, cs(":"),
558: ARRAY_NODE($1), END_NODE);
559: }
560: | SCALE
561: {
562: $$.load = cs("K");
563: $$.store = cs("k");
564: }
565: | IBASE
566: {
567: $$.load = cs("I");
568: $$.store = cs("i");
569: }
570: | OBASE
571: {
572: $$.load = cs("O");
573: $$.store = cs("o");
574: }
575: ;
576: %%
577:
578:
579: static void
580: grow(void)
581: {
582: struct tree *p;
583: int newsize;
584:
585: if (current == instr_sz) {
586: newsize = instr_sz * 2 + 1;
587: p = realloc(instructions, newsize * sizeof(*p));
588: if (p == NULL) {
589: free(instructions);
590: err(1, "cannot realloc instruction buffer");
591: }
592: instructions = p;
593: instr_sz = newsize;
594: }
595: }
596:
1.7 otto 597: static ssize_t
1.1 otto 598: cs(const char *str)
599: {
600: grow();
601: instructions[current].index = CONST_STRING;
602: instructions[current].u.cstr = str;
603: return current++;
604: }
605:
1.7 otto 606: static ssize_t
1.1 otto 607: as(const char *str)
608: {
609: grow();
610: instructions[current].index = ALLOC_STRING;
611: instructions[current].u.astr = strdup(str);
1.2 otto 612: if (instructions[current].u.astr == NULL)
613: err(1, "cannot allocate string");
1.1 otto 614: return current++;
615: }
616:
617: static ssize_t
618: node(ssize_t arg, ...)
619: {
620: va_list ap;
621: ssize_t ret;
622:
623: va_start(ap, arg);
624:
625: ret = current;
626: grow();
627: instructions[current++].index = arg;
628:
629: do {
630: arg = va_arg(ap, ssize_t);
631: grow();
632: instructions[current++].index = arg;
633: } while (arg != END_NODE);
634:
635: va_end(ap);
636: return ret;
637: }
638:
639: static void
640: emit(ssize_t i)
641: {
642: if (instructions[i].index >= 0)
643: while (instructions[i].index != END_NODE)
644: emit(instructions[i++].index);
645: else
646: fputs(instructions[i].u.cstr, stdout);
647: }
648:
649: static void
650: emit_macro(int node, ssize_t code)
651: {
652: putchar('[');
653: emit(code);
654: printf("]s%s\n", instructions[node].u.cstr);
655: nesting--;
656: }
657:
658: static void
659: free_tree(void)
660: {
661: size_t i;
662:
663: for (i = 0; i < current; i++)
664: if (instructions[i].index == ALLOC_STRING)
665: free(instructions[i].u.astr);
666: current = 0;
667: }
668:
669: static ssize_t
670: numnode(int num)
671: {
672: const char *p;
673:
674: if (num < 10)
675: p = str_table['0' + num];
676: else if (num < 16)
677: p = str_table['A' - 10 + num];
678: else
679: err(1, "internal error: break num > 15");
680: return node(cs(" "), cs(p), END_NODE);
681: }
682:
683: static void
684: add_par(ssize_t n)
685: {
686: prologue = node(cs("S"), n, prologue, END_NODE);
687: epilogue = node(epilogue, cs("L"), n, cs("s."), END_NODE);
688: }
689:
690: static void
691: add_local(ssize_t n)
692: {
693: prologue = node(cs("0S"), n, prologue, END_NODE);
694: epilogue = node(epilogue, cs("L"), n, cs("s."), END_NODE);
695: }
696:
697: int
698: yywrap(void)
699: {
700: if (optind < sargc) {
701: filename = sargv[optind++];
702: yyin = fopen(filename, "r");
1.8 otto 703: lineno = 1;
1.1 otto 704: if (yyin == NULL)
705: err(1, "cannot open %s", filename);
706: return 0;
1.6 deraadt 707: } else if (optind == sargc) {
1.1 otto 708: optind++;
709: yyin = stdin;
1.8 otto 710: lineno = 1;
1.1 otto 711: filename = "stdin";
712: return 0;
713: }
714: return 1;
715: }
716:
717: void
718: yyerror(char *s)
719: {
1.10 ! otto 720: char *str, *p;
! 721:
! 722: if (isspace(yytext[0]) || !isprint(yytext[0]))
! 723: asprintf(&str, "%s: %s:%d: %s: ascii char 0x%x unexpected",
! 724: __progname, filename, lineno, s, yytext[0]);
1.1 otto 725: else
1.10 ! otto 726: asprintf(&str, "%s: %s:%d: %s: %s unexpected",
1.8 otto 727: __progname, filename, lineno, s, yytext);
1.10 ! otto 728: if (str == NULL)
! 729: err(1, "cannot allocate string");
! 730:
! 731: fputs("c[", stdout);
! 732: for (p = str; *p != '\0'; p++) {
! 733: if (*p == '[' || *p == ']' || *p =='\\')
! 734: putchar('\\');
! 735: putchar(*p);
! 736: }
! 737: fputs("]pc\n", stdout);
! 738: free(str);
1.1 otto 739: }
740:
1.8 otto 741: void
1.1 otto 742: fatal(const char *s)
743: {
744: errx(1, "%s:%d: %s", filename, lineno, s);
745: }
746:
747: static void
748: warning(const char *s)
749: {
750: warnx("%s:%d: %s", filename, lineno, s);
751: }
752:
753: static void
754: init(void)
755: {
756: int i;
757:
758: for (i = 0; i < UCHAR_MAX; i++) {
759: str_table[i][0] = i;
760: str_table[i][1] = '\0';
761: }
762: }
763:
764:
765: static __dead void
766: usage(void)
767: {
1.4 deraadt 768: fprintf(stderr, "%s: usage: [-cl] [file ...]\n", __progname);
1.1 otto 769: exit(1);
770: }
771:
772:
773: int
774: main(int argc, char *argv[])
775: {
776: int ch, ret;
777: int p[2];
778:
779: init();
780: setlinebuf(stdout);
781:
782: /* The d debug option is 4.4 BSD dc(1) compatible */
783: while ((ch = getopt(argc, argv, "cdl")) != -1) {
784: switch (ch) {
785: case 'c':
786: case 'd':
787: do_fork = false;
788: break;
789: case 'l':
790: argv[1] = _PATH_LIBB;
791: optind = 1;
792: break;
793: default:
1.5 deraadt 794: usage();
1.1 otto 795: }
796: }
797:
798: sargc = argc;
799: sargv = argv;
800:
801: if (do_fork) {
802: if (pipe(p) == -1)
803: err(1, "cannot create pipe");
804: ret = fork();
805: if (ret == -1)
806: err(1, "cannot fork");
807: else if (ret == 0) {
808: close(STDOUT_FILENO);
809: dup(p[1]);
810: close(p[0]);
811: close(p[1]);
1.6 deraadt 812: } else {
1.1 otto 813: signal(SIGINT, SIG_IGN);
814: close(STDIN_FILENO);
815: dup(p[0]);
816: close(p[0]);
817: close(p[1]);
818: execl(_PATH_DC, "dc", "-", (char *)NULL);
819: err(1, "cannot find dc");
820: }
821: }
822: signal(SIGINT, abort_line);
823: yywrap();
824: return yyparse();
825: }