Annotation of src/usr.bin/bc/bc.y, Revision 1.3
1.1 otto 1: %{
1.3 ! deraadt 2: /* $OpenBSD: bc.y,v 1.2 2003/09/26 07:23:06 otto Exp $ */
1.1 otto 3:
4: /*
5: * Copyright (c) 2003, Otto Moerbeek <otto@drijf.net>
6: *
7: * Permission to use, copy, modify, and distribute this software for any
8: * purpose with or without fee is hereby granted, provided that the above
9: * copyright notice and this permission notice appear in all copies.
10: *
11: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18: */
19:
20: /*
21: * This implementation of bc(1) uses concepts from the original 4.4
22: * BSD bc(1). The code itself is a complete rewrite, based on the
23: * Posix defined bc(1) grammar. Other differences include type safe
24: * usage of pointers to build the tree of emitted code, typed yacc
25: * rule values, dynamic allocation of all data structures and a
26: * completely rewritten lexical analyzer using lex(1).
27: *
28: * Some effort has been made to make sure that the generated code is
29: * the same as the code generated by the older version, to provide
30: * easy regression testing.
31: */
32:
33: #ifndef lint
1.3 ! deraadt 34: static const char rcsid[] = "$OpenBSD: bc.y,v 1.2 2003/09/26 07:23:06 otto Exp $";
1.1 otto 35: #endif /* not lint */
36:
37: #include <ctype.h>
38: #include <err.h>
39: #include <limits.h>
40: #include <signal.h>
41: #include <stdarg.h>
42: #include <stdbool.h>
43: #include <string.h>
44: #include <unistd.h>
45:
46: #include "extern.h"
47: #include "pathnames.h"
48:
49: #define END_NODE ((ssize_t) -1)
50: #define CONST_STRING ((ssize_t) -2)
51: #define ALLOC_STRING ((ssize_t) -3)
52:
53: struct tree {
54: ssize_t index;
55: union {
56: char *astr;
57: const char *cstr;
58: } u;
59: };
60:
61: int yyparse(void);
62: int yywrap(void);
63: void yyerror(char *);
64:
65: static void grow(void);
66: static ssize_t cs(const char *);
67: static ssize_t as(const char *);
68: static ssize_t node(ssize_t, ...);
69: static void emit(ssize_t);
70: static void emit_macro(int, ssize_t);
71: static void free_tree(void);
72: static ssize_t numnode(int);
73: static void add_par(ssize_t);
74: static void add_local(ssize_t);
75: static void fatal(const char *);
76: static void warning(const char *);
77: static void init(void);
78: static __dead void usage(void);
79:
80: static size_t instr_sz = 0;
81: static struct tree *instructions = NULL;
82: static size_t current = 0;
83: static int macro_char = '0';
84: static int reset_macro_char = '0';
85: static int nesting = 0;
86: static int breakstack[16];
87: static int breaksp = 0;
88: static ssize_t prologue;
89: static ssize_t epilogue;
90: static char str_table[UCHAR_MAX][2];
91: static int sargc;
92: static char **sargv;
93: static char *filename;
94: static bool do_fork = true;
95:
96: extern char *__progname;
97:
98: #define BREAKSTACK_SZ (sizeof(breakstack)/sizeof(breakstack[0]))
99:
100: /* These values are 4.4BSD dc compatible */
101: #define FUNC_CHAR 0x01
102: #define ARRAY_CHAR 0xa1
103:
104: #define LETTER_NODE(str) (cs(str_table[(int)str[0]]))
105: #define ARRAY_NODE(str) (cs(str_table[(int)str[0] - 'a' + ARRAY_CHAR]))
106: #define FUNCTION_NODE(str) (cs(str_table[(int)str[0] - 'a' + FUNC_CHAR]))
107:
108: %}
109:
110: %start program
111:
112: %union {
113: ssize_t node;
114: struct lvalue lvalue;
115: const char *str;
116: }
117:
118: %token COMMA SEMICOLON LPAR RPAR LBRACE RBRACE LBRACKET RBRACKET
119: %token ENDOFFILE NEWLINE
120: %token <str> LETTER NUMBER STRING
121: %token DEFINE BREAK QUIT LENGTH
122: %token RETURN FOR IF WHILE SQRT
123: %token SCALE IBASE OBASE AUTO
124:
125: %nonassoc EQUALS LESS_EQ GREATER_EQ UNEQUALS LESS GREATER
126: %right <str> ASSIGN_OP
127: %left PLUS MINUS
128: %left MULTIPLY DIVIDE REMAINDER
129: %left EXPONENT
130: %nonassoc UMINUS
131: %nonassoc INCR DECR
132:
133: %type <lvalue> named_expression
134: %type <node> argument_list
135: %type <node> alloc_macro
136: %type <node> expression
137: %type <node> function
138: %type <node> function_header
139: %type <node> input_item
140: %type <node> opt_argument_list
141: %type <node> opt_statement
142: %type <node> relational_expression
143: %type <node> return_expression
144: %type <node> semicolon_list
145: %type <node> statement
146: %type <node> statement_list
147:
148: %%
149:
150: program : /* empty */
151: {
152: putchar('q');
153: fflush(stdout);
154: exit(0);
155: }
156: | input_item program
157: ;
158:
159: input_item : semicolon_list NEWLINE
160: {
161: emit($1);
162: macro_char = reset_macro_char;
163: putchar('\n');
164: free_tree();
165: }
166: | function
167: {
168: putchar('\n');
169: free_tree();
170: }
171: | error
172: {
173: }
174: ;
175:
176: semicolon_list : /* empty */
177: {
178: $$ = cs("");
179: }
180: | statement
181: | semicolon_list SEMICOLON statement
182: {
183: $$ = node($1, $3, END_NODE);
184: }
185: | semicolon_list SEMICOLON
186: ;
187:
188: statement_list : /* empty */
189: {
190: $$ = cs("");
191: }
192: | statement
193: | statement_list NEWLINE
194: | statement_list NEWLINE statement
195: {
196: $$ = node($1, $3, END_NODE);
197: }
198: | statement_list SEMICOLON
199: | statement_list SEMICOLON statement
200: {
201: $$ = node($1, $3, END_NODE);
202: }
203: ;
204:
205:
206: opt_statement : /* empty */
207: {
208: $$ = cs("");
209: }
210: | statement
211: ;
212:
213: statement : expression
214: {
215: $$ = node($1, cs("ps."), END_NODE);
216: }
217: | named_expression ASSIGN_OP expression
218: {
219: $$ = node($3, cs($2), $1.store, END_NODE);
220: }
221: | STRING
222: {
223: $$ = node(cs("["), as($1),
224: cs("]P"), END_NODE);
225: }
226: | BREAK
227: {
228: if (breaksp == 0) {
229: warning("break not in for or while");
230: YYERROR;
231: } else {
232: $$ = node(
233: numnode(nesting -
234: breakstack[breaksp-1]),
235: cs("Q"), END_NODE);
236: }
237: }
238: | QUIT
239: {
240: putchar('q');
241: fflush(stdout);
242: exit(0);
243: }
244: | RETURN
245: {
246: if (nesting == 0) {
247: warnx("return must be in a function");
248: YYERROR;
249: }
250: $$ = node(cs("0"), epilogue,
251: numnode(nesting), cs("Q"), END_NODE);
252: }
253: | RETURN LPAR return_expression RPAR
254: {
255: if (nesting == 0) {
256: warnx("return must be in a function");
257: YYERROR;
258: }
259: $$ = $3;
260: }
261: | FOR LPAR alloc_macro expression SEMICOLON
262: relational_expression SEMICOLON
263: expression RPAR opt_statement pop_nesting
264: {
265: int n = node($10, $8, cs("s."), $6, $3,
266: END_NODE);
267: emit_macro($3, n);
268: $$ = node($4, cs("s."), $6, $3, cs(" "),
269: END_NODE);
270: }
271: | IF LPAR alloc_macro pop_nesting relational_expression RPAR
272: opt_statement
273: {
274: emit_macro($3, $7);
275: $$ = node($5, $3, cs(" "), END_NODE);
276: }
277: | WHILE LPAR alloc_macro relational_expression RPAR
278: opt_statement pop_nesting
279: {
280: int n = node($6, $4, $3, END_NODE);
281: emit_macro($3, n);
282: $$ = node($4, $3, cs(" "), END_NODE);
283: }
284: | LBRACE statement_list RBRACE
285: {
286: $$ = $2;
287: }
288: ;
289:
290: alloc_macro : /* empty */
291: {
292: $$ = cs(str_table[macro_char]);
293: macro_char++;
294: /* Do not use [, \ and ] */
295: if (macro_char == '[')
296: macro_char += 3;
297: /* skip letters */
298: else if (macro_char == 'a')
299: macro_char = '{';
300: else if (macro_char == ARRAY_CHAR)
301: macro_char += 26;
302: else if (macro_char == 256)
303: fatal("program too big");
304: if (breaksp == BREAKSTACK_SZ)
305: fatal("nesting too deep");
306: breakstack[breaksp++] = nesting++;
307: }
308: pop_nesting : /* empty */
309: {
310: breaksp--;
311: }
312:
313: function : function_header opt_parameter_list RPAR
314: LBRACE NEWLINE opt_auto_define_list
315: statement_list RBRACE
316: {
317: int n = node(prologue, $7, epilogue,
318: cs("0"), numnode(nesting),
319: cs("Q"), END_NODE);
320: emit_macro($1, n);
321: reset_macro_char = macro_char;
322: nesting = 0;
323: breaksp = 0;
324: }
325: ;
326:
327: function_header : DEFINE LETTER LPAR
328: {
329: $$ = FUNCTION_NODE($2);
330: prologue = cs("");
331: epilogue = cs("");
332: nesting = 1;
333: breaksp = 0;
334: breakstack[breaksp] = 0;
335: }
336:
337: opt_parameter_list
338: : /* empty */
339: | parameter_list
340: ;
341:
342:
343: parameter_list : LETTER
344: {
345: add_par(LETTER_NODE($1));
346: }
347: | LETTER LBRACKET RBRACKET
348: {
349: add_par(ARRAY_NODE($1));
350: }
351: | parameter_list COMMA LETTER
352: {
353: add_par(LETTER_NODE($3));
354: }
355: | parameter_list COMMA LETTER LBRACKET RBRACKET
356: {
357: add_par(ARRAY_NODE($3));
358: }
359: ;
360:
361:
362:
363: opt_auto_define_list
364: : /* empty */
365: | AUTO define_list NEWLINE
366: | AUTO define_list SEMICOLON
367: ;
368:
369:
370: define_list : LETTER
371: {
372: add_local(LETTER_NODE($1));
373: }
374: | LETTER LBRACKET RBRACKET
375: {
376: add_local(ARRAY_NODE($1));
377: }
378: | define_list COMMA LETTER
379: {
380: add_local(LETTER_NODE($3));
381: }
382: | define_list COMMA LETTER LBRACKET RBRACKET
383: {
384: add_local(ARRAY_NODE($3));
385: }
386: ;
387:
388:
389: opt_argument_list
390: : /* empty */
391: {
392: $$ = cs("");
393: }
394: | argument_list
395: ;
396:
397:
398: argument_list : expression
399: | argument_list COMMA expression
400: {
401: $$ = node($1, $3, END_NODE);
402: }
403: | argument_list COMMA LETTER LBRACKET RBRACKET
404: {
405: $$ = node($1, cs("l"), ARRAY_NODE($3),
1.3 ! deraadt 406: END_NODE);
1.1 otto 407: }
408: ;
409:
410:
411: relational_expression
412: : expression
413: {
414: $$ = node($1, cs(" 0!="), END_NODE);
415: }
416: | expression EQUALS expression
417: {
418: $$ = node($1, $3, cs("="), END_NODE);
419: }
420: | expression UNEQUALS expression
421: {
422: $$ = node($1, $3, cs("!="), END_NODE);
423: }
424: | expression LESS expression
425: {
426: $$ = node($1, $3, cs(">"), END_NODE);
427: }
428: | expression LESS_EQ expression
429: {
430: $$ = node($1, $3, cs("!<"), END_NODE);
431: }
432: | expression GREATER expression
433: {
434: $$ = node($1, $3, cs("<"), END_NODE);
435: }
436: | expression GREATER_EQ expression
437: {
438: $$ = node($1, $3, cs("!>"), END_NODE);
439: }
440: ;
441:
442:
443: return_expression
444: : /* empty */
445: {
446: $$ = node(cs("0"), epilogue,
447: numnode(nesting), cs("Q"), END_NODE);
448: }
449: | expression
450: {
451: $$ = node($1, epilogue,
452: numnode(nesting), cs("Q"), END_NODE);
453: }
454: ;
455:
456:
457: expression : named_expression
458: {
459: $$ = node($1.load, END_NODE);
460: }
461: | NUMBER
462: {
463: $$ = node(cs(" "), as($1), END_NODE);
464: }
465: | LPAR expression RPAR
466: {
467: $$ = $2;
468: }
469: | LETTER LPAR opt_argument_list RPAR
470: {
471: $$ = node($3, cs("l"),
472: FUNCTION_NODE($1), cs("x"),
473: END_NODE);
474: }
475: | MINUS expression %prec UMINUS
476: {
477: $$ = node(cs(" 0"), $2, cs("-"),
478: END_NODE);
479: }
480: | expression PLUS expression
481: {
482: $$ = node($1, $3, cs("+"), END_NODE);
483: }
484: | expression MINUS expression
485: {
486: $$ = node($1, $3, cs("-"), END_NODE);
487: }
488: | expression MULTIPLY expression
489: {
490: $$ = node($1, $3, cs("*"), END_NODE);
491: }
492: | expression DIVIDE expression
493: {
494: $$ = node($1, $3, cs("/"), END_NODE);
495: }
496: | expression REMAINDER expression
497: {
498: $$ = node($1, $3, cs("%"), END_NODE);
499: }
500: | expression EXPONENT expression
501: {
502: $$ = node($1, $3, cs("^"), END_NODE);
503: }
504: | INCR named_expression
505: {
506: $$ = node($2.load, cs("1+d"), $2.store,
507: END_NODE);
508: }
509: | DECR named_expression
510: {
511: $$ = node($2.load, cs("1-d"),
512: $2.store, END_NODE);
513: }
514: | named_expression INCR
515: {
516: $$ = node($1.load, cs("d1+"),
517: $1.store, END_NODE);
518: }
519: | named_expression DECR
520: {
521: $$ = node($1.load, cs("d1-"),
522: $1.store, END_NODE);
523: }
524: | named_expression ASSIGN_OP expression
525: {
526: $$ = node($3, cs($2), cs("d"),
527: $1.store, END_NODE);
528: }
529: | LENGTH LPAR expression RPAR
530: {
531: $$ = node($3, cs("Z"), END_NODE);
532: }
533: | SQRT LPAR expression RPAR
534: {
535: $$ = node($3, cs("v"), END_NODE);
536: }
537: | SCALE LPAR expression RPAR
538: {
539: $$ = node($3, cs("X"), END_NODE);
540: }
541: ;
542:
543: named_expression
544: : LETTER
545: {
546: $$.load = node(cs("l"), LETTER_NODE($1),
547: END_NODE);
548: $$.store = node(cs("s"), LETTER_NODE($1),
549: END_NODE);
550: }
551: | LETTER LBRACKET expression RBRACKET
552: {
553: $$.load = node($3, cs(";"),
554: ARRAY_NODE($1), END_NODE);
555: $$.store = node($3, cs(":"),
556: ARRAY_NODE($1), END_NODE);
557: }
558: | SCALE
559: {
560: $$.load = cs("K");
561: $$.store = cs("k");
562: }
563: | IBASE
564: {
565: $$.load = cs("I");
566: $$.store = cs("i");
567: }
568: | OBASE
569: {
570: $$.load = cs("O");
571: $$.store = cs("o");
572: }
573: ;
574: %%
575:
576:
577: static void
578: grow(void)
579: {
580: struct tree *p;
581: int newsize;
582:
583: if (current == instr_sz) {
584: newsize = instr_sz * 2 + 1;
585: p = realloc(instructions, newsize * sizeof(*p));
586: if (p == NULL) {
587: free(instructions);
588: err(1, "cannot realloc instruction buffer");
589: }
590: instructions = p;
591: instr_sz = newsize;
592: }
593: }
594:
595: static int
596: cs(const char *str)
597: {
598: grow();
599: instructions[current].index = CONST_STRING;
600: instructions[current].u.cstr = str;
601: return current++;
602: }
603:
604: static int
605: as(const char *str)
606: {
607: grow();
608: instructions[current].index = ALLOC_STRING;
609: instructions[current].u.astr = strdup(str);
1.2 otto 610: if (instructions[current].u.astr == NULL)
611: err(1, "cannot allocate string");
1.1 otto 612: return current++;
613: }
614:
615: static ssize_t
616: node(ssize_t arg, ...)
617: {
618: va_list ap;
619: ssize_t ret;
620:
621: va_start(ap, arg);
622:
623: ret = current;
624: grow();
625: instructions[current++].index = arg;
626:
627: do {
628: arg = va_arg(ap, ssize_t);
629: grow();
630: instructions[current++].index = arg;
631: } while (arg != END_NODE);
632:
633: va_end(ap);
634: return ret;
635: }
636:
637: static void
638: emit(ssize_t i)
639: {
640: if (instructions[i].index >= 0)
641: while (instructions[i].index != END_NODE)
642: emit(instructions[i++].index);
643: else
644: fputs(instructions[i].u.cstr, stdout);
645: }
646:
647: static void
648: emit_macro(int node, ssize_t code)
649: {
650: putchar('[');
651: emit(code);
652: printf("]s%s\n", instructions[node].u.cstr);
653: nesting--;
654: }
655:
656: static void
657: free_tree(void)
658: {
659: size_t i;
660:
661: for (i = 0; i < current; i++)
662: if (instructions[i].index == ALLOC_STRING)
663: free(instructions[i].u.astr);
664: current = 0;
665: }
666:
667: static ssize_t
668: numnode(int num)
669: {
670: const char *p;
671:
672: if (num < 10)
673: p = str_table['0' + num];
674: else if (num < 16)
675: p = str_table['A' - 10 + num];
676: else
677: err(1, "internal error: break num > 15");
678: return node(cs(" "), cs(p), END_NODE);
679: }
680:
681: static void
682: add_par(ssize_t n)
683: {
684: prologue = node(cs("S"), n, prologue, END_NODE);
685: epilogue = node(epilogue, cs("L"), n, cs("s."), END_NODE);
686: }
687:
688: static void
689: add_local(ssize_t n)
690: {
691: prologue = node(cs("0S"), n, prologue, END_NODE);
692: epilogue = node(epilogue, cs("L"), n, cs("s."), END_NODE);
693: }
694:
695: int
696: yywrap(void)
697: {
698: lineno = 1;
699: if (optind < sargc) {
700: filename = sargv[optind++];
701: yyin = fopen(filename, "r");
702: if (yyin == NULL)
703: err(1, "cannot open %s", filename);
704: return 0;
705: }
706: else if (optind == sargc) {
707: optind++;
708: yyin = stdin;
709: filename = "stdin";
710: return 0;
711: }
712: return 1;
713: }
714:
715: void
716: yyerror(char *s)
717: {
718: if (isspace(*yytext) || !isprint(*yytext))
719: printf("c[%s:%d: %s: ascii char 0x%x unexpected]pc\n",
720: filename, lineno, s, *yytext);
721: else
722: printf("c[%s:%d: %s: %s unexpected]pc\n",
723: filename, lineno, s, yytext);
724: }
725:
726: static void
727: fatal(const char *s)
728: {
729: errx(1, "%s:%d: %s", filename, lineno, s);
730: }
731:
732: static void
733: warning(const char *s)
734: {
735: warnx("%s:%d: %s", filename, lineno, s);
736: }
737:
738: static void
739: init(void)
740: {
741: int i;
742:
743: for (i = 0; i < UCHAR_MAX; i++) {
744: str_table[i][0] = i;
745: str_table[i][1] = '\0';
746: }
747: }
748:
749:
750: static __dead void
751: usage(void)
752: {
753: fprintf(stderr, "%s: usage: [-c] [-l] [file ...]\n", __progname);
754: exit(1);
755: }
756:
757:
758: int
759: main(int argc, char *argv[])
760: {
761: int ch, ret;
762: int p[2];
763:
764: init();
765: setlinebuf(stdout);
766:
767: /* The d debug option is 4.4 BSD dc(1) compatible */
768: while ((ch = getopt(argc, argv, "cdl")) != -1) {
769: switch (ch) {
770: case 'c':
771: case 'd':
772: do_fork = false;
773: break;
774: case 'l':
775: argv[1] = _PATH_LIBB;
776: optind = 1;
777: break;
778: default:
779: usage();
780: }
781: }
782:
783: sargc = argc;
784: sargv = argv;
785:
786: if (do_fork) {
787: if (pipe(p) == -1)
788: err(1, "cannot create pipe");
789: ret = fork();
790: if (ret == -1)
791: err(1, "cannot fork");
792: else if (ret == 0) {
793: close(STDOUT_FILENO);
794: dup(p[1]);
795: close(p[0]);
796: close(p[1]);
797: }
798: else {
799: signal(SIGINT, SIG_IGN);
800: close(STDIN_FILENO);
801: dup(p[0]);
802: close(p[0]);
803: close(p[1]);
804: execl(_PATH_DC, "dc", "-", (char *)NULL);
805: err(1, "cannot find dc");
806: }
807: }
808: signal(SIGINT, abort_line);
809: yywrap();
810: return yyparse();
811: }