Annotation of src/usr.bin/mg/interpreter.c, Revision 1.19
/*	$OpenBSD: interpreter.c,v 1.18 2021/03/26 07:25:23 lum Exp $	*/
/*
 * This file is in the public domain.
 *
 * Author: Mark Lumsden <mark@showcomplex.com>
 */
7:
/*
 * This file attempts to add some 'scripting' functionality into mg.
 *
 * The initial goal is to give mg the ability to use its existing functions
 * and structures in a linked-up way, hopefully resulting in user definable
 * functions. The syntax is 'scheme' like but currently it is not a scheme
 * interpreter.
 *
 * At the moment there is no manual page reference to this file. The code below
 * is liable to change, so use at your own risk!
 *
 * If you do want to do some testing, you can add some lines to your .mg file
 * like:
 *
 * 1. Give multiple arguments to a function that usually would accept only one:
 * (find-file "a.txt" "b.txt" "c.txt")
 *
 * 2. Define a single value variable:
 * (define myfile "d.txt")
 *
 * 3. Define a list:
 * (define myfiles(list "e.txt" "f.txt"))
 *
 * 4. Use the previously defined variable or list:
 * (find-file myfiles)
 *
 * To do:
 * 1. multiline parsing - currently only single lines supported.
 * 2. parsing for '(' and ')' throughout whole string and evaluate correctly.
 * 3. conditional execution.
 * 4. deal with special characters in a string: "x\" x" etc
 * 5. do symbol names need more complex regex patterns? They are
 *    [A-Za-z][.0-9_A-Z+a-z-] at the moment.
 * 6. oh so many things....
 * [...]
 * n. implement user definable functions.
 *
 * Notes:
 * - Currently calls to excline() from this file have the line length set to
 *   zero. That's because excline() uses '\0' as the end of line indicator
 *   and only the call to foundparen() within excline() uses excline's 2nd
 *   argument. Importantly, any lines sent to there from here will not be
 *   coming back here.
 */
52: #include <sys/queue.h>
1.13 lum 53:
54: #include <limits.h>
1.1 lum 55: #include <regex.h>
56: #include <signal.h>
57: #include <stdio.h>
58: #include <stdlib.h>
59: #include <string.h>
60:
61: #include "def.h"
62: #include "funmap.h"
63:
1.2 lum 64: #ifdef MGLOG
65: #include "kbd.h"
66: #include "log.h"
67: #endif
68:
/* Forward declarations of the file-local helpers defined further down. */
static int	 multiarg(char *funstr);
static int	 isvar(char **argp, char **varbuf, int sizof);
static int	 foundvar(char *defstr);
static int	 doregex(char *r, char *e);
static int	 parseexp_tmp(char *funstr);
static void	 clearexp(void);
static int	 addexp(char *begp, int par1, int par2, int blkid, int expctr);
static int	 exitinterpreter(void);
1.10 lum 77:
/*
 * One expression collected by foundparen() while it scans a line.
 * Entries are created by addexp() and released by clearexp().
 */
struct expentry {
	TAILQ_ENTRY(expentry) eentry;	/* linkage within 'ehead' */
	char	*exp;		/* The string found between parentheses. */
	int	 par1;		/* Parenthesis at start of string (=1 */
	int	 par2;		/* Parenthesis at end of string )=2 */
	int	 expctr;	/* An incremental counter:+1 for each exp */
	int	 blkid;		/* Which block are we in? */
};

/* Queue of the expressions found on the line currently being parsed. */
TAILQ_HEAD(exphead, expentry) ehead;
1.1 lum 87:
/*
 * A variable created by a 'define' expression during buffer evaluation.
 * Entries are added by foundvar(), looked up by isvar() and released by
 * clearvars().
 */
struct varentry {
	SLIST_ENTRY(varentry) entry;	/* linkage within 'varhead' */
	char	*name;			/* variable name */
	char	*vals;			/* value string as given to define */
	int	 count;			/* item count recorded by foundvar() */
};

/* List of all currently defined variables; starts out empty. */
SLIST_HEAD(vlisthead, varentry) varhead = SLIST_HEAD_INITIALIZER(varhead);
98:
/*
 * Table of scheme keywords.
 *
 * NOTE(review): MAXLENSCHKEYS was documented as "longest keyword (16) + 1",
 * but the longest keyword currently in the table is 6 characters long.
 * The size is kept as is; confirm before shrinking it.
 */
#define NUMSCHKEYS	4	/* number of rows in scharkey[] */
#define MAXLENSCHKEYS	17	/* bytes reserved per keyword, incl. NUL */

char scharkey[NUMSCHKEYS][MAXLENSCHKEYS] = {
	"define",
	"list",
	"if",
	"lambda"
};
112:
113:
114: /*
1.10 lum 115: * Line has a '(' as the first non-white char.
116: * Do some very basic parsing of line.
117: * Multi-line not supported at the moment, To do.
118: */
119: int
1.14 lum 120: foundparen(char *funstr, int llen)
1.10 lum 121: {
1.12 lum 122: struct expentry *e1 = NULL, *e2 = NULL;
1.15 lum 123: char *p, *begp = NULL, *endp = NULL, *regs;
1.10 lum 124: char expbuf[BUFSIZE], tmpbuf[BUFSIZE];
1.18 lum 125: int i, ret, pctr, expctr, blkid, inquote;
1.10 lum 126:
1.18 lum 127: pctr = expctr = inquote = 0;
1.10 lum 128: blkid = 1;
129:
130: /*
131: * Currently can't do () or (( at the moment,
132: * just drop out - stops a segv. TODO.
133: */
134: regs = "[(]+[\t ]*[)]+";
135: if (doregex(regs, funstr))
136: return(dobeep_msg("Empty lists not supported at moment"));
137: regs = "[(]+[\t ]*[(]+";
138: if (doregex(regs, funstr))
1.11 lum 139: return(dobeep_msg("Multiple consecutive left parantheses "\
140: "found."));
1.10 lum 141: /*
142: * load expressions into a list called 'expentry', to be processd
143: * when all are obtained.
144: * Not really live code at the moment. Just part of the process of
145: * working out what needs to be done.
146: */
1.12 lum 147: TAILQ_INIT(&ehead);
148:
1.17 lum 149: /*
150: * Check for blocks of code with opening and closing ().
151: * One block = (cmd p a r a m)
152: * Two blocks = (cmd p a r a m s)(hola)
153: * Two blocks = (cmd p a r (list a m s))(hola)
154: * Only single line at moment, but more for multiline.
155: */
156: p = funstr;
157:
1.18 lum 158: for (i = 0; i < llen; ++i, p++) {
1.10 lum 159: if (*p == '(') {
1.18 lum 160: if (begp != NULL) {
1.10 lum 161: if (endp == NULL)
162: *p = '\0';
163: else
164: *endp = '\0';
1.17 lum 165:
166: ret = addexp(begp, 1, 1, blkid, ++expctr);
167: if (!ret) {
1.11 lum 168: cleanup();
1.17 lum 169: return(ret);
1.11 lum 170: }
1.10 lum 171: }
1.18 lum 172: begp = endp = NULL;
1.10 lum 173: pctr++;
174: } else if (*p == ')') {
1.11 lum 175: if (inquote == 1) {
176: cleanup();
177: return(dobeep_msg("Opening and closing quote "\
178: "char error"));
179: }
1.18 lum 180: if (begp != NULL) {
1.17 lum 181: if (endp == NULL)
182: *p = '\0';
183: else
184: *endp = '\0';
185:
186: ret = addexp(begp, 1, 2, blkid, ++expctr);
187: if (!ret) {
188: cleanup();
189: return(ret);
190: }
1.11 lum 191: }
1.18 lum 192: begp = endp = NULL;
1.10 lum 193: pctr--;
194: } else if (*p != ' ' && *p != '\t') {
1.18 lum 195: if (begp == NULL)
196: begp = p;
1.11 lum 197: if (*p == '"') {
198: if (inquote == 0)
199: inquote = 1;
200: else
201: inquote = 0;
202: }
1.10 lum 203: endp = NULL;
1.18 lum 204: } else if (endp == NULL && (*p == ' ' || *p == '\t')) {
1.10 lum 205: *p = ' ';
206: endp = p;
1.12 lum 207: } else if (*p == '\t')
1.11 lum 208: if (inquote == 0)
209: *p = ' ';
1.15 lum 210:
1.10 lum 211: if (pctr == 0)
212: blkid++;
213: }
214:
1.11 lum 215: if (pctr != 0) {
216: cleanup();
217: return(dobeep_msg("Opening and closing parentheses error"));
218: }
1.10 lum 219: /*
220: * Join expressions together for the moment, to progess.
221: * This needs to be totally redone and
222: * iterate in-to-out, evaluating as we go. Eventually.
223: */
1.11 lum 224: expbuf[0] = tmpbuf[0] = '\0';
1.12 lum 225: TAILQ_FOREACH_SAFE(e1, &ehead, eentry, e2) {
1.10 lum 226: if (strlcpy(tmpbuf, expbuf, sizeof(tmpbuf)) >= sizeof(tmpbuf))
227: return (dobeep_msg("strlcpy error"));
228: expbuf[0] = '\0';
229: if (strlcpy(expbuf, e1->exp, sizeof(expbuf)) >= sizeof(expbuf))
230: return (dobeep_msg("strlcat error"));
231: if (*tmpbuf != '\0')
232: if (strlcat(expbuf, " ", sizeof(expbuf)) >=
233: sizeof(expbuf))
234: return (dobeep_msg("strlcat error"));
235: if (strlcat(expbuf, tmpbuf, sizeof(expbuf)) >= sizeof(expbuf))
236: return (dobeep_msg("strlcat error"));
237: #ifdef MGLOG
238: mglog_misc("exp|%s|\n", e1->exp);
239: #endif
240: }
1.14 lum 241:
1.19 ! lum 242: ret = parseexp_tmp(expbuf);
1.11 lum 243: if (ret == FALSE)
244: cleanup();
245: else
246: clearexp(); /* leave lists but remove expressions */
1.10 lum 247:
248: return (ret);
1.17 lum 249: }
250:
251:
252: static int
253: addexp(char *begp, int par1, int par2, int blkid, int expctr)
254: {
255: struct expentry *e1 = NULL;
256:
257: if ((e1 = malloc(sizeof(struct expentry))) == NULL) {
258: cleanup();
259: return (dobeep_msg("malloc Error"));
260: }
261: TAILQ_INSERT_HEAD(&ehead, e1, eentry);
262: if ((e1->exp = strndup(begp, BUFSIZE)) == NULL) {
263: cleanup();
264: return(dobeep_msg("strndup error"));
265: }
266: e1->expctr = expctr;
267: e1->blkid = blkid;
268: /* need to think about these two */
269: e1->par1 = par1;
270: e1->par2 = par2;
271:
272: return (TRUE);
1.10 lum 273: }
274:
/*
 * At the moment, use parseexp_tmp in lieu of proper block parsing.
 * Move away from this eventually.
 *
 * funstr is tested against a fixed, ordered list of regular expressions;
 * the first one that matches decides what happens. When none matches the
 * string is handed to multiarg().
 */
static int
parseexp_tmp(char *funstr)
{
	enum rulekind { R_DEFINE, R_ERROR, R_EXIT };
	static const struct {
		char		*pattern;	/* regex tried on funstr */
		enum rulekind	 kind;		/* what to do on a match */
		const char	*msg;		/* message for R_ERROR */
	} rules[] = {
		/* A list 'define' like: (define alist(list 1 2 3 4)) */
		{ "^define[ ]+[A-Za-z][.0-9_A-Z+a-z-]*[ ]+list[ ]+.*[ ]*",
		    R_DEFINE, NULL },
		/* A variable 'define' like: (define i (function-name j)) */
		{ "^define[ ]+[A-Za-z][.0-9_A-Z+a-z-]*[ ]+[A-Za-z-]+[ ]+.*$",
		    R_DEFINE, NULL },
		/* An incorrect variable 'define' like: (define i y z) */
		{ "^define[ ]+[A-Za-z][.0-9_A-Z+a-z-]*[ ]+.*[ ]+.*$",
		    R_ERROR, "Invalid use of define." },
		/* A single variable 'define' like: (define i 0) */
		{ "^define[ ]+[A-Za-z][.0-9_A-Z+a-z-]*[ ]+.*$",
		    R_DEFINE, NULL },
		/* Any other, unrecognised, 'define' */
		{ "^define[\t ]+",
		    R_ERROR, "Invalid use of define" },
		/* Exit? */
		{ "^exit$",
		    R_EXIT, NULL }
	};
	size_t	 k;

	for (k = 0; k < sizeof(rules) / sizeof(rules[0]); k++) {
		if (!doregex(rules[k].pattern, funstr))
			continue;

		switch (rules[k].kind) {
		case R_DEFINE:
			return(foundvar(funstr));
		case R_ERROR:
			return(dobeep_msg(rules[k].msg));
		case R_EXIT:
			return(exitinterpreter());
		}
	}

	return(multiarg(funstr));
}
320:
321: /*
1.1 lum 322: * Pass a list of arguments to a function.
323: */
324: static int
325: multiarg(char *funstr)
326: {
327: PF funcp;
1.10 lum 328: char excbuf[BUFSIZE], argbuf[BUFSIZE];
329: char contbuf[BUFSIZE], varbuf[BUFSIZE];
330: char *cmdp = NULL, *argp, *fendp = NULL, *endp, *p, *v, *s = " ";
1.11 lum 331: char *regs;
1.10 lum 332: int spc, numparams, numspc;
1.11 lum 333: int inlist, sizof, fin, inquote;
334:
335: /* mg function name regex */
1.10 lum 336: if (doregex("^[A-Za-z-]+$", funstr))
1.14 lum 337: return(excline(funstr, 0));
1.1 lum 338:
1.10 lum 339: cmdp = funstr;
340: fendp = strchr(cmdp, ' ');
1.1 lum 341: *fendp = '\0';
342: /*
343: * If no extant mg command found, just return.
344: */
345: if ((funcp = name_function(cmdp)) == NULL)
346: return (dobeep_msgs("Unknown command: ", cmdp));
347:
348: numparams = numparams_function(funcp);
349: if (numparams == 0)
350: return (dobeep_msgs("Command takes no arguments: ", cmdp));
351:
352: /* now find the first argument */
1.10 lum 353: p = fendp + 1;
1.1 lum 354: p = skipwhite(p);
1.10 lum 355:
1.1 lum 356: if (strlcpy(argbuf, p, sizeof(argbuf)) >= sizeof(argbuf))
357: return (dobeep_msg("strlcpy error"));
358: argp = argbuf;
359: numspc = spc = 1; /* initially fake a space so we find first argument */
1.11 lum 360: inlist = fin = inquote = 0;
1.10 lum 361:
362: for (p = argbuf; *p != '\0'; p++) {
363: if (*(p + 1) == '\0')
364: fin = 1;
1.1 lum 365:
1.10 lum 366: if (*p != ' ') {
1.11 lum 367: if (*p == '"') {
368: if (inquote == 1)
369: inquote = 0;
370: else
371: inquote = 1;
372: }
1.10 lum 373: if (spc == 1)
374: argp = p;
375: spc = 0;
1.1 lum 376: }
1.11 lum 377: if ((*p == ' ' && inquote == 0) || fin) {
1.1 lum 378: if (spc == 1)
379: continue;
380:
1.10 lum 381: if (*p == ' ') {
1.11 lum 382: *p = '\0'; /* terminate arg string */
1.10 lum 383: }
384: endp = p + 1;
385: excbuf[0] = '\0';
386: varbuf[0] = '\0';
387: contbuf[0] = '\0';
388: sizof = sizeof(varbuf);
389: v = varbuf;
1.11 lum 390: regs = "[\"]+.*[\"]+";
391: if (doregex(regs, argp))
392: ; /* found quotes */
393: else if (isvar(&argp, &v, sizof)) {
1.10 lum 394: (void)(strlcat(varbuf, " ",
395: sizof) >= sizof);
396:
397: *p = ' ';
398:
399: (void)(strlcpy(contbuf, endp,
400: sizeof(contbuf)) >= sizeof(contbuf));
401:
402: (void)(strlcat(varbuf, contbuf,
403: sizof) >= sizof);
1.11 lum 404:
405: argbuf[0] = ' ';
406: argbuf[1] = '\0';
407: (void)(strlcat(argbuf, varbuf,
1.10 lum 408: sizof) >= sizof);
409:
410: p = argp = argbuf;
411: spc = 1;
412: fin = 0;
1.11 lum 413: continue;
1.13 lum 414: } else {
415: const char *errstr;
416: int iters;
417:
418: iters = strtonum(argp, 0, INT_MAX, &errstr);
419: if (errstr != NULL)
420: return (dobeep_msgs("Var not found:",
421: argp));
422: }
1.11 lum 423:
1.10 lum 424: if (strlcpy(excbuf, cmdp, sizeof(excbuf))
425: >= sizeof(excbuf))
426: return (dobeep_msg("strlcpy error"));
427: if (strlcat(excbuf, s, sizeof(excbuf))
428: >= sizeof(excbuf))
429: return (dobeep_msg("strlcat error"));
430: if (strlcat(excbuf, argp, sizeof(excbuf))
431: >= sizeof(excbuf))
432: return (dobeep_msg("strlcat error"));
433:
1.14 lum 434: excline(excbuf, 0);
1.10 lum 435:
436: if (fin)
437: break;
438:
439: *p = ' '; /* unterminate arg string */
1.1 lum 440: spc = 1;
441: }
442: }
443: return (TRUE);
444: }
445:
446: /*
447: * Is an item a value or a variable?
448: */
449: static int
1.10 lum 450: isvar(char **argp, char **varbuf, int sizof)
1.1 lum 451: {
452: struct varentry *v1 = NULL;
453:
454: if (SLIST_EMPTY(&varhead))
455: return (FALSE);
1.2 lum 456: #ifdef MGLOG
1.10 lum 457: mglog_isvar(*varbuf, *argp, sizof);
1.2 lum 458: #endif
1.1 lum 459: SLIST_FOREACH(v1, &varhead, entry) {
460: if (strcmp(*argp, v1->name) == 0) {
1.10 lum 461: (void)(strlcpy(*varbuf, v1->vals, sizof) >= sizof);
1.1 lum 462: return (TRUE);
463: }
464: }
465: return (FALSE);
466: }
467:
468: /*
1.10 lum 469: * The define string _must_ adhere to the regex in parsexp().
1.1 lum 470: * This is not the correct way to do parsing but it does highlight
1.19 ! lum 471: * the issues. Also, vars should find their way into one list only.
! 472: * Currently they go into two.
1.1 lum 473: */
474: static int
1.7 lum 475: foundvar(char *defstr)
1.1 lum 476: {
477: struct varentry *vt, *v1 = NULL;
1.10 lum 478: const char t[2] = "t";
479: char *p, *vnamep, *vendp = NULL, *valp;
480: int spc;
481:
1.11 lum 482: /* vars names can't start with these. */
483: /* char *spchrs = "+-.#"; */
484:
1.10 lum 485: p = strstr(defstr, " "); /* move to first ' ' char. */
1.11 lum 486: vnamep = skipwhite(p); /* find first char of var name. */
1.1 lum 487: vendp = vnamep;
488:
489: /* now find the end of the list name */
490: while (1) {
491: ++vendp;
1.10 lum 492: if (*vendp == ' ')
1.1 lum 493: break;
494: }
495: *vendp = '\0';
1.10 lum 496:
1.1 lum 497: /*
498: * Check list name is not an existing function.
499: * Although could this be allowed? Shouldn't context dictate?
500: */
501: if (name_function(vnamep) != NULL)
502: return(dobeep_msgs("Variable/function name clash:", vnamep));
503:
504: p = ++vendp;
1.7 lum 505: p = skipwhite(p);
1.10 lum 506:
507: if ((*p == 'l') && (*(p + 1) == 'i') && (*(p + 2) == 's')) {
1.7 lum 508: p = strstr(p, t); /* find 't' in 'list'. */
509: valp = skipwhite(++p); /* find first value */
510: } else
511: valp = p;
1.1 lum 512: /*
513: * Now we have the name of the list starting at 'vnamep',
514: * and the first value is at 'valp', record the details
515: * in a linked list. But first remove variable, if existing already.
516: */
517: if (!SLIST_EMPTY(&varhead)) {
518: SLIST_FOREACH_SAFE(v1, &varhead, entry, vt) {
519: if (strcmp(vnamep, v1->name) == 0)
520: SLIST_REMOVE(&varhead, v1, varentry, entry);
521: }
522: }
523: if ((v1 = malloc(sizeof(struct varentry))) == NULL)
524: return (ABORT);
525: SLIST_INSERT_HEAD(&varhead, v1, entry);
526: if ((v1->name = strndup(vnamep, BUFSIZE)) == NULL)
527: return(dobeep_msg("strndup error"));
528: v1->count = 0;
529: vendp = NULL;
1.3 lum 530:
1.1 lum 531: /* initially fake a space so we find first value */
532: spc = 1;
533: /* now loop through values in list value string while counting them */
534: for (p = valp; *p != '\0'; p++) {
1.10 lum 535: if (*p != ' ' && *p != '\t') {
1.1 lum 536: if (spc == 1)
537: v1->count++;
538: spc = 0;
539: }
540: }
541: if ((v1->vals = strndup(valp, BUFSIZE)) == NULL)
542: return(dobeep_msg("strndup error"));
543:
1.7 lum 544: #ifdef MGLOG
1.10 lum 545: mglog_misc("var:%s\t#items:%d\tvals:|%s|\n", vnamep, v1->count, v1->vals);
1.7 lum 546: #endif
1.1 lum 547:
548: return (TRUE);
549: }
550:
551: /*
1.10 lum 552: * Finished with buffer evaluation, so clean up any vars.
553: * Perhaps keeps them in mg even after use,...
1.1 lum 554: */
1.11 lum 555: static int
1.1 lum 556: clearvars(void)
557: {
558: struct varentry *v1 = NULL;
559:
560: while (!SLIST_EMPTY(&varhead)) {
561: v1 = SLIST_FIRST(&varhead);
562: SLIST_REMOVE_HEAD(&varhead, entry);
563: free(v1->vals);
564: free(v1->name);
565: free(v1);
566: }
567: return (FALSE);
568: }
569:
570: /*
1.10 lum 571: * Finished with block evaluation, so clean up any expressions.
1.1 lum 572: */
1.10 lum 573: static void
574: clearexp(void)
1.1 lum 575: {
1.10 lum 576: struct expentry *e1 = NULL;
1.9 lum 577:
1.12 lum 578: while (!TAILQ_EMPTY(&ehead)) {
579: e1 = TAILQ_FIRST(&ehead);
580: TAILQ_REMOVE(&ehead, e1, eentry);
1.10 lum 581: free(e1->exp);
582: free(e1);
1.9 lum 583: }
1.10 lum 584: return;
1.11 lum 585: }
586:
/*
 * Cleanup before leaving: drop all queued expressions first, then all
 * defined variables.
 */
void
cleanup(void)
{
	clearexp();
	(void)clearvars();
}
596:
597: /*
598: * Test a string against a regular expression.
599: */
1.10 lum 600: static int
1.8 lum 601: doregex(char *r, char *e)
602: {
603: regex_t regex_buff;
604:
605: if (regcomp(®ex_buff, r, REG_EXTENDED)) {
1.1 lum 606: regfree(®ex_buff);
1.8 lum 607: return(dobeep_msg("Regex compilation error"));
1.1 lum 608: }
1.8 lum 609: if (!regexec(®ex_buff, e, 0, NULL, 0)) {
610: regfree(®ex_buff);
611: return(TRUE);
1.1 lum 612: }
1.9 lum 613: regfree(®ex_buff);
1.16 lum 614: return(FALSE);
615: }
616:
617: /*
618: * Display a message so it is apparent that this is the method which stopped
619: * execution.
620: */
621: static int
622: exitinterpreter()
623: {
624: cleanup();
625: if (batch == 0)
626: return(dobeep_msg("Interpreter exited via exit command."));
1.9 lum 627: return(FALSE);
1.1 lum 628: }