Annotation of src/usr.bin/mg/interpreter.c, Revision 1.13
1.13 ! lum 1: /* $OpenBSD: interpreter.c,v 1.12 2021/03/22 09:26:23 lum Exp $ */
1.1 lum 2: /*
3: * This file is in the public domain.
4: *
5: * Author: Mark Lumsden <mark@showcomplex.com>
6: */
7:
8: /*
9: * This file attempts to add some 'scripting' functionality into mg.
10: *
11: * The initial goal is to give mg the ability to use its existing functions
12: * and structures in a linked-up way. Hopefully resulting in user definable
13: * functions. The syntax is 'scheme' like but currently it is not a scheme
14: * interpreter.
15: *
16: * At the moment there is no manual page reference to this file. The code below
17: * is liable to change, so use at your own risk!
18: *
19: * If you do want to do some testing, you can add some lines to your .mg file
20: * like:
21: *
22: * 1. Give multiple arguments to a function that usually would accept only one:
1.11 lum 23: * (find-file "a.txt" "b.txt" "c.txt")
1.1 lum 24: *
1.7 lum 25: * 2. Define a single value variable:
1.11 lum 26: * (define myfile "d.txt")
1.1 lum 27: *
1.7 lum 28: * 3. Define a list:
1.11 lum 29: * (define myfiles(list "e.txt" "f.txt"))
1.7 lum 30: *
31: * 4. Use the previously defined variable or list:
1.1 lum 32: * (find-file myfiles)
33: *
34: * To do:
35: * 1. multiline parsing - currently only single lines supported.
36: * 2. parsing for '(' and ')' throughout whole string and evaluate correctly.
37: * 3. conditional execution.
1.11 lum 38: * 4. deal with special characters in a string: "x\" x" etc
39: * 5. do symbol names need more complex regex patterns? A-Za-z- at the moment.
40: * 6. oh so many things....
1.1 lum 41: * [...]
42: * n. implement user definable functions.
1.11 lum 43: *
1.1 lum 44: */
45: #include <sys/queue.h>
1.13 ! lum 46:
! 47: #include <limits.h>
1.1 lum 48: #include <regex.h>
49: #include <signal.h>
50: #include <stdio.h>
51: #include <stdlib.h>
52: #include <string.h>
53:
54: #include "def.h"
55: #include "funmap.h"
56:
1.2 lum 57: #ifdef MGLOG
58: #include "kbd.h"
59: #include "log.h"
60: #endif
61:
1.1 lum 62: static int multiarg(char *);
63: static int isvar(char **, char **, int);
64: static int foundvar(char *);
1.8 lum 65: static int doregex(char *, char *);
1.10 lum 66: static int parseexp(char *);
67: static void clearexp(void);
68:
/*
 * One expression collected by foundparen(): the text that followed a '('
 * on the line being evaluated, plus bookkeeping recorded during the scan.
 */
struct expentry {
	TAILQ_ENTRY(expentry)	 eentry;	/* linkage on ehead */
	char			*exp;		/* text found after the '(' */
	int			 par1;		/* set to 1 when the '(' is seen */
	int			 par2;		/* set to 1 when a nested '(' ends the text */
	int			 expctr;	/* sequence number, +1 per expression */
	int			 blkid;		/* block the expression was found in */
};

/* Expressions gathered from the line currently being evaluated. */
TAILQ_HEAD(exphead, expentry) ehead;
1.1 lum 78:
/*
 * A variable created by 'define' while a buffer is being evaluated.
 */
struct varentry {
	SLIST_ENTRY(varentry)	 entry;		/* linkage on varhead */
	char			*name;		/* variable name */
	char			*vals;		/* value text, as written on the line */
	int			 count;		/* item count recorded by foundvar() */
};

/* Every variable currently defined; starts out empty. */
SLIST_HEAD(vlisthead, varentry) varhead = SLIST_HEAD_INITIALIZER(varhead);
89:
/*
 * Table of scheme keywords, one NUL-terminated string per row.
 */
#define NUMSCHKEYS	4	/* rows in scharkey */
#define MAXLENSCHKEYS	17	/* row width: a 16 character keyword + NUL */

char scharkey[NUMSCHKEYS][MAXLENSCHKEYS] = {
	"define", "list", "if", "lambda"
};
103:
104:
105: /*
1.10 lum 106: * Line has a '(' as the first non-white char.
107: * Do some very basic parsing of line.
108: * Multi-line not supported at the moment, To do.
109: */
110: int
111: foundparen(char *funstr)
112: {
1.12 lum 113: struct expentry *e1 = NULL, *e2 = NULL;
1.10 lum 114: char *p, *valp, *endp = NULL, *regs;
115: char expbuf[BUFSIZE], tmpbuf[BUFSIZE];
116: int ret, pctr, fndstart, expctr, blkid, fndchr, fndend;
1.11 lum 117: int inquote;
1.10 lum 118:
1.11 lum 119: pctr = fndstart = expctr = fndchr = fndend = inquote = 0;
1.10 lum 120: blkid = 1;
121: /*
122: * Check for blocks of code with opening and closing ().
123: * One block = (cmd p a r a m)
124: * Two blocks = (cmd p a r a m s)(hola)
125: * Two blocks = (cmd p a r (list a m s))(hola)
126: * Only single line at moment, but more for multiline.
127: */
128: p = funstr;
129:
130: /*
131: * Currently can't do () or (( at the moment,
132: * just drop out - stops a segv. TODO.
133: */
134: regs = "[(]+[\t ]*[)]+";
135: if (doregex(regs, funstr))
136: return(dobeep_msg("Empty lists not supported at moment"));
137: regs = "[(]+[\t ]*[(]+";
138: if (doregex(regs, funstr))
1.11 lum 139: return(dobeep_msg("Multiple consecutive left parantheses "\
140: "found."));
1.10 lum 141: /*
142: * load expressions into a list called 'expentry', to be processd
143: * when all are obtained.
144: * Not really live code at the moment. Just part of the process of
145: * working out what needs to be done.
146: */
1.12 lum 147: TAILQ_INIT(&ehead);
148:
1.10 lum 149: while (*p != '\0') {
150: if (*p == '(') {
151: if (fndstart == 1) {
152: if (endp == NULL)
153: *p = '\0';
154: else
155: *endp = '\0';
156: e1->par2 = 1;
1.11 lum 157: if ((e1->exp = strndup(valp, BUFSIZE)) ==
158: NULL) {
159: cleanup();
1.10 lum 160: return(dobeep_msg("strndup error"));
1.11 lum 161: }
1.10 lum 162: }
1.11 lum 163: if ((e1 = malloc(sizeof(struct expentry))) == NULL) {
164: cleanup();
1.10 lum 165: return (dobeep_msg("malloc Error"));
1.11 lum 166: }
1.12 lum 167: TAILQ_INSERT_HEAD(&ehead, e1, eentry);
1.10 lum 168: e1->exp = NULL;
169: e1->expctr = ++expctr;
170: e1->blkid = blkid;
171: e1->par1 = 1;
172: fndstart = 1;
173: fndend = 0;
174: fndchr = 0;
175: endp = NULL;
176: pctr++;
177: } else if (*p == ')') {
1.11 lum 178: if (inquote == 1) {
179: cleanup();
180: return(dobeep_msg("Opening and closing quote "\
181: "char error"));
182: }
1.10 lum 183: if (endp == NULL)
184: *p = '\0';
185: else
186: *endp = '\0';
1.11 lum 187: if ((e1->exp = strndup(valp, BUFSIZE)) == NULL) {
188: cleanup();
1.10 lum 189: return(dobeep_msg("strndup error"));
1.11 lum 190: }
1.10 lum 191: fndstart = 0;
192: pctr--;
193: } else if (*p != ' ' && *p != '\t') {
194: if (fndchr == 0) {
195: valp = p;
196: fndchr = 1;
197: }
1.11 lum 198: if (*p == '"') {
199: if (inquote == 0)
200: inquote = 1;
201: else
202: inquote = 0;
203: }
1.10 lum 204: fndend = 0;
205: endp = NULL;
206: } else if (fndend == 0 && (*p == ' ' || *p == '\t')) {
207: *p = ' ';
208: fndend = 1;
209: endp = p;
1.12 lum 210: } else if (*p == '\t')
1.11 lum 211: if (inquote == 0)
212: *p = ' ';
1.10 lum 213: if (pctr == 0)
214: blkid++;
215: p++;
216: }
217:
1.11 lum 218: if (pctr != 0) {
219: cleanup();
220: return(dobeep_msg("Opening and closing parentheses error"));
221: }
1.10 lum 222: /*
223: * Join expressions together for the moment, to progess.
224: * This needs to be totally redone and
225: * iterate in-to-out, evaluating as we go. Eventually.
226: */
1.11 lum 227: expbuf[0] = tmpbuf[0] = '\0';
1.12 lum 228: TAILQ_FOREACH_SAFE(e1, &ehead, eentry, e2) {
1.10 lum 229: if (strlcpy(tmpbuf, expbuf, sizeof(tmpbuf)) >= sizeof(tmpbuf))
230: return (dobeep_msg("strlcpy error"));
231: expbuf[0] = '\0';
232: if (strlcpy(expbuf, e1->exp, sizeof(expbuf)) >= sizeof(expbuf))
233: return (dobeep_msg("strlcat error"));
234: if (*tmpbuf != '\0')
235: if (strlcat(expbuf, " ", sizeof(expbuf)) >=
236: sizeof(expbuf))
237: return (dobeep_msg("strlcat error"));
238: if (strlcat(expbuf, tmpbuf, sizeof(expbuf)) >= sizeof(expbuf))
239: return (dobeep_msg("strlcat error"));
240: #ifdef MGLOG
241: mglog_misc("exp|%s|\n", e1->exp);
242: #endif
243: }
244:
245: ret = parseexp(expbuf);
1.11 lum 246: if (ret == FALSE)
247: cleanup();
248: else
249: clearexp(); /* leave lists but remove expressions */
1.10 lum 250:
251: return (ret);
252: }
253:
/*
 * Decide what kind of expression funstr is and dispatch it.
 *
 * The 'define' forms are recognised with regular expressions, tried in
 * the order listed; the first one that matches decides the outcome.
 * Anything that is not a 'define' is treated as a command with arguments
 * and passed to multiarg().
 *
 * Only basic characters (A-Za-z-) are accepted in symbol names, like the
 * ones found in mg function names.
 */
static int
parseexp(char *funstr)
{
	static const struct {
		char	*pattern;	/* regex tried against funstr */
		char	*errmsg;	/* NULL: hand the line to foundvar() */
	} forms[] = {
		/* list define: (define alist(list 1 2 3 4)) */
		{ "^define[ ]+[A-Za-z-]+[ ]+list[ ]+.*[ ]*", NULL },
		/* variable define: (define i (function-name j)) */
		{ "^define[ ]+[A-Za-z-]+[ ]+[A-Za-z-]+[ ]+.*$", NULL },
		/* incorrect variable define: (define i y z) */
		{ "^define[ ]+[A-Za-z-]+[ ]+.*[ ]+.*$",
		    "Invalid use of define." },
		/* single variable define: (define i 0) */
		{ "^define[ ]+[A-Za-z-]+[ ]+.*$", NULL },
		/* any other 'define' is unrecognised */
		{ "^define[\t ]+", "Invalid use of define" }
	};
	size_t	 i;

	for (i = 0; i < sizeof(forms) / sizeof(forms[0]); i++) {
		if (!doregex(forms[i].pattern, funstr))
			continue;
		if (forms[i].errmsg != NULL)
			return(dobeep_msg(forms[i].errmsg));
		return(foundvar(funstr));
	}

	return(multiarg(funstr));
}
295:
296: /*
1.1 lum 297: * Pass a list of arguments to a function.
298: */
299: static int
300: multiarg(char *funstr)
301: {
302: PF funcp;
1.10 lum 303: char excbuf[BUFSIZE], argbuf[BUFSIZE];
304: char contbuf[BUFSIZE], varbuf[BUFSIZE];
305: char *cmdp = NULL, *argp, *fendp = NULL, *endp, *p, *v, *s = " ";
1.11 lum 306: char *regs;
1.10 lum 307: int spc, numparams, numspc;
1.11 lum 308: int inlist, sizof, fin, inquote;
309:
310: /* mg function name regex */
1.10 lum 311: if (doregex("^[A-Za-z-]+$", funstr))
312: return(excline(funstr));
1.1 lum 313:
1.10 lum 314: cmdp = funstr;
315: fendp = strchr(cmdp, ' ');
1.1 lum 316: *fendp = '\0';
317: /*
318: * If no extant mg command found, just return.
319: */
320: if ((funcp = name_function(cmdp)) == NULL)
321: return (dobeep_msgs("Unknown command: ", cmdp));
322:
323: numparams = numparams_function(funcp);
324: if (numparams == 0)
325: return (dobeep_msgs("Command takes no arguments: ", cmdp));
326:
327: /* now find the first argument */
1.10 lum 328: p = fendp + 1;
1.1 lum 329: p = skipwhite(p);
1.10 lum 330:
1.1 lum 331: if (strlcpy(argbuf, p, sizeof(argbuf)) >= sizeof(argbuf))
332: return (dobeep_msg("strlcpy error"));
333: argp = argbuf;
334: numspc = spc = 1; /* initially fake a space so we find first argument */
1.11 lum 335: inlist = fin = inquote = 0;
1.10 lum 336:
337: for (p = argbuf; *p != '\0'; p++) {
338: if (*(p + 1) == '\0')
339: fin = 1;
1.1 lum 340:
1.10 lum 341: if (*p != ' ') {
1.11 lum 342: if (*p == '"') {
343: if (inquote == 1)
344: inquote = 0;
345: else
346: inquote = 1;
347: }
1.10 lum 348: if (spc == 1)
349: argp = p;
350: spc = 0;
1.1 lum 351: }
1.11 lum 352: if ((*p == ' ' && inquote == 0) || fin) {
1.1 lum 353: if (spc == 1)
354: continue;
355:
1.10 lum 356: if (*p == ' ') {
1.11 lum 357: *p = '\0'; /* terminate arg string */
1.10 lum 358: }
359: endp = p + 1;
360: excbuf[0] = '\0';
361: varbuf[0] = '\0';
362: contbuf[0] = '\0';
363: sizof = sizeof(varbuf);
364: v = varbuf;
1.11 lum 365: regs = "[\"]+.*[\"]+";
366: if (doregex(regs, argp))
367: ; /* found quotes */
368: else if (isvar(&argp, &v, sizof)) {
1.10 lum 369: (void)(strlcat(varbuf, " ",
370: sizof) >= sizof);
371:
372: *p = ' ';
373:
374: (void)(strlcpy(contbuf, endp,
375: sizeof(contbuf)) >= sizeof(contbuf));
376:
377: (void)(strlcat(varbuf, contbuf,
378: sizof) >= sizof);
1.11 lum 379:
380: argbuf[0] = ' ';
381: argbuf[1] = '\0';
382: (void)(strlcat(argbuf, varbuf,
1.10 lum 383: sizof) >= sizof);
384:
385: p = argp = argbuf;
386: spc = 1;
387: fin = 0;
1.11 lum 388: continue;
1.13 ! lum 389: } else {
! 390: const char *errstr;
! 391: int iters;
! 392:
! 393: iters = strtonum(argp, 0, INT_MAX, &errstr);
! 394: if (errstr != NULL)
! 395: return (dobeep_msgs("Var not found:",
! 396: argp));
! 397: }
1.11 lum 398:
1.10 lum 399: if (strlcpy(excbuf, cmdp, sizeof(excbuf))
400: >= sizeof(excbuf))
401: return (dobeep_msg("strlcpy error"));
402: if (strlcat(excbuf, s, sizeof(excbuf))
403: >= sizeof(excbuf))
404: return (dobeep_msg("strlcat error"));
405: if (strlcat(excbuf, argp, sizeof(excbuf))
406: >= sizeof(excbuf))
407: return (dobeep_msg("strlcat error"));
408:
409: excline(excbuf);
410:
411: if (fin)
412: break;
413:
414: *p = ' '; /* unterminate arg string */
1.1 lum 415: spc = 1;
416: }
417: }
418: return (TRUE);
419: }
420:
421: /*
422: * Is an item a value or a variable?
423: */
424: static int
1.10 lum 425: isvar(char **argp, char **varbuf, int sizof)
1.1 lum 426: {
427: struct varentry *v1 = NULL;
428:
429: if (SLIST_EMPTY(&varhead))
430: return (FALSE);
1.2 lum 431: #ifdef MGLOG
1.10 lum 432: mglog_isvar(*varbuf, *argp, sizof);
1.2 lum 433: #endif
1.1 lum 434: SLIST_FOREACH(v1, &varhead, entry) {
435: if (strcmp(*argp, v1->name) == 0) {
1.10 lum 436: (void)(strlcpy(*varbuf, v1->vals, sizof) >= sizof);
1.1 lum 437: return (TRUE);
438: }
439: }
440: return (FALSE);
441: }
442:
443: /*
1.10 lum 444: * The define string _must_ adhere to the regex in parsexp().
1.1 lum 445: * This is not the correct way to do parsing but it does highlight
446: * the issues.
447: */
448: static int
1.7 lum 449: foundvar(char *defstr)
1.1 lum 450: {
451: struct varentry *vt, *v1 = NULL;
1.10 lum 452: const char t[2] = "t";
453: char *p, *vnamep, *vendp = NULL, *valp;
454: int spc;
455:
1.11 lum 456: /* vars names can't start with these. */
457: /* char *spchrs = "+-.#"; */
458:
1.10 lum 459: p = strstr(defstr, " "); /* move to first ' ' char. */
1.11 lum 460: vnamep = skipwhite(p); /* find first char of var name. */
1.1 lum 461: vendp = vnamep;
462:
463: /* now find the end of the list name */
464: while (1) {
465: ++vendp;
1.10 lum 466: if (*vendp == ' ')
1.1 lum 467: break;
468: }
469: *vendp = '\0';
1.10 lum 470:
1.1 lum 471: /*
472: * Check list name is not an existing function.
473: * Although could this be allowed? Shouldn't context dictate?
474: */
475: if (name_function(vnamep) != NULL)
476: return(dobeep_msgs("Variable/function name clash:", vnamep));
477:
478: p = ++vendp;
1.7 lum 479: p = skipwhite(p);
1.10 lum 480:
481: if ((*p == 'l') && (*(p + 1) == 'i') && (*(p + 2) == 's')) {
1.7 lum 482: p = strstr(p, t); /* find 't' in 'list'. */
483: valp = skipwhite(++p); /* find first value */
484: } else
485: valp = p;
1.1 lum 486: /*
487: * Now we have the name of the list starting at 'vnamep',
488: * and the first value is at 'valp', record the details
489: * in a linked list. But first remove variable, if existing already.
490: */
491: if (!SLIST_EMPTY(&varhead)) {
492: SLIST_FOREACH_SAFE(v1, &varhead, entry, vt) {
493: if (strcmp(vnamep, v1->name) == 0)
494: SLIST_REMOVE(&varhead, v1, varentry, entry);
495: }
496: }
497: if ((v1 = malloc(sizeof(struct varentry))) == NULL)
498: return (ABORT);
499: SLIST_INSERT_HEAD(&varhead, v1, entry);
500: if ((v1->name = strndup(vnamep, BUFSIZE)) == NULL)
501: return(dobeep_msg("strndup error"));
502: v1->count = 0;
503: vendp = NULL;
1.3 lum 504:
1.1 lum 505: /* initially fake a space so we find first value */
506: spc = 1;
507: /* now loop through values in list value string while counting them */
508: for (p = valp; *p != '\0'; p++) {
1.10 lum 509: if (*p != ' ' && *p != '\t') {
1.1 lum 510: if (spc == 1)
511: v1->count++;
512: spc = 0;
513: }
514: }
515: if ((v1->vals = strndup(valp, BUFSIZE)) == NULL)
516: return(dobeep_msg("strndup error"));
517:
1.7 lum 518: #ifdef MGLOG
1.10 lum 519: mglog_misc("var:%s\t#items:%d\tvals:|%s|\n", vnamep, v1->count, v1->vals);
1.7 lum 520: #endif
1.1 lum 521:
522: return (TRUE);
523: }
524:
525: /*
1.10 lum 526: * Finished with buffer evaluation, so clean up any vars.
527: * Perhaps keeps them in mg even after use,...
1.1 lum 528: */
1.11 lum 529: static int
1.1 lum 530: clearvars(void)
531: {
532: struct varentry *v1 = NULL;
533:
534: while (!SLIST_EMPTY(&varhead)) {
535: v1 = SLIST_FIRST(&varhead);
536: SLIST_REMOVE_HEAD(&varhead, entry);
537: free(v1->vals);
538: free(v1->name);
539: free(v1);
540: }
541: return (FALSE);
542: }
543:
544: /*
1.10 lum 545: * Finished with block evaluation, so clean up any expressions.
1.1 lum 546: */
1.10 lum 547: static void
548: clearexp(void)
1.1 lum 549: {
1.10 lum 550: struct expentry *e1 = NULL;
1.9 lum 551:
1.12 lum 552: while (!TAILQ_EMPTY(&ehead)) {
553: e1 = TAILQ_FIRST(&ehead);
554: TAILQ_REMOVE(&ehead, e1, eentry);
1.10 lum 555: free(e1->exp);
556: free(e1);
1.9 lum 557: }
1.10 lum 558: return;
1.11 lum 559: }
560:
/*
 * Release everything the interpreter holds before leaving: first the
 * collected expressions, then the defined variables.
 */
void
cleanup(void)
{
	clearexp();
	(void)clearvars();
}
570:
571: /*
572: * Test a string against a regular expression.
573: */
1.10 lum 574: static int
1.8 lum 575: doregex(char *r, char *e)
576: {
577: regex_t regex_buff;
578:
579: if (regcomp(®ex_buff, r, REG_EXTENDED)) {
1.1 lum 580: regfree(®ex_buff);
1.8 lum 581: return(dobeep_msg("Regex compilation error"));
1.1 lum 582: }
1.8 lum 583: if (!regexec(®ex_buff, e, 0, NULL, 0)) {
584: regfree(®ex_buff);
585: return(TRUE);
1.1 lum 586: }
1.9 lum 587: regfree(®ex_buff);
588: return(FALSE);
1.1 lum 589: }