Annotation of src/usr.bin/mg/interpreter.c, Revision 1.12
1.12 ! lum 1: /* $OpenBSD: interpreter.c,v 1.11 2021/03/21 12:56:16 lum Exp $ */
1.1 lum 2: /*
3: * This file is in the public domain.
4: *
5: * Author: Mark Lumsden <mark@showcomplex.com>
6: */
7:
8: /*
9: * This file attempts to add some 'scripting' functionality into mg.
10: *
11: * The initial goal is to give mg the ability to use its existing functions
12: * and structures in a linked-up way. Hopefully resulting in user definable
13: * functions. The syntax is 'scheme' like but currently it is not a scheme
14: * interpreter.
15: *
16: * At the moment there is no manual page reference to this file. The code below
17: * is liable to change, so use at your own risk!
18: *
19: * If you do want to do some testing, you can add some lines to your .mg file
20: * like:
21: *
22: * 1. Give multiple arguments to a function that usually would accept only one:
1.11 lum 23: * (find-file "a.txt" "b.txt" "c.txt")
1.1 lum 24: *
1.7 lum 25: * 2. Define a single value variable:
1.11 lum 26: * (define myfile "d.txt")
1.1 lum 27: *
1.7 lum 28: * 3. Define a list:
1.11 lum 29: * (define myfiles(list "e.txt" "f.txt"))
1.7 lum 30: *
31: * 4. Use the previously defined variable or list:
1.1 lum 32: * (find-file myfiles)
33: *
34: * To do:
35: * 1. multiline parsing - currently only single lines supported.
36: * 2. parsing for '(' and ')' throughout whole string and evaluate correctly.
37: * 3. conditional execution.
1.11 lum 38: * 4. deal with special characters in a string: "x\" x" etc
39: * 5. do symbol names need more complex regex patterns? A-Za-z- at the moment.
40: * 6. oh so many things....
1.1 lum 41: * [...]
42: * n. implement user definable functions.
1.11 lum 43: *
1.1 lum 44: */
45: #include <sys/queue.h>
46: #include <regex.h>
47: #include <signal.h>
48: #include <stdio.h>
49: #include <stdlib.h>
50: #include <string.h>
51:
52: #include "def.h"
53: #include "funmap.h"
54:
1.2 lum 55: #ifdef MGLOG
56: #include "kbd.h"
57: #include "log.h"
58: #endif
59:
/* Forward declarations of the file-local helpers defined further down. */
static int	 multiarg(char *funstr);
static int	 isvar(char **argp, char **varbuf, int sizof);
static int	 foundvar(char *defstr);
static int	 doregex(char *r, char *e);
static int	 parseexp(char *funstr);
static void	 clearexp(void);
66:
/*
 * One expression found between parentheses on the line being evaluated.
 * Entries are linked through 'eentry' onto the 'ehead' tail queue.
 */
struct expentry {
	TAILQ_ENTRY(expentry)	 eentry;	/* Linkage on the ehead queue. */
	char			*exp;		/* String found between parentheses. */
	int			 par1;		/* Parenthesis at start of string. */
	int			 par2;		/* Parenthesis at end of string. */
	int			 expctr;	/* Incremental counter: +1 per exp. */
	int			 blkid;		/* Which block are we in? */
};
TAILQ_HEAD(exphead, expentry) ehead;
1.1 lum 76:
/*
 * A variable created while a buffer is being evaluated.  All variables
 * live on the singly-linked list 'varhead'.
 */
struct varentry {
	SLIST_ENTRY(varentry)	 entry;		/* Linkage on the varhead list. */
	char			*name;		/* Variable name. */
	char			*vals;		/* Value string. */
	int			 count;		/* Number of items in vals. */
};
SLIST_HEAD(vlisthead, varentry) varhead = SLIST_HEAD_INITIALIZER(varhead);
87:
/*
 * Table of scheme keywords.
 */
#define NUMSCHKEYS	4	/* number of rows in scharkey */
#define MAXLENSCHKEYS	17	/* room for a 16 character keyword + NUL */

char scharkey[NUMSCHKEYS][MAXLENSCHKEYS] = {
	[0] = "define",
	[1] = "list",
	[2] = "if",
	[3] = "lambda"
};
! 101:
! 102:
! 103: /*
1.10 lum 104: * Line has a '(' as the first non-white char.
105: * Do some very basic parsing of line.
106: * Multi-line not supported at the moment, To do.
107: */
108: int
109: foundparen(char *funstr)
110: {
1.12 ! lum 111: struct expentry *e1 = NULL, *e2 = NULL;
1.10 lum 112: char *p, *valp, *endp = NULL, *regs;
113: char expbuf[BUFSIZE], tmpbuf[BUFSIZE];
114: int ret, pctr, fndstart, expctr, blkid, fndchr, fndend;
1.11 lum 115: int inquote;
1.10 lum 116:
1.11 lum 117: pctr = fndstart = expctr = fndchr = fndend = inquote = 0;
1.10 lum 118: blkid = 1;
119: /*
120: * Check for blocks of code with opening and closing ().
121: * One block = (cmd p a r a m)
122: * Two blocks = (cmd p a r a m s)(hola)
123: * Two blocks = (cmd p a r (list a m s))(hola)
124: * Only single line at moment, but more for multiline.
125: */
126: p = funstr;
127:
128: /*
129: * Currently can't do () or (( at the moment,
130: * just drop out - stops a segv. TODO.
131: */
132: regs = "[(]+[\t ]*[)]+";
133: if (doregex(regs, funstr))
134: return(dobeep_msg("Empty lists not supported at moment"));
135: regs = "[(]+[\t ]*[(]+";
136: if (doregex(regs, funstr))
1.11 lum 137: return(dobeep_msg("Multiple consecutive left parantheses "\
138: "found."));
1.10 lum 139: /*
140: * load expressions into a list called 'expentry', to be processd
141: * when all are obtained.
142: * Not really live code at the moment. Just part of the process of
143: * working out what needs to be done.
144: */
1.12 ! lum 145: TAILQ_INIT(&ehead);
! 146:
1.10 lum 147: while (*p != '\0') {
148: if (*p == '(') {
149: if (fndstart == 1) {
150: if (endp == NULL)
151: *p = '\0';
152: else
153: *endp = '\0';
154: e1->par2 = 1;
1.11 lum 155: if ((e1->exp = strndup(valp, BUFSIZE)) ==
156: NULL) {
157: cleanup();
1.10 lum 158: return(dobeep_msg("strndup error"));
1.11 lum 159: }
1.10 lum 160: }
1.11 lum 161: if ((e1 = malloc(sizeof(struct expentry))) == NULL) {
162: cleanup();
1.10 lum 163: return (dobeep_msg("malloc Error"));
1.11 lum 164: }
1.12 ! lum 165: TAILQ_INSERT_HEAD(&ehead, e1, eentry);
1.10 lum 166: e1->exp = NULL;
167: e1->expctr = ++expctr;
168: e1->blkid = blkid;
169: e1->par1 = 1;
170: fndstart = 1;
171: fndend = 0;
172: fndchr = 0;
173: endp = NULL;
174: pctr++;
175: } else if (*p == ')') {
1.11 lum 176: if (inquote == 1) {
177: cleanup();
178: return(dobeep_msg("Opening and closing quote "\
179: "char error"));
180: }
1.10 lum 181: if (endp == NULL)
182: *p = '\0';
183: else
184: *endp = '\0';
1.11 lum 185: if ((e1->exp = strndup(valp, BUFSIZE)) == NULL) {
186: cleanup();
1.10 lum 187: return(dobeep_msg("strndup error"));
1.11 lum 188: }
1.10 lum 189: fndstart = 0;
190: pctr--;
191: } else if (*p != ' ' && *p != '\t') {
192: if (fndchr == 0) {
193: valp = p;
194: fndchr = 1;
195: }
1.11 lum 196: if (*p == '"') {
197: if (inquote == 0)
198: inquote = 1;
199: else
200: inquote = 0;
201: }
1.10 lum 202: fndend = 0;
203: endp = NULL;
204: } else if (fndend == 0 && (*p == ' ' || *p == '\t')) {
205: *p = ' ';
206: fndend = 1;
207: endp = p;
1.12 ! lum 208: } else if (*p == '\t')
1.11 lum 209: if (inquote == 0)
210: *p = ' ';
1.10 lum 211: if (pctr == 0)
212: blkid++;
213: p++;
214: }
215:
1.11 lum 216: if (pctr != 0) {
217: cleanup();
218: return(dobeep_msg("Opening and closing parentheses error"));
219: }
1.10 lum 220: /*
221: * Join expressions together for the moment, to progess.
222: * This needs to be totally redone and
223: * iterate in-to-out, evaluating as we go. Eventually.
224: */
1.11 lum 225: expbuf[0] = tmpbuf[0] = '\0';
1.12 ! lum 226: TAILQ_FOREACH_SAFE(e1, &ehead, eentry, e2) {
1.10 lum 227: if (strlcpy(tmpbuf, expbuf, sizeof(tmpbuf)) >= sizeof(tmpbuf))
228: return (dobeep_msg("strlcpy error"));
229: expbuf[0] = '\0';
230: if (strlcpy(expbuf, e1->exp, sizeof(expbuf)) >= sizeof(expbuf))
231: return (dobeep_msg("strlcat error"));
232: if (*tmpbuf != '\0')
233: if (strlcat(expbuf, " ", sizeof(expbuf)) >=
234: sizeof(expbuf))
235: return (dobeep_msg("strlcat error"));
236: if (strlcat(expbuf, tmpbuf, sizeof(expbuf)) >= sizeof(expbuf))
237: return (dobeep_msg("strlcat error"));
238: #ifdef MGLOG
239: mglog_misc("exp|%s|\n", e1->exp);
240: #endif
241: }
242:
243: ret = parseexp(expbuf);
1.11 lum 244: if (ret == FALSE)
245: cleanup();
246: else
247: clearexp(); /* leave lists but remove expressions */
1.10 lum 248:
249: return (ret);
250: }
251:
/*
 * Decide what kind of expression 'funstr' holds.
 *
 * At the moment only 'define' lines are recognised here; much more to
 * do.  Symbol names are limited to the basic characters found in mg
 * function names.  The patterns are tried in order and the first one
 * that matches decides the outcome: either the line is handed to
 * foundvar() or it is rejected with a message.  A line matching none of
 * them is passed to multiarg().
 */
static int
parseexp(char *funstr)
{
	static struct {
		char	*regs;		/* pattern tested against funstr */
		char	*errmsg;	/* NULL: accept and call foundvar() */
	} rules[] = {
		/* A list 'define' like: (define alist(list 1 2 3 4)) */
		{ "^define[ ]+[A-Za-z-]+[ ]+list[ ]+.*[ ]*", NULL },
		/* A variable 'define' like: (define i (function-name j)) */
		{ "^define[ ]+[A-Za-z-]+[ ]+[A-Za-z-]+[ ]+.*$", NULL },
		/* An incorrect variable 'define' like: (define i y z) */
		{ "^define[ ]+[A-Za-z-]+[ ]+.*[ ]+.*$",
		    "Invalid use of define." },
		/* A single variable 'define' like: (define i 0) */
		{ "^define[ ]+[A-Za-z-]+[ ]+.*$", NULL },
		/* Any other, unrecognised, 'define' */
		{ "^define[\t ]+", "Invalid use of define" }
	};
	size_t	 i;

	for (i = 0; i < sizeof(rules) / sizeof(rules[0]); i++) {
		if (!doregex(rules[i].regs, funstr))
			continue;
		if (rules[i].errmsg != NULL)
			return(dobeep_msg(rules[i].errmsg));
		return(foundvar(funstr));
	}

	return(multiarg(funstr));
}
293:
294: /*
1.1 lum 295: * Pass a list of arguments to a function.
296: */
297: static int
298: multiarg(char *funstr)
299: {
300: PF funcp;
1.10 lum 301: char excbuf[BUFSIZE], argbuf[BUFSIZE];
302: char contbuf[BUFSIZE], varbuf[BUFSIZE];
303: char *cmdp = NULL, *argp, *fendp = NULL, *endp, *p, *v, *s = " ";
1.11 lum 304: char *regs;
1.10 lum 305: int spc, numparams, numspc;
1.11 lum 306: int inlist, sizof, fin, inquote;
307:
308: /* mg function name regex */
1.10 lum 309: if (doregex("^[A-Za-z-]+$", funstr))
310: return(excline(funstr));
1.1 lum 311:
1.10 lum 312: cmdp = funstr;
313: fendp = strchr(cmdp, ' ');
1.1 lum 314: *fendp = '\0';
315: /*
316: * If no extant mg command found, just return.
317: */
318: if ((funcp = name_function(cmdp)) == NULL)
319: return (dobeep_msgs("Unknown command: ", cmdp));
320:
321: numparams = numparams_function(funcp);
322: if (numparams == 0)
323: return (dobeep_msgs("Command takes no arguments: ", cmdp));
324:
325: /* now find the first argument */
1.10 lum 326: p = fendp + 1;
1.1 lum 327: p = skipwhite(p);
1.10 lum 328:
1.1 lum 329: if (strlcpy(argbuf, p, sizeof(argbuf)) >= sizeof(argbuf))
330: return (dobeep_msg("strlcpy error"));
331: argp = argbuf;
332: numspc = spc = 1; /* initially fake a space so we find first argument */
1.11 lum 333: inlist = fin = inquote = 0;
1.10 lum 334:
335: for (p = argbuf; *p != '\0'; p++) {
336: if (*(p + 1) == '\0')
337: fin = 1;
1.1 lum 338:
1.10 lum 339: if (*p != ' ') {
1.11 lum 340: if (*p == '"') {
341: if (inquote == 1)
342: inquote = 0;
343: else
344: inquote = 1;
345: }
1.10 lum 346: if (spc == 1)
347: argp = p;
348: spc = 0;
1.1 lum 349: }
1.11 lum 350: if ((*p == ' ' && inquote == 0) || fin) {
1.1 lum 351: if (spc == 1)
352: continue;
353:
1.10 lum 354: if (*p == ' ') {
1.11 lum 355: *p = '\0'; /* terminate arg string */
1.10 lum 356: }
357: endp = p + 1;
358: excbuf[0] = '\0';
359: varbuf[0] = '\0';
360: contbuf[0] = '\0';
361: sizof = sizeof(varbuf);
362: v = varbuf;
1.11 lum 363: regs = "[\"]+.*[\"]+";
364: if (doregex(regs, argp))
365: ; /* found quotes */
366: else if (isvar(&argp, &v, sizof)) {
1.10 lum 367: (void)(strlcat(varbuf, " ",
368: sizof) >= sizof);
369:
370: *p = ' ';
371:
372: (void)(strlcpy(contbuf, endp,
373: sizeof(contbuf)) >= sizeof(contbuf));
374:
375: (void)(strlcat(varbuf, contbuf,
376: sizof) >= sizof);
1.11 lum 377:
378: argbuf[0] = ' ';
379: argbuf[1] = '\0';
380: (void)(strlcat(argbuf, varbuf,
1.10 lum 381: sizof) >= sizof);
382:
383: p = argp = argbuf;
384: spc = 1;
385: fin = 0;
1.11 lum 386: continue;
387: } else
388: return (dobeep_msgs("Var not found:", argp));
389:
1.10 lum 390: if (strlcpy(excbuf, cmdp, sizeof(excbuf))
391: >= sizeof(excbuf))
392: return (dobeep_msg("strlcpy error"));
393: if (strlcat(excbuf, s, sizeof(excbuf))
394: >= sizeof(excbuf))
395: return (dobeep_msg("strlcat error"));
396: if (strlcat(excbuf, argp, sizeof(excbuf))
397: >= sizeof(excbuf))
398: return (dobeep_msg("strlcat error"));
399:
400: excline(excbuf);
401:
402: if (fin)
403: break;
404:
405: *p = ' '; /* unterminate arg string */
1.1 lum 406: spc = 1;
407: }
408: }
409: return (TRUE);
410: }
411:
412: /*
413: * Is an item a value or a variable?
414: */
415: static int
1.10 lum 416: isvar(char **argp, char **varbuf, int sizof)
1.1 lum 417: {
418: struct varentry *v1 = NULL;
419:
420: if (SLIST_EMPTY(&varhead))
421: return (FALSE);
1.2 lum 422: #ifdef MGLOG
1.10 lum 423: mglog_isvar(*varbuf, *argp, sizof);
1.2 lum 424: #endif
1.1 lum 425: SLIST_FOREACH(v1, &varhead, entry) {
426: if (strcmp(*argp, v1->name) == 0) {
1.10 lum 427: (void)(strlcpy(*varbuf, v1->vals, sizof) >= sizof);
1.1 lum 428: return (TRUE);
429: }
430: }
431: return (FALSE);
432: }
433:
434: /*
1.10 lum 435: * The define string _must_ adhere to the regex in parsexp().
1.1 lum 436: * This is not the correct way to do parsing but it does highlight
437: * the issues.
438: */
439: static int
1.7 lum 440: foundvar(char *defstr)
1.1 lum 441: {
442: struct varentry *vt, *v1 = NULL;
1.10 lum 443: const char t[2] = "t";
444: char *p, *vnamep, *vendp = NULL, *valp;
445: int spc;
446:
1.11 lum 447: /* vars names can't start with these. */
448: /* char *spchrs = "+-.#"; */
449:
1.10 lum 450: p = strstr(defstr, " "); /* move to first ' ' char. */
1.11 lum 451: vnamep = skipwhite(p); /* find first char of var name. */
1.1 lum 452: vendp = vnamep;
453:
454: /* now find the end of the list name */
455: while (1) {
456: ++vendp;
1.10 lum 457: if (*vendp == ' ')
1.1 lum 458: break;
459: }
460: *vendp = '\0';
1.10 lum 461:
1.1 lum 462: /*
463: * Check list name is not an existing function.
464: * Although could this be allowed? Shouldn't context dictate?
465: */
466: if (name_function(vnamep) != NULL)
467: return(dobeep_msgs("Variable/function name clash:", vnamep));
468:
469: p = ++vendp;
1.7 lum 470: p = skipwhite(p);
1.10 lum 471:
472: if ((*p == 'l') && (*(p + 1) == 'i') && (*(p + 2) == 's')) {
1.7 lum 473: p = strstr(p, t); /* find 't' in 'list'. */
474: valp = skipwhite(++p); /* find first value */
475: } else
476: valp = p;
1.1 lum 477: /*
478: * Now we have the name of the list starting at 'vnamep',
479: * and the first value is at 'valp', record the details
480: * in a linked list. But first remove variable, if existing already.
481: */
482: if (!SLIST_EMPTY(&varhead)) {
483: SLIST_FOREACH_SAFE(v1, &varhead, entry, vt) {
484: if (strcmp(vnamep, v1->name) == 0)
485: SLIST_REMOVE(&varhead, v1, varentry, entry);
486: }
487: }
488: if ((v1 = malloc(sizeof(struct varentry))) == NULL)
489: return (ABORT);
490: SLIST_INSERT_HEAD(&varhead, v1, entry);
491: if ((v1->name = strndup(vnamep, BUFSIZE)) == NULL)
492: return(dobeep_msg("strndup error"));
493: v1->count = 0;
494: vendp = NULL;
1.3 lum 495:
1.1 lum 496: /* initially fake a space so we find first value */
497: spc = 1;
498: /* now loop through values in list value string while counting them */
499: for (p = valp; *p != '\0'; p++) {
1.10 lum 500: if (*p != ' ' && *p != '\t') {
1.1 lum 501: if (spc == 1)
502: v1->count++;
503: spc = 0;
504: }
505: }
506: if ((v1->vals = strndup(valp, BUFSIZE)) == NULL)
507: return(dobeep_msg("strndup error"));
508:
1.7 lum 509: #ifdef MGLOG
1.10 lum 510: mglog_misc("var:%s\t#items:%d\tvals:|%s|\n", vnamep, v1->count, v1->vals);
1.7 lum 511: #endif
1.1 lum 512:
513: return (TRUE);
514: }
515:
516: /*
1.10 lum 517: * Finished with buffer evaluation, so clean up any vars.
518: * Perhaps keeps them in mg even after use,...
1.1 lum 519: */
1.11 lum 520: static int
1.1 lum 521: clearvars(void)
522: {
523: struct varentry *v1 = NULL;
524:
525: while (!SLIST_EMPTY(&varhead)) {
526: v1 = SLIST_FIRST(&varhead);
527: SLIST_REMOVE_HEAD(&varhead, entry);
528: free(v1->vals);
529: free(v1->name);
530: free(v1);
531: }
532: return (FALSE);
533: }
534:
535: /*
1.10 lum 536: * Finished with block evaluation, so clean up any expressions.
1.1 lum 537: */
1.10 lum 538: static void
539: clearexp(void)
1.1 lum 540: {
1.10 lum 541: struct expentry *e1 = NULL;
1.9 lum 542:
1.12 ! lum 543: while (!TAILQ_EMPTY(&ehead)) {
! 544: e1 = TAILQ_FIRST(&ehead);
! 545: TAILQ_REMOVE(&ehead, e1, eentry);
1.10 lum 546: free(e1->exp);
547: free(e1);
1.9 lum 548: }
1.10 lum 549: return;
1.11 lum 550: }
551:
/*
 * Cleanup before leaving: drop the queued expressions first, then the
 * defined variables.
 */
void
cleanup(void)
{
	clearexp();
	(void)clearvars();	/* result is always FALSE, nothing to check */
}
561:
562: /*
563: * Test a string against a regular expression.
564: */
1.10 lum 565: static int
1.8 lum 566: doregex(char *r, char *e)
567: {
568: regex_t regex_buff;
569:
570: if (regcomp(®ex_buff, r, REG_EXTENDED)) {
1.1 lum 571: regfree(®ex_buff);
1.8 lum 572: return(dobeep_msg("Regex compilation error"));
1.1 lum 573: }
1.8 lum 574: if (!regexec(®ex_buff, e, 0, NULL, 0)) {
575: regfree(®ex_buff);
576: return(TRUE);
1.1 lum 577: }
1.9 lum 578: regfree(®ex_buff);
579: return(FALSE);
1.1 lum 580: }