Annotation of src/usr.bin/mg/interpreter.c, Revision 1.15
1.15 ! lum 1: /* $OpenBSD: interpreter.c,v 1.14 2021/03/25 12:46:11 lum Exp $ */
1.1 lum 2: /*
3: * This file is in the public domain.
4: *
5: * Author: Mark Lumsden <mark@showcomplex.com>
6: */
7:
8: /*
9: * This file attempts to add some 'scripting' functionality into mg.
10: *
11: * The initial goal is to give mg the ability to use its existing functions
12: * and structures in a linked-up way. Hopefully resulting in user definable
13: * functions. The syntax is 'scheme' like but currently it is not a scheme
14: * interpreter.
15: *
16: * At the moment there is no manual page reference to this file. The code below
17: * is liable to change, so use at your own risk!
18: *
19: * If you do want to do some testing, you can add some lines to your .mg file
20: * like:
21: *
22: * 1. Give multiple arguments to a function that usually would accept only one:
1.11 lum 23: * (find-file "a.txt" "b.txt" "c.txt")
1.1 lum 24: *
1.7 lum 25: * 2. Define a single value variable:
1.11 lum 26: * (define myfile "d.txt")
1.1 lum 27: *
1.7 lum 28: * 3. Define a list:
1.11 lum 29: * (define myfiles(list "e.txt" "f.txt"))
1.7 lum 30: *
31: * 4. Use the previously defined variable or list:
1.1 lum 32: * (find-file myfiles)
33: *
34: * To do:
35: * 1. multiline parsing - currently only single lines supported.
36: * 2. parsing for '(' and ')' throughout whole string and evaluate correctly.
37: * 3. conditional execution.
1.11 lum 38: * 4. deal with special characters in a string: "x\" x" etc
39: * 5. do symbol names need more complex regex patterns? A-Za-z- at the moment.
40: * 6. oh so many things....
1.1 lum 41: * [...]
42: * n. implement user definable functions.
1.11 lum 43: *
1.14 lum 44: * Notes:
45: * - Currently calls to excline() from this file have the line length set to
46: * zero. That's because excline() uses '\0' as the end of line indicator
47: * and only the call to foundparen() within excline() uses excline's 2nd
48: * argument. Importantly, any lines sent to there from here will not be
49: * coming back here.
1.1 lum 50: */
51: #include <sys/queue.h>
1.13 lum 52:
53: #include <limits.h>
1.1 lum 54: #include <regex.h>
55: #include <signal.h>
56: #include <stdio.h>
57: #include <stdlib.h>
58: #include <string.h>
59:
60: #include "def.h"
61: #include "funmap.h"
62:
1.2 lum 63: #ifdef MGLOG
64: #include "kbd.h"
65: #include "log.h"
66: #endif
67:
1.1 lum 68: static int multiarg(char *);
69: static int isvar(char **, char **, int);
70: static int foundvar(char *);
1.8 lum 71: static int doregex(char *, char *);
1.10 lum 72: static int parseexp(char *);
73: static void clearexp(void);
74:
/*
 * One expression lifted out of a line by foundparen(), together with the
 * bookkeeping recorded while the line was scanned.
 */
struct expentry {
	TAILQ_ENTRY(expentry) eentry;	/* Linkage within ehead. */
	char	*exp;		/* The string found between parentheses. */
	int	 par1;		/* Parenthesis at start of string (=1 */
	int	 par2;		/* Parenthesis at end of string )=2 */
	int	 expctr;	/* An incremental counter:+1 for each exp */
	int	 blkid;		/* Which block are we in? */
};

/* The expressions collected from the line currently being evaluated. */
TAILQ_HEAD(exphead, expentry) ehead;
1.1 lum 84:
/*
 * A user defined variable, created by a 'define' while a buffer is being
 * evaluated.
 */
struct varentry {
	SLIST_ENTRY(varentry) entry;	/* Linkage within varhead. */
	char	*name;			/* Symbol the variable is known by. */
	char	*vals;			/* Its value(s), kept as one string. */
	int	 count;			/* Value tally recorded at define time. */
};

/* Every variable currently defined. */
SLIST_HEAD(vlisthead, varentry) varhead = SLIST_HEAD_INITIALIZER(varhead);
95:
/*
 * Table of scheme keywords.
 *
 * NUMSCHKEYS is the number of rows; MAXLENSCHKEYS is the width of each row,
 * sized for a 16 character keyword plus the terminating NUL.
 */
#define NUMSCHKEYS	4
#define MAXLENSCHKEYS	17

char scharkey[NUMSCHKEYS][MAXLENSCHKEYS] = {
	"define", "list", "if", "lambda"
};
109:
110:
111: /*
1.10 lum 112: * Line has a '(' as the first non-white char.
113: * Do some very basic parsing of line.
114: * Multi-line not supported at the moment, To do.
115: */
116: int
1.14 lum 117: foundparen(char *funstr, int llen)
1.10 lum 118: {
1.12 lum 119: struct expentry *e1 = NULL, *e2 = NULL;
1.15 ! lum 120: char *p, *begp = NULL, *endp = NULL, *regs;
1.10 lum 121: char expbuf[BUFSIZE], tmpbuf[BUFSIZE];
1.15 ! lum 122: int i, ret, pctr, fndstart, expctr, blkid, fndend;
1.11 lum 123: int inquote;
1.10 lum 124:
1.15 ! lum 125: pctr = fndstart = expctr = fndend = inquote = 0;
1.10 lum 126: blkid = 1;
127: /*
128: * Check for blocks of code with opening and closing ().
129: * One block = (cmd p a r a m)
130: * Two blocks = (cmd p a r a m s)(hola)
131: * Two blocks = (cmd p a r (list a m s))(hola)
132: * Only single line at moment, but more for multiline.
133: */
134: p = funstr;
135:
136: /*
137: * Currently can't do () or (( at the moment,
138: * just drop out - stops a segv. TODO.
139: */
140: regs = "[(]+[\t ]*[)]+";
141: if (doregex(regs, funstr))
142: return(dobeep_msg("Empty lists not supported at moment"));
143: regs = "[(]+[\t ]*[(]+";
144: if (doregex(regs, funstr))
1.11 lum 145: return(dobeep_msg("Multiple consecutive left parantheses "\
146: "found."));
1.10 lum 147: /*
148: * load expressions into a list called 'expentry', to be processd
149: * when all are obtained.
150: * Not really live code at the moment. Just part of the process of
151: * working out what needs to be done.
152: */
1.12 lum 153: TAILQ_INIT(&ehead);
154:
1.14 lum 155: for (i = llen; i > 0; --i) {
1.10 lum 156: if (*p == '(') {
157: if (fndstart == 1) {
158: if (endp == NULL)
159: *p = '\0';
160: else
161: *endp = '\0';
162: e1->par2 = 1;
1.15 ! lum 163: if ((e1->exp = strndup(begp, BUFSIZE)) ==
1.11 lum 164: NULL) {
165: cleanup();
1.10 lum 166: return(dobeep_msg("strndup error"));
1.11 lum 167: }
1.15 ! lum 168: begp = NULL;
1.10 lum 169: }
1.11 lum 170: if ((e1 = malloc(sizeof(struct expentry))) == NULL) {
171: cleanup();
1.10 lum 172: return (dobeep_msg("malloc Error"));
1.11 lum 173: }
1.12 lum 174: TAILQ_INSERT_HEAD(&ehead, e1, eentry);
1.10 lum 175: e1->exp = NULL;
176: e1->expctr = ++expctr;
177: e1->blkid = blkid;
178: e1->par1 = 1;
179: fndstart = 1;
180: fndend = 0;
181: endp = NULL;
182: pctr++;
183: } else if (*p == ')') {
1.11 lum 184: if (inquote == 1) {
185: cleanup();
186: return(dobeep_msg("Opening and closing quote "\
187: "char error"));
188: }
1.10 lum 189: if (endp == NULL)
190: *p = '\0';
191: else
192: *endp = '\0';
1.15 ! lum 193: if ((e1->exp = strndup(begp, BUFSIZE)) == NULL) {
1.11 lum 194: cleanup();
1.10 lum 195: return(dobeep_msg("strndup error"));
1.11 lum 196: }
1.10 lum 197: fndstart = 0;
198: pctr--;
199: } else if (*p != ' ' && *p != '\t') {
1.15 ! lum 200: if (begp == NULL)
! 201: begp = p;
! 202:
1.11 lum 203: if (*p == '"') {
204: if (inquote == 0)
205: inquote = 1;
206: else
207: inquote = 0;
208: }
1.10 lum 209: fndend = 0;
210: endp = NULL;
211: } else if (fndend == 0 && (*p == ' ' || *p == '\t')) {
212: *p = ' ';
213: fndend = 1;
214: endp = p;
1.12 lum 215: } else if (*p == '\t')
1.11 lum 216: if (inquote == 0)
217: *p = ' ';
1.15 ! lum 218:
1.10 lum 219: if (pctr == 0)
220: blkid++;
221: p++;
222: }
223:
1.11 lum 224: if (pctr != 0) {
225: cleanup();
226: return(dobeep_msg("Opening and closing parentheses error"));
227: }
1.10 lum 228: /*
229: * Join expressions together for the moment, to progess.
230: * This needs to be totally redone and
231: * iterate in-to-out, evaluating as we go. Eventually.
232: */
1.11 lum 233: expbuf[0] = tmpbuf[0] = '\0';
1.12 lum 234: TAILQ_FOREACH_SAFE(e1, &ehead, eentry, e2) {
1.10 lum 235: if (strlcpy(tmpbuf, expbuf, sizeof(tmpbuf)) >= sizeof(tmpbuf))
236: return (dobeep_msg("strlcpy error"));
237: expbuf[0] = '\0';
238: if (strlcpy(expbuf, e1->exp, sizeof(expbuf)) >= sizeof(expbuf))
239: return (dobeep_msg("strlcat error"));
240: if (*tmpbuf != '\0')
241: if (strlcat(expbuf, " ", sizeof(expbuf)) >=
242: sizeof(expbuf))
243: return (dobeep_msg("strlcat error"));
244: if (strlcat(expbuf, tmpbuf, sizeof(expbuf)) >= sizeof(expbuf))
245: return (dobeep_msg("strlcat error"));
246: #ifdef MGLOG
247: mglog_misc("exp|%s|\n", e1->exp);
248: #endif
249: }
1.14 lum 250:
1.10 lum 251: ret = parseexp(expbuf);
1.11 lum 252: if (ret == FALSE)
253: cleanup();
254: else
255: clearexp(); /* leave lists but remove expressions */
1.10 lum 256:
257: return (ret);
258: }
259:
/*
 * Decide what kind of expression funstr holds.
 *
 * Only 'define' forms are recognised so far; anything else is treated as a
 * command with arguments and handed to multiarg().  Symbol names are
 * limited to the characters found in mg function names.
 *
 * The patterns are tried in the order listed and the first match wins.
 */
static int
parseexp(char *funstr)
{
	static const struct {
		char	*regs;		/* pattern tried against funstr */
		char	*errmsg;	/* NULL: pass funstr to foundvar() */
	} forms[] = {
		/* a list 'define' like: (define alist(list 1 2 3 4)) */
		{ "^define[ ]+[A-Za-z-]+[ ]+list[ ]+.*[ ]*", NULL },
		/* a variable 'define' like: (define i (function-name j)) */
		{ "^define[ ]+[A-Za-z-]+[ ]+[A-Za-z-]+[ ]+.*$", NULL },
		/* an incorrect variable 'define' like: (define i y z) */
		{ "^define[ ]+[A-Za-z-]+[ ]+.*[ ]+.*$",
		    "Invalid use of define." },
		/* a single variable 'define' like: (define i 0) */
		{ "^define[ ]+[A-Za-z-]+[ ]+.*$", NULL },
		/* an unrecognised 'define' */
		{ "^define[\t ]+", "Invalid use of define" }
	};
	size_t	 n;

	for (n = 0; n < sizeof(forms) / sizeof(forms[0]); n++) {
		if (!doregex(forms[n].regs, funstr))
			continue;
		if (forms[n].errmsg != NULL)
			return(dobeep_msg(forms[n].errmsg));
		return(foundvar(funstr));
	}

	return(multiarg(funstr));
}
301:
302: /*
1.1 lum 303: * Pass a list of arguments to a function.
304: */
305: static int
306: multiarg(char *funstr)
307: {
308: PF funcp;
1.10 lum 309: char excbuf[BUFSIZE], argbuf[BUFSIZE];
310: char contbuf[BUFSIZE], varbuf[BUFSIZE];
311: char *cmdp = NULL, *argp, *fendp = NULL, *endp, *p, *v, *s = " ";
1.11 lum 312: char *regs;
1.10 lum 313: int spc, numparams, numspc;
1.11 lum 314: int inlist, sizof, fin, inquote;
315:
316: /* mg function name regex */
1.10 lum 317: if (doregex("^[A-Za-z-]+$", funstr))
1.14 lum 318: return(excline(funstr, 0));
1.1 lum 319:
1.10 lum 320: cmdp = funstr;
321: fendp = strchr(cmdp, ' ');
1.1 lum 322: *fendp = '\0';
323: /*
324: * If no extant mg command found, just return.
325: */
326: if ((funcp = name_function(cmdp)) == NULL)
327: return (dobeep_msgs("Unknown command: ", cmdp));
328:
329: numparams = numparams_function(funcp);
330: if (numparams == 0)
331: return (dobeep_msgs("Command takes no arguments: ", cmdp));
332:
333: /* now find the first argument */
1.10 lum 334: p = fendp + 1;
1.1 lum 335: p = skipwhite(p);
1.10 lum 336:
1.1 lum 337: if (strlcpy(argbuf, p, sizeof(argbuf)) >= sizeof(argbuf))
338: return (dobeep_msg("strlcpy error"));
339: argp = argbuf;
340: numspc = spc = 1; /* initially fake a space so we find first argument */
1.11 lum 341: inlist = fin = inquote = 0;
1.10 lum 342:
343: for (p = argbuf; *p != '\0'; p++) {
344: if (*(p + 1) == '\0')
345: fin = 1;
1.1 lum 346:
1.10 lum 347: if (*p != ' ') {
1.11 lum 348: if (*p == '"') {
349: if (inquote == 1)
350: inquote = 0;
351: else
352: inquote = 1;
353: }
1.10 lum 354: if (spc == 1)
355: argp = p;
356: spc = 0;
1.1 lum 357: }
1.11 lum 358: if ((*p == ' ' && inquote == 0) || fin) {
1.1 lum 359: if (spc == 1)
360: continue;
361:
1.10 lum 362: if (*p == ' ') {
1.11 lum 363: *p = '\0'; /* terminate arg string */
1.10 lum 364: }
365: endp = p + 1;
366: excbuf[0] = '\0';
367: varbuf[0] = '\0';
368: contbuf[0] = '\0';
369: sizof = sizeof(varbuf);
370: v = varbuf;
1.11 lum 371: regs = "[\"]+.*[\"]+";
372: if (doregex(regs, argp))
373: ; /* found quotes */
374: else if (isvar(&argp, &v, sizof)) {
1.10 lum 375: (void)(strlcat(varbuf, " ",
376: sizof) >= sizof);
377:
378: *p = ' ';
379:
380: (void)(strlcpy(contbuf, endp,
381: sizeof(contbuf)) >= sizeof(contbuf));
382:
383: (void)(strlcat(varbuf, contbuf,
384: sizof) >= sizof);
1.11 lum 385:
386: argbuf[0] = ' ';
387: argbuf[1] = '\0';
388: (void)(strlcat(argbuf, varbuf,
1.10 lum 389: sizof) >= sizof);
390:
391: p = argp = argbuf;
392: spc = 1;
393: fin = 0;
1.11 lum 394: continue;
1.13 lum 395: } else {
396: const char *errstr;
397: int iters;
398:
399: iters = strtonum(argp, 0, INT_MAX, &errstr);
400: if (errstr != NULL)
401: return (dobeep_msgs("Var not found:",
402: argp));
403: }
1.11 lum 404:
1.10 lum 405: if (strlcpy(excbuf, cmdp, sizeof(excbuf))
406: >= sizeof(excbuf))
407: return (dobeep_msg("strlcpy error"));
408: if (strlcat(excbuf, s, sizeof(excbuf))
409: >= sizeof(excbuf))
410: return (dobeep_msg("strlcat error"));
411: if (strlcat(excbuf, argp, sizeof(excbuf))
412: >= sizeof(excbuf))
413: return (dobeep_msg("strlcat error"));
414:
1.14 lum 415: excline(excbuf, 0);
1.10 lum 416:
417: if (fin)
418: break;
419:
420: *p = ' '; /* unterminate arg string */
1.1 lum 421: spc = 1;
422: }
423: }
424: return (TRUE);
425: }
426:
427: /*
428: * Is an item a value or a variable?
429: */
430: static int
1.10 lum 431: isvar(char **argp, char **varbuf, int sizof)
1.1 lum 432: {
433: struct varentry *v1 = NULL;
434:
435: if (SLIST_EMPTY(&varhead))
436: return (FALSE);
1.2 lum 437: #ifdef MGLOG
1.10 lum 438: mglog_isvar(*varbuf, *argp, sizof);
1.2 lum 439: #endif
1.1 lum 440: SLIST_FOREACH(v1, &varhead, entry) {
441: if (strcmp(*argp, v1->name) == 0) {
1.10 lum 442: (void)(strlcpy(*varbuf, v1->vals, sizof) >= sizof);
1.1 lum 443: return (TRUE);
444: }
445: }
446: return (FALSE);
447: }
448:
449: /*
1.10 lum 450: * The define string _must_ adhere to the regex in parsexp().
1.1 lum 451: * This is not the correct way to do parsing but it does highlight
452: * the issues.
453: */
454: static int
1.7 lum 455: foundvar(char *defstr)
1.1 lum 456: {
457: struct varentry *vt, *v1 = NULL;
1.10 lum 458: const char t[2] = "t";
459: char *p, *vnamep, *vendp = NULL, *valp;
460: int spc;
461:
1.11 lum 462: /* vars names can't start with these. */
463: /* char *spchrs = "+-.#"; */
464:
1.10 lum 465: p = strstr(defstr, " "); /* move to first ' ' char. */
1.11 lum 466: vnamep = skipwhite(p); /* find first char of var name. */
1.1 lum 467: vendp = vnamep;
468:
469: /* now find the end of the list name */
470: while (1) {
471: ++vendp;
1.10 lum 472: if (*vendp == ' ')
1.1 lum 473: break;
474: }
475: *vendp = '\0';
1.10 lum 476:
1.1 lum 477: /*
478: * Check list name is not an existing function.
479: * Although could this be allowed? Shouldn't context dictate?
480: */
481: if (name_function(vnamep) != NULL)
482: return(dobeep_msgs("Variable/function name clash:", vnamep));
483:
484: p = ++vendp;
1.7 lum 485: p = skipwhite(p);
1.10 lum 486:
487: if ((*p == 'l') && (*(p + 1) == 'i') && (*(p + 2) == 's')) {
1.7 lum 488: p = strstr(p, t); /* find 't' in 'list'. */
489: valp = skipwhite(++p); /* find first value */
490: } else
491: valp = p;
1.1 lum 492: /*
493: * Now we have the name of the list starting at 'vnamep',
494: * and the first value is at 'valp', record the details
495: * in a linked list. But first remove variable, if existing already.
496: */
497: if (!SLIST_EMPTY(&varhead)) {
498: SLIST_FOREACH_SAFE(v1, &varhead, entry, vt) {
499: if (strcmp(vnamep, v1->name) == 0)
500: SLIST_REMOVE(&varhead, v1, varentry, entry);
501: }
502: }
503: if ((v1 = malloc(sizeof(struct varentry))) == NULL)
504: return (ABORT);
505: SLIST_INSERT_HEAD(&varhead, v1, entry);
506: if ((v1->name = strndup(vnamep, BUFSIZE)) == NULL)
507: return(dobeep_msg("strndup error"));
508: v1->count = 0;
509: vendp = NULL;
1.3 lum 510:
1.1 lum 511: /* initially fake a space so we find first value */
512: spc = 1;
513: /* now loop through values in list value string while counting them */
514: for (p = valp; *p != '\0'; p++) {
1.10 lum 515: if (*p != ' ' && *p != '\t') {
1.1 lum 516: if (spc == 1)
517: v1->count++;
518: spc = 0;
519: }
520: }
521: if ((v1->vals = strndup(valp, BUFSIZE)) == NULL)
522: return(dobeep_msg("strndup error"));
523:
1.7 lum 524: #ifdef MGLOG
1.10 lum 525: mglog_misc("var:%s\t#items:%d\tvals:|%s|\n", vnamep, v1->count, v1->vals);
1.7 lum 526: #endif
1.1 lum 527:
528: return (TRUE);
529: }
530:
531: /*
1.10 lum 532: * Finished with buffer evaluation, so clean up any vars.
533: * Perhaps keeps them in mg even after use,...
1.1 lum 534: */
1.11 lum 535: static int
1.1 lum 536: clearvars(void)
537: {
538: struct varentry *v1 = NULL;
539:
540: while (!SLIST_EMPTY(&varhead)) {
541: v1 = SLIST_FIRST(&varhead);
542: SLIST_REMOVE_HEAD(&varhead, entry);
543: free(v1->vals);
544: free(v1->name);
545: free(v1);
546: }
547: return (FALSE);
548: }
549:
550: /*
1.10 lum 551: * Finished with block evaluation, so clean up any expressions.
1.1 lum 552: */
1.10 lum 553: static void
554: clearexp(void)
1.1 lum 555: {
1.10 lum 556: struct expentry *e1 = NULL;
1.9 lum 557:
1.12 lum 558: while (!TAILQ_EMPTY(&ehead)) {
559: e1 = TAILQ_FIRST(&ehead);
560: TAILQ_REMOVE(&ehead, e1, eentry);
1.10 lum 561: free(e1->exp);
562: free(e1);
1.9 lum 563: }
1.10 lum 564: return;
1.11 lum 565: }
566:
/*
 * Cleanup before leaving: drop the stored expressions, then the defined
 * variables.
 */
void
cleanup(void)
{
	clearexp();
	(void)clearvars();	/* its return value carries no information */
}
576:
577: /*
578: * Test a string against a regular expression.
579: */
1.10 lum 580: static int
1.8 lum 581: doregex(char *r, char *e)
582: {
583: regex_t regex_buff;
584:
585: if (regcomp(®ex_buff, r, REG_EXTENDED)) {
1.1 lum 586: regfree(®ex_buff);
1.8 lum 587: return(dobeep_msg("Regex compilation error"));
1.1 lum 588: }
1.8 lum 589: if (!regexec(®ex_buff, e, 0, NULL, 0)) {
590: regfree(®ex_buff);
591: return(TRUE);
1.1 lum 592: }
1.9 lum 593: regfree(®ex_buff);
594: return(FALSE);
1.1 lum 595: }