Annotation of src/usr.bin/awk/run.c, Revision 1.86
1.86 ! millert 1: /* $OpenBSD: run.c,v 1.85 2024/04/25 18:33:53 millert Exp $ */
1.1 tholo 2: /****************************************************************
1.13 kstailey 3: Copyright (C) Lucent Technologies 1997
1.1 tholo 4: All Rights Reserved
5:
6: Permission to use, copy, modify, and distribute this software and
7: its documentation for any purpose and without fee is hereby
8: granted, provided that the above copyright notice appear in all
9: copies and that both that the copyright notice and this
10: permission notice and warranty disclaimer appear in supporting
1.13 kstailey 11: documentation, and that the name Lucent Technologies or any of
12: its entities not be used in advertising or publicity pertaining
13: to distribution of the software without specific, written prior
14: permission.
15:
16: LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
17: INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
18: IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
19: SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
20: WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
21: IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
22: ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
23: THIS SOFTWARE.
1.1 tholo 24: ****************************************************************/
25:
26: #define DEBUG
27: #include <stdio.h>
28: #include <ctype.h>
1.63 millert 29: #include <errno.h>
1.57 millert 30: #include <wctype.h>
1.56 millert 31: #include <fcntl.h>
1.1 tholo 32: #include <setjmp.h>
1.25 millert 33: #include <limits.h>
1.1 tholo 34: #include <math.h>
35: #include <string.h>
36: #include <stdlib.h>
37: #include <time.h>
1.47 millert 38: #include <sys/types.h>
39: #include <sys/wait.h>
1.1 tholo 40: #include "awk.h"
1.66 millert 41: #include "awkgram.tab.h"
1.1 tholo 42:
1.75 millert 43:
1.57 millert 44: static void stdinit(void);
45: static void flush_all(void);
1.75 millert 46: static char *wide_char_to_byte_str(int rune, size_t *outlen);
1.1 tholo 47:
/*
 * tempfree(x): pass cell x to tfree() only when istemp(x) holds.
 * The macro form is the one that is compiled (#if 1).  The function form
 * in the #else branch performs the same istemp()/tfree() step and in
 * addition warns when an OCELL has a csub outside CUNK..CFREE.
 */
1.57 millert 48: #if 1
49: #define tempfree(x) do { if (istemp(x)) tfree(x); } while (/*CONSTCOND*/0)
50: #else
1.1 tholo 51: void tempfree(Cell *p) {
52: if (p->ctype == OCELL && (p->csub < CUNK || p->csub > CFREE)) {
1.16 millert 53: WARNING("bad csub %d in Cell %d %s",
54: p->csub, p->ctype, p->sval);
1.1 tholo 55: }
56: if (istemp(p))
57: tfree(p);
58: }
1.57 millert 59: #endif
1.1 tholo 60:
1.30 millert 61: /* do we really need these? */
62: /* #ifdef _NFILE */
63: /* #ifndef FOPEN_MAX */
64: /* #define FOPEN_MAX _NFILE */
65: /* #endif */
66: /* #endif */
67: /* */
68: /* #ifndef FOPEN_MAX */
69: /* #define FOPEN_MAX 40 */ /* max number of open files */
70: /* #endif */
71: /* */
72: /* #ifndef RAND_MAX */
73: /* #define RAND_MAX 32767 */ /* all that ansi guarantees */
74: /* #endif */
1.1 tholo 75:
/* env: setjmp() target armed in program(); jump() longjmp()s to it for EXIT */
76: jmp_buf env;
1.13 kstailey 77: extern int pairstack[];
1.33 millert 78: extern Awkfloat srand_seed;
1.1 tholo 79:
80: Node *winner = NULL; /* root of parse tree */
/* tmps: head of the free list threaded through cnext; see gettemp()/tfree() */
81: Cell *tmps; /* free temporary cells for execution */
82:
/*
 * Shared singleton cells.  True/False are returned by the boolean and
 * relational operators; the j* cells are returned by jump() to signal
 * break, continue, next, nextfile, exit and return to the callers of
 * execute().
 */
1.57 millert 83: static Cell truecell ={ OBOOL, BTRUE, 0, 0, 1.0, NUM, NULL, NULL };
1.15 millert 84: Cell *True = &truecell;
1.57 millert 85: static Cell falsecell ={ OBOOL, BFALSE, 0, 0, 0.0, NUM, NULL, NULL };
1.15 millert 86: Cell *False = &falsecell;
1.57 millert 87: static Cell breakcell ={ OJUMP, JBREAK, 0, 0, 0.0, NUM, NULL, NULL };
1.1 tholo 88: Cell *jbreak = &breakcell;
1.57 millert 89: static Cell contcell ={ OJUMP, JCONT, 0, 0, 0.0, NUM, NULL, NULL };
1.1 tholo 90: Cell *jcont = &contcell;
1.57 millert 91: static Cell nextcell ={ OJUMP, JNEXT, 0, 0, 0.0, NUM, NULL, NULL };
1.1 tholo 92: Cell *jnext = &nextcell;
1.57 millert 93: static Cell nextfilecell ={ OJUMP, JNEXTFILE, 0, 0, 0.0, NUM, NULL, NULL };
1.1 tholo 94: Cell *jnextfile = &nextfilecell;
1.57 millert 95: static Cell exitcell ={ OJUMP, JEXIT, 0, 0, 0.0, NUM, NULL, NULL };
1.1 tholo 96: Cell *jexit = &exitcell;
1.57 millert 97: static Cell retcell ={ OJUMP, JRET, 0, 0, 0.0, NUM, NULL, NULL };
1.1 tholo 98: Cell *jret = &retcell;
/* tempcell: template that gettemp() copies into every temporary it hands out */
1.57 millert 99: static Cell tempcell ={ OCELL, CTEMP, 0, EMPTY, 0.0, NUM|STR|DONTFREE, NULL, NULL };
1.1 tholo 100:
/* curnode is updated by execute() on every node it visits */
101: Node *curnode = NULL; /* the node being executed, for debugging */
1.22 deraadt 102:
1.13 kstailey 103: /* buffer memory management */
/*
 * adjbuf - make sure a managed buffer can hold at least minlen bytes.
 *
 * pbuf:    address of pointer to buffer being managed
 * psiz:    address of buffer size variable
 * minlen:  minimum length of buffer needed
 * quantum: buffer size quantum; 0 disables rounding
 * pbptr:   address of movable pointer into buffer, or NULL if none
 * whatrtn: name of the calling routine if failure should cause a fatal
 *          error, or NULL to report failure through the return value
 *
 * Nothing happens when the buffer is already large enough.  Otherwise the
 * buffer is realloc()ed to minlen rounded up to a multiple of quantum,
 * *pbuf and *psiz are updated, and *pbptr (if supplied) is re-based so it
 * keeps the same offset into the possibly moved buffer.
 *
 * Returns 0 for realloc failure, non-zero for success.
 */
int adjbuf(char **pbuf, int *psiz, int minlen, int quantum, char **pbptr,
	const char *whatrtn)
{
	if (minlen > *psiz) {
		char *tbuf;
		int rminlen = quantum ? minlen % quantum : 0;
		int boff = pbptr ? *pbptr - *pbuf : 0;

		/* round up to next multiple of quantum */
		if (rminlen)
			minlen += quantum - rminlen;
		tbuf = (char *) realloc(*pbuf, minlen);
		/* whatrtn may legitimately be NULL; never hand NULL to %s */
		DPRINTF("adjbuf %s: %d %d (pbuf=%p, tbuf=%p)\n",
		    whatrtn ? whatrtn : "(null)", *psiz, minlen,
		    (void*)*pbuf, (void*)tbuf);
		if (tbuf == NULL) {
			if (whatrtn)
				FATAL("out of memory in %s", whatrtn);
			return 0;
		}
		*pbuf = tbuf;
		*psiz = minlen;
		if (pbptr)
			*pbptr = tbuf + boff;
	}
	return 1;
}
137:
1.1 tholo 138: void run(Node *a) /* execution of parse tree starts here */
139: {
1.63 millert 140:
1.16 millert 141: stdinit();
1.1 tholo 142: execute(a);
143: closeall();
144: }
145:
146: Cell *execute(Node *u) /* execute a node of the parse tree */
147: {
148: Cell *(*proc)(Node **, int);
149: Cell *x;
150: Node *a;
151:
152: if (u == NULL)
1.15 millert 153: return(True);
1.1 tholo 154: for (a = u; ; a = a->nnext) {
155: curnode = a;
156: if (isvalue(a)) {
1.2 millert 157: x = (Cell *) (a->narg[0]);
1.13 kstailey 158: if (isfld(x) && !donefld)
1.1 tholo 159: fldbld();
1.13 kstailey 160: else if (isrec(x) && !donerec)
1.1 tholo 161: recbld();
162: return(x);
163: }
164: if (notlegal(a->nobj)) /* probably a Cell* but too risky to print */
1.16 millert 165: FATAL("illegal statement");
1.1 tholo 166: proc = proctab[a->nobj-FIRSTTOKEN];
167: x = (*proc)(a->narg, a->nobj);
1.13 kstailey 168: if (isfld(x) && !donefld)
1.1 tholo 169: fldbld();
1.13 kstailey 170: else if (isrec(x) && !donerec)
1.1 tholo 171: recbld();
172: if (isexpr(a))
173: return(x);
174: if (isjump(x))
175: return(x);
176: if (a->nnext == NULL)
177: return(x);
178: tempfree(x);
179: }
180: }
181:
182:
183: Cell *program(Node **a, int n) /* execute an awk program */
184: { /* a[0] = BEGIN, a[1] = body, a[2] = END */
185: Cell *x;
186:
187: if (setjmp(env) != 0)
188: goto ex;
189: if (a[0]) { /* BEGIN */
190: x = execute(a[0]);
191: if (isexit(x))
1.15 millert 192: return(True);
1.1 tholo 193: if (isjump(x))
1.16 millert 194: FATAL("illegal break, continue, next or nextfile from BEGIN");
1.1 tholo 195: tempfree(x);
196: }
197: if (a[1] || a[2])
1.54 millert 198: while (getrec(&record, &recsize, true) > 0) {
1.1 tholo 199: x = execute(a[1]);
200: if (isexit(x))
201: break;
202: tempfree(x);
203: }
204: ex:
205: if (setjmp(env) != 0) /* handles exit within END */
206: goto ex1;
207: if (a[2]) { /* END */
208: x = execute(a[2]);
209: if (isbreak(x) || isnext(x) || iscont(x))
1.16 millert 210: FATAL("illegal break, continue, next or nextfile from END");
1.1 tholo 211: tempfree(x);
212: }
213: ex1:
1.15 millert 214: return(True);
1.1 tholo 215: }
216:
/*
 * One Frame describes one active awk function call.  call() fills a frame
 * in before executing the function body; arg() reads nargs/args/fcncell
 * and jump() stores the RETURN value into retval, both through frp.
 */
217: struct Frame { /* stack frame for awk function calls */
218: int nargs; /* number of arguments in this call */
219: Cell *fcncell; /* pointer to Cell for function */
220: Cell **args; /* pointer to array of arguments after execute */
221: Cell *retval; /* return value */
222: };
223:
/* call() refuses a call when (args supplied + args defined) exceeds NARGS */
224: #define NARGS 50 /* max args in a call */
225:
/* The frame array is allocated on the first call() and grown 100 at a time */
226: struct Frame *frame = NULL; /* base of stack frames; dynamically allocated */
227: int nframe = 0; /* number of frames allocated */
1.57 millert 228: struct Frame *frp = NULL; /* frame pointer. bottom level unused */
1.1 tholo 229:
/*
 * call - invoke a user-defined awk function.
 * a[0] evaluates to the function's Cell; a[1] is the nnext-linked list of
 * argument expressions.  The function Cell holds the number of declared
 * parameters in fval and the body (a Node *) in sval.
 * Returns the frame's retval cell, or the body's result unchanged when
 * that result is an exit or next jump.
 */
230: Cell *call(Node **a, int n) /* function call. very kludgy and fragile */
231: {
/* template for parameters/locals the caller did not supply */
1.57 millert 232: static const Cell newcopycell = { OCELL, CCOPY, 0, EMPTY, 0.0, NUM|STR|DONTFREE, NULL, NULL };
1.1 tholo 233: int i, ncall, ndef;
1.25 millert 234: int freed = 0; /* handles potential double freeing when fcn & param share a tempcell */
1.1 tholo 235: Node *x;
/* args[]: cells the callee sees; oargs[]: the caller's original cells */
1.13 kstailey 236: Cell *args[NARGS], *oargs[NARGS]; /* BUG: fixed size arrays */
237: Cell *y, *z, *fcn;
1.1 tholo 238: char *s;
239:
240: fcn = execute(a[0]); /* the function itself */
241: s = fcn->nval;
1.13 kstailey 242: if (!isfcn(fcn))
1.16 millert 243: FATAL("calling undefined function %s", s);
/* first call ever: allocate the initial block of 100 frames */
1.1 tholo 244: if (frame == NULL) {
1.69 millert 245: frp = frame = (struct Frame *) calloc(nframe += 100, sizeof(*frame));
1.1 tholo 246: if (frame == NULL)
1.16 millert 247: FATAL("out of space for stack frames calling %s", s);
1.1 tholo 248: }
249: for (ncall = 0, x = a[1]; x != NULL; x = x->nnext) /* args in call */
250: ncall++;
1.12 millert 251: ndef = (int) fcn->fval; /* args in defn */
1.60 millert 252: DPRINTF("calling %s, %d args (%d in defn), frp=%d\n", s, ncall, ndef, (int) (frp-frame));
1.1 tholo 253: if (ncall > ndef)
1.16 millert 254: WARNING("function %s called with %d args, uses only %d",
255: s, ncall, ndef);
/* this bound keeps every index used on args[]/oargs[] below NARGS */
1.1 tholo 256: if (ncall + ndef > NARGS)
1.16 millert 257: FATAL("function %s has %d arguments, limit %d", s, ncall+ndef, NARGS);
/* evaluate each supplied argument: arrays are shared, everything else is copied */
1.1 tholo 258: for (i = 0, x = a[1]; x != NULL; i++, x = x->nnext) { /* get call args */
1.60 millert 259: DPRINTF("evaluate args[%d], frp=%d:\n", i, (int) (frp-frame));
1.1 tholo 260: y = execute(x);
261: oargs[i] = y;
1.60 millert 262: DPRINTF("args[%d]: %s %f <%s>, t=%o\n",
263: i, NN(y->nval), y->fval, isarr(y) ? "(array)" : NN(y->sval), y->tval);
1.13 kstailey 264: if (isfcn(y))
1.16 millert 265: FATAL("can't use function %s as argument in %s", y->nval, s);
1.1 tholo 266: if (isarr(y))
267: args[i] = y; /* arrays by ref */
268: else
269: args[i] = copycell(y);
270: tempfree(y);
271: }
272: for ( ; i < ndef; i++) { /* add null args for ones not provided */
273: args[i] = gettemp();
274: *args[i] = newcopycell;
275: }
1.57 millert 276: frp++; /* now ok to up frame */
/* grow the frame array by 100 when full; frp is re-based because the array may move */
277: if (frp >= frame + nframe) {
278: int dfp = frp - frame; /* old index */
1.69 millert 279: frame = (struct Frame *) reallocarray(frame, (nframe += 100), sizeof(*frame));
1.1 tholo 280: if (frame == NULL)
1.16 millert 281: FATAL("out of space for stack frames in %s", s);
1.57 millert 282: frp = frame + dfp;
1.1 tholo 283: }
1.57 millert 284: frp->fcncell = fcn;
/* args is this invocation's local array, so the frame is only valid until call() returns */
285: frp->args = args;
286: frp->nargs = ndef; /* number defined with (excess are locals) */
287: frp->retval = gettemp();
1.1 tholo 288:
1.60 millert 289: DPRINTF("start exec of %s, frp=%d\n", s, (int) (frp-frame));
1.1 tholo 290: y = execute((Node *)(fcn->sval)); /* execute body */
1.60 millert 291: DPRINTF("finished exec of %s, frp=%d\n", s, (int) (frp-frame));
1.1 tholo 292:
/*
 * Release the parameter cells.  A CCOPY cell that is now an array was
 * turned into one inside the callee: if it was a local (i >= ncall) its
 * table is freed, otherwise the table is handed to the caller's cell.
 */
293: for (i = 0; i < ndef; i++) {
1.57 millert 294: Cell *t = frp->args[i];
1.1 tholo 295: if (isarr(t)) {
296: if (t->csub == CCOPY) {
297: if (i >= ncall) {
298: freesymtab(t);
299: t->csub = CTEMP;
1.14 millert 300: tempfree(t);
1.1 tholo 301: } else {
302: oargs[i]->tval = t->tval;
303: oargs[i]->tval &= ~(STR|NUM|DONTFREE);
304: oargs[i]->sval = t->sval;
305: tempfree(t);
306: }
307: }
308: } else if (t != y) { /* kludge to prevent freeing twice */
309: t->csub = CTEMP;
310: tempfree(t);
/* the body's result is itself a parameter copy: free it here and remember that */
1.25 millert 311: } else if (t == y && t->csub == CCOPY) {
312: t->csub = CTEMP;
313: tempfree(t);
314: freed = 1;
1.1 tholo 315: }
316: }
317: tempfree(fcn);
/* NOTE(review): frp is not decremented on this early-return path */
1.17 millert 318: if (isexit(y) || isnext(y))
1.1 tholo 319: return y;
1.25 millert 320: if (freed == 0) {
321: tempfree(y); /* don't free twice! */
322: }
1.57 millert 323: z = frp->retval; /* return value */
1.60 millert 324: DPRINTF("%s returns %g |%s| %o\n", s, getfval(z), getsval(z), z->tval);
/* pop the frame */
1.57 millert 325: frp--;
1.1 tholo 326: return(z);
327: }
328:
329: Cell *copycell(Cell *x) /* make a copy of a cell in a temp */
330: {
331: Cell *y;
332:
1.47 millert 333: /* copy is not constant or field */
334:
1.1 tholo 335: y = gettemp();
1.47 millert 336: y->tval = x->tval & ~(CON|FLD|REC);
1.1 tholo 337: y->csub = CCOPY; /* prevents freeing until call is over */
1.13 kstailey 338: y->nval = x->nval; /* BUG? */
1.47 millert 339: if (isstr(x) /* || x->ctype == OCELL */) {
1.17 millert 340: y->sval = tostring(x->sval);
1.47 millert 341: y->tval &= ~DONTFREE;
342: } else
343: y->tval |= DONTFREE;
1.1 tholo 344: y->fval = x->fval;
345: return y;
346: }
347:
348: Cell *arg(Node **a, int n) /* nth argument of a function */
349: {
350:
1.15 millert 351: n = ptoi(a[0]); /* argument number, counting from 0 */
1.60 millert 352: DPRINTF("arg(%d), frp->nargs=%d\n", n, frp->nargs);
1.57 millert 353: if (n+1 > frp->nargs)
1.16 millert 354: FATAL("argument #%d of function %s was not supplied",
1.57 millert 355: n+1, frp->fcncell->nval);
356: return frp->args[n];
1.1 tholo 357: }
358:
359: Cell *jump(Node **a, int n) /* break, continue, next, nextfile, return */
360: {
361: Cell *y;
362:
363: switch (n) {
364: case EXIT:
365: if (a[0] != NULL) {
366: y = execute(a[0]);
1.14 millert 367: errorflag = (int) getfval(y);
1.1 tholo 368: tempfree(y);
369: }
370: longjmp(env, 1);
371: case RETURN:
372: if (a[0] != NULL) {
373: y = execute(a[0]);
374: if ((y->tval & (STR|NUM)) == (STR|NUM)) {
1.57 millert 375: setsval(frp->retval, getsval(y));
376: frp->retval->fval = getfval(y);
377: frp->retval->tval |= NUM;
1.1 tholo 378: }
379: else if (y->tval & STR)
1.57 millert 380: setsval(frp->retval, getsval(y));
1.1 tholo 381: else if (y->tval & NUM)
1.57 millert 382: setfval(frp->retval, getfval(y));
1.1 tholo 383: else /* can't happen */
1.16 millert 384: FATAL("bad type variable %d", y->tval);
1.1 tholo 385: tempfree(y);
386: }
387: return(jret);
388: case NEXT:
389: return(jnext);
390: case NEXTFILE:
391: nextfile();
392: return(jnextfile);
393: case BREAK:
394: return(jbreak);
395: case CONTINUE:
396: return(jcont);
397: default: /* can't happen */
1.16 millert 398: FATAL("illegal jump type %d", n);
1.1 tholo 399: }
400: return 0; /* not reached */
401: }
402:
1.31 millert 403: Cell *awkgetline(Node **a, int n) /* get next line from specific input */
1.1 tholo 404: { /* a[0] is variable, a[1] is operator, a[2] is filename */
405: Cell *r, *x;
1.13 kstailey 406: extern Cell **fldtab;
1.1 tholo 407: FILE *fp;
1.13 kstailey 408: char *buf;
409: int bufsize = recsize;
1.15 millert 410: int mode;
1.57 millert 411: bool newflag;
1.69 millert 412: double result;
1.13 kstailey 413:
1.69 millert 414: if ((buf = (char *) malloc(bufsize)) == NULL)
1.16 millert 415: FATAL("out of memory in getline");
1.1 tholo 416:
417: fflush(stdout); /* in case someone is waiting for a prompt */
418: r = gettemp();
419: if (a[1] != NULL) { /* getline < file */
420: x = execute(a[2]); /* filename */
1.15 millert 421: mode = ptoi(a[1]);
422: if (mode == '|') /* input pipe */
423: mode = LE; /* arbitrary flag */
1.57 millert 424: fp = openfile(mode, getsval(x), &newflag);
1.1 tholo 425: tempfree(x);
426: if (fp == NULL)
427: n = -1;
428: else
1.57 millert 429: n = readrec(&buf, &bufsize, fp, newflag);
1.1 tholo 430: if (n <= 0) {
431: ;
432: } else if (a[0] != NULL) { /* getline var <file */
1.13 kstailey 433: x = execute(a[0]);
434: setsval(x, buf);
1.69 millert 435: if (is_number(x->sval, & result)) {
436: x->fval = result;
1.49 millert 437: x->tval |= NUM;
438: }
1.13 kstailey 439: tempfree(x);
1.1 tholo 440: } else { /* getline <file */
1.13 kstailey 441: setsval(fldtab[0], buf);
1.69 millert 442: if (is_number(fldtab[0]->sval, & result)) {
443: fldtab[0]->fval = result;
1.13 kstailey 444: fldtab[0]->tval |= NUM;
1.1 tholo 445: }
446: }
447: } else { /* bare getline; use current input */
448: if (a[0] == NULL) /* getline */
1.54 millert 449: n = getrec(&record, &recsize, true);
1.1 tholo 450: else { /* getline var */
1.54 millert 451: n = getrec(&buf, &bufsize, false);
1.70 millert 452: if (n > 0) {
453: x = execute(a[0]);
454: setsval(x, buf);
455: if (is_number(x->sval, & result)) {
456: x->fval = result;
457: x->tval |= NUM;
458: }
459: tempfree(x);
1.49 millert 460: }
1.1 tholo 461: }
462: }
463: setfval(r, (Awkfloat) n);
1.13 kstailey 464: free(buf);
1.1 tholo 465: return r;
466: }
467:
468: Cell *getnf(Node **a, int n) /* get NF */
469: {
1.54 millert 470: if (!donefld)
1.1 tholo 471: fldbld();
472: return (Cell *) a[0];
473: }
474:
1.53 millert 475: static char *
476: makearraystring(Node *p, const char *func)
1.1 tholo 477: {
1.13 kstailey 478: char *buf;
479: int bufsz = recsize;
1.62 millert 480: size_t blen;
1.53 millert 481:
1.69 millert 482: if ((buf = (char *) malloc(bufsz)) == NULL) {
1.53 millert 483: FATAL("%s: out of memory", func);
484: }
1.13 kstailey 485:
1.53 millert 486: blen = 0;
487: buf[blen] = '\0';
488:
489: for (; p; p = p->nnext) {
490: Cell *x = execute(p); /* expr */
491: char *s = getsval(x);
1.62 millert 492: size_t seplen = strlen(getsval(subseploc));
1.53 millert 493: size_t nsub = p->nnext ? seplen : 0;
494: size_t slen = strlen(s);
495: size_t tlen = blen + slen + nsub;
496:
497: if (!adjbuf(&buf, &bufsz, tlen + 1, recsize, 0, func)) {
498: FATAL("%s: out of memory %s[%s...]",
499: func, x->nval, buf);
500: }
501: memcpy(buf + blen, s, slen);
502: if (nsub) {
503: memcpy(buf + blen + slen, *SUBSEP, nsub);
504: }
505: buf[tlen] = '\0';
506: blen = tlen;
507: tempfree(x);
508: }
509: return buf;
510: }
511:
512: Cell *array(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */
513: {
514: Cell *x, *z;
515: char *buf;
1.1 tholo 516:
517: x = execute(a[0]); /* Cell* for symbol table */
1.53 millert 518: buf = makearraystring(a[1], __func__);
1.1 tholo 519: if (!isarr(x)) {
1.60 millert 520: DPRINTF("making %s into an array\n", NN(x->nval));
1.1 tholo 521: if (freeable(x))
522: xfree(x->sval);
523: x->tval &= ~(STR|NUM|DONTFREE);
524: x->tval |= ARR;
525: x->sval = (char *) makesymtab(NSYMTAB);
526: }
527: z = setsymtab(buf, "", 0.0, STR|NUM, (Array *) x->sval);
528: z->ctype = OCELL;
529: z->csub = CVAR;
530: tempfree(x);
1.13 kstailey 531: free(buf);
1.1 tholo 532: return(z);
533: }
534:
1.14 millert 535: Cell *awkdelete(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */
1.1 tholo 536: {
1.53 millert 537: Cell *x;
1.1 tholo 538:
539: x = execute(a[0]); /* Cell* for symbol table */
1.50 millert 540: if (x == symtabloc) {
541: FATAL("cannot delete SYMTAB or its elements");
542: }
1.1 tholo 543: if (!isarr(x))
1.15 millert 544: return True;
1.51 millert 545: if (a[1] == NULL) { /* delete the elements, not the table */
1.1 tholo 546: freesymtab(x);
547: x->tval &= ~STR;
548: x->tval |= ARR;
549: x->sval = (char *) makesymtab(NSYMTAB);
550: } else {
1.53 millert 551: char *buf = makearraystring(a[1], __func__);
1.1 tholo 552: freeelem(x, buf);
1.13 kstailey 553: free(buf);
1.1 tholo 554: }
555: tempfree(x);
1.15 millert 556: return True;
1.1 tholo 557: }
558:
559: Cell *intest(Node **a, int n) /* a[0] is index (list), a[1] is symtab */
560: {
1.53 millert 561: Cell *ap, *k;
1.13 kstailey 562: char *buf;
1.1 tholo 563:
564: ap = execute(a[1]); /* array name */
565: if (!isarr(ap)) {
1.60 millert 566: DPRINTF("making %s into an array\n", ap->nval);
1.1 tholo 567: if (freeable(ap))
568: xfree(ap->sval);
569: ap->tval &= ~(STR|NUM|DONTFREE);
570: ap->tval |= ARR;
571: ap->sval = (char *) makesymtab(NSYMTAB);
572: }
1.53 millert 573: buf = makearraystring(a[0], __func__);
1.1 tholo 574: k = lookup(buf, (Array *) ap->sval);
575: tempfree(ap);
1.13 kstailey 576: free(buf);
1.1 tholo 577: if (k == NULL)
1.15 millert 578: return(False);
1.1 tholo 579: else
1.15 millert 580: return(True);
1.1 tholo 581: }
582:
583:
1.75 millert 584: /* ======== utf-8 code ========== */
585:
586: /*
587: * Awk strings can contain ascii, random 8-bit items (eg Latin-1),
588: * or utf-8. u8_isutf tests whether a string starts with a valid
589: * utf-8 sequence, and returns 0 if not (e.g., high bit set).
590: * u8_nextlen returns length of next valid sequence, which is
591: * 1 for ascii, 2..4 for utf-8, or 1 for high bit non-utf.
592: * u8_strlen returns length of string in valid utf-8 sequences
593: * and/or high-bit bytes. Conversion functions go between byte
594: * number and character number.
595: *
596: * In theory, this behaves the same as before for non-utf8 bytes.
597: *
598: * Limited checking! This is a potential security hole.
599: */
600:
601: /* is s the beginning of a valid utf-8 string? */
602: /* return length 1..4 if yes, 0 if no */
603: int u8_isutf(const char *s)
604: {
605: int n, ret;
606: unsigned char c;
607:
608: c = s[0];
1.77 millert 609: if (c < 128 || awk_mb_cur_max == 1)
1.75 millert 610: return 1; /* what if it's 0? */
611:
612: n = strlen(s);
613: if (n >= 2 && ((c>>5) & 0x7) == 0x6 && (s[1] & 0xC0) == 0x80) {
614: ret = 2; /* 110xxxxx 10xxxxxx */
615: } else if (n >= 3 && ((c>>4) & 0xF) == 0xE && (s[1] & 0xC0) == 0x80
616: && (s[2] & 0xC0) == 0x80) {
617: ret = 3; /* 1110xxxx 10xxxxxx 10xxxxxx */
618: } else if (n >= 4 && ((c>>3) & 0x1F) == 0x1E && (s[1] & 0xC0) == 0x80
619: && (s[2] & 0xC0) == 0x80 && (s[3] & 0xC0) == 0x80) {
620: ret = 4; /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
621: } else {
622: ret = 0;
623: }
624: return ret;
625: }
626:
627: /* Convert (prefix of) utf8 string to utf-32 rune. */
628: /* Sets *rune to the value, returns the length. */
629: /* No error checking: watch out. */
630: int u8_rune(int *rune, const char *s)
631: {
632: int n, ret;
633: unsigned char c;
634:
635: c = s[0];
1.77 millert 636: if (c < 128 || awk_mb_cur_max == 1) {
1.75 millert 637: *rune = c;
638: return 1;
639: }
640:
641: n = strlen(s);
642: if (n >= 2 && ((c>>5) & 0x7) == 0x6 && (s[1] & 0xC0) == 0x80) {
643: *rune = ((c & 0x1F) << 6) | (s[1] & 0x3F); /* 110xxxxx 10xxxxxx */
644: ret = 2;
645: } else if (n >= 3 && ((c>>4) & 0xF) == 0xE && (s[1] & 0xC0) == 0x80
646: && (s[2] & 0xC0) == 0x80) {
647: *rune = ((c & 0xF) << 12) | ((s[1] & 0x3F) << 6) | (s[2] & 0x3F);
648: /* 1110xxxx 10xxxxxx 10xxxxxx */
649: ret = 3;
650: } else if (n >= 4 && ((c>>3) & 0x1F) == 0x1E && (s[1] & 0xC0) == 0x80
651: && (s[2] & 0xC0) == 0x80 && (s[3] & 0xC0) == 0x80) {
652: *rune = ((c & 0x7) << 18) | ((s[1] & 0x3F) << 12) | ((s[2] & 0x3F) << 6) | (s[3] & 0x3F);
653: /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
654: ret = 4;
655: } else {
656: *rune = c;
657: ret = 1;
658: }
659: return ret; /* returns one byte if sequence doesn't look like utf */
660: }
661:
/*
 * u8_nextlen - return length of next sequence: 1 for ascii or random,
 * 2..4 for valid utf8.  A byte that u8_isutf() rejects counts as one
 * byte.
 */
int u8_nextlen(const char *s)
{
	int seqlen = u8_isutf(s);

	return (seqlen != 0) ? seqlen : 1;
}
672:
673: /* return number of utf characters or single non-utf bytes */
674: int u8_strlen(const char *s)
675: {
676: int i, len, n, totlen;
677: unsigned char c;
678:
679: n = strlen(s);
680: totlen = 0;
681: for (i = 0; i < n; i += len) {
682: c = s[i];
1.77 millert 683: if (c < 128 || awk_mb_cur_max == 1) {
1.75 millert 684: len = 1;
685: } else {
686: len = u8_nextlen(&s[i]);
687: }
688: totlen++;
689: if (i > n)
690: FATAL("bad utf count [%s] n=%d i=%d\n", s, n, i);
691: }
692: return totlen;
693: }
694:
/*
 * u8_char2byte - convert utf-8 char number in a string to its byte offset.
 *
 * Skips charnum characters from the start of s and returns how many bytes
 * that covered.  The walk stops at the terminating NUL, so a charnum
 * larger than the number of characters in s yields the byte length of s
 * instead of reading beyond the end of the string.
 */
int u8_char2byte(const char *s, int charnum)
{
	int n;
	int bytenum = 0;

	while (charnum > 0 && *s != '\0') {
		n = u8_nextlen(s);
		s += n;
		bytenum += n;
		charnum--;
	}
	return bytenum;
}
709:
/*
 * u8_byte2char - convert byte offset in s to utf-8 char number that
 * starts there.
 *
 * Counts one for every sequence that starts at an offset <= bytenum, so
 * offset 0 maps to 1.  Returns -1 when bytenum lies beyond the end of s
 * and 0 when bytenum is negative.
 */
int u8_byte2char(const char *s, int bytenum)
{
	int off;
	int charnum = 0;	/* BUG: what origin? */
				/* should be 0 to match start==0 which means no match */
	int total = strlen(s);

	if (bytenum > total) {
		return -1;	/* ??? */
	}
	off = 0;
	while (off <= bytenum) {
		off += u8_nextlen(s + off);
		charnum++;
	}
	return charnum;
}
727:
/* runetochar() adapted from rune.c in the Plan 9 distribution */

enum
{
	Runeerror = 128, /* from somewhere else */
	Runemax = 0x10FFFF,

	Bit1 = 7,
	Bitx = 6,
	Bit2 = 5,
	Bit3 = 4,
	Bit4 = 3,
	Bit5 = 2,

	T1 = ((1<<(Bit1+1))-1) ^ 0xFF,	/* 0000 0000 */
	Tx = ((1<<(Bitx+1))-1) ^ 0xFF,	/* 1000 0000 */
	T2 = ((1<<(Bit2+1))-1) ^ 0xFF,	/* 1100 0000 */
	T3 = ((1<<(Bit3+1))-1) ^ 0xFF,	/* 1110 0000 */
	T4 = ((1<<(Bit4+1))-1) ^ 0xFF,	/* 1111 0000 */
	T5 = ((1<<(Bit5+1))-1) ^ 0xFF,	/* 1111 1000 */

	Rune1 = (1<<(Bit1+0*Bitx))-1,	/* 0000 0000 0000 0000 0111 1111 */
	Rune2 = (1<<(Bit2+1*Bitx))-1,	/* 0000 0000 0000 0111 1111 1111 */
	Rune3 = (1<<(Bit3+2*Bitx))-1,	/* 0000 0000 1111 1111 1111 1111 */
	Rune4 = (1<<(Bit4+3*Bitx))-1,	/* 0011 1111 1111 1111 1111 1111 */

	Maskx = (1<<Bitx)-1,		/* 0011 1111 */
	Testx = Maskx ^ 0xFF,		/* 1100 0000 */

};

/*
 * runetochar - encode code point c as UTF-8 into str.
 *
 * Writes 1 to 4 bytes (no terminator) and returns how many were written;
 * str must have room for 4 bytes.  A value above Runemax is replaced by
 * Runeerror.  The replacement is made before the length is chosen so that
 * Runeerror is emitted in its shortest form; substituting it after the
 * two-byte test would produce an overlong three-byte sequence, which is
 * not valid UTF-8.
 */
int runetochar(char *str, int c)
{
	if (c > Runemax)
		c = Runeerror;

	/* one character sequence 00000-0007F => 00-7F */
	if (c <= Rune1) {
		str[0] = c;
		return 1;
	}

	/* two character sequence 00080-007FF => T2 Tx */
	if (c <= Rune2) {
		str[0] = T2 | (c >> 1*Bitx);
		str[1] = Tx | (c & Maskx);
		return 2;
	}

	/* three character sequence 00800-0FFFF => T3 Tx Tx */
	if (c <= Rune3) {
		str[0] = T3 | (c >> 2*Bitx);
		str[1] = Tx | ((c >> 1*Bitx) & Maskx);
		str[2] = Tx | (c & Maskx);
		return 3;
	}

	/* four character sequence 010000-1FFFFF => T4 Tx Tx Tx */
	str[0] = T4 | (c >> 3*Bitx);
	str[1] = Tx | ((c >> 2*Bitx) & Maskx);
	str[2] = Tx | ((c >> 1*Bitx) & Maskx);
	str[3] = Tx | (c & Maskx);
	return 4;
}
791:
792:
793: /* ========== end of utf8 code =========== */
794:
795:
796:
1.1 tholo 797: Cell *matchop(Node **a, int n) /* ~ and match() */
798: {
1.84 millert 799: Cell *x, *y, *z;
1.1 tholo 800: char *s, *t;
801: int i;
1.75 millert 802: int cstart, cpatlen, len;
1.1 tholo 803: fa *pfa;
1.18 millert 804: int (*mf)(fa *, const char *) = match, mode = 0;
1.1 tholo 805:
806: if (n == MATCHFCN) {
807: mf = pmatch;
808: mode = 1;
809: }
810: x = execute(a[1]); /* a[1] = target text */
811: s = getsval(x);
1.51 millert 812: if (a[0] == NULL) /* a[1] == 0: already-compiled reg expr */
1.1 tholo 813: i = (*mf)((fa *) a[2], s);
814: else {
815: y = execute(a[2]); /* a[2] = regular expr */
816: t = getsval(y);
817: pfa = makedfa(t, mode);
818: i = (*mf)(pfa, s);
819: tempfree(y);
820: }
1.84 millert 821: z = x;
1.1 tholo 822: if (n == MATCHFCN) {
1.75 millert 823: int start = patbeg - s + 1; /* origin 1 */
824: if (patlen < 0) {
825: start = 0; /* not found */
826: } else {
827: cstart = u8_byte2char(s, start-1);
828: cpatlen = 0;
829: for (i = 0; i < patlen; i += len) {
830: len = u8_nextlen(patbeg+i);
831: cpatlen++;
832: }
833:
834: start = cstart;
835: patlen = cpatlen;
836: }
837:
1.1 tholo 838: setfval(rstartloc, (Awkfloat) start);
839: setfval(rlengthloc, (Awkfloat) patlen);
840: x = gettemp();
841: x->tval = NUM;
842: x->fval = start;
843: } else if ((n == MATCH && i == 1) || (n == NOTMATCH && i == 0))
1.84 millert 844: x = True;
1.1 tholo 845: else
1.84 millert 846: x = False;
847:
848: tempfree(z);
849: return x;
1.1 tholo 850: }
851:
852:
853: Cell *boolop(Node **a, int n) /* a[0] || a[1], a[0] && a[1], !a[0] */
854: {
855: Cell *x, *y;
856: int i;
857:
858: x = execute(a[0]);
859: i = istrue(x);
860: tempfree(x);
861: switch (n) {
862: case BOR:
1.15 millert 863: if (i) return(True);
1.1 tholo 864: y = execute(a[1]);
865: i = istrue(y);
866: tempfree(y);
1.15 millert 867: if (i) return(True);
868: else return(False);
1.1 tholo 869: case AND:
1.15 millert 870: if ( !i ) return(False);
1.1 tholo 871: y = execute(a[1]);
872: i = istrue(y);
873: tempfree(y);
1.15 millert 874: if (i) return(True);
875: else return(False);
1.1 tholo 876: case NOT:
1.15 millert 877: if (i) return(False);
878: else return(True);
1.1 tholo 879: default: /* can't happen */
1.16 millert 880: FATAL("unknown boolean operator %d", n);
1.1 tholo 881: }
882: return 0; /*NOTREACHED*/
883: }
884:
885: Cell *relop(Node **a, int n) /* a[0 < a[1], etc. */
886: {
887: int i;
888: Cell *x, *y;
889: Awkfloat j;
1.75 millert 890: bool x_is_nan, y_is_nan;
1.1 tholo 891:
892: x = execute(a[0]);
893: y = execute(a[1]);
1.75 millert 894: x_is_nan = isnan(x->fval);
895: y_is_nan = isnan(y->fval);
1.1 tholo 896: if (x->tval&NUM && y->tval&NUM) {
1.75 millert 897: if ((x_is_nan || y_is_nan) && n != NE)
898: return(False);
1.1 tholo 899: j = x->fval - y->fval;
900: i = j<0? -1: (j>0? 1: 0);
901: } else {
902: i = strcmp(getsval(x), getsval(y));
903: }
904: tempfree(x);
905: tempfree(y);
906: switch (n) {
1.15 millert 907: case LT: if (i<0) return(True);
908: else return(False);
909: case LE: if (i<=0) return(True);
910: else return(False);
1.75 millert 911: case NE: if (x_is_nan && y_is_nan) return(True);
912: else if (i!=0) return(True);
1.15 millert 913: else return(False);
914: case EQ: if (i == 0) return(True);
915: else return(False);
916: case GE: if (i>=0) return(True);
917: else return(False);
918: case GT: if (i>0) return(True);
919: else return(False);
1.1 tholo 920: default: /* can't happen */
1.16 millert 921: FATAL("unknown relational operator %d", n);
1.1 tholo 922: }
923: return 0; /*NOTREACHED*/
924: }
925:
926: void tfree(Cell *a) /* free a tempcell */
927: {
1.13 kstailey 928: if (freeable(a)) {
1.60 millert 929: DPRINTF("freeing %s %s %o\n", NN(a->nval), NN(a->sval), a->tval);
1.1 tholo 930: xfree(a->sval);
1.13 kstailey 931: }
1.1 tholo 932: if (a == tmps)
1.16 millert 933: FATAL("tempcell list is curdled");
1.1 tholo 934: a->cnext = tmps;
935: tmps = a;
936: }
937:
938: Cell *gettemp(void) /* get a tempcell */
939: { int i;
940: Cell *x;
941:
942: if (!tmps) {
1.69 millert 943: tmps = (Cell *) calloc(100, sizeof(*tmps));
1.1 tholo 944: if (!tmps)
1.16 millert 945: FATAL("out of space for temporaries");
1.52 millert 946: for (i = 1; i < 100; i++)
1.1 tholo 947: tmps[i-1].cnext = &tmps[i];
1.51 millert 948: tmps[i-1].cnext = NULL;
1.1 tholo 949: }
950: x = tmps;
951: tmps = x->cnext;
952: *x = tempcell;
953: return(x);
954: }
955:
956: Cell *indirect(Node **a, int n) /* $( a[0] ) */
957: {
1.25 millert 958: Awkfloat val;
1.1 tholo 959: Cell *x;
960: int m;
961: char *s;
962:
963: x = execute(a[0]);
1.25 millert 964: val = getfval(x); /* freebsd: defend against super large field numbers */
965: if ((Awkfloat)INT_MAX < val)
966: FATAL("trying to access out of range field %s", x->nval);
967: m = (int) val;
1.69 millert 968: if (m == 0 && !is_number(s = getsval(x), NULL)) /* suspicion! */
1.16 millert 969: FATAL("illegal field $(%s), name \"%s\"", s, x->nval);
1.13 kstailey 970: /* BUG: can x->nval ever be null??? */
1.1 tholo 971: tempfree(x);
972: x = fieldadr(m);
1.13 kstailey 973: x->ctype = OCELL; /* BUG? why are these needed? */
1.1 tholo 974: x->csub = CFLD;
975: return(x);
976: }
977:
978: Cell *substr(Node **a, int nnn) /* substr(a[0], a[1], a[2]) */
979: {
980: int k, m, n;
1.75 millert 981: int mb, nb;
1.1 tholo 982: char *s;
983: int temp;
1.51 millert 984: Cell *x, *y, *z = NULL;
1.1 tholo 985:
986: x = execute(a[0]);
987: y = execute(a[1]);
1.51 millert 988: if (a[2] != NULL)
1.1 tholo 989: z = execute(a[2]);
990: s = getsval(x);
1.80 millert 991: k = u8_strlen(s) + 1;
1.1 tholo 992: if (k <= 1) {
993: tempfree(x);
994: tempfree(y);
1.51 millert 995: if (a[2] != NULL) {
1.1 tholo 996: tempfree(z);
1.17 millert 997: }
1.1 tholo 998: x = gettemp();
999: setsval(x, "");
1000: return(x);
1001: }
1.14 millert 1002: m = (int) getfval(y);
1.1 tholo 1003: if (m <= 0)
1004: m = 1;
1005: else if (m > k)
1006: m = k;
1007: tempfree(y);
1.51 millert 1008: if (a[2] != NULL) {
1.14 millert 1009: n = (int) getfval(z);
1.1 tholo 1010: tempfree(z);
1011: } else
1012: n = k - 1;
1013: if (n < 0)
1014: n = 0;
1015: else if (n > k - m)
1016: n = k - m;
1.75 millert 1017: /* m is start, n is length from there */
1.60 millert 1018: DPRINTF("substr: m=%d, n=%d, s=%s\n", m, n, s);
1.1 tholo 1019: y = gettemp();
1.75 millert 1020: mb = u8_char2byte(s, m-1); /* byte offset of start char in s */
1021: nb = u8_char2byte(s, m-1+n); /* byte offset of end+1 char in s */
1022:
1023: temp = s[nb]; /* with thanks to John Linderman */
1024: s[nb] = '\0';
1025: setsval(y, s + mb);
1026: s[nb] = temp;
1.1 tholo 1027: tempfree(x);
1028: return(y);
1029: }
1030:
1031: Cell *sindex(Node **a, int nnn) /* index(a[0], a[1]) */
1032: {
1033: Cell *x, *y, *z;
1034: char *s1, *s2, *p1, *p2, *q;
1035: Awkfloat v = 0.0;
1036:
1037: x = execute(a[0]);
1038: s1 = getsval(x);
1039: y = execute(a[1]);
1040: s2 = getsval(y);
1041:
1042: z = gettemp();
1043: for (p1 = s1; *p1 != '\0'; p1++) {
1.57 millert 1044: for (q = p1, p2 = s2; *p2 != '\0' && *q == *p2; q++, p2++)
1045: continue;
1.1 tholo 1046: if (*p2 == '\0') {
1.75 millert 1047: /* v = (Awkfloat) (p1 - s1 + 1); origin 1 */
1048:
1049: /* should be a function: used in match() as well */
1050: int i, len;
1051: v = 0;
1052: for (i = 0; i < p1-s1+1; i += len) {
1053: len = u8_nextlen(s1+i);
1054: v++;
1055: }
1.1 tholo 1056: break;
1057: }
1058: }
1059: tempfree(x);
1060: tempfree(y);
1061: setfval(z, v);
1062: return(z);
1063: }
1064:
/*
 * has_utf8: return 1 if any character of s, as measured by u8_nextlen(),
 * is longer than one byte; return 0 otherwise.
 */
int has_utf8(char *s)	/* return 1 if s contains any utf-8 (2 bytes or more) character */
{
	while (*s != 0) {
		int len = u8_nextlen(s);

		if (len > 1)
			return 1;
		s += len;
	}
	return 0;
}
1076:
1.13 kstailey 1077: #define MAXNUMSIZE 50
1078:
1.18 millert 1079: int format(char **pbuf, int *pbufsize, const char *s, Node *a) /* printf-like conversions */
1.1 tholo 1080: {
1.13 kstailey 1081: char *fmt;
1.18 millert 1082: char *p, *t;
1083: const char *os;
1.1 tholo 1084: Cell *x;
1.10 kstailey 1085: int flag = 0, n;
1.13 kstailey 1086: int fmtwd; /* format width */
1087: int fmtsz = recsize;
1088: char *buf = *pbuf;
1089: int bufsize = *pbufsize;
1.53 millert 1090: #define FMTSZ(a) (fmtsz - ((a) - fmt))
1091: #define BUFSZ(a) (bufsize - ((a) - buf))
1.1 tholo 1092:
1.54 millert 1093: static bool first = true;
1094: static bool have_a_format = false;
1.47 millert 1095:
1096: if (first) {
1.57 millert 1097: char xbuf[100];
1.47 millert 1098:
1.57 millert 1099: snprintf(xbuf, sizeof(xbuf), "%a", 42.0);
1100: have_a_format = (strcmp(xbuf, "0x1.5p+5") == 0);
1.54 millert 1101: first = false;
1.47 millert 1102: }
1103:
1.1 tholo 1104: os = s;
1105: p = buf;
1.69 millert 1106: if ((fmt = (char *) malloc(fmtsz)) == NULL)
1.16 millert 1107: FATAL("out of memory in format()");
1.1 tholo 1108: while (*s) {
1.30 millert 1109: adjbuf(&buf, &bufsize, MAXNUMSIZE+1+p-buf, recsize, &p, "format1");
1.1 tholo 1110: if (*s != '%') {
1111: *p++ = *s++;
1112: continue;
1113: }
1114: if (*(s+1) == '%') {
1115: *p++ = '%';
1116: s += 2;
1117: continue;
1118: }
1.13 kstailey 1119: fmtwd = atoi(s+1);
1120: if (fmtwd < 0)
1121: fmtwd = -fmtwd;
1.30 millert 1122: adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format2");
1.13 kstailey 1123: for (t = fmt; (*t++ = *s) != '\0'; s++) {
1.30 millert 1124: if (!adjbuf(&fmt, &fmtsz, MAXNUMSIZE+1+t-fmt, recsize, &t, "format3"))
1.16 millert 1125: FATAL("format item %.30s... ran format() out of memory", os);
1.55 millert 1126: /* Ignore size specifiers */
1127: if (strchr("hjLlqtz", *s) != NULL) { /* the ansi panoply */
1128: t--;
1129: continue;
1130: }
1131: if (isalpha((uschar)*s))
1132: break;
1.48 millert 1133: if (*s == '$') {
1134: FATAL("'$' not permitted in awk formats");
1135: }
1.1 tholo 1136: if (*s == '*') {
1.49 millert 1137: if (a == NULL) {
1.27 deraadt 1138: FATAL("not enough args in printf(%s)", os);
1.49 millert 1139: }
1.1 tholo 1140: x = execute(a);
1141: a = a->nnext;
1.53 millert 1142: snprintf(t - 1, FMTSZ(t - 1),
1143: "%d", fmtwd=(int) getfval(x));
1.13 kstailey 1144: if (fmtwd < 0)
1145: fmtwd = -fmtwd;
1146: adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format");
1.1 tholo 1147: t = fmt + strlen(fmt);
1148: tempfree(x);
1149: }
1150: }
1151: *t = '\0';
1.13 kstailey 1152: if (fmtwd < 0)
1153: fmtwd = -fmtwd;
1.30 millert 1154: adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format4");
1.1 tholo 1155: switch (*s) {
1.47 millert 1156: case 'a': case 'A':
1157: if (have_a_format)
1158: flag = *s;
1159: else
1160: flag = 'f';
1161: break;
1.1 tholo 1162: case 'f': case 'e': case 'g': case 'E': case 'G':
1.18 millert 1163: flag = 'f';
1.1 tholo 1164: break;
1.55 millert 1165: case 'd': case 'i': case 'o': case 'x': case 'X': case 'u':
1166: flag = (*s == 'd' || *s == 'i') ? 'd' : 'u';
1.53 millert 1167: *(t-1) = 'j';
1168: *t = *s;
1169: *++t = '\0';
1.1 tholo 1170: break;
1171: case 's':
1.18 millert 1172: flag = 's';
1.1 tholo 1173: break;
1174: case 'c':
1.18 millert 1175: flag = 'c';
1.1 tholo 1176: break;
1177: default:
1.16 millert 1178: WARNING("weird printf conversion %s", fmt);
1.18 millert 1179: flag = '?';
1.1 tholo 1180: break;
1181: }
1182: if (a == NULL)
1.16 millert 1183: FATAL("not enough args in printf(%s)", os);
1.1 tholo 1184: x = execute(a);
1185: a = a->nnext;
1.13 kstailey 1186: n = MAXNUMSIZE;
1187: if (fmtwd > n)
1188: n = fmtwd;
1.30 millert 1189: adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format5");
1.1 tholo 1190: switch (flag) {
1.75 millert 1191: case '?':
1192: snprintf(p, BUFSZ(p), "%s", fmt); /* unknown, so dump it too */
1.13 kstailey 1193: t = getsval(x);
1194: n = strlen(t);
1195: if (fmtwd > n)
1196: n = fmtwd;
1.30 millert 1197: adjbuf(&buf, &bufsize, 1+strlen(p)+n+p-buf, recsize, &p, "format6");
1.5 kstailey 1198: p += strlen(p);
1.53 millert 1199: snprintf(p, BUFSZ(p), "%s", t);
1.1 tholo 1200: break;
1.47 millert 1201: case 'a':
1202: case 'A':
1.53 millert 1203: case 'f': snprintf(p, BUFSZ(p), fmt, getfval(x)); break;
1.55 millert 1204: case 'd': snprintf(p, BUFSZ(p), fmt, (intmax_t) getfval(x)); break;
1205: case 'u': snprintf(p, BUFSZ(p), fmt, (uintmax_t) getfval(x)); break;
1.75 millert 1206:
1207: case 's': {
1.1 tholo 1208: t = getsval(x);
1209: n = strlen(t);
1.75 millert 1210: /* if simple format or no utf-8 in the string, sprintf works */
1211: if (!has_utf8(t) || strcmp(fmt,"%s") == 0) {
1212: if (fmtwd > n)
1213: n = fmtwd;
1214: if (!adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format7"))
1215: FATAL("huge string/format (%d chars) in printf %.30s..." \
1216: " ran format() out of memory", n, t);
1217: snprintf(p, BUFSZ(p), fmt, t);
1218: break;
1219: }
1220:
1221: /* get here if string has utf-8 chars and fmt is not plain %s */
1222: /* "%-w.ps", where -, w and .p are all optional */
1223: /* '0' before the w is a flag character */
1224: /* fmt points at % */
1225: int ljust = 0, wid = 0, prec = n, pad = 0;
1226: char *f = fmt+1;
1227: if (f[0] == '-') {
1228: ljust = 1;
1229: f++;
1230: }
1231: // flags '0' and '+' are recognized but skipped
1232: if (f[0] == '0') {
1233: f++;
1234: if (f[0] == '+')
1235: f++;
1236: }
1237: if (f[0] == '+') {
1238: f++;
1239: if (f[0] == '0')
1240: f++;
1241: }
1242: if (isdigit((uschar)f[0])) { /* there is a wid */
1243: wid = strtol(f, &f, 10);
1244: }
1245: if (f[0] == '.') { /* there is a .prec */
1246: prec = strtol(++f, &f, 10);
1247: }
1248: if (prec > u8_strlen(t))
1249: prec = u8_strlen(t);
1250: pad = wid>prec ? wid - prec : 0; // has to be >= 0
1251: int i, k, n;
1252:
1253: if (ljust) { // print prec chars from t, then pad blanks
1254: n = u8_char2byte(t, prec);
1255: for (k = 0; k < n; k++) {
1256: //putchar(t[k]);
1257: *p++ = t[k];
1258: }
1259: for (i = 0; i < pad; i++) {
1260: //printf(" ");
1261: *p++ = ' ';
1262: }
1263: } else { // print pad blanks, then prec chars from t
1264: for (i = 0; i < pad; i++) {
1265: //printf(" ");
1266: *p++ = ' ';
1267: }
1268: n = u8_char2byte(t, prec);
1269: for (k = 0; k < n; k++) {
1270: //putchar(t[k]);
1271: *p++ = t[k];
1272: }
1273: }
1274: *p = 0;
1.1 tholo 1275: break;
1.75 millert 1276: }
1277:
1278: case 'c': {
1279: /*
1280: * If a numeric value is given, awk should just turn
1281: * it into a character and print it:
1282: * BEGIN { printf("%c\n", 65) }
1283: * prints "A".
1284: *
1285: * But what if the numeric value is > 128 and
1286: * represents a valid Unicode code point?!? We do
1287: * our best to convert it back into UTF-8. If we
1288: * can't, we output the encoding of the Unicode
1289: * "invalid character", 0xFFFD.
1290: */
1.13 kstailey 1291: if (isnum(x)) {
1.75 millert 1292: int charval = (int) getfval(x);
1293:
1294: if (charval != 0) {
1.77 millert 1295: if (charval < 128 || awk_mb_cur_max == 1)
1.75 millert 1296: snprintf(p, BUFSZ(p), fmt, charval);
1297: else {
1298: // possible unicode character
1299: size_t count;
1300: char *bs = wide_char_to_byte_str(charval, &count);
1301:
1302: if (bs == NULL) { // invalid character
1303: // use unicode invalid character, 0xFFFD
1.84 millert 1304: static char invalid_char[] = "\357\277\275";
1305: bs = invalid_char;
1.75 millert 1306: count = 3;
1307: }
1308: t = bs;
1309: n = count;
1310: goto format_percent_c;
1311: }
1312: } else {
1.18 millert 1313: *p++ = '\0'; /* explicit null byte */
1314: *p = '\0'; /* next output will start here */
1315: }
1.75 millert 1316: break;
1317: }
1318: t = getsval(x);
1319: n = u8_nextlen(t);
1320: format_percent_c:
1321: if (n < 2) { /* not utf8 */
1.53 millert 1322: snprintf(p, BUFSZ(p), fmt, getsval(x)[0]);
1.75 millert 1323: break;
1324: }
1325:
1326: // utf8 character, almost same song and dance as for %s
1327: int ljust = 0, wid = 0, prec = n, pad = 0;
1328: char *f = fmt+1;
1329: if (f[0] == '-') {
1330: ljust = 1;
1331: f++;
1332: }
1333: // flags '0' and '+' are recognized but skipped
1334: if (f[0] == '0') {
1335: f++;
1336: if (f[0] == '+')
1337: f++;
1338: }
1339: if (f[0] == '+') {
1340: f++;
1341: if (f[0] == '0')
1342: f++;
1343: }
1344: if (isdigit((uschar)f[0])) { /* there is a wid */
1345: wid = strtol(f, &f, 10);
1346: }
1347: if (f[0] == '.') { /* there is a .prec */
1348: prec = strtol(++f, &f, 10);
1349: }
1350: if (prec > 1) // %c --> only one character
1351: prec = 1;
1352: pad = wid>prec ? wid - prec : 0; // has to be >= 0
1353: int i;
1354:
1355: if (ljust) { // print one char from t, then pad blanks
1.76 deraadt 1356: for (i = 0; i < n; i++)
1.75 millert 1357: *p++ = t[i];
1358: for (i = 0; i < pad; i++) {
1359: //printf(" ");
1360: *p++ = ' ';
1361: }
1362: } else { // print pad blanks, then prec chars from t
1363: for (i = 0; i < pad; i++) {
1364: //printf(" ");
1365: *p++ = ' ';
1366: }
1.76 deraadt 1367: for (i = 0; i < n; i++)
1.75 millert 1368: *p++ = t[i];
1369: }
1370: *p = 0;
1.1 tholo 1371: break;
1.75 millert 1372: }
1.18 millert 1373: default:
1374: FATAL("can't happen: bad conversion %c in format()", flag);
1.1 tholo 1375: }
1.75 millert 1376:
1.1 tholo 1377: tempfree(x);
1.5 kstailey 1378: p += strlen(p);
1.1 tholo 1379: s++;
1380: }
1381: *p = '\0';
1.13 kstailey 1382: free(fmt);
1.73 millert 1383: for ( ; a; a = a->nnext) { /* evaluate any remaining args */
1384: x = execute(a);
1385: tempfree(x);
1386: }
1.13 kstailey 1387: *pbuf = buf;
1388: *pbufsize = bufsize;
1389: return p - buf;
1.1 tholo 1390: }
1391:
1392: Cell *awksprintf(Node **a, int n) /* sprintf(a[0]) */
1393: {
1394: Cell *x;
1395: Node *y;
1.13 kstailey 1396: char *buf;
1397: int bufsz=3*recsize;
1.1 tholo 1398:
1.69 millert 1399: if ((buf = (char *) malloc(bufsz)) == NULL)
1.16 millert 1400: FATAL("out of memory in awksprintf");
1.1 tholo 1401: y = a[0]->nnext;
1402: x = execute(a[0]);
1.13 kstailey 1403: if (format(&buf, &bufsz, getsval(x), y) == -1)
1.16 millert 1404: FATAL("sprintf string %.30s... too long. can't happen.", buf);
1.1 tholo 1405: tempfree(x);
1406: x = gettemp();
1.13 kstailey 1407: x->sval = buf;
1.1 tholo 1408: x->tval = STR;
1409: return(x);
1410: }
1411:
1412: Cell *awkprintf(Node **a, int n) /* printf */
1413: { /* a[0] is list of args, starting with format string */
1414: /* a[1] is redirection operator, a[2] is redirection file */
1415: FILE *fp;
1416: Cell *x;
1417: Node *y;
1.13 kstailey 1418: char *buf;
1.9 kstailey 1419: int len;
1.13 kstailey 1420: int bufsz=3*recsize;
1.1 tholo 1421:
1.69 millert 1422: if ((buf = (char *) malloc(bufsz)) == NULL)
1.16 millert 1423: FATAL("out of memory in awkprintf");
1.1 tholo 1424: y = a[0]->nnext;
1425: x = execute(a[0]);
1.13 kstailey 1426: if ((len = format(&buf, &bufsz, getsval(x), y)) == -1)
1.16 millert 1427: FATAL("printf string %.30s... too long. can't happen.", buf);
1.1 tholo 1428: tempfree(x);
1429: if (a[1] == NULL) {
1.13 kstailey 1430: /* fputs(buf, stdout); */
1.9 kstailey 1431: fwrite(buf, len, 1, stdout);
1.8 kstailey 1432: if (ferror(stdout))
1.16 millert 1433: FATAL("write error on stdout");
1.1 tholo 1434: } else {
1.15 millert 1435: fp = redirect(ptoi(a[1]), a[2]);
1.13 kstailey 1436: /* fputs(buf, fp); */
1.9 kstailey 1437: fwrite(buf, len, 1, fp);
1.8 kstailey 1438: fflush(fp);
1439: if (ferror(fp))
1.16 millert 1440: FATAL("write error on %s", filename(fp));
1.1 tholo 1441: }
1.13 kstailey 1442: free(buf);
1.15 millert 1443: return(True);
1.1 tholo 1444: }
1445:
1446: Cell *arith(Node **a, int n) /* a[0] + a[1], etc. also -a[0] */
1447: {
1448: Awkfloat i, j = 0;
1449: double v;
1450: Cell *x, *y, *z;
1451:
1452: x = execute(a[0]);
1453: i = getfval(x);
1454: tempfree(x);
1.47 millert 1455: if (n != UMINUS && n != UPLUS) {
1.1 tholo 1456: y = execute(a[1]);
1457: j = getfval(y);
1458: tempfree(y);
1459: }
1460: z = gettemp();
1461: switch (n) {
1462: case ADD:
1463: i += j;
1464: break;
1465: case MINUS:
1466: i -= j;
1467: break;
1468: case MULT:
1469: i *= j;
1470: break;
1471: case DIVIDE:
1472: if (j == 0)
1.16 millert 1473: FATAL("division by zero");
1.1 tholo 1474: i /= j;
1475: break;
1476: case MOD:
1477: if (j == 0)
1.16 millert 1478: FATAL("division by zero in mod");
1.1 tholo 1479: modf(i/j, &v);
1480: i = i - j * v;
1481: break;
1482: case UMINUS:
1483: i = -i;
1484: break;
1.57 millert 1485: case UPLUS: /* handled by getfval(), above */
1.47 millert 1486: break;
1.1 tholo 1487: case POWER:
1488: if (j >= 0 && modf(j, &v) == 0.0) /* pos integer exponent */
1.12 millert 1489: i = ipow(i, (int) j);
1.63 millert 1490: else {
1.45 guenther 1491: errno = 0;
1.1 tholo 1492: i = errcheck(pow(i, j), "pow");
1.63 millert 1493: }
1.1 tholo 1494: break;
1495: default: /* can't happen */
1.16 millert 1496: FATAL("illegal arithmetic operator %d", n);
1.1 tholo 1497: }
1498: setfval(z, i);
1499: return(z);
1500: }
1501:
/*
 * ipow: return x raised to the integer power n by repeated squaring.
 * Any n <= 0 yields 1.
 *
 * The exponent's bits are consumed from the most significant end, which
 * performs the same multiplications in the same order as the recursive
 * formulation ipow(x, n) = [x *] ipow(x, n/2)^2.
 */
double ipow(double x, int n)	/* x**n.  ought to be done by pow, but isn't always */
{
	double acc = 1;
	int bit;

	if (n <= 0)
		return 1;
	/* locate the highest set bit of n */
	for (bit = 1; bit <= n / 2; bit *= 2)
		continue;
	for (; bit > 0; bit /= 2) {
		if ((n & bit) == 0)
			acc = acc * acc;
		else
			acc = x * acc * acc;
	}
	return acc;
}
1514:
1515: Cell *incrdecr(Node **a, int n) /* a[0]++, etc. */
1516: {
1517: Cell *x, *z;
1518: int k;
1519: Awkfloat xf;
1520:
1521: x = execute(a[0]);
1522: xf = getfval(x);
1523: k = (n == PREINCR || n == POSTINCR) ? 1 : -1;
1524: if (n == PREINCR || n == PREDECR) {
1525: setfval(x, xf + k);
1526: return(x);
1527: }
1528: z = gettemp();
1529: setfval(z, xf);
1530: setfval(x, xf + k);
1531: tempfree(x);
1532: return(z);
1533: }
1534:
/*
 * assign: implement plain assignment (n == ASSIGN) and the compound
 * assignment operators (+=, -=, *=, /=, %=, ^=).
 *
 * a[1] (the value) is evaluated before a[0] (the target).  Returns the
 * target cell x.  Division by zero in /= or %= and an unknown operator
 * are fatal.
 */
1535: Cell *assign(Node **a, int n) /* a[0] = a[1], a[0] += a[1], etc. */
1536: { /* this is subtle; don't muck with it. */
1537: Cell *x, *y;
1538: Awkfloat xf, yf;
1539: double v;
1540:
1541: y = execute(a[1]);
1542: x = execute(a[0]);
1543: if (n == ASSIGN) { /* ordinary assignment */
1.49 millert 1544: if (x == y && !(x->tval & (FLD|REC)) && x != nfloc)
1545: ; /* self-assignment: leave alone unless it's a field or NF */
1.1 tholo 1546: else if ((y->tval & (STR|NUM)) == (STR|NUM)) {
/* y carries both a string and a number: read the number before
 * setsval() touches x, then copy both representations across */
1.82 millert 1547: yf = getfval(y);
1.1 tholo 1548: setsval(x, getsval(y));
1.82 millert 1549: x->fval = yf;
1.1 tholo 1550: x->tval |= NUM;
1551: }
1.13 kstailey 1552: else if (isstr(y))
1.1 tholo 1553: setsval(x, getsval(y));
1.13 kstailey 1554: else if (isnum(y))
1.1 tholo 1555: setfval(x, getfval(y));
1556: else
1557: funnyvar(y, "read value of");
1558: tempfree(y);
1559: return(x);
1560: }
/* compound assignment: combine the two numeric values, store in x */
1561: xf = getfval(x);
1562: yf = getfval(y);
1563: switch (n) {
1564: case ADDEQ:
1565: xf += yf;
1566: break;
1567: case SUBEQ:
1568: xf -= yf;
1569: break;
1570: case MULTEQ:
1571: xf *= yf;
1572: break;
1573: case DIVEQ:
1574: if (yf == 0)
1.16 millert 1575: FATAL("division by zero in /=");
1.1 tholo 1576: xf /= yf;
1577: break;
1578: case MODEQ:
1579: if (yf == 0)
1.16 millert 1580: FATAL("division by zero in %%=");
/* remainder = xf - yf * (integer part of xf/yf) */
1.1 tholo 1581: modf(xf/yf, &v);
1582: xf = xf - yf * v;
1583: break;
1584: case POWEQ:
1585: if (yf >= 0 && modf(yf, &v) == 0.0) /* pos integer exponent */
1.12 millert 1586: xf = ipow(xf, (int) yf);
1.63 millert 1587: else {
/* clear errno so errcheck() only sees an error raised by pow() */
1.45 guenther 1588: errno = 0;
1.1 tholo 1589: xf = errcheck(pow(xf, yf), "pow");
1.63 millert 1590: }
1.1 tholo 1591: break;
1592: default:
1.16 millert 1593: FATAL("illegal assignment operator %d", n);
1.1 tholo 1594: break;
1595: }
1596: tempfree(y);
1597: setfval(x, xf);
1598: return(x);
1599: }
1600:
1601: Cell *cat(Node **a, int q) /* a[0] cat a[1] */
1602: {
1603: Cell *x, *y, *z;
1604: int n1, n2;
1.49 millert 1605: char *s = NULL;
1606: int ssz = 0;
1.1 tholo 1607:
1608: x = execute(a[0]);
1.49 millert 1609: n1 = strlen(getsval(x));
1.74 millert 1610: adjbuf(&s, &ssz, n1 + 1, recsize, 0, "cat1");
1.61 millert 1611: memcpy(s, x->sval, n1);
1.49 millert 1612:
1.74 millert 1613: tempfree(x);
1614:
1.1 tholo 1615: y = execute(a[1]);
1.49 millert 1616: n2 = strlen(getsval(y));
1.61 millert 1617: adjbuf(&s, &ssz, n1 + n2 + 1, recsize, 0, "cat2");
1.53 millert 1618: memcpy(s + n1, y->sval, n2);
1619: s[n1 + n2] = '\0';
1.49 millert 1620:
1.1 tholo 1621: tempfree(y);
1.49 millert 1622:
1.1 tholo 1623: z = gettemp();
1624: z->sval = s;
1625: z->tval = STR;
1.49 millert 1626:
1.1 tholo 1627: return(z);
1628: }
1629:
1630: Cell *pastat(Node **a, int n) /* a[0] { a[1] } */
1631: {
1632: Cell *x;
1633:
1.51 millert 1634: if (a[0] == NULL)
1.1 tholo 1635: x = execute(a[1]);
1636: else {
1637: x = execute(a[0]);
1638: if (istrue(x)) {
1639: tempfree(x);
1640: x = execute(a[1]);
1641: }
1642: }
1643: return x;
1644: }
1645:
1646: Cell *dopa2(Node **a, int n) /* a[0], a[1] { a[2] } */
1647: {
1648: Cell *x;
1649: int pair;
1650:
1.15 millert 1651: pair = ptoi(a[3]);
1.1 tholo 1652: if (pairstack[pair] == 0) {
1653: x = execute(a[0]);
1654: if (istrue(x))
1655: pairstack[pair] = 1;
1656: tempfree(x);
1657: }
1658: if (pairstack[pair] == 1) {
1659: x = execute(a[1]);
1660: if (istrue(x))
1661: pairstack[pair] = 0;
1662: tempfree(x);
1663: x = execute(a[2]);
1664: return(x);
1665: }
1.15 millert 1666: return(False);
1.1 tholo 1667: }
1668:
1669: Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */
1670: {
1.51 millert 1671: Cell *x = NULL, *y, *ap;
1.53 millert 1672: const char *s, *origs, *t;
1.56 millert 1673: const char *fs = NULL;
1674: char *origfs = NULL;
1.1 tholo 1675: int sep;
1.53 millert 1676: char temp, num[50];
1.75 millert 1677: int j, n, tempstat, arg3type;
1.69 millert 1678: double result;
1.1 tholo 1679:
1680: y = execute(a[0]); /* source string */
1.43 fcambus 1681: origs = s = strdup(getsval(y));
1.44 fcambus 1682: if (s == NULL)
1683: FATAL("out of space in split");
1.73 millert 1684: tempfree(y);
1.15 millert 1685: arg3type = ptoi(a[3]);
1.75 millert 1686: if (a[2] == NULL) { /* BUG: CSV should override implicit fs but not explicit */
1.49 millert 1687: fs = getsval(fsloc);
1.75 millert 1688: } else if (arg3type == STRING) { /* split(str,arr,"string") */
1.1 tholo 1689: x = execute(a[2]);
1.56 millert 1690: fs = origfs = strdup(getsval(x));
1.49 millert 1691: if (fs == NULL)
1692: FATAL("out of space in split");
1693: tempfree(x);
1.75 millert 1694: } else if (arg3type == REGEXPR) {
1.13 kstailey 1695: fs = "(regexpr)"; /* split(str,arr,/regexpr/) */
1.75 millert 1696: } else {
1.16 millert 1697: FATAL("illegal type of split");
1.75 millert 1698: }
1.1 tholo 1699: sep = *fs;
1700: ap = execute(a[1]); /* array name */
1.75 millert 1701: /* BUG 7/26/22: this appears not to reset array: see C1/asplit */
1.1 tholo 1702: freesymtab(ap);
1.60 millert 1703: DPRINTF("split: s=|%s|, a=%s, sep=|%s|\n", s, NN(ap->nval), fs);
1.1 tholo 1704: ap->tval &= ~STR;
1705: ap->tval |= ARR;
1706: ap->sval = (char *) makesymtab(NSYMTAB);
1707:
1708: n = 0;
1.33 millert 1709: if (arg3type == REGEXPR && strlen((char*)((fa*)a[2])->restr) == 0) {
1710: /* split(s, a, //); have to arrange that it looks like empty sep */
1711: arg3type = 0;
1712: fs = "";
1713: sep = 0;
1714: }
1.25 millert 1715: if (*s != '\0' && (strlen(fs) > 1 || arg3type == REGEXPR)) { /* reg expr */
1.1 tholo 1716: fa *pfa;
1.15 millert 1717: if (arg3type == REGEXPR) { /* it's ready already */
1.1 tholo 1718: pfa = (fa *) a[2];
1719: } else {
1720: pfa = makedfa(fs, 1);
1721: }
1722: if (nematch(pfa,s)) {
1723: tempstat = pfa->initstat;
1724: pfa->initstat = 2;
1725: do {
1726: n++;
1.53 millert 1727: snprintf(num, sizeof(num), "%d", n);
1.1 tholo 1728: temp = *patbeg;
1.53 millert 1729: setptr(patbeg, '\0');
1.69 millert 1730: if (is_number(s, & result))
1731: setsymtab(num, s, result, STR|NUM, (Array *) ap->sval);
1.1 tholo 1732: else
1733: setsymtab(num, s, 0.0, STR, (Array *) ap->sval);
1.53 millert 1734: setptr(patbeg, temp);
1.1 tholo 1735: s = patbeg + patlen;
1.57 millert 1736: if (*(patbeg+patlen-1) == '\0' || *s == '\0') {
1.1 tholo 1737: n++;
1.53 millert 1738: snprintf(num, sizeof(num), "%d", n);
1.1 tholo 1739: setsymtab(num, "", 0.0, STR, (Array *) ap->sval);
1740: pfa->initstat = tempstat;
1741: goto spdone;
1742: }
1743: } while (nematch(pfa,s));
1.25 millert 1744: pfa->initstat = tempstat; /* bwk: has to be here to reset */
1745: /* cf gsub and refldbld */
1.1 tholo 1746: }
1747: n++;
1.53 millert 1748: snprintf(num, sizeof(num), "%d", n);
1.69 millert 1749: if (is_number(s, & result))
1750: setsymtab(num, s, result, STR|NUM, (Array *) ap->sval);
1.1 tholo 1751: else
1752: setsymtab(num, s, 0.0, STR, (Array *) ap->sval);
1753: spdone:
1754: pfa = NULL;
1.75 millert 1755:
1756: } else if (a[2] == NULL && CSV) { /* CSV only if no explicit separator */
1757: char *newt = (char *) malloc(strlen(s)); /* for building new string; reuse for each field */
1758: for (;;) {
1759: char *fr = newt;
1760: n++;
1761: if (*s == '"' ) { /* start of "..." */
1762: for (s++ ; *s != '\0'; ) {
1763: if (*s == '"' && s[1] != '\0' && s[1] == '"') {
1764: s += 2; /* doubled quote */
1765: *fr++ = '"';
1766: } else if (*s == '"' && (s[1] == '\0' || s[1] == ',')) {
1767: s++; /* skip over closing quote */
1768: break;
1769: } else {
1770: *fr++ = *s++;
1771: }
1772: }
1773: *fr++ = 0;
1774: } else { /* unquoted field */
1775: while (*s != ',' && *s != '\0')
1776: *fr++ = *s++;
1777: *fr++ = 0;
1778: }
1779: snprintf(num, sizeof(num), "%d", n);
1780: if (is_number(newt, &result))
1781: setsymtab(num, newt, result, STR|NUM, (Array *) ap->sval);
1782: else
1783: setsymtab(num, newt, 0.0, STR, (Array *) ap->sval);
1784: if (*s++ == '\0')
1785: break;
1786: }
1787: free(newt);
1788:
1789: } else if (!CSV && sep == ' ') { /* usual case: split on white space */
1.1 tholo 1790: for (n = 0; ; ) {
1.57 millert 1791: #define ISWS(c) ((c) == ' ' || (c) == '\t' || (c) == '\n')
1792: while (ISWS(*s))
1.1 tholo 1793: s++;
1.57 millert 1794: if (*s == '\0')
1.1 tholo 1795: break;
1796: n++;
1797: t = s;
1798: do
1799: s++;
1.57 millert 1800: while (*s != '\0' && !ISWS(*s));
1.1 tholo 1801: temp = *s;
1.53 millert 1802: setptr(s, '\0');
1803: snprintf(num, sizeof(num), "%d", n);
1.69 millert 1804: if (is_number(t, & result))
1805: setsymtab(num, t, result, STR|NUM, (Array *) ap->sval);
1.1 tholo 1806: else
1807: setsymtab(num, t, 0.0, STR, (Array *) ap->sval);
1.53 millert 1808: setptr(s, temp);
1.57 millert 1809: if (*s != '\0')
1.1 tholo 1810: s++;
1811: }
1.75 millert 1812:
1.1 tholo 1813: } else if (sep == 0) { /* new: split(s, a, "") => 1 char/elem */
1.75 millert 1814: for (n = 0; *s != '\0'; s += u8_nextlen(s)) {
1815: char buf[10];
1.1 tholo 1816: n++;
1.53 millert 1817: snprintf(num, sizeof(num), "%d", n);
1.75 millert 1818:
1819: for (j = 0; j < u8_nextlen(s); j++) {
1820: buf[j] = s[j];
1821: }
1822: buf[j] = '\0';
1823:
1.17 millert 1824: if (isdigit((uschar)buf[0]))
1.1 tholo 1825: setsymtab(num, buf, atof(buf), STR|NUM, (Array *) ap->sval);
1826: else
1827: setsymtab(num, buf, 0.0, STR, (Array *) ap->sval);
1828: }
1.75 millert 1829:
1830: } else if (*s != '\0') { /* some random single character */
1.1 tholo 1831: for (;;) {
1832: n++;
1833: t = s;
1.85 millert 1834: while (*s != sep && *s != '\0')
1.1 tholo 1835: s++;
1836: temp = *s;
1.53 millert 1837: setptr(s, '\0');
1838: snprintf(num, sizeof(num), "%d", n);
1.69 millert 1839: if (is_number(t, & result))
1840: setsymtab(num, t, result, STR|NUM, (Array *) ap->sval);
1.1 tholo 1841: else
1842: setsymtab(num, t, 0.0, STR, (Array *) ap->sval);
1.53 millert 1843: setptr(s, temp);
1.57 millert 1844: if (*s++ == '\0')
1.1 tholo 1845: break;
1846: }
1847: }
1848: tempfree(ap);
1.53 millert 1849: xfree(origs);
1850: xfree(origfs);
1.1 tholo 1851: x = gettemp();
1852: x->tval = NUM;
1853: x->fval = n;
1854: return(x);
1855: }
1856:
1857: Cell *condexpr(Node **a, int n) /* a[0] ? a[1] : a[2] */
1858: {
1859: Cell *x;
1860:
1861: x = execute(a[0]);
1862: if (istrue(x)) {
1863: tempfree(x);
1864: x = execute(a[1]);
1865: } else {
1866: tempfree(x);
1867: x = execute(a[2]);
1868: }
1869: return(x);
1870: }
1871:
1872: Cell *ifstat(Node **a, int n) /* if (a[0]) a[1]; else a[2] */
1873: {
1874: Cell *x;
1875:
1876: x = execute(a[0]);
1877: if (istrue(x)) {
1878: tempfree(x);
1879: x = execute(a[1]);
1.51 millert 1880: } else if (a[2] != NULL) {
1.1 tholo 1881: tempfree(x);
1882: x = execute(a[2]);
1883: }
1884: return(x);
1885: }
1886:
1887: Cell *whilestat(Node **a, int n) /* while (a[0]) a[1] */
1888: {
1889: Cell *x;
1890:
1891: for (;;) {
1892: x = execute(a[0]);
1893: if (!istrue(x))
1894: return(x);
1895: tempfree(x);
1896: x = execute(a[1]);
1897: if (isbreak(x)) {
1.15 millert 1898: x = True;
1.1 tholo 1899: return(x);
1900: }
1901: if (isnext(x) || isexit(x) || isret(x))
1902: return(x);
1903: tempfree(x);
1904: }
1905: }
1906:
1907: Cell *dostat(Node **a, int n) /* do a[0]; while(a[1]) */
1908: {
1909: Cell *x;
1910:
1911: for (;;) {
1912: x = execute(a[0]);
1913: if (isbreak(x))
1.15 millert 1914: return True;
1.17 millert 1915: if (isnext(x) || isexit(x) || isret(x))
1.1 tholo 1916: return(x);
1917: tempfree(x);
1918: x = execute(a[1]);
1919: if (!istrue(x))
1920: return(x);
1921: tempfree(x);
1922: }
1923: }
1924:
1925: Cell *forstat(Node **a, int n) /* for (a[0]; a[1]; a[2]) a[3] */
1926: {
1927: Cell *x;
1928:
1929: x = execute(a[0]);
1930: tempfree(x);
1931: for (;;) {
1.51 millert 1932: if (a[1]!=NULL) {
1.1 tholo 1933: x = execute(a[1]);
1934: if (!istrue(x)) return(x);
1935: else tempfree(x);
1936: }
1937: x = execute(a[3]);
1938: if (isbreak(x)) /* turn off break */
1.15 millert 1939: return True;
1.1 tholo 1940: if (isnext(x) || isexit(x) || isret(x))
1941: return(x);
1942: tempfree(x);
1943: x = execute(a[2]);
1944: tempfree(x);
1945: }
1946: }
1947:
1948: Cell *instat(Node **a, int n) /* for (a[0] in a[1]) a[2] */
1949: {
1950: Cell *x, *vp, *arrayp, *cp, *ncp;
1951: Array *tp;
1952: int i;
1953:
1954: vp = execute(a[0]);
1955: arrayp = execute(a[1]);
1956: if (!isarr(arrayp)) {
1.15 millert 1957: return True;
1.1 tholo 1958: }
1959: tp = (Array *) arrayp->sval;
1960: tempfree(arrayp);
1961: for (i = 0; i < tp->size; i++) { /* this routine knows too much */
1962: for (cp = tp->tab[i]; cp != NULL; cp = ncp) {
1963: setsval(vp, cp->nval);
1964: ncp = cp->cnext;
1965: x = execute(a[2]);
1966: if (isbreak(x)) {
1967: tempfree(vp);
1.15 millert 1968: return True;
1.1 tholo 1969: }
1970: if (isnext(x) || isexit(x) || isret(x)) {
1971: tempfree(vp);
1972: return(x);
1973: }
1974: tempfree(x);
1975: }
1976: }
1.15 millert 1977: return True;
1.1 tholo 1978: }
1979:
1.57 millert 1980: static char *nawk_convert(const char *s, int (*fun_c)(int),
1981: wint_t (*fun_wc)(wint_t))
1982: {
1983: char *buf = NULL;
1984: char *pbuf = NULL;
1985: const char *ps = NULL;
1986: size_t n = 0;
1987: wchar_t wc;
1.78 millert 1988: const size_t sz = awk_mb_cur_max;
1.75 millert 1989: int unused;
1.57 millert 1990:
1991: if (sz == 1) {
1992: buf = tostring(s);
1993:
1994: for (pbuf = buf; *pbuf; pbuf++)
1995: *pbuf = fun_c((uschar)*pbuf);
1996:
1997: return buf;
1998: } else {
1999: /* upper/lower character may be shorter/longer */
2000: buf = tostringN(s, strlen(s) * sz + 1);
2001:
1.67 millert 2002: (void) mbtowc(NULL, NULL, 0); /* reset internal state */
2003: /*
2004: * Reset internal state here too.
2005: * Assign result to avoid a compiler warning. (Casting to void
2006: * doesn't work.)
2007: * Increment said variable to avoid a different warning.
2008: */
1.75 millert 2009: unused = wctomb(NULL, L'\0');
1.67 millert 2010: unused++;
1.57 millert 2011:
2012: ps = s;
2013: pbuf = buf;
1.67 millert 2014: while (n = mbtowc(&wc, ps, sz),
1.57 millert 2015: n > 0 && n != (size_t)-1 && n != (size_t)-2)
2016: {
2017: ps += n;
2018:
1.67 millert 2019: n = wctomb(pbuf, fun_wc(wc));
1.57 millert 2020: if (n == (size_t)-1)
2021: FATAL("illegal wide character %s", s);
2022:
2023: pbuf += n;
2024: }
2025:
2026: *pbuf = '\0';
2027:
2028: if (n)
2029: FATAL("illegal byte sequence %s", s);
2030:
2031: return buf;
2032: }
2033: }
2034:
#ifdef __DJGPP__
/*
 * Replacements used when __DJGPP__ is defined: values 0..255 are mapped
 * through the single-byte toupper()/tolower(); anything else is returned
 * unchanged.
 */
static wint_t towupper(wint_t wc)
{
	const int in_byte_range = (wc >= 0 && wc < 256);

	if (!in_byte_range)
		return wc;
	return toupper(wc & 0xFF);
}

static wint_t towlower(wint_t wc)
{
	const int in_byte_range = (wc >= 0 && wc < 256);

	if (!in_byte_range)
		return wc;
	return tolower(wc & 0xFF);
}
#endif
2052:
/* nawk_toupper: return a new upper-cased copy of s, built by nawk_convert(). */
static char *nawk_toupper(const char *s)
{
	char *upper = nawk_convert(s, toupper, towupper);

	return upper;
}
2057:
/* nawk_tolower: return a new lower-cased copy of s, built by nawk_convert(). */
static char *nawk_tolower(const char *s)
{
	char *lower = nawk_convert(s, tolower, towlower);

	return lower;
}
2062:
1.1 tholo 2063: Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg list */
2064: {
2065: Cell *x, *y;
1.86 ! millert 2066: Awkfloat u = 0;
1.59 millert 2067: int t, sz;
1.33 millert 2068: Awkfloat tmp;
1.59 millert 2069: char *buf, *fmt;
1.1 tholo 2070: Node *nextarg;
2071: FILE *fp;
1.47 millert 2072: int status = 0;
1.59 millert 2073: time_t tv;
1.68 millert 2074: struct tm *tm, tmbuf;
1.83 millert 2075: int estatus = 0;
1.1 tholo 2076:
1.15 millert 2077: t = ptoi(a[0]);
1.1 tholo 2078: x = execute(a[1]);
2079: nextarg = a[1]->nnext;
2080: switch (t) {
2081: case FLENGTH:
1.18 millert 2082: if (isarr(x))
2083: u = ((Array *) x->sval)->nelem; /* GROT. should be function*/
2084: else
1.75 millert 2085: u = u8_strlen(getsval(x));
1.18 millert 2086: break;
1.1 tholo 2087: case FLOG:
1.45 guenther 2088: errno = 0;
1.63 millert 2089: u = errcheck(log(getfval(x)), "log");
2090: break;
1.1 tholo 2091: case FINT:
2092: modf(getfval(x), &u); break;
2093: case FEXP:
1.45 guenther 2094: errno = 0;
1.63 millert 2095: u = errcheck(exp(getfval(x)), "exp");
2096: break;
1.1 tholo 2097: case FSQRT:
1.45 guenther 2098: errno = 0;
1.63 millert 2099: u = errcheck(sqrt(getfval(x)), "sqrt");
2100: break;
1.1 tholo 2101: case FSIN:
2102: u = sin(getfval(x)); break;
2103: case FCOS:
2104: u = cos(getfval(x)); break;
2105: case FATAN:
1.51 millert 2106: if (nextarg == NULL) {
1.16 millert 2107: WARNING("atan2 requires two arguments; returning 1.0");
1.1 tholo 2108: u = 1.0;
2109: } else {
2110: y = execute(a[1]->nnext);
2111: u = atan2(getfval(x), getfval(y));
2112: tempfree(y);
2113: nextarg = nextarg->nnext;
2114: }
1.29 pyr 2115: break;
2116: case FCOMPL:
2117: u = ~((int)getfval(x));
2118: break;
2119: case FAND:
2120: if (nextarg == 0) {
2121: WARNING("and requires two arguments; returning 0");
2122: u = 0;
2123: break;
2124: }
2125: y = execute(a[1]->nnext);
2126: u = ((int)getfval(x)) & ((int)getfval(y));
2127: tempfree(y);
2128: nextarg = nextarg->nnext;
2129: break;
2130: case FFOR:
2131: if (nextarg == 0) {
2132: WARNING("or requires two arguments; returning 0");
2133: u = 0;
2134: break;
2135: }
2136: y = execute(a[1]->nnext);
2137: u = ((int)getfval(x)) | ((int)getfval(y));
2138: tempfree(y);
2139: nextarg = nextarg->nnext;
2140: break;
2141: case FXOR:
2142: if (nextarg == 0) {
1.41 ajacouto 2143: WARNING("xor requires two arguments; returning 0");
1.29 pyr 2144: u = 0;
2145: break;
2146: }
2147: y = execute(a[1]->nnext);
2148: u = ((int)getfval(x)) ^ ((int)getfval(y));
2149: tempfree(y);
2150: nextarg = nextarg->nnext;
2151: break;
2152: case FLSHIFT:
2153: if (nextarg == 0) {
1.41 ajacouto 2154: WARNING("lshift requires two arguments; returning 0");
1.29 pyr 2155: u = 0;
2156: break;
2157: }
2158: y = execute(a[1]->nnext);
2159: u = ((int)getfval(x)) << ((int)getfval(y));
2160: tempfree(y);
2161: nextarg = nextarg->nnext;
2162: break;
2163: case FRSHIFT:
2164: if (nextarg == 0) {
1.41 ajacouto 2165: WARNING("rshift requires two arguments; returning 0");
1.29 pyr 2166: u = 0;
2167: break;
2168: }
2169: y = execute(a[1]->nnext);
2170: u = ((int)getfval(x)) >> ((int)getfval(y));
2171: tempfree(y);
2172: nextarg = nextarg->nnext;
1.1 tholo 2173: break;
2174: case FSYSTEM:
2175: fflush(stdout); /* in case something is buffered already */
1.83 millert 2176: estatus = status = system(getsval(x));
1.47 millert 2177: if (status != -1) {
2178: if (WIFEXITED(status)) {
1.83 millert 2179: estatus = WEXITSTATUS(status);
1.47 millert 2180: } else if (WIFSIGNALED(status)) {
1.83 millert 2181: estatus = WTERMSIG(status) + 256;
1.47 millert 2182: #ifdef WCOREDUMP
2183: if (WCOREDUMP(status))
1.83 millert 2184: estatus += 256;
1.47 millert 2185: #endif
2186: } else /* something else?!? */
1.83 millert 2187: estatus = 0;
1.47 millert 2188: }
1.83 millert 2189: /* else estatus was set to -1 */
2190: u = estatus;
1.1 tholo 2191: break;
2192: case FRAND:
1.51 millert 2193: /* random() returns numbers in [0..2^31-1]
2194: * in order to get a number in [0, 1), divide it by 2^31
2195: */
2196: u = (Awkfloat) random() / (0x7fffffffL + 0x1UL);
1.1 tholo 2197: break;
2198: case FSRAND:
1.39 deraadt 2199: if (isrec(x)) { /* no argument provided */
2200: u = time(NULL);
2201: tmp = u;
2202: srandom((unsigned int) u);
2203: } else {
1.33 millert 2204: u = getfval(x);
2205: tmp = u;
1.37 deraadt 2206: srandom_deterministic((unsigned int) u);
1.24 millert 2207: }
1.39 deraadt 2208: u = srand_seed;
2209: srand_seed = tmp;
1.1 tholo 2210: break;
2211: case FTOUPPER:
2212: case FTOLOWER:
1.57 millert 2213: if (t == FTOUPPER)
2214: buf = nawk_toupper(getsval(x));
2215: else
2216: buf = nawk_tolower(getsval(x));
1.1 tholo 2217: tempfree(x);
2218: x = gettemp();
2219: setsval(x, buf);
1.13 kstailey 2220: free(buf);
1.1 tholo 2221: return x;
2222: case FFLUSH:
1.18 millert 2223: if (isrec(x) || strlen(getsval(x)) == 0) {
2224: flush_all(); /* fflush() or fflush("") -> all */
2225: u = 0;
1.57 millert 2226: } else if ((fp = openfile(FFLUSH, getsval(x), NULL)) == NULL)
1.1 tholo 2227: u = EOF;
2228: else
2229: u = fflush(fp);
1.68 millert 2230: break;
2231: case FMKTIME:
2232: memset(&tmbuf, 0, sizeof(tmbuf));
2233: tm = &tmbuf;
2234: t = sscanf(getsval(x), "%d %d %d %d %d %d %d",
2235: &tm->tm_year, &tm->tm_mon, &tm->tm_mday, &tm->tm_hour,
2236: &tm->tm_min, &tm->tm_sec, &tm->tm_isdst);
2237: switch (t) {
2238: case 6:
2239: tm->tm_isdst = -1; /* let mktime figure it out */
2240: /* FALLTHROUGH */
2241: case 7:
2242: tm->tm_year -= 1900;
2243: tm->tm_mon--;
2244: u = mktime(tm);
2245: break;
2246: default:
2247: u = -1;
2248: break;
2249: }
1.1 tholo 2250: break;
1.59 millert 2251: case FSYSTIME:
2252: u = time((time_t *) 0);
2253: break;
2254: case FSTRFTIME:
2255: /* strftime([format [,timestamp]]) */
2256: if (nextarg) {
2257: y = execute(nextarg);
2258: nextarg = nextarg->nnext;
2259: tv = (time_t) getfval(y);
2260: tempfree(y);
2261: } else
2262: tv = time((time_t *) 0);
2263: tm = localtime(&tv);
2264: if (tm == NULL)
2265: FATAL("bad time %ld", (long)tv);
2266:
2267: if (isrec(x)) {
2268: /* format argument not provided, use default */
2269: fmt = tostring("%a %b %d %H:%M:%S %Z %Y");
2270: } else
2271: fmt = tostring(getsval(x));
2272:
2273: sz = 32;
2274: buf = NULL;
2275: do {
1.69 millert 2276: if ((buf = (char *) reallocarray(buf, 2, sz)) == NULL)
1.59 millert 2277: FATAL("out of memory in strftime");
2278: sz *= 2;
2279: } while (strftime(buf, sz, fmt, tm) == 0 && fmt[0] != '\0');
2280:
2281: y = gettemp();
2282: setsval(y, buf);
2283: free(fmt);
2284: free(buf);
2285:
2286: return y;
1.1 tholo 2287: default: /* can't happen */
1.16 millert 2288: FATAL("illegal function type %d", t);
1.1 tholo 2289: break;
2290: }
2291: tempfree(x);
2292: x = gettemp();
2293: setfval(x, u);
1.51 millert 2294: if (nextarg != NULL) {
1.16 millert 2295: WARNING("warning: function has too many arguments");
1.73 millert 2296: for ( ; nextarg; nextarg = nextarg->nnext) {
2297: y = execute(nextarg);
2298: tempfree(y);
2299: }
1.1 tholo 2300: }
2301: return(x);
2302: }
2303:
2304: Cell *printstat(Node **a, int n) /* print a[0] */
2305: {
2306: Node *x;
2307: Cell *y;
2308: FILE *fp;
2309:
1.51 millert 2310: if (a[1] == NULL) /* a[1] is redirection operator, a[2] is file */
1.1 tholo 2311: fp = stdout;
2312: else
1.15 millert 2313: fp = redirect(ptoi(a[1]), a[2]);
1.1 tholo 2314: for (x = a[0]; x != NULL; x = x->nnext) {
2315: y = execute(x);
1.18 millert 2316: fputs(getpssval(y), fp);
1.1 tholo 2317: tempfree(y);
2318: if (x->nnext == NULL)
1.49 millert 2319: fputs(getsval(orsloc), fp);
1.1 tholo 2320: else
1.49 millert 2321: fputs(getsval(ofsloc), fp);
1.1 tholo 2322: }
1.51 millert 2323: if (a[1] != NULL)
1.1 tholo 2324: fflush(fp);
2325: if (ferror(fp))
1.16 millert 2326: FATAL("write error on %s", filename(fp));
1.15 millert 2327: return(True);
1.1 tholo 2328: }
2329:
2330: Cell *nullproc(Node **a, int n)
2331: {
2332: return 0;
2333: }
2334:
2335:
2336: FILE *redirect(int a, Node *b) /* set up all i/o redirections */
2337: {
2338: FILE *fp;
2339: Cell *x;
2340: char *fname;
2341:
2342: x = execute(b);
2343: fname = getsval(x);
1.57 millert 2344: fp = openfile(a, fname, NULL);
1.1 tholo 2345: if (fp == NULL)
1.16 millert 2346: FATAL("can't open file %s", fname);
1.1 tholo 2347: tempfree(x);
2348: return fp;
2349: }
2350:
/* One slot in the table of streams that awk has open. */
struct files {
	FILE		*fp;	/* the stream; openfile() treats NULL as a free slot */
	const char	*fname;	/* name the stream was opened under */
	int		 mode;	/* '|', 'a', 'w' => LE/LT, GT */
};

struct files *files;	/* the table: allocated by stdinit(), grown by openfile() */

size_t nfiles;		/* number of slots in files[] */
1.1 tholo 2358:
1.57 millert 2359: static void stdinit(void) /* in case stdin, etc., are not constants */
1.16 millert 2360: {
1.33 millert 2361: nfiles = FOPEN_MAX;
1.69 millert 2362: files = (struct files *) calloc(nfiles, sizeof(*files));
1.33 millert 2363: if (files == NULL)
1.57 millert 2364: FATAL("can't allocate file memory for %zu files", nfiles);
1.33 millert 2365: files[0].fp = stdin;
1.72 millert 2366: files[0].fname = tostring("/dev/stdin");
1.33 millert 2367: files[0].mode = LT;
2368: files[1].fp = stdout;
1.72 millert 2369: files[1].fname = tostring("/dev/stdout");
1.33 millert 2370: files[1].mode = GT;
2371: files[2].fp = stderr;
1.72 millert 2372: files[2].fname = tostring("/dev/stderr");
1.33 millert 2373: files[2].mode = GT;
1.16 millert 2374: }
2375:
1.57 millert 2376: FILE *openfile(int a, const char *us, bool *pnewflag)
1.1 tholo 2377: {
1.18 millert 2378: const char *s = us;
1.57 millert 2379: size_t i;
2380: int m;
1.51 millert 2381: FILE *fp = NULL;
1.1 tholo 2382:
2383: if (*s == '\0')
1.16 millert 2384: FATAL("null file name in print or getline");
1.57 millert 2385: for (i = 0; i < nfiles; i++)
2386: if (files[i].fname && strcmp(s, files[i].fname) == 0 &&
2387: (a == files[i].mode || (a==APPEND && files[i].mode==GT) ||
2388: a == FFLUSH)) {
2389: if (pnewflag)
2390: *pnewflag = false;
2391: return files[i].fp;
1.13 kstailey 2392: }
2393: if (a == FFLUSH) /* didn't find it, so don't create it! */
2394: return NULL;
2395:
1.57 millert 2396: for (i = 0; i < nfiles; i++)
1.51 millert 2397: if (files[i].fp == NULL)
1.1 tholo 2398: break;
1.33 millert 2399: if (i >= nfiles) {
2400: struct files *nf;
1.57 millert 2401: size_t nnf = nfiles + FOPEN_MAX;
1.69 millert 2402: nf = (struct files *) reallocarray(files, nnf, sizeof(*nf));
1.33 millert 2403: if (nf == NULL)
1.57 millert 2404: FATAL("cannot grow files for %s and %zu files", s, nnf);
1.33 millert 2405: memset(&nf[nfiles], 0, FOPEN_MAX * sizeof(*nf));
2406: nfiles = nnf;
2407: files = nf;
2408: }
1.1 tholo 2409: fflush(stdout); /* force a semblance of order */
2410: m = a;
2411: if (a == GT) {
2412: fp = fopen(s, "w");
2413: } else if (a == APPEND) {
2414: fp = fopen(s, "a");
2415: m = GT; /* so can mix > and >> */
2416: } else if (a == '|') { /* output pipe */
2417: fp = popen(s, "w");
2418: } else if (a == LE) { /* input pipe */
2419: fp = popen(s, "r");
2420: } else if (a == LT) { /* getline <file */
2421: fp = strcmp(s, "-") == 0 ? stdin : fopen(s, "r"); /* "-" is stdin */
2422: } else /* can't happen */
1.16 millert 2423: FATAL("illegal redirection %d", a);
1.1 tholo 2424: if (fp != NULL) {
2425: files[i].fname = tostring(s);
2426: files[i].fp = fp;
2427: files[i].mode = m;
1.57 millert 2428: if (pnewflag)
2429: *pnewflag = true;
1.56 millert 2430: if (fp != stdin && fp != stdout && fp != stderr)
2431: (void) fcntl(fileno(fp), F_SETFD, FD_CLOEXEC);
1.1 tholo 2432: }
2433: return fp;
2434: }
2435:
1.18 millert 2436: const char *filename(FILE *fp)
1.1 tholo 2437: {
1.57 millert 2438: size_t i;
1.1 tholo 2439:
1.33 millert 2440: for (i = 0; i < nfiles; i++)
1.1 tholo 2441: if (fp == files[i].fp)
2442: return files[i].fname;
2443: return "???";
2444: }
2445:
1.71 millert 2446: Cell *closefile(Node **a, int n)
2447: {
1.57 millert 2448: Cell *x;
2449: size_t i;
2450: bool stat;
1.67 millert 2451:
1.57 millert 2452: x = execute(a[0]);
2453: getsval(x);
2454: stat = true;
2455: for (i = 0; i < nfiles; i++) {
2456: if (!files[i].fname || strcmp(x->sval, files[i].fname) != 0)
2457: continue;
1.71 millert 2458: if (files[i].mode == GT || files[i].mode == '|')
2459: fflush(files[i].fp);
2460: if (ferror(files[i].fp)) {
2461: if ((files[i].mode == GT && files[i].fp != stderr)
2462: || files[i].mode == '|')
2463: FATAL("write error on %s", files[i].fname);
2464: else
2465: WARNING("i/o error occurred on %s", files[i].fname);
2466: }
1.65 millert 2467: if (files[i].fp == stdin || files[i].fp == stdout ||
2468: files[i].fp == stderr)
2469: stat = freopen("/dev/null", "r+", files[i].fp) == NULL;
2470: else if (files[i].mode == '|' || files[i].mode == LE)
1.57 millert 2471: stat = pclose(files[i].fp) == -1;
2472: else
2473: stat = fclose(files[i].fp) == EOF;
2474: if (stat)
1.71 millert 2475: WARNING("i/o error occurred closing %s", files[i].fname);
1.72 millert 2476: xfree(files[i].fname);
1.57 millert 2477: files[i].fname = NULL; /* watch out for ref thru this */
2478: files[i].fp = NULL;
1.65 millert 2479: break;
1.57 millert 2480: }
2481: tempfree(x);
2482: x = gettemp();
2483: setfval(x, (Awkfloat) (stat ? -1 : 0));
2484: return(x);
1.71 millert 2485: }
1.1 tholo 2486:
2487: void closeall(void)
2488: {
1.57 millert 2489: size_t i;
2490: bool stat = false;
1.1 tholo 2491:
1.57 millert 2492: for (i = 0; i < nfiles; i++) {
2493: if (! files[i].fp)
2494: continue;
1.71 millert 2495: if (files[i].mode == GT || files[i].mode == '|')
2496: fflush(files[i].fp);
2497: if (ferror(files[i].fp)) {
2498: if ((files[i].mode == GT && files[i].fp != stderr)
2499: || files[i].mode == '|')
2500: FATAL("write error on %s", files[i].fname);
2501: else
2502: WARNING("i/o error occurred on %s", files[i].fname);
2503: }
2504: if (files[i].fp == stdin || files[i].fp == stdout ||
2505: files[i].fp == stderr)
1.64 millert 2506: continue;
1.57 millert 2507: if (files[i].mode == '|' || files[i].mode == LE)
2508: stat = pclose(files[i].fp) == -1;
2509: else
2510: stat = fclose(files[i].fp) == EOF;
2511: if (stat)
1.71 millert 2512: WARNING("i/o error occurred while closing %s", files[i].fname);
1.17 millert 2513: }
1.18 millert 2514: }
2515:
1.57 millert 2516: static void flush_all(void)
1.18 millert 2517: {
1.57 millert 2518: size_t i;
1.18 millert 2519:
1.33 millert 2520: for (i = 0; i < nfiles; i++)
1.18 millert 2521: if (files[i].fp)
2522: fflush(files[i].fp);
1.1 tholo 2523: }
2524:
1.53 millert 2525: void backsub(char **pb_ptr, const char **sptr_ptr);
1.1 tholo 2526:
1.81 millert 2527: Cell *dosub(Node **a, int subop) /* sub and gsub */
1.1 tholo 2528: {
2529: fa *pfa;
1.85 millert 2530: int tempstat = 0;
1.81 millert 2531: char *repl;
2532: Cell *x;
2533:
2534: char *buf = NULL;
2535: char *pb = NULL;
1.13 kstailey 2536: int bufsz = recsize;
1.1 tholo 2537:
1.81 millert 2538: const char *r, *s;
2539: const char *start;
2540: const char *noempty = NULL; /* empty match disallowed here */
2541: size_t m = 0; /* match count */
1.86 ! millert 2542: size_t whichm = 0; /* which match to select, 0 = global */
1.81 millert 2543: int mtype; /* match type */
2544:
2545: if (a[0] == NULL) { /* 0 => a[1] is already-compiled regexpr */
2546: pfa = (fa *) a[1];
2547: } else {
2548: x = execute(a[1]);
2549: pfa = makedfa(getsval(x), 1);
2550: tempfree(x);
2551: }
2552:
2553: x = execute(a[2]); /* replacement string */
2554: repl = tostring(getsval(x));
2555: tempfree(x);
2556:
2557: switch (subop) {
2558: case SUB:
2559: whichm = 1;
2560: x = execute(a[3]); /* source string */
2561: break;
2562: case GSUB:
2563: whichm = 0;
2564: x = execute(a[3]); /* source string */
2565: break;
2566: default:
2567: FATAL("dosub: unrecognized subop: %d", subop);
1.1 tholo 2568: }
1.81 millert 2569:
2570: start = getsval(x);
2571: while (pmatch(pfa, start)) {
2572: if (buf == NULL) {
1.84 millert 2573: if ((pb = buf = (char *) malloc(bufsz)) == NULL)
1.81 millert 2574: FATAL("out of memory in dosub");
2575: tempstat = pfa->initstat;
2576: pfa->initstat = 2;
2577: }
2578:
2579: /* match types */
2580: #define MT_IGNORE 0 /* unselected or invalid */
2581: #define MT_INSERT 1 /* selected, empty */
2582: #define MT_REPLACE 2 /* selected, not empty */
2583:
2584: /* an empty match just after replacement is invalid */
2585:
2586: if (patbeg == noempty && patlen == 0) {
2587: mtype = MT_IGNORE; /* invalid, not counted */
2588: } else if (whichm == ++m || whichm == 0) {
2589: mtype = patlen ? MT_REPLACE : MT_INSERT;
2590: } else {
2591: mtype = MT_IGNORE; /* unselected, but counted */
2592: }
2593:
2594: /* leading text: */
2595: if (patbeg > start) {
2596: adjbuf(&buf, &bufsz, (pb - buf) + (patbeg - start),
2597: recsize, &pb, "dosub");
2598: s = start;
2599: while (s < patbeg)
2600: *pb++ = *s++;
2601: }
2602:
2603: if (mtype == MT_IGNORE)
2604: goto matching_text; /* skip replacement text */
2605:
2606: r = repl;
2607: while (*r != 0) {
2608: adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "dosub");
2609: if (*r == '\\') {
2610: backsub(&pb, &r);
2611: } else if (*r == '&') {
2612: r++;
2613: adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize,
2614: &pb, "dosub");
2615: for (s = patbeg; s < patbeg+patlen; )
2616: *pb++ = *s++;
2617: } else {
2618: *pb++ = *r++;
2619: }
1.13 kstailey 2620: }
1.81 millert 2621:
2622: matching_text:
2623: if (mtype == MT_REPLACE || *patbeg == '\0')
2624: goto next_search; /* skip matching text */
2625:
2626: if (patlen == 0)
2627: patlen = u8_nextlen(patbeg);
2628: adjbuf(&buf, &bufsz, (pb-buf) + patlen, recsize, &pb, "dosub");
2629: s = patbeg;
2630: while (s < patbeg + patlen)
2631: *pb++ = *s++;
2632:
2633: next_search:
2634: start = patbeg + patlen;
2635: if (m == whichm || *patbeg == '\0')
2636: break;
2637: if (mtype == MT_REPLACE)
2638: noempty = start;
2639:
2640: #undef MT_IGNORE
2641: #undef MT_INSERT
2642: #undef MT_REPLACE
1.1 tholo 2643: }
2644:
1.81 millert 2645: xfree(repl);
1.1 tholo 2646:
1.81 millert 2647: if (buf != NULL) {
1.1 tholo 2648: pfa->initstat = tempstat;
1.81 millert 2649:
2650: /* trailing text */
2651: adjbuf(&buf, &bufsz, 1+strlen(start)+pb-buf, 0, &pb, "dosub");
2652: while ((*pb++ = *start++) != '\0')
2653: ;
2654:
2655: setsval(x, buf);
2656: free(buf);
1.1 tholo 2657: }
1.81 millert 2658:
1.1 tholo 2659: tempfree(x);
2660: x = gettemp();
2661: x->tval = NUM;
1.81 millert 2662: x->fval = m;
2663: return x;
1.59 millert 2664: }
2665:
2666: Cell *gensub(Node **a, int nnn) /* global selective substitute */
2667: /* XXX incomplete - doesn't support backreferences \0 ... \9 */
2668: {
2669: Cell *x, *y, *res, *h;
2670: char *rptr;
2671: const char *sptr;
2672: char *buf, *pb;
2673: const char *t, *q;
2674: fa *pfa;
2675: int mflag, tempstat, num, whichm;
2676: int bufsz = recsize;
2677:
1.84 millert 2678: if ((buf = (char *) malloc(bufsz)) == NULL)
1.59 millert 2679: FATAL("out of memory in gensub");
2680: mflag = 0; /* if mflag == 0, can replace empty string */
2681: num = 0;
2682: x = execute(a[4]); /* source string */
2683: t = getsval(x);
2684: res = copycell(x); /* target string - initially copy of source */
2685: res->csub = CTEMP; /* result values are temporary */
2686: if (a[0] == 0) /* 0 => a[1] is already-compiled regexpr */
2687: pfa = (fa *) a[1]; /* regular expression */
2688: else {
2689: y = execute(a[1]);
2690: pfa = makedfa(getsval(y), 1);
2691: tempfree(y);
2692: }
2693: y = execute(a[2]); /* replacement string */
2694: h = execute(a[3]); /* which matches should be replaced */
2695: sptr = getsval(h);
2696: if (sptr[0] == 'g' || sptr[0] == 'G')
2697: whichm = -1;
2698: else {
2699: /*
2700: * The specified number is index of replacement, starting
2701: * from 1. GNU awk treats index lower than 0 same as
2702: * 1, we do same for compatibility.
2703: */
2704: whichm = (int) getfval(h) - 1;
2705: if (whichm < 0)
2706: whichm = 0;
2707: }
2708: tempfree(h);
2709:
2710: if (pmatch(pfa, t)) {
2711: char *sl;
2712:
2713: tempstat = pfa->initstat;
2714: pfa->initstat = 2;
2715: pb = buf;
2716: rptr = getsval(y);
2717: /*
2718: * XXX if there are any backreferences in subst string,
2719: * complain now.
2720: */
2721: for (sl = rptr; (sl = strchr(sl, '\\')) && sl[1]; sl++) {
2722: if (strchr("0123456789", sl[1])) {
2723: FATAL("gensub doesn't support backreferences (subst \"%s\")", rptr);
2724: }
2725: }
2726:
2727: do {
2728: if (whichm >= 0 && whichm != num) {
2729: num++;
2730: adjbuf(&buf, &bufsz, (pb - buf) + (patbeg - t) + patlen, recsize, &pb, "gensub");
2731:
2732: /* copy the part of string up to and including
2733: * match to output buffer */
2734: while (t < patbeg + patlen)
2735: *pb++ = *t++;
2736: continue;
2737: }
2738:
2739: if (patlen == 0 && *patbeg != 0) { /* matched empty string */
2740: if (mflag == 0) { /* can replace empty */
2741: num++;
2742: sptr = rptr;
2743: while (*sptr != 0) {
2744: adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gensub");
2745: if (*sptr == '\\') {
2746: backsub(&pb, &sptr);
2747: } else if (*sptr == '&') {
2748: sptr++;
2749: adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub");
2750: for (q = patbeg; q < patbeg+patlen; )
2751: *pb++ = *q++;
2752: } else
2753: *pb++ = *sptr++;
2754: }
2755: }
2756: if (*t == 0) /* at end */
2757: goto done;
2758: adjbuf(&buf, &bufsz, 2+pb-buf, recsize, &pb, "gensub");
2759: *pb++ = *t++;
2760: if (pb > buf + bufsz) /* BUG: not sure of this test */
2761: FATAL("gensub result0 %.30s too big; can't happen", buf);
2762: mflag = 0;
2763: }
2764: else { /* matched nonempty string */
2765: num++;
2766: sptr = t;
2767: adjbuf(&buf, &bufsz, 1+(patbeg-sptr)+pb-buf, recsize, &pb, "gensub");
2768: while (sptr < patbeg)
2769: *pb++ = *sptr++;
2770: sptr = rptr;
2771: while (*sptr != 0) {
2772: adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gensub");
2773: if (*sptr == '\\') {
2774: backsub(&pb, &sptr);
2775: } else if (*sptr == '&') {
2776: sptr++;
2777: adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub");
2778: for (q = patbeg; q < patbeg+patlen; )
2779: *pb++ = *q++;
2780: } else
2781: *pb++ = *sptr++;
2782: }
2783: t = patbeg + patlen;
2784: if (patlen == 0 || *t == 0 || *(t-1) == 0)
2785: goto done;
2786: if (pb > buf + bufsz)
2787: FATAL("gensub result1 %.30s too big; can't happen", buf);
2788: mflag = 1;
2789: }
2790: } while (pmatch(pfa,t));
2791: sptr = t;
2792: adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "gensub");
2793: while ((*pb++ = *sptr++) != 0)
2794: ;
2795: done: if (pb > buf + bufsz)
2796: FATAL("gensub result2 %.30s too big; can't happen", buf);
2797: *pb = '\0';
2798: setsval(res, buf);
2799: pfa->initstat = tempstat;
2800: }
2801: tempfree(x);
2802: tempfree(y);
2803: free(buf);
2804: return(res);
1.13 kstailey 2805: }
2806:
1.53 millert 2807: void backsub(char **pb_ptr, const char **sptr_ptr) /* handle \\& variations */
1.13 kstailey 2808: { /* sptr[0] == '\\' */
1.53 millert 2809: char *pb = *pb_ptr;
2810: const char *sptr = *sptr_ptr;
1.13 kstailey 2811:
2812: if (sptr[1] == '\\') {
2813: if (sptr[2] == '\\' && sptr[3] == '&') { /* \\\& -> \& */
2814: *pb++ = '\\';
2815: *pb++ = '&';
2816: sptr += 4;
2817: } else if (sptr[2] == '&') { /* \\& -> \ + matched */
2818: *pb++ = '\\';
2819: sptr += 2;
1.56 millert 2820: } else if (do_posix) { /* \\x -> \x */
2821: sptr++;
2822: *pb++ = *sptr++;
1.13 kstailey 2823: } else { /* \\x -> \\x */
2824: *pb++ = *sptr++;
2825: *pb++ = *sptr++;
2826: }
2827: } else if (sptr[1] == '&') { /* literal & */
2828: sptr++;
2829: *pb++ = *sptr++;
2830: } else /* literal \ */
2831: *pb++ = *sptr++;
2832:
2833: *pb_ptr = pb;
2834: *sptr_ptr = sptr;
1.75 millert 2835: }
2836:
/*
 * Encode the code point rune as UTF-8.
 *
 * Returns a pointer to a static, NUL-terminated buffer that is
 * overwritten by the next call, and stores the number of encoded bytes
 * (excluding the terminator) in *outlen.  Returns NULL, leaving *outlen
 * untouched, when rune is negative or above 0x10FFFF.
 */
static char *wide_char_to_byte_str(int rune, size_t *outlen)
{
	static char buf[5];
	unsigned char lead;
	int ntrail;	/* number of 10xxxxxx continuation bytes */
	int shift;
	int pos = 0;

	if (rune < 0 || rune > 0x10FFFF)
		return NULL;

	memset(buf, 0, sizeof(buf));

	if (rune <= 0x0000007F) {
		/* 0xxxxxxx */
		lead = rune;
		ntrail = 0;
	} else if (rune <= 0x000007FF) {
		/* 110xxxxx 10xxxxxx */
		lead = 0xC0 | (rune >> 6);
		ntrail = 1;
	} else if (rune <= 0x0000FFFF) {
		/* 1110xxxx 10xxxxxx 10xxxxxx */
		lead = 0xE0 | (rune >> 12);
		ntrail = 2;
	} else {
		/* 0x00010000 - 0x10FFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
		lead = 0xF0 | (rune >> 18);
		ntrail = 3;
	}

	buf[pos++] = (char) lead;
	/* continuation bytes, most significant six bits first */
	for (shift = 6 * (ntrail - 1); shift >= 0; shift -= 6)
		buf[pos++] = (char) (0x80 | ((rune >> shift) & 0x3F));

	*outlen = pos;
	buf[pos] = '\0';

	return buf;
}