Annotation of src/usr.bin/awk/run.c, Revision 1.80
1.80 ! millert 1: /* $OpenBSD: run.c,v 1.79 2023/10/06 22:29:24 millert Exp $ */
1.1 tholo 2: /****************************************************************
1.13 kstailey 3: Copyright (C) Lucent Technologies 1997
1.1 tholo 4: All Rights Reserved
5:
6: Permission to use, copy, modify, and distribute this software and
7: its documentation for any purpose and without fee is hereby
8: granted, provided that the above copyright notice appear in all
9: copies and that both that the copyright notice and this
10: permission notice and warranty disclaimer appear in supporting
1.13 kstailey 11: documentation, and that the name Lucent Technologies or any of
12: its entities not be used in advertising or publicity pertaining
13: to distribution of the software without specific, written prior
14: permission.
15:
16: LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
17: INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
18: IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
19: SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
20: WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
21: IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
22: ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
23: THIS SOFTWARE.
1.1 tholo 24: ****************************************************************/
25:
26: #define DEBUG
27: #include <stdio.h>
28: #include <ctype.h>
1.63 millert 29: #include <errno.h>
1.57 millert 30: #include <wctype.h>
1.56 millert 31: #include <fcntl.h>
1.1 tholo 32: #include <setjmp.h>
1.25 millert 33: #include <limits.h>
1.1 tholo 34: #include <math.h>
35: #include <string.h>
36: #include <stdlib.h>
37: #include <time.h>
1.47 millert 38: #include <sys/types.h>
39: #include <sys/wait.h>
1.1 tholo 40: #include "awk.h"
1.66 millert 41: #include "awkgram.tab.h"
1.1 tholo 42:
1.75 millert 43:
/* forward declarations of helpers private to this file */
1.57 millert 44: static void stdinit(void);
45: static void flush_all(void);
1.75 millert 46: static char *wide_char_to_byte_str(int rune, size_t *outlen);
1.1 tholo 47:
/*
 * tempfree(x): hand x to tfree() only when istemp(x) holds.
 * The macro form is the one compiled in (#if 1); the #else branch is a
 * disabled function form that additionally warns when an OCELL carries a
 * csub outside CUNK..CFREE.
 */
1.57 millert 48: #if 1
49: #define tempfree(x) do { if (istemp(x)) tfree(x); } while (/*CONSTCOND*/0)
50: #else
1.1 tholo 51: void tempfree(Cell *p) {
52: if (p->ctype == OCELL && (p->csub < CUNK || p->csub > CFREE)) {
1.16 millert 53: WARNING("bad csub %d in Cell %d %s",
54: p->csub, p->ctype, p->sval);
1.1 tholo 55: }
56: if (istemp(p))
57: tfree(p);
58: }
1.57 millert 59: #endif
1.1 tholo 60:
1.30 millert 61: /* do we really need these? */
62: /* #ifdef _NFILE */
63: /* #ifndef FOPEN_MAX */
64: /* #define FOPEN_MAX _NFILE */
65: /* #endif */
66: /* #endif */
67: /* */
68: /* #ifndef FOPEN_MAX */
69: /* #define FOPEN_MAX 40 */ /* max number of open files */
70: /* #endif */
71: /* */
72: /* #ifndef RAND_MAX */
73: /* #define RAND_MAX 32767 */ /* all that ansi guarantees */
74: /* #endif */
1.1 tholo 75:
/* env: armed by setjmp() in program(); jump() longjmp()s to it for "exit" */
76: jmp_buf env;
1.13 kstailey 77: extern int pairstack[];
1.33 millert 78: extern Awkfloat srand_seed;
1.1 tholo 79:
80: Node *winner = NULL; /* root of parse tree */
81: Cell *tmps; /* free temporary cells for execution */
82:
/*
 * Shared constant cells.  True/False are returned by the boolean and
 * relational operators; the j* cells are the sentinels returned by jump()
 * for break, continue, next, nextfile and return.
 */
1.57 millert 83: static Cell truecell ={ OBOOL, BTRUE, 0, 0, 1.0, NUM, NULL, NULL };
1.15 millert 84: Cell *True = &truecell;
1.57 millert 85: static Cell falsecell ={ OBOOL, BFALSE, 0, 0, 0.0, NUM, NULL, NULL };
1.15 millert 86: Cell *False = &falsecell;
1.57 millert 87: static Cell breakcell ={ OJUMP, JBREAK, 0, 0, 0.0, NUM, NULL, NULL };
1.1 tholo 88: Cell *jbreak = &breakcell;
1.57 millert 89: static Cell contcell ={ OJUMP, JCONT, 0, 0, 0.0, NUM, NULL, NULL };
1.1 tholo 90: Cell *jcont = &contcell;
1.57 millert 91: static Cell nextcell ={ OJUMP, JNEXT, 0, 0, 0.0, NUM, NULL, NULL };
1.1 tholo 92: Cell *jnext = &nextcell;
1.57 millert 93: static Cell nextfilecell ={ OJUMP, JNEXTFILE, 0, 0, 0.0, NUM, NULL, NULL };
1.1 tholo 94: Cell *jnextfile = &nextfilecell;
1.57 millert 95: static Cell exitcell ={ OJUMP, JEXIT, 0, 0, 0.0, NUM, NULL, NULL };
1.1 tholo 96: Cell *jexit = &exitcell;
1.57 millert 97: static Cell retcell ={ OJUMP, JRET, 0, 0, 0.0, NUM, NULL, NULL };
1.1 tholo 98: Cell *jret = &retcell;
/* tempcell: template that gettemp() copies into every cell it hands out */
1.57 millert 99: static Cell tempcell ={ OCELL, CTEMP, 0, EMPTY, 0.0, NUM|STR|DONTFREE, NULL, NULL };
1.1 tholo 100:
101: Node *curnode = NULL; /* the node being executed, for debugging */
1.22 deraadt 102:
1.13 kstailey 103: /* buffer memory management */
/*
 * adjbuf: make sure a managed buffer can hold at least minlen bytes.
 *
 * pbuf:    address of pointer to buffer being managed
 * psiz:    address of buffer size variable
 * minlen:  minimum length of buffer needed
 * quantum: buffer size quantum (0 disables rounding)
 * pbptr:   address of movable pointer into buffer, or 0 if none
 * whatrtn: name of the calling routine if failure should cause fatal error
 *
 * Returns 0 for realloc failure, !=0 for success.  On success *pbuf,
 * *psiz and (if given) *pbptr are updated to describe the new buffer.
 */
int adjbuf(char **pbuf, int *psiz, int minlen, int quantum, char **pbptr,
	const char *whatrtn)
{
	char *grown;
	int excess, offset;

	if (minlen <= *psiz)
		return 1;		/* already large enough */

	excess = quantum ? minlen % quantum : 0;
	offset = pbptr ? *pbptr - *pbuf : 0;	/* remember where pbptr pointed */
	if (excess != 0)
		minlen += quantum - excess;	/* round up to next multiple of quantum */

	grown = (char *) realloc(*pbuf, minlen);
	DPRINTF("adjbuf %s: %d %d (pbuf=%p, tbuf=%p)\n", whatrtn, *psiz, minlen, (void*)*pbuf, (void*)grown);
	if (grown == NULL) {
		if (whatrtn)
			FATAL("out of memory in %s", whatrtn);
		return 0;
	}

	*pbuf = grown;
	*psiz = minlen;
	if (pbptr)
		*pbptr = grown + offset;
	return 1;
}
137:
1.1 tholo 138: void run(Node *a) /* execution of parse tree starts here */
139: {
1.63 millert 140:
1.16 millert 141: stdinit();
1.1 tholo 142: execute(a);
143: closeall();
144: }
145:
146: Cell *execute(Node *u) /* execute a node of the parse tree */
147: {
148: Cell *(*proc)(Node **, int);
149: Cell *x;
150: Node *a;
151:
152: if (u == NULL)
1.15 millert 153: return(True);
1.1 tholo 154: for (a = u; ; a = a->nnext) {
155: curnode = a;
156: if (isvalue(a)) {
1.2 millert 157: x = (Cell *) (a->narg[0]);
1.13 kstailey 158: if (isfld(x) && !donefld)
1.1 tholo 159: fldbld();
1.13 kstailey 160: else if (isrec(x) && !donerec)
1.1 tholo 161: recbld();
162: return(x);
163: }
164: if (notlegal(a->nobj)) /* probably a Cell* but too risky to print */
1.16 millert 165: FATAL("illegal statement");
1.1 tholo 166: proc = proctab[a->nobj-FIRSTTOKEN];
167: x = (*proc)(a->narg, a->nobj);
1.13 kstailey 168: if (isfld(x) && !donefld)
1.1 tholo 169: fldbld();
1.13 kstailey 170: else if (isrec(x) && !donerec)
1.1 tholo 171: recbld();
172: if (isexpr(a))
173: return(x);
174: if (isjump(x))
175: return(x);
176: if (a->nnext == NULL)
177: return(x);
178: tempfree(x);
179: }
180: }
181:
182:
/*
 * program: run BEGIN, then the body once per input record, then END.
 * env is armed with setjmp() twice: jump() performs longjmp(env, 1) for an
 * awk "exit", which resumes at whichever setjmp() was armed most recently.
 * Always returns True.
 */
183: Cell *program(Node **a, int n) /* execute an awk program */
184: { /* a[0] = BEGIN, a[1] = body, a[2] = END */
185: Cell *x;
186:
/* an exit longjmp from BEGIN or the body resumes here and skips to END */
187: if (setjmp(env) != 0)
188: goto ex;
189: if (a[0]) { /* BEGIN */
190: x = execute(a[0]);
191: if (isexit(x))
1.15 millert 192: return(True);
1.1 tholo 193: if (isjump(x))
1.16 millert 194: FATAL("illegal break, continue, next or nextfile from BEGIN");
1.1 tholo 195: tempfree(x);
196: }
/* records are only read when there is a body or an END block */
197: if (a[1] || a[2])
1.54 millert 198: while (getrec(&record, &recsize, true) > 0) {
1.1 tholo 199: x = execute(a[1]);
200: if (isexit(x))
201: break;
202: tempfree(x);
203: }
204: ex:
/* re-arm env so that an exit inside END lands on ex1 instead of ex */
205: if (setjmp(env) != 0) /* handles exit within END */
206: goto ex1;
207: if (a[2]) { /* END */
208: x = execute(a[2]);
209: if (isbreak(x) || isnext(x) || iscont(x))
1.16 millert 210: FATAL("illegal break, continue, next or nextfile from END");
1.1 tholo 211: tempfree(x);
212: }
213: ex1:
1.15 millert 214: return(True);
1.1 tholo 215: }
216:
/*
 * One Frame is pushed per active awk function call (see call()).
 * arg() and the RETURN case of jump() read the current frame through frp.
 */
217: struct Frame { /* stack frame for awk function calls */
218: int nargs; /* number of arguments in this call */
219: Cell *fcncell; /* pointer to Cell for function */
220: Cell **args; /* pointer to array of arguments after execute */
221: Cell *retval; /* return value */
222: };
223:
/* call() rejects a call when ncall + ndef exceeds this limit */
224: #define NARGS 50 /* max args in a call */
225:
/* the frame array is allocated lazily by call() and grown 100 entries at a time */
226: struct Frame *frame = NULL; /* base of stack frames; dynamically allocated */
227: int nframe = 0; /* number of frames allocated */
1.57 millert 228: struct Frame *frp = NULL; /* frame pointer. bottom level unused */
1.1 tholo 229:
/*
 * call: invoke a user-defined awk function.
 * a[0] evaluates to the function cell, a[1] is the list of argument
 * expressions.  Arguments are evaluated, copied (arrays are passed by
 * reference), a new Frame is pushed, the body is executed, argument
 * cells are released, and the frame's retval cell is returned.
 * If the body yields an exit or next result, that result is returned
 * instead and the frame is not popped here.
 */
230: Cell *call(Node **a, int n) /* function call. very kludgy and fragile */
231: {
/* template assigned to the cells created for parameters not supplied by the caller */
1.57 millert 232: static const Cell newcopycell = { OCELL, CCOPY, 0, EMPTY, 0.0, NUM|STR|DONTFREE, NULL, NULL };
1.1 tholo 233: int i, ncall, ndef;
1.25 millert 234: int freed = 0; /* handles potential double freeing when fcn & param share a tempcell */
1.1 tholo 235: Node *x;
1.13 kstailey 236: Cell *args[NARGS], *oargs[NARGS]; /* BUG: fixed size arrays */
237: Cell *y, *z, *fcn;
1.1 tholo 238: char *s;
239:
240: fcn = execute(a[0]); /* the function itself */
241: s = fcn->nval;
1.13 kstailey 242: if (!isfcn(fcn))
1.16 millert 243: FATAL("calling undefined function %s", s);
/* first call ever: allocate the initial block of 100 frames */
1.1 tholo 244: if (frame == NULL) {
1.69 millert 245: frp = frame = (struct Frame *) calloc(nframe += 100, sizeof(*frame));
1.1 tholo 246: if (frame == NULL)
1.16 millert 247: FATAL("out of space for stack frames calling %s", s);
1.1 tholo 248: }
249: for (ncall = 0, x = a[1]; x != NULL; x = x->nnext) /* args in call */
250: ncall++;
1.12 millert 251: ndef = (int) fcn->fval; /* args in defn */
1.60 millert 252: DPRINTF("calling %s, %d args (%d in defn), frp=%d\n", s, ncall, ndef, (int) (frp-frame));
1.1 tholo 253: if (ncall > ndef)
1.16 millert 254: WARNING("function %s called with %d args, uses only %d",
255: s, ncall, ndef);
/* guards the fixed-size args[]/oargs[] arrays declared above */
1.1 tholo 256: if (ncall + ndef > NARGS)
1.16 millert 257: FATAL("function %s has %d arguments, limit %d", s, ncall+ndef, NARGS);
/* oargs[i] keeps the caller's original cell, args[i] is what the callee sees */
1.1 tholo 258: for (i = 0, x = a[1]; x != NULL; i++, x = x->nnext) { /* get call args */
1.60 millert 259: DPRINTF("evaluate args[%d], frp=%d:\n", i, (int) (frp-frame));
1.1 tholo 260: y = execute(x);
261: oargs[i] = y;
1.60 millert 262: DPRINTF("args[%d]: %s %f <%s>, t=%o\n",
263: i, NN(y->nval), y->fval, isarr(y) ? "(array)" : NN(y->sval), y->tval);
1.13 kstailey 264: if (isfcn(y))
1.16 millert 265: FATAL("can't use function %s as argument in %s", y->nval, s);
1.1 tholo 266: if (isarr(y))
267: args[i] = y; /* arrays by ref */
268: else
269: args[i] = copycell(y);
270: tempfree(y);
271: }
272: for ( ; i < ndef; i++) { /* add null args for ones not provided */
273: args[i] = gettemp();
274: *args[i] = newcopycell;
275: }
1.57 millert 276: frp++; /* now ok to up frame */
/* grow the frame array; frp is re-derived from its index because the base may move */
277: if (frp >= frame + nframe) {
278: int dfp = frp - frame; /* old index */
1.69 millert 279: frame = (struct Frame *) reallocarray(frame, (nframe += 100), sizeof(*frame));
1.1 tholo 280: if (frame == NULL)
1.16 millert 281: FATAL("out of space for stack frames in %s", s);
1.57 millert 282: frp = frame + dfp;
1.1 tholo 283: }
1.57 millert 284: frp->fcncell = fcn;
285: frp->args = args;
286: frp->nargs = ndef; /* number defined with (excess are locals) */
287: frp->retval = gettemp();
1.1 tholo 288:
1.60 millert 289: DPRINTF("start exec of %s, frp=%d\n", s, (int) (frp-frame));
1.1 tholo 290: y = execute((Node *)(fcn->sval)); /* execute body */
1.60 millert 291: DPRINTF("finished exec of %s, frp=%d\n", s, (int) (frp-frame));
1.1 tholo 292:
/*
 * Release the parameter cells.  A CCOPY cell that became an array inside
 * the callee is either torn down (it was a local) or its table is handed
 * back to the caller's original cell (it was a supplied argument).
 */
293: for (i = 0; i < ndef; i++) {
1.57 millert 294: Cell *t = frp->args[i];
1.1 tholo 295: if (isarr(t)) {
296: if (t->csub == CCOPY) {
297: if (i >= ncall) {
298: freesymtab(t);
299: t->csub = CTEMP;
1.14 millert 300: tempfree(t);
1.1 tholo 301: } else {
302: oargs[i]->tval = t->tval;
303: oargs[i]->tval &= ~(STR|NUM|DONTFREE);
304: oargs[i]->sval = t->sval;
305: tempfree(t);
306: }
307: }
308: } else if (t != y) { /* kludge to prevent freeing twice */
309: t->csub = CTEMP;
310: tempfree(t);
1.25 millert 311: } else if (t == y && t->csub == CCOPY) {
312: t->csub = CTEMP;
313: tempfree(t);
314: freed = 1;
1.1 tholo 315: }
316: }
317: tempfree(fcn);
/* exit/next results are passed straight up to the caller */
1.17 millert 318: if (isexit(y) || isnext(y))
1.1 tholo 319: return y;
1.25 millert 320: if (freed == 0) {
321: tempfree(y); /* don't free twice! */
322: }
1.57 millert 323: z = frp->retval; /* return value */
1.60 millert 324: DPRINTF("%s returns %g |%s| %o\n", s, getfval(z), getsval(z), z->tval);
1.57 millert 325: frp--;
1.1 tholo 326: return(z);
327: }
328:
329: Cell *copycell(Cell *x) /* make a copy of a cell in a temp */
330: {
331: Cell *y;
332:
1.47 millert 333: /* copy is not constant or field */
334:
1.1 tholo 335: y = gettemp();
1.47 millert 336: y->tval = x->tval & ~(CON|FLD|REC);
1.1 tholo 337: y->csub = CCOPY; /* prevents freeing until call is over */
1.13 kstailey 338: y->nval = x->nval; /* BUG? */
1.47 millert 339: if (isstr(x) /* || x->ctype == OCELL */) {
1.17 millert 340: y->sval = tostring(x->sval);
1.47 millert 341: y->tval &= ~DONTFREE;
342: } else
343: y->tval |= DONTFREE;
1.1 tholo 344: y->fval = x->fval;
345: return y;
346: }
347:
348: Cell *arg(Node **a, int n) /* nth argument of a function */
349: {
350:
1.15 millert 351: n = ptoi(a[0]); /* argument number, counting from 0 */
1.60 millert 352: DPRINTF("arg(%d), frp->nargs=%d\n", n, frp->nargs);
1.57 millert 353: if (n+1 > frp->nargs)
1.16 millert 354: FATAL("argument #%d of function %s was not supplied",
1.57 millert 355: n+1, frp->fcncell->nval);
356: return frp->args[n];
1.1 tholo 357: }
358:
359: Cell *jump(Node **a, int n) /* break, continue, next, nextfile, return */
360: {
361: Cell *y;
362:
363: switch (n) {
364: case EXIT:
365: if (a[0] != NULL) {
366: y = execute(a[0]);
1.14 millert 367: errorflag = (int) getfval(y);
1.1 tholo 368: tempfree(y);
369: }
370: longjmp(env, 1);
371: case RETURN:
372: if (a[0] != NULL) {
373: y = execute(a[0]);
374: if ((y->tval & (STR|NUM)) == (STR|NUM)) {
1.57 millert 375: setsval(frp->retval, getsval(y));
376: frp->retval->fval = getfval(y);
377: frp->retval->tval |= NUM;
1.1 tholo 378: }
379: else if (y->tval & STR)
1.57 millert 380: setsval(frp->retval, getsval(y));
1.1 tholo 381: else if (y->tval & NUM)
1.57 millert 382: setfval(frp->retval, getfval(y));
1.1 tholo 383: else /* can't happen */
1.16 millert 384: FATAL("bad type variable %d", y->tval);
1.1 tholo 385: tempfree(y);
386: }
387: return(jret);
388: case NEXT:
389: return(jnext);
390: case NEXTFILE:
391: nextfile();
392: return(jnextfile);
393: case BREAK:
394: return(jbreak);
395: case CONTINUE:
396: return(jcont);
397: default: /* can't happen */
1.16 millert 398: FATAL("illegal jump type %d", n);
1.1 tholo 399: }
400: return 0; /* not reached */
401: }
402:
/*
 * awkgetline: implement the getline forms.
 * Returns a temp cell whose numeric value is the status n produced by
 * readrec()/getrec(), or -1 when the file/pipe could not be opened.
 * A line read into a variable or into fldtab[0] also gets NUM set and
 * fval filled in when is_number() accepts its text.
 */
1.31 millert 403: Cell *awkgetline(Node **a, int n) /* get next line from specific input */
1.1 tholo 404: { /* a[0] is variable, a[1] is operator, a[2] is filename */
405: Cell *r, *x;
1.13 kstailey 406: extern Cell **fldtab;
1.1 tholo 407: FILE *fp;
1.13 kstailey 408: char *buf;
409: int bufsize = recsize;
1.15 millert 410: int mode;
1.57 millert 411: bool newflag;
1.69 millert 412: double result;
1.13 kstailey 413:
/* scratch line buffer; freed before returning */
1.69 millert 414: if ((buf = (char *) malloc(bufsize)) == NULL)
1.16 millert 415: FATAL("out of memory in getline");
1.1 tholo 416:
417: fflush(stdout); /* in case someone is waiting for a prompt */
418: r = gettemp();
419: if (a[1] != NULL) { /* getline < file */
420: x = execute(a[2]); /* filename */
1.15 millert 421: mode = ptoi(a[1]);
422: if (mode == '|') /* input pipe */
423: mode = LE; /* arbitrary flag */
1.57 millert 424: fp = openfile(mode, getsval(x), &newflag);
1.1 tholo 425: tempfree(x);
426: if (fp == NULL)
427: n = -1;
428: else
1.57 millert 429: n = readrec(&buf, &bufsize, fp, newflag);
/* nothing is assigned unless a line was actually read */
1.1 tholo 430: if (n <= 0) {
431: ;
432: } else if (a[0] != NULL) { /* getline var <file */
1.13 kstailey 433: x = execute(a[0]);
434: setsval(x, buf);
1.69 millert 435: if (is_number(x->sval, & result)) {
436: x->fval = result;
1.49 millert 437: x->tval |= NUM;
438: }
1.13 kstailey 439: tempfree(x);
1.1 tholo 440: } else { /* getline <file */
1.13 kstailey 441: setsval(fldtab[0], buf);
1.69 millert 442: if (is_number(fldtab[0]->sval, & result)) {
443: fldtab[0]->fval = result;
1.13 kstailey 444: fldtab[0]->tval |= NUM;
1.1 tholo 445: }
446: }
447: } else { /* bare getline; use current input */
/* plain getline reads into the global record buffer, not into buf */
448: if (a[0] == NULL) /* getline */
1.54 millert 449: n = getrec(&record, &recsize, true);
1.1 tholo 450: else { /* getline var */
1.54 millert 451: n = getrec(&buf, &bufsize, false);
1.70 millert 452: if (n > 0) {
453: x = execute(a[0]);
454: setsval(x, buf);
455: if (is_number(x->sval, & result)) {
456: x->fval = result;
457: x->tval |= NUM;
458: }
459: tempfree(x);
1.49 millert 460: }
1.1 tholo 461: }
462: }
463: setfval(r, (Awkfloat) n);
1.13 kstailey 464: free(buf);
1.1 tholo 465: return r;
466: }
467:
468: Cell *getnf(Node **a, int n) /* get NF */
469: {
1.54 millert 470: if (!donefld)
1.1 tholo 471: fldbld();
472: return (Cell *) a[0];
473: }
474:
1.53 millert 475: static char *
476: makearraystring(Node *p, const char *func)
1.1 tholo 477: {
1.13 kstailey 478: char *buf;
479: int bufsz = recsize;
1.62 millert 480: size_t blen;
1.53 millert 481:
1.69 millert 482: if ((buf = (char *) malloc(bufsz)) == NULL) {
1.53 millert 483: FATAL("%s: out of memory", func);
484: }
1.13 kstailey 485:
1.53 millert 486: blen = 0;
487: buf[blen] = '\0';
488:
489: for (; p; p = p->nnext) {
490: Cell *x = execute(p); /* expr */
491: char *s = getsval(x);
1.62 millert 492: size_t seplen = strlen(getsval(subseploc));
1.53 millert 493: size_t nsub = p->nnext ? seplen : 0;
494: size_t slen = strlen(s);
495: size_t tlen = blen + slen + nsub;
496:
497: if (!adjbuf(&buf, &bufsz, tlen + 1, recsize, 0, func)) {
498: FATAL("%s: out of memory %s[%s...]",
499: func, x->nval, buf);
500: }
501: memcpy(buf + blen, s, slen);
502: if (nsub) {
503: memcpy(buf + blen + slen, *SUBSEP, nsub);
504: }
505: buf[tlen] = '\0';
506: blen = tlen;
507: tempfree(x);
508: }
509: return buf;
510: }
511:
512: Cell *array(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */
513: {
514: Cell *x, *z;
515: char *buf;
1.1 tholo 516:
517: x = execute(a[0]); /* Cell* for symbol table */
1.53 millert 518: buf = makearraystring(a[1], __func__);
1.1 tholo 519: if (!isarr(x)) {
1.60 millert 520: DPRINTF("making %s into an array\n", NN(x->nval));
1.1 tholo 521: if (freeable(x))
522: xfree(x->sval);
523: x->tval &= ~(STR|NUM|DONTFREE);
524: x->tval |= ARR;
525: x->sval = (char *) makesymtab(NSYMTAB);
526: }
527: z = setsymtab(buf, "", 0.0, STR|NUM, (Array *) x->sval);
528: z->ctype = OCELL;
529: z->csub = CVAR;
530: tempfree(x);
1.13 kstailey 531: free(buf);
1.1 tholo 532: return(z);
533: }
534:
1.14 millert 535: Cell *awkdelete(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */
1.1 tholo 536: {
1.53 millert 537: Cell *x;
1.1 tholo 538:
539: x = execute(a[0]); /* Cell* for symbol table */
1.50 millert 540: if (x == symtabloc) {
541: FATAL("cannot delete SYMTAB or its elements");
542: }
1.1 tholo 543: if (!isarr(x))
1.15 millert 544: return True;
1.51 millert 545: if (a[1] == NULL) { /* delete the elements, not the table */
1.1 tholo 546: freesymtab(x);
547: x->tval &= ~STR;
548: x->tval |= ARR;
549: x->sval = (char *) makesymtab(NSYMTAB);
550: } else {
1.53 millert 551: char *buf = makearraystring(a[1], __func__);
1.1 tholo 552: freeelem(x, buf);
1.13 kstailey 553: free(buf);
1.1 tholo 554: }
555: tempfree(x);
1.15 millert 556: return True;
1.1 tholo 557: }
558:
559: Cell *intest(Node **a, int n) /* a[0] is index (list), a[1] is symtab */
560: {
1.53 millert 561: Cell *ap, *k;
1.13 kstailey 562: char *buf;
1.1 tholo 563:
564: ap = execute(a[1]); /* array name */
565: if (!isarr(ap)) {
1.60 millert 566: DPRINTF("making %s into an array\n", ap->nval);
1.1 tholo 567: if (freeable(ap))
568: xfree(ap->sval);
569: ap->tval &= ~(STR|NUM|DONTFREE);
570: ap->tval |= ARR;
571: ap->sval = (char *) makesymtab(NSYMTAB);
572: }
1.53 millert 573: buf = makearraystring(a[0], __func__);
1.1 tholo 574: k = lookup(buf, (Array *) ap->sval);
575: tempfree(ap);
1.13 kstailey 576: free(buf);
1.1 tholo 577: if (k == NULL)
1.15 millert 578: return(False);
1.1 tholo 579: else
1.15 millert 580: return(True);
1.1 tholo 581: }
582:
583:
1.75 millert 584: /* ======== utf-8 code ========== */
585:
586: /*
587: * Awk strings can contain ascii, random 8-bit items (eg Latin-1),
588: * or utf-8. u8_isutf tests whether a string starts with a valid
589: * utf-8 sequence, and returns 0 if not (e.g., high bit set).
590: * u8_nextlen returns length of next valid sequence, which is
591: * 1 for ascii, 2..4 for utf-8, or 1 for high bit non-utf.
592: * u8_strlen returns length of string in valid utf-8 sequences
593: * and/or high-bit bytes. Conversion functions go between byte
594: * number and character number.
595: *
596: * In theory, this behaves the same as before for non-utf8 bytes.
597: *
598: * Limited checking! This is a potential security hole.
599: */
600:
601: /* is s the beginning of a valid utf-8 string? */
602: /* return length 1..4 if yes, 0 if no */
603: int u8_isutf(const char *s)
604: {
605: int n, ret;
606: unsigned char c;
607:
608: c = s[0];
1.77 millert 609: if (c < 128 || awk_mb_cur_max == 1)
1.75 millert 610: return 1; /* what if it's 0? */
611:
612: n = strlen(s);
613: if (n >= 2 && ((c>>5) & 0x7) == 0x6 && (s[1] & 0xC0) == 0x80) {
614: ret = 2; /* 110xxxxx 10xxxxxx */
615: } else if (n >= 3 && ((c>>4) & 0xF) == 0xE && (s[1] & 0xC0) == 0x80
616: && (s[2] & 0xC0) == 0x80) {
617: ret = 3; /* 1110xxxx 10xxxxxx 10xxxxxx */
618: } else if (n >= 4 && ((c>>3) & 0x1F) == 0x1E && (s[1] & 0xC0) == 0x80
619: && (s[2] & 0xC0) == 0x80 && (s[3] & 0xC0) == 0x80) {
620: ret = 4; /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
621: } else {
622: ret = 0;
623: }
624: return ret;
625: }
626:
627: /* Convert (prefix of) utf8 string to utf-32 rune. */
628: /* Sets *rune to the value, returns the length. */
629: /* No error checking: watch out. */
630: int u8_rune(int *rune, const char *s)
631: {
632: int n, ret;
633: unsigned char c;
634:
635: c = s[0];
1.77 millert 636: if (c < 128 || awk_mb_cur_max == 1) {
1.75 millert 637: *rune = c;
638: return 1;
639: }
640:
641: n = strlen(s);
642: if (n >= 2 && ((c>>5) & 0x7) == 0x6 && (s[1] & 0xC0) == 0x80) {
643: *rune = ((c & 0x1F) << 6) | (s[1] & 0x3F); /* 110xxxxx 10xxxxxx */
644: ret = 2;
645: } else if (n >= 3 && ((c>>4) & 0xF) == 0xE && (s[1] & 0xC0) == 0x80
646: && (s[2] & 0xC0) == 0x80) {
647: *rune = ((c & 0xF) << 12) | ((s[1] & 0x3F) << 6) | (s[2] & 0x3F);
648: /* 1110xxxx 10xxxxxx 10xxxxxx */
649: ret = 3;
650: } else if (n >= 4 && ((c>>3) & 0x1F) == 0x1E && (s[1] & 0xC0) == 0x80
651: && (s[2] & 0xC0) == 0x80 && (s[3] & 0xC0) == 0x80) {
652: *rune = ((c & 0x7) << 18) | ((s[1] & 0x3F) << 12) | ((s[2] & 0x3F) << 6) | (s[3] & 0x3F);
653: /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
654: ret = 4;
655: } else {
656: *rune = c;
657: ret = 1;
658: }
659: return ret; /* returns one byte if sequence doesn't look like utf */
660: }
661:
/* return length of next sequence: 1 for ascii or random, 2..4 for valid utf8 */
int u8_nextlen(const char *s)
{
	int len = u8_isutf(s);

	/* a byte that does not start a valid sequence still advances by one */
	return len == 0 ? 1 : len;
}
672:
/*
 * u8_strlen: return number of utf characters or single non-utf bytes in s.
 *
 * Walks the string once, one u8_nextlen() step per character.
 * u8_nextlen() only reports a multi-byte length when every byte of the
 * sequence is a non-NUL continuation byte, so a step never jumps over
 * the terminator; the old "i > n" sanity check could never fire and the
 * strlen() pre-pass was unnecessary.
 */
int u8_strlen(const char *s)
{
	int totlen = 0;

	while (*s != '\0') {
		s += u8_nextlen(s);
		totlen++;
	}
	return totlen;
}
694:
/*
 * u8_char2byte: convert utf-8 char number in a string to its byte offset.
 * charnum is 0-origin: 0 maps to byte offset 0.
 * If charnum is larger than the number of characters in s, the offset of
 * the terminating NUL is returned instead of reading past the end of the
 * string.
 */
int u8_char2byte(const char *s, int charnum)
{
	int n;
	int bytenum = 0;

	/* stop at the terminator: u8_nextlen() would report 1 for NUL and walk off the end */
	while (charnum > 0 && *s != '\0') {
		n = u8_nextlen(s);
		s += n;
		bytenum += n;
		charnum--;
	}
	return bytenum;
}
709:
/*
 * u8_byte2char: convert byte offset in s to the utf-8 char number that
 * starts there.  The result is 1-origin (byte offset 0 gives 1), so that
 * 0 can mean "no match" to callers.  Returns -1 when bytenum lies beyond
 * the terminating NUL.
 */
int u8_byte2char(const char *s, int bytenum)
{
	int total = strlen(s);
	int off = 0;
	int nchars = 0;

	if (bytenum > total)
		return -1;	/* ??? */

	/* count every character that starts at or before bytenum */
	while (off <= bytenum) {
		off += u8_nextlen(s + off);
		nchars++;
	}
	return nchars;
}
727:
728: /* runetochar() adapted from rune.c in the Plan 9 distribution */
729:
enum
{
	Runeerror = 0xFFFD, /* U+FFFD REPLACEMENT CHARACTER, stands in for out-of-range runes */
	Runemax = 0x10FFFF, /* largest valid code point */

	Bit1 = 7,
	Bitx = 6,
	Bit2 = 5,
	Bit3 = 4,
	Bit4 = 3,
	Bit5 = 2,

	T1 = ((1<<(Bit1+1))-1) ^ 0xFF, /* 0000 0000 */
	Tx = ((1<<(Bitx+1))-1) ^ 0xFF, /* 1000 0000 */
	T2 = ((1<<(Bit2+1))-1) ^ 0xFF, /* 1100 0000 */
	T3 = ((1<<(Bit3+1))-1) ^ 0xFF, /* 1110 0000 */
	T4 = ((1<<(Bit4+1))-1) ^ 0xFF, /* 1111 0000 */
	T5 = ((1<<(Bit5+1))-1) ^ 0xFF, /* 1111 1000 */

	Rune1 = (1<<(Bit1+0*Bitx))-1, /* 0000 0000 0000 0000 0111 1111 */
	Rune2 = (1<<(Bit2+1*Bitx))-1, /* 0000 0000 0000 0111 1111 1111 */
	Rune3 = (1<<(Bit3+2*Bitx))-1, /* 0000 0000 1111 1111 1111 1111 */
	Rune4 = (1<<(Bit4+3*Bitx))-1, /* 0001 1111 1111 1111 1111 1111 */

	Maskx = (1<<Bitx)-1, /* 0011 1111 */
	Testx = Maskx ^ 0xFF, /* 1100 0000 */

};

/*
 * runetochar: encode the code point c as utf-8 into str.
 * str must have room for up to 4 bytes; no terminating NUL is written.
 * Returns the number of bytes stored (1..4).
 * A value above Runemax is replaced by Runeerror (U+FFFD) and encoded in
 * its shortest form (3 bytes), so the output is never an overlong
 * sequence.
 */
int runetochar(char *str, int c)
{
	/* one character sequence 00000-0007F => 00-7F */
	if (c <= Rune1) {
		str[0] = c;
		return 1;
	}

	/* two character sequence 00080-007FF => T2 Tx */
	if (c <= Rune2) {
		str[0] = T2 | (c >> 1*Bitx);
		str[1] = Tx | (c & Maskx);
		return 2;
	}

	/* out of range: substitute the replacement character */
	if (c > Runemax)
		c = Runeerror;

	/* three character sequence 00800-0FFFF => T3 Tx Tx */
	if (c <= Rune3) {
		str[0] = T3 | (c >> 2*Bitx);
		str[1] = Tx | ((c >> 1*Bitx) & Maskx);
		str[2] = Tx | (c & Maskx);
		return 3;
	}

	/* four character sequence 010000-10FFFF => T4 Tx Tx Tx */
	str[0] = T4 | (c >> 3*Bitx);
	str[1] = Tx | ((c >> 2*Bitx) & Maskx);
	str[2] = Tx | ((c >> 1*Bitx) & Maskx);
	str[3] = Tx | (c & Maskx);
	return 4;
}
791:
792:
793: /* ========== end of utf8 code =========== */
794:
795:
796:
1.1 tholo 797: Cell *matchop(Node **a, int n) /* ~ and match() */
798: {
799: Cell *x, *y;
800: char *s, *t;
801: int i;
1.75 millert 802: int cstart, cpatlen, len;
1.1 tholo 803: fa *pfa;
1.18 millert 804: int (*mf)(fa *, const char *) = match, mode = 0;
1.1 tholo 805:
806: if (n == MATCHFCN) {
807: mf = pmatch;
808: mode = 1;
809: }
810: x = execute(a[1]); /* a[1] = target text */
811: s = getsval(x);
1.51 millert 812: if (a[0] == NULL) /* a[1] == 0: already-compiled reg expr */
1.1 tholo 813: i = (*mf)((fa *) a[2], s);
814: else {
815: y = execute(a[2]); /* a[2] = regular expr */
816: t = getsval(y);
817: pfa = makedfa(t, mode);
818: i = (*mf)(pfa, s);
819: tempfree(y);
820: }
821: tempfree(x);
822: if (n == MATCHFCN) {
1.75 millert 823: int start = patbeg - s + 1; /* origin 1 */
824: if (patlen < 0) {
825: start = 0; /* not found */
826: } else {
827: cstart = u8_byte2char(s, start-1);
828: cpatlen = 0;
829: for (i = 0; i < patlen; i += len) {
830: len = u8_nextlen(patbeg+i);
831: cpatlen++;
832: }
833:
834: start = cstart;
835: patlen = cpatlen;
836: }
837:
1.1 tholo 838: setfval(rstartloc, (Awkfloat) start);
839: setfval(rlengthloc, (Awkfloat) patlen);
840: x = gettemp();
841: x->tval = NUM;
842: x->fval = start;
843: return x;
844: } else if ((n == MATCH && i == 1) || (n == NOTMATCH && i == 0))
1.15 millert 845: return(True);
1.1 tholo 846: else
1.15 millert 847: return(False);
1.1 tholo 848: }
849:
850:
851: Cell *boolop(Node **a, int n) /* a[0] || a[1], a[0] && a[1], !a[0] */
852: {
853: Cell *x, *y;
854: int i;
855:
856: x = execute(a[0]);
857: i = istrue(x);
858: tempfree(x);
859: switch (n) {
860: case BOR:
1.15 millert 861: if (i) return(True);
1.1 tholo 862: y = execute(a[1]);
863: i = istrue(y);
864: tempfree(y);
1.15 millert 865: if (i) return(True);
866: else return(False);
1.1 tholo 867: case AND:
1.15 millert 868: if ( !i ) return(False);
1.1 tholo 869: y = execute(a[1]);
870: i = istrue(y);
871: tempfree(y);
1.15 millert 872: if (i) return(True);
873: else return(False);
1.1 tholo 874: case NOT:
1.15 millert 875: if (i) return(False);
876: else return(True);
1.1 tholo 877: default: /* can't happen */
1.16 millert 878: FATAL("unknown boolean operator %d", n);
1.1 tholo 879: }
880: return 0; /*NOTREACHED*/
881: }
882:
883: Cell *relop(Node **a, int n) /* a[0 < a[1], etc. */
884: {
885: int i;
886: Cell *x, *y;
887: Awkfloat j;
1.75 millert 888: bool x_is_nan, y_is_nan;
1.1 tholo 889:
890: x = execute(a[0]);
891: y = execute(a[1]);
1.75 millert 892: x_is_nan = isnan(x->fval);
893: y_is_nan = isnan(y->fval);
1.1 tholo 894: if (x->tval&NUM && y->tval&NUM) {
1.75 millert 895: if ((x_is_nan || y_is_nan) && n != NE)
896: return(False);
1.1 tholo 897: j = x->fval - y->fval;
898: i = j<0? -1: (j>0? 1: 0);
899: } else {
900: i = strcmp(getsval(x), getsval(y));
901: }
902: tempfree(x);
903: tempfree(y);
904: switch (n) {
1.15 millert 905: case LT: if (i<0) return(True);
906: else return(False);
907: case LE: if (i<=0) return(True);
908: else return(False);
1.75 millert 909: case NE: if (x_is_nan && y_is_nan) return(True);
910: else if (i!=0) return(True);
1.15 millert 911: else return(False);
912: case EQ: if (i == 0) return(True);
913: else return(False);
914: case GE: if (i>=0) return(True);
915: else return(False);
916: case GT: if (i>0) return(True);
917: else return(False);
1.1 tholo 918: default: /* can't happen */
1.16 millert 919: FATAL("unknown relational operator %d", n);
1.1 tholo 920: }
921: return 0; /*NOTREACHED*/
922: }
923:
924: void tfree(Cell *a) /* free a tempcell */
925: {
1.13 kstailey 926: if (freeable(a)) {
1.60 millert 927: DPRINTF("freeing %s %s %o\n", NN(a->nval), NN(a->sval), a->tval);
1.1 tholo 928: xfree(a->sval);
1.13 kstailey 929: }
1.1 tholo 930: if (a == tmps)
1.16 millert 931: FATAL("tempcell list is curdled");
1.1 tholo 932: a->cnext = tmps;
933: tmps = a;
934: }
935:
936: Cell *gettemp(void) /* get a tempcell */
937: { int i;
938: Cell *x;
939:
940: if (!tmps) {
1.69 millert 941: tmps = (Cell *) calloc(100, sizeof(*tmps));
1.1 tholo 942: if (!tmps)
1.16 millert 943: FATAL("out of space for temporaries");
1.52 millert 944: for (i = 1; i < 100; i++)
1.1 tholo 945: tmps[i-1].cnext = &tmps[i];
1.51 millert 946: tmps[i-1].cnext = NULL;
1.1 tholo 947: }
948: x = tmps;
949: tmps = x->cnext;
950: *x = tempcell;
951: return(x);
952: }
953:
954: Cell *indirect(Node **a, int n) /* $( a[0] ) */
955: {
1.25 millert 956: Awkfloat val;
1.1 tholo 957: Cell *x;
958: int m;
959: char *s;
960:
961: x = execute(a[0]);
1.25 millert 962: val = getfval(x); /* freebsd: defend against super large field numbers */
963: if ((Awkfloat)INT_MAX < val)
964: FATAL("trying to access out of range field %s", x->nval);
965: m = (int) val;
1.69 millert 966: if (m == 0 && !is_number(s = getsval(x), NULL)) /* suspicion! */
1.16 millert 967: FATAL("illegal field $(%s), name \"%s\"", s, x->nval);
1.13 kstailey 968: /* BUG: can x->nval ever be null??? */
1.1 tholo 969: tempfree(x);
970: x = fieldadr(m);
1.13 kstailey 971: x->ctype = OCELL; /* BUG? why are these needed? */
1.1 tholo 972: x->csub = CFLD;
973: return(x);
974: }
975:
976: Cell *substr(Node **a, int nnn) /* substr(a[0], a[1], a[2]) */
977: {
978: int k, m, n;
1.75 millert 979: int mb, nb;
1.1 tholo 980: char *s;
981: int temp;
1.51 millert 982: Cell *x, *y, *z = NULL;
1.1 tholo 983:
984: x = execute(a[0]);
985: y = execute(a[1]);
1.51 millert 986: if (a[2] != NULL)
1.1 tholo 987: z = execute(a[2]);
988: s = getsval(x);
1.80 ! millert 989: k = u8_strlen(s) + 1;
1.1 tholo 990: if (k <= 1) {
991: tempfree(x);
992: tempfree(y);
1.51 millert 993: if (a[2] != NULL) {
1.1 tholo 994: tempfree(z);
1.17 millert 995: }
1.1 tholo 996: x = gettemp();
997: setsval(x, "");
998: return(x);
999: }
1.14 millert 1000: m = (int) getfval(y);
1.1 tholo 1001: if (m <= 0)
1002: m = 1;
1003: else if (m > k)
1004: m = k;
1005: tempfree(y);
1.51 millert 1006: if (a[2] != NULL) {
1.14 millert 1007: n = (int) getfval(z);
1.1 tholo 1008: tempfree(z);
1009: } else
1010: n = k - 1;
1011: if (n < 0)
1012: n = 0;
1013: else if (n > k - m)
1014: n = k - m;
1.75 millert 1015: /* m is start, n is length from there */
1.60 millert 1016: DPRINTF("substr: m=%d, n=%d, s=%s\n", m, n, s);
1.1 tholo 1017: y = gettemp();
1.75 millert 1018: mb = u8_char2byte(s, m-1); /* byte offset of start char in s */
1019: nb = u8_char2byte(s, m-1+n); /* byte offset of end+1 char in s */
1020:
1021: temp = s[nb]; /* with thanks to John Linderman */
1022: s[nb] = '\0';
1023: setsval(y, s + mb);
1024: s[nb] = temp;
1.1 tholo 1025: tempfree(x);
1026: return(y);
1027: }
1028:
1029: Cell *sindex(Node **a, int nnn) /* index(a[0], a[1]) */
1030: {
1031: Cell *x, *y, *z;
1032: char *s1, *s2, *p1, *p2, *q;
1033: Awkfloat v = 0.0;
1034:
1035: x = execute(a[0]);
1036: s1 = getsval(x);
1037: y = execute(a[1]);
1038: s2 = getsval(y);
1039:
1040: z = gettemp();
1041: for (p1 = s1; *p1 != '\0'; p1++) {
1.57 millert 1042: for (q = p1, p2 = s2; *p2 != '\0' && *q == *p2; q++, p2++)
1043: continue;
1.1 tholo 1044: if (*p2 == '\0') {
1.75 millert 1045: /* v = (Awkfloat) (p1 - s1 + 1); origin 1 */
1046:
1047: /* should be a function: used in match() as well */
1048: int i, len;
1049: v = 0;
1050: for (i = 0; i < p1-s1+1; i += len) {
1051: len = u8_nextlen(s1+i);
1052: v++;
1053: }
1.1 tholo 1054: break;
1055: }
1056: }
1057: tempfree(x);
1058: tempfree(y);
1059: setfval(z, v);
1060: return(z);
1061: }
1062:
/*
 * has_utf8: return 1 if s contains any character for which u8_nextlen
 * reports a length of 2 bytes or more, otherwise 0.
 */
int has_utf8(char *s)
{
	while (*s != 0) {
		int len = u8_nextlen(s);

		if (len > 1)
			return 1;
		s += len;
	}
	return 0;
}
1074:
1.13 kstailey 1075: #define MAXNUMSIZE 50
1076:
1.18 millert 1077: int format(char **pbuf, int *pbufsize, const char *s, Node *a) /* printf-like conversions */
1.1 tholo 1078: {
1.13 kstailey 1079: char *fmt;
1.18 millert 1080: char *p, *t;
1081: const char *os;
1.1 tholo 1082: Cell *x;
1.10 kstailey 1083: int flag = 0, n;
1.13 kstailey 1084: int fmtwd; /* format width */
1085: int fmtsz = recsize;
1086: char *buf = *pbuf;
1087: int bufsize = *pbufsize;
1.53 millert 1088: #define FMTSZ(a) (fmtsz - ((a) - fmt))
1089: #define BUFSZ(a) (bufsize - ((a) - buf))
1.1 tholo 1090:
1.54 millert 1091: static bool first = true;
1092: static bool have_a_format = false;
1.47 millert 1093:
1094: if (first) {
1.57 millert 1095: char xbuf[100];
1.47 millert 1096:
1.57 millert 1097: snprintf(xbuf, sizeof(xbuf), "%a", 42.0);
1098: have_a_format = (strcmp(xbuf, "0x1.5p+5") == 0);
1.54 millert 1099: first = false;
1.47 millert 1100: }
1101:
1.1 tholo 1102: os = s;
1103: p = buf;
1.69 millert 1104: if ((fmt = (char *) malloc(fmtsz)) == NULL)
1.16 millert 1105: FATAL("out of memory in format()");
1.1 tholo 1106: while (*s) {
1.30 millert 1107: adjbuf(&buf, &bufsize, MAXNUMSIZE+1+p-buf, recsize, &p, "format1");
1.1 tholo 1108: if (*s != '%') {
1109: *p++ = *s++;
1110: continue;
1111: }
1112: if (*(s+1) == '%') {
1113: *p++ = '%';
1114: s += 2;
1115: continue;
1116: }
1.13 kstailey 1117: fmtwd = atoi(s+1);
1118: if (fmtwd < 0)
1119: fmtwd = -fmtwd;
1.30 millert 1120: adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format2");
1.13 kstailey 1121: for (t = fmt; (*t++ = *s) != '\0'; s++) {
1.30 millert 1122: if (!adjbuf(&fmt, &fmtsz, MAXNUMSIZE+1+t-fmt, recsize, &t, "format3"))
1.16 millert 1123: FATAL("format item %.30s... ran format() out of memory", os);
1.55 millert 1124: /* Ignore size specifiers */
1125: if (strchr("hjLlqtz", *s) != NULL) { /* the ansi panoply */
1126: t--;
1127: continue;
1128: }
1129: if (isalpha((uschar)*s))
1130: break;
1.48 millert 1131: if (*s == '$') {
1132: FATAL("'$' not permitted in awk formats");
1133: }
1.1 tholo 1134: if (*s == '*') {
1.49 millert 1135: if (a == NULL) {
1.27 deraadt 1136: FATAL("not enough args in printf(%s)", os);
1.49 millert 1137: }
1.1 tholo 1138: x = execute(a);
1139: a = a->nnext;
1.53 millert 1140: snprintf(t - 1, FMTSZ(t - 1),
1141: "%d", fmtwd=(int) getfval(x));
1.13 kstailey 1142: if (fmtwd < 0)
1143: fmtwd = -fmtwd;
1144: adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format");
1.1 tholo 1145: t = fmt + strlen(fmt);
1146: tempfree(x);
1147: }
1148: }
1149: *t = '\0';
1.13 kstailey 1150: if (fmtwd < 0)
1151: fmtwd = -fmtwd;
1.30 millert 1152: adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format4");
1.1 tholo 1153: switch (*s) {
1.47 millert 1154: case 'a': case 'A':
1155: if (have_a_format)
1156: flag = *s;
1157: else
1158: flag = 'f';
1159: break;
1.1 tholo 1160: case 'f': case 'e': case 'g': case 'E': case 'G':
1.18 millert 1161: flag = 'f';
1.1 tholo 1162: break;
1.55 millert 1163: case 'd': case 'i': case 'o': case 'x': case 'X': case 'u':
1164: flag = (*s == 'd' || *s == 'i') ? 'd' : 'u';
1.53 millert 1165: *(t-1) = 'j';
1166: *t = *s;
1167: *++t = '\0';
1.1 tholo 1168: break;
1169: case 's':
1.18 millert 1170: flag = 's';
1.1 tholo 1171: break;
1172: case 'c':
1.18 millert 1173: flag = 'c';
1.1 tholo 1174: break;
1175: default:
1.16 millert 1176: WARNING("weird printf conversion %s", fmt);
1.18 millert 1177: flag = '?';
1.1 tholo 1178: break;
1179: }
1180: if (a == NULL)
1.16 millert 1181: FATAL("not enough args in printf(%s)", os);
1.1 tholo 1182: x = execute(a);
1183: a = a->nnext;
1.13 kstailey 1184: n = MAXNUMSIZE;
1185: if (fmtwd > n)
1186: n = fmtwd;
1.30 millert 1187: adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format5");
1.1 tholo 1188: switch (flag) {
1.75 millert 1189: case '?':
1190: snprintf(p, BUFSZ(p), "%s", fmt); /* unknown, so dump it too */
1.13 kstailey 1191: t = getsval(x);
1192: n = strlen(t);
1193: if (fmtwd > n)
1194: n = fmtwd;
1.30 millert 1195: adjbuf(&buf, &bufsize, 1+strlen(p)+n+p-buf, recsize, &p, "format6");
1.5 kstailey 1196: p += strlen(p);
1.53 millert 1197: snprintf(p, BUFSZ(p), "%s", t);
1.1 tholo 1198: break;
1.47 millert 1199: case 'a':
1200: case 'A':
1.53 millert 1201: case 'f': snprintf(p, BUFSZ(p), fmt, getfval(x)); break;
1.55 millert 1202: case 'd': snprintf(p, BUFSZ(p), fmt, (intmax_t) getfval(x)); break;
1203: case 'u': snprintf(p, BUFSZ(p), fmt, (uintmax_t) getfval(x)); break;
1.75 millert 1204:
1205: case 's': {
1.1 tholo 1206: t = getsval(x);
1207: n = strlen(t);
1.75 millert 1208: /* if simple format or no utf-8 in the string, sprintf works */
1209: if (!has_utf8(t) || strcmp(fmt,"%s") == 0) {
1210: if (fmtwd > n)
1211: n = fmtwd;
1212: if (!adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format7"))
1213: FATAL("huge string/format (%d chars) in printf %.30s..." \
1214: " ran format() out of memory", n, t);
1215: snprintf(p, BUFSZ(p), fmt, t);
1216: break;
1217: }
1218:
1219: /* get here if string has utf-8 chars and fmt is not plain %s */
1220: /* "%-w.ps", where -, w and .p are all optional */
1221: /* '0' before the w is a flag character */
1222: /* fmt points at % */
1223: int ljust = 0, wid = 0, prec = n, pad = 0;
1224: char *f = fmt+1;
1225: if (f[0] == '-') {
1226: ljust = 1;
1227: f++;
1228: }
1229: // flags '0' and '+' are recognized but skipped
1230: if (f[0] == '0') {
1231: f++;
1232: if (f[0] == '+')
1233: f++;
1234: }
1235: if (f[0] == '+') {
1236: f++;
1237: if (f[0] == '0')
1238: f++;
1239: }
1240: if (isdigit((uschar)f[0])) { /* there is a wid */
1241: wid = strtol(f, &f, 10);
1242: }
1243: if (f[0] == '.') { /* there is a .prec */
1244: prec = strtol(++f, &f, 10);
1245: }
1246: if (prec > u8_strlen(t))
1247: prec = u8_strlen(t);
1248: pad = wid>prec ? wid - prec : 0; // has to be >= 0
1249: int i, k, n;
1250:
1251: if (ljust) { // print prec chars from t, then pad blanks
1252: n = u8_char2byte(t, prec);
1253: for (k = 0; k < n; k++) {
1254: //putchar(t[k]);
1255: *p++ = t[k];
1256: }
1257: for (i = 0; i < pad; i++) {
1258: //printf(" ");
1259: *p++ = ' ';
1260: }
1261: } else { // print pad blanks, then prec chars from t
1262: for (i = 0; i < pad; i++) {
1263: //printf(" ");
1264: *p++ = ' ';
1265: }
1266: n = u8_char2byte(t, prec);
1267: for (k = 0; k < n; k++) {
1268: //putchar(t[k]);
1269: *p++ = t[k];
1270: }
1271: }
1272: *p = 0;
1.1 tholo 1273: break;
1.75 millert 1274: }
1275:
1276: case 'c': {
1277: /*
1278: * If a numeric value is given, awk should just turn
1279: * it into a character and print it:
1280: * BEGIN { printf("%c\n", 65) }
1281: * prints "A".
1282: *
1283: * But what if the numeric value is > 128 and
1284: * represents a valid Unicode code point?!? We do
1285: * our best to convert it back into UTF-8. If we
1286: * can't, we output the encoding of the Unicode
1287: * "invalid character", 0xFFFD.
1288: */
1.13 kstailey 1289: if (isnum(x)) {
1.75 millert 1290: int charval = (int) getfval(x);
1291:
1292: if (charval != 0) {
1.77 millert 1293: if (charval < 128 || awk_mb_cur_max == 1)
1.75 millert 1294: snprintf(p, BUFSZ(p), fmt, charval);
1295: else {
1296: // possible unicode character
1297: size_t count;
1298: char *bs = wide_char_to_byte_str(charval, &count);
1299:
1300: if (bs == NULL) { // invalid character
1301: // use unicode invalid character, 0xFFFD
1302: bs = "\357\277\275";
1303: count = 3;
1304: }
1305: t = bs;
1306: n = count;
1307: goto format_percent_c;
1308: }
1309: } else {
1.18 millert 1310: *p++ = '\0'; /* explicit null byte */
1311: *p = '\0'; /* next output will start here */
1312: }
1.75 millert 1313: break;
1314: }
1315: t = getsval(x);
1316: n = u8_nextlen(t);
1317: format_percent_c:
1318: if (n < 2) { /* not utf8 */
1.53 millert 1319: snprintf(p, BUFSZ(p), fmt, getsval(x)[0]);
1.75 millert 1320: break;
1321: }
1322:
1323: // utf8 character, almost same song and dance as for %s
1324: int ljust = 0, wid = 0, prec = n, pad = 0;
1325: char *f = fmt+1;
1326: if (f[0] == '-') {
1327: ljust = 1;
1328: f++;
1329: }
1330: // flags '0' and '+' are recognized but skipped
1331: if (f[0] == '0') {
1332: f++;
1333: if (f[0] == '+')
1334: f++;
1335: }
1336: if (f[0] == '+') {
1337: f++;
1338: if (f[0] == '0')
1339: f++;
1340: }
1341: if (isdigit((uschar)f[0])) { /* there is a wid */
1342: wid = strtol(f, &f, 10);
1343: }
1344: if (f[0] == '.') { /* there is a .prec */
1345: prec = strtol(++f, &f, 10);
1346: }
1347: if (prec > 1) // %c --> only one character
1348: prec = 1;
1349: pad = wid>prec ? wid - prec : 0; // has to be >= 0
1350: int i;
1351:
1352: if (ljust) { // print one char from t, then pad blanks
1.76 deraadt 1353: for (i = 0; i < n; i++)
1.75 millert 1354: *p++ = t[i];
1355: for (i = 0; i < pad; i++) {
1356: //printf(" ");
1357: *p++ = ' ';
1358: }
1359: } else { // print pad blanks, then prec chars from t
1360: for (i = 0; i < pad; i++) {
1361: //printf(" ");
1362: *p++ = ' ';
1363: }
1.76 deraadt 1364: for (i = 0; i < n; i++)
1.75 millert 1365: *p++ = t[i];
1366: }
1367: *p = 0;
1.1 tholo 1368: break;
1.75 millert 1369: }
1.18 millert 1370: default:
1371: FATAL("can't happen: bad conversion %c in format()", flag);
1.1 tholo 1372: }
1.75 millert 1373:
1.1 tholo 1374: tempfree(x);
1.5 kstailey 1375: p += strlen(p);
1.1 tholo 1376: s++;
1377: }
1378: *p = '\0';
1.13 kstailey 1379: free(fmt);
1.73 millert 1380: for ( ; a; a = a->nnext) { /* evaluate any remaining args */
1381: x = execute(a);
1382: tempfree(x);
1383: }
1.13 kstailey 1384: *pbuf = buf;
1385: *pbufsize = bufsize;
1386: return p - buf;
1.1 tholo 1387: }
1388:
1389: Cell *awksprintf(Node **a, int n) /* sprintf(a[0]) */
1390: {
1391: Cell *x;
1392: Node *y;
1.13 kstailey 1393: char *buf;
1394: int bufsz=3*recsize;
1.1 tholo 1395:
1.69 millert 1396: if ((buf = (char *) malloc(bufsz)) == NULL)
1.16 millert 1397: FATAL("out of memory in awksprintf");
1.1 tholo 1398: y = a[0]->nnext;
1399: x = execute(a[0]);
1.13 kstailey 1400: if (format(&buf, &bufsz, getsval(x), y) == -1)
1.16 millert 1401: FATAL("sprintf string %.30s... too long. can't happen.", buf);
1.1 tholo 1402: tempfree(x);
1403: x = gettemp();
1.13 kstailey 1404: x->sval = buf;
1.1 tholo 1405: x->tval = STR;
1406: return(x);
1407: }
1408:
1409: Cell *awkprintf(Node **a, int n) /* printf */
1410: { /* a[0] is list of args, starting with format string */
1411: /* a[1] is redirection operator, a[2] is redirection file */
1412: FILE *fp;
1413: Cell *x;
1414: Node *y;
1.13 kstailey 1415: char *buf;
1.9 kstailey 1416: int len;
1.13 kstailey 1417: int bufsz=3*recsize;
1.1 tholo 1418:
1.69 millert 1419: if ((buf = (char *) malloc(bufsz)) == NULL)
1.16 millert 1420: FATAL("out of memory in awkprintf");
1.1 tholo 1421: y = a[0]->nnext;
1422: x = execute(a[0]);
1.13 kstailey 1423: if ((len = format(&buf, &bufsz, getsval(x), y)) == -1)
1.16 millert 1424: FATAL("printf string %.30s... too long. can't happen.", buf);
1.1 tholo 1425: tempfree(x);
1426: if (a[1] == NULL) {
1.13 kstailey 1427: /* fputs(buf, stdout); */
1.9 kstailey 1428: fwrite(buf, len, 1, stdout);
1.8 kstailey 1429: if (ferror(stdout))
1.16 millert 1430: FATAL("write error on stdout");
1.1 tholo 1431: } else {
1.15 millert 1432: fp = redirect(ptoi(a[1]), a[2]);
1.13 kstailey 1433: /* fputs(buf, fp); */
1.9 kstailey 1434: fwrite(buf, len, 1, fp);
1.8 kstailey 1435: fflush(fp);
1436: if (ferror(fp))
1.16 millert 1437: FATAL("write error on %s", filename(fp));
1.1 tholo 1438: }
1.13 kstailey 1439: free(buf);
1.15 millert 1440: return(True);
1.1 tholo 1441: }
1442:
1443: Cell *arith(Node **a, int n) /* a[0] + a[1], etc. also -a[0] */
1444: {
1445: Awkfloat i, j = 0;
1446: double v;
1447: Cell *x, *y, *z;
1448:
1449: x = execute(a[0]);
1450: i = getfval(x);
1451: tempfree(x);
1.47 millert 1452: if (n != UMINUS && n != UPLUS) {
1.1 tholo 1453: y = execute(a[1]);
1454: j = getfval(y);
1455: tempfree(y);
1456: }
1457: z = gettemp();
1458: switch (n) {
1459: case ADD:
1460: i += j;
1461: break;
1462: case MINUS:
1463: i -= j;
1464: break;
1465: case MULT:
1466: i *= j;
1467: break;
1468: case DIVIDE:
1469: if (j == 0)
1.16 millert 1470: FATAL("division by zero");
1.1 tholo 1471: i /= j;
1472: break;
1473: case MOD:
1474: if (j == 0)
1.16 millert 1475: FATAL("division by zero in mod");
1.1 tholo 1476: modf(i/j, &v);
1477: i = i - j * v;
1478: break;
1479: case UMINUS:
1480: i = -i;
1481: break;
1.57 millert 1482: case UPLUS: /* handled by getfval(), above */
1.47 millert 1483: break;
1.1 tholo 1484: case POWER:
1485: if (j >= 0 && modf(j, &v) == 0.0) /* pos integer exponent */
1.12 millert 1486: i = ipow(i, (int) j);
1.63 millert 1487: else {
1.45 guenther 1488: errno = 0;
1.1 tholo 1489: i = errcheck(pow(i, j), "pow");
1.63 millert 1490: }
1.1 tholo 1491: break;
1492: default: /* can't happen */
1.16 millert 1493: FATAL("illegal arithmetic operator %d", n);
1.1 tholo 1494: }
1495: setfval(z, i);
1496: return(z);
1497: }
1498:
/*
 * ipow: x**n for an integer exponent, by repeated squaring.
 * Ought to be done by pow, but isn't always.
 * Any n <= 0 yields 1.
 */
double ipow(double x, int n)
{
	if (n <= 0)
		return 1;

	double half = ipow(x, n / 2);

	return (n % 2 == 0) ? half * half : x * half * half;
}
1511:
1512: Cell *incrdecr(Node **a, int n) /* a[0]++, etc. */
1513: {
1514: Cell *x, *z;
1515: int k;
1516: Awkfloat xf;
1517:
1518: x = execute(a[0]);
1519: xf = getfval(x);
1520: k = (n == PREINCR || n == POSTINCR) ? 1 : -1;
1521: if (n == PREINCR || n == PREDECR) {
1522: setfval(x, xf + k);
1523: return(x);
1524: }
1525: z = gettemp();
1526: setfval(z, xf);
1527: setfval(x, xf + k);
1528: tempfree(x);
1529: return(z);
1530: }
1531:
1532: Cell *assign(Node **a, int n) /* a[0] = a[1], a[0] += a[1], etc. */
1533: { /* this is subtle; don't muck with it. */
1534: Cell *x, *y;
1535: Awkfloat xf, yf;
1536: double v;
1537:
1538: y = execute(a[1]);
1539: x = execute(a[0]);
1540: if (n == ASSIGN) { /* ordinary assignment */
1.49 millert 1541: if (x == y && !(x->tval & (FLD|REC)) && x != nfloc)
1542: ; /* self-assignment: leave alone unless it's a field or NF */
1.1 tholo 1543: else if ((y->tval & (STR|NUM)) == (STR|NUM)) {
1544: setsval(x, getsval(y));
1545: x->fval = getfval(y);
1546: x->tval |= NUM;
1547: }
1.13 kstailey 1548: else if (isstr(y))
1.1 tholo 1549: setsval(x, getsval(y));
1.13 kstailey 1550: else if (isnum(y))
1.1 tholo 1551: setfval(x, getfval(y));
1552: else
1553: funnyvar(y, "read value of");
1554: tempfree(y);
1555: return(x);
1556: }
1557: xf = getfval(x);
1558: yf = getfval(y);
1559: switch (n) {
1560: case ADDEQ:
1561: xf += yf;
1562: break;
1563: case SUBEQ:
1564: xf -= yf;
1565: break;
1566: case MULTEQ:
1567: xf *= yf;
1568: break;
1569: case DIVEQ:
1570: if (yf == 0)
1.16 millert 1571: FATAL("division by zero in /=");
1.1 tholo 1572: xf /= yf;
1573: break;
1574: case MODEQ:
1575: if (yf == 0)
1.16 millert 1576: FATAL("division by zero in %%=");
1.1 tholo 1577: modf(xf/yf, &v);
1578: xf = xf - yf * v;
1579: break;
1580: case POWEQ:
1581: if (yf >= 0 && modf(yf, &v) == 0.0) /* pos integer exponent */
1.12 millert 1582: xf = ipow(xf, (int) yf);
1.63 millert 1583: else {
1.45 guenther 1584: errno = 0;
1.1 tholo 1585: xf = errcheck(pow(xf, yf), "pow");
1.63 millert 1586: }
1.1 tholo 1587: break;
1588: default:
1.16 millert 1589: FATAL("illegal assignment operator %d", n);
1.1 tholo 1590: break;
1591: }
1592: tempfree(y);
1593: setfval(x, xf);
1594: return(x);
1595: }
1596:
1597: Cell *cat(Node **a, int q) /* a[0] cat a[1] */
1598: {
1599: Cell *x, *y, *z;
1600: int n1, n2;
1.49 millert 1601: char *s = NULL;
1602: int ssz = 0;
1.1 tholo 1603:
1604: x = execute(a[0]);
1.49 millert 1605: n1 = strlen(getsval(x));
1.74 millert 1606: adjbuf(&s, &ssz, n1 + 1, recsize, 0, "cat1");
1.61 millert 1607: memcpy(s, x->sval, n1);
1.49 millert 1608:
1.74 millert 1609: tempfree(x);
1610:
1.1 tholo 1611: y = execute(a[1]);
1.49 millert 1612: n2 = strlen(getsval(y));
1.61 millert 1613: adjbuf(&s, &ssz, n1 + n2 + 1, recsize, 0, "cat2");
1.53 millert 1614: memcpy(s + n1, y->sval, n2);
1615: s[n1 + n2] = '\0';
1.49 millert 1616:
1.1 tholo 1617: tempfree(y);
1.49 millert 1618:
1.1 tholo 1619: z = gettemp();
1620: z->sval = s;
1621: z->tval = STR;
1.49 millert 1622:
1.1 tholo 1623: return(z);
1624: }
1625:
1626: Cell *pastat(Node **a, int n) /* a[0] { a[1] } */
1627: {
1628: Cell *x;
1629:
1.51 millert 1630: if (a[0] == NULL)
1.1 tholo 1631: x = execute(a[1]);
1632: else {
1633: x = execute(a[0]);
1634: if (istrue(x)) {
1635: tempfree(x);
1636: x = execute(a[1]);
1637: }
1638: }
1639: return x;
1640: }
1641:
1642: Cell *dopa2(Node **a, int n) /* a[0], a[1] { a[2] } */
1643: {
1644: Cell *x;
1645: int pair;
1646:
1.15 millert 1647: pair = ptoi(a[3]);
1.1 tholo 1648: if (pairstack[pair] == 0) {
1649: x = execute(a[0]);
1650: if (istrue(x))
1651: pairstack[pair] = 1;
1652: tempfree(x);
1653: }
1654: if (pairstack[pair] == 1) {
1655: x = execute(a[1]);
1656: if (istrue(x))
1657: pairstack[pair] = 0;
1658: tempfree(x);
1659: x = execute(a[2]);
1660: return(x);
1661: }
1.15 millert 1662: return(False);
1.1 tholo 1663: }
1664:
1665: Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */
1666: {
1.51 millert 1667: Cell *x = NULL, *y, *ap;
1.53 millert 1668: const char *s, *origs, *t;
1.56 millert 1669: const char *fs = NULL;
1670: char *origfs = NULL;
1.1 tholo 1671: int sep;
1.53 millert 1672: char temp, num[50];
1.75 millert 1673: int j, n, tempstat, arg3type;
1.69 millert 1674: double result;
1.1 tholo 1675:
1676: y = execute(a[0]); /* source string */
1.43 fcambus 1677: origs = s = strdup(getsval(y));
1.44 fcambus 1678: if (s == NULL)
1679: FATAL("out of space in split");
1.73 millert 1680: tempfree(y);
1.15 millert 1681: arg3type = ptoi(a[3]);
1.75 millert 1682: if (a[2] == NULL) { /* BUG: CSV should override implicit fs but not explicit */
1.49 millert 1683: fs = getsval(fsloc);
1.75 millert 1684: } else if (arg3type == STRING) { /* split(str,arr,"string") */
1.1 tholo 1685: x = execute(a[2]);
1.56 millert 1686: fs = origfs = strdup(getsval(x));
1.49 millert 1687: if (fs == NULL)
1688: FATAL("out of space in split");
1689: tempfree(x);
1.75 millert 1690: } else if (arg3type == REGEXPR) {
1.13 kstailey 1691: fs = "(regexpr)"; /* split(str,arr,/regexpr/) */
1.75 millert 1692: } else {
1.16 millert 1693: FATAL("illegal type of split");
1.75 millert 1694: }
1.1 tholo 1695: sep = *fs;
1696: ap = execute(a[1]); /* array name */
1.75 millert 1697: /* BUG 7/26/22: this appears not to reset array: see C1/asplit */
1.1 tholo 1698: freesymtab(ap);
1.60 millert 1699: DPRINTF("split: s=|%s|, a=%s, sep=|%s|\n", s, NN(ap->nval), fs);
1.1 tholo 1700: ap->tval &= ~STR;
1701: ap->tval |= ARR;
1702: ap->sval = (char *) makesymtab(NSYMTAB);
1703:
1704: n = 0;
1.33 millert 1705: if (arg3type == REGEXPR && strlen((char*)((fa*)a[2])->restr) == 0) {
1706: /* split(s, a, //); have to arrange that it looks like empty sep */
1707: arg3type = 0;
1708: fs = "";
1709: sep = 0;
1710: }
1.25 millert 1711: if (*s != '\0' && (strlen(fs) > 1 || arg3type == REGEXPR)) { /* reg expr */
1.1 tholo 1712: fa *pfa;
1.15 millert 1713: if (arg3type == REGEXPR) { /* it's ready already */
1.1 tholo 1714: pfa = (fa *) a[2];
1715: } else {
1716: pfa = makedfa(fs, 1);
1717: }
1718: if (nematch(pfa,s)) {
1719: tempstat = pfa->initstat;
1720: pfa->initstat = 2;
1721: do {
1722: n++;
1.53 millert 1723: snprintf(num, sizeof(num), "%d", n);
1.1 tholo 1724: temp = *patbeg;
1.53 millert 1725: setptr(patbeg, '\0');
1.69 millert 1726: if (is_number(s, & result))
1727: setsymtab(num, s, result, STR|NUM, (Array *) ap->sval);
1.1 tholo 1728: else
1729: setsymtab(num, s, 0.0, STR, (Array *) ap->sval);
1.53 millert 1730: setptr(patbeg, temp);
1.1 tholo 1731: s = patbeg + patlen;
1.57 millert 1732: if (*(patbeg+patlen-1) == '\0' || *s == '\0') {
1.1 tholo 1733: n++;
1.53 millert 1734: snprintf(num, sizeof(num), "%d", n);
1.1 tholo 1735: setsymtab(num, "", 0.0, STR, (Array *) ap->sval);
1736: pfa->initstat = tempstat;
1737: goto spdone;
1738: }
1739: } while (nematch(pfa,s));
1.25 millert 1740: pfa->initstat = tempstat; /* bwk: has to be here to reset */
1741: /* cf gsub and refldbld */
1.1 tholo 1742: }
1743: n++;
1.53 millert 1744: snprintf(num, sizeof(num), "%d", n);
1.69 millert 1745: if (is_number(s, & result))
1746: setsymtab(num, s, result, STR|NUM, (Array *) ap->sval);
1.1 tholo 1747: else
1748: setsymtab(num, s, 0.0, STR, (Array *) ap->sval);
1749: spdone:
1750: pfa = NULL;
1.75 millert 1751:
1752: } else if (a[2] == NULL && CSV) { /* CSV only if no explicit separator */
1753: char *newt = (char *) malloc(strlen(s)); /* for building new string; reuse for each field */
1754: for (;;) {
1755: char *fr = newt;
1756: n++;
1757: if (*s == '"' ) { /* start of "..." */
1758: for (s++ ; *s != '\0'; ) {
1759: if (*s == '"' && s[1] != '\0' && s[1] == '"') {
1760: s += 2; /* doubled quote */
1761: *fr++ = '"';
1762: } else if (*s == '"' && (s[1] == '\0' || s[1] == ',')) {
1763: s++; /* skip over closing quote */
1764: break;
1765: } else {
1766: *fr++ = *s++;
1767: }
1768: }
1769: *fr++ = 0;
1770: } else { /* unquoted field */
1771: while (*s != ',' && *s != '\0')
1772: *fr++ = *s++;
1773: *fr++ = 0;
1774: }
1775: snprintf(num, sizeof(num), "%d", n);
1776: if (is_number(newt, &result))
1777: setsymtab(num, newt, result, STR|NUM, (Array *) ap->sval);
1778: else
1779: setsymtab(num, newt, 0.0, STR, (Array *) ap->sval);
1780: if (*s++ == '\0')
1781: break;
1782: }
1783: free(newt);
1784:
1785: } else if (!CSV && sep == ' ') { /* usual case: split on white space */
1.1 tholo 1786: for (n = 0; ; ) {
1.57 millert 1787: #define ISWS(c) ((c) == ' ' || (c) == '\t' || (c) == '\n')
1788: while (ISWS(*s))
1.1 tholo 1789: s++;
1.57 millert 1790: if (*s == '\0')
1.1 tholo 1791: break;
1792: n++;
1793: t = s;
1794: do
1795: s++;
1.57 millert 1796: while (*s != '\0' && !ISWS(*s));
1.1 tholo 1797: temp = *s;
1.53 millert 1798: setptr(s, '\0');
1799: snprintf(num, sizeof(num), "%d", n);
1.69 millert 1800: if (is_number(t, & result))
1801: setsymtab(num, t, result, STR|NUM, (Array *) ap->sval);
1.1 tholo 1802: else
1803: setsymtab(num, t, 0.0, STR, (Array *) ap->sval);
1.53 millert 1804: setptr(s, temp);
1.57 millert 1805: if (*s != '\0')
1.1 tholo 1806: s++;
1807: }
1.75 millert 1808:
1.1 tholo 1809: } else if (sep == 0) { /* new: split(s, a, "") => 1 char/elem */
1.75 millert 1810: for (n = 0; *s != '\0'; s += u8_nextlen(s)) {
1811: char buf[10];
1.1 tholo 1812: n++;
1.53 millert 1813: snprintf(num, sizeof(num), "%d", n);
1.75 millert 1814:
1815: for (j = 0; j < u8_nextlen(s); j++) {
1816: buf[j] = s[j];
1817: }
1818: buf[j] = '\0';
1819:
1.17 millert 1820: if (isdigit((uschar)buf[0]))
1.1 tholo 1821: setsymtab(num, buf, atof(buf), STR|NUM, (Array *) ap->sval);
1822: else
1823: setsymtab(num, buf, 0.0, STR, (Array *) ap->sval);
1824: }
1.75 millert 1825:
1826: } else if (*s != '\0') { /* some random single character */
1.1 tholo 1827: for (;;) {
1828: n++;
1829: t = s;
1830: while (*s != sep && *s != '\n' && *s != '\0')
1831: s++;
1832: temp = *s;
1.53 millert 1833: setptr(s, '\0');
1834: snprintf(num, sizeof(num), "%d", n);
1.69 millert 1835: if (is_number(t, & result))
1836: setsymtab(num, t, result, STR|NUM, (Array *) ap->sval);
1.1 tholo 1837: else
1838: setsymtab(num, t, 0.0, STR, (Array *) ap->sval);
1.53 millert 1839: setptr(s, temp);
1.57 millert 1840: if (*s++ == '\0')
1.1 tholo 1841: break;
1842: }
1843: }
1844: tempfree(ap);
1.53 millert 1845: xfree(origs);
1846: xfree(origfs);
1.1 tholo 1847: x = gettemp();
1848: x->tval = NUM;
1849: x->fval = n;
1850: return(x);
1851: }
1852:
1853: Cell *condexpr(Node **a, int n) /* a[0] ? a[1] : a[2] */
1854: {
1855: Cell *x;
1856:
1857: x = execute(a[0]);
1858: if (istrue(x)) {
1859: tempfree(x);
1860: x = execute(a[1]);
1861: } else {
1862: tempfree(x);
1863: x = execute(a[2]);
1864: }
1865: return(x);
1866: }
1867:
1868: Cell *ifstat(Node **a, int n) /* if (a[0]) a[1]; else a[2] */
1869: {
1870: Cell *x;
1871:
1872: x = execute(a[0]);
1873: if (istrue(x)) {
1874: tempfree(x);
1875: x = execute(a[1]);
1.51 millert 1876: } else if (a[2] != NULL) {
1.1 tholo 1877: tempfree(x);
1878: x = execute(a[2]);
1879: }
1880: return(x);
1881: }
1882:
1883: Cell *whilestat(Node **a, int n) /* while (a[0]) a[1] */
1884: {
1885: Cell *x;
1886:
1887: for (;;) {
1888: x = execute(a[0]);
1889: if (!istrue(x))
1890: return(x);
1891: tempfree(x);
1892: x = execute(a[1]);
1893: if (isbreak(x)) {
1.15 millert 1894: x = True;
1.1 tholo 1895: return(x);
1896: }
1897: if (isnext(x) || isexit(x) || isret(x))
1898: return(x);
1899: tempfree(x);
1900: }
1901: }
1902:
1903: Cell *dostat(Node **a, int n) /* do a[0]; while(a[1]) */
1904: {
1905: Cell *x;
1906:
1907: for (;;) {
1908: x = execute(a[0]);
1909: if (isbreak(x))
1.15 millert 1910: return True;
1.17 millert 1911: if (isnext(x) || isexit(x) || isret(x))
1.1 tholo 1912: return(x);
1913: tempfree(x);
1914: x = execute(a[1]);
1915: if (!istrue(x))
1916: return(x);
1917: tempfree(x);
1918: }
1919: }
1920:
1921: Cell *forstat(Node **a, int n) /* for (a[0]; a[1]; a[2]) a[3] */
1922: {
1923: Cell *x;
1924:
1925: x = execute(a[0]);
1926: tempfree(x);
1927: for (;;) {
1.51 millert 1928: if (a[1]!=NULL) {
1.1 tholo 1929: x = execute(a[1]);
1930: if (!istrue(x)) return(x);
1931: else tempfree(x);
1932: }
1933: x = execute(a[3]);
1934: if (isbreak(x)) /* turn off break */
1.15 millert 1935: return True;
1.1 tholo 1936: if (isnext(x) || isexit(x) || isret(x))
1937: return(x);
1938: tempfree(x);
1939: x = execute(a[2]);
1940: tempfree(x);
1941: }
1942: }
1943:
1944: Cell *instat(Node **a, int n) /* for (a[0] in a[1]) a[2] */
1945: {
1946: Cell *x, *vp, *arrayp, *cp, *ncp;
1947: Array *tp;
1948: int i;
1949:
1950: vp = execute(a[0]);
1951: arrayp = execute(a[1]);
1952: if (!isarr(arrayp)) {
1.15 millert 1953: return True;
1.1 tholo 1954: }
1955: tp = (Array *) arrayp->sval;
1956: tempfree(arrayp);
1957: for (i = 0; i < tp->size; i++) { /* this routine knows too much */
1958: for (cp = tp->tab[i]; cp != NULL; cp = ncp) {
1959: setsval(vp, cp->nval);
1960: ncp = cp->cnext;
1961: x = execute(a[2]);
1962: if (isbreak(x)) {
1963: tempfree(vp);
1.15 millert 1964: return True;
1.1 tholo 1965: }
1966: if (isnext(x) || isexit(x) || isret(x)) {
1967: tempfree(vp);
1968: return(x);
1969: }
1970: tempfree(x);
1971: }
1972: }
1.15 millert 1973: return True;
1.1 tholo 1974: }
1975:
1.57 millert 1976: static char *nawk_convert(const char *s, int (*fun_c)(int),
1977: wint_t (*fun_wc)(wint_t))
1978: {
1979: char *buf = NULL;
1980: char *pbuf = NULL;
1981: const char *ps = NULL;
1982: size_t n = 0;
1983: wchar_t wc;
1.78 millert 1984: const size_t sz = awk_mb_cur_max;
1.75 millert 1985: int unused;
1.57 millert 1986:
1987: if (sz == 1) {
1988: buf = tostring(s);
1989:
1990: for (pbuf = buf; *pbuf; pbuf++)
1991: *pbuf = fun_c((uschar)*pbuf);
1992:
1993: return buf;
1994: } else {
1995: /* upper/lower character may be shorter/longer */
1996: buf = tostringN(s, strlen(s) * sz + 1);
1997:
1.67 millert 1998: (void) mbtowc(NULL, NULL, 0); /* reset internal state */
1999: /*
2000: * Reset internal state here too.
2001: * Assign result to avoid a compiler warning. (Casting to void
2002: * doesn't work.)
2003: * Increment said variable to avoid a different warning.
2004: */
1.75 millert 2005: unused = wctomb(NULL, L'\0');
1.67 millert 2006: unused++;
1.57 millert 2007:
2008: ps = s;
2009: pbuf = buf;
1.67 millert 2010: while (n = mbtowc(&wc, ps, sz),
1.57 millert 2011: n > 0 && n != (size_t)-1 && n != (size_t)-2)
2012: {
2013: ps += n;
2014:
1.67 millert 2015: n = wctomb(pbuf, fun_wc(wc));
1.57 millert 2016: if (n == (size_t)-1)
2017: FATAL("illegal wide character %s", s);
2018:
2019: pbuf += n;
2020: }
2021:
2022: *pbuf = '\0';
2023:
2024: if (n)
2025: FATAL("illegal byte sequence %s", s);
2026:
2027: return buf;
2028: }
2029: }
2030:
#ifdef __DJGPP__
/*
 * Replacements for towupper/towlower, compiled only under __DJGPP__.
 * Values 0..255 are mapped with toupper/tolower; everything else is
 * returned unchanged.
 */
static wint_t towupper(wint_t wc)
{
	return (wc >= 0 && wc < 256) ? toupper(wc & 0xFF) : wc;
}

static wint_t towlower(wint_t wc)
{
	return (wc >= 0 && wc < 256) ? tolower(wc & 0xFF) : wc;
}
#endif
2048:
/* nawk_toupper: newly allocated upper-case copy of s (see nawk_convert). */
static char *nawk_toupper(const char *s)
{
	char *upper = nawk_convert(s, toupper, towupper);

	return upper;
}
2053:
/* nawk_tolower: newly allocated lower-case copy of s (see nawk_convert). */
static char *nawk_tolower(const char *s)
{
	char *lower = nawk_convert(s, tolower, towlower);

	return lower;
}
2058:
1.1 tholo 2059: Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg list */
2060: {
2061: Cell *x, *y;
2062: Awkfloat u;
1.59 millert 2063: int t, sz;
1.33 millert 2064: Awkfloat tmp;
1.59 millert 2065: char *buf, *fmt;
1.1 tholo 2066: Node *nextarg;
2067: FILE *fp;
1.47 millert 2068: int status = 0;
1.59 millert 2069: time_t tv;
1.68 millert 2070: struct tm *tm, tmbuf;
1.1 tholo 2071:
1.15 millert 2072: t = ptoi(a[0]);
1.1 tholo 2073: x = execute(a[1]);
2074: nextarg = a[1]->nnext;
2075: switch (t) {
2076: case FLENGTH:
1.18 millert 2077: if (isarr(x))
2078: u = ((Array *) x->sval)->nelem; /* GROT. should be function*/
2079: else
1.75 millert 2080: u = u8_strlen(getsval(x));
1.18 millert 2081: break;
1.1 tholo 2082: case FLOG:
1.45 guenther 2083: errno = 0;
1.63 millert 2084: u = errcheck(log(getfval(x)), "log");
2085: break;
1.1 tholo 2086: case FINT:
2087: modf(getfval(x), &u); break;
2088: case FEXP:
1.45 guenther 2089: errno = 0;
1.63 millert 2090: u = errcheck(exp(getfval(x)), "exp");
2091: break;
1.1 tholo 2092: case FSQRT:
1.45 guenther 2093: errno = 0;
1.63 millert 2094: u = errcheck(sqrt(getfval(x)), "sqrt");
2095: break;
1.1 tholo 2096: case FSIN:
2097: u = sin(getfval(x)); break;
2098: case FCOS:
2099: u = cos(getfval(x)); break;
2100: case FATAN:
1.51 millert 2101: if (nextarg == NULL) {
1.16 millert 2102: WARNING("atan2 requires two arguments; returning 1.0");
1.1 tholo 2103: u = 1.0;
2104: } else {
2105: y = execute(a[1]->nnext);
2106: u = atan2(getfval(x), getfval(y));
2107: tempfree(y);
2108: nextarg = nextarg->nnext;
2109: }
1.29 pyr 2110: break;
2111: case FCOMPL:
2112: u = ~((int)getfval(x));
2113: break;
2114: case FAND:
2115: if (nextarg == 0) {
2116: WARNING("and requires two arguments; returning 0");
2117: u = 0;
2118: break;
2119: }
2120: y = execute(a[1]->nnext);
2121: u = ((int)getfval(x)) & ((int)getfval(y));
2122: tempfree(y);
2123: nextarg = nextarg->nnext;
2124: break;
2125: case FFOR:
2126: if (nextarg == 0) {
2127: WARNING("or requires two arguments; returning 0");
2128: u = 0;
2129: break;
2130: }
2131: y = execute(a[1]->nnext);
2132: u = ((int)getfval(x)) | ((int)getfval(y));
2133: tempfree(y);
2134: nextarg = nextarg->nnext;
2135: break;
2136: case FXOR:
2137: if (nextarg == 0) {
1.41 ajacouto 2138: WARNING("xor requires two arguments; returning 0");
1.29 pyr 2139: u = 0;
2140: break;
2141: }
2142: y = execute(a[1]->nnext);
2143: u = ((int)getfval(x)) ^ ((int)getfval(y));
2144: tempfree(y);
2145: nextarg = nextarg->nnext;
2146: break;
2147: case FLSHIFT:
2148: if (nextarg == 0) {
1.41 ajacouto 2149: WARNING("lshift requires two arguments; returning 0");
1.29 pyr 2150: u = 0;
2151: break;
2152: }
2153: y = execute(a[1]->nnext);
2154: u = ((int)getfval(x)) << ((int)getfval(y));
2155: tempfree(y);
2156: nextarg = nextarg->nnext;
2157: break;
2158: case FRSHIFT:
2159: if (nextarg == 0) {
1.41 ajacouto 2160: WARNING("rshift requires two arguments; returning 0");
1.29 pyr 2161: u = 0;
2162: break;
2163: }
2164: y = execute(a[1]->nnext);
2165: u = ((int)getfval(x)) >> ((int)getfval(y));
2166: tempfree(y);
2167: nextarg = nextarg->nnext;
1.1 tholo 2168: break;
2169: case FSYSTEM:
2170: fflush(stdout); /* in case something is buffered already */
1.47 millert 2171: status = system(getsval(x));
2172: u = status;
2173: if (status != -1) {
2174: if (WIFEXITED(status)) {
2175: u = WEXITSTATUS(status);
2176: } else if (WIFSIGNALED(status)) {
2177: u = WTERMSIG(status) + 256;
2178: #ifdef WCOREDUMP
2179: if (WCOREDUMP(status))
2180: u += 256;
2181: #endif
2182: } else /* something else?!? */
2183: u = 0;
2184: }
1.1 tholo 2185: break;
2186: case FRAND:
1.51 millert 2187: /* random() returns numbers in [0..2^31-1]
2188: * in order to get a number in [0, 1), divide it by 2^31
2189: */
2190: u = (Awkfloat) random() / (0x7fffffffL + 0x1UL);
1.1 tholo 2191: break;
2192: case FSRAND:
1.39 deraadt 2193: if (isrec(x)) { /* no argument provided */
2194: u = time(NULL);
2195: tmp = u;
2196: srandom((unsigned int) u);
2197: } else {
1.33 millert 2198: u = getfval(x);
2199: tmp = u;
1.37 deraadt 2200: srandom_deterministic((unsigned int) u);
1.24 millert 2201: }
1.39 deraadt 2202: u = srand_seed;
2203: srand_seed = tmp;
1.1 tholo 2204: break;
2205: case FTOUPPER:
2206: case FTOLOWER:
1.57 millert 2207: if (t == FTOUPPER)
2208: buf = nawk_toupper(getsval(x));
2209: else
2210: buf = nawk_tolower(getsval(x));
1.1 tholo 2211: tempfree(x);
2212: x = gettemp();
2213: setsval(x, buf);
1.13 kstailey 2214: free(buf);
1.1 tholo 2215: return x;
2216: case FFLUSH:
1.18 millert 2217: if (isrec(x) || strlen(getsval(x)) == 0) {
2218: flush_all(); /* fflush() or fflush("") -> all */
2219: u = 0;
1.57 millert 2220: } else if ((fp = openfile(FFLUSH, getsval(x), NULL)) == NULL)
1.1 tholo 2221: u = EOF;
2222: else
2223: u = fflush(fp);
1.68 millert 2224: break;
2225: case FMKTIME:
2226: memset(&tmbuf, 0, sizeof(tmbuf));
2227: tm = &tmbuf;
2228: t = sscanf(getsval(x), "%d %d %d %d %d %d %d",
2229: &tm->tm_year, &tm->tm_mon, &tm->tm_mday, &tm->tm_hour,
2230: &tm->tm_min, &tm->tm_sec, &tm->tm_isdst);
2231: switch (t) {
2232: case 6:
2233: tm->tm_isdst = -1; /* let mktime figure it out */
2234: /* FALLTHROUGH */
2235: case 7:
2236: tm->tm_year -= 1900;
2237: tm->tm_mon--;
2238: u = mktime(tm);
2239: break;
2240: default:
2241: u = -1;
2242: break;
2243: }
1.1 tholo 2244: break;
1.59 millert 2245: case FSYSTIME:
2246: u = time((time_t *) 0);
2247: break;
2248: case FSTRFTIME:
2249: /* strftime([format [,timestamp]]) */
2250: if (nextarg) {
2251: y = execute(nextarg);
2252: nextarg = nextarg->nnext;
2253: tv = (time_t) getfval(y);
2254: tempfree(y);
2255: } else
2256: tv = time((time_t *) 0);
2257: tm = localtime(&tv);
2258: if (tm == NULL)
2259: FATAL("bad time %ld", (long)tv);
2260:
2261: if (isrec(x)) {
2262: /* format argument not provided, use default */
2263: fmt = tostring("%a %b %d %H:%M:%S %Z %Y");
2264: } else
2265: fmt = tostring(getsval(x));
2266:
2267: sz = 32;
2268: buf = NULL;
2269: do {
1.69 millert 2270: if ((buf = (char *) reallocarray(buf, 2, sz)) == NULL)
1.59 millert 2271: FATAL("out of memory in strftime");
2272: sz *= 2;
2273: } while (strftime(buf, sz, fmt, tm) == 0 && fmt[0] != '\0');
2274:
2275: y = gettemp();
2276: setsval(y, buf);
2277: free(fmt);
2278: free(buf);
2279:
2280: return y;
1.1 tholo 2281: default: /* can't happen */
1.16 millert 2282: FATAL("illegal function type %d", t);
1.1 tholo 2283: break;
2284: }
2285: tempfree(x);
2286: x = gettemp();
2287: setfval(x, u);
1.51 millert 2288: if (nextarg != NULL) {
1.16 millert 2289: WARNING("warning: function has too many arguments");
1.73 millert 2290: for ( ; nextarg; nextarg = nextarg->nnext) {
2291: y = execute(nextarg);
2292: tempfree(y);
2293: }
1.1 tholo 2294: }
2295: return(x);
2296: }
2297:
2298: Cell *printstat(Node **a, int n) /* print a[0] */
2299: {
2300: Node *x;
2301: Cell *y;
2302: FILE *fp;
2303:
1.51 millert 2304: if (a[1] == NULL) /* a[1] is redirection operator, a[2] is file */
1.1 tholo 2305: fp = stdout;
2306: else
1.15 millert 2307: fp = redirect(ptoi(a[1]), a[2]);
1.1 tholo 2308: for (x = a[0]; x != NULL; x = x->nnext) {
2309: y = execute(x);
1.18 millert 2310: fputs(getpssval(y), fp);
1.1 tholo 2311: tempfree(y);
2312: if (x->nnext == NULL)
1.49 millert 2313: fputs(getsval(orsloc), fp);
1.1 tholo 2314: else
1.49 millert 2315: fputs(getsval(ofsloc), fp);
1.1 tholo 2316: }
1.51 millert 2317: if (a[1] != NULL)
1.1 tholo 2318: fflush(fp);
2319: if (ferror(fp))
1.16 millert 2320: FATAL("write error on %s", filename(fp));
1.15 millert 2321: return(True);
1.1 tholo 2322: }
2323:
2324: Cell *nullproc(Node **a, int n)
2325: {
2326: return 0;
2327: }
2328:
2329:
2330: FILE *redirect(int a, Node *b) /* set up all i/o redirections */
2331: {
2332: FILE *fp;
2333: Cell *x;
2334: char *fname;
2335:
2336: x = execute(b);
2337: fname = getsval(x);
1.57 millert 2338: fp = openfile(a, fname, NULL);
1.1 tholo 2339: if (fp == NULL)
1.16 millert 2340: FATAL("can't open file %s", fname);
1.1 tholo 2341: tempfree(x);
2342: return fp;
2343: }
2344:
/*
 * One entry of the table of streams opened on behalf of the awk program.
 * A slot whose fp is NULL is free for reuse.
 */
struct files {
	FILE		*fp;	/* the stream itself */
	const char	*fname;	/* name it was opened under */
	int		 mode;	/* '|', 'a', 'w' => LE/LT, GT */
};

struct files *files;	/* the table */
size_t nfiles;		/* number of slots in the table */
1.1 tholo 2352:
1.57 millert 2353: static void stdinit(void) /* in case stdin, etc., are not constants */
1.16 millert 2354: {
1.33 millert 2355: nfiles = FOPEN_MAX;
1.69 millert 2356: files = (struct files *) calloc(nfiles, sizeof(*files));
1.33 millert 2357: if (files == NULL)
1.57 millert 2358: FATAL("can't allocate file memory for %zu files", nfiles);
1.33 millert 2359: files[0].fp = stdin;
1.72 millert 2360: files[0].fname = tostring("/dev/stdin");
1.33 millert 2361: files[0].mode = LT;
2362: files[1].fp = stdout;
1.72 millert 2363: files[1].fname = tostring("/dev/stdout");
1.33 millert 2364: files[1].mode = GT;
2365: files[2].fp = stderr;
1.72 millert 2366: files[2].fname = tostring("/dev/stderr");
1.33 millert 2367: files[2].mode = GT;
1.16 millert 2368: }
2369:
1.57 millert 2370: FILE *openfile(int a, const char *us, bool *pnewflag)
1.1 tholo 2371: {
1.18 millert 2372: const char *s = us;
1.57 millert 2373: size_t i;
2374: int m;
1.51 millert 2375: FILE *fp = NULL;
1.1 tholo 2376:
2377: if (*s == '\0')
1.16 millert 2378: FATAL("null file name in print or getline");
1.57 millert 2379: for (i = 0; i < nfiles; i++)
2380: if (files[i].fname && strcmp(s, files[i].fname) == 0 &&
2381: (a == files[i].mode || (a==APPEND && files[i].mode==GT) ||
2382: a == FFLUSH)) {
2383: if (pnewflag)
2384: *pnewflag = false;
2385: return files[i].fp;
1.13 kstailey 2386: }
2387: if (a == FFLUSH) /* didn't find it, so don't create it! */
2388: return NULL;
2389:
1.57 millert 2390: for (i = 0; i < nfiles; i++)
1.51 millert 2391: if (files[i].fp == NULL)
1.1 tholo 2392: break;
1.33 millert 2393: if (i >= nfiles) {
2394: struct files *nf;
1.57 millert 2395: size_t nnf = nfiles + FOPEN_MAX;
1.69 millert 2396: nf = (struct files *) reallocarray(files, nnf, sizeof(*nf));
1.33 millert 2397: if (nf == NULL)
1.57 millert 2398: FATAL("cannot grow files for %s and %zu files", s, nnf);
1.33 millert 2399: memset(&nf[nfiles], 0, FOPEN_MAX * sizeof(*nf));
2400: nfiles = nnf;
2401: files = nf;
2402: }
1.1 tholo 2403: fflush(stdout); /* force a semblance of order */
2404: m = a;
2405: if (a == GT) {
2406: fp = fopen(s, "w");
2407: } else if (a == APPEND) {
2408: fp = fopen(s, "a");
2409: m = GT; /* so can mix > and >> */
2410: } else if (a == '|') { /* output pipe */
2411: fp = popen(s, "w");
2412: } else if (a == LE) { /* input pipe */
2413: fp = popen(s, "r");
2414: } else if (a == LT) { /* getline <file */
2415: fp = strcmp(s, "-") == 0 ? stdin : fopen(s, "r"); /* "-" is stdin */
2416: } else /* can't happen */
1.16 millert 2417: FATAL("illegal redirection %d", a);
1.1 tholo 2418: if (fp != NULL) {
2419: files[i].fname = tostring(s);
2420: files[i].fp = fp;
2421: files[i].mode = m;
1.57 millert 2422: if (pnewflag)
2423: *pnewflag = true;
1.56 millert 2424: if (fp != stdin && fp != stdout && fp != stderr)
2425: (void) fcntl(fileno(fp), F_SETFD, FD_CLOEXEC);
1.1 tholo 2426: }
2427: return fp;
2428: }
2429:
1.18 millert 2430: const char *filename(FILE *fp)
1.1 tholo 2431: {
1.57 millert 2432: size_t i;
1.1 tholo 2433:
1.33 millert 2434: for (i = 0; i < nfiles; i++)
1.1 tholo 2435: if (fp == files[i].fp)
2436: return files[i].fname;
2437: return "???";
2438: }
2439:
1.71 millert 2440: Cell *closefile(Node **a, int n)
2441: {
1.57 millert 2442: Cell *x;
2443: size_t i;
2444: bool stat;
1.67 millert 2445:
1.57 millert 2446: x = execute(a[0]);
2447: getsval(x);
2448: stat = true;
2449: for (i = 0; i < nfiles; i++) {
2450: if (!files[i].fname || strcmp(x->sval, files[i].fname) != 0)
2451: continue;
1.71 millert 2452: if (files[i].mode == GT || files[i].mode == '|')
2453: fflush(files[i].fp);
2454: if (ferror(files[i].fp)) {
2455: if ((files[i].mode == GT && files[i].fp != stderr)
2456: || files[i].mode == '|')
2457: FATAL("write error on %s", files[i].fname);
2458: else
2459: WARNING("i/o error occurred on %s", files[i].fname);
2460: }
1.65 millert 2461: if (files[i].fp == stdin || files[i].fp == stdout ||
2462: files[i].fp == stderr)
2463: stat = freopen("/dev/null", "r+", files[i].fp) == NULL;
2464: else if (files[i].mode == '|' || files[i].mode == LE)
1.57 millert 2465: stat = pclose(files[i].fp) == -1;
2466: else
2467: stat = fclose(files[i].fp) == EOF;
2468: if (stat)
1.71 millert 2469: WARNING("i/o error occurred closing %s", files[i].fname);
1.72 millert 2470: xfree(files[i].fname);
1.57 millert 2471: files[i].fname = NULL; /* watch out for ref thru this */
2472: files[i].fp = NULL;
1.65 millert 2473: break;
1.57 millert 2474: }
2475: tempfree(x);
2476: x = gettemp();
2477: setfval(x, (Awkfloat) (stat ? -1 : 0));
2478: return(x);
1.71 millert 2479: }
1.1 tholo 2480:
2481: void closeall(void)
2482: {
1.57 millert 2483: size_t i;
2484: bool stat = false;
1.1 tholo 2485:
1.57 millert 2486: for (i = 0; i < nfiles; i++) {
2487: if (! files[i].fp)
2488: continue;
1.71 millert 2489: if (files[i].mode == GT || files[i].mode == '|')
2490: fflush(files[i].fp);
2491: if (ferror(files[i].fp)) {
2492: if ((files[i].mode == GT && files[i].fp != stderr)
2493: || files[i].mode == '|')
2494: FATAL("write error on %s", files[i].fname);
2495: else
2496: WARNING("i/o error occurred on %s", files[i].fname);
2497: }
2498: if (files[i].fp == stdin || files[i].fp == stdout ||
2499: files[i].fp == stderr)
1.64 millert 2500: continue;
1.57 millert 2501: if (files[i].mode == '|' || files[i].mode == LE)
2502: stat = pclose(files[i].fp) == -1;
2503: else
2504: stat = fclose(files[i].fp) == EOF;
2505: if (stat)
1.71 millert 2506: WARNING("i/o error occurred while closing %s", files[i].fname);
1.17 millert 2507: }
1.18 millert 2508: }
2509:
1.57 millert 2510: static void flush_all(void)
1.18 millert 2511: {
1.57 millert 2512: size_t i;
1.18 millert 2513:
1.33 millert 2514: for (i = 0; i < nfiles; i++)
1.18 millert 2515: if (files[i].fp)
2516: fflush(files[i].fp);
1.1 tholo 2517: }
2518:
1.53 millert 2519: void backsub(char **pb_ptr, const char **sptr_ptr);
1.1 tholo 2520:
2521: Cell *sub(Node **a, int nnn) /* substitute command */
2522: {
1.53 millert 2523: const char *sptr, *q;
1.1 tholo 2524: Cell *x, *y, *result;
1.53 millert 2525: char *t, *buf, *pb;
1.1 tholo 2526: fa *pfa;
1.13 kstailey 2527: int bufsz = recsize;
1.1 tholo 2528:
1.69 millert 2529: if ((buf = (char *) malloc(bufsz)) == NULL)
1.16 millert 2530: FATAL("out of memory in sub");
1.1 tholo 2531: x = execute(a[3]); /* target string */
2532: t = getsval(x);
1.51 millert 2533: if (a[0] == NULL) /* 0 => a[1] is already-compiled regexpr */
1.1 tholo 2534: pfa = (fa *) a[1]; /* regular expression */
2535: else {
2536: y = execute(a[1]);
2537: pfa = makedfa(getsval(y), 1);
2538: tempfree(y);
2539: }
2540: y = execute(a[2]); /* replacement string */
1.15 millert 2541: result = False;
1.1 tholo 2542: if (pmatch(pfa, t)) {
1.13 kstailey 2543: sptr = t;
2544: adjbuf(&buf, &bufsz, 1+patbeg-sptr, recsize, 0, "sub");
1.1 tholo 2545: pb = buf;
2546: while (sptr < patbeg)
2547: *pb++ = *sptr++;
2548: sptr = getsval(y);
1.57 millert 2549: while (*sptr != '\0') {
1.13 kstailey 2550: adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "sub");
2551: if (*sptr == '\\') {
2552: backsub(&pb, &sptr);
1.1 tholo 2553: } else if (*sptr == '&') {
2554: sptr++;
1.13 kstailey 2555: adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "sub");
1.1 tholo 2556: for (q = patbeg; q < patbeg+patlen; )
2557: *pb++ = *q++;
2558: } else
2559: *pb++ = *sptr++;
1.13 kstailey 2560: }
1.1 tholo 2561: *pb = '\0';
1.13 kstailey 2562: if (pb > buf + bufsz)
1.16 millert 2563: FATAL("sub result1 %.30s too big; can't happen", buf);
1.1 tholo 2564: sptr = patbeg + patlen;
1.13 kstailey 2565: if ((patlen == 0 && *patbeg) || (patlen && *(sptr-1))) {
2566: adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "sub");
1.57 millert 2567: while ((*pb++ = *sptr++) != '\0')
2568: continue;
1.13 kstailey 2569: }
2570: if (pb > buf + bufsz)
1.16 millert 2571: FATAL("sub result2 %.30s too big; can't happen", buf);
1.13 kstailey 2572: setsval(x, buf); /* BUG: should be able to avoid copy */
1.23 millert 2573: result = True;
1.1 tholo 2574: }
2575: tempfree(x);
2576: tempfree(y);
1.13 kstailey 2577: free(buf);
1.1 tholo 2578: return result;
2579: }
2580:
2581: Cell *gsub(Node **a, int nnn) /* global substitute */
2582: {
2583: Cell *x, *y;
1.53 millert 2584: char *rptr, *pb;
2585: const char *q, *t, *sptr;
1.13 kstailey 2586: char *buf;
1.1 tholo 2587: fa *pfa;
2588: int mflag, tempstat, num;
1.13 kstailey 2589: int bufsz = recsize;
1.79 millert 2590: int charlen = 0;
1.1 tholo 2591:
1.69 millert 2592: if ((buf = (char *) malloc(bufsz)) == NULL)
1.16 millert 2593: FATAL("out of memory in gsub");
1.1 tholo 2594: mflag = 0; /* if mflag == 0, can replace empty string */
2595: num = 0;
2596: x = execute(a[3]); /* target string */
2597: t = getsval(x);
1.51 millert 2598: if (a[0] == NULL) /* 0 => a[1] is already-compiled regexpr */
1.1 tholo 2599: pfa = (fa *) a[1]; /* regular expression */
2600: else {
2601: y = execute(a[1]);
2602: pfa = makedfa(getsval(y), 1);
2603: tempfree(y);
2604: }
2605: y = execute(a[2]); /* replacement string */
2606: if (pmatch(pfa, t)) {
2607: tempstat = pfa->initstat;
2608: pfa->initstat = 2;
2609: pb = buf;
2610: rptr = getsval(y);
2611: do {
1.57 millert 2612: if (patlen == 0 && *patbeg != '\0') { /* matched empty string */
1.1 tholo 2613: if (mflag == 0) { /* can replace empty */
2614: num++;
2615: sptr = rptr;
1.57 millert 2616: while (*sptr != '\0') {
1.13 kstailey 2617: adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gsub");
2618: if (*sptr == '\\') {
2619: backsub(&pb, &sptr);
1.1 tholo 2620: } else if (*sptr == '&') {
2621: sptr++;
1.13 kstailey 2622: adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gsub");
1.1 tholo 2623: for (q = patbeg; q < patbeg+patlen; )
2624: *pb++ = *q++;
2625: } else
2626: *pb++ = *sptr++;
1.13 kstailey 2627: }
1.1 tholo 2628: }
1.57 millert 2629: if (*t == '\0') /* at end */
1.1 tholo 2630: goto done;
1.13 kstailey 2631: adjbuf(&buf, &bufsz, 2+pb-buf, recsize, &pb, "gsub");
1.79 millert 2632: charlen = u8_nextlen(t);
2633: while (charlen-- > 0)
2634: *pb++ = *t++;
1.13 kstailey 2635: if (pb > buf + bufsz) /* BUG: not sure of this test */
1.16 millert 2636: FATAL("gsub result0 %.30s too big; can't happen", buf);
1.1 tholo 2637: mflag = 0;
2638: }
2639: else { /* matched nonempty string */
2640: num++;
2641: sptr = t;
1.13 kstailey 2642: adjbuf(&buf, &bufsz, 1+(patbeg-sptr)+pb-buf, recsize, &pb, "gsub");
2643: while (sptr < patbeg)
1.1 tholo 2644: *pb++ = *sptr++;
2645: sptr = rptr;
1.57 millert 2646: while (*sptr != '\0') {
1.13 kstailey 2647: adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gsub");
2648: if (*sptr == '\\') {
2649: backsub(&pb, &sptr);
1.1 tholo 2650: } else if (*sptr == '&') {
2651: sptr++;
1.13 kstailey 2652: adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gsub");
1.1 tholo 2653: for (q = patbeg; q < patbeg+patlen; )
2654: *pb++ = *q++;
2655: } else
2656: *pb++ = *sptr++;
1.13 kstailey 2657: }
1.1 tholo 2658: t = patbeg + patlen;
1.57 millert 2659: if (patlen == 0 || *t == '\0' || *(t-1) == '\0')
1.1 tholo 2660: goto done;
1.13 kstailey 2661: if (pb > buf + bufsz)
1.16 millert 2662: FATAL("gsub result1 %.30s too big; can't happen", buf);
1.1 tholo 2663: mflag = 1;
2664: }
2665: } while (pmatch(pfa,t));
2666: sptr = t;
1.13 kstailey 2667: adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "gsub");
1.57 millert 2668: while ((*pb++ = *sptr++) != '\0')
2669: continue;
1.31 millert 2670: done: if (pb < buf + bufsz)
2671: *pb = '\0';
2672: else if (*(pb-1) != '\0')
2673: FATAL("gsub result2 %.30s truncated; can't happen", buf);
1.13 kstailey 2674: setsval(x, buf); /* BUG: should be able to avoid copy + free */
1.1 tholo 2675: pfa->initstat = tempstat;
2676: }
2677: tempfree(x);
2678: tempfree(y);
2679: x = gettemp();
2680: x->tval = NUM;
2681: x->fval = num;
1.13 kstailey 2682: free(buf);
1.1 tholo 2683: return(x);
1.59 millert 2684: }
2685:
2686: Cell *gensub(Node **a, int nnn) /* global selective substitute */
2687: /* XXX incomplete - doesn't support backreferences \0 ... \9 */
2688: {
2689: Cell *x, *y, *res, *h;
2690: char *rptr;
2691: const char *sptr;
2692: char *buf, *pb;
2693: const char *t, *q;
2694: fa *pfa;
2695: int mflag, tempstat, num, whichm;
2696: int bufsz = recsize;
2697:
2698: if ((buf = malloc(bufsz)) == NULL)
2699: FATAL("out of memory in gensub");
2700: mflag = 0; /* if mflag == 0, can replace empty string */
2701: num = 0;
2702: x = execute(a[4]); /* source string */
2703: t = getsval(x);
2704: res = copycell(x); /* target string - initially copy of source */
2705: res->csub = CTEMP; /* result values are temporary */
2706: if (a[0] == 0) /* 0 => a[1] is already-compiled regexpr */
2707: pfa = (fa *) a[1]; /* regular expression */
2708: else {
2709: y = execute(a[1]);
2710: pfa = makedfa(getsval(y), 1);
2711: tempfree(y);
2712: }
2713: y = execute(a[2]); /* replacement string */
2714: h = execute(a[3]); /* which matches should be replaced */
2715: sptr = getsval(h);
2716: if (sptr[0] == 'g' || sptr[0] == 'G')
2717: whichm = -1;
2718: else {
2719: /*
2720: * The specified number is index of replacement, starting
2721: * from 1. GNU awk treats index lower than 0 same as
2722: * 1, we do same for compatibility.
2723: */
2724: whichm = (int) getfval(h) - 1;
2725: if (whichm < 0)
2726: whichm = 0;
2727: }
2728: tempfree(h);
2729:
2730: if (pmatch(pfa, t)) {
2731: char *sl;
2732:
2733: tempstat = pfa->initstat;
2734: pfa->initstat = 2;
2735: pb = buf;
2736: rptr = getsval(y);
2737: /*
2738: * XXX if there are any backreferences in subst string,
2739: * complain now.
2740: */
2741: for (sl = rptr; (sl = strchr(sl, '\\')) && sl[1]; sl++) {
2742: if (strchr("0123456789", sl[1])) {
2743: FATAL("gensub doesn't support backreferences (subst \"%s\")", rptr);
2744: }
2745: }
2746:
2747: do {
2748: if (whichm >= 0 && whichm != num) {
2749: num++;
2750: adjbuf(&buf, &bufsz, (pb - buf) + (patbeg - t) + patlen, recsize, &pb, "gensub");
2751:
2752: /* copy the part of string up to and including
2753: * match to output buffer */
2754: while (t < patbeg + patlen)
2755: *pb++ = *t++;
2756: continue;
2757: }
2758:
2759: if (patlen == 0 && *patbeg != 0) { /* matched empty string */
2760: if (mflag == 0) { /* can replace empty */
2761: num++;
2762: sptr = rptr;
2763: while (*sptr != 0) {
2764: adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gensub");
2765: if (*sptr == '\\') {
2766: backsub(&pb, &sptr);
2767: } else if (*sptr == '&') {
2768: sptr++;
2769: adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub");
2770: for (q = patbeg; q < patbeg+patlen; )
2771: *pb++ = *q++;
2772: } else
2773: *pb++ = *sptr++;
2774: }
2775: }
2776: if (*t == 0) /* at end */
2777: goto done;
2778: adjbuf(&buf, &bufsz, 2+pb-buf, recsize, &pb, "gensub");
2779: *pb++ = *t++;
2780: if (pb > buf + bufsz) /* BUG: not sure of this test */
2781: FATAL("gensub result0 %.30s too big; can't happen", buf);
2782: mflag = 0;
2783: }
2784: else { /* matched nonempty string */
2785: num++;
2786: sptr = t;
2787: adjbuf(&buf, &bufsz, 1+(patbeg-sptr)+pb-buf, recsize, &pb, "gensub");
2788: while (sptr < patbeg)
2789: *pb++ = *sptr++;
2790: sptr = rptr;
2791: while (*sptr != 0) {
2792: adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gensub");
2793: if (*sptr == '\\') {
2794: backsub(&pb, &sptr);
2795: } else if (*sptr == '&') {
2796: sptr++;
2797: adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub");
2798: for (q = patbeg; q < patbeg+patlen; )
2799: *pb++ = *q++;
2800: } else
2801: *pb++ = *sptr++;
2802: }
2803: t = patbeg + patlen;
2804: if (patlen == 0 || *t == 0 || *(t-1) == 0)
2805: goto done;
2806: if (pb > buf + bufsz)
2807: FATAL("gensub result1 %.30s too big; can't happen", buf);
2808: mflag = 1;
2809: }
2810: } while (pmatch(pfa,t));
2811: sptr = t;
2812: adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "gensub");
2813: while ((*pb++ = *sptr++) != 0)
2814: ;
2815: done: if (pb > buf + bufsz)
2816: FATAL("gensub result2 %.30s too big; can't happen", buf);
2817: *pb = '\0';
2818: setsval(res, buf);
2819: pfa->initstat = tempstat;
2820: }
2821: tempfree(x);
2822: tempfree(y);
2823: free(buf);
2824: return(res);
1.13 kstailey 2825: }
2826:
1.53 millert 2827: void backsub(char **pb_ptr, const char **sptr_ptr) /* handle \\& variations */
1.13 kstailey 2828: { /* sptr[0] == '\\' */
1.53 millert 2829: char *pb = *pb_ptr;
2830: const char *sptr = *sptr_ptr;
1.13 kstailey 2831:
2832: if (sptr[1] == '\\') {
2833: if (sptr[2] == '\\' && sptr[3] == '&') { /* \\\& -> \& */
2834: *pb++ = '\\';
2835: *pb++ = '&';
2836: sptr += 4;
2837: } else if (sptr[2] == '&') { /* \\& -> \ + matched */
2838: *pb++ = '\\';
2839: sptr += 2;
1.56 millert 2840: } else if (do_posix) { /* \\x -> \x */
2841: sptr++;
2842: *pb++ = *sptr++;
1.13 kstailey 2843: } else { /* \\x -> \\x */
2844: *pb++ = *sptr++;
2845: *pb++ = *sptr++;
2846: }
2847: } else if (sptr[1] == '&') { /* literal & */
2848: sptr++;
2849: *pb++ = *sptr++;
2850: } else /* literal \ */
2851: *pb++ = *sptr++;
2852:
2853: *pb_ptr = pb;
2854: *sptr_ptr = sptr;
1.75 millert 2855: }
2856:
/*
 * wide_char_to_byte_str - encode code point rune as a UTF-8 byte string.
 *
 * Returns a pointer to a static, NUL-terminated buffer that is
 * overwritten by the next call, and stores the number of encoded bytes
 * (terminator excluded) in *outlen.  Returns NULL, leaving *outlen
 * untouched, when rune is negative or greater than 0x10FFFF.
 */
static char *wide_char_to_byte_str(int rune, size_t *outlen)
{
	static char buf[5];
	size_t used = 0;

	if (rune < 0 || rune > 0x10FFFF)
		return NULL;

	memset(buf, 0, sizeof(buf));

	if (rune < 0x80) {
		/* 0xxxxxxx */
		buf[used++] = rune;
	} else if (rune < 0x800) {
		/* 110xxxxx 10xxxxxx */
		buf[used++] = 0xC0 | (rune >> 6);
		buf[used++] = 0x80 | (rune & 0x3F);
	} else if (rune < 0x10000) {
		/* 1110xxxx 10xxxxxx 10xxxxxx */
		buf[used++] = 0xE0 | (rune >> 12);
		buf[used++] = 0x80 | ((rune >> 6) & 0x3F);
		buf[used++] = 0x80 | (rune & 0x3F);
	} else {
		/* 0x00010000 - 0x10FFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
		buf[used++] = 0xF0 | (rune >> 18);
		buf[used++] = 0x80 | ((rune >> 12) & 0x3F);
		buf[used++] = 0x80 | ((rune >> 6) & 0x3F);
		buf[used++] = 0x80 | (rune & 0x3F);
	}

	*outlen = used;
	buf[used] = '\0';

	return buf;
}