Annotation of src/usr.bin/awk/tran.c, Revision 1.14
1.14 ! jmc 1: /* $OpenBSD: tran.c,v 1.13 2005/04/15 15:54:26 millert Exp $ */
1.1 tholo 2: /****************************************************************
1.3 kstailey 3: Copyright (C) Lucent Technologies 1997
1.1 tholo 4: All Rights Reserved
5:
6: Permission to use, copy, modify, and distribute this software and
7: its documentation for any purpose and without fee is hereby
8: granted, provided that the above copyright notice appear in all
9: copies and that both that the copyright notice and this
10: permission notice and warranty disclaimer appear in supporting
1.3 kstailey 11: documentation, and that the name Lucent Technologies or any of
12: its entities not be used in advertising or publicity pertaining
13: to distribution of the software without specific, written prior
14: permission.
15:
16: LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
17: INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
18: IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
19: SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
20: WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
21: IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
22: ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
23: THIS SOFTWARE.
1.1 tholo 24: ****************************************************************/
25:
26: #define DEBUG
27: #include <stdio.h>
28: #include <math.h>
29: #include <ctype.h>
30: #include <string.h>
31: #include <stdlib.h>
32: #include "awk.h"
1.3 kstailey 33: #include "ytab.h"
1.1 tholo 34:
35: #define FULLTAB 2 /* rehash when table gets this x full */
36: #define GROWTAB 4 /* grow table by this factor */
37:
38: Array *symtab; /* main symbol table */
39:
40: char **FS; /* initial field sep */
41: char **RS; /* initial record sep */
42: char **OFS; /* output field sep */
43: char **ORS; /* output record sep */
44: char **OFMT; /* output format for numbers */
45: char **CONVFMT; /* format for conversions in getsval */
46: Awkfloat *NF; /* number of fields in current record */
47: Awkfloat *NR; /* number of current record */
48: Awkfloat *FNR; /* number of current record in current file */
49: char **FILENAME; /* current filename argument */
50: Awkfloat *ARGC; /* number of arguments from command line */
51: char **SUBSEP; /* subscript separator for a[i,j,k]; default \034 */
52: Awkfloat *RSTART; /* start of re matched with ~; origin 1 (!) */
53: Awkfloat *RLENGTH; /* length of same */
54:
1.11 millert 55: Cell *fsloc; /* FS */
1.1 tholo 56: Cell *nrloc; /* NR */
57: Cell *nfloc; /* NF */
58: Cell *fnrloc; /* FNR */
59: Array *ARGVtab; /* symbol table containing ARGV[...] */
60: Array *ENVtab; /* symbol table containing ENVIRON[...] */
61: Cell *rstartloc; /* RSTART */
62: Cell *rlengthloc; /* RLENGTH */
63: Cell *symtabloc; /* SYMTAB */
64:
65: Cell *nullloc; /* a guaranteed empty cell */
66: Node *nullnode; /* zero&null, converted into a node for comparisons */
1.3 kstailey 67: Cell *literal0;
1.1 tholo 68:
1.3 kstailey 69: extern Cell **fldtab;
1.1 tholo 70:
71: void syminit(void) /* initialize symbol table with builtin vars */
72: {
1.3 kstailey 73: literal0 = setsymtab("0", "0", 0.0, NUM|STR|CON|DONTFREE, symtab);
1.1 tholo 74: /* this is used for if(x)... tests: */
75: nullloc = setsymtab("$zero&null", "", 0.0, NUM|STR|CON|DONTFREE, symtab);
1.3 kstailey 76: nullnode = celltonode(nullloc, CCON);
1.1 tholo 77:
1.11 millert 78: fsloc = setsymtab("FS", " ", 0.0, STR|DONTFREE, symtab);
79: FS = &fsloc->sval;
1.1 tholo 80: RS = &setsymtab("RS", "\n", 0.0, STR|DONTFREE, symtab)->sval;
81: OFS = &setsymtab("OFS", " ", 0.0, STR|DONTFREE, symtab)->sval;
82: ORS = &setsymtab("ORS", "\n", 0.0, STR|DONTFREE, symtab)->sval;
83: OFMT = &setsymtab("OFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval;
84: CONVFMT = &setsymtab("CONVFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval;
1.2 millert 85: FILENAME = &setsymtab("FILENAME", "", 0.0, STR|DONTFREE, symtab)->sval;
1.1 tholo 86: nfloc = setsymtab("NF", "", 0.0, NUM, symtab);
87: NF = &nfloc->fval;
88: nrloc = setsymtab("NR", "", 0.0, NUM, symtab);
89: NR = &nrloc->fval;
90: fnrloc = setsymtab("FNR", "", 0.0, NUM, symtab);
91: FNR = &fnrloc->fval;
92: SUBSEP = &setsymtab("SUBSEP", "\034", 0.0, STR|DONTFREE, symtab)->sval;
93: rstartloc = setsymtab("RSTART", "", 0.0, NUM, symtab);
94: RSTART = &rstartloc->fval;
95: rlengthloc = setsymtab("RLENGTH", "", 0.0, NUM, symtab);
96: RLENGTH = &rlengthloc->fval;
97: symtabloc = setsymtab("SYMTAB", "", 0.0, ARR, symtab);
98: symtabloc->sval = (char *) symtab;
99: }
100:
1.3 kstailey 101: void arginit(int ac, char **av) /* set up ARGV and ARGC */
1.1 tholo 102: {
103: Cell *cp;
104: int i;
1.3 kstailey 105: char temp[50];
1.1 tholo 106:
107: ARGC = &setsymtab("ARGC", "", (Awkfloat) ac, NUM, symtab)->fval;
108: cp = setsymtab("ARGV", "", 0.0, ARR, symtab);
109: ARGVtab = makesymtab(NSYMTAB); /* could be (int) ARGC as well */
110: cp->sval = (char *) ARGVtab;
111: for (i = 0; i < ac; i++) {
1.9 deraadt 112: snprintf(temp, sizeof temp, "%d", i);
1.4 millert 113: if (is_number(*av))
1.1 tholo 114: setsymtab(temp, *av, atof(*av), STR|NUM, ARGVtab);
115: else
116: setsymtab(temp, *av, 0.0, STR, ARGVtab);
117: av++;
118: }
119: }
120:
121: void envinit(char **envp) /* set up ENVIRON variable */
122: {
123: Cell *cp;
124: char *p;
125:
126: cp = setsymtab("ENVIRON", "", 0.0, ARR, symtab);
127: ENVtab = makesymtab(NSYMTAB);
128: cp->sval = (char *) ENVtab;
129: for ( ; *envp; envp++) {
1.3 kstailey 130: if ((p = strchr(*envp, '=')) == NULL)
1.1 tholo 131: continue;
1.7 millert 132: if( p == *envp ) /* no left hand side name in env string */
133: continue;
1.1 tholo 134: *p++ = 0; /* split into two strings at = */
1.4 millert 135: if (is_number(p))
1.1 tholo 136: setsymtab(*envp, p, atof(p), STR|NUM, ENVtab);
137: else
138: setsymtab(*envp, p, 0.0, STR, ENVtab);
139: p[-1] = '='; /* restore in case env is passed down to a shell */
140: }
141: }
142:
143: Array *makesymtab(int n) /* make a new symbol table */
144: {
145: Array *ap;
146: Cell **tp;
147:
148: ap = (Array *) malloc(sizeof(Array));
149: tp = (Cell **) calloc(n, sizeof(Cell *));
150: if (ap == NULL || tp == NULL)
1.6 millert 151: FATAL("out of space in makesymtab");
1.1 tholo 152: ap->nelem = 0;
153: ap->size = n;
154: ap->tab = tp;
155: return(ap);
156: }
157:
158: void freesymtab(Cell *ap) /* free a symbol table */
159: {
160: Cell *cp, *temp;
161: Array *tp;
162: int i;
163:
164: if (!isarr(ap))
165: return;
166: tp = (Array *) ap->sval;
167: if (tp == NULL)
168: return;
169: for (i = 0; i < tp->size; i++) {
170: for (cp = tp->tab[i]; cp != NULL; cp = temp) {
171: xfree(cp->nval);
172: if (freeable(cp))
173: xfree(cp->sval);
174: temp = cp->cnext; /* avoids freeing then using */
1.3 kstailey 175: free(cp);
1.8 millert 176: tp->nelem--;
1.1 tholo 177: }
178: tp->tab[i] = 0;
179: }
1.8 millert 180: if (tp->nelem != 0)
181: WARNING("can't happen: inconsistent element count freeing %s", ap->nval);
1.3 kstailey 182: free(tp->tab);
183: free(tp);
1.1 tholo 184: }
185:
1.8 millert 186: void freeelem(Cell *ap, const char *s) /* free elem s from ap (i.e., ap["s"] */
1.1 tholo 187: {
188: Array *tp;
189: Cell *p, *prev = NULL;
190: int h;
191:
192: tp = (Array *) ap->sval;
193: h = hash(s, tp->size);
194: for (p = tp->tab[h]; p != NULL; prev = p, p = p->cnext)
1.3 kstailey 195: if (strcmp(s, p->nval) == 0) {
1.1 tholo 196: if (prev == NULL) /* 1st one */
197: tp->tab[h] = p->cnext;
198: else /* middle somewhere */
199: prev->cnext = p->cnext;
200: if (freeable(p))
201: xfree(p->sval);
202: free(p->nval);
1.3 kstailey 203: free(p);
1.1 tholo 204: tp->nelem--;
205: return;
206: }
207: }
208:
1.8 millert 209: Cell *setsymtab(const char *n, const char *s, Awkfloat f, unsigned t, Array *tp)
1.1 tholo 210: {
211: int h;
212: Cell *p;
213:
214: if (n != NULL && (p = lookup(n, tp)) != NULL) {
1.3 kstailey 215: dprintf( ("setsymtab found %p: n=%s s=\"%s\" f=%g t=%o\n",
1.8 millert 216: p, NN(p->nval), NN(p->sval), p->fval, p->tval) );
1.1 tholo 217: return(p);
218: }
219: p = (Cell *) malloc(sizeof(Cell));
220: if (p == NULL)
1.6 millert 221: FATAL("out of space for symbol table at %s", n);
1.1 tholo 222: p->nval = tostring(n);
223: p->sval = s ? tostring(s) : tostring("");
224: p->fval = f;
225: p->tval = t;
226: p->csub = CUNK;
227: p->ctype = OCELL;
228: tp->nelem++;
229: if (tp->nelem > FULLTAB * tp->size)
230: rehash(tp);
231: h = hash(n, tp->size);
232: p->cnext = tp->tab[h];
233: tp->tab[h] = p;
1.3 kstailey 234: dprintf( ("setsymtab set %p: n=%s s=\"%s\" f=%g t=%o\n",
1.1 tholo 235: p, p->nval, p->sval, p->fval, p->tval) );
236: return(p);
237: }
238:
/*
 * hash - bucket index for string s in a table of n buckets.
 * Folds the characters of s into an unsigned accumulator
 * (acc = 31 * acc + character) and reduces it modulo n.
 */
int hash(const char *s, int n)
{
	unsigned acc = 0;
	const char *p = s;

	while (*p != '\0') {
		acc = 31 * acc + *p;
		p++;
	}
	return acc % n;
}
247:
248: void rehash(Array *tp) /* rehash items in small table into big one */
249: {
250: int i, nh, nsz;
251: Cell *cp, *op, **np;
252:
253: nsz = GROWTAB * tp->size;
254: np = (Cell **) calloc(nsz, sizeof(Cell *));
255: if (np == NULL) /* can't do it, but can keep running. */
256: return; /* someone else will run out later. */
257: for (i = 0; i < tp->size; i++) {
258: for (cp = tp->tab[i]; cp; cp = op) {
259: op = cp->cnext;
260: nh = hash(cp->nval, nsz);
261: cp->cnext = np[nh];
262: np[nh] = cp;
263: }
264: }
1.3 kstailey 265: free(tp->tab);
1.1 tholo 266: tp->tab = np;
267: tp->size = nsz;
268: }
269:
1.8 millert 270: Cell *lookup(const char *s, Array *tp) /* look for s in tp */
1.1 tholo 271: {
1.5 millert 272: Cell *p;
1.1 tholo 273: int h;
274:
275: h = hash(s, tp->size);
1.5 millert 276: for (p = tp->tab[h]; p != NULL; p = p->cnext)
1.3 kstailey 277: if (strcmp(s, p->nval) == 0)
1.1 tholo 278: return(p); /* found it */
279: return(NULL); /* not found */
280: }
281:
282: Awkfloat setfval(Cell *vp, Awkfloat f) /* set float val of a Cell */
283: {
1.3 kstailey 284: int fldno;
285:
1.1 tholo 286: if ((vp->tval & (NUM | STR)) == 0)
287: funnyvar(vp, "assign to");
1.3 kstailey 288: if (isfld(vp)) {
1.1 tholo 289: donerec = 0; /* mark $0 invalid */
1.3 kstailey 290: fldno = atoi(vp->nval);
291: if (fldno > *NF)
292: newfld(fldno);
293: dprintf( ("setting field %d to %g\n", fldno, f) );
294: } else if (isrec(vp)) {
1.1 tholo 295: donefld = 0; /* mark $1... invalid */
296: donerec = 1;
297: }
1.4 millert 298: if (freeable(vp))
299: xfree(vp->sval); /* free any previous string */
1.1 tholo 300: vp->tval &= ~STR; /* mark string invalid */
301: vp->tval |= NUM; /* mark number ok */
1.8 millert 302: dprintf( ("setfval %p: %s = %g, t=%o\n", vp, NN(vp->nval), f, vp->tval) );
1.1 tholo 303: return vp->fval = f;
304: }
305:
1.8 millert 306: void funnyvar(Cell *vp, const char *rw)
1.1 tholo 307: {
1.3 kstailey 308: if (isarr(vp))
1.6 millert 309: FATAL("can't %s %s; it's an array name.", rw, vp->nval);
1.1 tholo 310: if (vp->tval & FCN)
1.6 millert 311: FATAL("can't %s %s; it's a function.", rw, vp->nval);
312: WARNING("funny variable %p: n=%s s=\"%s\" f=%g t=%o",
313: vp, vp->nval, vp->sval, vp->fval, vp->tval);
1.1 tholo 314: }
315:
1.8 millert 316: char *setsval(Cell *vp, const char *s) /* set string val of a Cell */
1.1 tholo 317: {
318: char *t;
1.3 kstailey 319: int fldno;
1.1 tholo 320:
1.11 millert 321: dprintf( ("starting setsval %p: %s = \"%s\", t=%o, r,f=%d,%d\n",
322: vp, NN(vp->nval), s, vp->tval, donerec, donefld) );
1.1 tholo 323: if ((vp->tval & (NUM | STR)) == 0)
324: funnyvar(vp, "assign to");
1.3 kstailey 325: if (isfld(vp)) {
1.1 tholo 326: donerec = 0; /* mark $0 invalid */
1.3 kstailey 327: fldno = atoi(vp->nval);
328: if (fldno > *NF)
329: newfld(fldno);
330: dprintf( ("setting field %d to %s (%p)\n", fldno, s, s) );
331: } else if (isrec(vp)) {
1.1 tholo 332: donefld = 0; /* mark $1... invalid */
333: donerec = 1;
334: }
335: t = tostring(s); /* in case it's self-assign */
1.14 ! jmc 336: if (freeable(vp))
! 337: xfree(vp->sval);
1.1 tholo 338: vp->tval &= ~NUM;
339: vp->tval |= STR;
340: vp->tval &= ~DONTFREE;
1.11 millert 341: dprintf( ("setsval %p: %s = \"%s (%p) \", t=%o r,f=%d,%d\n",
342: vp, NN(vp->nval), t,t, vp->tval, donerec, donefld) );
1.1 tholo 343: return(vp->sval = t);
344: }
345:
346: Awkfloat getfval(Cell *vp) /* get float val of a Cell */
347: {
348: if ((vp->tval & (NUM | STR)) == 0)
349: funnyvar(vp, "read value of");
1.3 kstailey 350: if (isfld(vp) && donefld == 0)
1.1 tholo 351: fldbld();
1.3 kstailey 352: else if (isrec(vp) && donerec == 0)
1.1 tholo 353: recbld();
354: if (!isnum(vp)) { /* not a number */
355: vp->fval = atof(vp->sval); /* best guess */
1.4 millert 356: if (is_number(vp->sval) && !(vp->tval&CON))
1.1 tholo 357: vp->tval |= NUM; /* make NUM only sparingly */
358: }
1.8 millert 359: dprintf( ("getfval %p: %s = %g, t=%o\n", vp, NN(vp->nval), vp->fval, vp->tval) );
1.1 tholo 360: return(vp->fval);
361: }
362:
1.11 millert 363: static char *get_str_val(Cell *vp, char **fmt) /* get string val of a Cell */
1.1 tholo 364: {
1.13 millert 365: int n;
1.1 tholo 366: double dtemp;
367:
368: if ((vp->tval & (NUM | STR)) == 0)
369: funnyvar(vp, "read value of");
1.3 kstailey 370: if (isfld(vp) && donefld == 0)
1.1 tholo 371: fldbld();
1.3 kstailey 372: else if (isrec(vp) && donerec == 0)
1.1 tholo 373: recbld();
1.3 kstailey 374: if (isstr(vp) == 0) {
375: if (freeable(vp))
1.1 tholo 376: xfree(vp->sval);
377: if (modf(vp->fval, &dtemp) == 0) /* it's integral */
1.13 millert 378: n = asprintf(&vp->sval, "%.30g", vp->fval);
1.1 tholo 379: else
1.13 millert 380: n = asprintf(&vp->sval, *fmt, vp->fval);
381: if (n == -1)
382: FATAL("out of space in get_str_val");
1.1 tholo 383: vp->tval &= ~DONTFREE;
384: vp->tval |= STR;
385: }
1.8 millert 386: dprintf( ("getsval %p: %s = \"%s (%p)\", t=%o\n", vp, NN(vp->nval), vp->sval, vp->sval, vp->tval) );
1.1 tholo 387: return(vp->sval);
388: }
389:
1.8 millert 390: char *getsval(Cell *vp) /* get string val of a Cell */
391: {
392: return get_str_val(vp, CONVFMT);
393: }
394:
395: char *getpssval(Cell *vp) /* get string val of a Cell for print */
396: {
397: return get_str_val(vp, OFMT);
398: }
399:
400:
/*
 * tostring - return a freshly allocated copy of string s.
 * Calls FATAL when the copy cannot be allocated.
 */
char *tostring(const char *s)
{
	char *copy = strdup(s);

	if (copy != NULL)
		return copy;
	FATAL("out of space in tostring on %s", s);
	return copy;
}
410:
1.8 millert 411: char *qstring(const char *is, int delim) /* collect string up to next delim */
1.1 tholo 412: {
1.8 millert 413: const char *os = is;
1.1 tholo 414: int c, n;
1.7 millert 415: uschar *s = (uschar *) is;
416: uschar *buf, *bp;
1.1 tholo 417:
1.8 millert 418: if ((buf = (uschar *) malloc(strlen(is)+3)) == NULL)
1.6 millert 419: FATAL( "out of space in qstring(%s)", s);
1.3 kstailey 420: for (bp = buf; (c = *s) != delim; s++) {
1.1 tholo 421: if (c == '\n')
1.6 millert 422: SYNTAX( "newline in string %.20s...", os );
1.1 tholo 423: else if (c != '\\')
1.3 kstailey 424: *bp++ = c;
1.6 millert 425: else { /* \something */
426: c = *++s;
427: if (c == 0) { /* \ at end */
428: *bp++ = '\\';
429: break; /* for loop */
430: }
431: switch (c) {
1.3 kstailey 432: case '\\': *bp++ = '\\'; break;
433: case 'n': *bp++ = '\n'; break;
434: case 't': *bp++ = '\t'; break;
435: case 'b': *bp++ = '\b'; break;
436: case 'f': *bp++ = '\f'; break;
437: case 'r': *bp++ = '\r'; break;
1.1 tholo 438: default:
439: if (!isdigit(c)) {
1.3 kstailey 440: *bp++ = c;
1.1 tholo 441: break;
442: }
443: n = c - '0';
444: if (isdigit(s[1])) {
445: n = 8 * n + *++s - '0';
446: if (isdigit(s[1]))
447: n = 8 * n + *++s - '0';
448: }
1.3 kstailey 449: *bp++ = n;
1.1 tholo 450: break;
451: }
1.3 kstailey 452: }
1.1 tholo 453: }
1.3 kstailey 454: *bp++ = 0;
1.7 millert 455: return (char *) buf;
1.1 tholo 456: }