Annotation of src/usr.bin/awk/tran.c, Revision 1.2
1.1 tholo 1: /****************************************************************
2: Copyright (C) AT&T and Lucent Technologies 1996
3: All Rights Reserved
4:
5: Permission to use, copy, modify, and distribute this software and
6: its documentation for any purpose and without fee is hereby
7: granted, provided that the above copyright notice appear in all
8: copies and that both that the copyright notice and this
9: permission notice and warranty disclaimer appear in supporting
10: documentation, and that the names of AT&T or Lucent Technologies
11: or any of their entities not be used in advertising or publicity
12: pertaining to distribution of the software without specific,
13: written prior permission.
14:
15: AT&T AND LUCENT DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
16: SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
17: FITNESS. IN NO EVENT SHALL AT&T OR LUCENT OR ANY OF THEIR
18: ENTITIES BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
19: DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
20: DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
21: OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE
22: USE OR PERFORMANCE OF THIS SOFTWARE.
23: ****************************************************************/
24:
25: #define DEBUG
26: #include <stdio.h>
27: #include <math.h>
28: #include <ctype.h>
29: #include <string.h>
30: #include <stdlib.h>
31: #include "awk.h"
32: #include "awkgram.h"
33:
34: #define FULLTAB 2 /* rehash when table gets this x full */
35: #define GROWTAB 4 /* grow table by this factor */
36:
37: Array *symtab; /* main symbol table */
38:
39: char **FS; /* initial field sep */
40: char **RS; /* initial record sep */
41: char **OFS; /* output field sep */
42: char **ORS; /* output record sep */
43: char **OFMT; /* output format for numbers */
44: char **CONVFMT; /* format for conversions in getsval */
45: Awkfloat *NF; /* number of fields in current record */
46: Awkfloat *NR; /* number of current record */
47: Awkfloat *FNR; /* number of current record in current file */
48: char **FILENAME; /* current filename argument */
49: Awkfloat *ARGC; /* number of arguments from command line */
50: char **SUBSEP; /* subscript separator for a[i,j,k]; default \034 */
51: Awkfloat *RSTART; /* start of re matched with ~; origin 1 (!) */
52: Awkfloat *RLENGTH; /* length of same */
53:
54: Cell *recloc; /* location of record */
55: Cell *nrloc; /* NR */
56: Cell *nfloc; /* NF */
57: Cell *fnrloc; /* FNR */
58: Array *ARGVtab; /* symbol table containing ARGV[...] */
59: Array *ENVtab; /* symbol table containing ENVIRON[...] */
60: Cell *rstartloc; /* RSTART */
61: Cell *rlengthloc; /* RLENGTH */
62: Cell *symtabloc; /* SYMTAB */
63:
64: Cell *nullloc; /* a guaranteed empty cell */
65: Node *nullnode; /* zero&null, converted into a node for comparisons */
66:
67: extern Cell *fldtab;
68:
69: void syminit(void) /* initialize symbol table with builtin vars */
70: {
71: setsymtab("0", "0", 0.0, NUM|STR|CON|DONTFREE, symtab);
72: /* this is used for if(x)... tests: */
73: nullloc = setsymtab("$zero&null", "", 0.0, NUM|STR|CON|DONTFREE, symtab);
74: nullnode = valtonode(nullloc, CCON);
75:
76: /* recloc = setsymtab("$0", record, 0.0, REC|STR|DONTFREE, symtab); */
77: /* has been done elsewhere */
78: recloc = &fldtab[0];
79: FS = &setsymtab("FS", " ", 0.0, STR|DONTFREE, symtab)->sval;
80: RS = &setsymtab("RS", "\n", 0.0, STR|DONTFREE, symtab)->sval;
81: OFS = &setsymtab("OFS", " ", 0.0, STR|DONTFREE, symtab)->sval;
82: ORS = &setsymtab("ORS", "\n", 0.0, STR|DONTFREE, symtab)->sval;
83: OFMT = &setsymtab("OFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval;
84: CONVFMT = &setsymtab("CONVFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval;
1.2 ! millert 85: FILENAME = &setsymtab("FILENAME", "", 0.0, STR|DONTFREE, symtab)->sval;
1.1 tholo 86: nfloc = setsymtab("NF", "", 0.0, NUM, symtab);
87: NF = &nfloc->fval;
88: nrloc = setsymtab("NR", "", 0.0, NUM, symtab);
89: NR = &nrloc->fval;
90: fnrloc = setsymtab("FNR", "", 0.0, NUM, symtab);
91: FNR = &fnrloc->fval;
92: SUBSEP = &setsymtab("SUBSEP", "\034", 0.0, STR|DONTFREE, symtab)->sval;
93: rstartloc = setsymtab("RSTART", "", 0.0, NUM, symtab);
94: RSTART = &rstartloc->fval;
95: rlengthloc = setsymtab("RLENGTH", "", 0.0, NUM, symtab);
96: RLENGTH = &rlengthloc->fval;
97: symtabloc = setsymtab("SYMTAB", "", 0.0, ARR, symtab);
98: symtabloc->sval = (char *) symtab;
99: }
100:
101: void arginit(int ac, char *av[]) /* set up ARGV and ARGC */
102: {
103: Cell *cp;
104: int i;
105: char temp[5];
106:
107: ARGC = &setsymtab("ARGC", "", (Awkfloat) ac, NUM, symtab)->fval;
108: cp = setsymtab("ARGV", "", 0.0, ARR, symtab);
109: ARGVtab = makesymtab(NSYMTAB); /* could be (int) ARGC as well */
110: cp->sval = (char *) ARGVtab;
111: for (i = 0; i < ac; i++) {
112: sprintf((char *)temp, "%d", i);
113: if (isnumber(*av))
114: setsymtab(temp, *av, atof(*av), STR|NUM, ARGVtab);
115: else
116: setsymtab(temp, *av, 0.0, STR, ARGVtab);
117: av++;
118: }
119: }
120:
121: void envinit(char **envp) /* set up ENVIRON variable */
122: {
123: Cell *cp;
124: char *p;
125:
126: cp = setsymtab("ENVIRON", "", 0.0, ARR, symtab);
127: ENVtab = makesymtab(NSYMTAB);
128: cp->sval = (char *) ENVtab;
129: for ( ; *envp; envp++) {
130: if ((p = (char *) strchr((char *) *envp, '=')) == NULL)
131: continue;
132: *p++ = 0; /* split into two strings at = */
133: if (isnumber(p))
134: setsymtab(*envp, p, atof(p), STR|NUM, ENVtab);
135: else
136: setsymtab(*envp, p, 0.0, STR, ENVtab);
137: p[-1] = '='; /* restore in case env is passed down to a shell */
138: }
139: }
140:
141: Array *makesymtab(int n) /* make a new symbol table */
142: {
143: Array *ap;
144: Cell **tp;
145:
146: ap = (Array *) malloc(sizeof(Array));
147: tp = (Cell **) calloc(n, sizeof(Cell *));
148: if (ap == NULL || tp == NULL)
149: ERROR "out of space in makesymtab" FATAL;
150: ap->nelem = 0;
151: ap->size = n;
152: ap->tab = tp;
153: return(ap);
154: }
155:
156: void freesymtab(Cell *ap) /* free a symbol table */
157: {
158: Cell *cp, *temp;
159: Array *tp;
160: int i;
161:
162: if (!isarr(ap))
163: return;
164: tp = (Array *) ap->sval;
165: if (tp == NULL)
166: return;
167: for (i = 0; i < tp->size; i++) {
168: for (cp = tp->tab[i]; cp != NULL; cp = temp) {
169: xfree(cp->nval);
170: if (freeable(cp))
171: xfree(cp->sval);
172: temp = cp->cnext; /* avoids freeing then using */
173: free((char *) cp);
174: }
175: tp->tab[i] = 0;
176: }
177: free((char *) (tp->tab));
178: free((char *) tp);
179: }
180:
181: void freeelem(Cell *ap, char *s) /* free elem s from ap (i.e., ap["s"] */
182: {
183: Array *tp;
184: Cell *p, *prev = NULL;
185: int h;
186:
187: tp = (Array *) ap->sval;
188: h = hash(s, tp->size);
189: for (p = tp->tab[h]; p != NULL; prev = p, p = p->cnext)
190: if (strcmp((char *) s, (char *) p->nval) == 0) {
191: if (prev == NULL) /* 1st one */
192: tp->tab[h] = p->cnext;
193: else /* middle somewhere */
194: prev->cnext = p->cnext;
195: if (freeable(p))
196: xfree(p->sval);
197: free(p->nval);
198: free((char *) p);
199: tp->nelem--;
200: return;
201: }
202: }
203:
204: Cell *setsymtab(char *n, char *s, Awkfloat f, unsigned t, Array *tp)
205: {
206: int h;
207: Cell *p;
208:
209: if (n != NULL && (p = lookup(n, tp)) != NULL) {
210: dprintf( ("setsymtab found %p: n=%s s=\"%s\" f=%g t=%o\n",
211: p, p->nval, p->sval, p->fval, p->tval) );
212: return(p);
213: }
214: p = (Cell *) malloc(sizeof(Cell));
215: if (p == NULL)
216: ERROR "out of space for symbol table at %s", n FATAL;
217: p->nval = tostring(n);
218: p->sval = s ? tostring(s) : tostring("");
219: p->fval = f;
220: p->tval = t;
221: p->csub = CUNK;
222: p->ctype = OCELL;
223: tp->nelem++;
224: if (tp->nelem > FULLTAB * tp->size)
225: rehash(tp);
226: h = hash(n, tp->size);
227: p->cnext = tp->tab[h];
228: tp->tab[h] = p;
229: dprintf( ("setsymtab set %p: n=%s s=\"%s\" f=%g t=%o\n",
230: p, p->nval, p->sval, p->fval, p->tval) );
231: return(p);
232: }
233:
/*
 * hash - form hash value for string s.
 *
 * s: NUL-terminated string to hash.
 * n: number of buckets; the result is reduced modulo n.
 *
 * Folds the characters left to right as value = c + 31 * value in
 * unsigned arithmetic and returns value % n.
 */
int hash(char *s, int n)
{
	unsigned hashval = 0;

	while (*s != '\0') {
		hashval = 31 * hashval + *s;
		s++;
	}
	return hashval % n;
}
242:
243: void rehash(Array *tp) /* rehash items in small table into big one */
244: {
245: int i, nh, nsz;
246: Cell *cp, *op, **np;
247:
248: nsz = GROWTAB * tp->size;
249: np = (Cell **) calloc(nsz, sizeof(Cell *));
250: if (np == NULL) /* can't do it, but can keep running. */
251: return; /* someone else will run out later. */
252: for (i = 0; i < tp->size; i++) {
253: for (cp = tp->tab[i]; cp; cp = op) {
254: op = cp->cnext;
255: nh = hash(cp->nval, nsz);
256: cp->cnext = np[nh];
257: np[nh] = cp;
258: }
259: }
260: free((char *) (tp->tab));
261: tp->tab = np;
262: tp->size = nsz;
263: }
264:
265: Cell *lookup(char *s, Array *tp) /* look for s in tp */
266: {
267: Cell *p, *prev = NULL;
268: int h;
269:
270: h = hash(s, tp->size);
271: for (p = tp->tab[h]; p != NULL; prev = p, p = p->cnext)
272: if (strcmp((char *) s, (char *) p->nval) == 0)
273: return(p); /* found it */
274: return(NULL); /* not found */
275: }
276:
277: Awkfloat setfval(Cell *vp, Awkfloat f) /* set float val of a Cell */
278: {
279: if ((vp->tval & (NUM | STR)) == 0)
280: funnyvar(vp, "assign to");
281: if (vp->tval & FLD) {
282: donerec = 0; /* mark $0 invalid */
283: if (vp-fldtab > *NF)
284: newfld(vp-fldtab);
285: dprintf( ("setting field %d to %g\n", vp-fldtab, f) );
286: } else if (vp->tval & REC) {
287: donefld = 0; /* mark $1... invalid */
288: donerec = 1;
289: }
290: vp->tval &= ~STR; /* mark string invalid */
291: vp->tval |= NUM; /* mark number ok */
292: dprintf( ("setfval %p: %s = %g, t=%o\n", vp, vp->nval, f, vp->tval) );
293: return vp->fval = f;
294: }
295:
296: void funnyvar(Cell *vp, char *rw)
297: {
298: if (vp->tval & ARR)
299: ERROR "can't %s %s; it's an array name.", rw, vp->nval FATAL;
300: if (vp->tval & FCN)
301: ERROR "can't %s %s; it's a function.", rw, vp->nval FATAL;
302: ERROR "funny variable %p: n=%s s=\"%s\" f=%g t=%o",
303: vp, vp->nval, vp->sval, vp->fval, vp->tval WARNING;
304: }
305:
306: char *setsval(Cell *vp, char *s) /* set string val of a Cell */
307: {
308: char *t;
309:
310: if ((vp->tval & (NUM | STR)) == 0)
311: funnyvar(vp, "assign to");
312: if (vp->tval & FLD) {
313: donerec = 0; /* mark $0 invalid */
314: if (vp-fldtab > *NF)
315: newfld(vp-fldtab);
316: dprintf( ("setting field %d to %s (%p)\n", vp-fldtab, s, s) );
317: } else if (vp->tval & REC) {
318: donefld = 0; /* mark $1... invalid */
319: donerec = 1;
320: }
321: t = tostring(s); /* in case it's self-assign */
322: vp->tval &= ~NUM;
323: vp->tval |= STR;
324: if (freeable(vp))
325: xfree(vp->sval);
326: vp->tval &= ~DONTFREE;
327: dprintf( ("setsval %p: %s = \"%s (%p)\", t=%o\n", vp, vp->nval, t,t, vp->tval) );
328: return(vp->sval = t);
329: }
330:
331: Awkfloat getfval(Cell *vp) /* get float val of a Cell */
332: {
333: if ((vp->tval & (NUM | STR)) == 0)
334: funnyvar(vp, "read value of");
335: if ((vp->tval & FLD) && donefld == 0)
336: fldbld();
337: else if ((vp->tval & REC) && donerec == 0)
338: recbld();
339: if (!isnum(vp)) { /* not a number */
340: vp->fval = atof(vp->sval); /* best guess */
341: if (isnumber(vp->sval) && !(vp->tval&CON))
342: vp->tval |= NUM; /* make NUM only sparingly */
343: }
344: dprintf( ("getfval %p: %s = %g, t=%o\n", vp, vp->nval, vp->fval, vp->tval) );
345: return(vp->fval);
346: }
347:
348: char *getsval(Cell *vp) /* get string val of a Cell */
349: {
350: char s[100];
351: double dtemp;
352:
353: if ((vp->tval & (NUM | STR)) == 0)
354: funnyvar(vp, "read value of");
355: if ((vp->tval & FLD) && donefld == 0)
356: fldbld();
357: else if ((vp->tval & REC) && donerec == 0)
358: recbld();
359: if ((vp->tval & STR) == 0) {
360: if (!(vp->tval&DONTFREE))
361: xfree(vp->sval);
362: if (modf(vp->fval, &dtemp) == 0) /* it's integral */
363: sprintf((char *)s, "%.20g", vp->fval);
364: else
365: sprintf((char *)s, (char *)*CONVFMT, vp->fval);
366: vp->sval = tostring(s);
367: vp->tval &= ~DONTFREE;
368: vp->tval |= STR;
369: }
370: dprintf( ("getsval %p: %s = \"%s (%p)\", t=%o\n", vp, vp->nval, vp->sval, vp->sval, vp->tval) );
371: return(vp->sval);
372: }
373:
374: char *tostring(char *s) /* make a copy of string s */
375: {
376: char *p;
377:
378: p = (char *) malloc(strlen((char *) s)+1);
379: if (p == NULL)
380: ERROR "out of space in tostring on %s", s FATAL;
381: strcpy((char *) p, (char *) s);
382: return(p);
383: }
384:
385: char *qstring(char *s, int delim) /* collect string up to next delim */
386: {
387: int c, n;
388:
389: for (caddreset(gs); (c = *s) != delim; s++) {
390: if (c == '\n')
391: ERROR "newline in string %.10s...", gs->cbuf SYNTAX;
392: else if (c != '\\')
393: cadd(gs, c);
394: else /* \something */
395: switch (c = *++s) {
396: case '\\': cadd(gs, '\\'); break;
397: case 'n': cadd(gs, '\n'); break;
398: case 't': cadd(gs, '\t'); break;
399: case 'b': cadd(gs, '\b'); break;
400: case 'f': cadd(gs, '\f'); break;
401: case 'r': cadd(gs, '\r'); break;
402: default:
403: if (!isdigit(c)) {
404: cadd(gs, c);
405: break;
406: }
407: n = c - '0';
408: if (isdigit(s[1])) {
409: n = 8 * n + *++s - '0';
410: if (isdigit(s[1]))
411: n = 8 * n + *++s - '0';
412: }
413: cadd(gs, n);
414: break;
415: }
416: }
417: cadd(gs, 0);
418: return gs->cbuf;
419: }