Annotation of src/usr.bin/sort/init.c, Revision 1.6
1.6 ! deraadt 1: /* $OpenBSD: init.c,v 1.5 2003/06/03 02:56:16 millert Exp $ */
1.1 millert 2:
3: /*-
4: * Copyright (c) 1993
5: * The Regents of the University of California. All rights reserved.
6: *
7: * This code is derived from software contributed to Berkeley by
8: * Peter McIlroy.
9: *
10: * Redistribution and use in source and binary forms, with or without
11: * modification, are permitted provided that the following conditions
12: * are met:
13: * 1. Redistributions of source code must retain the above copyright
14: * notice, this list of conditions and the following disclaimer.
15: * 2. Redistributions in binary form must reproduce the above copyright
16: * notice, this list of conditions and the following disclaimer in the
17: * documentation and/or other materials provided with the distribution.
1.5 millert 18: * 3. Neither the name of the University nor the names of its contributors
1.1 millert 19: * may be used to endorse or promote products derived from this software
20: * without specific prior written permission.
21: *
22: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32: * SUCH DAMAGE.
33: */
34:
35: #ifndef lint
36: #if 0
37: static char sccsid[] = "@(#)init.c 8.1 (Berkeley) 6/6/93";
38: #else
1.6 ! deraadt 39: static char rcsid[] = "$OpenBSD: init.c,v 1.5 2003/06/03 02:56:16 millert Exp $";
1.1 millert 40: #endif
41: #endif /* not lint */
42:
43: #include "sort.h"
44:
45: #include <ctype.h>
46: #include <string.h>
47:
/*
 * Column list shared with the rest of sort; defined in another file.
 * insertcol() below keeps its first ncols entries in increasing order
 * of column number.
 */
1.3 millert 48: extern struct coldesc *clist;
/* Number of entries of clist currently in use; incremented by insertcol(). */
1.1 millert 49: extern int ncols;
/* Global weight table; settables() fills it from one of the weight tables. */
50: u_char gweights[NBINS];
51:
/* File-local helper: records a field's column numbers in clist. */
1.6 ! deraadt 52: static void insertcol(struct field *);
/* Parses one "num[.indent][flags]" column of a -k key; defined below. */
! 53: char *setcolumn(char *, struct field *, int);
! 54:
1.1 millert 55: /*
56: * clist (list of columns which correspond to one or more icol or tcol)
57: * is in increasing order of columns.
58: * Fields are kept in increasing order of fields.
59: */
60:
61: /*
62: * keep clist in order--inserts a column in a sorted array
63: */
64: static void
65: insertcol(field)
66: struct field *field;
67: {
68: int i;
69: for (i = 0; i < ncols; i++)
70: if (field->icol.num <= clist[i].num)
71: break;
72: if (field->icol.num != clist[i].num) {
73: memmove(clist+i+1, clist+i, sizeof(COLDESC)*(ncols-i));
74: clist[i].num = field->icol.num;
75: ncols++;
76: }
77: if (field->tcol.num && field->tcol.num != field->icol.num) {
78: for (i = 0; i < ncols; i++)
79: if (field->tcol.num <= clist[i].num)
80: break;
81: if (field->tcol.num != clist[i].num) {
82: memmove(clist+i+1, clist+i,sizeof(COLDESC)*(ncols-i));
83: clist[i].num = field->tcol.num;
84: ncols++;
85: }
86: }
87: }
88:
89: /*
90: * matches fields with the appropriate columns--n^2 but who cares?
91: */
92: void
93: fldreset(fldtab)
94: struct field *fldtab;
95: {
96: int i;
97: fldtab[0].tcol.p = clist+ncols-1;
98: for (++fldtab; fldtab->icol.num; ++fldtab) {
1.2 millert 99: for (i = 0; fldtab->icol.num != clist[i].num; i++)
100: ;
1.1 millert 101: fldtab->icol.p = clist + i;
102: if (!fldtab->tcol.num)
103: continue;
1.2 millert 104: for (i = 0; fldtab->tcol.num != clist[i].num; i++)
105: ;
1.1 millert 106: fldtab->tcol.p = clist + i;
107: }
108: }
109:
110: /*
111: * interprets a column in a -k field
112: */
113: char *
114: setcolumn(pos, cur_fld, gflag)
115: char *pos;
116: struct field *cur_fld;
117: int gflag;
118: {
119: struct column *col;
120: int tmp;
1.2 millert 121:
1.1 millert 122: col = cur_fld->icol.num ? (&(*cur_fld).tcol) : (&(*cur_fld).icol);
123: pos += sscanf(pos, "%d", &(col->num));
124: while (isdigit(*pos))
125: pos++;
126: if (col->num <= 0 && !(col->num == 0 && col == &(cur_fld->tcol)))
127: errx(2, "field numbers must be positive");
128: if (*pos == '.') {
129: if (!col->num)
130: errx(2, "cannot indent end of line");
131: pos += sscanf(++pos, "%d", &(col->indent));
132: while (isdigit(*pos))
133: pos++;
134: if (&cur_fld->icol == col)
135: col->indent--;
136: if (col->indent < 0)
137: errx(2, "illegal offset");
138: }
139: if (optval(*pos, cur_fld->tcol.num))
140: while ((tmp = optval(*pos, cur_fld->tcol.num))) {
141: cur_fld->flags |= tmp;
142: pos++;
143: }
144: if (cur_fld->icol.num == 0)
145: cur_fld->icol.num = 1;
146: return (pos);
147: }
148:
149: int
150: setfield(pos, cur_fld, gflag)
151: char *pos;
152: struct field *cur_fld;
153: int gflag;
154: {
155: int tmp;
156: cur_fld->weights = ascii;
157: cur_fld->mask = alltable;
158: pos = setcolumn(pos, cur_fld, gflag);
159: if (*pos == '\0') /* key extends to EOL. */
160: cur_fld->tcol.num = 0;
161: else {
162: if (*pos != ',')
163: errx(2, "illegal field descriptor");
164: setcolumn((++pos), cur_fld, gflag);
165: }
166: if (!cur_fld->flags)
167: cur_fld->flags = gflag;
168: tmp = cur_fld->flags;
169:
170: /*
171: * Assign appropriate mask table and weight table.
172: * If the global weights are reversed, the local field
173: * must be "re-reversed".
174: */
175: if (((tmp & R) ^ (gflag & R)) && tmp & F)
176: cur_fld->weights = RFtable;
177: else if (tmp & F)
178: cur_fld->weights = Ftable;
179: else if ((tmp & R) ^ (gflag & R))
180: cur_fld->weights = Rascii;
181: if (tmp & I)
182: cur_fld->mask = itable;
183: else if (tmp & D)
184: cur_fld->mask = dtable;
185: cur_fld->flags |= (gflag & (BI | BT));
186: if (!cur_fld->tcol.indent) /* BT has no meaning at end of field */
187: cur_fld->flags &= (D|F|I|N|R|BI);
188: if (cur_fld->tcol.num && !(!(cur_fld->flags & BI)
189: && cur_fld->flags & BT) && (cur_fld->tcol.num <= cur_fld->icol.num
190: && cur_fld->tcol.indent < cur_fld->icol.indent))
191: errx(2, "fields out of order");
192: insertcol(cur_fld);
193: return (cur_fld->tcol.num);
194: }
195:
196: int
197: optval(desc, tcolflag)
198: int desc, tcolflag;
199: {
200: switch(desc) {
201: case 'b':
202: if (!tcolflag)
1.2 millert 203: return (BI);
1.1 millert 204: else
1.2 millert 205: return (BT);
206: case 'd': return (D);
207: case 'f': return (F);
208: case 'i': return (I);
209: case 'n': return (N);
210: case 'r': return (R);
211: default: return (0);
1.1 millert 212: }
213: }
214:
/*
 * Parse the non-negative decimal number at s for fixit() and store the
 * address of the first unparsed character in *endp.  Terminates the
 * program through errx() when no digits are present, the value is
 * negative, or the value is LONG_MAX.  strtol() returns LONG_MAX on
 * overflow, and a genuine LONG_MAX could not be incremented by the
 * caller without overflowing, so both cases are rejected by the same
 * test and errno need not be consulted.
 */
static long
parsepos(char *s, char **endp)
{
	long val;

	val = strtol(s, endp, 10);
	if (*endp == s || val < 0 || val == LONG_MAX)
		errx(2, "invalid field number");
	return (val);
}

/*
 * Convert obsolescent "+pos1 [-pos2]" format to POSIX -k form.
 * Note that the conversion is tricky, see the manual for details.
 *
 * "+v[.x]flags" becomes "-k<v+1>[.<x+1>]flags".  When the following
 * argument looks like "-w[.x]flags" it is folded into the same key as
 * ",<w>flags" if x is absent or zero and as ",<w+1>.<x>flags"
 * otherwise; that argument is then removed from argv and *argc is
 * decremented.  The rewritten argument is a strdup()ed string that is
 * not freed here.
 *
 * Relies on argv[*argc] being NULL, as it is for the argv handed to
 * main().
 */
void
fixit(int *argc, char **argv)
{
	int i, j, n;
	long v, w, x;
	char *p, *ep;
	char buf[128], *bufp, *bufend;

	bufend = buf + sizeof(buf);
	for (i = 1; i < *argc; i++) {
		if (argv[i][0] != '+')
			continue;

		bufp = buf;
		v = parsepos(argv[i] + 1, &ep);
		p = ep;
		if (*p == '.') {
			x = parsepos(p + 1, &ep);
			p = ep;
			n = snprintf(bufp, (size_t)(bufend - bufp),
			    "-k%ld.%ld%s", v + 1, x + 1, p);
		} else {
			n = snprintf(bufp, (size_t)(bufend - bufp),
			    "-k%ld%s", v + 1, p);
		}
		/* snprintf() returns int: negative on error, >= size if cut. */
		if (n < 0 || n >= bufend - bufp)
			errx(2, "bad field specification");
		bufp += n;

		if (argv[i + 1] != NULL && argv[i + 1][0] == '-' &&
		    isdigit((unsigned char)argv[i + 1][1])) {
			w = parsepos(argv[i + 1] + 1, &ep);
			p = ep;
			x = 0;
			if (*p == '.') {
				x = parsepos(p + 1, &ep);
				p = ep;
			}
			if (x == 0) {
				n = snprintf(bufp, (size_t)(bufend - bufp),
				    ",%ld%s", w, p);
			} else {
				n = snprintf(bufp, (size_t)(bufend - bufp),
				    ",%ld.%ld%s", w + 1, x, p);
			}
			if (n < 0 || n >= bufend - bufp)
				errx(2, "bad field specification");

			/* shift over argv */
			for (j = i + 1; j < *argc; j++)
				argv[j] = argv[j + 1];
			*argc -= 1;
		}
		if ((argv[i] = strdup(buf)) == NULL)
			err(2, NULL);
	}
}
292:
293: /*
294: * ascii, Rascii, Ftable, and RFtable map
295: * REC_D -> REC_D; {not REC_D} -> {not REC_D}.
296: * gweights maps REC_D -> (0 or 255); {not REC_D} -> {not gweights[REC_D]}.
297: * Note: when sorting in forward order, to encode character zero in a key,
298: * use \001\001; character 1 becomes \001\002. In this case, character 0
299: * is reserved for the field delimiter. Analagously for -r (fld_d = 255).
300: * Note: this is only good for ASCII sorting. For different LC 's,
301: * all bets are off. See also num_init in number.c
302: */
303: void
304: settables(gflags)
305: int gflags;
306: {
307: u_char *wts;
308: int i, incr;
309: for (i=0; i < 256; i++) {
310: ascii[i] = i;
311: if (i > REC_D && i < 255 - REC_D+1)
312: Rascii[i] = 255 - i + 1;
313: else
314: Rascii[i] = 255 - i;
315: if (islower(i)) {
316: Ftable[i] = Ftable[i- ('a' -'A')];
317: RFtable[i] = RFtable[i - ('a' - 'A')];
318: } else if (REC_D>= 'A' && REC_D < 'Z' && i < 'a' && i > REC_D) {
319: Ftable[i] = i + 1;
320: RFtable[i] = Rascii[i] - 1;
321: } else {
322: Ftable[i] = i;
323: RFtable[i] = Rascii[i];
324: }
325: alltable[i] = 1;
326: if (i == '\n' || isprint(i))
327: itable[i] = 1;
328: else itable[i] = 0;
329: if (i == '\n' || i == '\t' || i == ' ' || isalnum(i))
330: dtable[i] = 1;
331: else dtable[i] = 0;
332: }
333: Rascii[REC_D] = RFtable[REC_D] = REC_D;
334: if (REC_D >= 'A' && REC_D < 'Z')
1.2 millert 335: Ftable[REC_D + ('a' - 'A')]++;
1.1 millert 336: if (gflags & R && (!(gflags & F) || !SINGL_FLD))
337: wts = Rascii;
338: else if (!(gflags & F) || !SINGL_FLD)
339: wts = ascii;
340: else if (gflags & R)
341: wts = RFtable;
342: else
343: wts = Ftable;
344: memmove(gweights, wts, sizeof(gweights));
345: incr = (gflags & R) ? -1 : 1;
346: for (i = 0; i < REC_D; i++)
347: gweights[i] += incr;
348: gweights[REC_D] = ((gflags & R) ? 255 : 0);
349: if (SINGL_FLD && gflags & F) {
350: for (i = 0; i < REC_D; i++) {
351: ascii[i] += incr;
352: Rascii[i] += incr;
353: }
354: ascii[REC_D] = Rascii[REC_D] = gweights[REC_D];
355: }
356: }