Annotation of src/usr.bin/sort/init.c, Revision 1.7
1.7 ! deraadt 1: /* $OpenBSD: init.c,v 1.6 2003/06/26 00:12:39 deraadt Exp $ */
1.1 millert 2:
3: /*-
4: * Copyright (c) 1993
5: * The Regents of the University of California. All rights reserved.
6: *
7: * This code is derived from software contributed to Berkeley by
8: * Peter McIlroy.
9: *
10: * Redistribution and use in source and binary forms, with or without
11: * modification, are permitted provided that the following conditions
12: * are met:
13: * 1. Redistributions of source code must retain the above copyright
14: * notice, this list of conditions and the following disclaimer.
15: * 2. Redistributions in binary form must reproduce the above copyright
16: * notice, this list of conditions and the following disclaimer in the
17: * documentation and/or other materials provided with the distribution.
1.5 millert 18: * 3. Neither the name of the University nor the names of its contributors
1.1 millert 19: * may be used to endorse or promote products derived from this software
20: * without specific prior written permission.
21: *
22: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32: * SUCH DAMAGE.
33: */
34:
35: #ifndef lint
36: #if 0
37: static char sccsid[] = "@(#)init.c 8.1 (Berkeley) 6/6/93";
38: #else
1.7 ! deraadt 39: static char rcsid[] = "$OpenBSD: init.c,v 1.6 2003/06/26 00:12:39 deraadt Exp $";
1.1 millert 40: #endif
41: #endif /* not lint */
42:
43: #include "sort.h"
44:
45: #include <ctype.h>
46: #include <string.h>
47:
1.3 millert 48: extern struct coldesc *clist;
1.1 millert 49: extern int ncols;
50: u_char gweights[NBINS];
51:
1.6 deraadt 52: static void insertcol(struct field *);
53: char *setcolumn(char *, struct field *, int);
54:
1.1 millert 55: /*
56: * clist (list of columns which correspond to one or more icol or tcol)
57: * is in increasing order of columns.
58: * Fields are kept in increasing order of fields.
59: */
60:
61: /*
62: * keep clist in order--inserts a column in a sorted array
63: */
64: static void
1.7 ! deraadt 65: insertcol(struct field *field)
1.1 millert 66: {
67: int i;
68: for (i = 0; i < ncols; i++)
69: if (field->icol.num <= clist[i].num)
70: break;
71: if (field->icol.num != clist[i].num) {
72: memmove(clist+i+1, clist+i, sizeof(COLDESC)*(ncols-i));
73: clist[i].num = field->icol.num;
74: ncols++;
75: }
76: if (field->tcol.num && field->tcol.num != field->icol.num) {
77: for (i = 0; i < ncols; i++)
78: if (field->tcol.num <= clist[i].num)
79: break;
80: if (field->tcol.num != clist[i].num) {
81: memmove(clist+i+1, clist+i,sizeof(COLDESC)*(ncols-i));
82: clist[i].num = field->tcol.num;
83: ncols++;
84: }
85: }
86: }
87:
88: /*
89: * matches fields with the appropriate columns--n^2 but who cares?
90: */
91: void
1.7 ! deraadt 92: fldreset(struct field *fldtab)
1.1 millert 93: {
94: int i;
95: fldtab[0].tcol.p = clist+ncols-1;
96: for (++fldtab; fldtab->icol.num; ++fldtab) {
1.2 millert 97: for (i = 0; fldtab->icol.num != clist[i].num; i++)
98: ;
1.1 millert 99: fldtab->icol.p = clist + i;
100: if (!fldtab->tcol.num)
101: continue;
1.2 millert 102: for (i = 0; fldtab->tcol.num != clist[i].num; i++)
103: ;
1.1 millert 104: fldtab->tcol.p = clist + i;
105: }
106: }
107:
108: /*
109: * interprets a column in a -k field
110: */
111: char *
1.7 ! deraadt 112: setcolumn(char *pos, struct field *cur_fld, int gflag)
1.1 millert 113: {
114: struct column *col;
115: int tmp;
1.2 millert 116:
1.1 millert 117: col = cur_fld->icol.num ? (&(*cur_fld).tcol) : (&(*cur_fld).icol);
118: pos += sscanf(pos, "%d", &(col->num));
119: while (isdigit(*pos))
120: pos++;
121: if (col->num <= 0 && !(col->num == 0 && col == &(cur_fld->tcol)))
122: errx(2, "field numbers must be positive");
123: if (*pos == '.') {
124: if (!col->num)
125: errx(2, "cannot indent end of line");
126: pos += sscanf(++pos, "%d", &(col->indent));
127: while (isdigit(*pos))
128: pos++;
129: if (&cur_fld->icol == col)
130: col->indent--;
131: if (col->indent < 0)
132: errx(2, "illegal offset");
133: }
134: if (optval(*pos, cur_fld->tcol.num))
135: while ((tmp = optval(*pos, cur_fld->tcol.num))) {
136: cur_fld->flags |= tmp;
137: pos++;
138: }
139: if (cur_fld->icol.num == 0)
140: cur_fld->icol.num = 1;
141: return (pos);
142: }
143:
144: int
1.7 ! deraadt 145: setfield(char *pos, struct field *cur_fld, int gflag)
1.1 millert 146: {
147: int tmp;
148: cur_fld->weights = ascii;
149: cur_fld->mask = alltable;
150: pos = setcolumn(pos, cur_fld, gflag);
151: if (*pos == '\0') /* key extends to EOL. */
152: cur_fld->tcol.num = 0;
153: else {
154: if (*pos != ',')
155: errx(2, "illegal field descriptor");
156: setcolumn((++pos), cur_fld, gflag);
157: }
158: if (!cur_fld->flags)
159: cur_fld->flags = gflag;
160: tmp = cur_fld->flags;
161:
162: /*
163: * Assign appropriate mask table and weight table.
164: * If the global weights are reversed, the local field
165: * must be "re-reversed".
166: */
167: if (((tmp & R) ^ (gflag & R)) && tmp & F)
168: cur_fld->weights = RFtable;
169: else if (tmp & F)
170: cur_fld->weights = Ftable;
171: else if ((tmp & R) ^ (gflag & R))
172: cur_fld->weights = Rascii;
173: if (tmp & I)
174: cur_fld->mask = itable;
175: else if (tmp & D)
176: cur_fld->mask = dtable;
177: cur_fld->flags |= (gflag & (BI | BT));
178: if (!cur_fld->tcol.indent) /* BT has no meaning at end of field */
179: cur_fld->flags &= (D|F|I|N|R|BI);
180: if (cur_fld->tcol.num && !(!(cur_fld->flags & BI)
181: && cur_fld->flags & BT) && (cur_fld->tcol.num <= cur_fld->icol.num
182: && cur_fld->tcol.indent < cur_fld->icol.indent))
183: errx(2, "fields out of order");
184: insertcol(cur_fld);
185: return (cur_fld->tcol.num);
186: }
187:
188: int
1.7 ! deraadt 189: optval(int desc, int tcolflag)
1.1 millert 190: {
191: switch(desc) {
192: case 'b':
193: if (!tcolflag)
1.2 millert 194: return (BI);
1.1 millert 195: else
1.2 millert 196: return (BT);
197: case 'd': return (D);
198: case 'f': return (F);
199: case 'i': return (I);
200: case 'n': return (N);
201: case 'r': return (R);
202: default: return (0);
1.1 millert 203: }
204: }
205:
/*
 * Parse a non-negative decimal number out of an obsolescent
 * "+pos1"/"-pos2" argument.  *ep is set to the first unparsed character.
 * Exits through errx() when no digits are present or the value is out
 * of range.
 */
static long
fixit_num(char *p, char **ep)
{
	long v;

	v = strtol(p, ep, 10);
	/*
	 * strtol() returns LONG_MAX on overflow, and the caller may add
	 * one to the result, so LONG_MAX is rejected whether or not it
	 * came from an overflow.  This also avoids consulting errno,
	 * which could hold a stale ERANGE from an earlier call.
	 */
	if (*ep == p || v < 0 || v == LONG_MAX)
		errx(2, "invalid field number");
	return (v);
}

/*
 * Convert obsolescent "+pos1 [-pos2]" format to POSIX -k form.
 * Note that the conversion is tricky, see the manual for details.
 *
 * Each argv[i] starting with '+' is replaced by a newly allocated
 * "-k..." string.  If the following argument is "-<digit>..." it is
 * folded into the same key, argv is shifted down by one and *argc is
 * decremented.  Exits through errx()/err() on bad numbers, on a key
 * that does not fit the conversion buffer, or when strdup() fails.
 */
void
fixit(int *argc, char **argv)
{
	int i, j, n;
	long v, w, x;
	char *p, *ep;
	char buf[128];
	size_t used;

	for (i = 1; i < *argc; i++) {
		if (argv[i][0] != '+')
			continue;

		/* +v[.x][flags] becomes -k(v+1)[.(x+1)][flags] */
		v = fixit_num(argv[i] + 1, &ep);
		p = ep;
		if (*p == '.') {
			x = fixit_num(p + 1, &ep);
			p = ep;
			n = snprintf(buf, sizeof(buf), "-k%ld.%ld%s",
			    v + 1, x + 1, p);
		} else {
			n = snprintf(buf, sizeof(buf), "-k%ld%s", v + 1, p);
		}
		if (n < 0 || (size_t)n >= sizeof(buf))
			errx(2, "bad field specification");
		used = (size_t)n;

		if (argv[i + 1] != NULL && argv[i + 1][0] == '-' &&
		    isdigit((unsigned char)argv[i + 1][1])) {
			w = fixit_num(argv[i + 1] + 1, &ep);
			p = ep;
			x = 0;
			if (*p == '.') {
				x = fixit_num(p + 1, &ep);
				p = ep;
			}
			/* -w[.0] becomes ,w and -w.x becomes ,(w+1).x */
			if (x == 0)
				n = snprintf(buf + used, sizeof(buf) - used,
				    ",%ld%s", w, p);
			else
				n = snprintf(buf + used, sizeof(buf) - used,
				    ",%ld.%ld%s", w + 1, x, p);
			if (n < 0 || (size_t)n >= sizeof(buf) - used)
				errx(2, "bad field specification");

			/* shift over argv, including its NULL terminator */
			for (j = i + 1; j < *argc; j++)
				argv[j] = argv[j + 1];
			*argc -= 1;
		}
		if ((argv[i] = strdup(buf)) == NULL)
			err(2, NULL);
	}
}
281:
282: /*
283: * ascii, Rascii, Ftable, and RFtable map
284: * REC_D -> REC_D; {not REC_D} -> {not REC_D}.
285: * gweights maps REC_D -> (0 or 255); {not REC_D} -> {not gweights[REC_D]}.
286: * Note: when sorting in forward order, to encode character zero in a key,
287: * use \001\001; character 1 becomes \001\002. In this case, character 0
288: * is reserved for the field delimiter. Analagously for -r (fld_d = 255).
289: * Note: this is only good for ASCII sorting. For different LC 's,
290: * all bets are off. See also num_init in number.c
291: */
292: void
1.7 ! deraadt 293: settables(int gflags)
1.1 millert 294: {
295: u_char *wts;
296: int i, incr;
297: for (i=0; i < 256; i++) {
298: ascii[i] = i;
299: if (i > REC_D && i < 255 - REC_D+1)
300: Rascii[i] = 255 - i + 1;
301: else
302: Rascii[i] = 255 - i;
303: if (islower(i)) {
304: Ftable[i] = Ftable[i- ('a' -'A')];
305: RFtable[i] = RFtable[i - ('a' - 'A')];
306: } else if (REC_D>= 'A' && REC_D < 'Z' && i < 'a' && i > REC_D) {
307: Ftable[i] = i + 1;
308: RFtable[i] = Rascii[i] - 1;
309: } else {
310: Ftable[i] = i;
311: RFtable[i] = Rascii[i];
312: }
313: alltable[i] = 1;
314: if (i == '\n' || isprint(i))
315: itable[i] = 1;
316: else itable[i] = 0;
317: if (i == '\n' || i == '\t' || i == ' ' || isalnum(i))
318: dtable[i] = 1;
319: else dtable[i] = 0;
320: }
321: Rascii[REC_D] = RFtable[REC_D] = REC_D;
322: if (REC_D >= 'A' && REC_D < 'Z')
1.2 millert 323: Ftable[REC_D + ('a' - 'A')]++;
1.1 millert 324: if (gflags & R && (!(gflags & F) || !SINGL_FLD))
325: wts = Rascii;
326: else if (!(gflags & F) || !SINGL_FLD)
327: wts = ascii;
328: else if (gflags & R)
329: wts = RFtable;
330: else
331: wts = Ftable;
332: memmove(gweights, wts, sizeof(gweights));
333: incr = (gflags & R) ? -1 : 1;
334: for (i = 0; i < REC_D; i++)
335: gweights[i] += incr;
336: gweights[REC_D] = ((gflags & R) ? 255 : 0);
337: if (SINGL_FLD && gflags & F) {
338: for (i = 0; i < REC_D; i++) {
339: ascii[i] += incr;
340: Rascii[i] += incr;
341: }
342: ascii[REC_D] = Rascii[REC_D] = gweights[REC_D];
343: }
344: }