Annotation of src/usr.bin/sort/init.c, Revision 1.9
1.9 ! ray 1: /* $OpenBSD: init.c,v 1.8 2005/04/11 07:12:03 deraadt Exp $ */
1.1 millert 2:
3: /*-
4: * Copyright (c) 1993
5: * The Regents of the University of California. All rights reserved.
6: *
7: * This code is derived from software contributed to Berkeley by
8: * Peter McIlroy.
9: *
10: * Redistribution and use in source and binary forms, with or without
11: * modification, are permitted provided that the following conditions
12: * are met:
13: * 1. Redistributions of source code must retain the above copyright
14: * notice, this list of conditions and the following disclaimer.
15: * 2. Redistributions in binary form must reproduce the above copyright
16: * notice, this list of conditions and the following disclaimer in the
17: * documentation and/or other materials provided with the distribution.
1.5 millert 18: * 3. Neither the name of the University nor the names of its contributors
1.1 millert 19: * may be used to endorse or promote products derived from this software
20: * without specific prior written permission.
21: *
22: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32: * SUCH DAMAGE.
33: */
34:
35: #ifndef lint
36: #if 0
37: static char sccsid[] = "@(#)init.c 8.1 (Berkeley) 6/6/93";
38: #else
1.9 ! ray 39: static char rcsid[] = "$OpenBSD: init.c,v 1.8 2005/04/11 07:12:03 deraadt Exp $";
1.1 millert 40: #endif
41: #endif /* not lint */
42:
43: #include "sort.h"
44:
45: #include <ctype.h>
46: #include <string.h>
47:
1.3 millert 48: extern struct coldesc *clist;
1.1 millert 49: extern int ncols;
50: u_char gweights[NBINS];
51:
1.6 deraadt 52: static void insertcol(struct field *);
53: char *setcolumn(char *, struct field *, int);
54:
1.1 millert 55: /*
56: * clist (list of columns which correspond to one or more icol or tcol)
57: * is in increasing order of columns.
58: * Fields are kept in increasing order of fields.
59: */
60:
61: /*
62: * keep clist in order--inserts a column in a sorted array
63: */
64: static void
1.7 deraadt 65: insertcol(struct field *field)
1.1 millert 66: {
67: int i;
68: for (i = 0; i < ncols; i++)
69: if (field->icol.num <= clist[i].num)
70: break;
71: if (field->icol.num != clist[i].num) {
72: memmove(clist+i+1, clist+i, sizeof(COLDESC)*(ncols-i));
73: clist[i].num = field->icol.num;
74: ncols++;
75: }
76: if (field->tcol.num && field->tcol.num != field->icol.num) {
77: for (i = 0; i < ncols; i++)
78: if (field->tcol.num <= clist[i].num)
79: break;
80: if (field->tcol.num != clist[i].num) {
81: memmove(clist+i+1, clist+i,sizeof(COLDESC)*(ncols-i));
82: clist[i].num = field->tcol.num;
83: ncols++;
84: }
85: }
86: }
87:
88: /*
89: * matches fields with the appropriate columns--n^2 but who cares?
90: */
91: void
1.7 deraadt 92: fldreset(struct field *fldtab)
1.1 millert 93: {
94: int i;
95: fldtab[0].tcol.p = clist+ncols-1;
96: for (++fldtab; fldtab->icol.num; ++fldtab) {
1.2 millert 97: for (i = 0; fldtab->icol.num != clist[i].num; i++)
98: ;
1.1 millert 99: fldtab->icol.p = clist + i;
100: if (!fldtab->tcol.num)
101: continue;
1.2 millert 102: for (i = 0; fldtab->tcol.num != clist[i].num; i++)
103: ;
1.1 millert 104: fldtab->tcol.p = clist + i;
105: }
106: }
107:
108: /*
109: * interprets a column in a -k field
110: */
111: char *
1.7 deraadt 112: setcolumn(char *pos, struct field *cur_fld, int gflag)
1.1 millert 113: {
114: struct column *col;
115: int tmp;
1.2 millert 116:
1.1 millert 117: col = cur_fld->icol.num ? (&(*cur_fld).tcol) : (&(*cur_fld).icol);
118: pos += sscanf(pos, "%d", &(col->num));
119: while (isdigit(*pos))
120: pos++;
121: if (col->num <= 0 && !(col->num == 0 && col == &(cur_fld->tcol)))
122: errx(2, "field numbers must be positive");
123: if (*pos == '.') {
124: if (!col->num)
125: errx(2, "cannot indent end of line");
1.9 ! ray 126: pos++;
! 127: pos += sscanf(pos, "%d", &(col->indent));
1.1 millert 128: while (isdigit(*pos))
129: pos++;
130: if (&cur_fld->icol == col)
131: col->indent--;
132: if (col->indent < 0)
133: errx(2, "illegal offset");
134: }
135: if (optval(*pos, cur_fld->tcol.num))
136: while ((tmp = optval(*pos, cur_fld->tcol.num))) {
137: cur_fld->flags |= tmp;
138: pos++;
139: }
140: if (cur_fld->icol.num == 0)
141: cur_fld->icol.num = 1;
142: return (pos);
143: }
144:
145: int
1.7 deraadt 146: setfield(char *pos, struct field *cur_fld, int gflag)
1.1 millert 147: {
148: int tmp;
149: cur_fld->weights = ascii;
150: cur_fld->mask = alltable;
151: pos = setcolumn(pos, cur_fld, gflag);
152: if (*pos == '\0') /* key extends to EOL. */
153: cur_fld->tcol.num = 0;
154: else {
155: if (*pos != ',')
156: errx(2, "illegal field descriptor");
157: setcolumn((++pos), cur_fld, gflag);
158: }
159: if (!cur_fld->flags)
160: cur_fld->flags = gflag;
161: tmp = cur_fld->flags;
162:
163: /*
164: * Assign appropriate mask table and weight table.
165: * If the global weights are reversed, the local field
166: * must be "re-reversed".
167: */
168: if (((tmp & R) ^ (gflag & R)) && tmp & F)
169: cur_fld->weights = RFtable;
170: else if (tmp & F)
171: cur_fld->weights = Ftable;
172: else if ((tmp & R) ^ (gflag & R))
173: cur_fld->weights = Rascii;
174: if (tmp & I)
175: cur_fld->mask = itable;
176: else if (tmp & D)
177: cur_fld->mask = dtable;
178: cur_fld->flags |= (gflag & (BI | BT));
179: if (!cur_fld->tcol.indent) /* BT has no meaning at end of field */
180: cur_fld->flags &= (D|F|I|N|R|BI);
181: if (cur_fld->tcol.num && !(!(cur_fld->flags & BI)
182: && cur_fld->flags & BT) && (cur_fld->tcol.num <= cur_fld->icol.num
183: && cur_fld->tcol.indent < cur_fld->icol.indent))
184: errx(2, "fields out of order");
185: insertcol(cur_fld);
186: return (cur_fld->tcol.num);
187: }
188:
189: int
1.7 deraadt 190: optval(int desc, int tcolflag)
1.1 millert 191: {
192: switch(desc) {
193: case 'b':
194: if (!tcolflag)
1.2 millert 195: return (BI);
1.1 millert 196: else
1.2 millert 197: return (BT);
198: case 'd': return (D);
199: case 'f': return (F);
200: case 'i': return (I);
201: case 'n': return (N);
202: case 'r': return (R);
203: default: return (0);
1.1 millert 204: }
205: }
206:
/*
 * Parse a non-negative decimal number at *pp on behalf of fixit() and
 * advance *pp past it.  Exits with "invalid field number" when no digits
 * are present or the value is negative or too large.
 *
 * LONG_MAX itself is refused: it is what strtol() returns on overflow,
 * and refusing it lets the caller add 1 without overflowing.
 */
static long
fixit_num(char **pp)
{
	char *ep;
	long val;

	val = strtol(*pp, &ep, 10);
	if (ep == *pp || val < 0 || val == LONG_MAX)
		errx(2, "invalid field number");
	*pp = ep;
	return (val);
}

/*
 * Convert obsolescent "+pos1 [-pos2]" format to POSIX -k form.
 * Note that the conversion is tricky, see the manual for details.
 *
 * Every argument starting with '+' is replaced by a freshly allocated
 * "-k..." string.  When the following argument looks like "-<digit>..."
 * it is folded into the same key and removed from argv, and *argc is
 * decremented.  Any text after the numbers (flag letters) is copied
 * through unchanged.
 *
 * Exits via errx()/err() on a bad number, on a specification that does
 * not fit the internal buffer, or when memory cannot be allocated.
 */
void
fixit(int *argc, char **argv)
{
	int i, j, n;
	long v, w, x;
	char *p, *next;
	char buf[128], *bufp, *bufend;

	bufend = buf + sizeof(buf);
	for (i = 1; i < *argc; i++) {
		if (argv[i][0] != '+')
			continue;

		/* +pos1: both the field and the offset are bumped by 1. */
		bufp = buf;
		p = argv[i] + 1;
		v = fixit_num(&p);
		if (*p == '.') {
			p++;
			x = fixit_num(&p);
			n = snprintf(bufp, (size_t)(bufend - bufp),
			    "-k%ld.%ld%s", v + 1, x + 1, p);
		} else {
			n = snprintf(bufp, (size_t)(bufend - bufp),
			    "-k%ld%s", v + 1, p);
		}
		if (n < 0 || n >= bufend - bufp)
			errx(2, "bad field specification");
		bufp += n;

		/* Optional -pos2 in the next argument. */
		next = argv[i + 1];
		if (next != NULL && next[0] == '-' &&
		    isdigit((unsigned char)next[1])) {
			p = next + 1;
			w = fixit_num(&p);
			x = 0;
			if (*p == '.') {
				p++;
				x = fixit_num(&p);
			}
			/*
			 * With no (or a zero) offset the field number is
			 * used as is; otherwise the field is bumped by 1
			 * and the offset is kept.
			 */
			if (x == 0) {
				n = snprintf(bufp, (size_t)(bufend - bufp),
				    ",%ld%s", w, p);
			} else {
				n = snprintf(bufp, (size_t)(bufend - bufp),
				    ",%ld.%ld%s", w + 1, x, p);
			}
			if (n < 0 || n >= bufend - bufp)
				errx(2, "bad field specification");

			/*
			 * shift over argv; the last iteration copies
			 * argv[*argc] down, so that slot must be readable.
			 */
			for (j = i + 1; j < *argc; j++)
				argv[j] = argv[j + 1];
			*argc -= 1;
		}
		if ((argv[i] = strdup(buf)) == NULL)
			err(2, NULL);
	}
}
282:
283: /*
284: * ascii, Rascii, Ftable, and RFtable map
285: * REC_D -> REC_D; {not REC_D} -> {not REC_D}.
286: * gweights maps REC_D -> (0 or 255); {not REC_D} -> {not gweights[REC_D]}.
287: * Note: when sorting in forward order, to encode character zero in a key,
288: * use \001\001; character 1 becomes \001\002. In this case, character 0
289: * is reserved for the field delimiter. Analagously for -r (fld_d = 255).
290: * Note: this is only good for ASCII sorting. For different LC 's,
291: * all bets are off. See also num_init in number.c
292: */
293: void
1.7 deraadt 294: settables(int gflags)
1.1 millert 295: {
296: u_char *wts;
297: int i, incr;
298: for (i=0; i < 256; i++) {
299: ascii[i] = i;
300: if (i > REC_D && i < 255 - REC_D+1)
301: Rascii[i] = 255 - i + 1;
302: else
303: Rascii[i] = 255 - i;
304: if (islower(i)) {
305: Ftable[i] = Ftable[i- ('a' -'A')];
306: RFtable[i] = RFtable[i - ('a' - 'A')];
307: } else if (REC_D>= 'A' && REC_D < 'Z' && i < 'a' && i > REC_D) {
308: Ftable[i] = i + 1;
309: RFtable[i] = Rascii[i] - 1;
310: } else {
311: Ftable[i] = i;
312: RFtable[i] = Rascii[i];
313: }
314: alltable[i] = 1;
315: if (i == '\n' || isprint(i))
316: itable[i] = 1;
317: else itable[i] = 0;
318: if (i == '\n' || i == '\t' || i == ' ' || isalnum(i))
319: dtable[i] = 1;
320: else dtable[i] = 0;
321: }
322: Rascii[REC_D] = RFtable[REC_D] = REC_D;
323: if (REC_D >= 'A' && REC_D < 'Z')
1.2 millert 324: Ftable[REC_D + ('a' - 'A')]++;
1.1 millert 325: if (gflags & R && (!(gflags & F) || !SINGL_FLD))
326: wts = Rascii;
327: else if (!(gflags & F) || !SINGL_FLD)
328: wts = ascii;
329: else if (gflags & R)
330: wts = RFtable;
331: else
332: wts = Ftable;
333: memmove(gweights, wts, sizeof(gweights));
334: incr = (gflags & R) ? -1 : 1;
335: for (i = 0; i < REC_D; i++)
336: gweights[i] += incr;
337: gweights[REC_D] = ((gflags & R) ? 255 : 0);
338: if (SINGL_FLD && gflags & F) {
339: for (i = 0; i < REC_D; i++) {
340: ascii[i] += incr;
341: Rascii[i] += incr;
342: }
343: ascii[REC_D] = Rascii[REC_D] = gweights[REC_D];
344: }
345: }