Annotation of src/usr.bin/sort/init.c, Revision 1.12
1.12 ! deraadt 1: /* $OpenBSD: init.c,v 1.11 2007/09/01 18:13:58 kili Exp $ */
1.1 millert 2:
3: /*-
4: * Copyright (c) 1993
5: * The Regents of the University of California. All rights reserved.
6: *
7: * This code is derived from software contributed to Berkeley by
8: * Peter McIlroy.
9: *
10: * Redistribution and use in source and binary forms, with or without
11: * modification, are permitted provided that the following conditions
12: * are met:
13: * 1. Redistributions of source code must retain the above copyright
14: * notice, this list of conditions and the following disclaimer.
15: * 2. Redistributions in binary form must reproduce the above copyright
16: * notice, this list of conditions and the following disclaimer in the
17: * documentation and/or other materials provided with the distribution.
1.5 millert 18: * 3. Neither the name of the University nor the names of its contributors
1.1 millert 19: * may be used to endorse or promote products derived from this software
20: * without specific prior written permission.
21: *
22: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32: * SUCH DAMAGE.
33: */
34:
35: #include "sort.h"
36:
37: #include <ctype.h>
38: #include <string.h>
39:
1.3 millert 40: extern struct coldesc *clist;
1.1 millert 41: extern int ncols;
42: u_char gweights[NBINS];
43:
1.6 deraadt 44: static void insertcol(struct field *);
45: char *setcolumn(char *, struct field *, int);
46:
1.1 millert 47: /*
48: * clist (list of columns which correspond to one or more icol or tcol)
49: * is in increasing order of columns.
50: * Fields are kept in increasing order of fields.
51: */
52:
53: /*
54: * keep clist in order--inserts a column in a sorted array
55: */
56: static void
1.7 deraadt 57: insertcol(struct field *field)
1.1 millert 58: {
59: int i;
60: for (i = 0; i < ncols; i++)
61: if (field->icol.num <= clist[i].num)
62: break;
63: if (field->icol.num != clist[i].num) {
64: memmove(clist+i+1, clist+i, sizeof(COLDESC)*(ncols-i));
65: clist[i].num = field->icol.num;
66: ncols++;
67: }
68: if (field->tcol.num && field->tcol.num != field->icol.num) {
69: for (i = 0; i < ncols; i++)
70: if (field->tcol.num <= clist[i].num)
71: break;
72: if (field->tcol.num != clist[i].num) {
73: memmove(clist+i+1, clist+i,sizeof(COLDESC)*(ncols-i));
74: clist[i].num = field->tcol.num;
75: ncols++;
76: }
77: }
78: }
79:
80: /*
81: * matches fields with the appropriate columns--n^2 but who cares?
82: */
83: void
1.7 deraadt 84: fldreset(struct field *fldtab)
1.1 millert 85: {
86: int i;
87: fldtab[0].tcol.p = clist+ncols-1;
88: for (++fldtab; fldtab->icol.num; ++fldtab) {
1.2 millert 89: for (i = 0; fldtab->icol.num != clist[i].num; i++)
90: ;
1.1 millert 91: fldtab->icol.p = clist + i;
92: if (!fldtab->tcol.num)
93: continue;
1.2 millert 94: for (i = 0; fldtab->tcol.num != clist[i].num; i++)
95: ;
1.1 millert 96: fldtab->tcol.p = clist + i;
97: }
98: }
99:
100: /*
101: * interprets a column in a -k field
102: */
103: char *
1.7 deraadt 104: setcolumn(char *pos, struct field *cur_fld, int gflag)
1.1 millert 105: {
106: struct column *col;
107: int tmp;
1.2 millert 108:
1.1 millert 109: col = cur_fld->icol.num ? (&(*cur_fld).tcol) : (&(*cur_fld).icol);
1.10 moritz 110: if (sscanf(pos, "%d", &(col->num)) != 1)
111: errx(2, "missing field number");
112: pos++;
1.1 millert 113: while (isdigit(*pos))
114: pos++;
115: if (col->num <= 0 && !(col->num == 0 && col == &(cur_fld->tcol)))
116: errx(2, "field numbers must be positive");
117: if (*pos == '.') {
118: if (!col->num)
119: errx(2, "cannot indent end of line");
1.9 ray 120: pos++;
1.10 moritz 121: if (sscanf(pos, "%d", &(col->indent)) != 1)
122: errx(2, "missing offset");
123: pos++;
1.1 millert 124: while (isdigit(*pos))
125: pos++;
126: if (&cur_fld->icol == col)
127: col->indent--;
128: if (col->indent < 0)
129: errx(2, "illegal offset");
130: }
131: if (optval(*pos, cur_fld->tcol.num))
132: while ((tmp = optval(*pos, cur_fld->tcol.num))) {
133: cur_fld->flags |= tmp;
134: pos++;
135: }
136: if (cur_fld->icol.num == 0)
137: cur_fld->icol.num = 1;
138: return (pos);
139: }
140:
141: int
1.7 deraadt 142: setfield(char *pos, struct field *cur_fld, int gflag)
1.1 millert 143: {
144: int tmp;
145: cur_fld->weights = ascii;
146: cur_fld->mask = alltable;
147: pos = setcolumn(pos, cur_fld, gflag);
148: if (*pos == '\0') /* key extends to EOL. */
149: cur_fld->tcol.num = 0;
150: else {
151: if (*pos != ',')
152: errx(2, "illegal field descriptor");
153: setcolumn((++pos), cur_fld, gflag);
154: }
155: if (!cur_fld->flags)
156: cur_fld->flags = gflag;
157: tmp = cur_fld->flags;
158:
159: /*
160: * Assign appropriate mask table and weight table.
161: * If the global weights are reversed, the local field
162: * must be "re-reversed".
163: */
164: if (((tmp & R) ^ (gflag & R)) && tmp & F)
165: cur_fld->weights = RFtable;
166: else if (tmp & F)
167: cur_fld->weights = Ftable;
168: else if ((tmp & R) ^ (gflag & R))
169: cur_fld->weights = Rascii;
170: if (tmp & I)
171: cur_fld->mask = itable;
172: else if (tmp & D)
173: cur_fld->mask = dtable;
174: cur_fld->flags |= (gflag & (BI | BT));
175: if (!cur_fld->tcol.indent) /* BT has no meaning at end of field */
176: cur_fld->flags &= (D|F|I|N|R|BI);
177: if (cur_fld->tcol.num && !(!(cur_fld->flags & BI)
178: && cur_fld->flags & BT) && (cur_fld->tcol.num <= cur_fld->icol.num
179: && cur_fld->tcol.indent < cur_fld->icol.indent))
180: errx(2, "fields out of order");
181: insertcol(cur_fld);
182: return (cur_fld->tcol.num);
183: }
184:
185: int
1.7 deraadt 186: optval(int desc, int tcolflag)
1.1 millert 187: {
188: switch(desc) {
189: case 'b':
190: if (!tcolflag)
1.2 millert 191: return (BI);
1.1 millert 192: else
1.2 millert 193: return (BT);
194: case 'd': return (D);
195: case 'f': return (F);
196: case 'i': return (I);
197: case 'n': return (N);
198: case 'r': return (R);
199: default: return (0);
1.1 millert 200: }
201: }
202:
/*
 * Parse one decimal component of an obsolescent "+pos"/"-pos" key.
 *
 * p is the text to parse; *ep is set to the first unparsed character.
 * Exits through errx() when no digits are found, the value is negative,
 * or the value is too large.  LONG_MAX itself is refused as well because
 * callers may add one to the result.
 */
static long
fixit_number(char *p, char **ep)
{
	long v;

	errno = 0;		/* so that ERANGE below is not a stale value */
	v = strtol(p, ep, 10);
	if (*ep == p || v < 0 || errno == ERANGE || v == LONG_MAX)
		errx(2, "invalid field number");
	return (v);
}

/*
 * Convert obsolescent "+pos1 [-pos2]" format to POSIX -k form.
 * Note that the conversion is tricky, see the manual for details.
 *
 * Each argument starting with '+' is replaced by a freshly allocated
 * "-k..." string.  When the following argument has the form "-digit..."
 * it is folded into the same key, removed from argv, and *argc is
 * decremented.  Scanning stops at "--", since everything after it is an
 * operand rather than an option.  Errors terminate the program through
 * errx()/err() with status 2.
 */
void
fixit(int *argc, char **argv)
{
	int i, j, n;
	long v, w, x;
	size_t len;
	char *p, *ep, *copy;
	char buf[128], *bufp, *bufend;

	bufend = buf + sizeof(buf);
	for (i = 1; i < *argc; i++) {
		if (strcmp(argv[i], "--") == 0)
			break;
		if (argv[i][0] != '+')
			continue;

		/* +pos1: both parts are zero-based, -k wants one-based. */
		bufp = buf;
		v = fixit_number(argv[i] + 1, &ep);
		p = ep;
		if (*p == '.') {
			x = fixit_number(p + 1, &ep);
			p = ep;
			n = snprintf(bufp, bufend - bufp, "-k%ld.%ld%s",
			    v+1, x+1, p);
		} else {
			n = snprintf(bufp, bufend - bufp, "-k%ld%s",
			    v+1, p);
		}
		if (n == -1 || n >= bufend - bufp)
			errx(2, "bad field specification");
		bufp += n;

		if (argv[i+1] != NULL && argv[i+1][0] == '-' &&
		    isdigit((unsigned char)argv[i+1][1])) {
			/* -pos2: the field is bumped only with an offset. */
			w = fixit_number(argv[i+1] + 1, &ep);
			p = ep;
			x = 0;
			if (*p == '.') {
				x = fixit_number(p + 1, &ep);
				p = ep;
			}
			if (x == 0) {
				n = snprintf(bufp, bufend - bufp,
				    ",%ld%s", w, p);
			} else {
				n = snprintf(bufp, bufend - bufp,
				    ",%ld.%ld%s", w+1, x, p);
			}
			if (n == -1 || n >= bufend - bufp)
				errx(2, "bad field specification");
			bufp += n;

			/* shift over argv (this also moves the NULL) */
			for (j = i+1; j < *argc; j++)
				argv[j] = argv[j+1];
			*argc -= 1;
		}

		/* The length is already known; copy text plus the NUL. */
		len = (size_t)(bufp - buf);
		if ((copy = malloc(len + 1)) == NULL)
			err(2, NULL);
		memcpy(copy, buf, len + 1);
		argv[i] = copy;
	}
}
277:
278: /*
279: * ascii, Rascii, Ftable, and RFtable map
280: * REC_D -> REC_D; {not REC_D} -> {not REC_D}.
281: * gweights maps REC_D -> (0 or 255); {not REC_D} -> {not gweights[REC_D]}.
282: * Note: when sorting in forward order, to encode character zero in a key,
283: * use \001\001; character 1 becomes \001\002. In this case, character 0
284: * is reserved for the field delimiter. Analagously for -r (fld_d = 255).
285: * Note: this is only good for ASCII sorting. For different LC 's,
286: * all bets are off. See also num_init in number.c
287: */
288: void
1.7 deraadt 289: settables(int gflags)
1.1 millert 290: {
291: u_char *wts;
292: int i, incr;
293: for (i=0; i < 256; i++) {
294: ascii[i] = i;
295: if (i > REC_D && i < 255 - REC_D+1)
296: Rascii[i] = 255 - i + 1;
297: else
298: Rascii[i] = 255 - i;
299: if (islower(i)) {
300: Ftable[i] = Ftable[i- ('a' -'A')];
301: RFtable[i] = RFtable[i - ('a' - 'A')];
302: } else if (REC_D>= 'A' && REC_D < 'Z' && i < 'a' && i > REC_D) {
303: Ftable[i] = i + 1;
304: RFtable[i] = Rascii[i] - 1;
305: } else {
306: Ftable[i] = i;
307: RFtable[i] = Rascii[i];
308: }
309: alltable[i] = 1;
310: if (i == '\n' || isprint(i))
311: itable[i] = 1;
312: else itable[i] = 0;
313: if (i == '\n' || i == '\t' || i == ' ' || isalnum(i))
314: dtable[i] = 1;
315: else dtable[i] = 0;
316: }
317: Rascii[REC_D] = RFtable[REC_D] = REC_D;
318: if (REC_D >= 'A' && REC_D < 'Z')
1.2 millert 319: Ftable[REC_D + ('a' - 'A')]++;
1.1 millert 320: if (gflags & R && (!(gflags & F) || !SINGL_FLD))
321: wts = Rascii;
322: else if (!(gflags & F) || !SINGL_FLD)
323: wts = ascii;
324: else if (gflags & R)
325: wts = RFtable;
326: else
327: wts = Ftable;
328: memmove(gweights, wts, sizeof(gweights));
329: incr = (gflags & R) ? -1 : 1;
330: for (i = 0; i < REC_D; i++)
331: gweights[i] += incr;
332: gweights[REC_D] = ((gflags & R) ? 255 : 0);
333: if (SINGL_FLD && gflags & F) {
334: for (i = 0; i < REC_D; i++) {
335: ascii[i] += incr;
336: Rascii[i] += incr;
337: }
338: ascii[REC_D] = Rascii[REC_D] = gweights[REC_D];
339: }
340: }