Annotation of src/usr.bin/sort/fields.c, Revision 1.5
1.5 ! ericj 1: /* $OpenBSD: fields.c,v 1.4 1999/05/24 17:57:17 millert Exp $ */
1.1 millert 2:
3: /*-
4: * Copyright (c) 1993
5: * The Regents of the University of California. All rights reserved.
6: *
7: * This code is derived from software contributed to Berkeley by
8: * Peter McIlroy.
9: *
10: * Redistribution and use in source and binary forms, with or without
11: * modification, are permitted provided that the following conditions
12: * are met:
13: * 1. Redistributions of source code must retain the above copyright
14: * notice, this list of conditions and the following disclaimer.
15: * 2. Redistributions in binary form must reproduce the above copyright
16: * notice, this list of conditions and the following disclaimer in the
17: * documentation and/or other materials provided with the distribution.
18: * 3. All advertising materials mentioning features or use of this software
19: * must display the following acknowledgement:
20: * This product includes software developed by the University of
21: * California, Berkeley and its contributors.
22: * 4. Neither the name of the University nor the names of its contributors
23: * may be used to endorse or promote products derived from this software
24: * without specific prior written permission.
25: *
26: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36: * SUCH DAMAGE.
37: */
38:
39: #ifndef lint
40: #if 0
41: static char sccsid[] = "@(#)fields.c 8.1 (Berkeley) 6/6/93";
42: #else
1.5 ! ericj 43: static char rcsid[] = "$OpenBSD: fields.c,v 1.4 1999/05/24 17:57:17 millert Exp $";
1.1 millert 44: #endif
45: #endif /* not lint */
46:
47: /* Subroutines to generate sort keys. */
48:
49: #include "sort.h"
50:
51: #define blancmange(ptr) { \
52: if (BLANK & d_mask[*(ptr)]) \
53: while (BLANK & d_mask[*(++(ptr))]); \
54: }
55:
56: #define NEXTCOL(pos) { \
57: if (!SEP_FLAG) \
58: while (BLANK & l_d_mask[*(++pos)]); \
59: while (!((FLD_D | REC_D_F) & l_d_mask[*++pos])); \
60: }
61:
62: extern u_char *enterfield __P((u_char *, u_char *, struct field *, int));
63:
64: extern u_char *number __P((u_char *, u_char *, u_char *, u_char *, int));
65:
1.4 millert 66: extern struct coldesc *clist;
1.1 millert 67: extern int ncols;
68:
69: #define DECIMAL '.'
70: #define OFFSET 128
71:
72: u_char TENS[10]; /* TENS[0] = REC_D <= 128 ? 130 - '0' : 2 -'0'... */
73: u_char NEGTENS[10]; /* NEGTENS[0] = REC_D <= 128 ? 126 + '0' : 252 +'0' */
74: u_char *OFF_TENS, *OFF_NTENS; /* TENS - '0', NEGTENS - '0' */
75: u_char fnum[NBINS], rnum[NBINS];
76:
77: /*
78: * constructs sort key with leading recheader, followed by the key,
79: * followed by the original line.
80: */
81: length_t
82: enterkey(keybuf, line, size, fieldtable)
1.3 millert 83: RECHEADER *keybuf; /* pointer to start of key */
1.1 millert 84: DBT *line;
85: int size;
86: struct field fieldtable[];
87: {
88: int i;
1.5 ! ericj 89: u_char *l_d_mask;
! 90: u_char *lineend, *pos;
1.1 millert 91: u_char *endkey, *keypos;
1.5 ! ericj 92: struct coldesc *clpos;
! 93: int col = 1;
1.1 millert 94: struct field *ftpos;
95: l_d_mask = d_mask;
96: pos = (u_char *) line->data - 1;
97: lineend = (u_char *) line->data + line->size-1;
98: /* don't include rec_delimiter */
99: keypos = keybuf->data;
100:
101: for (i = 0; i < ncols; i++) {
102: clpos = clist + i;
1.5 ! ericj 103: for (; (col < clpos->num) && (pos < lineend); col++) {
! 104: NEXTCOL(pos);
! 105: }
1.1 millert 106: if (pos >= lineend)
107: break;
108: clpos->start = SEP_FLAG ? pos + 1 : pos;
109: NEXTCOL(pos);
110: clpos->end = pos;
111: col++;
112: if (pos >= lineend) {
113: clpos->end = lineend;
1.2 millert 114: i++;
1.1 millert 115: break;
116: }
117: }
118: for (; i <= ncols; i++)
119: clist[i].start = clist[i].end = lineend;
120: if (clist[0].start < (u_char *) line->data)
1.2 millert 121: clist[0].start++;
1.1 millert 122: endkey = (u_char *) keybuf + size - line->size;
123: for (ftpos = fieldtable + 1; ftpos->icol.num; ftpos++)
124: if ((keypos = enterfield(keypos, endkey, ftpos,
125: fieldtable->flags)) == NULL)
126: return (1);
127:
128: if (UNIQUE)
129: *(keypos-1) = REC_D;
130: keybuf->offset = keypos - keybuf->data;
131: keybuf->length = keybuf->offset + line->size;
132: if (keybuf->length + sizeof(TRECHEADER) > size)
133: return (1); /* line too long for buffer */
134: memcpy(keybuf->data + keybuf->offset, line->data, line->size);
135: return (0);
136: }
137:
138: /*
139: * constructs a field (as defined by -k) within a key
140: */
141: u_char *
142: enterfield(tablepos, endkey, cur_fld, gflags)
143: struct field *cur_fld;
1.5 ! ericj 144: u_char *tablepos, *endkey;
1.1 millert 145: int gflags;
146: {
1.5 ! ericj 147: u_char *start, *end, *lineend, *mask, *lweight;
1.1 millert 148: struct column icol, tcol;
1.5 ! ericj 149: u_int flags;
1.1 millert 150: u_int Rflag;
1.2 millert 151:
1.1 millert 152: icol = cur_fld->icol;
153: tcol = cur_fld->tcol;
154: flags = cur_fld->flags;
155: start = icol.p->start;
156: lineend = clist[ncols].end;
157: if (flags & BI)
158: blancmange(start);
159: start += icol.indent;
160: start = min(start, lineend);
1.2 millert 161:
1.1 millert 162: if (!tcol.num)
163: end = lineend;
164: else {
165: if (tcol.indent) {
166: end = tcol.p->start;
1.2 millert 167: if (flags & BT)
168: blancmange(end);
1.1 millert 169: end += tcol.indent;
170: end = min(end, lineend);
171: } else
172: end = tcol.p->end;
173: }
174: if (flags & N) {
175: Rflag = (gflags & R ) ^ (flags & R) ? 1 : 0;
176: tablepos = number(tablepos, endkey, start, end, Rflag);
177: return (tablepos);
178: }
179: mask = alltable;
180: mask = cur_fld->mask;
181: lweight = cur_fld->weights;
182: for (; start < end; start++)
183: if (mask[*start]) {
184: if (*start <= 1) {
185: if (tablepos+2 >= endkey)
186: return (NULL);
187: *tablepos++ = lweight[1];
188: *tablepos++ = lweight[*start ? 2 : 1];
189: } else {
190: *tablepos++ = lweight[*start];
191: if (tablepos == endkey)
1.2 millert 192: return (NULL);
1.1 millert 193: }
194: }
195: *tablepos++ = lweight[0];
196: return (tablepos == endkey ? NULL : tablepos);
197: }
198:
199: /* Uses the first bin to assign sign, expsign, 0, and the first
200: * 61 out of the exponent ( (254 - 3 origins - 4 over/underflows)/4 = 61 ).
201: * When sorting in forward order:
202: * use (0-99) -> (130->240) for sorting the mantissa if REC_D <=128;
203: * else use (0-99)->(2-102).
204: * If the exponent is >=61, use another byte for each additional 253
205: * in the exponent. Cutoff is at 567.
206: * To avoid confusing the exponent and the mantissa, use a field delimiter
207: * if the exponent is exactly 61, 61+252, etc--this is ok, since it's the
208: * only time a field delimiter can come in that position.
209: * Reverse order is done analagously.
1.2 millert 210: */
1.1 millert 211:
212: u_char *
213: number(pos, bufend, line, lineend, Rflag)
1.5 ! ericj 214: u_char *line, *pos, *bufend, *lineend;
1.1 millert 215: int Rflag;
216: {
1.5 ! ericj 217: int or_sign, parity = 0;
! 218: int expincr = 1, exponent = -1;
1.1 millert 219: int bite, expsign = 1, sign = 1;
1.5 ! ericj 220: u_char lastvalue, *nonzero, *tline, *C_TENS;
1.1 millert 221: u_char *nweights;
222:
223: if (Rflag)
224: nweights = rnum;
225: else
226: nweights = fnum;
227: if (pos > bufend - 8)
228: return (NULL);
1.2 millert 229: /*
230: * or_sign sets the sort direction:
231: * (-r: +/-)(sign: +/-)(expsign: +/-)
232: */
1.1 millert 233: or_sign = sign ^ expsign ^ Rflag;
234: blancmange(line);
235: if (*line == '-') { /* set the sign */
236: or_sign ^= 1;
237: sign = 0;
238: line++;
239: }
240: /* eat initial zeroes */
1.2 millert 241: for (; *line == '0' && line < lineend; line++)
242: ;
1.1 millert 243: /* calculate exponents < 0 */
244: if (*line == DECIMAL) {
245: exponent = 1;
246: while (*++line == '0' && line < lineend)
247: exponent++;
248: expincr = 0;
249: expsign = 0;
250: }
251: /* next character better be a digit */
252: if (*line < '1' || *line > '9' || line >= lineend) {
253: *pos++ = nweights[127];
254: return (pos);
255: }
256: if (expincr) {
257: for (tline = line-1; *++tline >= '0' &&
258: *tline <= '9' && tline < lineend;)
259: exponent++;
260: }
261: if (exponent > 567) {
262: *pos++ = nweights[sign ? (expsign ? 254 : 128)
263: : (expsign ? 0 : 126)];
264: warnx("exponent out of bounds");
265: return (pos);
266: }
267: bite = min(exponent, 61);
268: *pos++ = nweights[(sign) ? (expsign ? 189+bite : 189-bite)
269: : (expsign ? 64-bite : 64+bite)];
270: if (bite >= 61) {
271: do {
272: exponent -= bite;
273: bite = min(exponent, 254);
274: *pos++ = nweights[or_sign ? 254-bite : bite];
275: } while (bite == 254);
276: }
277: C_TENS = or_sign ? OFF_NTENS : OFF_TENS;
278: for (; line < lineend; line++) {
279: if (*line >= '0' && *line <= '9') {
280: if (parity) {
281: *pos++ = C_TENS[lastvalue] + (or_sign ? - *line
282: : *line);
283: if (pos == bufend)
284: return (NULL);
285: if (*line != '0' || lastvalue != '0')
286: nonzero = pos;
287: } else
288: lastvalue = *line;
289: parity ^= 1;
290: } else if(*line == DECIMAL) {
291: if(!expincr) /* a decimal already occurred once */
292: break;
293: expincr = 0;
294: } else
295: break;
296: }
297: if (parity && lastvalue != '0') {
298: *pos++ = or_sign ? OFF_NTENS[lastvalue] - '0' :
299: OFF_TENS[lastvalue] + '0';
300: } else
301: pos = nonzero;
302: if (pos > bufend-1)
303: return (NULL);
304: *pos++ = or_sign ? nweights[254] : nweights[0];
305: return (pos);
306: }
307:
308: /* This forces a gap around the record delimiter
309: * Thus fnum has vaues over (0,254) -> ((0,REC_D-1),(REC_D+1,255));
310: * rnum over (0,254) -> (255,REC_D+1),(REC_D-1,0))
1.2 millert 311: */
1.1 millert 312: void
313: num_init()
314: {
315: int i;
316: TENS[0] = REC_D <=128 ? 130 - '0' : 2 - '0';
317: NEGTENS[0] = REC_D <=128 ? 126 + '0' : 254 + '0';
318: OFF_TENS = TENS - '0';
319: OFF_NTENS = NEGTENS - '0';
320: for (i = 1; i < 10; i++) {
1.2 millert 321: TENS[i] = TENS[i - 1] + 10;
322: NEGTENS[i] = NEGTENS[i - 1] - 10;
1.1 millert 323: }
324: for (i = 0; i < REC_D; i++) {
325: fnum[i] = i;
1.2 millert 326: rnum[255 - i] = i;
1.1 millert 327: }
328: for (i = REC_D; i <255; i++) {
1.2 millert 329: fnum[i] = i + 1;
330: rnum[255 - i] = i - 1;
1.1 millert 331: }
332: }