Annotation of src/usr.bin/sort/fields.c, Revision 1.13
1.13 ! millert 1: /* $OpenBSD: fields.c,v 1.12 2007/08/21 20:29:25 millert Exp $ */
1.1 millert 2:
3: /*-
4: * Copyright (c) 1993
5: * The Regents of the University of California. All rights reserved.
6: *
7: * This code is derived from software contributed to Berkeley by
8: * Peter McIlroy.
9: *
10: * Redistribution and use in source and binary forms, with or without
11: * modification, are permitted provided that the following conditions
12: * are met:
13: * 1. Redistributions of source code must retain the above copyright
14: * notice, this list of conditions and the following disclaimer.
15: * 2. Redistributions in binary form must reproduce the above copyright
16: * notice, this list of conditions and the following disclaimer in the
17: * documentation and/or other materials provided with the distribution.
1.7 millert 18: * 3. Neither the name of the University nor the names of its contributors
1.1 millert 19: * may be used to endorse or promote products derived from this software
20: * without specific prior written permission.
21: *
22: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32: * SUCH DAMAGE.
33: */
34:
35: #ifndef lint
36: #if 0
37: static char sccsid[] = "@(#)fields.c 8.1 (Berkeley) 6/6/93";
38: #else
1.13 ! millert 39: static char rcsid[] = "$OpenBSD: fields.c,v 1.12 2007/08/21 20:29:25 millert Exp $";
1.1 millert 40: #endif
41: #endif /* not lint */
42:
43: /* Subroutines to generate sort keys. */
44:
45: #include "sort.h"
46:
47: #define blancmange(ptr) { \
48: if (BLANK & d_mask[*(ptr)]) \
49: while (BLANK & d_mask[*(++(ptr))]); \
50: }
51:
52: #define NEXTCOL(pos) { \
53: if (!SEP_FLAG) \
1.9 tedu 54: while (pos < lineend && BLANK & l_d_mask[*(++pos)]); \
55: while (pos < lineend && !((FLD_D | REC_D_F) & l_d_mask[*++pos])); \
1.1 millert 56: }
57:
1.6 millert 58: extern u_char *enterfield(u_char *, u_char *, struct field *, int);
1.1 millert 59:
1.6 millert 60: extern u_char *number(u_char *, u_char *, u_char *, u_char *, int);
1.1 millert 61:
1.4 millert 62: extern struct coldesc *clist;
1.1 millert 63: extern int ncols;
64:
65: #define DECIMAL '.'
66: #define OFFSET 128
67:
68: u_char TENS[10]; /* TENS[0] = REC_D <= 128 ? 130 - '0' : 2 -'0'... */
69: u_char NEGTENS[10]; /* NEGTENS[0] = REC_D <= 128 ? 126 + '0' : 252 +'0' */
70: u_char *OFF_TENS, *OFF_NTENS; /* TENS - '0', NEGTENS - '0' */
71: u_char fnum[NBINS], rnum[NBINS];
72:
73: /*
74: * constructs sort key with leading recheader, followed by the key,
75: * followed by the original line.
76: */
77: length_t
1.10 deraadt 78: enterkey(RECHEADER *keybuf, /* pointer to start of key */
79: DBT *line, int size, struct field fieldtable[])
1.1 millert 80: {
81: int i;
1.5 ericj 82: u_char *l_d_mask;
83: u_char *lineend, *pos;
1.1 millert 84: u_char *endkey, *keypos;
1.5 ericj 85: struct coldesc *clpos;
86: int col = 1;
1.1 millert 87: struct field *ftpos;
88: l_d_mask = d_mask;
89: pos = (u_char *) line->data - 1;
90: lineend = (u_char *) line->data + line->size-1;
91: /* don't include rec_delimiter */
92: keypos = keybuf->data;
93:
94: for (i = 0; i < ncols; i++) {
95: clpos = clist + i;
1.5 ericj 96: for (; (col < clpos->num) && (pos < lineend); col++) {
97: NEXTCOL(pos);
98: }
1.1 millert 99: if (pos >= lineend)
100: break;
101: clpos->start = SEP_FLAG ? pos + 1 : pos;
102: NEXTCOL(pos);
103: clpos->end = pos;
104: col++;
105: if (pos >= lineend) {
106: clpos->end = lineend;
1.2 millert 107: i++;
1.1 millert 108: break;
109: }
110: }
111: for (; i <= ncols; i++)
112: clist[i].start = clist[i].end = lineend;
113: if (clist[0].start < (u_char *) line->data)
1.2 millert 114: clist[0].start++;
1.1 millert 115: endkey = (u_char *) keybuf + size - line->size;
116: for (ftpos = fieldtable + 1; ftpos->icol.num; ftpos++)
117: if ((keypos = enterfield(keypos, endkey, ftpos,
118: fieldtable->flags)) == NULL)
119: return (1);
120:
1.12 millert 121: if (UNIQUE || STABLE)
1.1 millert 122: *(keypos-1) = REC_D;
123: keybuf->offset = keypos - keybuf->data;
124: keybuf->length = keybuf->offset + line->size;
125: if (keybuf->length + sizeof(TRECHEADER) > size)
126: return (1); /* line too long for buffer */
127: memcpy(keybuf->data + keybuf->offset, line->data, line->size);
128: return (0);
129: }
130:
131: /*
132: * constructs a field (as defined by -k) within a key
133: */
134: u_char *
1.10 deraadt 135: enterfield(u_char *tablepos, u_char *endkey, struct field *cur_fld, int gflags)
1.1 millert 136: {
1.5 ericj 137: u_char *start, *end, *lineend, *mask, *lweight;
1.1 millert 138: struct column icol, tcol;
1.5 ericj 139: u_int flags;
1.1 millert 140: u_int Rflag;
1.2 millert 141:
1.1 millert 142: icol = cur_fld->icol;
143: tcol = cur_fld->tcol;
144: flags = cur_fld->flags;
145: start = icol.p->start;
146: lineend = clist[ncols].end;
147: if (flags & BI)
148: blancmange(start);
149: start += icol.indent;
150: start = min(start, lineend);
1.2 millert 151:
1.1 millert 152: if (!tcol.num)
153: end = lineend;
154: else {
155: if (tcol.indent) {
156: end = tcol.p->start;
1.2 millert 157: if (flags & BT)
158: blancmange(end);
1.1 millert 159: end += tcol.indent;
160: end = min(end, lineend);
161: } else
162: end = tcol.p->end;
163: }
164: if (flags & N) {
165: Rflag = (gflags & R ) ^ (flags & R) ? 1 : 0;
166: tablepos = number(tablepos, endkey, start, end, Rflag);
167: return (tablepos);
168: }
169: mask = alltable;
170: mask = cur_fld->mask;
171: lweight = cur_fld->weights;
172: for (; start < end; start++)
173: if (mask[*start]) {
174: if (*start <= 1) {
175: if (tablepos+2 >= endkey)
176: return (NULL);
177: *tablepos++ = lweight[1];
178: *tablepos++ = lweight[*start ? 2 : 1];
179: } else {
180: *tablepos++ = lweight[*start];
181: if (tablepos == endkey)
1.2 millert 182: return (NULL);
1.1 millert 183: }
184: }
185: *tablepos++ = lweight[0];
186: return (tablepos == endkey ? NULL : tablepos);
187: }
188:
189: /* Uses the first bin to assign sign, expsign, 0, and the first
190: * 61 out of the exponent ( (254 - 3 origins - 4 over/underflows)/4 = 61 ).
191: * When sorting in forward order:
192: * use (0-99) -> (130->240) for sorting the mantissa if REC_D <=128;
193: * else use (0-99)->(2-102).
194: * If the exponent is >=61, use another byte for each additional 253
195: * in the exponent. Cutoff is at 567.
196: * To avoid confusing the exponent and the mantissa, use a field delimiter
197: * if the exponent is exactly 61, 61+252, etc--this is ok, since it's the
198: * only time a field delimiter can come in that position.
1.12 millert 199: * Reverse order is done analogously.
1.2 millert 200: */
1.1 millert 201:
202: u_char *
1.10 deraadt 203: number(u_char *pos, u_char *bufend, u_char *line, u_char *lineend, int Rflag)
1.1 millert 204: {
1.5 ericj 205: int or_sign, parity = 0;
206: int expincr = 1, exponent = -1;
1.13 ! millert 207: int bite, expsign = 1, sign = 1, zeroskip = 0;
! 208: u_char lastvalue, *tline, *C_TENS;
1.1 millert 209: u_char *nweights;
210:
211: if (Rflag)
212: nweights = rnum;
213: else
214: nweights = fnum;
215: if (pos > bufend - 8)
216: return (NULL);
1.2 millert 217: /*
218: * or_sign sets the sort direction:
219: * (-r: +/-)(sign: +/-)(expsign: +/-)
220: */
1.1 millert 221: or_sign = sign ^ expsign ^ Rflag;
222: blancmange(line);
223: if (*line == '-') { /* set the sign */
224: or_sign ^= 1;
225: sign = 0;
226: line++;
227: }
228: /* eat initial zeroes */
1.2 millert 229: for (; *line == '0' && line < lineend; line++)
1.13 ! millert 230: zeroskip = 1;
1.1 millert 231: /* calculate exponents < 0 */
232: if (*line == DECIMAL) {
233: exponent = 1;
234: while (*++line == '0' && line < lineend)
235: exponent++;
236: expincr = 0;
237: expsign = 0;
238: }
239: /* next character better be a digit */
240: if (*line < '1' || *line > '9' || line >= lineend) {
1.13 ! millert 241: if (!zeroskip) {
! 242: *pos++ = nweights[127];
! 243: return (pos);
! 244: }
1.1 millert 245: }
246: if (expincr) {
247: for (tline = line-1; *++tline >= '0' &&
248: *tline <= '9' && tline < lineend;)
249: exponent++;
250: }
251: if (exponent > 567) {
252: *pos++ = nweights[sign ? (expsign ? 254 : 128)
253: : (expsign ? 0 : 126)];
254: warnx("exponent out of bounds");
255: return (pos);
256: }
257: bite = min(exponent, 61);
258: *pos++ = nweights[(sign) ? (expsign ? 189+bite : 189-bite)
259: : (expsign ? 64-bite : 64+bite)];
260: if (bite >= 61) {
261: do {
262: exponent -= bite;
263: bite = min(exponent, 254);
264: *pos++ = nweights[or_sign ? 254-bite : bite];
265: } while (bite == 254);
266: }
267: C_TENS = or_sign ? OFF_NTENS : OFF_TENS;
268: for (; line < lineend; line++) {
269: if (*line >= '0' && *line <= '9') {
270: if (parity) {
271: *pos++ = C_TENS[lastvalue] + (or_sign ? - *line
272: : *line);
273: if (pos == bufend)
274: return (NULL);
275: } else
276: lastvalue = *line;
277: parity ^= 1;
278: } else if(*line == DECIMAL) {
279: if(!expincr) /* a decimal already occurred once */
280: break;
281: expincr = 0;
282: } else
283: break;
284: }
1.13 ! millert 285: if (parity) {
1.1 millert 286: *pos++ = or_sign ? OFF_NTENS[lastvalue] - '0' :
287: OFF_TENS[lastvalue] + '0';
1.13 ! millert 288: }
1.1 millert 289: if (pos > bufend-1)
290: return (NULL);
291: *pos++ = or_sign ? nweights[254] : nweights[0];
292: return (pos);
293: }
294:
295: /* This forces a gap around the record delimiter
1.11 jmc 296: * Thus fnum has values over (0,254) -> ((0,REC_D-1),(REC_D+1,255));
1.1 millert 297: * rnum over (0,254) -> (255,REC_D+1),(REC_D-1,0))
1.2 millert 298: */
1.1 millert 299: void
1.8 deraadt 300: num_init(void)
1.1 millert 301: {
302: int i;
303: TENS[0] = REC_D <=128 ? 130 - '0' : 2 - '0';
304: NEGTENS[0] = REC_D <=128 ? 126 + '0' : 254 + '0';
305: OFF_TENS = TENS - '0';
306: OFF_NTENS = NEGTENS - '0';
307: for (i = 1; i < 10; i++) {
1.2 millert 308: TENS[i] = TENS[i - 1] + 10;
309: NEGTENS[i] = NEGTENS[i - 1] - 10;
1.1 millert 310: }
311: for (i = 0; i < REC_D; i++) {
312: fnum[i] = i;
1.2 millert 313: rnum[255 - i] = i;
1.1 millert 314: }
315: for (i = REC_D; i <255; i++) {
1.2 millert 316: fnum[i] = i + 1;
317: rnum[255 - i] = i - 1;
1.1 millert 318: }
319: }