Annotation of src/usr.bin/sort/fields.c, Revision 1.1
1.1 ! millert 1: /* $OpenBSD$ */
! 2:
! 3: /*-
! 4: * Copyright (c) 1993
! 5: * The Regents of the University of California. All rights reserved.
! 6: *
! 7: * This code is derived from software contributed to Berkeley by
! 8: * Peter McIlroy.
! 9: *
! 10: * Redistribution and use in source and binary forms, with or without
! 11: * modification, are permitted provided that the following conditions
! 12: * are met:
! 13: * 1. Redistributions of source code must retain the above copyright
! 14: * notice, this list of conditions and the following disclaimer.
! 15: * 2. Redistributions in binary form must reproduce the above copyright
! 16: * notice, this list of conditions and the following disclaimer in the
! 17: * documentation and/or other materials provided with the distribution.
! 18: * 3. All advertising materials mentioning features or use of this software
! 19: * must display the following acknowledgement:
! 20: * This product includes software developed by the University of
! 21: * California, Berkeley and its contributors.
! 22: * 4. Neither the name of the University nor the names of its contributors
! 23: * may be used to endorse or promote products derived from this software
! 24: * without specific prior written permission.
! 25: *
! 26: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
! 27: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
! 28: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
! 29: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
! 30: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
! 31: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
! 32: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
! 33: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
! 34: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
! 35: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
! 36: * SUCH DAMAGE.
! 37: */
! 38:
! 39: #ifndef lint
! 40: #if 0
! 41: static char sccsid[] = "@(#)fields.c 8.1 (Berkeley) 6/6/93";
! 42: #else
! 43: static char rcsid[] = "$OpenBSD: $";
! 44: #endif
! 45: #endif /* not lint */
! 46:
! 47: /* Subroutines to generate sort keys. */
! 48:
! 49: #include "sort.h"
! 50:
! 51: #define blancmange(ptr) { \
! 52: if (BLANK & d_mask[*(ptr)]) \
! 53: while (BLANK & d_mask[*(++(ptr))]); \
! 54: }
! 55:
! 56: #define NEXTCOL(pos) { \
! 57: if (!SEP_FLAG) \
! 58: while (BLANK & l_d_mask[*(++pos)]); \
! 59: while (!((FLD_D | REC_D_F) & l_d_mask[*++pos])); \
! 60: }
! 61:
! 62: extern u_char *enterfield __P((u_char *, u_char *, struct field *, int));
! 63:
! 64: extern u_char *number __P((u_char *, u_char *, u_char *, u_char *, int));
! 65:
! 66: extern struct coldesc clist[(ND+1)*2];
! 67: extern int ncols;
! 68:
! 69: #define DECIMAL '.'
! 70: #define OFFSET 128
! 71:
! 72: u_char TENS[10]; /* TENS[0] = REC_D <= 128 ? 130 - '0' : 2 -'0'... */
! 73: u_char NEGTENS[10]; /* NEGTENS[0] = REC_D <= 128 ? 126 + '0' : 252 +'0' */
! 74: u_char *OFF_TENS, *OFF_NTENS; /* TENS - '0', NEGTENS - '0' */
! 75: u_char fnum[NBINS], rnum[NBINS];
! 76:
! 77: /*
! 78: * constructs sort key with leading recheader, followed by the key,
! 79: * followed by the original line.
! 80: */
! 81: length_t
! 82: enterkey(keybuf, line, size, fieldtable)
! 83: struct recheader *keybuf; /* pointer to start of key */
! 84: DBT *line;
! 85: int size;
! 86: struct field fieldtable[];
! 87: {
! 88: int i;
! 89: register u_char *l_d_mask;
! 90: register u_char *lineend, *pos;
! 91: u_char *endkey, *keypos;
! 92: register struct coldesc *clpos;
! 93: register int col = 1;
! 94: struct field *ftpos;
! 95: l_d_mask = d_mask;
! 96: pos = (u_char *) line->data - 1;
! 97: lineend = (u_char *) line->data + line->size-1;
! 98: /* don't include rec_delimiter */
! 99: keypos = keybuf->data;
! 100:
! 101: for (i = 0; i < ncols; i++) {
! 102: clpos = clist + i;
! 103: for (; (col < clpos->num) && (pos < lineend); col++)
! 104: { NEXTCOL(pos); }
! 105: if (pos >= lineend)
! 106: break;
! 107: clpos->start = SEP_FLAG ? pos + 1 : pos;
! 108: NEXTCOL(pos);
! 109: clpos->end = pos;
! 110: col++;
! 111: if (pos >= lineend) {
! 112: clpos->end = lineend;
! 113: ++i;
! 114: break;
! 115: }
! 116: }
! 117: for (; i <= ncols; i++)
! 118: clist[i].start = clist[i].end = lineend;
! 119: if (clist[0].start < (u_char *) line->data)
! 120: ++clist[0].start;
! 121: endkey = (u_char *) keybuf + size - line->size;
! 122: for (ftpos = fieldtable + 1; ftpos->icol.num; ftpos++)
! 123: if ((keypos = enterfield(keypos, endkey, ftpos,
! 124: fieldtable->flags)) == NULL)
! 125: return (1);
! 126:
! 127: if (UNIQUE)
! 128: *(keypos-1) = REC_D;
! 129: keybuf->offset = keypos - keybuf->data;
! 130: keybuf->length = keybuf->offset + line->size;
! 131: if (keybuf->length + sizeof(TRECHEADER) > size)
! 132: return (1); /* line too long for buffer */
! 133: memcpy(keybuf->data + keybuf->offset, line->data, line->size);
! 134: return (0);
! 135: }
! 136:
! 137: /*
! 138: * constructs a field (as defined by -k) within a key
! 139: */
! 140: u_char *
! 141: enterfield(tablepos, endkey, cur_fld, gflags)
! 142: struct field *cur_fld;
! 143: register u_char *tablepos, *endkey;
! 144: int gflags;
! 145: {
! 146: register u_char *start, *end, *lineend, *mask, *lweight;
! 147: struct column icol, tcol;
! 148: register u_int flags;
! 149: u_int Rflag;
! 150: icol = cur_fld->icol;
! 151: tcol = cur_fld->tcol;
! 152: flags = cur_fld->flags;
! 153: start = icol.p->start;
! 154: lineend = clist[ncols].end;
! 155: if (flags & BI)
! 156: blancmange(start);
! 157: start += icol.indent;
! 158: start = min(start, lineend);
! 159: if (!tcol.num)
! 160: end = lineend;
! 161: else {
! 162: if (tcol.indent) {
! 163: end = tcol.p->start;
! 164: if (flags & BT) blancmange(end);
! 165: end += tcol.indent;
! 166: end = min(end, lineend);
! 167: } else
! 168: end = tcol.p->end;
! 169: }
! 170: if (flags & N) {
! 171: Rflag = (gflags & R ) ^ (flags & R) ? 1 : 0;
! 172: tablepos = number(tablepos, endkey, start, end, Rflag);
! 173: return (tablepos);
! 174: }
! 175: mask = alltable;
! 176: mask = cur_fld->mask;
! 177: lweight = cur_fld->weights;
! 178: for (; start < end; start++)
! 179: if (mask[*start]) {
! 180: if (*start <= 1) {
! 181: if (tablepos+2 >= endkey)
! 182: return (NULL);
! 183: *tablepos++ = lweight[1];
! 184: *tablepos++ = lweight[*start ? 2 : 1];
! 185: } else {
! 186: *tablepos++ = lweight[*start];
! 187: if (tablepos == endkey)
! 188: return (NULL);
! 189: }
! 190: }
! 191: *tablepos++ = lweight[0];
! 192: return (tablepos == endkey ? NULL : tablepos);
! 193: }
! 194:
! 195: /* Uses the first bin to assign sign, expsign, 0, and the first
! 196: * 61 out of the exponent ( (254 - 3 origins - 4 over/underflows)/4 = 61 ).
! 197: * When sorting in forward order:
! 198: * use (0-99) -> (130->240) for sorting the mantissa if REC_D <=128;
! 199: * else use (0-99)->(2-102).
! 200: * If the exponent is >=61, use another byte for each additional 253
! 201: * in the exponent. Cutoff is at 567.
! 202: * To avoid confusing the exponent and the mantissa, use a field delimiter
! 203: * if the exponent is exactly 61, 61+252, etc--this is ok, since it's the
! 204: * only time a field delimiter can come in that position.
! 205: * Reverse order is done analagously.
! 206: */
! 207:
! 208: u_char *
! 209: number(pos, bufend, line, lineend, Rflag)
! 210: register u_char *line, *pos, *bufend, *lineend;
! 211: int Rflag;
! 212: {
! 213: register int or_sign, parity = 0;
! 214: register int expincr = 1, exponent = -1;
! 215: int bite, expsign = 1, sign = 1;
! 216: register u_char lastvalue, *nonzero, *tline, *C_TENS;
! 217: u_char *nweights;
! 218:
! 219: if (Rflag)
! 220: nweights = rnum;
! 221: else
! 222: nweights = fnum;
! 223: if (pos > bufend - 8)
! 224: return (NULL);
! 225: /* or_sign sets the sort direction:
! 226: * (-r: +/-)(sign: +/-)(expsign: +/-) */
! 227: or_sign = sign ^ expsign ^ Rflag;
! 228: blancmange(line);
! 229: if (*line == '-') { /* set the sign */
! 230: or_sign ^= 1;
! 231: sign = 0;
! 232: line++;
! 233: }
! 234: /* eat initial zeroes */
! 235: for (; *line == '0' && line < lineend; line++);
! 236: /* calculate exponents < 0 */
! 237: if (*line == DECIMAL) {
! 238: exponent = 1;
! 239: while (*++line == '0' && line < lineend)
! 240: exponent++;
! 241: expincr = 0;
! 242: expsign = 0;
! 243: }
! 244: /* next character better be a digit */
! 245: if (*line < '1' || *line > '9' || line >= lineend) {
! 246: *pos++ = nweights[127];
! 247: return (pos);
! 248: }
! 249: if (expincr) {
! 250: for (tline = line-1; *++tline >= '0' &&
! 251: *tline <= '9' && tline < lineend;)
! 252: exponent++;
! 253: }
! 254: if (exponent > 567) {
! 255: *pos++ = nweights[sign ? (expsign ? 254 : 128)
! 256: : (expsign ? 0 : 126)];
! 257: warnx("exponent out of bounds");
! 258: return (pos);
! 259: }
! 260: bite = min(exponent, 61);
! 261: *pos++ = nweights[(sign) ? (expsign ? 189+bite : 189-bite)
! 262: : (expsign ? 64-bite : 64+bite)];
! 263: if (bite >= 61) {
! 264: do {
! 265: exponent -= bite;
! 266: bite = min(exponent, 254);
! 267: *pos++ = nweights[or_sign ? 254-bite : bite];
! 268: } while (bite == 254);
! 269: }
! 270: C_TENS = or_sign ? OFF_NTENS : OFF_TENS;
! 271: for (; line < lineend; line++) {
! 272: if (*line >= '0' && *line <= '9') {
! 273: if (parity) {
! 274: *pos++ = C_TENS[lastvalue] + (or_sign ? - *line
! 275: : *line);
! 276: if (pos == bufend)
! 277: return (NULL);
! 278: if (*line != '0' || lastvalue != '0')
! 279: nonzero = pos;
! 280: } else
! 281: lastvalue = *line;
! 282: parity ^= 1;
! 283: } else if(*line == DECIMAL) {
! 284: if(!expincr) /* a decimal already occurred once */
! 285: break;
! 286: expincr = 0;
! 287: } else
! 288: break;
! 289: }
! 290: if (parity && lastvalue != '0') {
! 291: *pos++ = or_sign ? OFF_NTENS[lastvalue] - '0' :
! 292: OFF_TENS[lastvalue] + '0';
! 293: } else
! 294: pos = nonzero;
! 295: if (pos > bufend-1)
! 296: return (NULL);
! 297: *pos++ = or_sign ? nweights[254] : nweights[0];
! 298: return (pos);
! 299: }
! 300:
! 301: /* This forces a gap around the record delimiter
! 302: * Thus fnum has vaues over (0,254) -> ((0,REC_D-1),(REC_D+1,255));
! 303: * rnum over (0,254) -> (255,REC_D+1),(REC_D-1,0))
! 304: */
! 305: void
! 306: num_init()
! 307: {
! 308: int i;
! 309: TENS[0] = REC_D <=128 ? 130 - '0' : 2 - '0';
! 310: NEGTENS[0] = REC_D <=128 ? 126 + '0' : 254 + '0';
! 311: OFF_TENS = TENS - '0';
! 312: OFF_NTENS = NEGTENS - '0';
! 313: for (i = 1; i < 10; i++) {
! 314: TENS[i] = TENS[i-1] + 10;
! 315: NEGTENS[i] = NEGTENS[i-1] - 10;
! 316: }
! 317: for (i = 0; i < REC_D; i++) {
! 318: fnum[i] = i;
! 319: rnum[255-i] = i;
! 320: }
! 321: for (i = REC_D; i <255; i++) {
! 322: fnum[i] = i+1;
! 323: rnum[255-i] = i-1;
! 324: }
! 325: }