src/usr.bin/sort/fields.c - annotate

Return to fields.c CVS log
Up to [local] / src / usr.bin / sort
Annotation of src/usr.bin/sort/fields.c, Revision 1.4

1.4     ! millert     1: /*     $OpenBSD: fields.c,v 1.3 1997/06/30 05:36:16 millert Exp $      */
1.1       millert     2:
                      3: /*-
                      4:  * Copyright (c) 1993
                      5:  *     The Regents of the University of California.  All rights reserved.
                      6:  *
                      7:  * This code is derived from software contributed to Berkeley by
                      8:  * Peter McIlroy.
                      9:  *
                     10:  * Redistribution and use in source and binary forms, with or without
                     11:  * modification, are permitted provided that the following conditions
                     12:  * are met:
                     13:  * 1. Redistributions of source code must retain the above copyright
                     14:  *    notice, this list of conditions and the following disclaimer.
                     15:  * 2. Redistributions in binary form must reproduce the above copyright
                     16:  *    notice, this list of conditions and the following disclaimer in the
                     17:  *    documentation and/or other materials provided with the distribution.
                     18:  * 3. All advertising materials mentioning features or use of this software
                     19:  *    must display the following acknowledgement:
                     20:  *     This product includes software developed by the University of
                     21:  *     California, Berkeley and its contributors.
                     22:  * 4. Neither the name of the University nor the names of its contributors
                     23:  *    may be used to endorse or promote products derived from this software
                     24:  *    without specific prior written permission.
                     25:  *
                     26:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
                     27:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     28:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     29:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
                     30:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
                     31:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
                     32:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
                     33:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
                     34:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
                     35:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                     36:  * SUCH DAMAGE.
                     37:  */
                     38:
                     39: #ifndef lint
                     40: #if 0
                     41: static char sccsid[] = "@(#)fields.c   8.1 (Berkeley) 6/6/93";
                     42: #else
1.4     ! millert    43: static char rcsid[] = "$OpenBSD: fields.c,v 1.3 1997/06/30 05:36:16 millert Exp $";
1.1       millert    44: #endif
                     45: #endif /* not lint */
                     46:
                     47: /* Subroutines to generate sort keys. */
                     48:
                     49: #include "sort.h"
                     50:
                     51: #define blancmange(ptr) {                                      \
                     52:        if (BLANK & d_mask[*(ptr)])                             \
                     53:                while (BLANK & d_mask[*(++(ptr))]);             \
                     54: }
                     55:
                     56: #define NEXTCOL(pos) {                                         \
                     57:        if (!SEP_FLAG)                                          \
                     58:                while (BLANK & l_d_mask[*(++pos)]);             \
                     59:        while (!((FLD_D | REC_D_F) & l_d_mask[*++pos]));        \
                     60: }
                     61:
                     62: extern u_char *enterfield __P((u_char *, u_char *, struct field *, int));
                     63:
                     64: extern u_char *number __P((u_char *, u_char *, u_char *, u_char *, int));
                     65:
1.4     ! millert    66: extern struct coldesc *clist;
1.1       millert    67: extern int ncols;
                     68:
                     69: #define DECIMAL '.'
                     70: #define OFFSET 128
                     71:
                     72: u_char TENS[10];       /* TENS[0] = REC_D <= 128 ? 130 - '0' : 2 -'0'... */
                     73: u_char NEGTENS[10];    /* NEGTENS[0] = REC_D <= 128 ? 126 + '0' : 252 +'0' */
                     74: u_char *OFF_TENS, *OFF_NTENS;  /* TENS - '0', NEGTENS - '0' */
                     75: u_char fnum[NBINS], rnum[NBINS];
                     76:
                     77: /*
                     78:  * constructs sort key with leading recheader, followed by the key,
                     79:  * followed by the original line.
                     80:  */
                     81: length_t
                     82: enterkey(keybuf, line, size, fieldtable)
1.3       millert    83:        RECHEADER *keybuf;      /* pointer to start of key */
1.1       millert    84:        DBT *line;
                     85:        int size;
                     86:        struct field fieldtable[];
                     87: {
                     88:        int i;
                     89:        register u_char *l_d_mask;
                     90:        register u_char *lineend, *pos;
                     91:        u_char *endkey, *keypos;
                     92:        register struct coldesc *clpos;
                     93:        register int col = 1;
                     94:        struct field *ftpos;
                     95:        l_d_mask = d_mask;
                     96:        pos = (u_char *) line->data - 1;
                     97:        lineend = (u_char *) line->data + line->size-1;
                     98:                                /* don't include rec_delimiter */
                     99:        keypos = keybuf->data;
                    100:
                    101:        for (i = 0; i < ncols; i++) {
                    102:                clpos = clist + i;
                    103:                for (; (col < clpos->num) && (pos < lineend); col++)
                    104:                        { NEXTCOL(pos); }
                    105:                if (pos >= lineend)
                    106:                        break;
                    107:                clpos->start = SEP_FLAG ? pos + 1 : pos;
                    108:                NEXTCOL(pos);
                    109:                clpos->end = pos;
                    110:                col++;
                    111:                if (pos >= lineend) {
                    112:                        clpos->end = lineend;
1.2       millert   113:                        i++;
1.1       millert   114:                        break;
                    115:                }
                    116:        }
                    117:        for (; i <= ncols; i++)
                    118:                clist[i].start = clist[i].end = lineend;
                    119:        if (clist[0].start < (u_char *) line->data)
1.2       millert   120:                clist[0].start++;
1.1       millert   121:        endkey = (u_char *) keybuf + size - line->size;
                    122:        for (ftpos = fieldtable + 1; ftpos->icol.num; ftpos++)
                    123:                if ((keypos = enterfield(keypos, endkey, ftpos,
                    124:                    fieldtable->flags)) == NULL)
                    125:                        return (1);
                    126:
                    127:        if (UNIQUE)
                    128:                *(keypos-1) = REC_D;
                    129:        keybuf->offset = keypos - keybuf->data;
                    130:        keybuf->length = keybuf->offset + line->size;
                    131:        if (keybuf->length + sizeof(TRECHEADER) > size)
                    132:                return (1);             /* line too long for buffer */
                    133:        memcpy(keybuf->data + keybuf->offset, line->data, line->size);
                    134:        return (0);
                    135: }
                    136:
                    137: /*
                    138:  * constructs a field (as defined by -k) within a key
                    139:  */
                    140: u_char *
                    141: enterfield(tablepos, endkey, cur_fld, gflags)
                    142:        struct field *cur_fld;
                    143:        register u_char *tablepos, *endkey;
                    144:        int gflags;
                    145: {
                    146:        register u_char *start, *end, *lineend, *mask, *lweight;
                    147:        struct column icol, tcol;
                    148:        register u_int flags;
                    149:        u_int Rflag;
1.2       millert   150:
1.1       millert   151:        icol = cur_fld->icol;
                    152:        tcol = cur_fld->tcol;
                    153:        flags = cur_fld->flags;
                    154:        start = icol.p->start;
                    155:        lineend = clist[ncols].end;
                    156:        if (flags & BI)
                    157:                blancmange(start);
                    158:        start += icol.indent;
                    159:        start = min(start, lineend);
1.2       millert   160:
1.1       millert   161:        if (!tcol.num)
                    162:                end = lineend;
                    163:        else {
                    164:                if (tcol.indent) {
                    165:                        end = tcol.p->start;
1.2       millert   166:                        if (flags & BT)
                    167:                                blancmange(end);
1.1       millert   168:                        end += tcol.indent;
                    169:                        end = min(end, lineend);
                    170:                } else
                    171:                        end = tcol.p->end;
                    172:        }
                    173:        if (flags & N) {
                    174:                Rflag = (gflags & R ) ^ (flags & R) ? 1 : 0;
                    175:                tablepos = number(tablepos, endkey, start, end, Rflag);
                    176:                return (tablepos);
                    177:        }
                    178:        mask = alltable;
                    179:        mask = cur_fld->mask;
                    180:        lweight = cur_fld->weights;
                    181:        for (; start < end; start++)
                    182:                if (mask[*start]) {
                    183:                        if (*start <= 1) {
                    184:                                if (tablepos+2 >= endkey)
                    185:                                        return (NULL);
                    186:                                *tablepos++ = lweight[1];
                    187:                                *tablepos++ = lweight[*start ? 2 : 1];
                    188:                        } else {
                    189:                                *tablepos++ = lweight[*start];
                    190:                                if (tablepos == endkey)
1.2       millert   191:                                        return (NULL);
1.1       millert   192:                        }
                    193:                }
                    194:        *tablepos++ = lweight[0];
                    195:        return (tablepos == endkey ? NULL : tablepos);
                    196: }
                    197:
                    198: /* Uses the first bin to assign sign, expsign, 0, and the first
                    199:  * 61 out of the exponent ( (254 - 3 origins - 4 over/underflows)/4 = 61 ).
                    200:  *   When sorting in forward order:
                    201:  * use (0-99) -> (130->240) for sorting the mantissa if REC_D <=128;
                    202:  * else use (0-99)->(2-102).
                    203:  * If the exponent is >=61, use another byte for each additional 253
                    204:  * in the exponent. Cutoff is at 567.
                    205:  * To avoid confusing the exponent and the mantissa, use a field delimiter
                    206:  * if the exponent is exactly 61, 61+252, etc--this is ok, since it's the
                    207:  * only time a field delimiter can come in that position.
                    208:  * Reverse order is done analagously.
1.2       millert   209:  */
1.1       millert   210:
                    211: u_char *
                    212: number(pos, bufend, line, lineend, Rflag)
                    213:        register u_char *line, *pos, *bufend, *lineend;
                    214:        int Rflag;
                    215: {
                    216:        register int or_sign, parity = 0;
                    217:        register int expincr = 1, exponent = -1;
                    218:        int bite, expsign = 1, sign = 1;
                    219:        register u_char lastvalue, *nonzero, *tline, *C_TENS;
                    220:        u_char *nweights;
                    221:
                    222:        if (Rflag)
                    223:                nweights = rnum;
                    224:        else
                    225:                nweights = fnum;
                    226:        if (pos > bufend - 8)
                    227:                return (NULL);
1.2       millert   228:        /*
                    229:         * or_sign sets the sort direction:
                    230:         *      (-r: +/-)(sign: +/-)(expsign: +/-)
                    231:         */
1.1       millert   232:        or_sign = sign ^ expsign ^ Rflag;
                    233:        blancmange(line);
                    234:        if (*line == '-') {     /* set the sign */
                    235:                or_sign ^= 1;
                    236:                sign = 0;
                    237:                line++;
                    238:        }
                    239:        /* eat initial zeroes */
1.2       millert   240:        for (; *line == '0' && line < lineend; line++)
                    241:                ;
1.1       millert   242:        /* calculate exponents < 0 */
                    243:        if (*line == DECIMAL) {
                    244:                exponent = 1;
                    245:                while (*++line == '0' && line < lineend)
                    246:                        exponent++;
                    247:                expincr = 0;
                    248:                expsign = 0;
                    249:        }
                    250:        /* next character better be a digit */
                    251:        if (*line < '1' || *line > '9' || line >= lineend) {
                    252:                *pos++ = nweights[127];
                    253:                return (pos);
                    254:        }
                    255:        if (expincr) {
                    256:                for (tline = line-1; *++tline >= '0' &&
                    257:                    *tline <= '9' && tline < lineend;)
                    258:                        exponent++;
                    259:        }
                    260:        if (exponent > 567) {
                    261:                *pos++ = nweights[sign ? (expsign ? 254 : 128)
                    262:                                        : (expsign ? 0 : 126)];
                    263:                warnx("exponent out of bounds");
                    264:                return (pos);
                    265:        }
                    266:        bite = min(exponent, 61);
                    267:        *pos++ = nweights[(sign) ? (expsign ? 189+bite : 189-bite)
                    268:                                : (expsign ? 64-bite : 64+bite)];
                    269:        if (bite >= 61) {
                    270:                do {
                    271:                        exponent -= bite;
                    272:                        bite = min(exponent, 254);
                    273:                        *pos++ = nweights[or_sign ? 254-bite : bite];
                    274:                } while (bite == 254);
                    275:        }
                    276:        C_TENS = or_sign ? OFF_NTENS : OFF_TENS;
                    277:        for (; line < lineend; line++) {
                    278:                if (*line >= '0' && *line <= '9') {
                    279:                        if (parity) {
                    280:                                *pos++ = C_TENS[lastvalue] + (or_sign ? - *line
                    281:                                                : *line);
                    282:                                if (pos == bufend)
                    283:                                        return (NULL);
                    284:                                if (*line != '0' || lastvalue != '0')
                    285:                                        nonzero = pos;
                    286:                        } else
                    287:                                lastvalue = *line;
                    288:                        parity ^= 1;
                    289:                } else if(*line == DECIMAL) {
                    290:                        if(!expincr)    /* a decimal already occurred once */
                    291:                                break;
                    292:                        expincr = 0;
                    293:                } else
                    294:                        break;
                    295:        }
                    296:        if (parity && lastvalue != '0') {
                    297:                *pos++ = or_sign ? OFF_NTENS[lastvalue] - '0' :
                    298:                                        OFF_TENS[lastvalue] + '0';
                    299:        } else
                    300:                pos = nonzero;
                    301:        if (pos > bufend-1)
                    302:                return (NULL);
                    303:        *pos++ = or_sign ? nweights[254] : nweights[0];
                    304:        return (pos);
                    305: }
                    306:
                    307: /* This forces a gap around the record delimiter
                    308:  * Thus fnum has vaues over (0,254) -> ((0,REC_D-1),(REC_D+1,255));
                    309:  * rnum over (0,254) -> (255,REC_D+1),(REC_D-1,0))
1.2       millert   310:  */
1.1       millert   311: void
                    312: num_init()
                    313: {
                    314:        int i;
                    315:        TENS[0] = REC_D <=128 ? 130 - '0' : 2 - '0';
                    316:        NEGTENS[0] = REC_D <=128 ? 126 + '0' : 254 + '0';
                    317:        OFF_TENS = TENS - '0';
                    318:        OFF_NTENS = NEGTENS - '0';
                    319:        for (i = 1; i < 10; i++) {
1.2       millert   320:                TENS[i] = TENS[i - 1] + 10;
                    321:                NEGTENS[i] = NEGTENS[i - 1] - 10;
1.1       millert   322:        }
                    323:        for (i = 0; i < REC_D; i++) {
                    324:                fnum[i] = i;
1.2       millert   325:                rnum[255 - i] = i;
1.1       millert   326:        }
                    327:        for (i = REC_D; i <255; i++) {
1.2       millert   328:                fnum[i] = i + 1;
                    329:                rnum[255 - i] = i - 1;
1.1       millert   330:        }
                    331: }