[BACK]Return to fields.c CVS log [TXT][DIR] Up to [local] / src / usr.bin / sort

Annotation of src/usr.bin/sort/fields.c, Revision 1.9

1.9     ! tedu        1: /*     $OpenBSD: fields.c,v 1.8 2003/06/10 22:20:51 deraadt Exp $      */
1.1       millert     2:
                      3: /*-
                      4:  * Copyright (c) 1993
                      5:  *     The Regents of the University of California.  All rights reserved.
                      6:  *
                      7:  * This code is derived from software contributed to Berkeley by
                      8:  * Peter McIlroy.
                      9:  *
                     10:  * Redistribution and use in source and binary forms, with or without
                     11:  * modification, are permitted provided that the following conditions
                     12:  * are met:
                     13:  * 1. Redistributions of source code must retain the above copyright
                     14:  *    notice, this list of conditions and the following disclaimer.
                     15:  * 2. Redistributions in binary form must reproduce the above copyright
                     16:  *    notice, this list of conditions and the following disclaimer in the
                     17:  *    documentation and/or other materials provided with the distribution.
1.7       millert    18:  * 3. Neither the name of the University nor the names of its contributors
1.1       millert    19:  *    may be used to endorse or promote products derived from this software
                     20:  *    without specific prior written permission.
                     21:  *
                     22:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
                     23:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     24:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     25:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
                     26:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
                     27:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
                     28:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
                     29:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
                     30:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
                     31:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                     32:  * SUCH DAMAGE.
                     33:  */
                     34:
                     35: #ifndef lint
                     36: #if 0
                     37: static char sccsid[] = "@(#)fields.c   8.1 (Berkeley) 6/6/93";
                     38: #else
1.9     ! tedu       39: static char rcsid[] = "$OpenBSD: fields.c,v 1.8 2003/06/10 22:20:51 deraadt Exp $";
1.1       millert    40: #endif
                     41: #endif /* not lint */
                     42:
                     43: /* Subroutines to generate sort keys. */
                     44:
                     45: #include "sort.h"
                     46:
                     47: #define blancmange(ptr) {                                      \
                     48:        if (BLANK & d_mask[*(ptr)])                             \
                     49:                while (BLANK & d_mask[*(++(ptr))]);             \
                     50: }
                     51:
                     52: #define NEXTCOL(pos) {                                         \
                     53:        if (!SEP_FLAG)                                          \
1.9     ! tedu       54:                while (pos < lineend && BLANK & l_d_mask[*(++pos)]);            \
        !            55:        while (pos < lineend && !((FLD_D | REC_D_F) & l_d_mask[*++pos]));       \
1.1       millert    56: }
                     57:
1.6       millert    58: extern u_char *enterfield(u_char *, u_char *, struct field *, int);
1.1       millert    59:
1.6       millert    60: extern u_char *number(u_char *, u_char *, u_char *, u_char *, int);
1.1       millert    61:
1.4       millert    62: extern struct coldesc *clist;
1.1       millert    63: extern int ncols;
                     64:
                     65: #define DECIMAL '.'
                     66: #define OFFSET 128
                     67:
                     68: u_char TENS[10];       /* TENS[0] = REC_D <= 128 ? 130 - '0' : 2 -'0'... */
                     69: u_char NEGTENS[10];    /* NEGTENS[0] = REC_D <= 128 ? 126 + '0' : 252 +'0' */
                     70: u_char *OFF_TENS, *OFF_NTENS;  /* TENS - '0', NEGTENS - '0' */
                     71: u_char fnum[NBINS], rnum[NBINS];
                     72:
                     73: /*
                     74:  * constructs sort key with leading recheader, followed by the key,
                     75:  * followed by the original line.
                     76:  */
                     77: length_t
                     78: enterkey(keybuf, line, size, fieldtable)
1.3       millert    79:        RECHEADER *keybuf;      /* pointer to start of key */
1.1       millert    80:        DBT *line;
                     81:        int size;
                     82:        struct field fieldtable[];
                     83: {
                     84:        int i;
1.5       ericj      85:        u_char *l_d_mask;
                     86:        u_char *lineend, *pos;
1.1       millert    87:        u_char *endkey, *keypos;
1.5       ericj      88:        struct coldesc *clpos;
                     89:        int col = 1;
1.1       millert    90:        struct field *ftpos;
                     91:        l_d_mask = d_mask;
                     92:        pos = (u_char *) line->data - 1;
                     93:        lineend = (u_char *) line->data + line->size-1;
                     94:                                /* don't include rec_delimiter */
                     95:        keypos = keybuf->data;
                     96:
                     97:        for (i = 0; i < ncols; i++) {
                     98:                clpos = clist + i;
1.5       ericj      99:                for (; (col < clpos->num) && (pos < lineend); col++) {
                    100:                        NEXTCOL(pos);
                    101:                }
1.1       millert   102:                if (pos >= lineend)
                    103:                        break;
                    104:                clpos->start = SEP_FLAG ? pos + 1 : pos;
                    105:                NEXTCOL(pos);
                    106:                clpos->end = pos;
                    107:                col++;
                    108:                if (pos >= lineend) {
                    109:                        clpos->end = lineend;
1.2       millert   110:                        i++;
1.1       millert   111:                        break;
                    112:                }
                    113:        }
                    114:        for (; i <= ncols; i++)
                    115:                clist[i].start = clist[i].end = lineend;
                    116:        if (clist[0].start < (u_char *) line->data)
1.2       millert   117:                clist[0].start++;
1.1       millert   118:        endkey = (u_char *) keybuf + size - line->size;
                    119:        for (ftpos = fieldtable + 1; ftpos->icol.num; ftpos++)
                    120:                if ((keypos = enterfield(keypos, endkey, ftpos,
                    121:                    fieldtable->flags)) == NULL)
                    122:                        return (1);
                    123:
                    124:        if (UNIQUE)
                    125:                *(keypos-1) = REC_D;
                    126:        keybuf->offset = keypos - keybuf->data;
                    127:        keybuf->length = keybuf->offset + line->size;
                    128:        if (keybuf->length + sizeof(TRECHEADER) > size)
                    129:                return (1);             /* line too long for buffer */
                    130:        memcpy(keybuf->data + keybuf->offset, line->data, line->size);
                    131:        return (0);
                    132: }
                    133:
                    134: /*
                    135:  * constructs a field (as defined by -k) within a key
                    136:  */
                    137: u_char *
                    138: enterfield(tablepos, endkey, cur_fld, gflags)
                    139:        struct field *cur_fld;
1.5       ericj     140:        u_char *tablepos, *endkey;
1.1       millert   141:        int gflags;
                    142: {
1.5       ericj     143:        u_char *start, *end, *lineend, *mask, *lweight;
1.1       millert   144:        struct column icol, tcol;
1.5       ericj     145:        u_int flags;
1.1       millert   146:        u_int Rflag;
1.2       millert   147:
1.1       millert   148:        icol = cur_fld->icol;
                    149:        tcol = cur_fld->tcol;
                    150:        flags = cur_fld->flags;
                    151:        start = icol.p->start;
                    152:        lineend = clist[ncols].end;
                    153:        if (flags & BI)
                    154:                blancmange(start);
                    155:        start += icol.indent;
                    156:        start = min(start, lineend);
1.2       millert   157:
1.1       millert   158:        if (!tcol.num)
                    159:                end = lineend;
                    160:        else {
                    161:                if (tcol.indent) {
                    162:                        end = tcol.p->start;
1.2       millert   163:                        if (flags & BT)
                    164:                                blancmange(end);
1.1       millert   165:                        end += tcol.indent;
                    166:                        end = min(end, lineend);
                    167:                } else
                    168:                        end = tcol.p->end;
                    169:        }
                    170:        if (flags & N) {
                    171:                Rflag = (gflags & R ) ^ (flags & R) ? 1 : 0;
                    172:                tablepos = number(tablepos, endkey, start, end, Rflag);
                    173:                return (tablepos);
                    174:        }
                    175:        mask = alltable;
                    176:        mask = cur_fld->mask;
                    177:        lweight = cur_fld->weights;
                    178:        for (; start < end; start++)
                    179:                if (mask[*start]) {
                    180:                        if (*start <= 1) {
                    181:                                if (tablepos+2 >= endkey)
                    182:                                        return (NULL);
                    183:                                *tablepos++ = lweight[1];
                    184:                                *tablepos++ = lweight[*start ? 2 : 1];
                    185:                        } else {
                    186:                                *tablepos++ = lweight[*start];
                    187:                                if (tablepos == endkey)
1.2       millert   188:                                        return (NULL);
1.1       millert   189:                        }
                    190:                }
                    191:        *tablepos++ = lweight[0];
                    192:        return (tablepos == endkey ? NULL : tablepos);
                    193: }
                    194:
                    195: /* Uses the first bin to assign sign, expsign, 0, and the first
                    196:  * 61 out of the exponent ( (254 - 3 origins - 4 over/underflows)/4 = 61 ).
                    197:  *   When sorting in forward order:
                    198:  * use (0-99) -> (130->240) for sorting the mantissa if REC_D <=128;
                    199:  * else use (0-99)->(2-102).
                    200:  * If the exponent is >=61, use another byte for each additional 253
                    201:  * in the exponent. Cutoff is at 567.
                    202:  * To avoid confusing the exponent and the mantissa, use a field delimiter
                    203:  * if the exponent is exactly 61, 61+252, etc--this is ok, since it's the
                    204:  * only time a field delimiter can come in that position.
                    205:  * Reverse order is done analagously.
1.2       millert   206:  */
1.1       millert   207:
                    208: u_char *
                    209: number(pos, bufend, line, lineend, Rflag)
1.5       ericj     210:        u_char *line, *pos, *bufend, *lineend;
1.1       millert   211:        int Rflag;
                    212: {
1.5       ericj     213:        int or_sign, parity = 0;
                    214:        int expincr = 1, exponent = -1;
1.1       millert   215:        int bite, expsign = 1, sign = 1;
1.5       ericj     216:        u_char lastvalue, *nonzero, *tline, *C_TENS;
1.1       millert   217:        u_char *nweights;
                    218:
                    219:        if (Rflag)
                    220:                nweights = rnum;
                    221:        else
                    222:                nweights = fnum;
                    223:        if (pos > bufend - 8)
                    224:                return (NULL);
1.2       millert   225:        /*
                    226:         * or_sign sets the sort direction:
                    227:         *      (-r: +/-)(sign: +/-)(expsign: +/-)
                    228:         */
1.1       millert   229:        or_sign = sign ^ expsign ^ Rflag;
                    230:        blancmange(line);
                    231:        if (*line == '-') {     /* set the sign */
                    232:                or_sign ^= 1;
                    233:                sign = 0;
                    234:                line++;
                    235:        }
                    236:        /* eat initial zeroes */
1.2       millert   237:        for (; *line == '0' && line < lineend; line++)
                    238:                ;
1.1       millert   239:        /* calculate exponents < 0 */
                    240:        if (*line == DECIMAL) {
                    241:                exponent = 1;
                    242:                while (*++line == '0' && line < lineend)
                    243:                        exponent++;
                    244:                expincr = 0;
                    245:                expsign = 0;
                    246:        }
                    247:        /* next character better be a digit */
                    248:        if (*line < '1' || *line > '9' || line >= lineend) {
                    249:                *pos++ = nweights[127];
                    250:                return (pos);
                    251:        }
                    252:        if (expincr) {
                    253:                for (tline = line-1; *++tline >= '0' &&
                    254:                    *tline <= '9' && tline < lineend;)
                    255:                        exponent++;
                    256:        }
                    257:        if (exponent > 567) {
                    258:                *pos++ = nweights[sign ? (expsign ? 254 : 128)
                    259:                                        : (expsign ? 0 : 126)];
                    260:                warnx("exponent out of bounds");
                    261:                return (pos);
                    262:        }
                    263:        bite = min(exponent, 61);
                    264:        *pos++ = nweights[(sign) ? (expsign ? 189+bite : 189-bite)
                    265:                                : (expsign ? 64-bite : 64+bite)];
                    266:        if (bite >= 61) {
                    267:                do {
                    268:                        exponent -= bite;
                    269:                        bite = min(exponent, 254);
                    270:                        *pos++ = nweights[or_sign ? 254-bite : bite];
                    271:                } while (bite == 254);
                    272:        }
                    273:        C_TENS = or_sign ? OFF_NTENS : OFF_TENS;
                    274:        for (; line < lineend; line++) {
                    275:                if (*line >= '0' && *line <= '9') {
                    276:                        if (parity) {
                    277:                                *pos++ = C_TENS[lastvalue] + (or_sign ? - *line
                    278:                                                : *line);
                    279:                                if (pos == bufend)
                    280:                                        return (NULL);
                    281:                                if (*line != '0' || lastvalue != '0')
                    282:                                        nonzero = pos;
                    283:                        } else
                    284:                                lastvalue = *line;
                    285:                        parity ^= 1;
                    286:                } else if(*line == DECIMAL) {
                    287:                        if(!expincr)    /* a decimal already occurred once */
                    288:                                break;
                    289:                        expincr = 0;
                    290:                } else
                    291:                        break;
                    292:        }
                    293:        if (parity && lastvalue != '0') {
                    294:                *pos++ = or_sign ? OFF_NTENS[lastvalue] - '0' :
                    295:                                        OFF_TENS[lastvalue] + '0';
                    296:        } else
                    297:                pos = nonzero;
                    298:        if (pos > bufend-1)
                    299:                return (NULL);
                    300:        *pos++ = or_sign ? nweights[254] : nweights[0];
                    301:        return (pos);
                    302: }
                    303:
                    304: /* This forces a gap around the record delimiter
                    305:  * Thus fnum has vaues over (0,254) -> ((0,REC_D-1),(REC_D+1,255));
                    306:  * rnum over (0,254) -> (255,REC_D+1),(REC_D-1,0))
1.2       millert   307:  */
1.1       millert   308: void
1.8       deraadt   309: num_init(void)
1.1       millert   310: {
                    311:        int i;
                    312:        TENS[0] = REC_D <=128 ? 130 - '0' : 2 - '0';
                    313:        NEGTENS[0] = REC_D <=128 ? 126 + '0' : 254 + '0';
                    314:        OFF_TENS = TENS - '0';
                    315:        OFF_NTENS = NEGTENS - '0';
                    316:        for (i = 1; i < 10; i++) {
1.2       millert   317:                TENS[i] = TENS[i - 1] + 10;
                    318:                NEGTENS[i] = NEGTENS[i - 1] - 10;
1.1       millert   319:        }
                    320:        for (i = 0; i < REC_D; i++) {
                    321:                fnum[i] = i;
1.2       millert   322:                rnum[255 - i] = i;
1.1       millert   323:        }
                    324:        for (i = REC_D; i <255; i++) {
1.2       millert   325:                fnum[i] = i + 1;
                    326:                rnum[255 - i] = i - 1;
1.1       millert   327:        }
                    328: }