[BACK]Return to fields.c CVS log [TXT][DIR] Up to [local] / src / usr.bin / sort

Annotation of src/usr.bin/sort/fields.c, Revision 1.13

1.13    ! millert     1: /*     $OpenBSD: fields.c,v 1.12 2007/08/21 20:29:25 millert Exp $     */
1.1       millert     2:
                      3: /*-
                      4:  * Copyright (c) 1993
                      5:  *     The Regents of the University of California.  All rights reserved.
                      6:  *
                      7:  * This code is derived from software contributed to Berkeley by
                      8:  * Peter McIlroy.
                      9:  *
                     10:  * Redistribution and use in source and binary forms, with or without
                     11:  * modification, are permitted provided that the following conditions
                     12:  * are met:
                     13:  * 1. Redistributions of source code must retain the above copyright
                     14:  *    notice, this list of conditions and the following disclaimer.
                     15:  * 2. Redistributions in binary form must reproduce the above copyright
                     16:  *    notice, this list of conditions and the following disclaimer in the
                     17:  *    documentation and/or other materials provided with the distribution.
1.7       millert    18:  * 3. Neither the name of the University nor the names of its contributors
1.1       millert    19:  *    may be used to endorse or promote products derived from this software
                     20:  *    without specific prior written permission.
                     21:  *
                     22:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
                     23:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     24:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     25:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
                     26:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
                     27:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
                     28:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
                     29:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
                     30:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
                     31:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                     32:  * SUCH DAMAGE.
                     33:  */
                     34:
                     35: #ifndef lint
                     36: #if 0
                     37: static char sccsid[] = "@(#)fields.c   8.1 (Berkeley) 6/6/93";
                     38: #else
1.13    ! millert    39: static char rcsid[] = "$OpenBSD: fields.c,v 1.12 2007/08/21 20:29:25 millert Exp $";
1.1       millert    40: #endif
                     41: #endif /* not lint */
                     42:
                     43: /* Subroutines to generate sort keys. */
                     44:
                     45: #include "sort.h"
                     46:
                     47: #define blancmange(ptr) {                                      \
                     48:        if (BLANK & d_mask[*(ptr)])                             \
                     49:                while (BLANK & d_mask[*(++(ptr))]);             \
                     50: }
                     51:
                     52: #define NEXTCOL(pos) {                                         \
                     53:        if (!SEP_FLAG)                                          \
1.9       tedu       54:                while (pos < lineend && BLANK & l_d_mask[*(++pos)]);            \
                     55:        while (pos < lineend && !((FLD_D | REC_D_F) & l_d_mask[*++pos]));       \
1.1       millert    56: }
                     57:
1.6       millert    58: extern u_char *enterfield(u_char *, u_char *, struct field *, int);
1.1       millert    59:
1.6       millert    60: extern u_char *number(u_char *, u_char *, u_char *, u_char *, int);
1.1       millert    61:
1.4       millert    62: extern struct coldesc *clist;
1.1       millert    63: extern int ncols;
                     64:
                     65: #define DECIMAL '.'
                     66: #define OFFSET 128
                     67:
                     68: u_char TENS[10];       /* TENS[0] = REC_D <= 128 ? 130 - '0' : 2 -'0'... */
                     69: u_char NEGTENS[10];    /* NEGTENS[0] = REC_D <= 128 ? 126 + '0' : 252 +'0' */
                     70: u_char *OFF_TENS, *OFF_NTENS;  /* TENS - '0', NEGTENS - '0' */
                     71: u_char fnum[NBINS], rnum[NBINS];
                     72:
                     73: /*
                     74:  * constructs sort key with leading recheader, followed by the key,
                     75:  * followed by the original line.
                     76:  */
                     77: length_t
1.10      deraadt    78: enterkey(RECHEADER *keybuf,    /* pointer to start of key */
                     79:     DBT *line, int size, struct field fieldtable[])
1.1       millert    80: {
                     81:        int i;
1.5       ericj      82:        u_char *l_d_mask;
                     83:        u_char *lineend, *pos;
1.1       millert    84:        u_char *endkey, *keypos;
1.5       ericj      85:        struct coldesc *clpos;
                     86:        int col = 1;
1.1       millert    87:        struct field *ftpos;
                     88:        l_d_mask = d_mask;
                     89:        pos = (u_char *) line->data - 1;
                     90:        lineend = (u_char *) line->data + line->size-1;
                     91:                                /* don't include rec_delimiter */
                     92:        keypos = keybuf->data;
                     93:
                     94:        for (i = 0; i < ncols; i++) {
                     95:                clpos = clist + i;
1.5       ericj      96:                for (; (col < clpos->num) && (pos < lineend); col++) {
                     97:                        NEXTCOL(pos);
                     98:                }
1.1       millert    99:                if (pos >= lineend)
                    100:                        break;
                    101:                clpos->start = SEP_FLAG ? pos + 1 : pos;
                    102:                NEXTCOL(pos);
                    103:                clpos->end = pos;
                    104:                col++;
                    105:                if (pos >= lineend) {
                    106:                        clpos->end = lineend;
1.2       millert   107:                        i++;
1.1       millert   108:                        break;
                    109:                }
                    110:        }
                    111:        for (; i <= ncols; i++)
                    112:                clist[i].start = clist[i].end = lineend;
                    113:        if (clist[0].start < (u_char *) line->data)
1.2       millert   114:                clist[0].start++;
1.1       millert   115:        endkey = (u_char *) keybuf + size - line->size;
                    116:        for (ftpos = fieldtable + 1; ftpos->icol.num; ftpos++)
                    117:                if ((keypos = enterfield(keypos, endkey, ftpos,
                    118:                    fieldtable->flags)) == NULL)
                    119:                        return (1);
                    120:
1.12      millert   121:        if (UNIQUE || STABLE)
1.1       millert   122:                *(keypos-1) = REC_D;
                    123:        keybuf->offset = keypos - keybuf->data;
                    124:        keybuf->length = keybuf->offset + line->size;
                    125:        if (keybuf->length + sizeof(TRECHEADER) > size)
                    126:                return (1);             /* line too long for buffer */
                    127:        memcpy(keybuf->data + keybuf->offset, line->data, line->size);
                    128:        return (0);
                    129: }
                    130:
                    131: /*
                    132:  * constructs a field (as defined by -k) within a key
                    133:  */
                    134: u_char *
1.10      deraadt   135: enterfield(u_char *tablepos, u_char *endkey, struct field *cur_fld, int gflags)
1.1       millert   136: {
1.5       ericj     137:        u_char *start, *end, *lineend, *mask, *lweight;
1.1       millert   138:        struct column icol, tcol;
1.5       ericj     139:        u_int flags;
1.1       millert   140:        u_int Rflag;
1.2       millert   141:
1.1       millert   142:        icol = cur_fld->icol;
                    143:        tcol = cur_fld->tcol;
                    144:        flags = cur_fld->flags;
                    145:        start = icol.p->start;
                    146:        lineend = clist[ncols].end;
                    147:        if (flags & BI)
                    148:                blancmange(start);
                    149:        start += icol.indent;
                    150:        start = min(start, lineend);
1.2       millert   151:
1.1       millert   152:        if (!tcol.num)
                    153:                end = lineend;
                    154:        else {
                    155:                if (tcol.indent) {
                    156:                        end = tcol.p->start;
1.2       millert   157:                        if (flags & BT)
                    158:                                blancmange(end);
1.1       millert   159:                        end += tcol.indent;
                    160:                        end = min(end, lineend);
                    161:                } else
                    162:                        end = tcol.p->end;
                    163:        }
                    164:        if (flags & N) {
                    165:                Rflag = (gflags & R ) ^ (flags & R) ? 1 : 0;
                    166:                tablepos = number(tablepos, endkey, start, end, Rflag);
                    167:                return (tablepos);
                    168:        }
                    169:        mask = alltable;
                    170:        mask = cur_fld->mask;
                    171:        lweight = cur_fld->weights;
                    172:        for (; start < end; start++)
                    173:                if (mask[*start]) {
                    174:                        if (*start <= 1) {
                    175:                                if (tablepos+2 >= endkey)
                    176:                                        return (NULL);
                    177:                                *tablepos++ = lweight[1];
                    178:                                *tablepos++ = lweight[*start ? 2 : 1];
                    179:                        } else {
                    180:                                *tablepos++ = lweight[*start];
                    181:                                if (tablepos == endkey)
1.2       millert   182:                                        return (NULL);
1.1       millert   183:                        }
                    184:                }
                    185:        *tablepos++ = lweight[0];
                    186:        return (tablepos == endkey ? NULL : tablepos);
                    187: }
                    188:
                    189: /* Uses the first bin to assign sign, expsign, 0, and the first
                    190:  * 61 out of the exponent ( (254 - 3 origins - 4 over/underflows)/4 = 61 ).
                    191:  *   When sorting in forward order:
                    192:  * use (0-99) -> (130->240) for sorting the mantissa if REC_D <=128;
                    193:  * else use (0-99)->(2-102).
                    194:  * If the exponent is >=61, use another byte for each additional 253
                    195:  * in the exponent. Cutoff is at 567.
                    196:  * To avoid confusing the exponent and the mantissa, use a field delimiter
                    197:  * if the exponent is exactly 61, 61+252, etc--this is ok, since it's the
                    198:  * only time a field delimiter can come in that position.
1.12      millert   199:  * Reverse order is done analogously.
1.2       millert   200:  */
1.1       millert   201:
                    202: u_char *
1.10      deraadt   203: number(u_char *pos, u_char *bufend, u_char *line, u_char *lineend, int Rflag)
1.1       millert   204: {
1.5       ericj     205:        int or_sign, parity = 0;
                    206:        int expincr = 1, exponent = -1;
1.13    ! millert   207:        int bite, expsign = 1, sign = 1, zeroskip = 0;
        !           208:        u_char lastvalue, *tline, *C_TENS;
1.1       millert   209:        u_char *nweights;
                    210:
                    211:        if (Rflag)
                    212:                nweights = rnum;
                    213:        else
                    214:                nweights = fnum;
                    215:        if (pos > bufend - 8)
                    216:                return (NULL);
1.2       millert   217:        /*
                    218:         * or_sign sets the sort direction:
                    219:         *      (-r: +/-)(sign: +/-)(expsign: +/-)
                    220:         */
1.1       millert   221:        or_sign = sign ^ expsign ^ Rflag;
                    222:        blancmange(line);
                    223:        if (*line == '-') {     /* set the sign */
                    224:                or_sign ^= 1;
                    225:                sign = 0;
                    226:                line++;
                    227:        }
                    228:        /* eat initial zeroes */
1.2       millert   229:        for (; *line == '0' && line < lineend; line++)
1.13    ! millert   230:                zeroskip = 1;
1.1       millert   231:        /* calculate exponents < 0 */
                    232:        if (*line == DECIMAL) {
                    233:                exponent = 1;
                    234:                while (*++line == '0' && line < lineend)
                    235:                        exponent++;
                    236:                expincr = 0;
                    237:                expsign = 0;
                    238:        }
                    239:        /* next character better be a digit */
                    240:        if (*line < '1' || *line > '9' || line >= lineend) {
1.13    ! millert   241:                if (!zeroskip) {
        !           242:                        *pos++ = nweights[127];
        !           243:                        return (pos);
        !           244:                }
1.1       millert   245:        }
                    246:        if (expincr) {
                    247:                for (tline = line-1; *++tline >= '0' &&
                    248:                    *tline <= '9' && tline < lineend;)
                    249:                        exponent++;
                    250:        }
                    251:        if (exponent > 567) {
                    252:                *pos++ = nweights[sign ? (expsign ? 254 : 128)
                    253:                                        : (expsign ? 0 : 126)];
                    254:                warnx("exponent out of bounds");
                    255:                return (pos);
                    256:        }
                    257:        bite = min(exponent, 61);
                    258:        *pos++ = nweights[(sign) ? (expsign ? 189+bite : 189-bite)
                    259:                                : (expsign ? 64-bite : 64+bite)];
                    260:        if (bite >= 61) {
                    261:                do {
                    262:                        exponent -= bite;
                    263:                        bite = min(exponent, 254);
                    264:                        *pos++ = nweights[or_sign ? 254-bite : bite];
                    265:                } while (bite == 254);
                    266:        }
                    267:        C_TENS = or_sign ? OFF_NTENS : OFF_TENS;
                    268:        for (; line < lineend; line++) {
                    269:                if (*line >= '0' && *line <= '9') {
                    270:                        if (parity) {
                    271:                                *pos++ = C_TENS[lastvalue] + (or_sign ? - *line
                    272:                                                : *line);
                    273:                                if (pos == bufend)
                    274:                                        return (NULL);
                    275:                        } else
                    276:                                lastvalue = *line;
                    277:                        parity ^= 1;
                    278:                } else if(*line == DECIMAL) {
                    279:                        if(!expincr)    /* a decimal already occurred once */
                    280:                                break;
                    281:                        expincr = 0;
                    282:                } else
                    283:                        break;
                    284:        }
1.13    ! millert   285:        if (parity) {
1.1       millert   286:                *pos++ = or_sign ? OFF_NTENS[lastvalue] - '0' :
                    287:                                        OFF_TENS[lastvalue] + '0';
1.13    ! millert   288:        }
1.1       millert   289:        if (pos > bufend-1)
                    290:                return (NULL);
                    291:        *pos++ = or_sign ? nweights[254] : nweights[0];
                    292:        return (pos);
                    293: }
                    294:
                    295: /* This forces a gap around the record delimiter
1.11      jmc       296:  * Thus fnum has values over (0,254) -> ((0,REC_D-1),(REC_D+1,255));
1.1       millert   297:  * rnum over (0,254) -> (255,REC_D+1),(REC_D-1,0))
1.2       millert   298:  */
1.1       millert   299: void
1.8       deraadt   300: num_init(void)
1.1       millert   301: {
                    302:        int i;
                    303:        TENS[0] = REC_D <=128 ? 130 - '0' : 2 - '0';
                    304:        NEGTENS[0] = REC_D <=128 ? 126 + '0' : 254 + '0';
                    305:        OFF_TENS = TENS - '0';
                    306:        OFF_NTENS = NEGTENS - '0';
                    307:        for (i = 1; i < 10; i++) {
1.2       millert   308:                TENS[i] = TENS[i - 1] + 10;
                    309:                NEGTENS[i] = NEGTENS[i - 1] - 10;
1.1       millert   310:        }
                    311:        for (i = 0; i < REC_D; i++) {
                    312:                fnum[i] = i;
1.2       millert   313:                rnum[255 - i] = i;
1.1       millert   314:        }
                    315:        for (i = REC_D; i <255; i++) {
1.2       millert   316:                fnum[i] = i + 1;
                    317:                rnum[255 - i] = i - 1;
1.1       millert   318:        }
                    319: }