[BACK]Return to fields.c CVS log [TXT][DIR] Up to [local] / src / usr.bin / sort

Annotation of src/usr.bin/sort/fields.c, Revision 1.1

1.1     ! millert     1: /*     $OpenBSD$       */
        !             2:
        !             3: /*-
        !             4:  * Copyright (c) 1993
        !             5:  *     The Regents of the University of California.  All rights reserved.
        !             6:  *
        !             7:  * This code is derived from software contributed to Berkeley by
        !             8:  * Peter McIlroy.
        !             9:  *
        !            10:  * Redistribution and use in source and binary forms, with or without
        !            11:  * modification, are permitted provided that the following conditions
        !            12:  * are met:
        !            13:  * 1. Redistributions of source code must retain the above copyright
        !            14:  *    notice, this list of conditions and the following disclaimer.
        !            15:  * 2. Redistributions in binary form must reproduce the above copyright
        !            16:  *    notice, this list of conditions and the following disclaimer in the
        !            17:  *    documentation and/or other materials provided with the distribution.
        !            18:  * 3. All advertising materials mentioning features or use of this software
        !            19:  *    must display the following acknowledgement:
        !            20:  *     This product includes software developed by the University of
        !            21:  *     California, Berkeley and its contributors.
        !            22:  * 4. Neither the name of the University nor the names of its contributors
        !            23:  *    may be used to endorse or promote products derived from this software
        !            24:  *    without specific prior written permission.
        !            25:  *
        !            26:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
        !            27:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
        !            28:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
        !            29:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
        !            30:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
        !            31:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
        !            32:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
        !            33:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
        !            34:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
        !            35:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
        !            36:  * SUCH DAMAGE.
        !            37:  */
        !            38:
        !            39: #ifndef lint
        !            40: #if 0
        !            41: static char sccsid[] = "@(#)fields.c   8.1 (Berkeley) 6/6/93";
        !            42: #else
        !            43: static char rcsid[] = "$OpenBSD: $";
        !            44: #endif
        !            45: #endif /* not lint */
        !            46:
        !            47: /* Subroutines to generate sort keys. */
        !            48:
        !            49: #include "sort.h"
        !            50:
        !            51: #define blancmange(ptr) {                                      \
        !            52:        if (BLANK & d_mask[*(ptr)])                             \
        !            53:                while (BLANK & d_mask[*(++(ptr))]);             \
        !            54: }
        !            55:
        !            56: #define NEXTCOL(pos) {                                         \
        !            57:        if (!SEP_FLAG)                                          \
        !            58:                while (BLANK & l_d_mask[*(++pos)]);             \
        !            59:        while (!((FLD_D | REC_D_F) & l_d_mask[*++pos]));        \
        !            60: }
        !            61:
        !            62: extern u_char *enterfield __P((u_char *, u_char *, struct field *, int));
        !            63:
        !            64: extern u_char *number __P((u_char *, u_char *, u_char *, u_char *, int));
        !            65:
        !            66: extern struct coldesc clist[(ND+1)*2];
        !            67: extern int ncols;
        !            68:
        !            69: #define DECIMAL '.'
        !            70: #define OFFSET 128
        !            71:
        !            72: u_char TENS[10];       /* TENS[0] = REC_D <= 128 ? 130 - '0' : 2 -'0'... */
        !            73: u_char NEGTENS[10];    /* NEGTENS[0] = REC_D <= 128 ? 126 + '0' : 252 +'0' */
        !            74: u_char *OFF_TENS, *OFF_NTENS;  /* TENS - '0', NEGTENS - '0' */
        !            75: u_char fnum[NBINS], rnum[NBINS];
        !            76:
        !            77: /*
        !            78:  * constructs sort key with leading recheader, followed by the key,
        !            79:  * followed by the original line.
        !            80:  */
        !            81: length_t
        !            82: enterkey(keybuf, line, size, fieldtable)
        !            83:        struct recheader *keybuf;       /* pointer to start of key */
        !            84:        DBT *line;
        !            85:        int size;
        !            86:        struct field fieldtable[];
        !            87: {
        !            88:        int i;
        !            89:        register u_char *l_d_mask;
        !            90:        register u_char *lineend, *pos;
        !            91:        u_char *endkey, *keypos;
        !            92:        register struct coldesc *clpos;
        !            93:        register int col = 1;
        !            94:        struct field *ftpos;
        !            95:        l_d_mask = d_mask;
        !            96:        pos = (u_char *) line->data - 1;
        !            97:        lineend = (u_char *) line->data + line->size-1;
        !            98:                                /* don't include rec_delimiter */
        !            99:        keypos = keybuf->data;
        !           100:
        !           101:        for (i = 0; i < ncols; i++) {
        !           102:                clpos = clist + i;
        !           103:                for (; (col < clpos->num) && (pos < lineend); col++)
        !           104:                        { NEXTCOL(pos); }
        !           105:                if (pos >= lineend)
        !           106:                        break;
        !           107:                clpos->start = SEP_FLAG ? pos + 1 : pos;
        !           108:                NEXTCOL(pos);
        !           109:                clpos->end = pos;
        !           110:                col++;
        !           111:                if (pos >= lineend) {
        !           112:                        clpos->end = lineend;
        !           113:                        ++i;
        !           114:                        break;
        !           115:                }
        !           116:        }
        !           117:        for (; i <= ncols; i++)
        !           118:                clist[i].start = clist[i].end = lineend;
        !           119:        if (clist[0].start < (u_char *) line->data)
        !           120:                ++clist[0].start;
        !           121:        endkey = (u_char *) keybuf + size - line->size;
        !           122:        for (ftpos = fieldtable + 1; ftpos->icol.num; ftpos++)
        !           123:                if ((keypos = enterfield(keypos, endkey, ftpos,
        !           124:                    fieldtable->flags)) == NULL)
        !           125:                        return (1);
        !           126:
        !           127:        if (UNIQUE)
        !           128:                *(keypos-1) = REC_D;
        !           129:        keybuf->offset = keypos - keybuf->data;
        !           130:        keybuf->length = keybuf->offset + line->size;
        !           131:        if (keybuf->length + sizeof(TRECHEADER) > size)
        !           132:                return (1);             /* line too long for buffer */
        !           133:        memcpy(keybuf->data + keybuf->offset, line->data, line->size);
        !           134:        return (0);
        !           135: }
        !           136:
        !           137: /*
        !           138:  * constructs a field (as defined by -k) within a key
        !           139:  */
        !           140: u_char *
        !           141: enterfield(tablepos, endkey, cur_fld, gflags)
        !           142:        struct field *cur_fld;
        !           143:        register u_char *tablepos, *endkey;
        !           144:        int gflags;
        !           145: {
        !           146:        register u_char *start, *end, *lineend, *mask, *lweight;
        !           147:        struct column icol, tcol;
        !           148:        register u_int flags;
        !           149:        u_int Rflag;
        !           150:        icol = cur_fld->icol;
        !           151:        tcol = cur_fld->tcol;
        !           152:        flags = cur_fld->flags;
        !           153:        start = icol.p->start;
        !           154:        lineend = clist[ncols].end;
        !           155:        if (flags & BI)
        !           156:                blancmange(start);
        !           157:        start += icol.indent;
        !           158:        start = min(start, lineend);
        !           159:        if (!tcol.num)
        !           160:                end = lineend;
        !           161:        else {
        !           162:                if (tcol.indent) {
        !           163:                        end = tcol.p->start;
        !           164:                        if (flags & BT) blancmange(end);
        !           165:                        end += tcol.indent;
        !           166:                        end = min(end, lineend);
        !           167:                } else
        !           168:                        end = tcol.p->end;
        !           169:        }
        !           170:        if (flags & N) {
        !           171:                Rflag = (gflags & R ) ^ (flags & R) ? 1 : 0;
        !           172:                tablepos = number(tablepos, endkey, start, end, Rflag);
        !           173:                return (tablepos);
        !           174:        }
        !           175:        mask = alltable;
        !           176:        mask = cur_fld->mask;
        !           177:        lweight = cur_fld->weights;
        !           178:        for (; start < end; start++)
        !           179:                if (mask[*start]) {
        !           180:                        if (*start <= 1) {
        !           181:                                if (tablepos+2 >= endkey)
        !           182:                                        return (NULL);
        !           183:                                *tablepos++ = lweight[1];
        !           184:                                *tablepos++ = lweight[*start ? 2 : 1];
        !           185:                        } else {
        !           186:                                *tablepos++ = lweight[*start];
        !           187:                                if (tablepos == endkey)
        !           188:                                return (NULL);
        !           189:                        }
        !           190:                }
        !           191:        *tablepos++ = lweight[0];
        !           192:        return (tablepos == endkey ? NULL : tablepos);
        !           193: }
        !           194:
        !           195: /* Uses the first bin to assign sign, expsign, 0, and the first
        !           196:  * 61 out of the exponent ( (254 - 3 origins - 4 over/underflows)/4 = 61 ).
        !           197:  *   When sorting in forward order:
        !           198:  * use (0-99) -> (130->240) for sorting the mantissa if REC_D <=128;
        !           199:  * else use (0-99)->(2-102).
        !           200:  * If the exponent is >=61, use another byte for each additional 253
        !           201:  * in the exponent. Cutoff is at 567.
        !           202:  * To avoid confusing the exponent and the mantissa, use a field delimiter
        !           203:  * if the exponent is exactly 61, 61+252, etc--this is ok, since it's the
        !           204:  * only time a field delimiter can come in that position.
        !           205:  * Reverse order is done analagously.
        !           206: */
        !           207:
        !           208: u_char *
        !           209: number(pos, bufend, line, lineend, Rflag)
        !           210:        register u_char *line, *pos, *bufend, *lineend;
        !           211:        int Rflag;
        !           212: {
        !           213:        register int or_sign, parity = 0;
        !           214:        register int expincr = 1, exponent = -1;
        !           215:        int bite, expsign = 1, sign = 1;
        !           216:        register u_char lastvalue, *nonzero, *tline, *C_TENS;
        !           217:        u_char *nweights;
        !           218:
        !           219:        if (Rflag)
        !           220:                nweights = rnum;
        !           221:        else
        !           222:                nweights = fnum;
        !           223:        if (pos > bufend - 8)
        !           224:                return (NULL);
        !           225:        /* or_sign sets the sort direction:
        !           226:         *      (-r: +/-)(sign: +/-)(expsign: +/-) */
        !           227:        or_sign = sign ^ expsign ^ Rflag;
        !           228:        blancmange(line);
        !           229:        if (*line == '-') {     /* set the sign */
        !           230:                or_sign ^= 1;
        !           231:                sign = 0;
        !           232:                line++;
        !           233:        }
        !           234:        /* eat initial zeroes */
        !           235:        for (; *line == '0' && line < lineend; line++);
        !           236:        /* calculate exponents < 0 */
        !           237:        if (*line == DECIMAL) {
        !           238:                exponent = 1;
        !           239:                while (*++line == '0' && line < lineend)
        !           240:                        exponent++;
        !           241:                expincr = 0;
        !           242:                expsign = 0;
        !           243:        }
        !           244:        /* next character better be a digit */
        !           245:        if (*line < '1' || *line > '9' || line >= lineend) {
        !           246:                *pos++ = nweights[127];
        !           247:                return (pos);
        !           248:        }
        !           249:        if (expincr) {
        !           250:                for (tline = line-1; *++tline >= '0' &&
        !           251:                    *tline <= '9' && tline < lineend;)
        !           252:                        exponent++;
        !           253:        }
        !           254:        if (exponent > 567) {
        !           255:                *pos++ = nweights[sign ? (expsign ? 254 : 128)
        !           256:                                        : (expsign ? 0 : 126)];
        !           257:                warnx("exponent out of bounds");
        !           258:                return (pos);
        !           259:        }
        !           260:        bite = min(exponent, 61);
        !           261:        *pos++ = nweights[(sign) ? (expsign ? 189+bite : 189-bite)
        !           262:                                : (expsign ? 64-bite : 64+bite)];
        !           263:        if (bite >= 61) {
        !           264:                do {
        !           265:                        exponent -= bite;
        !           266:                        bite = min(exponent, 254);
        !           267:                        *pos++ = nweights[or_sign ? 254-bite : bite];
        !           268:                } while (bite == 254);
        !           269:        }
        !           270:        C_TENS = or_sign ? OFF_NTENS : OFF_TENS;
        !           271:        for (; line < lineend; line++) {
        !           272:                if (*line >= '0' && *line <= '9') {
        !           273:                        if (parity) {
        !           274:                                *pos++ = C_TENS[lastvalue] + (or_sign ? - *line
        !           275:                                                : *line);
        !           276:                                if (pos == bufend)
        !           277:                                        return (NULL);
        !           278:                                if (*line != '0' || lastvalue != '0')
        !           279:                                        nonzero = pos;
        !           280:                        } else
        !           281:                                lastvalue = *line;
        !           282:                        parity ^= 1;
        !           283:                } else if(*line == DECIMAL) {
        !           284:                        if(!expincr)    /* a decimal already occurred once */
        !           285:                                break;
        !           286:                        expincr = 0;
        !           287:                } else
        !           288:                        break;
        !           289:        }
        !           290:        if (parity && lastvalue != '0') {
        !           291:                *pos++ = or_sign ? OFF_NTENS[lastvalue] - '0' :
        !           292:                                        OFF_TENS[lastvalue] + '0';
        !           293:        } else
        !           294:                pos = nonzero;
        !           295:        if (pos > bufend-1)
        !           296:                return (NULL);
        !           297:        *pos++ = or_sign ? nweights[254] : nweights[0];
        !           298:        return (pos);
        !           299: }
        !           300:
        !           301: /* This forces a gap around the record delimiter
        !           302:  * Thus fnum has vaues over (0,254) -> ((0,REC_D-1),(REC_D+1,255));
        !           303:  * rnum over (0,254) -> (255,REC_D+1),(REC_D-1,0))
        !           304: */
        !           305: void
        !           306: num_init()
        !           307: {
        !           308:        int i;
        !           309:        TENS[0] = REC_D <=128 ? 130 - '0' : 2 - '0';
        !           310:        NEGTENS[0] = REC_D <=128 ? 126 + '0' : 254 + '0';
        !           311:        OFF_TENS = TENS - '0';
        !           312:        OFF_NTENS = NEGTENS - '0';
        !           313:        for (i = 1; i < 10; i++) {
        !           314:                TENS[i] = TENS[i-1] + 10;
        !           315:                NEGTENS[i] = NEGTENS[i-1] - 10;
        !           316:        }
        !           317:        for (i = 0; i < REC_D; i++) {
        !           318:                fnum[i] = i;
        !           319:                rnum[255-i] = i;
        !           320:        }
        !           321:        for (i = REC_D; i <255; i++) {
        !           322:                fnum[i] = i+1;
        !           323:                rnum[255-i] = i-1;
        !           324:        }
        !           325: }