[BACK]Return to msort.c CVS log [TXT][DIR] Up to [local] / src / usr.bin / sort

Annotation of src/usr.bin/sort/msort.c, Revision 1.16

1.16    ! naddy       1: /*     $OpenBSD: msort.c,v 1.15 2006/10/18 23:30:43 millert Exp $      */
1.1       millert     2:
                      3: /*-
                      4:  * Copyright (c) 1993
                      5:  *     The Regents of the University of California.  All rights reserved.
                      6:  *
                      7:  * This code is derived from software contributed to Berkeley by
                      8:  * Peter McIlroy.
                      9:  *
                     10:  * Redistribution and use in source and binary forms, with or without
                     11:  * modification, are permitted provided that the following conditions
                     12:  * are met:
                     13:  * 1. Redistributions of source code must retain the above copyright
                     14:  *    notice, this list of conditions and the following disclaimer.
                     15:  * 2. Redistributions in binary form must reproduce the above copyright
                     16:  *    notice, this list of conditions and the following disclaimer in the
                     17:  *    documentation and/or other materials provided with the distribution.
1.12      millert    18:  * 3. Neither the name of the University nor the names of its contributors
1.1       millert    19:  *    may be used to endorse or promote products derived from this software
                     20:  *    without specific prior written permission.
                     21:  *
                     22:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
                     23:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     24:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     25:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
                     26:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
                     27:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
                     28:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
                     29:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
                     30:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
                     31:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                     32:  * SUCH DAMAGE.
                     33:  */
                     34:
                     35: #ifndef lint
                     36: #if 0
                     37: static char sccsid[] = "@(#)msort.c    8.1 (Berkeley) 6/6/93";
                     38: #else
1.16    ! naddy      39: static char rcsid[] = "$OpenBSD: msort.c,v 1.15 2006/10/18 23:30:43 millert Exp $";
1.1       millert    40: #endif
                     41: #endif /* not lint */
                     42:
                     43: #include "sort.h"
                     44: #include "fsort.h"
                     45:
                     46: #include <stdlib.h>
                     47: #include <string.h>
                     48: #include <unistd.h>
                     49:
                     50: /* Subroutines using comparisons: merge sort and check order */
                     51: #define DELETE (1)
1.5       millert    52: #define LALIGN(n) ((n+(sizeof(long)-1)) & ~(sizeof(long)-1))
1.1       millert    53:
                     54: typedef struct mfile {
                     55:        u_char *end;
                     56:        short flno;
1.6       millert    57:        RECHEADER rec[1];
1.1       millert    58: } MFILE;
                     59: typedef struct tmfile {
                     60:        u_char *end;
                     61:        short flno;
1.6       millert    62:        TRECHEADER rec[1];
1.1       millert    63: } TMFILE;
                     64: u_char *wts, *wts1 = 0;
                     65: struct mfile *cfilebuf;
                     66:
1.11      millert    67: static int cmp(RECHEADER *, RECHEADER *);
                     68: static int insert(struct mfile **, struct mfile **, int, int);
1.1       millert    69:
                     70: void
1.14      deraadt    71: fmerge(int binno, union f_handle files, int nfiles,
                     72:     int (*get)(int, union f_handle, int, RECHEADER *, u_char *, struct field *),
                     73:     FILE *outfp, void (*fput)(RECHEADER *, FILE *), struct field *ftbl)
1.1       millert    74: {
                     75:        FILE *tout;
                     76:        int i, j, last;
1.6       millert    77:        void (*put)(RECHEADER *, FILE *);
1.1       millert    78:        struct tempfile *l_fstack;
                     79:
                     80:        wts = ftbl->weights;
1.9       ericj      81:        if (!UNIQUE && SINGL_FLD && (ftbl->flags & F))
1.1       millert    82:                wts1 = (ftbl->flags & R) ? Rascii : ascii;
1.7       millert    83:        if (!cfilebuf) {
1.1       millert    84:                cfilebuf = malloc(MAXLLEN + sizeof(TMFILE));
1.7       millert    85:                if (cfilebuf == NULL)
                     86:                        errx(2, "cannot allocate memory");
                     87:        }
1.1       millert    88:
                     89:        i = min(16, nfiles) * LALIGN(MAXLLEN+sizeof(TMFILE));
1.16    ! naddy      90:        if (!buffer || i > BUFSIZE) {
        !            91:                buffer = buffer ? realloc(buffer, i) : malloc(i);
1.1       millert    92:                if (!buffer)
1.7       millert    93:                        errx(2, "cannot allocate memory");
1.16    ! naddy      94:                if (!SINGL_FLD) {
        !            95:                        if ((linebuf = malloc(MAXLLEN)) == NULL)
        !            96:                                errx(2, "cannot allocate memory");
        !            97:                }
1.1       millert    98:        }
                     99:
                    100:        if (binno >= 0)
                    101:                l_fstack = fstack + files.top;
                    102:        else
                    103:                l_fstack = fstack;
1.4       millert   104:
1.1       millert   105:        while (nfiles) {
                    106:                put = putrec;
                    107:                for (j = 0; j < nfiles; j += 16) {
                    108:                        if (nfiles <= 16) {
                    109:                                tout = outfp;
                    110:                                put = fput;
                    111:                        }
                    112:                        else
                    113:                                tout = ftmp();
                    114:                        last = min(16, nfiles - j);
                    115:                        if (binno < 0) {
                    116:                                for (i = 0; i < last; i++)
                    117:                                        if (!(l_fstack[i+MAXFCT-1-16].fp =
                    118:                                            fopen(files.names[j + i], "r")))
1.8       millert   119:                                                err(2, "%s", files.names[j+i]);
1.1       millert   120:                                merge(MAXFCT-1-16, last, get, tout, put, ftbl);
1.4       millert   121:                        } else {
1.1       millert   122:                                for (i = 0; i< last; i++)
                    123:                                        rewind(l_fstack[i+j].fp);
                    124:                                merge(files.top+j, last, get, tout, put, ftbl);
                    125:                        }
1.4       millert   126:                        if (nfiles > 16)
                    127:                                l_fstack[j/16].fp = tout;
1.1       millert   128:                }
                    129:                nfiles = (nfiles + 15) / 16;
                    130:                if (nfiles == 1)
                    131:                        nfiles = 0;
                    132:                if (binno < 0) {
                    133:                        binno = 0;
                    134:                        get = geteasy;
                    135:                        files.top = 0;
                    136:                }
                    137:        }
                    138: }
                    139:
                    140: void
1.14      deraadt   141: merge(int infl0, int nfiles,
                    142:     int (*get)(int, union f_handle, int, RECHEADER *, u_char *, struct field *),
                    143:     FILE *outfp, void (*put)(RECHEADER *, FILE *), struct field *ftbl)
1.1       millert   144: {
                    145:        int c, i, j;
                    146:        union f_handle dummy = {0};
                    147:        struct mfile *flist[16], *cfile;
1.2       millert   148:
1.1       millert   149:        for (i = j = 0; i < nfiles; i++) {
                    150:                cfile = (MFILE *) (buffer +
                    151:                    i * LALIGN(MAXLLEN + sizeof(TMFILE)));
                    152:                cfile->flno = j + infl0;
                    153:                cfile->end = cfile->rec->data + MAXLLEN;
                    154:                for (c = 1; c == 1;) {
                    155:                        if (EOF == (c = get(j+infl0, dummy, nfiles,
                    156:                           cfile->rec, cfile->end, ftbl))) {
1.4       millert   157:                                i--;
                    158:                                nfiles--;
1.1       millert   159:                                break;
                    160:                        }
                    161:                        if (i)
                    162:                                c = insert(flist, &cfile, i, !DELETE);
                    163:                        else
                    164:                                flist[0] = cfile;
                    165:                }
                    166:                j++;
                    167:        }
1.2       millert   168:        if (nfiles > 0) {
                    169:                cfile = cfilebuf;
                    170:                cfile->flno = flist[0]->flno;
                    171:                cfile->end = cfile->rec->data + MAXLLEN;
                    172:                while (nfiles) {
                    173:                        for (c = 1; c == 1;) {
                    174:                                if (EOF == (c = get(cfile->flno, dummy, nfiles,
                    175:                                   cfile->rec, cfile->end, ftbl))) {
                    176:                                        put(flist[0]->rec, outfp);
                    177:                                        memmove(flist, flist + 1,
                    178:                                            sizeof(MFILE *) * (--nfiles));
                    179:                                        cfile->flno = flist[0]->flno;
                    180:                                        break;
                    181:                                }
                    182:                                if (!(c = insert(flist, &cfile, nfiles, DELETE)))
                    183:                                        put(cfile->rec, outfp);
1.1       millert   184:                        }
1.2       millert   185:                }
1.1       millert   186:        }
                    187: }
                    188:
                    189: /*
                    190:  * if delete: inserts *rec in flist, deletes flist[0], and leaves it in *rec;
                    191:  * otherwise just inserts *rec in flist.
1.4       millert   192:  */
1.1       millert   193: static int
1.14      deraadt   194: insert(struct mfile **flist, struct mfile **rec, int ttop,
                    195:     int delete)                        /* delete = 0 or 1 */
1.1       millert   196: {
1.9       ericj     197:        struct mfile *tmprec;
                    198:        int top, mid, bot = 0, cmpv = 1;
1.1       millert   199:        tmprec = *rec;
                    200:        top = ttop;
                    201:        for (mid = top/2; bot +1 != top; mid = (bot+top)/2) {
                    202:                cmpv = cmp(tmprec->rec, flist[mid]->rec);
                    203:                if (cmpv < 0)
                    204:                        top = mid;
                    205:                else if (cmpv > 0)
                    206:                        bot = mid;
                    207:                else {
                    208:                        if (!UNIQUE)
                    209:                                bot = mid - 1;
                    210:                        break;
                    211:                }
                    212:        }
                    213:        if (delete) {
                    214:                if (UNIQUE) {
                    215:                        if (!bot && cmpv)
                    216:                                cmpv = cmp(tmprec->rec, flist[0]->rec);
                    217:                        if (!cmpv)
1.4       millert   218:                                return (1);
1.1       millert   219:                }
                    220:                tmprec = flist[0];
                    221:                if (bot)
                    222:                        memmove(flist, flist+1, bot * sizeof(MFILE **));
                    223:                flist[bot] = *rec;
                    224:                *rec = tmprec;
                    225:                (*rec)->flno = (*flist)->flno;
                    226:                return (0);
                    227:        }
                    228:        else {
                    229:                if (!bot && !(UNIQUE && !cmpv)) {
                    230:                        cmpv = cmp(tmprec->rec, flist[0]->rec);
                    231:                        if (cmpv < 0)
                    232:                                bot = -1;
                    233:                }
                    234:                if (UNIQUE && !cmpv)
                    235:                        return (1);
                    236:                bot++;
                    237:                memmove(flist + bot+1, flist + bot,
                    238:                    (ttop - bot) * sizeof(MFILE **));
                    239:                flist[bot] = *rec;
                    240:                return (0);
                    241:        }
                    242: }
                    243:
                    244: /*
                    245:  * check order on one file
                    246:  */
                    247: void
1.14      deraadt   248: order(union f_handle infile,
                    249:     int (*get)(int, union f_handle, int, RECHEADER *, u_char *, struct field *),
                    250:     struct field *ftbl)
1.1       millert   251: {
1.9       ericj     252:        u_char *crec_end, *prec_end, *trec_end;
1.1       millert   253:        int c;
1.6       millert   254:        RECHEADER *crec, *prec, *trec;
1.1       millert   255:
1.16    ! naddy     256:        if (!SINGL_FLD) {
        !           257:                if ((linebuf = malloc(MAXLLEN)) == NULL)
        !           258:                        errx(2, "cannot allocate memory");
1.7       millert   259:        }
1.16    ! naddy     260:        buffer = malloc(2 * ALIGN((MAXLLEN + sizeof(TRECHEADER))));
        !           261:        if (buffer == NULL)
        !           262:                errx(2, "cannot allocate memory");
        !           263:
1.1       millert   264:        crec = (RECHEADER *) buffer;
1.10      art       265:        crec_end = ((char *)crec) + ALIGN(MAXLLEN + sizeof(TRECHEADER));
                    266:        prec = (RECHEADER *) crec_end;
                    267:        prec_end = ((char *)prec) + ALIGN(MAXLLEN + sizeof(TRECHEADER));
1.1       millert   268:        wts = ftbl->weights;
1.9       ericj     269:        if (SINGL_FLD && (ftbl->flags & F))
1.1       millert   270:                wts1 = ftbl->flags & R ? Rascii : ascii;
                    271:        else
                    272:                wts1 = 0;
1.9       ericj     273:        if (get(-1, infile, 1, prec, prec_end, ftbl) == 0)
                    274:                while (get(-1, infile, 1, crec, crec_end, ftbl) == 0) {
1.4       millert   275:                        if (0 < (c = cmp(prec, crec))) {
                    276:                                crec->data[crec->length-1] = 0;
                    277:                                errx(1, "found disorder: %s",
                    278:                                    crec->data+crec->offset);
                    279:                        }
                    280:                        if (UNIQUE && !c) {
                    281:                                crec->data[crec->length-1] = 0;
                    282:                                errx(1, "found non-uniqueness: %s",
                    283:                                    crec->data+crec->offset);
                    284:                        }
1.9       ericj     285:                        /* Swap pointers so that this record is on place
                    286:                         * pointed to by prec and new record is read to place
                    287:                         * pointed to by crec.
                    288:                         */
1.4       millert   289:                        trec = prec;
                    290:                        prec = crec;
                    291:                        crec = trec;
1.9       ericj     292:                        trec_end = prec_end;
                    293:                        prec_end = crec_end;
                    294:                        crec_end = trec_end;
1.1       millert   295:                }
                    296:        exit(0);
                    297: }
                    298:
                    299: static int
1.14      deraadt   300: cmp(RECHEADER *rec1, RECHEADER *rec2)
1.1       millert   301: {
1.9       ericj     302:        int r;
                    303:        u_char *pos1, *pos2, *end;
                    304:        u_char *cwts;
1.1       millert   305:        for (cwts = wts; cwts; cwts = (cwts == wts1 ? 0 : wts1)) {
                    306:                pos1 = rec1->data;
                    307:                pos2 = rec2->data;
                    308:                if (!SINGL_FLD && UNIQUE)
                    309:                        end = pos1 + min(rec1->offset, rec2->offset);
                    310:                else
                    311:                        end = pos1 + min(rec1->length, rec2->length);
                    312:                for (; pos1 < end; ) {
                    313:                        if ((r = cwts[*pos1++] - cwts[*pos2++]))
                    314:                                return (r);
                    315:                }
                    316:        }
                    317:        return (0);
                    318: }