[BACK]Return to msort.c CVS log [TXT][DIR] Up to [local] / src / usr.bin / sort

Annotation of src/usr.bin/sort/msort.c, Revision 1.23

1.23    ! schwarze    1: /*     $OpenBSD: msort.c,v 1.22 2009/10/27 23:59:43 deraadt Exp $      */
1.1       millert     2:
                      3: /*-
                      4:  * Copyright (c) 1993
                      5:  *     The Regents of the University of California.  All rights reserved.
                      6:  *
                      7:  * This code is derived from software contributed to Berkeley by
                      8:  * Peter McIlroy.
                      9:  *
                     10:  * Redistribution and use in source and binary forms, with or without
                     11:  * modification, are permitted provided that the following conditions
                     12:  * are met:
                     13:  * 1. Redistributions of source code must retain the above copyright
                     14:  *    notice, this list of conditions and the following disclaimer.
                     15:  * 2. Redistributions in binary form must reproduce the above copyright
                     16:  *    notice, this list of conditions and the following disclaimer in the
                     17:  *    documentation and/or other materials provided with the distribution.
1.12      millert    18:  * 3. Neither the name of the University nor the names of its contributors
1.1       millert    19:  *    may be used to endorse or promote products derived from this software
                     20:  *    without specific prior written permission.
                     21:  *
                     22:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
                     23:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     24:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     25:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
                     26:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
                     27:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
                     28:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
                     29:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
                     30:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
                     31:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                     32:  * SUCH DAMAGE.
                     33:  */
                     34:
                     35: #include "sort.h"
                     36: #include "fsort.h"
                     37:
                     38: #include <stdlib.h>
                     39: #include <string.h>
                     40: #include <unistd.h>
                     41:
                     42: /* Subroutines using comparisons: merge sort and check order */
                     43: #define DELETE (1)
1.5       millert    44: #define LALIGN(n) ((n+(sizeof(long)-1)) & ~(sizeof(long)-1))
1.1       millert    45:
                     46: typedef struct mfile {
                     47:        u_char *end;
                     48:        short flno;
1.6       millert    49:        RECHEADER rec[1];
1.1       millert    50: } MFILE;
1.20      millert    51: static u_char *wts, *wts1;
                     52: static struct mfile *cfilebuf;
                     53: static void *buffer;
                     54: static size_t bufsize;
1.1       millert    55:
1.11      millert    56: static int cmp(RECHEADER *, RECHEADER *);
                     57: static int insert(struct mfile **, struct mfile **, int, int);
1.1       millert    58:
                     59: void
1.14      deraadt    60: fmerge(int binno, union f_handle files, int nfiles,
                     61:     int (*get)(int, union f_handle, int, RECHEADER *, u_char *, struct field *),
                     62:     FILE *outfp, void (*fput)(RECHEADER *, FILE *), struct field *ftbl)
1.1       millert    63: {
                     64:        FILE *tout;
                     65:        int i, j, last;
1.6       millert    66:        void (*put)(RECHEADER *, FILE *);
1.1       millert    67:        struct tempfile *l_fstack;
                     68:
                     69:        wts = ftbl->weights;
1.9       ericj      70:        if (!UNIQUE && SINGL_FLD && (ftbl->flags & F))
1.1       millert    71:                wts1 = (ftbl->flags & R) ? Rascii : ascii;
1.20      millert    72:        if (cfilebuf == NULL) {
                     73:                cfilebuf = malloc(MAXLLEN + sizeof(MFILE));
1.7       millert    74:                if (cfilebuf == NULL)
                     75:                        errx(2, "cannot allocate memory");
                     76:        }
1.1       millert    77:
1.20      millert    78:        i = min(16, nfiles) * LALIGN(MAXLLEN + sizeof(MFILE));
1.17      millert    79:        if (i > bufsize) {
1.20      millert    80:                bufsize = i;
                     81:                if ((buffer = realloc(buffer, bufsize)) == NULL)
                     82:                        err(2, NULL);
1.1       millert    83:        }
                     84:
                     85:        if (binno >= 0)
                     86:                l_fstack = fstack + files.top;
                     87:        else
                     88:                l_fstack = fstack;
1.4       millert    89:
1.1       millert    90:        while (nfiles) {
                     91:                put = putrec;
                     92:                for (j = 0; j < nfiles; j += 16) {
                     93:                        if (nfiles <= 16) {
                     94:                                tout = outfp;
                     95:                                put = fput;
                     96:                        }
                     97:                        else
                     98:                                tout = ftmp();
                     99:                        last = min(16, nfiles - j);
                    100:                        if (binno < 0) {
                    101:                                for (i = 0; i < last; i++)
                    102:                                        if (!(l_fstack[i+MAXFCT-1-16].fp =
                    103:                                            fopen(files.names[j + i], "r")))
1.8       millert   104:                                                err(2, "%s", files.names[j+i]);
1.1       millert   105:                                merge(MAXFCT-1-16, last, get, tout, put, ftbl);
1.4       millert   106:                        } else {
1.1       millert   107:                                for (i = 0; i< last; i++)
                    108:                                        rewind(l_fstack[i+j].fp);
                    109:                                merge(files.top+j, last, get, tout, put, ftbl);
                    110:                        }
1.4       millert   111:                        if (nfiles > 16)
                    112:                                l_fstack[j/16].fp = tout;
1.1       millert   113:                }
                    114:                nfiles = (nfiles + 15) / 16;
                    115:                if (nfiles == 1)
                    116:                        nfiles = 0;
                    117:                if (binno < 0) {
                    118:                        binno = 0;
                    119:                        get = geteasy;
                    120:                        files.top = 0;
                    121:                }
                    122:        }
                    123: }
                    124:
                    125: void
1.14      deraadt   126: merge(int infl0, int nfiles,
                    127:     int (*get)(int, union f_handle, int, RECHEADER *, u_char *, struct field *),
                    128:     FILE *outfp, void (*put)(RECHEADER *, FILE *), struct field *ftbl)
1.1       millert   129: {
                    130:        int c, i, j;
                    131:        union f_handle dummy = {0};
                    132:        struct mfile *flist[16], *cfile;
1.2       millert   133:
1.1       millert   134:        for (i = j = 0; i < nfiles; i++) {
                    135:                cfile = (MFILE *) (buffer +
1.20      millert   136:                    i * LALIGN(MAXLLEN + sizeof(MFILE)));
1.1       millert   137:                cfile->flno = j + infl0;
                    138:                cfile->end = cfile->rec->data + MAXLLEN;
                    139:                for (c = 1; c == 1;) {
                    140:                        if (EOF == (c = get(j+infl0, dummy, nfiles,
                    141:                           cfile->rec, cfile->end, ftbl))) {
1.4       millert   142:                                i--;
                    143:                                nfiles--;
1.1       millert   144:                                break;
                    145:                        }
                    146:                        if (i)
                    147:                                c = insert(flist, &cfile, i, !DELETE);
                    148:                        else
                    149:                                flist[0] = cfile;
                    150:                }
                    151:                j++;
                    152:        }
1.2       millert   153:        if (nfiles > 0) {
                    154:                cfile = cfilebuf;
                    155:                cfile->flno = flist[0]->flno;
                    156:                cfile->end = cfile->rec->data + MAXLLEN;
                    157:                while (nfiles) {
                    158:                        for (c = 1; c == 1;) {
                    159:                                if (EOF == (c = get(cfile->flno, dummy, nfiles,
                    160:                                   cfile->rec, cfile->end, ftbl))) {
                    161:                                        put(flist[0]->rec, outfp);
                    162:                                        memmove(flist, flist + 1,
                    163:                                            sizeof(MFILE *) * (--nfiles));
                    164:                                        cfile->flno = flist[0]->flno;
                    165:                                        break;
                    166:                                }
                    167:                                if (!(c = insert(flist, &cfile, nfiles, DELETE)))
                    168:                                        put(cfile->rec, outfp);
1.1       millert   169:                        }
1.2       millert   170:                }
1.1       millert   171:        }
                    172: }
                    173:
                    174: /*
                    175:  * if delete: inserts *rec in flist, deletes flist[0], and leaves it in *rec;
                    176:  * otherwise just inserts *rec in flist.
1.4       millert   177:  */
1.1       millert   178: static int
1.14      deraadt   179: insert(struct mfile **flist, struct mfile **rec, int ttop,
                    180:     int delete)                        /* delete = 0 or 1 */
1.1       millert   181: {
1.9       ericj     182:        struct mfile *tmprec;
                    183:        int top, mid, bot = 0, cmpv = 1;
1.1       millert   184:        tmprec = *rec;
                    185:        top = ttop;
                    186:        for (mid = top/2; bot +1 != top; mid = (bot+top)/2) {
                    187:                cmpv = cmp(tmprec->rec, flist[mid]->rec);
                    188:                if (cmpv < 0)
                    189:                        top = mid;
                    190:                else if (cmpv > 0)
                    191:                        bot = mid;
                    192:                else {
                    193:                        if (!UNIQUE)
                    194:                                bot = mid - 1;
                    195:                        break;
                    196:                }
                    197:        }
                    198:        if (delete) {
                    199:                if (UNIQUE) {
                    200:                        if (!bot && cmpv)
                    201:                                cmpv = cmp(tmprec->rec, flist[0]->rec);
                    202:                        if (!cmpv)
1.4       millert   203:                                return (1);
1.1       millert   204:                }
                    205:                tmprec = flist[0];
                    206:                if (bot)
                    207:                        memmove(flist, flist+1, bot * sizeof(MFILE **));
                    208:                flist[bot] = *rec;
                    209:                *rec = tmprec;
                    210:                (*rec)->flno = (*flist)->flno;
                    211:                return (0);
                    212:        }
                    213:        else {
                    214:                if (!bot && !(UNIQUE && !cmpv)) {
                    215:                        cmpv = cmp(tmprec->rec, flist[0]->rec);
                    216:                        if (cmpv < 0)
                    217:                                bot = -1;
                    218:                }
                    219:                if (UNIQUE && !cmpv)
                    220:                        return (1);
                    221:                bot++;
                    222:                memmove(flist + bot+1, flist + bot,
                    223:                    (ttop - bot) * sizeof(MFILE **));
                    224:                flist[bot] = *rec;
                    225:                return (0);
                    226:        }
                    227: }
                    228:
                    229: /*
                    230:  * check order on one file
                    231:  */
                    232: void
1.14      deraadt   233: order(union f_handle infile,
                    234:     int (*get)(int, union f_handle, int, RECHEADER *, u_char *, struct field *),
1.23    ! schwarze  235:     struct field *ftbl,
        !           236:     int c_warn)
1.1       millert   237: {
1.9       ericj     238:        u_char *crec_end, *prec_end, *trec_end;
1.1       millert   239:        int c;
1.6       millert   240:        RECHEADER *crec, *prec, *trec;
1.1       millert   241:
1.20      millert   242:        if (bufsize < 2 * ALIGN(MAXLLEN + sizeof(RECHEADER))) {
                    243:                bufsize = 2 * ALIGN(MAXLLEN + sizeof(RECHEADER));
1.17      millert   244:                if ((buffer = realloc(buffer, bufsize)) == NULL)
                    245:                        err(2, NULL);
1.7       millert   246:        }
1.1       millert   247:        crec = (RECHEADER *) buffer;
1.20      millert   248:        crec_end = ((char *)crec) + ALIGN(MAXLLEN + sizeof(RECHEADER));
1.10      art       249:        prec = (RECHEADER *) crec_end;
1.20      millert   250:        prec_end = ((char *)prec) + ALIGN(MAXLLEN + sizeof(RECHEADER));
1.1       millert   251:        wts = ftbl->weights;
1.9       ericj     252:        if (SINGL_FLD && (ftbl->flags & F))
1.1       millert   253:                wts1 = ftbl->flags & R ? Rascii : ascii;
                    254:        else
                    255:                wts1 = 0;
1.9       ericj     256:        if (get(-1, infile, 1, prec, prec_end, ftbl) == 0)
                    257:                while (get(-1, infile, 1, crec, crec_end, ftbl) == 0) {
1.4       millert   258:                        if (0 < (c = cmp(prec, crec))) {
                    259:                                crec->data[crec->length-1] = 0;
1.23    ! schwarze  260:                                if (c_warn)
        !           261:                                        errx(1, "found disorder: %s",
        !           262:                                            crec->data+crec->offset);
        !           263:                                else
        !           264:                                        exit(1);
1.4       millert   265:                        }
                    266:                        if (UNIQUE && !c) {
                    267:                                crec->data[crec->length-1] = 0;
1.23    ! schwarze  268:                                if (c_warn)
        !           269:                                        errx(1, "found non-uniqueness: %s",
        !           270:                                            crec->data+crec->offset);
        !           271:                                else
        !           272:                                        exit(1);
1.4       millert   273:                        }
1.9       ericj     274:                        /* Swap pointers so that this record is on place
                    275:                         * pointed to by prec and new record is read to place
                    276:                         * pointed to by crec.
                    277:                         */
1.4       millert   278:                        trec = prec;
                    279:                        prec = crec;
                    280:                        crec = trec;
1.9       ericj     281:                        trec_end = prec_end;
                    282:                        prec_end = crec_end;
                    283:                        crec_end = trec_end;
1.1       millert   284:                }
                    285:        exit(0);
                    286: }
                    287:
                    288: static int
1.14      deraadt   289: cmp(RECHEADER *rec1, RECHEADER *rec2)
1.1       millert   290: {
1.9       ericj     291:        int r;
                    292:        u_char *pos1, *pos2, *end;
                    293:        u_char *cwts;
1.1       millert   294:        for (cwts = wts; cwts; cwts = (cwts == wts1 ? 0 : wts1)) {
                    295:                pos1 = rec1->data;
                    296:                pos2 = rec2->data;
1.21      millert   297:                if (!SINGL_FLD && (UNIQUE || STABLE))
1.1       millert   298:                        end = pos1 + min(rec1->offset, rec2->offset);
                    299:                else
                    300:                        end = pos1 + min(rec1->length, rec2->length);
                    301:                for (; pos1 < end; ) {
                    302:                        if ((r = cwts[*pos1++] - cwts[*pos2++]))
                    303:                                return (r);
                    304:                }
                    305:        }
                    306:        return (0);
                    307: }