[BACK]Return to msort.c CVS log [TXT][DIR] Up to [local] / src / usr.bin / sort

Annotation of src/usr.bin/sort/msort.c, Revision 1.25

1.25    ! deraadt     1: /*     $OpenBSD: msort.c,v 1.24 2013/11/13 15:07:27 deraadt Exp $      */
1.1       millert     2:
                      3: /*-
                      4:  * Copyright (c) 1993
                      5:  *     The Regents of the University of California.  All rights reserved.
                      6:  *
                      7:  * This code is derived from software contributed to Berkeley by
                      8:  * Peter McIlroy.
                      9:  *
                     10:  * Redistribution and use in source and binary forms, with or without
                     11:  * modification, are permitted provided that the following conditions
                     12:  * are met:
                     13:  * 1. Redistributions of source code must retain the above copyright
                     14:  *    notice, this list of conditions and the following disclaimer.
                     15:  * 2. Redistributions in binary form must reproduce the above copyright
                     16:  *    notice, this list of conditions and the following disclaimer in the
                     17:  *    documentation and/or other materials provided with the distribution.
1.12      millert    18:  * 3. Neither the name of the University nor the names of its contributors
1.1       millert    19:  *    may be used to endorse or promote products derived from this software
                     20:  *    without specific prior written permission.
                     21:  *
                     22:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
                     23:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     24:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     25:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
                     26:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
                     27:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
                     28:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
                     29:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
                     30:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
                     31:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                     32:  * SUCH DAMAGE.
                     33:  */
1.25    ! deraadt    34:
        !            35: #include <sys/param.h> /* ALIGN */
1.1       millert    36:
                     37: #include "sort.h"
                     38: #include "fsort.h"
                     39:
                     40: #include <stdlib.h>
                     41: #include <string.h>
                     42: #include <unistd.h>
                     43:
                     44: /* Subroutines using comparisons: merge sort and check order */
                     45: #define DELETE (1)
1.5       millert    46: #define LALIGN(n) ((n+(sizeof(long)-1)) & ~(sizeof(long)-1))
1.1       millert    47:
                     48: typedef struct mfile {
                     49:        u_char *end;
                     50:        short flno;
1.6       millert    51:        RECHEADER rec[1];
1.1       millert    52: } MFILE;
1.20      millert    53: static u_char *wts, *wts1;
                     54: static struct mfile *cfilebuf;
1.24      deraadt    55: static char *buffer;
1.20      millert    56: static size_t bufsize;
1.1       millert    57:
1.11      millert    58: static int cmp(RECHEADER *, RECHEADER *);
                     59: static int insert(struct mfile **, struct mfile **, int, int);
1.1       millert    60:
                     61: void
1.14      deraadt    62: fmerge(int binno, union f_handle files, int nfiles,
                     63:     int (*get)(int, union f_handle, int, RECHEADER *, u_char *, struct field *),
                     64:     FILE *outfp, void (*fput)(RECHEADER *, FILE *), struct field *ftbl)
1.1       millert    65: {
                     66:        FILE *tout;
                     67:        int i, j, last;
1.6       millert    68:        void (*put)(RECHEADER *, FILE *);
1.1       millert    69:        struct tempfile *l_fstack;
                     70:
                     71:        wts = ftbl->weights;
1.9       ericj      72:        if (!UNIQUE && SINGL_FLD && (ftbl->flags & F))
1.1       millert    73:                wts1 = (ftbl->flags & R) ? Rascii : ascii;
1.20      millert    74:        if (cfilebuf == NULL) {
                     75:                cfilebuf = malloc(MAXLLEN + sizeof(MFILE));
1.7       millert    76:                if (cfilebuf == NULL)
                     77:                        errx(2, "cannot allocate memory");
                     78:        }
1.1       millert    79:
1.20      millert    80:        i = min(16, nfiles) * LALIGN(MAXLLEN + sizeof(MFILE));
1.17      millert    81:        if (i > bufsize) {
1.20      millert    82:                bufsize = i;
                     83:                if ((buffer = realloc(buffer, bufsize)) == NULL)
                     84:                        err(2, NULL);
1.1       millert    85:        }
                     86:
                     87:        if (binno >= 0)
                     88:                l_fstack = fstack + files.top;
                     89:        else
                     90:                l_fstack = fstack;
1.4       millert    91:
1.1       millert    92:        while (nfiles) {
                     93:                put = putrec;
                     94:                for (j = 0; j < nfiles; j += 16) {
                     95:                        if (nfiles <= 16) {
                     96:                                tout = outfp;
                     97:                                put = fput;
                     98:                        }
                     99:                        else
                    100:                                tout = ftmp();
                    101:                        last = min(16, nfiles - j);
                    102:                        if (binno < 0) {
                    103:                                for (i = 0; i < last; i++)
                    104:                                        if (!(l_fstack[i+MAXFCT-1-16].fp =
                    105:                                            fopen(files.names[j + i], "r")))
1.8       millert   106:                                                err(2, "%s", files.names[j+i]);
1.1       millert   107:                                merge(MAXFCT-1-16, last, get, tout, put, ftbl);
1.4       millert   108:                        } else {
1.1       millert   109:                                for (i = 0; i< last; i++)
                    110:                                        rewind(l_fstack[i+j].fp);
                    111:                                merge(files.top+j, last, get, tout, put, ftbl);
                    112:                        }
1.4       millert   113:                        if (nfiles > 16)
                    114:                                l_fstack[j/16].fp = tout;
1.1       millert   115:                }
                    116:                nfiles = (nfiles + 15) / 16;
                    117:                if (nfiles == 1)
                    118:                        nfiles = 0;
                    119:                if (binno < 0) {
                    120:                        binno = 0;
                    121:                        get = geteasy;
                    122:                        files.top = 0;
                    123:                }
                    124:        }
                    125: }
                    126:
                    127: void
1.14      deraadt   128: merge(int infl0, int nfiles,
                    129:     int (*get)(int, union f_handle, int, RECHEADER *, u_char *, struct field *),
                    130:     FILE *outfp, void (*put)(RECHEADER *, FILE *), struct field *ftbl)
1.1       millert   131: {
                    132:        int c, i, j;
                    133:        union f_handle dummy = {0};
                    134:        struct mfile *flist[16], *cfile;
1.2       millert   135:
1.1       millert   136:        for (i = j = 0; i < nfiles; i++) {
                    137:                cfile = (MFILE *) (buffer +
1.20      millert   138:                    i * LALIGN(MAXLLEN + sizeof(MFILE)));
1.1       millert   139:                cfile->flno = j + infl0;
                    140:                cfile->end = cfile->rec->data + MAXLLEN;
                    141:                for (c = 1; c == 1;) {
                    142:                        if (EOF == (c = get(j+infl0, dummy, nfiles,
                    143:                           cfile->rec, cfile->end, ftbl))) {
1.4       millert   144:                                i--;
                    145:                                nfiles--;
1.1       millert   146:                                break;
                    147:                        }
                    148:                        if (i)
                    149:                                c = insert(flist, &cfile, i, !DELETE);
                    150:                        else
                    151:                                flist[0] = cfile;
                    152:                }
                    153:                j++;
                    154:        }
1.2       millert   155:        if (nfiles > 0) {
                    156:                cfile = cfilebuf;
                    157:                cfile->flno = flist[0]->flno;
                    158:                cfile->end = cfile->rec->data + MAXLLEN;
                    159:                while (nfiles) {
                    160:                        for (c = 1; c == 1;) {
                    161:                                if (EOF == (c = get(cfile->flno, dummy, nfiles,
                    162:                                   cfile->rec, cfile->end, ftbl))) {
                    163:                                        put(flist[0]->rec, outfp);
                    164:                                        memmove(flist, flist + 1,
                    165:                                            sizeof(MFILE *) * (--nfiles));
                    166:                                        cfile->flno = flist[0]->flno;
                    167:                                        break;
                    168:                                }
                    169:                                if (!(c = insert(flist, &cfile, nfiles, DELETE)))
                    170:                                        put(cfile->rec, outfp);
1.1       millert   171:                        }
1.2       millert   172:                }
1.1       millert   173:        }
                    174: }
                    175:
                    176: /*
                    177:  * if delete: inserts *rec in flist, deletes flist[0], and leaves it in *rec;
                    178:  * otherwise just inserts *rec in flist.
1.4       millert   179:  */
1.1       millert   180: static int
1.14      deraadt   181: insert(struct mfile **flist, struct mfile **rec, int ttop,
                    182:     int delete)                        /* delete = 0 or 1 */
1.1       millert   183: {
1.9       ericj     184:        struct mfile *tmprec;
                    185:        int top, mid, bot = 0, cmpv = 1;
1.1       millert   186:        tmprec = *rec;
                    187:        top = ttop;
                    188:        for (mid = top/2; bot +1 != top; mid = (bot+top)/2) {
                    189:                cmpv = cmp(tmprec->rec, flist[mid]->rec);
                    190:                if (cmpv < 0)
                    191:                        top = mid;
                    192:                else if (cmpv > 0)
                    193:                        bot = mid;
                    194:                else {
                    195:                        if (!UNIQUE)
                    196:                                bot = mid - 1;
                    197:                        break;
                    198:                }
                    199:        }
                    200:        if (delete) {
                    201:                if (UNIQUE) {
                    202:                        if (!bot && cmpv)
                    203:                                cmpv = cmp(tmprec->rec, flist[0]->rec);
                    204:                        if (!cmpv)
1.4       millert   205:                                return (1);
1.1       millert   206:                }
                    207:                tmprec = flist[0];
                    208:                if (bot)
                    209:                        memmove(flist, flist+1, bot * sizeof(MFILE **));
                    210:                flist[bot] = *rec;
                    211:                *rec = tmprec;
                    212:                (*rec)->flno = (*flist)->flno;
                    213:                return (0);
                    214:        }
                    215:        else {
                    216:                if (!bot && !(UNIQUE && !cmpv)) {
                    217:                        cmpv = cmp(tmprec->rec, flist[0]->rec);
                    218:                        if (cmpv < 0)
                    219:                                bot = -1;
                    220:                }
                    221:                if (UNIQUE && !cmpv)
                    222:                        return (1);
                    223:                bot++;
                    224:                memmove(flist + bot+1, flist + bot,
                    225:                    (ttop - bot) * sizeof(MFILE **));
                    226:                flist[bot] = *rec;
                    227:                return (0);
                    228:        }
                    229: }
                    230:
                    231: /*
                    232:  * check order on one file
                    233:  */
                    234: void
1.14      deraadt   235: order(union f_handle infile,
                    236:     int (*get)(int, union f_handle, int, RECHEADER *, u_char *, struct field *),
1.23      schwarze  237:     struct field *ftbl,
                    238:     int c_warn)
1.1       millert   239: {
1.9       ericj     240:        u_char *crec_end, *prec_end, *trec_end;
1.1       millert   241:        int c;
1.6       millert   242:        RECHEADER *crec, *prec, *trec;
1.1       millert   243:
1.20      millert   244:        if (bufsize < 2 * ALIGN(MAXLLEN + sizeof(RECHEADER))) {
                    245:                bufsize = 2 * ALIGN(MAXLLEN + sizeof(RECHEADER));
1.17      millert   246:                if ((buffer = realloc(buffer, bufsize)) == NULL)
                    247:                        err(2, NULL);
1.7       millert   248:        }
1.1       millert   249:        crec = (RECHEADER *) buffer;
1.20      millert   250:        crec_end = ((char *)crec) + ALIGN(MAXLLEN + sizeof(RECHEADER));
1.10      art       251:        prec = (RECHEADER *) crec_end;
1.20      millert   252:        prec_end = ((char *)prec) + ALIGN(MAXLLEN + sizeof(RECHEADER));
1.1       millert   253:        wts = ftbl->weights;
1.9       ericj     254:        if (SINGL_FLD && (ftbl->flags & F))
1.1       millert   255:                wts1 = ftbl->flags & R ? Rascii : ascii;
                    256:        else
                    257:                wts1 = 0;
1.9       ericj     258:        if (get(-1, infile, 1, prec, prec_end, ftbl) == 0)
                    259:                while (get(-1, infile, 1, crec, crec_end, ftbl) == 0) {
1.4       millert   260:                        if (0 < (c = cmp(prec, crec))) {
                    261:                                crec->data[crec->length-1] = 0;
1.23      schwarze  262:                                if (c_warn)
                    263:                                        errx(1, "found disorder: %s",
                    264:                                            crec->data+crec->offset);
                    265:                                else
                    266:                                        exit(1);
1.4       millert   267:                        }
                    268:                        if (UNIQUE && !c) {
                    269:                                crec->data[crec->length-1] = 0;
1.23      schwarze  270:                                if (c_warn)
                    271:                                        errx(1, "found non-uniqueness: %s",
                    272:                                            crec->data+crec->offset);
                    273:                                else
                    274:                                        exit(1);
1.4       millert   275:                        }
1.9       ericj     276:                        /* Swap pointers so that this record is on place
                    277:                         * pointed to by prec and new record is read to place
                    278:                         * pointed to by crec.
                    279:                         */
1.4       millert   280:                        trec = prec;
                    281:                        prec = crec;
                    282:                        crec = trec;
1.9       ericj     283:                        trec_end = prec_end;
                    284:                        prec_end = crec_end;
                    285:                        crec_end = trec_end;
1.1       millert   286:                }
                    287:        exit(0);
                    288: }
                    289:
                    290: static int
1.14      deraadt   291: cmp(RECHEADER *rec1, RECHEADER *rec2)
1.1       millert   292: {
1.9       ericj     293:        int r;
                    294:        u_char *pos1, *pos2, *end;
                    295:        u_char *cwts;
1.1       millert   296:        for (cwts = wts; cwts; cwts = (cwts == wts1 ? 0 : wts1)) {
                    297:                pos1 = rec1->data;
                    298:                pos2 = rec2->data;
1.21      millert   299:                if (!SINGL_FLD && (UNIQUE || STABLE))
1.1       millert   300:                        end = pos1 + min(rec1->offset, rec2->offset);
                    301:                else
                    302:                        end = pos1 + min(rec1->length, rec2->length);
                    303:                for (; pos1 < end; ) {
                    304:                        if ((r = cwts[*pos1++] - cwts[*pos2++]))
                    305:                                return (r);
                    306:                }
                    307:        }
                    308:        return (0);
                    309: }