Annotation of src/usr.bin/sort/msort.c, Revision 1.3
1.3 ! millert 1: /* $OpenBSD: msort.c,v 1.2 1997/01/22 06:43:53 millert Exp $ */
1.1 millert 2:
3: /*-
4: * Copyright (c) 1993
5: * The Regents of the University of California. All rights reserved.
6: *
7: * This code is derived from software contributed to Berkeley by
8: * Peter McIlroy.
9: *
10: * Redistribution and use in source and binary forms, with or without
11: * modification, are permitted provided that the following conditions
12: * are met:
13: * 1. Redistributions of source code must retain the above copyright
14: * notice, this list of conditions and the following disclaimer.
15: * 2. Redistributions in binary form must reproduce the above copyright
16: * notice, this list of conditions and the following disclaimer in the
17: * documentation and/or other materials provided with the distribution.
18: * 3. All advertising materials mentioning features or use of this software
19: * must display the following acknowledgement:
20: * This product includes software developed by the University of
21: * California, Berkeley and its contributors.
22: * 4. Neither the name of the University nor the names of its contributors
23: * may be used to endorse or promote products derived from this software
24: * without specific prior written permission.
25: *
26: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36: * SUCH DAMAGE.
37: */
38:
39: #ifndef lint
40: #if 0
41: static char sccsid[] = "@(#)msort.c 8.1 (Berkeley) 6/6/93";
42: #else
1.3 ! millert 43: static char rcsid[] = "$OpenBSD: msort.c,v 1.2 1997/01/22 06:43:53 millert Exp $";
1.1 millert 44: #endif
45: #endif /* not lint */
46:
47: #include "sort.h"
48: #include "fsort.h"
49:
50: #include <stdlib.h>
51: #include <string.h>
52: #include <unistd.h>
53:
54: /* Subroutines using comparisons: merge sort and check order */
55: #define DELETE (1)
56: #define LALIGN(n) ((n+3) & ~3)
57:
58: typedef struct mfile {
59: u_char *end;
60: short flno;
61: struct recheader rec[1];
62: } MFILE;
63: typedef struct tmfile {
64: u_char *end;
65: short flno;
66: struct trecheader rec[1];
67: } TMFILE;
68: u_char *wts, *wts1 = 0;
69: struct mfile *cfilebuf;
70:
71: static int cmp __P((struct recheader *, struct recheader *));
72: static int insert __P((struct mfile **, struct mfile **, int, int));
73:
74: void
75: fmerge(binno, files, nfiles, get, outfp, fput, ftbl)
76: union f_handle files;
77: int binno, nfiles;
78: int (*get)();
79: FILE *outfp;
80: void (*fput)();
81: struct field *ftbl;
82: {
83: FILE *tout;
84: int i, j, last;
85: void (*put)(struct recheader *, FILE *);
86: extern int geteasy();
87: struct tempfile *l_fstack;
88:
89: wts = ftbl->weights;
90: if (!UNIQUE && SINGL_FLD && ftbl->flags & F)
91: wts1 = (ftbl->flags & R) ? Rascii : ascii;
92: if (!cfilebuf)
93: cfilebuf = malloc(MAXLLEN + sizeof(TMFILE));
94:
95: i = min(16, nfiles) * LALIGN(MAXLLEN+sizeof(TMFILE));
96: if (!buffer || i > BUFSIZE) {
97: buffer = buffer ? realloc(buffer, i) : malloc(i);
98: if (!buffer)
99: err(2, NULL);
100: if (!SINGL_FLD)
101: linebuf = malloc(MAXLLEN);
102: }
103:
104: if (binno >= 0)
105: l_fstack = fstack + files.top;
106: else
107: l_fstack = fstack;
108: while (nfiles) {
109: put = putrec;
110: for (j = 0; j < nfiles; j += 16) {
111: if (nfiles <= 16) {
112: tout = outfp;
113: put = fput;
114: }
115: else
116: tout = ftmp();
117: last = min(16, nfiles - j);
118: if (binno < 0) {
119: for (i = 0; i < last; i++)
120: if (!(l_fstack[i+MAXFCT-1-16].fp =
121: fopen(files.names[j + i], "r")))
1.3 ! millert 122: err(2, files.names[j+i]);
1.1 millert 123: merge(MAXFCT-1-16, last, get, tout, put, ftbl);
124: }
125: else {
126: for (i = 0; i< last; i++)
127: rewind(l_fstack[i+j].fp);
128: merge(files.top+j, last, get, tout, put, ftbl);
129: }
130: if (nfiles > 16) l_fstack[j/16].fp = tout;
131: }
132: nfiles = (nfiles + 15) / 16;
133: if (nfiles == 1)
134: nfiles = 0;
135: if (binno < 0) {
136: binno = 0;
137: get = geteasy;
138: files.top = 0;
139: }
140: }
141: }
142:
143: void
144: merge(infl0, nfiles, get, outfp, put, ftbl)
145: int infl0, nfiles;
146: int (*get)();
147: void (*put)(struct recheader *, FILE *);
148: FILE *outfp;
149: struct field *ftbl;
150: {
151: int c, i, j;
152: union f_handle dummy = {0};
153: struct mfile *flist[16], *cfile;
1.2 millert 154:
1.1 millert 155: for (i = j = 0; i < nfiles; i++) {
156: cfile = (MFILE *) (buffer +
157: i * LALIGN(MAXLLEN + sizeof(TMFILE)));
158: cfile->flno = j + infl0;
159: cfile->end = cfile->rec->data + MAXLLEN;
160: for (c = 1; c == 1;) {
161: if (EOF == (c = get(j+infl0, dummy, nfiles,
162: cfile->rec, cfile->end, ftbl))) {
163: --i;
164: --nfiles;
165: break;
166: }
167: if (i)
168: c = insert(flist, &cfile, i, !DELETE);
169: else
170: flist[0] = cfile;
171: }
172: j++;
173: }
1.2 millert 174: if (nfiles > 0) {
175: cfile = cfilebuf;
176: cfile->flno = flist[0]->flno;
177: cfile->end = cfile->rec->data + MAXLLEN;
178: while (nfiles) {
179: for (c = 1; c == 1;) {
180: if (EOF == (c = get(cfile->flno, dummy, nfiles,
181: cfile->rec, cfile->end, ftbl))) {
182: put(flist[0]->rec, outfp);
183: memmove(flist, flist + 1,
184: sizeof(MFILE *) * (--nfiles));
185: cfile->flno = flist[0]->flno;
186: break;
187: }
188: if (!(c = insert(flist, &cfile, nfiles, DELETE)))
189: put(cfile->rec, outfp);
1.1 millert 190: }
1.2 millert 191: }
1.1 millert 192: }
193: }
194:
195: /*
196: * if delete: inserts *rec in flist, deletes flist[0], and leaves it in *rec;
197: * otherwise just inserts *rec in flist.
198: */
199: static int
200: insert(flist, rec, ttop, delete)
201: struct mfile **flist, **rec;
202: int delete, ttop; /* delete = 0 or 1 */
203: {
204: register struct mfile *tmprec;
205: register int top, mid, bot = 0, cmpv = 1;
206: tmprec = *rec;
207: top = ttop;
208: for (mid = top/2; bot +1 != top; mid = (bot+top)/2) {
209: cmpv = cmp(tmprec->rec, flist[mid]->rec);
210: if (cmpv < 0)
211: top = mid;
212: else if (cmpv > 0)
213: bot = mid;
214: else {
215: if (!UNIQUE)
216: bot = mid - 1;
217: break;
218: }
219: }
220: if (delete) {
221: if (UNIQUE) {
222: if (!bot && cmpv)
223: cmpv = cmp(tmprec->rec, flist[0]->rec);
224: if (!cmpv)
225: return(1);
226: }
227: tmprec = flist[0];
228: if (bot)
229: memmove(flist, flist+1, bot * sizeof(MFILE **));
230: flist[bot] = *rec;
231: *rec = tmprec;
232: (*rec)->flno = (*flist)->flno;
233: return (0);
234: }
235: else {
236: if (!bot && !(UNIQUE && !cmpv)) {
237: cmpv = cmp(tmprec->rec, flist[0]->rec);
238: if (cmpv < 0)
239: bot = -1;
240: }
241: if (UNIQUE && !cmpv)
242: return (1);
243: bot++;
244: memmove(flist + bot+1, flist + bot,
245: (ttop - bot) * sizeof(MFILE **));
246: flist[bot] = *rec;
247: return (0);
248: }
249: }
250:
251: /*
252: * check order on one file
253: */
254: void
255: order(infile, get, ftbl)
256: union f_handle infile;
257: int (*get)();
258: struct field *ftbl;
259: {
260: u_char *end;
261: int c;
262: struct recheader *crec, *prec, *trec;
263:
264: if (!SINGL_FLD)
265: linebuf = malloc(MAXLLEN);
266: buffer = malloc(2 * (MAXLLEN + sizeof(TRECHEADER)));
267: end = buffer + 2 * (MAXLLEN + sizeof(TRECHEADER));
268: crec = (RECHEADER *) buffer;
269: prec = (RECHEADER *) (buffer + MAXLLEN + sizeof(TRECHEADER));
270: wts = ftbl->weights;
271: if (SINGL_FLD && ftbl->flags & F)
272: wts1 = ftbl->flags & R ? Rascii : ascii;
273: else
274: wts1 = 0;
275: if (0 == get(-1, infile, 1, prec, end, ftbl))
276: while (0 == get(-1, infile, 1, crec, end, ftbl)) {
277: if (0 < (c = cmp(prec, crec))) {
278: crec->data[crec->length-1] = 0;
279: errx(1, "found disorder: %s", crec->data+crec->offset);
280: }
281: if (UNIQUE && !c) {
282: crec->data[crec->length-1] = 0;
283: errx(1, "found non-uniqueness: %s",
284: crec->data+crec->offset);
285: }
286: trec = prec;
287: prec = crec;
288: crec = trec;
289: }
290: exit(0);
291: }
292:
293: static int
294: cmp(rec1, rec2)
295: struct recheader *rec1, *rec2;
296: {
297: register r;
298: register u_char *pos1, *pos2, *end;
299: register u_char *cwts;
300: for (cwts = wts; cwts; cwts = (cwts == wts1 ? 0 : wts1)) {
301: pos1 = rec1->data;
302: pos2 = rec2->data;
303: if (!SINGL_FLD && UNIQUE)
304: end = pos1 + min(rec1->offset, rec2->offset);
305: else
306: end = pos1 + min(rec1->length, rec2->length);
307: for (; pos1 < end; ) {
308: if ((r = cwts[*pos1++] - cwts[*pos2++]))
309: return (r);
310: }
311: }
312: return (0);
313: }