Annotation of src/usr.bin/sort/files.c, Revision 1.6
1.6 ! millert 1: /* $OpenBSD: files.c,v 1.5 1997/06/30 05:36:16 millert Exp $ */
1.1 millert 2:
3: /*-
4: * Copyright (c) 1993
5: * The Regents of the University of California. All rights reserved.
6: *
7: * This code is derived from software contributed to Berkeley by
8: * Peter McIlroy.
9: *
10: * Redistribution and use in source and binary forms, with or without
11: * modification, are permitted provided that the following conditions
12: * are met:
13: * 1. Redistributions of source code must retain the above copyright
14: * notice, this list of conditions and the following disclaimer.
15: * 2. Redistributions in binary form must reproduce the above copyright
16: * notice, this list of conditions and the following disclaimer in the
17: * documentation and/or other materials provided with the distribution.
18: * 3. All advertising materials mentioning features or use of this software
19: * must display the following acknowledgement:
20: * This product includes software developed by the University of
21: * California, Berkeley and its contributors.
22: * 4. Neither the name of the University nor the names of its contributors
23: * may be used to endorse or promote products derived from this software
24: * without specific prior written permission.
25: *
26: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36: * SUCH DAMAGE.
37: */
38:
/* Version identification strings; omitted entirely when building under lint. */
#if !defined(lint)
# if 0
static char sccsid[] = "@(#)files.c 8.1 (Berkeley) 6/6/93";
# else
static char rcsid[] = "$OpenBSD: files.c,v 1.5 1997/06/30 05:36:16 millert Exp $";
# endif
#endif /* !lint */
46:
47: #include "sort.h"
48: #include "fsort.h"
49:
50: #include <string.h>
51:
52: /*
53: * this is the subroutine for file management for fsort().
54: * It keeps the buffers for all temporary files.
55: */
56: int
57: getnext(binno, infl0, nfiles, pos, end, dummy)
1.4 millert 58: int binno;
1.1 millert 59: union f_handle infl0;
1.4 millert 60: int nfiles;
1.5 millert 61: register RECHEADER *pos;
1.1 millert 62: register u_char *end;
63: struct field *dummy;
64: {
65: register int i;
66: register u_char *hp;
1.5 millert 67: static int nleft = 0;
1.1 millert 68: static int cnt = 0, flag = -1;
69: static u_char maxb = 0;
70: static FILE *fp;
71:
72: if (nleft == 0) {
73: if (binno < 0) /* reset files. */ {
74: for (i = 0; i < nfiles; i++) {
75: rewind(fstack[infl0.top + i].fp);
76: fstack[infl0.top + i].max_o = 0;
77: }
78: flag = -1;
79: nleft = cnt = 0;
1.4 millert 80: return (-1);
1.1 millert 81: }
82: maxb = fstack[infl0.top].maxb;
83: for (; nleft == 0; cnt++) {
84: if (cnt >= nfiles) {
85: cnt = 0;
86: return (EOF);
87: }
88: fp = fstack[infl0.top + cnt].fp;
89: hp = (u_char *) &nleft;
90: for (i = sizeof(TRECHEADER); i; --i)
91: *hp++ = getc(fp);
92: if (binno < maxb)
93: fstack[infl0.top+cnt].max_o
94: += sizeof(nleft) + nleft;
95: else if (binno == maxb) {
96: if (binno != fstack[infl0.top].lastb) {
97: fseek(fp, fstack[infl0.top+
98: cnt].max_o, SEEK_SET);
99: fread(&nleft, sizeof(nleft), 1, fp);
100: }
101: if (nleft == 0)
102: fclose(fp);
103: } else if (binno == maxb + 1) { /* skip a bin */
104: fseek(fp, nleft, SEEK_CUR);
105: fread(&nleft, sizeof(nleft), 1, fp);
106: flag = cnt;
107: }
108: }
109: }
110: if ((u_char *) pos > end - sizeof(TRECHEADER))
111: return (BUFFEND);
112: hp = (u_char *) pos;
113: for (i = sizeof(TRECHEADER); i ; --i)
114: *hp++ = (u_char) getc(fp);
115: if (end - pos->data < pos->length) {
116: for (i = sizeof(TRECHEADER); i ; i--)
117: ungetc(*--hp, fp);
118: return (BUFFEND);
119: }
120: fread(pos->data, pos->length, 1, fp);
121: nleft -= pos->length + sizeof(TRECHEADER);
122: if (nleft == 0 && binno == fstack[infl0.top].maxb)
123: fclose(fp);
124: return (0);
125: }
126:
127: /*
128: * this is called when there is no special key. It's only called
129: * in the first fsort pass.
130: */
131: int
132: makeline(flno, filelist, nfiles, buffer, bufend, dummy2)
1.4 millert 133: int flno;
1.1 millert 134: union f_handle filelist;
1.4 millert 135: int nfiles;
1.5 millert 136: RECHEADER *buffer;
1.1 millert 137: u_char *bufend;
138: struct field *dummy2;
139: {
140: static char *opos;
141: register char *end, *pos;
142: static int fileno = 0, overflow = 0;
143: static FILE *fp = 0;
144: register int c;
145:
146: pos = (char *) buffer->data;
147: end = min((char *) bufend, pos + MAXLLEN);
148: if (overflow) {
149: memmove(pos, opos, bufend - (u_char *) opos);
150: pos += ((char *) bufend - opos);
151: overflow = 0;
152: }
153: for (;;) {
1.4 millert 154: if (flno >= 0 && (fp = fstack[flno].fp) == NULL)
155: return (EOF);
156: else if (fp == 0) {
157: if (fileno >= nfiles)
1.1 millert 158: return (EOF);
159: if (!(fp = fopen(filelist.names[fileno], "r")))
1.6 ! millert 160: err(2, "%s", filelist.names[fileno]);
1.4 millert 161: fileno++;
1.1 millert 162: }
163: while ((pos < end) && ((c = getc(fp)) != EOF)) {
164: if ((*pos++ = c) == REC_D) {
165: buffer->offset = 0;
166: buffer->length = pos - (char *) buffer->data;
167: return (0);
168: }
169: }
170: if (pos >= end && end == (char *) bufend) {
171: if ((char *) buffer->data < end) {
172: overflow = 1;
173: opos = (char *) buffer->data;
174: }
175: return (BUFFEND);
176: } else if (c == EOF) {
177: if (buffer->data != (u_char *) pos) {
178: warnx("last character not record delimiter");
179: *pos++ = REC_D;
180: buffer->offset = 0;
181: buffer->length = pos - (char *) buffer->data;
1.4 millert 182: return (0);
1.1 millert 183: }
184: FCLOSE(fp);
185: fp = 0;
1.4 millert 186: if (flno >= 0)
187: fstack[flno].fp = 0;
1.1 millert 188: } else {
189: buffer->data[100] = '\000';
1.2 millert 190: warnx("line too long: ignoring %s...", buffer->data);
1.1 millert 191: }
192: }
193: }
194:
195: /*
196: * This generates keys. It's only called in the first fsort pass
197: */
198: int
199: makekey(flno, filelist, nfiles, buffer, bufend, ftbl)
200: int flno, nfiles;
201: union f_handle filelist;
1.5 millert 202: RECHEADER *buffer;
1.1 millert 203: u_char *bufend;
204: struct field *ftbl;
205: {
206: static int (*get)();
207: static int fileno = 0;
208: static FILE *dbdesc = 0;
209: static DBT dbkey[1], line[1];
210: static int overflow = 0;
211: int c;
1.4 millert 212:
1.1 millert 213: if (overflow) {
214: overflow = 0;
215: enterkey(buffer, line, bufend - (u_char *) buffer, ftbl);
216: return (0);
217: }
218: for (;;) {
219: if (flno >= 0) {
220: get = seq;
221: if (!(dbdesc = fstack[flno].fp))
1.4 millert 222: return (EOF);
1.1 millert 223: } else if (!dbdesc) {
224: if (fileno >= nfiles)
225: return (EOF);
226: dbdesc = fopen(filelist.names[fileno], "r");
227: if (!dbdesc)
1.6 ! millert 228: err(2, "%s", filelist.names[fileno]);
1.4 millert 229: fileno++;
1.1 millert 230: get = seq;
231: }
232: if (!(c = get(dbdesc, line, dbkey))) {
233: if ((signed)line->size > bufend - buffer->data)
234: overflow = 1;
235: else
236: overflow = enterkey(buffer, line,
237: bufend - (u_char *) buffer, ftbl);
238: if (overflow)
239: return (BUFFEND);
240: else
241: return (0);
242: }
243: if (c == EOF) {
244: FCLOSE(dbdesc);
245: dbdesc = 0;
1.4 millert 246: if (flno >= 0)
247: fstack[flno].fp = 0;
1.1 millert 248: } else {
249: ((char *) line->data)[60] = '\000';
250: warnx("line too long: ignoring %.100s...",
251: (char *)line->data);
252: }
253: }
254: }
255:
256: /*
257: * get a key/line pair from fp
258: */
259: int
260: seq(fp, line, key)
261: FILE *fp;
1.4 millert 262: DBT *line;
263: DBT *key;
1.1 millert 264: {
265: static char *buf, flag = 1;
266: register char *end, *pos;
267: register int c;
1.4 millert 268:
1.1 millert 269: if (flag) {
270: flag = 0;
271: buf = (char *) linebuf;
272: end = buf + MAXLLEN;
273: line->data = buf;
274: }
275: pos = buf;
276: while ((c = getc(fp)) != EOF) {
277: if ((*pos++ = c) == REC_D) {
278: line->size = pos - buf;
279: return (0);
280: }
281: if (pos == end) {
282: line->size = MAXLLEN;
283: *--pos = REC_D;
284: while ((c = getc(fp)) != EOF) {
285: if (c == REC_D)
286: return (BUFFEND);
287: }
288: }
289: }
290: if (pos != buf) {
291: warnx("last character not record delimiter");
292: *pos++ = REC_D;
293: line->size = pos - buf;
294: return (0);
295: } else
296: return (EOF);
297: }
298:
299: /*
300: * write a key/line pair to a temporary file
301: */
302: void
303: putrec(rec, fp)
1.5 millert 304: register RECHEADER *rec;
1.1 millert 305: register FILE *fp;
306: {
307: EWRITE(rec, 1, rec->length + sizeof(TRECHEADER), fp);
308: }
309:
310: /*
311: * write a line to output
312: */
313: void
314: putline(rec, fp)
1.5 millert 315: register RECHEADER *rec;
1.1 millert 316: register FILE *fp;
317: {
318: EWRITE(rec->data+rec->offset, 1, rec->length - rec->offset, fp);
319: }
320:
321: /*
322: * get a record from a temporary file. (Used by merge sort.)
323: */
324: int
325: geteasy(flno, filelist, nfiles, rec, end, dummy2)
326: int flno, nfiles;
327: union f_handle filelist;
1.5 millert 328: register RECHEADER *rec;
1.1 millert 329: register u_char *end;
330: struct field *dummy2;
331: {
332: int i;
333: FILE *fp;
1.4 millert 334:
1.1 millert 335: fp = fstack[flno].fp;
336: if ((u_char *) rec > end - sizeof(TRECHEADER))
337: return (BUFFEND);
338: if (!fread(rec, 1, sizeof(TRECHEADER), fp)) {
339: fclose(fp);
340: fstack[flno].fp = 0;
341: return (EOF);
342: }
343: if (end - rec->data < rec->length) {
344: for (i = sizeof(TRECHEADER) - 1; i >= 0; i--)
345: ungetc(*((char *) rec + i), fp);
346: return (BUFFEND);
347: }
348: fread(rec->data, rec->length, 1, fp);
349: return (0);
350: }