Annotation of src/usr.bin/sort/files.c, Revision 1.2
1.2 ! millert 1: /* $OpenBSD: files.c,v 1.1 1997/01/20 19:39:50 millert Exp $ */
1.1 millert 2:
3: /*-
4: * Copyright (c) 1993
5: * The Regents of the University of California. All rights reserved.
6: *
7: * This code is derived from software contributed to Berkeley by
8: * Peter McIlroy.
9: *
10: * Redistribution and use in source and binary forms, with or without
11: * modification, are permitted provided that the following conditions
12: * are met:
13: * 1. Redistributions of source code must retain the above copyright
14: * notice, this list of conditions and the following disclaimer.
15: * 2. Redistributions in binary form must reproduce the above copyright
16: * notice, this list of conditions and the following disclaimer in the
17: * documentation and/or other materials provided with the distribution.
18: * 3. All advertising materials mentioning features or use of this software
19: * must display the following acknowledgement:
20: * This product includes software developed by the University of
21: * California, Berkeley and its contributors.
22: * 4. Neither the name of the University nor the names of its contributors
23: * may be used to endorse or promote products derived from this software
24: * without specific prior written permission.
25: *
26: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36: * SUCH DAMAGE.
37: */
38:
/*
 * Source identification strings; the whole group is left out when `lint'
 * is defined.  The SCCS string sits under `#if 0', so only the RCS string
 * in the `#else' arm is compiled.
 */
39: #ifndef lint
40: #if 0
41: static char sccsid[] = "@(#)files.c 8.1 (Berkeley) 6/6/93";
42: #else
1.2 ! millert 43: static char rcsid[] = "$OpenBSD: files.c,v 1.1 1997/01/20 19:39:50 millert Exp $";
1.1 millert 44: #endif
45: #endif /* not lint */
46:
47: #include "sort.h"
48: #include "fsort.h"
49:
50: #include <string.h>
51:
52: /*
53: * this is the subroutine for file management for fsort().
54: * It keeps the buffers for all temporary files.
55: */
56: int
57: getnext(binno, infl0, nfiles, pos, end, dummy)
58: int binno, nfiles;
59: union f_handle infl0;
60: register struct recheader *pos;
61: register u_char *end;
62: struct field *dummy;
63: {
64: register int i;
65: register u_char *hp;
66: static long nleft = 0;
67: static int cnt = 0, flag = -1;
68: static u_char maxb = 0;
69: static FILE *fp;
70:
71: if (nleft == 0) {
72: if (binno < 0) /* reset files. */ {
73: for (i = 0; i < nfiles; i++) {
74: rewind(fstack[infl0.top + i].fp);
75: fstack[infl0.top + i].max_o = 0;
76: }
77: flag = -1;
78: nleft = cnt = 0;
79: return(-1);
80: }
81: maxb = fstack[infl0.top].maxb;
82: for (; nleft == 0; cnt++) {
83: if (cnt >= nfiles) {
84: cnt = 0;
85: return (EOF);
86: }
87: fp = fstack[infl0.top + cnt].fp;
88: hp = (u_char *) &nleft;
89: for (i = sizeof(TRECHEADER); i; --i)
90: *hp++ = getc(fp);
91: if (binno < maxb)
92: fstack[infl0.top+cnt].max_o
93: += sizeof(nleft) + nleft;
94: else if (binno == maxb) {
95: if (binno != fstack[infl0.top].lastb) {
96: fseek(fp, fstack[infl0.top+
97: cnt].max_o, SEEK_SET);
98: fread(&nleft, sizeof(nleft), 1, fp);
99: }
100: if (nleft == 0)
101: fclose(fp);
102: } else if (binno == maxb + 1) { /* skip a bin */
103: fseek(fp, nleft, SEEK_CUR);
104: fread(&nleft, sizeof(nleft), 1, fp);
105: flag = cnt;
106: }
107: }
108: }
109: if ((u_char *) pos > end - sizeof(TRECHEADER))
110: return (BUFFEND);
111: hp = (u_char *) pos;
112: for (i = sizeof(TRECHEADER); i ; --i)
113: *hp++ = (u_char) getc(fp);
114: if (end - pos->data < pos->length) {
115: for (i = sizeof(TRECHEADER); i ; i--)
116: ungetc(*--hp, fp);
117: return (BUFFEND);
118: }
119: fread(pos->data, pos->length, 1, fp);
120: nleft -= pos->length + sizeof(TRECHEADER);
121: if (nleft == 0 && binno == fstack[infl0.top].maxb)
122: fclose(fp);
123: return (0);
124: }
125:
126: /*
127: * this is called when there is no special key. It's only called
128: * in the first fsort pass.
129: */
130: int
131: makeline(flno, filelist, nfiles, buffer, bufend, dummy2)
132: int flno, nfiles;
133: union f_handle filelist;
134: struct recheader *buffer;
135: u_char *bufend;
136: struct field *dummy2;
137: {
138: static char *opos;
139: register char *end, *pos;
140: static int fileno = 0, overflow = 0;
141: static FILE *fp = 0;
142: register int c;
143:
144: pos = (char *) buffer->data;
145: end = min((char *) bufend, pos + MAXLLEN);
146: if (overflow) {
147: memmove(pos, opos, bufend - (u_char *) opos);
148: pos += ((char *) bufend - opos);
149: overflow = 0;
150: }
151: for (;;) {
152: if (flno >= 0) {
153: if (!(fp = fstack[flno].fp))
154: return (EOF);
155: } else if (!fp) {
156: if (fileno >= nfiles) return(EOF);
157: if (!(fp = fopen(filelist.names[fileno], "r")))
158: err(2, "%s", filelist.names[fileno]);
159: ++fileno;
160: }
161: while ((pos < end) && ((c = getc(fp)) != EOF)) {
162: if ((*pos++ = c) == REC_D) {
163: buffer->offset = 0;
164: buffer->length = pos - (char *) buffer->data;
165: return (0);
166: }
167: }
168: if (pos >= end && end == (char *) bufend) {
169: if ((char *) buffer->data < end) {
170: overflow = 1;
171: opos = (char *) buffer->data;
172: }
173: return (BUFFEND);
174: } else if (c == EOF) {
175: if (buffer->data != (u_char *) pos) {
176: warnx("last character not record delimiter");
177: *pos++ = REC_D;
178: buffer->offset = 0;
179: buffer->length = pos - (char *) buffer->data;
180: return(0);
181: }
182: FCLOSE(fp);
183: fp = 0;
184: if(flno >= 0) fstack[flno].fp = 0;
185: } else {
186: buffer->data[100] = '\000';
1.2 ! millert 187: warnx("line too long: ignoring %s...", buffer->data);
1.1 millert 188: }
189: }
190: }
191:
192: /*
193: * This generates keys. It's only called in the first fsort pass
194: */
195: int
196: makekey(flno, filelist, nfiles, buffer, bufend, ftbl)
197: int flno, nfiles;
198: union f_handle filelist;
199: struct recheader *buffer;
200: u_char *bufend;
201: struct field *ftbl;
202: {
203: static int (*get)();
204: static int fileno = 0;
205: static FILE *dbdesc = 0;
206: static DBT dbkey[1], line[1];
207: static int overflow = 0;
208: int c;
209: if (overflow) {
210: overflow = 0;
211: enterkey(buffer, line, bufend - (u_char *) buffer, ftbl);
212: return (0);
213: }
214: for (;;) {
215: if (flno >= 0) {
216: get = seq;
217: if (!(dbdesc = fstack[flno].fp))
218: return(EOF);
219: } else if (!dbdesc) {
220: if (fileno >= nfiles)
221: return (EOF);
222: dbdesc = fopen(filelist.names[fileno], "r");
223: if (!dbdesc)
224: err(2, "%s", filelist.names[fileno]);
225: ++fileno;
226: get = seq;
227: }
228: if (!(c = get(dbdesc, line, dbkey))) {
229: if ((signed)line->size > bufend - buffer->data)
230: overflow = 1;
231: else
232: overflow = enterkey(buffer, line,
233: bufend - (u_char *) buffer, ftbl);
234: if (overflow)
235: return (BUFFEND);
236: else
237: return (0);
238: }
239: if (c == EOF) {
240: FCLOSE(dbdesc);
241: dbdesc = 0;
242: if (flno >= 0) fstack[flno].fp = 0;
243: } else {
244:
245: ((char *) line->data)[60] = '\000';
246: warnx("line too long: ignoring %.100s...",
247: (char *)line->data);
248: }
249:
250: }
251: }
252:
253: /*
254: * get a key/line pair from fp
255: */
256: int
257: seq(fp, line, key)
258: FILE *fp;
259: DBT *key, *line;
260: {
261: static char *buf, flag = 1;
262: register char *end, *pos;
263: register int c;
264: if (flag) {
265: flag = 0;
266: buf = (char *) linebuf;
267: end = buf + MAXLLEN;
268: line->data = buf;
269: }
270: pos = buf;
271: while ((c = getc(fp)) != EOF) {
272: if ((*pos++ = c) == REC_D) {
273: line->size = pos - buf;
274: return (0);
275: }
276: if (pos == end) {
277: line->size = MAXLLEN;
278: *--pos = REC_D;
279: while ((c = getc(fp)) != EOF) {
280: if (c == REC_D)
281: return (BUFFEND);
282: }
283: }
284: }
285: if (pos != buf) {
286: warnx("last character not record delimiter");
287: *pos++ = REC_D;
288: line->size = pos - buf;
289: return (0);
290: } else
291: return (EOF);
292: }
293:
294: /*
295: * write a key/line pair to a temporary file
296: */
297: void
298: putrec(rec, fp)
299: register struct recheader *rec;
300: register FILE *fp;
301: {
302: EWRITE(rec, 1, rec->length + sizeof(TRECHEADER), fp);
303: }
304:
305: /*
306: * write a line to output
307: */
308: void
309: putline(rec, fp)
310: register struct recheader *rec;
311: register FILE *fp;
312: {
313: EWRITE(rec->data+rec->offset, 1, rec->length - rec->offset, fp);
314: }
315:
316: /*
317: * get a record from a temporary file. (Used by merge sort.)
318: */
319: int
320: geteasy(flno, filelist, nfiles, rec, end, dummy2)
321: int flno, nfiles;
322: union f_handle filelist;
323: register struct recheader *rec;
324: register u_char *end;
325: struct field *dummy2;
326: {
327: int i;
328: FILE *fp;
329: fp = fstack[flno].fp;
330: if ((u_char *) rec > end - sizeof(TRECHEADER))
331: return (BUFFEND);
332: if (!fread(rec, 1, sizeof(TRECHEADER), fp)) {
333: fclose(fp);
334: fstack[flno].fp = 0;
335: return (EOF);
336: }
337: if (end - rec->data < rec->length) {
338: for (i = sizeof(TRECHEADER) - 1; i >= 0; i--)
339: ungetc(*((char *) rec + i), fp);
340: return (BUFFEND);
341: }
342: fread(rec->data, rec->length, 1, fp);
343: return (0);
344: }