Annotation of src/usr.bin/sort/files.c, Revision 1.8
1.8 ! millert 1: /* $OpenBSD: files.c,v 1.7 2001/02/04 21:27:00 ericj Exp $ */
1.1 millert 2:
3: /*-
4: * Copyright (c) 1993
5: * The Regents of the University of California. All rights reserved.
6: *
7: * This code is derived from software contributed to Berkeley by
8: * Peter McIlroy.
9: *
10: * Redistribution and use in source and binary forms, with or without
11: * modification, are permitted provided that the following conditions
12: * are met:
13: * 1. Redistributions of source code must retain the above copyright
14: * notice, this list of conditions and the following disclaimer.
15: * 2. Redistributions in binary form must reproduce the above copyright
16: * notice, this list of conditions and the following disclaimer in the
17: * documentation and/or other materials provided with the distribution.
18: * 3. All advertising materials mentioning features or use of this software
19: * must display the following acknowledgement:
20: * This product includes software developed by the University of
21: * California, Berkeley and its contributors.
22: * 4. Neither the name of the University nor the names of its contributors
23: * may be used to endorse or promote products derived from this software
24: * without specific prior written permission.
25: *
26: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36: * SUCH DAMAGE.
37: */
38:
39: #ifndef lint
40: #if 0
41: static char sccsid[] = "@(#)files.c 8.1 (Berkeley) 6/6/93";
42: #else
1.8 ! millert 43: static char rcsid[] = "$OpenBSD: files.c,v 1.7 2001/02/04 21:27:00 ericj Exp $";
1.1 millert 44: #endif
45: #endif /* not lint */
46:
47: #include "sort.h"
48: #include "fsort.h"
49:
50: #include <string.h>
51:
1.8 ! millert 52: static int seq(FILE *, DBT *, DBT *);
1.7 ericj 53:
1.1 millert 54: /*
55: * this is the subroutine for file management for fsort().
56: * It keeps the buffers for all temporary files.
57: */
58: int
59: getnext(binno, infl0, nfiles, pos, end, dummy)
1.4 millert 60: int binno;
1.1 millert 61: union f_handle infl0;
1.4 millert 62: int nfiles;
1.7 ericj 63: RECHEADER *pos;
64: u_char *end;
1.1 millert 65: struct field *dummy;
66: {
1.7 ericj 67: int i;
68: u_char *hp;
69: static size_t nleft = 0;
1.1 millert 70: static int cnt = 0, flag = -1;
71: static u_char maxb = 0;
72: static FILE *fp;
73:
74: if (nleft == 0) {
75: if (binno < 0) /* reset files. */ {
76: for (i = 0; i < nfiles; i++) {
77: rewind(fstack[infl0.top + i].fp);
78: fstack[infl0.top + i].max_o = 0;
79: }
80: flag = -1;
81: nleft = cnt = 0;
1.4 millert 82: return (-1);
1.1 millert 83: }
84: maxb = fstack[infl0.top].maxb;
85: for (; nleft == 0; cnt++) {
86: if (cnt >= nfiles) {
87: cnt = 0;
88: return (EOF);
89: }
90: fp = fstack[infl0.top + cnt].fp;
1.7 ericj 91: fread(&nleft, sizeof(nleft), 1, fp);
1.1 millert 92: if (binno < maxb)
93: fstack[infl0.top+cnt].max_o
94: += sizeof(nleft) + nleft;
95: else if (binno == maxb) {
96: if (binno != fstack[infl0.top].lastb) {
97: fseek(fp, fstack[infl0.top+
98: cnt].max_o, SEEK_SET);
99: fread(&nleft, sizeof(nleft), 1, fp);
100: }
101: if (nleft == 0)
102: fclose(fp);
103: } else if (binno == maxb + 1) { /* skip a bin */
104: fseek(fp, nleft, SEEK_CUR);
105: fread(&nleft, sizeof(nleft), 1, fp);
106: flag = cnt;
107: }
108: }
109: }
110: if ((u_char *) pos > end - sizeof(TRECHEADER))
111: return (BUFFEND);
1.7 ericj 112: fread(pos, sizeof(TRECHEADER), 1, fp);
1.1 millert 113: if (end - pos->data < pos->length) {
1.7 ericj 114: hp = ((u_char *)pos) + sizeof(TRECHEADER);
1.1 millert 115: for (i = sizeof(TRECHEADER); i ; i--)
116: ungetc(*--hp, fp);
117: return (BUFFEND);
118: }
119: fread(pos->data, pos->length, 1, fp);
120: nleft -= pos->length + sizeof(TRECHEADER);
121: if (nleft == 0 && binno == fstack[infl0.top].maxb)
122: fclose(fp);
123: return (0);
124: }
125:
126: /*
127: * this is called when there is no special key. It's only called
128: * in the first fsort pass.
129: */
130: int
131: makeline(flno, filelist, nfiles, buffer, bufend, dummy2)
1.4 millert 132: int flno;
1.1 millert 133: union f_handle filelist;
1.4 millert 134: int nfiles;
1.5 millert 135: RECHEADER *buffer;
1.1 millert 136: u_char *bufend;
137: struct field *dummy2;
138: {
1.7 ericj 139: static u_char *obufend;
140: static size_t osz;
141: char *pos;
1.1 millert 142: static int fileno = 0, overflow = 0;
143: static FILE *fp = 0;
1.7 ericj 144: int c;
1.1 millert 145:
146: pos = (char *) buffer->data;
147: if (overflow) {
1.7 ericj 148: /*
149: * Buffer shortage is solved by either of two ways:
150: * * Flush previous buffered data and start using the
151: * buffer from start (see fsort())
152: * * realloc buffer and bump bufend
153: *
154: * The former is perferred, realloc is only done when
155: * there is exactly one item in buffer which does not fit.
156: */
157: if (bufend == obufend)
158: memmove(pos, bufend - osz, osz);
159: pos+=osz;
1.1 millert 160: overflow = 0;
161: }
162: for (;;) {
1.4 millert 163: if (flno >= 0 && (fp = fstack[flno].fp) == NULL)
164: return (EOF);
165: else if (fp == 0) {
166: if (fileno >= nfiles)
1.1 millert 167: return (EOF);
168: if (!(fp = fopen(filelist.names[fileno], "r")))
1.6 millert 169: err(2, "%s", filelist.names[fileno]);
1.4 millert 170: fileno++;
1.1 millert 171: }
1.7 ericj 172: while ((pos < (char *)bufend) && ((c = getc(fp)) != EOF)) {
1.1 millert 173: if ((*pos++ = c) == REC_D) {
174: buffer->offset = 0;
175: buffer->length = pos - (char *) buffer->data;
176: return (0);
177: }
178: }
1.7 ericj 179: if (pos >= (char *)bufend) {
180: if (buffer->data < bufend) {
1.1 millert 181: overflow = 1;
1.7 ericj 182: obufend = bufend;
183: osz = (pos - (char *)buffer->data);
1.1 millert 184: }
185: return (BUFFEND);
186: } else if (c == EOF) {
187: if (buffer->data != (u_char *) pos) {
188: *pos++ = REC_D;
189: buffer->offset = 0;
190: buffer->length = pos - (char *) buffer->data;
1.4 millert 191: return (0);
1.1 millert 192: }
193: FCLOSE(fp);
194: fp = 0;
1.4 millert 195: if (flno >= 0)
196: fstack[flno].fp = 0;
1.1 millert 197: } else {
1.7 ericj 198: warnx("line too long: ignoring %100s...", buffer->data);
199:
200: /* Consume the rest of the line from input */
201: while((c = getc(fp)) != REC_D && c != EOF)
202: ;
203:
204: buffer->offset = 0;
205: buffer->length = 0;
206:
207: return (BUFFEND);
1.1 millert 208: }
209: }
210: }
211:
212: /*
213: * This generates keys. It's only called in the first fsort pass
214: */
215: int
216: makekey(flno, filelist, nfiles, buffer, bufend, ftbl)
217: int flno, nfiles;
218: union f_handle filelist;
1.5 millert 219: RECHEADER *buffer;
1.1 millert 220: u_char *bufend;
221: struct field *ftbl;
222: {
223: static int fileno = 0;
224: static FILE *dbdesc = 0;
225: static DBT dbkey[1], line[1];
226: static int overflow = 0;
1.7 ericj 227: static int c;
1.4 millert 228:
1.1 millert 229: if (overflow) {
1.7 ericj 230: overflow = enterkey(buffer, line, bufend - (u_char *)buffer,
231: ftbl);
232: if (overflow)
233: return (BUFFEND);
234: else
235: return (0);
1.1 millert 236: }
1.7 ericj 237:
1.1 millert 238: for (;;) {
239: if (flno >= 0) {
240: if (!(dbdesc = fstack[flno].fp))
1.4 millert 241: return (EOF);
1.1 millert 242: } else if (!dbdesc) {
243: if (fileno >= nfiles)
244: return (EOF);
245: dbdesc = fopen(filelist.names[fileno], "r");
246: if (!dbdesc)
1.6 millert 247: err(2, "%s", filelist.names[fileno]);
1.4 millert 248: fileno++;
1.1 millert 249: }
1.7 ericj 250: if (!(c = seq(dbdesc, line, dbkey))) {
251: if ((signed)line->size > bufend - buffer->data) {
1.1 millert 252: overflow = 1;
1.7 ericj 253: } else {
1.1 millert 254: overflow = enterkey(buffer, line,
255: bufend - (u_char *) buffer, ftbl);
1.7 ericj 256: }
1.1 millert 257: if (overflow)
258: return (BUFFEND);
259: else
260: return (0);
261: }
262: if (c == EOF) {
263: FCLOSE(dbdesc);
264: dbdesc = 0;
1.4 millert 265: if (flno >= 0)
266: fstack[flno].fp = 0;
1.1 millert 267: } else {
268: ((char *) line->data)[60] = '\000';
269: warnx("line too long: ignoring %.100s...",
270: (char *)line->data);
271: }
272: }
273: }
274:
275: /*
276: * get a key/line pair from fp
277: */
1.7 ericj 278: static int
1.1 millert 279: seq(fp, line, key)
280: FILE *fp;
1.4 millert 281: DBT *line;
282: DBT *key;
1.1 millert 283: {
284: static char *buf, flag = 1;
1.7 ericj 285: char *end, *pos;
286: int c;
1.4 millert 287:
1.1 millert 288: if (flag) {
289: flag = 0;
290: buf = (char *) linebuf;
1.7 ericj 291: end = buf + linebuf_size;
1.1 millert 292: line->data = buf;
293: }
294: pos = buf;
295: while ((c = getc(fp)) != EOF) {
296: if ((*pos++ = c) == REC_D) {
297: line->size = pos - buf;
298: return (0);
299: }
300: if (pos == end) {
1.7 ericj 301: linebuf_size *= 2;
302: linebuf = realloc(linebuf, linebuf_size);
303: if (!linebuf)
304: err(2, "realloc of linebuf to %lu bytes failed",
305: (unsigned long)linebuf_size);
306: end = linebuf + linebuf_size;
307: pos = linebuf + (pos - buf);
308: line->data = buf = (char *)linebuf;
309: continue;
1.1 millert 310: }
311: }
312: if (pos != buf) {
313: *pos++ = REC_D;
314: line->size = pos - buf;
315: return (0);
316: } else
317: return (EOF);
318: }
319:
320: /*
321: * write a key/line pair to a temporary file
322: */
323: void
324: putrec(rec, fp)
1.7 ericj 325: RECHEADER *rec;
326: FILE *fp;
1.1 millert 327: {
328: EWRITE(rec, 1, rec->length + sizeof(TRECHEADER), fp);
329: }
330:
331: /*
332: * write a line to output
333: */
334: void
335: putline(rec, fp)
1.7 ericj 336: RECHEADER *rec;
337: FILE *fp;
1.1 millert 338: {
339: EWRITE(rec->data+rec->offset, 1, rec->length - rec->offset, fp);
340: }
341:
342: /*
343: * get a record from a temporary file. (Used by merge sort.)
344: */
345: int
346: geteasy(flno, filelist, nfiles, rec, end, dummy2)
347: int flno, nfiles;
348: union f_handle filelist;
1.7 ericj 349: RECHEADER *rec;
350: u_char *end;
1.1 millert 351: struct field *dummy2;
352: {
353: int i;
354: FILE *fp;
1.4 millert 355:
1.1 millert 356: fp = fstack[flno].fp;
357: if ((u_char *) rec > end - sizeof(TRECHEADER))
358: return (BUFFEND);
359: if (!fread(rec, 1, sizeof(TRECHEADER), fp)) {
360: fclose(fp);
361: fstack[flno].fp = 0;
362: return (EOF);
363: }
364: if (end - rec->data < rec->length) {
365: for (i = sizeof(TRECHEADER) - 1; i >= 0; i--)
366: ungetc(*((char *) rec + i), fp);
367: return (BUFFEND);
368: }
369: fread(rec->data, rec->length, 1, fp);
370: return (0);
371: }