Annotation of src/usr.bin/sort/files.c, Revision 1.11
1.11 ! deraadt 1: /* $OpenBSD: files.c,v 1.10 2003/11/15 05:56:28 tedu Exp $ */
1.1 millert 2:
3: /*-
4: * Copyright (c) 1993
5: * The Regents of the University of California. All rights reserved.
6: *
7: * This code is derived from software contributed to Berkeley by
8: * Peter McIlroy.
9: *
10: * Redistribution and use in source and binary forms, with or without
11: * modification, are permitted provided that the following conditions
12: * are met:
13: * 1. Redistributions of source code must retain the above copyright
14: * notice, this list of conditions and the following disclaimer.
15: * 2. Redistributions in binary form must reproduce the above copyright
16: * notice, this list of conditions and the following disclaimer in the
17: * documentation and/or other materials provided with the distribution.
1.9 millert 18: * 3. Neither the name of the University nor the names of its contributors
1.1 millert 19: * may be used to endorse or promote products derived from this software
20: * without specific prior written permission.
21: *
22: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32: * SUCH DAMAGE.
33: */
34:
35: #ifndef lint
36: #if 0
37: static char sccsid[] = "@(#)files.c 8.1 (Berkeley) 6/6/93";
38: #else
1.11 ! deraadt 39: static char rcsid[] = "$OpenBSD: files.c,v 1.10 2003/11/15 05:56:28 tedu Exp $";
1.1 millert 40: #endif
41: #endif /* not lint */
42:
43: #include "sort.h"
44: #include "fsort.h"
45:
46: #include <string.h>
47:
1.8 millert 48: static int seq(FILE *, DBT *, DBT *);
1.7 ericj 49:
1.1 millert 50: /*
51: * this is the subroutine for file management for fsort().
52: * It keeps the buffers for all temporary files.
53: */
54: int
1.11 ! deraadt 55: getnext(int binno, union f_handle infl0, int nfiles, RECHEADER *pos, u_char *end,
! 56: struct field *dummy)
1.1 millert 57: {
1.7 ericj 58: int i;
59: u_char *hp;
60: static size_t nleft = 0;
1.1 millert 61: static int cnt = 0, flag = -1;
62: static u_char maxb = 0;
63: static FILE *fp;
64:
65: if (nleft == 0) {
66: if (binno < 0) /* reset files. */ {
67: for (i = 0; i < nfiles; i++) {
68: rewind(fstack[infl0.top + i].fp);
69: fstack[infl0.top + i].max_o = 0;
70: }
71: flag = -1;
72: nleft = cnt = 0;
1.4 millert 73: return (-1);
1.1 millert 74: }
75: maxb = fstack[infl0.top].maxb;
76: for (; nleft == 0; cnt++) {
77: if (cnt >= nfiles) {
78: cnt = 0;
79: return (EOF);
80: }
81: fp = fstack[infl0.top + cnt].fp;
1.7 ericj 82: fread(&nleft, sizeof(nleft), 1, fp);
1.1 millert 83: if (binno < maxb)
84: fstack[infl0.top+cnt].max_o
85: += sizeof(nleft) + nleft;
86: else if (binno == maxb) {
87: if (binno != fstack[infl0.top].lastb) {
88: fseek(fp, fstack[infl0.top+
89: cnt].max_o, SEEK_SET);
90: fread(&nleft, sizeof(nleft), 1, fp);
91: }
92: if (nleft == 0)
93: fclose(fp);
94: } else if (binno == maxb + 1) { /* skip a bin */
95: fseek(fp, nleft, SEEK_CUR);
96: fread(&nleft, sizeof(nleft), 1, fp);
97: flag = cnt;
98: }
99: }
100: }
101: if ((u_char *) pos > end - sizeof(TRECHEADER))
102: return (BUFFEND);
1.7 ericj 103: fread(pos, sizeof(TRECHEADER), 1, fp);
1.1 millert 104: if (end - pos->data < pos->length) {
1.7 ericj 105: hp = ((u_char *)pos) + sizeof(TRECHEADER);
1.1 millert 106: for (i = sizeof(TRECHEADER); i ; i--)
107: ungetc(*--hp, fp);
108: return (BUFFEND);
109: }
110: fread(pos->data, pos->length, 1, fp);
111: nleft -= pos->length + sizeof(TRECHEADER);
112: if (nleft == 0 && binno == fstack[infl0.top].maxb)
113: fclose(fp);
114: return (0);
115: }
116:
117: /*
118: * this is called when there is no special key. It's only called
119: * in the first fsort pass.
120: */
121: int
1.11 ! deraadt 122: makeline(int flno, union f_handle filelist, int nfiles, RECHEADER *buffer,
! 123: u_char *bufend, struct field *dummy2)
1.1 millert 124: {
1.7 ericj 125: static u_char *obufend;
126: static size_t osz;
127: char *pos;
1.1 millert 128: static int fileno = 0, overflow = 0;
129: static FILE *fp = 0;
1.7 ericj 130: int c;
1.1 millert 131:
132: pos = (char *) buffer->data;
133: if (overflow) {
1.7 ericj 134: /*
135: * Buffer shortage is solved by either of two ways:
136: * * Flush previous buffered data and start using the
137: * buffer from start (see fsort())
138: * * realloc buffer and bump bufend
139: *
1.10 tedu 140: * The former is preferred, realloc is only done when
1.7 ericj 141: * there is exactly one item in buffer which does not fit.
142: */
143: if (bufend == obufend)
144: memmove(pos, bufend - osz, osz);
145: pos+=osz;
1.1 millert 146: overflow = 0;
147: }
148: for (;;) {
1.4 millert 149: if (flno >= 0 && (fp = fstack[flno].fp) == NULL)
150: return (EOF);
151: else if (fp == 0) {
152: if (fileno >= nfiles)
1.1 millert 153: return (EOF);
154: if (!(fp = fopen(filelist.names[fileno], "r")))
1.6 millert 155: err(2, "%s", filelist.names[fileno]);
1.4 millert 156: fileno++;
1.1 millert 157: }
1.7 ericj 158: while ((pos < (char *)bufend) && ((c = getc(fp)) != EOF)) {
1.1 millert 159: if ((*pos++ = c) == REC_D) {
160: buffer->offset = 0;
161: buffer->length = pos - (char *) buffer->data;
162: return (0);
163: }
164: }
1.7 ericj 165: if (pos >= (char *)bufend) {
166: if (buffer->data < bufend) {
1.1 millert 167: overflow = 1;
1.7 ericj 168: obufend = bufend;
169: osz = (pos - (char *)buffer->data);
1.1 millert 170: }
171: return (BUFFEND);
172: } else if (c == EOF) {
173: if (buffer->data != (u_char *) pos) {
174: *pos++ = REC_D;
175: buffer->offset = 0;
176: buffer->length = pos - (char *) buffer->data;
1.4 millert 177: return (0);
1.1 millert 178: }
179: FCLOSE(fp);
180: fp = 0;
1.4 millert 181: if (flno >= 0)
182: fstack[flno].fp = 0;
1.1 millert 183: } else {
1.7 ericj 184: warnx("line too long: ignoring %100s...", buffer->data);
185:
186: /* Consume the rest of the line from input */
187: while((c = getc(fp)) != REC_D && c != EOF)
188: ;
189:
190: buffer->offset = 0;
191: buffer->length = 0;
192:
193: return (BUFFEND);
1.1 millert 194: }
195: }
196: }
197:
198: /*
199: * This generates keys. It's only called in the first fsort pass
200: */
201: int
1.11 ! deraadt 202: makekey(int flno, union f_handle filelist, int nfiles, RECHEADER *buffer,
! 203: u_char *bufend, struct field *ftbl)
1.1 millert 204: {
205: static int fileno = 0;
206: static FILE *dbdesc = 0;
207: static DBT dbkey[1], line[1];
208: static int overflow = 0;
1.7 ericj 209: static int c;
1.4 millert 210:
1.1 millert 211: if (overflow) {
1.7 ericj 212: overflow = enterkey(buffer, line, bufend - (u_char *)buffer,
213: ftbl);
214: if (overflow)
215: return (BUFFEND);
216: else
217: return (0);
1.1 millert 218: }
1.7 ericj 219:
1.1 millert 220: for (;;) {
221: if (flno >= 0) {
222: if (!(dbdesc = fstack[flno].fp))
1.4 millert 223: return (EOF);
1.1 millert 224: } else if (!dbdesc) {
225: if (fileno >= nfiles)
226: return (EOF);
227: dbdesc = fopen(filelist.names[fileno], "r");
228: if (!dbdesc)
1.6 millert 229: err(2, "%s", filelist.names[fileno]);
1.4 millert 230: fileno++;
1.1 millert 231: }
1.7 ericj 232: if (!(c = seq(dbdesc, line, dbkey))) {
233: if ((signed)line->size > bufend - buffer->data) {
1.1 millert 234: overflow = 1;
1.7 ericj 235: } else {
1.1 millert 236: overflow = enterkey(buffer, line,
237: bufend - (u_char *) buffer, ftbl);
1.7 ericj 238: }
1.1 millert 239: if (overflow)
240: return (BUFFEND);
241: else
242: return (0);
243: }
244: if (c == EOF) {
245: FCLOSE(dbdesc);
246: dbdesc = 0;
1.4 millert 247: if (flno >= 0)
248: fstack[flno].fp = 0;
1.1 millert 249: } else {
250: ((char *) line->data)[60] = '\000';
251: warnx("line too long: ignoring %.100s...",
252: (char *)line->data);
253: }
254: }
255: }
256:
257: /*
258: * get a key/line pair from fp
259: */
1.7 ericj 260: static int
1.11 ! deraadt 261: seq(FILE *fp, DBT *line, DBT *key)
1.1 millert 262: {
263: static char *buf, flag = 1;
1.7 ericj 264: char *end, *pos;
265: int c;
1.4 millert 266:
1.1 millert 267: if (flag) {
268: flag = 0;
269: buf = (char *) linebuf;
1.7 ericj 270: end = buf + linebuf_size;
1.1 millert 271: line->data = buf;
272: }
273: pos = buf;
274: while ((c = getc(fp)) != EOF) {
275: if ((*pos++ = c) == REC_D) {
276: line->size = pos - buf;
277: return (0);
278: }
279: if (pos == end) {
1.7 ericj 280: linebuf_size *= 2;
281: linebuf = realloc(linebuf, linebuf_size);
282: if (!linebuf)
283: err(2, "realloc of linebuf to %lu bytes failed",
284: (unsigned long)linebuf_size);
285: end = linebuf + linebuf_size;
286: pos = linebuf + (pos - buf);
287: line->data = buf = (char *)linebuf;
288: continue;
1.1 millert 289: }
290: }
291: if (pos != buf) {
292: *pos++ = REC_D;
293: line->size = pos - buf;
294: return (0);
295: } else
296: return (EOF);
297: }
298:
299: /*
300: * write a key/line pair to a temporary file
301: */
302: void
1.11 ! deraadt 303: putrec(RECHEADER *rec, FILE *fp)
1.1 millert 304: {
305: EWRITE(rec, 1, rec->length + sizeof(TRECHEADER), fp);
306: }
307:
308: /*
309: * write a line to output
310: */
311: void
1.11 ! deraadt 312: putline(RECHEADER *rec, FILE *fp)
1.1 millert 313: {
314: EWRITE(rec->data+rec->offset, 1, rec->length - rec->offset, fp);
315: }
316:
317: /*
318: * get a record from a temporary file. (Used by merge sort.)
319: */
320: int
1.11 ! deraadt 321: geteasy(int flno, union f_handle filelist, int nfiles, RECHEADER *rec,
! 322: u_char *end, struct field *dummy2)
1.1 millert 323: {
324: int i;
325: FILE *fp;
1.4 millert 326:
1.1 millert 327: fp = fstack[flno].fp;
328: if ((u_char *) rec > end - sizeof(TRECHEADER))
329: return (BUFFEND);
330: if (!fread(rec, 1, sizeof(TRECHEADER), fp)) {
331: fclose(fp);
332: fstack[flno].fp = 0;
333: return (EOF);
334: }
335: if (end - rec->data < rec->length) {
336: for (i = sizeof(TRECHEADER) - 1; i >= 0; i--)
337: ungetc(*((char *) rec + i), fp);
338: return (BUFFEND);
339: }
340: fread(rec->data, rec->length, 1, fp);
341: return (0);
342: }