Annotation of src/usr.bin/sort/files.c, Revision 1.13
1.13 ! deraadt 1: /* $OpenBSD: files.c,v 1.12 2005/08/18 14:56:25 jaredy Exp $ */
1.1 millert 2:
3: /*-
4: * Copyright (c) 1993
5: * The Regents of the University of California. All rights reserved.
6: *
7: * This code is derived from software contributed to Berkeley by
8: * Peter McIlroy.
9: *
10: * Redistribution and use in source and binary forms, with or without
11: * modification, are permitted provided that the following conditions
12: * are met:
13: * 1. Redistributions of source code must retain the above copyright
14: * notice, this list of conditions and the following disclaimer.
15: * 2. Redistributions in binary form must reproduce the above copyright
16: * notice, this list of conditions and the following disclaimer in the
17: * documentation and/or other materials provided with the distribution.
1.9 millert 18: * 3. Neither the name of the University nor the names of its contributors
1.1 millert 19: * may be used to endorse or promote products derived from this software
20: * without specific prior written permission.
21: *
22: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32: * SUCH DAMAGE.
33: */
34:
35: #include "sort.h"
36: #include "fsort.h"
37:
38: #include <string.h>
39:
1.8 millert 40: static int seq(FILE *, DBT *, DBT *);
1.7 ericj 41:
1.1 millert 42: /*
43: * this is the subroutine for file management for fsort().
44: * It keeps the buffers for all temporary files.
45: */
46: int
1.11 deraadt 47: getnext(int binno, union f_handle infl0, int nfiles, RECHEADER *pos, u_char *end,
48: struct field *dummy)
1.1 millert 49: {
1.7 ericj 50: int i;
51: u_char *hp;
52: static size_t nleft = 0;
1.1 millert 53: static int cnt = 0, flag = -1;
54: static u_char maxb = 0;
55: static FILE *fp;
56:
57: if (nleft == 0) {
58: if (binno < 0) /* reset files. */ {
59: for (i = 0; i < nfiles; i++) {
60: rewind(fstack[infl0.top + i].fp);
61: fstack[infl0.top + i].max_o = 0;
62: }
63: flag = -1;
64: nleft = cnt = 0;
1.4 millert 65: return (-1);
1.1 millert 66: }
67: maxb = fstack[infl0.top].maxb;
68: for (; nleft == 0; cnt++) {
69: if (cnt >= nfiles) {
70: cnt = 0;
71: return (EOF);
72: }
73: fp = fstack[infl0.top + cnt].fp;
1.7 ericj 74: fread(&nleft, sizeof(nleft), 1, fp);
1.1 millert 75: if (binno < maxb)
76: fstack[infl0.top+cnt].max_o
77: += sizeof(nleft) + nleft;
78: else if (binno == maxb) {
79: if (binno != fstack[infl0.top].lastb) {
80: fseek(fp, fstack[infl0.top+
81: cnt].max_o, SEEK_SET);
82: fread(&nleft, sizeof(nleft), 1, fp);
83: }
84: if (nleft == 0)
85: fclose(fp);
86: } else if (binno == maxb + 1) { /* skip a bin */
87: fseek(fp, nleft, SEEK_CUR);
88: fread(&nleft, sizeof(nleft), 1, fp);
89: flag = cnt;
90: }
91: }
92: }
93: if ((u_char *) pos > end - sizeof(TRECHEADER))
94: return (BUFFEND);
1.7 ericj 95: fread(pos, sizeof(TRECHEADER), 1, fp);
1.1 millert 96: if (end - pos->data < pos->length) {
1.7 ericj 97: hp = ((u_char *)pos) + sizeof(TRECHEADER);
1.1 millert 98: for (i = sizeof(TRECHEADER); i ; i--)
99: ungetc(*--hp, fp);
100: return (BUFFEND);
101: }
102: fread(pos->data, pos->length, 1, fp);
103: nleft -= pos->length + sizeof(TRECHEADER);
104: if (nleft == 0 && binno == fstack[infl0.top].maxb)
105: fclose(fp);
106: return (0);
107: }
108:
109: /*
110: * this is called when there is no special key. It's only called
111: * in the first fsort pass.
112: */
113: int
1.11 deraadt 114: makeline(int flno, union f_handle filelist, int nfiles, RECHEADER *buffer,
115: u_char *bufend, struct field *dummy2)
1.1 millert 116: {
1.7 ericj 117: static u_char *obufend;
118: static size_t osz;
119: char *pos;
1.1 millert 120: static int fileno = 0, overflow = 0;
121: static FILE *fp = 0;
1.7 ericj 122: int c;
1.1 millert 123:
124: pos = (char *) buffer->data;
125: if (overflow) {
1.7 ericj 126: /*
127: * Buffer shortage is solved by either of two ways:
128: * * Flush previous buffered data and start using the
129: * buffer from start (see fsort())
130: * * realloc buffer and bump bufend
131: *
1.10 tedu 132: * The former is preferred, realloc is only done when
1.7 ericj 133: * there is exactly one item in buffer which does not fit.
134: */
135: if (bufend == obufend)
136: memmove(pos, bufend - osz, osz);
137: pos+=osz;
1.1 millert 138: overflow = 0;
139: }
140: for (;;) {
1.4 millert 141: if (flno >= 0 && (fp = fstack[flno].fp) == NULL)
142: return (EOF);
143: else if (fp == 0) {
144: if (fileno >= nfiles)
1.1 millert 145: return (EOF);
146: if (!(fp = fopen(filelist.names[fileno], "r")))
1.6 millert 147: err(2, "%s", filelist.names[fileno]);
1.4 millert 148: fileno++;
1.1 millert 149: }
1.7 ericj 150: while ((pos < (char *)bufend) && ((c = getc(fp)) != EOF)) {
1.1 millert 151: if ((*pos++ = c) == REC_D) {
152: buffer->offset = 0;
153: buffer->length = pos - (char *) buffer->data;
154: return (0);
155: }
156: }
1.7 ericj 157: if (pos >= (char *)bufend) {
158: if (buffer->data < bufend) {
1.1 millert 159: overflow = 1;
1.7 ericj 160: obufend = bufend;
161: osz = (pos - (char *)buffer->data);
1.1 millert 162: }
163: return (BUFFEND);
164: } else if (c == EOF) {
165: if (buffer->data != (u_char *) pos) {
166: *pos++ = REC_D;
167: buffer->offset = 0;
168: buffer->length = pos - (char *) buffer->data;
1.4 millert 169: return (0);
1.1 millert 170: }
171: FCLOSE(fp);
172: fp = 0;
1.4 millert 173: if (flno >= 0)
174: fstack[flno].fp = 0;
1.1 millert 175: } else {
1.7 ericj 176: warnx("line too long: ignoring %100s...", buffer->data);
177:
178: /* Consume the rest of the line from input */
179: while((c = getc(fp)) != REC_D && c != EOF)
180: ;
181:
182: buffer->offset = 0;
183: buffer->length = 0;
184:
185: return (BUFFEND);
1.1 millert 186: }
187: }
188: }
189:
190: /*
191: * This generates keys. It's only called in the first fsort pass
192: */
193: int
1.11 deraadt 194: makekey(int flno, union f_handle filelist, int nfiles, RECHEADER *buffer,
195: u_char *bufend, struct field *ftbl)
1.1 millert 196: {
197: static int fileno = 0;
198: static FILE *dbdesc = 0;
199: static DBT dbkey[1], line[1];
200: static int overflow = 0;
1.7 ericj 201: static int c;
1.4 millert 202:
1.1 millert 203: if (overflow) {
1.7 ericj 204: overflow = enterkey(buffer, line, bufend - (u_char *)buffer,
205: ftbl);
206: if (overflow)
207: return (BUFFEND);
208: else
209: return (0);
1.1 millert 210: }
1.7 ericj 211:
1.1 millert 212: for (;;) {
213: if (flno >= 0) {
214: if (!(dbdesc = fstack[flno].fp))
1.4 millert 215: return (EOF);
1.1 millert 216: } else if (!dbdesc) {
217: if (fileno >= nfiles)
218: return (EOF);
219: dbdesc = fopen(filelist.names[fileno], "r");
220: if (!dbdesc)
1.6 millert 221: err(2, "%s", filelist.names[fileno]);
1.4 millert 222: fileno++;
1.1 millert 223: }
1.7 ericj 224: if (!(c = seq(dbdesc, line, dbkey))) {
225: if ((signed)line->size > bufend - buffer->data) {
1.1 millert 226: overflow = 1;
1.7 ericj 227: } else {
1.1 millert 228: overflow = enterkey(buffer, line,
229: bufend - (u_char *) buffer, ftbl);
1.7 ericj 230: }
1.1 millert 231: if (overflow)
232: return (BUFFEND);
233: else
234: return (0);
235: }
236: if (c == EOF) {
237: FCLOSE(dbdesc);
238: dbdesc = 0;
1.4 millert 239: if (flno >= 0)
240: fstack[flno].fp = 0;
1.1 millert 241: } else {
242: ((char *) line->data)[60] = '\000';
243: warnx("line too long: ignoring %.100s...",
244: (char *)line->data);
245: }
246: }
247: }
248:
249: /*
250: * get a key/line pair from fp
251: */
1.7 ericj 252: static int
1.11 deraadt 253: seq(FILE *fp, DBT *line, DBT *key)
1.1 millert 254: {
255: static char *buf, flag = 1;
1.7 ericj 256: char *end, *pos;
257: int c;
1.4 millert 258:
1.1 millert 259: if (flag) {
260: flag = 0;
261: buf = (char *) linebuf;
262: line->data = buf;
263: }
264: pos = buf;
1.12 jaredy 265: end = buf + linebuf_size;
1.1 millert 266: while ((c = getc(fp)) != EOF) {
267: if ((*pos++ = c) == REC_D) {
268: line->size = pos - buf;
269: return (0);
270: }
271: if (pos == end) {
1.7 ericj 272: linebuf_size *= 2;
273: linebuf = realloc(linebuf, linebuf_size);
274: if (!linebuf)
275: err(2, "realloc of linebuf to %lu bytes failed",
276: (unsigned long)linebuf_size);
277: end = linebuf + linebuf_size;
278: pos = linebuf + (pos - buf);
279: line->data = buf = (char *)linebuf;
280: continue;
1.1 millert 281: }
282: }
283: if (pos != buf) {
284: *pos++ = REC_D;
285: line->size = pos - buf;
286: return (0);
287: } else
288: return (EOF);
289: }
290:
291: /*
292: * write a key/line pair to a temporary file
293: */
294: void
1.11 deraadt 295: putrec(RECHEADER *rec, FILE *fp)
1.1 millert 296: {
297: EWRITE(rec, 1, rec->length + sizeof(TRECHEADER), fp);
298: }
299:
300: /*
301: * write a line to output
302: */
303: void
1.11 deraadt 304: putline(RECHEADER *rec, FILE *fp)
1.1 millert 305: {
306: EWRITE(rec->data+rec->offset, 1, rec->length - rec->offset, fp);
307: }
308:
309: /*
310: * get a record from a temporary file. (Used by merge sort.)
311: */
312: int
1.11 deraadt 313: geteasy(int flno, union f_handle filelist, int nfiles, RECHEADER *rec,
314: u_char *end, struct field *dummy2)
1.1 millert 315: {
316: int i;
317: FILE *fp;
1.4 millert 318:
1.1 millert 319: fp = fstack[flno].fp;
320: if ((u_char *) rec > end - sizeof(TRECHEADER))
321: return (BUFFEND);
322: if (!fread(rec, 1, sizeof(TRECHEADER), fp)) {
323: fclose(fp);
324: fstack[flno].fp = 0;
325: return (EOF);
326: }
327: if (end - rec->data < rec->length) {
328: for (i = sizeof(TRECHEADER) - 1; i >= 0; i--)
329: ungetc(*((char *) rec + i), fp);
330: return (BUFFEND);
331: }
332: fread(rec->data, rec->length, 1, fp);
333: return (0);
334: }