[BACK]Return to files.c CVS log [TXT][DIR] Up to [local] / src / usr.bin / sort

Annotation of src/usr.bin/sort/files.c, Revision 1.11

1.11    ! deraadt     1: /*     $OpenBSD: files.c,v 1.10 2003/11/15 05:56:28 tedu Exp $ */
1.1       millert     2:
                      3: /*-
                      4:  * Copyright (c) 1993
                      5:  *     The Regents of the University of California.  All rights reserved.
                      6:  *
                      7:  * This code is derived from software contributed to Berkeley by
                      8:  * Peter McIlroy.
                      9:  *
                     10:  * Redistribution and use in source and binary forms, with or without
                     11:  * modification, are permitted provided that the following conditions
                     12:  * are met:
                     13:  * 1. Redistributions of source code must retain the above copyright
                     14:  *    notice, this list of conditions and the following disclaimer.
                     15:  * 2. Redistributions in binary form must reproduce the above copyright
                     16:  *    notice, this list of conditions and the following disclaimer in the
                     17:  *    documentation and/or other materials provided with the distribution.
1.9       millert    18:  * 3. Neither the name of the University nor the names of its contributors
1.1       millert    19:  *    may be used to endorse or promote products derived from this software
                     20:  *    without specific prior written permission.
                     21:  *
                     22:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
                     23:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     24:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     25:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
                     26:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
                     27:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
                     28:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
                     29:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
                     30:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
                     31:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                     32:  * SUCH DAMAGE.
                     33:  */
                     34:
                     35: #ifndef lint
                     36: #if 0
                     37: static char sccsid[] = "@(#)files.c    8.1 (Berkeley) 6/6/93";
                     38: #else
1.11    ! deraadt    39: static char rcsid[] = "$OpenBSD: files.c,v 1.10 2003/11/15 05:56:28 tedu Exp $";
1.1       millert    40: #endif
                     41: #endif /* not lint */
                     42:
                     43: #include "sort.h"
                     44: #include "fsort.h"
                     45:
                     46: #include <string.h>
                     47:
1.8       millert    48: static int     seq(FILE *, DBT *, DBT *);
1.7       ericj      49:
1.1       millert    50: /*
                     51:  * this is the subroutine for file management for fsort().
                     52:  * It keeps the buffers for all temporary files.
                     53:  */
                     54: int
1.11    ! deraadt    55: getnext(int binno, union f_handle infl0, int nfiles, RECHEADER *pos, u_char *end,
        !            56:     struct field *dummy)
1.1       millert    57: {
1.7       ericj      58:        int i;
                     59:        u_char *hp;
                     60:        static size_t nleft = 0;
1.1       millert    61:        static int cnt = 0, flag = -1;
                     62:        static u_char maxb = 0;
                     63:        static FILE *fp;
                     64:
                     65:        if (nleft == 0) {
                     66:                if (binno < 0)  /* reset files. */ {
                     67:                        for (i = 0; i < nfiles; i++) {
                     68:                                rewind(fstack[infl0.top + i].fp);
                     69:                                fstack[infl0.top + i].max_o = 0;
                     70:                        }
                     71:                        flag = -1;
                     72:                        nleft = cnt = 0;
1.4       millert    73:                        return (-1);
1.1       millert    74:                }
                     75:                maxb = fstack[infl0.top].maxb;
                     76:                for (; nleft == 0; cnt++) {
                     77:                        if (cnt >= nfiles) {
                     78:                                cnt = 0;
                     79:                                return (EOF);
                     80:                        }
                     81:                        fp = fstack[infl0.top + cnt].fp;
1.7       ericj      82:                        fread(&nleft, sizeof(nleft), 1, fp);
1.1       millert    83:                        if (binno < maxb)
                     84:                                fstack[infl0.top+cnt].max_o
                     85:                                        += sizeof(nleft) + nleft;
                     86:                        else if (binno == maxb) {
                     87:                                if (binno != fstack[infl0.top].lastb) {
                     88:                                        fseek(fp, fstack[infl0.top+
                     89:                                                cnt].max_o, SEEK_SET);
                     90:                                        fread(&nleft, sizeof(nleft), 1, fp);
                     91:                                }
                     92:                                if (nleft == 0)
                     93:                                        fclose(fp);
                     94:                        } else if (binno == maxb + 1) {         /* skip a bin */
                     95:                                fseek(fp, nleft, SEEK_CUR);
                     96:                                fread(&nleft, sizeof(nleft), 1, fp);
                     97:                                flag = cnt;
                     98:                        }
                     99:                }
                    100:        }
                    101:        if ((u_char *) pos > end - sizeof(TRECHEADER))
                    102:                return (BUFFEND);
1.7       ericj     103:        fread(pos, sizeof(TRECHEADER), 1, fp);
1.1       millert   104:        if (end - pos->data < pos->length) {
1.7       ericj     105:                hp = ((u_char *)pos) + sizeof(TRECHEADER);
1.1       millert   106:                for (i = sizeof(TRECHEADER); i ;  i--)
                    107:                        ungetc(*--hp, fp);
                    108:                return (BUFFEND);
                    109:        }
                    110:        fread(pos->data, pos->length, 1, fp);
                    111:        nleft -= pos->length + sizeof(TRECHEADER);
                    112:        if (nleft == 0 && binno == fstack[infl0.top].maxb)
                    113:                fclose(fp);
                    114:        return (0);
                    115: }
                    116:
                    117: /*
                    118:  * this is called when there is no special key. It's only called
                    119:  * in the first fsort pass.
                    120:  */
                    121: int
1.11    ! deraadt   122: makeline(int flno, union f_handle filelist, int nfiles, RECHEADER *buffer,
        !           123:     u_char *bufend, struct field *dummy2)
1.1       millert   124: {
1.7       ericj     125:        static u_char *obufend;
                    126:        static size_t osz;
                    127:        char *pos;
1.1       millert   128:        static int fileno = 0, overflow = 0;
                    129:        static FILE *fp = 0;
1.7       ericj     130:        int c;
1.1       millert   131:
                    132:        pos = (char *) buffer->data;
                    133:        if (overflow) {
1.7       ericj     134:                /*
                    135:                 * Buffer shortage is solved by either of two ways:
                    136:                 * * Flush previous buffered data and start using the
                    137:                 *   buffer from start (see fsort())
                    138:                 * * realloc buffer and bump bufend
                    139:                 *
1.10      tedu      140:                 * The former is preferred, realloc is only done when
1.7       ericj     141:                 * there is exactly one item in buffer which does not fit.
                    142:                 */
                    143:                if (bufend == obufend)
                    144:                        memmove(pos, bufend - osz, osz);
                    145:                pos+=osz;
1.1       millert   146:                overflow = 0;
                    147:        }
                    148:        for (;;) {
1.4       millert   149:                if (flno >= 0 && (fp = fstack[flno].fp) == NULL)
                    150:                        return (EOF);
                    151:                else if (fp == 0) {
                    152:                        if (fileno  >= nfiles)
1.1       millert   153:                                return (EOF);
                    154:                        if (!(fp = fopen(filelist.names[fileno], "r")))
1.6       millert   155:                                err(2, "%s", filelist.names[fileno]);
1.4       millert   156:                        fileno++;
1.1       millert   157:                }
1.7       ericj     158:                while ((pos < (char *)bufend) && ((c = getc(fp)) != EOF)) {
1.1       millert   159:                        if ((*pos++ = c) == REC_D) {
                    160:                                buffer->offset = 0;
                    161:                                buffer->length = pos - (char *) buffer->data;
                    162:                                return (0);
                    163:                        }
                    164:                }
1.7       ericj     165:                if (pos >= (char *)bufend) {
                    166:                        if (buffer->data < bufend) {
1.1       millert   167:                                overflow = 1;
1.7       ericj     168:                                obufend = bufend;
                    169:                                osz = (pos - (char *)buffer->data);
1.1       millert   170:                        }
                    171:                        return (BUFFEND);
                    172:                } else if (c == EOF) {
                    173:                        if (buffer->data != (u_char *) pos) {
                    174:                                *pos++ = REC_D;
                    175:                                buffer->offset = 0;
                    176:                                buffer->length = pos - (char *) buffer->data;
1.4       millert   177:                                return (0);
1.1       millert   178:                        }
                    179:                        FCLOSE(fp);
                    180:                        fp = 0;
1.4       millert   181:                        if (flno >= 0)
                    182:                                fstack[flno].fp = 0;
1.1       millert   183:                } else {
1.7       ericj     184:                        warnx("line too long: ignoring %100s...", buffer->data);
                    185:
                    186:                        /* Consume the rest of the line from input */
                    187:                        while((c = getc(fp)) != REC_D && c != EOF)
                    188:                                ;
                    189:
                    190:                        buffer->offset = 0;
                    191:                        buffer->length = 0;
                    192:
                    193:                        return (BUFFEND);
1.1       millert   194:                }
                    195:        }
                    196: }
                    197:
                    198: /*
                    199:  * This generates keys. It's only called in the first fsort pass
                    200:  */
                    201: int
1.11    ! deraadt   202: makekey(int flno, union f_handle filelist, int nfiles, RECHEADER *buffer,
        !           203:     u_char *bufend, struct field *ftbl)
1.1       millert   204: {
                    205:        static int fileno = 0;
                    206:        static FILE *dbdesc = 0;
                    207:        static DBT dbkey[1], line[1];
                    208:        static int overflow = 0;
1.7       ericj     209:        static int c;
1.4       millert   210:
1.1       millert   211:        if (overflow) {
1.7       ericj     212:                overflow = enterkey(buffer, line, bufend - (u_char *)buffer,
                    213:                                                                        ftbl);
                    214:                if (overflow)
                    215:                        return (BUFFEND);
                    216:                else
                    217:                        return (0);
1.1       millert   218:        }
1.7       ericj     219:
1.1       millert   220:        for (;;) {
                    221:                if (flno >= 0) {
                    222:                        if (!(dbdesc = fstack[flno].fp))
1.4       millert   223:                                return (EOF);
1.1       millert   224:                } else if (!dbdesc) {
                    225:                        if (fileno  >= nfiles)
                    226:                                return (EOF);
                    227:                        dbdesc = fopen(filelist.names[fileno], "r");
                    228:                        if (!dbdesc)
1.6       millert   229:                                err(2, "%s", filelist.names[fileno]);
1.4       millert   230:                        fileno++;
1.1       millert   231:                }
1.7       ericj     232:                if (!(c = seq(dbdesc, line, dbkey))) {
                    233:                        if ((signed)line->size > bufend - buffer->data) {
1.1       millert   234:                                overflow = 1;
1.7       ericj     235:                        } else {
1.1       millert   236:                                overflow = enterkey(buffer, line,
                    237:                                    bufend - (u_char *) buffer, ftbl);
1.7       ericj     238:                        }
1.1       millert   239:                        if (overflow)
                    240:                                return (BUFFEND);
                    241:                        else
                    242:                                return (0);
                    243:                }
                    244:                if (c == EOF) {
                    245:                        FCLOSE(dbdesc);
                    246:                        dbdesc = 0;
1.4       millert   247:                        if (flno >= 0)
                    248:                                fstack[flno].fp = 0;
1.1       millert   249:                } else {
                    250:                        ((char *) line->data)[60] = '\000';
                    251:                        warnx("line too long: ignoring %.100s...",
                    252:                            (char *)line->data);
                    253:                }
                    254:        }
                    255: }
                    256:
                    257: /*
                    258:  * get a key/line pair from fp
                    259:  */
1.7       ericj     260: static int
1.11    ! deraadt   261: seq(FILE *fp, DBT *line, DBT *key)
1.1       millert   262: {
                    263:        static char *buf, flag = 1;
1.7       ericj     264:        char *end, *pos;
                    265:        int c;
1.4       millert   266:
1.1       millert   267:        if (flag) {
                    268:                flag = 0;
                    269:                buf = (char *) linebuf;
1.7       ericj     270:                end = buf + linebuf_size;
1.1       millert   271:                line->data = buf;
                    272:        }
                    273:        pos = buf;
                    274:        while ((c = getc(fp)) != EOF) {
                    275:                if ((*pos++ = c) == REC_D) {
                    276:                        line->size = pos - buf;
                    277:                        return (0);
                    278:                }
                    279:                if (pos == end) {
1.7       ericj     280:                        linebuf_size *= 2;
                    281:                        linebuf = realloc(linebuf, linebuf_size);
                    282:                        if (!linebuf)
                    283:                                err(2, "realloc of linebuf to %lu bytes failed",
                    284:                                        (unsigned long)linebuf_size);
                    285:                        end = linebuf + linebuf_size;
                    286:                        pos = linebuf + (pos - buf);
                    287:                        line->data = buf = (char *)linebuf;
                    288:                        continue;
1.1       millert   289:                }
                    290:        }
                    291:        if (pos != buf) {
                    292:                *pos++ = REC_D;
                    293:                line->size = pos - buf;
                    294:                return (0);
                    295:        } else
                    296:                return (EOF);
                    297: }
                    298:
                    299: /*
                    300:  * write a key/line pair to a temporary file
                    301:  */
                    302: void
1.11    ! deraadt   303: putrec(RECHEADER *rec, FILE *fp)
1.1       millert   304: {
                    305:        EWRITE(rec, 1, rec->length + sizeof(TRECHEADER), fp);
                    306: }
                    307:
                    308: /*
                    309:  * write a line to output
                    310:  */
                    311: void
1.11    ! deraadt   312: putline(RECHEADER *rec, FILE *fp)
1.1       millert   313: {
                    314:        EWRITE(rec->data+rec->offset, 1, rec->length - rec->offset, fp);
                    315: }
                    316:
                    317: /*
                    318:  * get a record from a temporary file. (Used by merge sort.)
                    319:  */
                    320: int
1.11    ! deraadt   321: geteasy(int flno, union f_handle filelist, int nfiles, RECHEADER *rec,
        !           322:     u_char *end, struct field *dummy2)
1.1       millert   323: {
                    324:        int i;
                    325:        FILE *fp;
1.4       millert   326:
1.1       millert   327:        fp = fstack[flno].fp;
                    328:        if ((u_char *) rec > end - sizeof(TRECHEADER))
                    329:                return (BUFFEND);
                    330:        if (!fread(rec, 1, sizeof(TRECHEADER), fp)) {
                    331:                fclose(fp);
                    332:                fstack[flno].fp = 0;
                    333:                return (EOF);
                    334:        }
                    335:        if (end - rec->data < rec->length) {
                    336:                for (i = sizeof(TRECHEADER) - 1; i >= 0;  i--)
                    337:                        ungetc(*((char *) rec + i), fp);
                    338:                return (BUFFEND);
                    339:        }
                    340:        fread(rec->data, rec->length, 1, fp);
                    341:        return (0);
                    342: }