[BACK]Return to files.c CVS log [TXT][DIR] Up to [local] / src / usr.bin / sort

Annotation of src/usr.bin/sort/files.c, Revision 1.13

1.13    ! deraadt     1: /*     $OpenBSD: files.c,v 1.12 2005/08/18 14:56:25 jaredy Exp $       */
1.1       millert     2:
                      3: /*-
                      4:  * Copyright (c) 1993
                      5:  *     The Regents of the University of California.  All rights reserved.
                      6:  *
                      7:  * This code is derived from software contributed to Berkeley by
                      8:  * Peter McIlroy.
                      9:  *
                     10:  * Redistribution and use in source and binary forms, with or without
                     11:  * modification, are permitted provided that the following conditions
                     12:  * are met:
                     13:  * 1. Redistributions of source code must retain the above copyright
                     14:  *    notice, this list of conditions and the following disclaimer.
                     15:  * 2. Redistributions in binary form must reproduce the above copyright
                     16:  *    notice, this list of conditions and the following disclaimer in the
                     17:  *    documentation and/or other materials provided with the distribution.
1.9       millert    18:  * 3. Neither the name of the University nor the names of its contributors
1.1       millert    19:  *    may be used to endorse or promote products derived from this software
                     20:  *    without specific prior written permission.
                     21:  *
                     22:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
                     23:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     24:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     25:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
                     26:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
                     27:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
                     28:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
                     29:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
                     30:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
                     31:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                     32:  * SUCH DAMAGE.
                     33:  */
                     34:
                     35: #include "sort.h"
                     36: #include "fsort.h"
                     37:
                     38: #include <string.h>
                     39:
1.8       millert    40: static int     seq(FILE *, DBT *, DBT *);
1.7       ericj      41:
1.1       millert    42: /*
                     43:  * this is the subroutine for file management for fsort().
                     44:  * It keeps the buffers for all temporary files.
                     45:  */
                     46: int
1.11      deraadt    47: getnext(int binno, union f_handle infl0, int nfiles, RECHEADER *pos, u_char *end,
                     48:     struct field *dummy)
1.1       millert    49: {
1.7       ericj      50:        int i;
                     51:        u_char *hp;
                     52:        static size_t nleft = 0;
1.1       millert    53:        static int cnt = 0, flag = -1;
                     54:        static u_char maxb = 0;
                     55:        static FILE *fp;
                     56:
                     57:        if (nleft == 0) {
                     58:                if (binno < 0)  /* reset files. */ {
                     59:                        for (i = 0; i < nfiles; i++) {
                     60:                                rewind(fstack[infl0.top + i].fp);
                     61:                                fstack[infl0.top + i].max_o = 0;
                     62:                        }
                     63:                        flag = -1;
                     64:                        nleft = cnt = 0;
1.4       millert    65:                        return (-1);
1.1       millert    66:                }
                     67:                maxb = fstack[infl0.top].maxb;
                     68:                for (; nleft == 0; cnt++) {
                     69:                        if (cnt >= nfiles) {
                     70:                                cnt = 0;
                     71:                                return (EOF);
                     72:                        }
                     73:                        fp = fstack[infl0.top + cnt].fp;
1.7       ericj      74:                        fread(&nleft, sizeof(nleft), 1, fp);
1.1       millert    75:                        if (binno < maxb)
                     76:                                fstack[infl0.top+cnt].max_o
                     77:                                        += sizeof(nleft) + nleft;
                     78:                        else if (binno == maxb) {
                     79:                                if (binno != fstack[infl0.top].lastb) {
                     80:                                        fseek(fp, fstack[infl0.top+
                     81:                                                cnt].max_o, SEEK_SET);
                     82:                                        fread(&nleft, sizeof(nleft), 1, fp);
                     83:                                }
                     84:                                if (nleft == 0)
                     85:                                        fclose(fp);
                     86:                        } else if (binno == maxb + 1) {         /* skip a bin */
                     87:                                fseek(fp, nleft, SEEK_CUR);
                     88:                                fread(&nleft, sizeof(nleft), 1, fp);
                     89:                                flag = cnt;
                     90:                        }
                     91:                }
                     92:        }
                     93:        if ((u_char *) pos > end - sizeof(TRECHEADER))
                     94:                return (BUFFEND);
1.7       ericj      95:        fread(pos, sizeof(TRECHEADER), 1, fp);
1.1       millert    96:        if (end - pos->data < pos->length) {
1.7       ericj      97:                hp = ((u_char *)pos) + sizeof(TRECHEADER);
1.1       millert    98:                for (i = sizeof(TRECHEADER); i ;  i--)
                     99:                        ungetc(*--hp, fp);
                    100:                return (BUFFEND);
                    101:        }
                    102:        fread(pos->data, pos->length, 1, fp);
                    103:        nleft -= pos->length + sizeof(TRECHEADER);
                    104:        if (nleft == 0 && binno == fstack[infl0.top].maxb)
                    105:                fclose(fp);
                    106:        return (0);
                    107: }
                    108:
                    109: /*
                    110:  * this is called when there is no special key. It's only called
                    111:  * in the first fsort pass.
                    112:  */
                    113: int
1.11      deraadt   114: makeline(int flno, union f_handle filelist, int nfiles, RECHEADER *buffer,
                    115:     u_char *bufend, struct field *dummy2)
1.1       millert   116: {
1.7       ericj     117:        static u_char *obufend;
                    118:        static size_t osz;
                    119:        char *pos;
1.1       millert   120:        static int fileno = 0, overflow = 0;
                    121:        static FILE *fp = 0;
1.7       ericj     122:        int c;
1.1       millert   123:
                    124:        pos = (char *) buffer->data;
                    125:        if (overflow) {
1.7       ericj     126:                /*
                    127:                 * Buffer shortage is solved by either of two ways:
                    128:                 * * Flush previous buffered data and start using the
                    129:                 *   buffer from start (see fsort())
                    130:                 * * realloc buffer and bump bufend
                    131:                 *
1.10      tedu      132:                 * The former is preferred, realloc is only done when
1.7       ericj     133:                 * there is exactly one item in buffer which does not fit.
                    134:                 */
                    135:                if (bufend == obufend)
                    136:                        memmove(pos, bufend - osz, osz);
                    137:                pos+=osz;
1.1       millert   138:                overflow = 0;
                    139:        }
                    140:        for (;;) {
1.4       millert   141:                if (flno >= 0 && (fp = fstack[flno].fp) == NULL)
                    142:                        return (EOF);
                    143:                else if (fp == 0) {
                    144:                        if (fileno  >= nfiles)
1.1       millert   145:                                return (EOF);
                    146:                        if (!(fp = fopen(filelist.names[fileno], "r")))
1.6       millert   147:                                err(2, "%s", filelist.names[fileno]);
1.4       millert   148:                        fileno++;
1.1       millert   149:                }
1.7       ericj     150:                while ((pos < (char *)bufend) && ((c = getc(fp)) != EOF)) {
1.1       millert   151:                        if ((*pos++ = c) == REC_D) {
                    152:                                buffer->offset = 0;
                    153:                                buffer->length = pos - (char *) buffer->data;
                    154:                                return (0);
                    155:                        }
                    156:                }
1.7       ericj     157:                if (pos >= (char *)bufend) {
                    158:                        if (buffer->data < bufend) {
1.1       millert   159:                                overflow = 1;
1.7       ericj     160:                                obufend = bufend;
                    161:                                osz = (pos - (char *)buffer->data);
1.1       millert   162:                        }
                    163:                        return (BUFFEND);
                    164:                } else if (c == EOF) {
                    165:                        if (buffer->data != (u_char *) pos) {
                    166:                                *pos++ = REC_D;
                    167:                                buffer->offset = 0;
                    168:                                buffer->length = pos - (char *) buffer->data;
1.4       millert   169:                                return (0);
1.1       millert   170:                        }
                    171:                        FCLOSE(fp);
                    172:                        fp = 0;
1.4       millert   173:                        if (flno >= 0)
                    174:                                fstack[flno].fp = 0;
1.1       millert   175:                } else {
1.7       ericj     176:                        warnx("line too long: ignoring %100s...", buffer->data);
                    177:
                    178:                        /* Consume the rest of the line from input */
                    179:                        while((c = getc(fp)) != REC_D && c != EOF)
                    180:                                ;
                    181:
                    182:                        buffer->offset = 0;
                    183:                        buffer->length = 0;
                    184:
                    185:                        return (BUFFEND);
1.1       millert   186:                }
                    187:        }
                    188: }
                    189:
                    190: /*
                    191:  * This generates keys. It's only called in the first fsort pass
                    192:  */
                    193: int
1.11      deraadt   194: makekey(int flno, union f_handle filelist, int nfiles, RECHEADER *buffer,
                    195:     u_char *bufend, struct field *ftbl)
1.1       millert   196: {
                    197:        static int fileno = 0;
                    198:        static FILE *dbdesc = 0;
                    199:        static DBT dbkey[1], line[1];
                    200:        static int overflow = 0;
1.7       ericj     201:        static int c;
1.4       millert   202:
1.1       millert   203:        if (overflow) {
1.7       ericj     204:                overflow = enterkey(buffer, line, bufend - (u_char *)buffer,
                    205:                                                                        ftbl);
                    206:                if (overflow)
                    207:                        return (BUFFEND);
                    208:                else
                    209:                        return (0);
1.1       millert   210:        }
1.7       ericj     211:
1.1       millert   212:        for (;;) {
                    213:                if (flno >= 0) {
                    214:                        if (!(dbdesc = fstack[flno].fp))
1.4       millert   215:                                return (EOF);
1.1       millert   216:                } else if (!dbdesc) {
                    217:                        if (fileno  >= nfiles)
                    218:                                return (EOF);
                    219:                        dbdesc = fopen(filelist.names[fileno], "r");
                    220:                        if (!dbdesc)
1.6       millert   221:                                err(2, "%s", filelist.names[fileno]);
1.4       millert   222:                        fileno++;
1.1       millert   223:                }
1.7       ericj     224:                if (!(c = seq(dbdesc, line, dbkey))) {
                    225:                        if ((signed)line->size > bufend - buffer->data) {
1.1       millert   226:                                overflow = 1;
1.7       ericj     227:                        } else {
1.1       millert   228:                                overflow = enterkey(buffer, line,
                    229:                                    bufend - (u_char *) buffer, ftbl);
1.7       ericj     230:                        }
1.1       millert   231:                        if (overflow)
                    232:                                return (BUFFEND);
                    233:                        else
                    234:                                return (0);
                    235:                }
                    236:                if (c == EOF) {
                    237:                        FCLOSE(dbdesc);
                    238:                        dbdesc = 0;
1.4       millert   239:                        if (flno >= 0)
                    240:                                fstack[flno].fp = 0;
1.1       millert   241:                } else {
                    242:                        ((char *) line->data)[60] = '\000';
                    243:                        warnx("line too long: ignoring %.100s...",
                    244:                            (char *)line->data);
                    245:                }
                    246:        }
                    247: }
                    248:
                    249: /*
                    250:  * get a key/line pair from fp
                    251:  */
1.7       ericj     252: static int
1.11      deraadt   253: seq(FILE *fp, DBT *line, DBT *key)
1.1       millert   254: {
                    255:        static char *buf, flag = 1;
1.7       ericj     256:        char *end, *pos;
                    257:        int c;
1.4       millert   258:
1.1       millert   259:        if (flag) {
                    260:                flag = 0;
                    261:                buf = (char *) linebuf;
                    262:                line->data = buf;
                    263:        }
                    264:        pos = buf;
1.12      jaredy    265:        end = buf + linebuf_size;
1.1       millert   266:        while ((c = getc(fp)) != EOF) {
                    267:                if ((*pos++ = c) == REC_D) {
                    268:                        line->size = pos - buf;
                    269:                        return (0);
                    270:                }
                    271:                if (pos == end) {
1.7       ericj     272:                        linebuf_size *= 2;
                    273:                        linebuf = realloc(linebuf, linebuf_size);
                    274:                        if (!linebuf)
                    275:                                err(2, "realloc of linebuf to %lu bytes failed",
                    276:                                        (unsigned long)linebuf_size);
                    277:                        end = linebuf + linebuf_size;
                    278:                        pos = linebuf + (pos - buf);
                    279:                        line->data = buf = (char *)linebuf;
                    280:                        continue;
1.1       millert   281:                }
                    282:        }
                    283:        if (pos != buf) {
                    284:                *pos++ = REC_D;
                    285:                line->size = pos - buf;
                    286:                return (0);
                    287:        } else
                    288:                return (EOF);
                    289: }
                    290:
                    291: /*
                    292:  * write a key/line pair to a temporary file
                    293:  */
                    294: void
1.11      deraadt   295: putrec(RECHEADER *rec, FILE *fp)
1.1       millert   296: {
                    297:        EWRITE(rec, 1, rec->length + sizeof(TRECHEADER), fp);
                    298: }
                    299:
                    300: /*
                    301:  * write a line to output
                    302:  */
                    303: void
1.11      deraadt   304: putline(RECHEADER *rec, FILE *fp)
1.1       millert   305: {
                    306:        EWRITE(rec->data+rec->offset, 1, rec->length - rec->offset, fp);
                    307: }
                    308:
                    309: /*
                    310:  * get a record from a temporary file. (Used by merge sort.)
                    311:  */
                    312: int
1.11      deraadt   313: geteasy(int flno, union f_handle filelist, int nfiles, RECHEADER *rec,
                    314:     u_char *end, struct field *dummy2)
1.1       millert   315: {
                    316:        int i;
                    317:        FILE *fp;
1.4       millert   318:
1.1       millert   319:        fp = fstack[flno].fp;
                    320:        if ((u_char *) rec > end - sizeof(TRECHEADER))
                    321:                return (BUFFEND);
                    322:        if (!fread(rec, 1, sizeof(TRECHEADER), fp)) {
                    323:                fclose(fp);
                    324:                fstack[flno].fp = 0;
                    325:                return (EOF);
                    326:        }
                    327:        if (end - rec->data < rec->length) {
                    328:                for (i = sizeof(TRECHEADER) - 1; i >= 0;  i--)
                    329:                        ungetc(*((char *) rec + i), fp);
                    330:                return (BUFFEND);
                    331:        }
                    332:        fread(rec->data, rec->length, 1, fp);
                    333:        return (0);
                    334: }