[BACK]Return to sort.c CVS log [TXT][DIR] Up to [local] / src / usr.bin / sort

Annotation of src/usr.bin/sort/sort.c, Revision 1.38

1.38    ! guenther    1: /*     $OpenBSD: sort.c,v 1.37 2009/10/27 23:59:43 deraadt Exp $       */
1.1       millert     2:
                      3: /*-
                      4:  * Copyright (c) 1993
                      5:  *     The Regents of the University of California.  All rights reserved.
                      6:  *
                      7:  * This code is derived from software contributed to Berkeley by
                      8:  * Peter McIlroy.
                      9:  *
                     10:  * Redistribution and use in source and binary forms, with or without
                     11:  * modification, are permitted provided that the following conditions
                     12:  * are met:
                     13:  * 1. Redistributions of source code must retain the above copyright
                     14:  *    notice, this list of conditions and the following disclaimer.
                     15:  * 2. Redistributions in binary form must reproduce the above copyright
                     16:  *    notice, this list of conditions and the following disclaimer in the
                     17:  *    documentation and/or other materials provided with the distribution.
1.20      millert    18:  * 3. Neither the name of the University nor the names of its contributors
1.1       millert    19:  *    may be used to endorse or promote products derived from this software
                     20:  *    without specific prior written permission.
                     21:  *
                     22:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
                     23:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     24:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     25:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
                     26:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
                     27:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
                     28:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
                     29:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
                     30:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
                     31:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                     32:  * SUCH DAMAGE.
                     33:  */
                     34:
1.4       millert    35: /*
                     36:  * Sort sorts a file using an optional user-defined key.
1.1       millert    37:  * Sort uses radix sort for internal sorting, and allows
                     38:  * a choice of merge sort and radix sort for external sorting.
                     39:  */
                     40:
                     41: #include "sort.h"
                     42: #include "fsort.h"
                     43: #include "pathnames.h"
                     44:
1.13      espie      45: #include <sys/types.h>
                     46: #include <sys/stat.h>
1.16      ericj      47: #include <locale.h>
1.1       millert    48: #include <paths.h>
                     49: #include <signal.h>
                     50: #include <stdlib.h>
                     51: #include <string.h>
                     52: #include <unistd.h>
1.10      mickey     53: #include <err.h>
1.1       millert    54:
                     55: int REC_D = '\n';
                     56: u_char d_mask[NBINS];          /* flags for rec_d, field_d, <blank> */
1.4       millert    57:
1.1       millert    58: /*
                     59:  * weight tables.  Gweights is one of ascii, Rascii..
                     60:  * modified to weight rec_d = 0 (or 255)
                     61:  */
                     62: extern u_char gweights[NBINS];
                     63: u_char ascii[NBINS], Rascii[NBINS], RFtable[NBINS], Ftable[NBINS];
1.4       millert    64:
1.1       millert    65: /*
                     66:  * masks of ignored characters.  Alltable is 256 ones
                     67:  */
                     68: u_char dtable[NBINS], itable[NBINS], alltable[NBINS];
1.35      millert    69: int SINGL_FLD = 0, SEP_FLAG = 0, UNIQUE = 0, STABLE = 0;
1.12      millert    70: struct coldesc *clist;
1.1       millert    71: int ncols = 0;
1.12      millert    72: int ND = 10;                   /* limit on number of -k options. */
1.1       millert    73:
1.19      millert    74: char *devstdin = _PATH_STDIN;
1.1       millert    75: char *tmpdir = _PATH_VARTMP;
1.19      millert    76: char toutpath[PATH_MAX];
1.1       millert    77:
1.18      millert    78: static void cleanup(void);
                     79: static void onsig(int);
                     80: static void usage(char *);
1.1       millert    81:
/*
 * CHECK_NFIELDS -- count one more key field and grow the field table when
 * the count reaches the current limit ND.
 *
 * Expects these names in the caller's scope:
 *	nfields	integer count of fields added so far
 *	fldtab	struct field *, base of the table (ND + 2 elements)
 *	ftpos	struct field *, cursor pointing into fldtab
 *	p	void *, scratch for the realloc() result
 *
 * On growth ND is raised by 10, the table is resized to ND + 2 elements
 * (matching the initial calloc(ND + 2, ...) in main()), the ten new
 * elements are zero-filled, and ftpos is re-based onto the new table.
 * The cursor offset is captured before realloc() so the old pointer is
 * never used after it may have been freed.  Allocation failure exits with
 * status 2 via errx().
 *
 * NOTE(review): clist is sized from the initial ND in main() and is not
 * grown here -- confirm whether setfield() can run past it after more
 * than ND -k options.
 */
#define CHECK_NFIELDS							\
	do {								\
		if (++nfields == ND) {					\
			size_t cursor_ = (size_t)(ftpos - fldtab);	\
			size_t oldcnt_ = (size_t)ND + 2;		\
									\
			ND += 10;					\
			p = realloc(fldtab,				\
			    ((size_t)ND + 2) * sizeof(*fldtab));	\
			if (p == NULL)					\
				errx(2, "cannot allocate memory");	\
			fldtab = p;					\
			memset(fldtab + oldcnt_, 0,			\
			    10 * sizeof(*fldtab));			\
			ftpos = fldtab + cursor_;			\
		}							\
	} while (0)
                     90:
1.1       millert    91: int
1.21      deraadt    92: main(int argc, char *argv[])
1.1       millert    93: {
1.22      deraadt    94:        int (*get)(int, union f_handle, int, RECHEADER *, u_char *, struct field *);
1.1       millert    95:        int ch, i, stdinflag = 0, tmp = 0;
1.12      millert    96:        char nfields = 0, cflag = 0, mflag = 0;
1.1       millert    97:        char *outfile, *outpath = 0;
1.12      millert    98:        struct field *fldtab, *ftpos;
1.1       millert    99:        union f_handle filelist;
                    100:        FILE *outfp = NULL;
1.12      millert   101:        void *p;
1.4       millert   102:
1.16      ericj     103:        setlocale(LC_ALL, "");
                    104:
1.12      millert   105:        if ((clist = calloc((ND+1)*2, sizeof(struct coldesc))) == NULL ||
                    106:            (ftpos = fldtab = calloc(ND+2, sizeof(struct field))) == NULL)
                    107:                errx(2, "cannot allocate memory");
1.1       millert   108:        memset(d_mask, 0, NBINS);
                    109:        d_mask[REC_D = '\n'] = REC_D_F;
                    110:        d_mask['\t'] = d_mask[' '] = BLANK | FLD_D;
                    111:        fixit(&argc, argv);
                    112:        if (!issetugid() && (outfile = getenv("TMPDIR")))
                    113:                tmpdir = outfile;
1.35      millert   114:        while ((ch = getopt(argc, argv, "bcdfik:mHno:rR:t:T:uy:zs")) != -1) {
1.4       millert   115:                switch (ch) {
1.1       millert   116:                case 'b': fldtab->flags |= BI | BT;
                    117:                        break;
                    118:                case 'd':
1.4       millert   119:                case 'f':
1.1       millert   120:                case 'i':
1.5       millert   121:                case 'n':
1.1       millert   122:                case 'r': tmp |= optval(ch, 0);
                    123:                        if (tmp & R && tmp & F)
                    124:                                fldtab->weights = RFtable;
                    125:                        else if (tmp & F)
                    126:                                fldtab->weights = Ftable;
1.4       millert   127:                        else if (tmp & R)
1.1       millert   128:                                fldtab->weights = Rascii;
                    129:                        fldtab->flags |= tmp;
                    130:                        break;
                    131:                case 'o':
                    132:                        outpath = optarg;
                    133:                        break;
                    134:                case 'k':
1.12      millert   135:                        CHECK_NFIELDS;
1.5       millert   136:                        setfield(optarg, ++ftpos, fldtab->flags);
1.1       millert   137:                        break;
                    138:                case 't':
                    139:                        if (SEP_FLAG)
                    140:                                usage("multiple field delimiters");
                    141:                        SEP_FLAG = 1;
                    142:                        d_mask[' '] &= ~FLD_D;
                    143:                        d_mask['\t'] &= ~FLD_D;
                    144:                        d_mask[(int)*optarg] |= FLD_D;
                    145:                        if (d_mask[(int)*optarg] & REC_D_F)
                    146:                                err(2, "record/field delimiter clash");
                    147:                        break;
                    148:                case 'R':
                    149:                        if (REC_D != '\n')
                    150:                                usage("multiple record delimiters");
                    151:                        if ('\n' == (REC_D = *optarg))
                    152:                                break;
                    153:                        d_mask['\n'] = d_mask[' '];
                    154:                        d_mask[REC_D] = REC_D_F;
                    155:                        break;
                    156:                case 'T':
                    157:                        tmpdir = optarg;
                    158:                        break;
                    159:                case 'u':
                    160:                        UNIQUE = 1;
                    161:                        break;
                    162:                case 'c':
                    163:                        cflag = 1;
                    164:                        break;
                    165:                case 'm':
                    166:                        mflag = 1;
                    167:                        break;
                    168:                case 'H':
                    169:                        PANIC = 0;
                    170:                        break;
                    171:                case 'y':
                    172:                        /* accept -y for backwards compat. */
                    173:                        break;
1.26      dlg       174:                case 'z':
                    175:                        if (REC_D != '\n')
                    176:                                usage("multiple record delimiters");
                    177:                        REC_D = '\0';
                    178:                        d_mask['\n'] = d_mask[' '];
                    179:                        d_mask[REC_D] = REC_D_F;
                    180:                        break;
1.35      millert   181:                case 's':
                    182:                        STABLE = 1;
                    183:                        break;
1.1       millert   184:                case '?':
1.8       deraadt   185:                default:
                    186:                        usage(NULL);
1.1       millert   187:                }
                    188:        }
1.4       millert   189:
1.1       millert   190:        if (cflag && argc > optind+1)
                    191:                errx(2, "too many input files for -c option");
1.4       millert   192:
1.1       millert   193:        if (argc - 2 > optind && !strcmp(argv[argc-2], "-o")) {
                    194:                outpath = argv[argc-1];
                    195:                argc -= 2;
                    196:        }
1.4       millert   197:
1.1       millert   198:        if (mflag && argc - optind > (MAXFCT - (16+1))*16)
                    199:                errx(2, "too many input files for -m option");
1.4       millert   200:
1.1       millert   201:        for (i = optind; i < argc; i++) {
                    202:                /* allow one occurrence of /dev/stdin */
                    203:                if (!strcmp(argv[i], "-") || !strcmp(argv[i], devstdin)) {
                    204:                        if (stdinflag)
                    205:                                warnx("ignoring extra \"%s\" in file list",
                    206:                                    argv[i]);
                    207:                        else {
                    208:                                stdinflag = 1;
                    209:                                argv[i] = devstdin;
                    210:                        }
                    211:                } else if ((ch = access(argv[i], R_OK)))
1.14      millert   212:                        err(2, "%s", argv[i]);
1.1       millert   213:        }
1.4       millert   214:
1.5       millert   215:        if (!(fldtab->flags & (I|D|N) || fldtab[1].icol.num)) {
1.1       millert   216:                SINGL_FLD = 1;
                    217:                fldtab[0].icol.num = 1;
                    218:        } else {
                    219:                if (!fldtab[1].icol.num) {
1.12      millert   220:                        CHECK_NFIELDS;
1.1       millert   221:                        fldtab[0].flags &= ~(BI|BT);
                    222:                        setfield("1", ++ftpos, fldtab->flags);
                    223:                }
                    224:                fldreset(fldtab);
                    225:                fldtab[0].flags &= ~F;
                    226:        }
                    227:        settables(fldtab[0].flags);
                    228:        num_init();
                    229:        fldtab->weights = gweights;
1.4       millert   230:
1.3       deraadt   231:        if (optind == argc) {
                    232:                static char *names[2];
                    233:
                    234:                names[0] = devstdin;
                    235:                names[1] = NULL;
                    236:                filelist.names = names;
                    237:                optind--;
                    238:        } else
                    239:                filelist.names = argv+optind;
1.4       millert   240:
1.1       millert   241:        if (SINGL_FLD)
                    242:                get = makeline;
                    243:        else
                    244:                get = makekey;
1.4       millert   245:
1.34      millert   246:        if (!SINGL_FLD) {
1.31      millert   247:                if ((linebuf = malloc(linebuf_size)) == NULL)
                    248:                        err(2, NULL);
                    249:        }
                    250:
1.1       millert   251:        if (cflag) {
                    252:                order(filelist, get, fldtab);
                    253:                /* NOT REACHED */
                    254:        }
1.4       millert   255:
1.1       millert   256:        if (!outpath) {
                    257:                (void)snprintf(toutpath,
                    258:                    sizeof(toutpath), "%sstdout", _PATH_DEV);
                    259:                outfile = outpath = toutpath;
                    260:        } else if (!(ch = access(outpath, 0)) &&
                    261:            strncmp(_PATH_DEV, outpath, 5)) {
1.38    ! guenther  262:                struct sigaction oact, act;
1.1       millert   263:                int sigtable[] = {SIGHUP, SIGINT, SIGPIPE, SIGXCPU, SIGXFSZ,
                    264:                    SIGVTALRM, SIGPROF, 0};
                    265:                int outfd;
1.13      espie     266:                mode_t um;
1.4       millert   267:
1.1       millert   268:                errno = 0;
1.4       millert   269:
1.1       millert   270:                if (access(outpath, W_OK))
1.14      millert   271:                        err(2, "%s", outpath);
1.4       millert   272:                (void)snprintf(toutpath, sizeof(toutpath), "%sXXXXXXXXXX",
                    273:                    outpath);
1.13      espie     274:                um = umask(S_IWGRP|S_IWOTH);
1.25      moritz    275:                (void)umask(um);
1.13      espie     276:                if ((outfd = mkstemp(toutpath)) == -1 ||
                    277:                    fchmod(outfd, DEFFILEMODE & ~um) == -1 ||
1.1       millert   278:                    (outfp = fdopen(outfd, "w")) == 0)
1.14      millert   279:                        err(2, "%s", toutpath);
1.1       millert   280:                outfile = toutpath;
1.4       millert   281:
1.1       millert   282:                (void)atexit(cleanup);
1.17      millert   283:                sigfillset(&act.sa_mask);
                    284:                act.sa_flags = SA_RESTART;
                    285:                act.sa_handler = onsig;
1.1       millert   286:                for (i = 0; sigtable[i]; ++i)   /* always unlink toutpath */
1.38    ! guenther  287:                        if (sigaction(sigtable[i], NULL, &oact) < 0 ||
        !           288:                            oact.sa_handler != SIG_IGN &&
        !           289:                            sigaction(sigtable[i], &act, NULL) < 0)
        !           290:                                err(2, "sigaction");
1.1       millert   291:        } else
                    292:                outfile = outpath;
                    293:        if (outfp == NULL && (outfp = fopen(outfile, "w")) == NULL)
1.14      millert   294:                err(2, "%s", outfile);
1.1       millert   295:        if (mflag)
                    296:                fmerge(-1, filelist, argc-optind, get, outfp, putline, fldtab);
                    297:        else
                    298:                fsort(-1, 0, filelist, argc-optind, outfp, fldtab);
                    299:        if (outfile != outpath) {
                    300:                if (access(outfile, 0))
1.14      millert   301:                        err(2, "%s", outfile);
1.1       millert   302:                (void)unlink(outpath);
                    303:                if (link(outfile, outpath))
                    304:                        err(2, "cannot link %s: output left in %s",
                    305:                            outpath, outfile);
                    306:                (void)unlink(outfile);
                    307:        }
                    308:        exit(0);
                    309: }
                    310:
/*
 * Signal handler that main() installs for the signals in its sigtable:
 * remove the temporary output file, then terminate at once.
 */
/* ARGSUSED */
static void
onsig(int signo)
{
	(void)signo;		/* the handler acts the same for every signal */

	cleanup();
	_exit(2);		/* return 2 on error/interrupt */
}
                    319:
                    320: static void
1.21      deraadt   321: cleanup(void)
1.1       millert   322: {
1.4       millert   323:
1.1       millert   324:        if (toutpath[0])
                    325:                (void)unlink(toutpath);
                    326: }
                    327:
                    328: static void
1.21      deraadt   329: usage(char *msg)
1.1       millert   330: {
1.8       deraadt   331:        extern char *__progname;
1.4       millert   332:
1.16      ericj     333:        if (msg != NULL)
1.14      millert   334:                warnx("%s", msg);
1.36      jmc       335:        (void)fprintf(stderr, "usage: %s [-bcdfHimnrsuz] "
1.28      jmc       336:            "[-k field1[,field2]] [-o output] [-R char]\n"
1.27      jmc       337:            "\t[-T dir] [-t char] [file ...]\n", __progname);
1.1       millert   338:        exit(2);
                    339: }