Annotation of src/usr.bin/sort/sort.c, Revision 1.38
1.38 ! guenther 1: /* $OpenBSD: sort.c,v 1.37 2009/10/27 23:59:43 deraadt Exp $ */
1.1 millert 2:
3: /*-
4: * Copyright (c) 1993
5: * The Regents of the University of California. All rights reserved.
6: *
7: * This code is derived from software contributed to Berkeley by
8: * Peter McIlroy.
9: *
10: * Redistribution and use in source and binary forms, with or without
11: * modification, are permitted provided that the following conditions
12: * are met:
13: * 1. Redistributions of source code must retain the above copyright
14: * notice, this list of conditions and the following disclaimer.
15: * 2. Redistributions in binary form must reproduce the above copyright
16: * notice, this list of conditions and the following disclaimer in the
17: * documentation and/or other materials provided with the distribution.
1.20 millert 18: * 3. Neither the name of the University nor the names of its contributors
1.1 millert 19: * may be used to endorse or promote products derived from this software
20: * without specific prior written permission.
21: *
22: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32: * SUCH DAMAGE.
33: */
34:
1.4 millert 35: /*
36: * Sort sorts a file using an optional user-defined key.
1.1 millert 37: * Sort uses radix sort for internal sorting, and allows
38: * a choice of merge sort and radix sort for external sorting.
39: */
40:
41: #include "sort.h"
42: #include "fsort.h"
43: #include "pathnames.h"
44:
1.13 espie 45: #include <sys/types.h>
46: #include <sys/stat.h>
1.16 ericj 47: #include <locale.h>
1.1 millert 48: #include <paths.h>
49: #include <signal.h>
50: #include <stdlib.h>
51: #include <string.h>
52: #include <unistd.h>
1.10 mickey 53: #include <err.h>
1.1 millert 54:
55: int REC_D = '\n';
56: u_char d_mask[NBINS]; /* flags for rec_d, field_d, <blank> */
1.4 millert 57:
1.1 millert 58: /*
59: * weight tables. Gweights is one of ascii, Rascii..
60: * modified to weight rec_d = 0 (or 255)
61: */
62: extern u_char gweights[NBINS];
63: u_char ascii[NBINS], Rascii[NBINS], RFtable[NBINS], Ftable[NBINS];
1.4 millert 64:
1.1 millert 65: /*
66: * masks of ignored characters. Alltable is 256 ones
67: */
68: u_char dtable[NBINS], itable[NBINS], alltable[NBINS];
1.35 millert 69: int SINGL_FLD = 0, SEP_FLAG = 0, UNIQUE = 0, STABLE = 0;
1.12 millert 70: struct coldesc *clist;
1.1 millert 71: int ncols = 0;
1.12 millert 72: int ND = 10; /* limit on number of -k options. */
1.1 millert 73:
1.19 millert 74: char *devstdin = _PATH_STDIN;
1.1 millert 75: char *tmpdir = _PATH_VARTMP;
1.19 millert 76: char toutpath[PATH_MAX];
1.1 millert 77:
1.18 millert 78: static void cleanup(void);
79: static void onsig(int);
80: static void usage(char *);
1.1 millert 81:
/*
 * Account for one more key field and grow the field table when the
 * current limit is reached.
 *
 * Expands in a scope that provides: nfields (integer counter), ND (int
 * limit), fldtab and ftpos (struct field *), and p (void * scratch).
 * When ++nfields reaches ND, ND is raised by 10 and fldtab is resized to
 * ND + 2 elements, matching the ND + 2 elements of the initial calloc().
 * The added elements are zeroed so they look like calloc()ed slots, and
 * ftpos is re-based onto the new storage.  Its element offset is taken
 * before realloc() because the old pointer is invalid afterwards.
 * Exits with status 2 if memory cannot be obtained.
 *
 * NOTE(review): clist is sized from the initial ND and is not grown
 * here; confirm whether more than 10 keys can overrun it.
 */
#define CHECK_NFIELDS							\
	do {								\
		if (++nfields == ND) {					\
			size_t cn_pos = (size_t)(ftpos - fldtab);	\
			size_t cn_old = (size_t)ND + 2;			\
			size_t cn_new;					\
									\
			ND += 10;					\
			cn_new = (size_t)ND + 2;			\
			p = realloc(fldtab, cn_new * sizeof(*fldtab));	\
			if (p == NULL)					\
				errx(2, "cannot allocate memory");	\
			fldtab = p;					\
			memset(fldtab + cn_old, 0,			\
			    (cn_new - cn_old) * sizeof(*fldtab));	\
			ftpos = fldtab + cn_pos;			\
		}							\
	} while (0)
90:
1.1 millert 91: int
1.21 deraadt 92: main(int argc, char *argv[])
1.1 millert 93: {
1.22 deraadt 94: int (*get)(int, union f_handle, int, RECHEADER *, u_char *, struct field *);
1.1 millert 95: int ch, i, stdinflag = 0, tmp = 0;
1.12 millert 96: char nfields = 0, cflag = 0, mflag = 0;
1.1 millert 97: char *outfile, *outpath = 0;
1.12 millert 98: struct field *fldtab, *ftpos;
1.1 millert 99: union f_handle filelist;
100: FILE *outfp = NULL;
1.12 millert 101: void *p;
1.4 millert 102:
1.16 ericj 103: setlocale(LC_ALL, "");
104:
1.12 millert 105: if ((clist = calloc((ND+1)*2, sizeof(struct coldesc))) == NULL ||
106: (ftpos = fldtab = calloc(ND+2, sizeof(struct field))) == NULL)
107: errx(2, "cannot allocate memory");
1.1 millert 108: memset(d_mask, 0, NBINS);
109: d_mask[REC_D = '\n'] = REC_D_F;
110: d_mask['\t'] = d_mask[' '] = BLANK | FLD_D;
111: fixit(&argc, argv);
112: if (!issetugid() && (outfile = getenv("TMPDIR")))
113: tmpdir = outfile;
1.35 millert 114: while ((ch = getopt(argc, argv, "bcdfik:mHno:rR:t:T:uy:zs")) != -1) {
1.4 millert 115: switch (ch) {
1.1 millert 116: case 'b': fldtab->flags |= BI | BT;
117: break;
118: case 'd':
1.4 millert 119: case 'f':
1.1 millert 120: case 'i':
1.5 millert 121: case 'n':
1.1 millert 122: case 'r': tmp |= optval(ch, 0);
123: if (tmp & R && tmp & F)
124: fldtab->weights = RFtable;
125: else if (tmp & F)
126: fldtab->weights = Ftable;
1.4 millert 127: else if (tmp & R)
1.1 millert 128: fldtab->weights = Rascii;
129: fldtab->flags |= tmp;
130: break;
131: case 'o':
132: outpath = optarg;
133: break;
134: case 'k':
1.12 millert 135: CHECK_NFIELDS;
1.5 millert 136: setfield(optarg, ++ftpos, fldtab->flags);
1.1 millert 137: break;
138: case 't':
139: if (SEP_FLAG)
140: usage("multiple field delimiters");
141: SEP_FLAG = 1;
142: d_mask[' '] &= ~FLD_D;
143: d_mask['\t'] &= ~FLD_D;
144: d_mask[(int)*optarg] |= FLD_D;
145: if (d_mask[(int)*optarg] & REC_D_F)
146: err(2, "record/field delimiter clash");
147: break;
148: case 'R':
149: if (REC_D != '\n')
150: usage("multiple record delimiters");
151: if ('\n' == (REC_D = *optarg))
152: break;
153: d_mask['\n'] = d_mask[' '];
154: d_mask[REC_D] = REC_D_F;
155: break;
156: case 'T':
157: tmpdir = optarg;
158: break;
159: case 'u':
160: UNIQUE = 1;
161: break;
162: case 'c':
163: cflag = 1;
164: break;
165: case 'm':
166: mflag = 1;
167: break;
168: case 'H':
169: PANIC = 0;
170: break;
171: case 'y':
172: /* accept -y for backwards compat. */
173: break;
1.26 dlg 174: case 'z':
175: if (REC_D != '\n')
176: usage("multiple record delimiters");
177: REC_D = '\0';
178: d_mask['\n'] = d_mask[' '];
179: d_mask[REC_D] = REC_D_F;
180: break;
1.35 millert 181: case 's':
182: STABLE = 1;
183: break;
1.1 millert 184: case '?':
1.8 deraadt 185: default:
186: usage(NULL);
1.1 millert 187: }
188: }
1.4 millert 189:
1.1 millert 190: if (cflag && argc > optind+1)
191: errx(2, "too many input files for -c option");
1.4 millert 192:
1.1 millert 193: if (argc - 2 > optind && !strcmp(argv[argc-2], "-o")) {
194: outpath = argv[argc-1];
195: argc -= 2;
196: }
1.4 millert 197:
1.1 millert 198: if (mflag && argc - optind > (MAXFCT - (16+1))*16)
199: errx(2, "too many input files for -m option");
1.4 millert 200:
1.1 millert 201: for (i = optind; i < argc; i++) {
202: /* allow one occurrence of /dev/stdin */
203: if (!strcmp(argv[i], "-") || !strcmp(argv[i], devstdin)) {
204: if (stdinflag)
205: warnx("ignoring extra \"%s\" in file list",
206: argv[i]);
207: else {
208: stdinflag = 1;
209: argv[i] = devstdin;
210: }
211: } else if ((ch = access(argv[i], R_OK)))
1.14 millert 212: err(2, "%s", argv[i]);
1.1 millert 213: }
1.4 millert 214:
1.5 millert 215: if (!(fldtab->flags & (I|D|N) || fldtab[1].icol.num)) {
1.1 millert 216: SINGL_FLD = 1;
217: fldtab[0].icol.num = 1;
218: } else {
219: if (!fldtab[1].icol.num) {
1.12 millert 220: CHECK_NFIELDS;
1.1 millert 221: fldtab[0].flags &= ~(BI|BT);
222: setfield("1", ++ftpos, fldtab->flags);
223: }
224: fldreset(fldtab);
225: fldtab[0].flags &= ~F;
226: }
227: settables(fldtab[0].flags);
228: num_init();
229: fldtab->weights = gweights;
1.4 millert 230:
1.3 deraadt 231: if (optind == argc) {
232: static char *names[2];
233:
234: names[0] = devstdin;
235: names[1] = NULL;
236: filelist.names = names;
237: optind--;
238: } else
239: filelist.names = argv+optind;
1.4 millert 240:
1.1 millert 241: if (SINGL_FLD)
242: get = makeline;
243: else
244: get = makekey;
1.4 millert 245:
1.34 millert 246: if (!SINGL_FLD) {
1.31 millert 247: if ((linebuf = malloc(linebuf_size)) == NULL)
248: err(2, NULL);
249: }
250:
1.1 millert 251: if (cflag) {
252: order(filelist, get, fldtab);
253: /* NOT REACHED */
254: }
1.4 millert 255:
1.1 millert 256: if (!outpath) {
257: (void)snprintf(toutpath,
258: sizeof(toutpath), "%sstdout", _PATH_DEV);
259: outfile = outpath = toutpath;
260: } else if (!(ch = access(outpath, 0)) &&
261: strncmp(_PATH_DEV, outpath, 5)) {
1.38 ! guenther 262: struct sigaction oact, act;
1.1 millert 263: int sigtable[] = {SIGHUP, SIGINT, SIGPIPE, SIGXCPU, SIGXFSZ,
264: SIGVTALRM, SIGPROF, 0};
265: int outfd;
1.13 espie 266: mode_t um;
1.4 millert 267:
1.1 millert 268: errno = 0;
1.4 millert 269:
1.1 millert 270: if (access(outpath, W_OK))
1.14 millert 271: err(2, "%s", outpath);
1.4 millert 272: (void)snprintf(toutpath, sizeof(toutpath), "%sXXXXXXXXXX",
273: outpath);
1.13 espie 274: um = umask(S_IWGRP|S_IWOTH);
1.25 moritz 275: (void)umask(um);
1.13 espie 276: if ((outfd = mkstemp(toutpath)) == -1 ||
277: fchmod(outfd, DEFFILEMODE & ~um) == -1 ||
1.1 millert 278: (outfp = fdopen(outfd, "w")) == 0)
1.14 millert 279: err(2, "%s", toutpath);
1.1 millert 280: outfile = toutpath;
1.4 millert 281:
1.1 millert 282: (void)atexit(cleanup);
1.17 millert 283: sigfillset(&act.sa_mask);
284: act.sa_flags = SA_RESTART;
285: act.sa_handler = onsig;
1.1 millert 286: for (i = 0; sigtable[i]; ++i) /* always unlink toutpath */
1.38 ! guenther 287: if (sigaction(sigtable[i], NULL, &oact) < 0 ||
! 288: oact.sa_handler != SIG_IGN &&
! 289: sigaction(sigtable[i], &act, NULL) < 0)
! 290: err(2, "sigaction");
1.1 millert 291: } else
292: outfile = outpath;
293: if (outfp == NULL && (outfp = fopen(outfile, "w")) == NULL)
1.14 millert 294: err(2, "%s", outfile);
1.1 millert 295: if (mflag)
296: fmerge(-1, filelist, argc-optind, get, outfp, putline, fldtab);
297: else
298: fsort(-1, 0, filelist, argc-optind, outfp, fldtab);
299: if (outfile != outpath) {
300: if (access(outfile, 0))
1.14 millert 301: err(2, "%s", outfile);
1.1 millert 302: (void)unlink(outpath);
303: if (link(outfile, outpath))
304: err(2, "cannot link %s: output left in %s",
305: outpath, outfile);
306: (void)unlink(outfile);
307: }
308: exit(0);
309: }
310:
/*
 * Signal handler installed by main(): remove the temporary output file
 * via cleanup(), then leave at once with status 2 (error/interrupt).
 */
/* ARGSUSED */
static void
onsig(int signo)
{
	(void)signo;		/* the signal number is not needed */

	cleanup();
	_exit(2);
}
319:
320: static void
1.21 deraadt 321: cleanup(void)
1.1 millert 322: {
1.4 millert 323:
1.1 millert 324: if (toutpath[0])
325: (void)unlink(toutpath);
326: }
327:
/*
 * Print an optional diagnostic followed by the usage synopsis on
 * stderr, then exit with status 2.
 *
 * msg: extra message reported through warnx(), or NULL for none.
 */
static void
usage(char *msg)
{
	extern char *__progname;

	if (msg)
		warnx("%s", msg);

	(void)fprintf(stderr,
	    "usage: %s [-bcdfHimnrsuz] [-k field1[,field2]] [-o output] [-R char]\n"
	    "\t[-T dir] [-t char] [file ...]\n",
	    __progname);
	exit(2);
}