Annotation of src/usr.bin/sort/sort.c, Revision 1.90
1.90 ! deraadt 1: /* $OpenBSD: sort.c,v 1.89 2019/05/15 09:07:46 schwarze Exp $ */
1.1 millert 2:
3: /*-
1.44 millert 4: * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
5: * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
6: * All rights reserved.
1.1 millert 7: *
8: * Redistribution and use in source and binary forms, with or without
9: * modification, are permitted provided that the following conditions
10: * are met:
11: * 1. Redistributions of source code must retain the above copyright
12: * notice, this list of conditions and the following disclaimer.
13: * 2. Redistributions in binary form must reproduce the above copyright
14: * notice, this list of conditions and the following disclaimer in the
15: * documentation and/or other materials provided with the distribution.
16: *
1.44 millert 17: * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
1.1 millert 18: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
1.44 millert 20: * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
1.1 millert 21: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27: * SUCH DAMAGE.
28: */
29:
1.48 millert 30: #include <sys/resource.h>
1.44 millert 31: #include <sys/stat.h>
1.48 millert 32: #include <sys/sysctl.h>
1.44 millert 33: #include <sys/types.h>
1.1 millert 34:
1.44 millert 35: #include <err.h>
36: #include <errno.h>
37: #include <getopt.h>
38: #include <limits.h>
39: #include <md5.h>
40: #include <regex.h>
1.1 millert 41: #include <signal.h>
1.44 millert 42: #include <stdbool.h>
1.74 tobias 43: #include <stdint.h>
1.44 millert 44: #include <stdio.h>
1.1 millert 45: #include <stdlib.h>
46: #include <string.h>
47: #include <unistd.h>
1.44 millert 48: #include <wchar.h>
49: #include <wctype.h>
50:
51: #include "coll.h"
52: #include "file.h"
53: #include "sort.h"
54:
/*
 * Short-option string handed to getopt_long(3).
 *
 * Unless GNUSORT_COMPATIBILITY is defined the string starts with "+",
 * which asks getopt_long(3) to stop at the first non-option argument
 * instead of permuting argv.
 */
#ifndef GNUSORT_COMPATIBILITY
# define PERMUTE "+"
#else
# define PERMUTE ""
#endif
#define OPTIONS PERMUTE "bCcdfgHhik:Mmno:RrS:st:T:uVz"
1.44 millert 61:
62: static bool need_random;
63: static const char *random_source;
64:
65: MD5_CTX md5_ctx;
66:
67: struct sort_opts sort_opts_vals;
68:
69: bool debug_sort;
70: bool need_hint;
71:
72: static struct sort_mods default_sort_mods_object;
73: struct sort_mods * const default_sort_mods = &default_sort_mods_object;
74:
75: /*
76: * Arguments from file (when file0-from option is used:
77: */
78: static size_t argc_from_file0 = (size_t)-1;
79: static char **argv_from_file0;
80:
/*
 * Values returned by getopt_long() for long options that have no
 * one-letter form.  They start just above CHAR_MAX so they cannot
 * collide with any character used for a short option.
 */
enum {
	SORT_OPT = CHAR_MAX + 1,	/* --sort */
	HELP_OPT,			/* --help */
	FF_OPT,				/* --files0-from */
	BS_OPT,				/* --batch-size */
	VERSION_OPT,			/* --version */
	DEBUG_OPT,			/* --debug */
	RANDOMSOURCE_OPT,		/* --random-source */
	COMPRESSPROGRAM_OPT,		/* --compress-program */
	QSORT_OPT,			/* --qsort */
	HEAPSORT_OPT,			/* --heapsort */
	RADIXSORT_OPT,			/* --radixsort */
	MMAP_OPT			/* --mmap */
};
98:
/*
 * Modifier letters that may not be combined with one another; see
 * check_mutually_exclusive_flags().  The per-caller "seen" arrays are
 * indexed in the same order as this table.
 */
#define NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS 6
static const char mutually_exclusive_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] = {
	'M', 'n', 'g', 'R', 'h', 'V'
};
101:
102: static const struct option long_options[] = {
103: { "batch-size", required_argument, NULL, BS_OPT },
104: { "buffer-size", required_argument, NULL, 'S' },
105: { "check", optional_argument, NULL, 'c' },
106: { "check=silent|quiet", optional_argument, NULL, 'C' },
107: { "compress-program", required_argument, NULL, COMPRESSPROGRAM_OPT },
108: { "debug", no_argument, NULL, DEBUG_OPT },
109: { "dictionary-order", no_argument, NULL, 'd' },
110: { "field-separator", required_argument, NULL, 't' },
111: { "files0-from", required_argument, NULL, FF_OPT },
112: { "general-numeric-sort", no_argument, NULL, 'g' },
113: { "heapsort", no_argument, NULL, HEAPSORT_OPT },
114: { "help", no_argument, NULL, HELP_OPT },
115: { "human-numeric-sort", no_argument, NULL, 'h' },
116: { "ignore-leading-blanks", no_argument, NULL, 'b' },
117: { "ignore-case", no_argument, NULL, 'f' },
118: { "ignore-nonprinting", no_argument, NULL, 'i' },
119: { "key", required_argument, NULL, 'k' },
120: { "merge", no_argument, NULL, 'm' },
121: { "mergesort", no_argument, NULL, 'H' },
122: { "mmap", no_argument, NULL, MMAP_OPT },
123: { "month-sort", no_argument, NULL, 'M' },
124: { "numeric-sort", no_argument, NULL, 'n' },
125: { "output", required_argument, NULL, 'o' },
126: { "qsort", no_argument, NULL, QSORT_OPT },
127: { "radixsort", no_argument, NULL, RADIXSORT_OPT },
128: { "random-sort", no_argument, NULL, 'R' },
129: { "random-source", required_argument, NULL, RANDOMSOURCE_OPT },
130: { "reverse", no_argument, NULL, 'r' },
131: { "sort", required_argument, NULL, SORT_OPT },
132: { "stable", no_argument, NULL, 's' },
133: { "temporary-directory", required_argument, NULL, 'T' },
134: { "unique", no_argument, NULL, 'u' },
135: { "version", no_argument, NULL, VERSION_OPT },
136: { "version-sort", no_argument, NULL, 'V' },
137: { "zero-terminated", no_argument, NULL, 'z' },
138: { NULL, no_argument, NULL, 0 }
139: };
140:
141: /*
142: * Check where sort modifier is present
143: */
144: static bool
145: sort_modifier_empty(struct sort_mods *sm)
146: {
147: return !(sm->Mflag || sm->Vflag || sm->nflag || sm->gflag ||
148: sm->rflag || sm->Rflag || sm->hflag || sm->dflag || sm->fflag);
149: }
150:
151: /*
152: * Print out usage text.
153: */
154: static __dead void
155: usage(int exit_val)
156: {
157: fprintf(exit_val ? stderr : stdout,
1.46 jmc 158: "usage: %s [-bCcdfgHhiMmnRrsuVz] [-k field1[,field2]] [-o output] "
1.45 jmc 159: "[-S size]\n\t[-T dir] [-t char] [file ...]\n", getprogname());
1.44 millert 160: exit(exit_val);
161: }
1.4 millert 162:
1.1 millert 163: /*
1.44 millert 164: * Read input file names from a file (file0-from option).
1.1 millert 165: */
1.44 millert 166: static void
167: read_fns_from_file0(const char *fn)
168: {
1.47 millert 169: FILE *f;
170: char *line = NULL;
171: size_t linesize = 0;
172: ssize_t linelen;
173:
174: f = fopen(fn, "r");
175: if (f == NULL)
176: err(2, "%s", fn);
177:
178: while ((linelen = getdelim(&line, &linesize, '\0', f)) != -1) {
179: if (*line != '\0') {
180: if (argc_from_file0 == (size_t)-1)
181: argc_from_file0 = 0;
182: ++argc_from_file0;
183: argv_from_file0 = sort_reallocarray(argv_from_file0,
184: argc_from_file0, sizeof(char *));
185: argv_from_file0[argc_from_file0 - 1] = line;
186: } else {
187: free(line);
1.44 millert 188: }
1.47 millert 189: line = NULL;
190: linesize = 0;
1.44 millert 191: }
1.47 millert 192: if (ferror(f))
193: err(2, "%s: getdelim", fn);
194:
195: closefile(f, fn);
1.44 millert 196: }
1.4 millert 197:
1.1 millert 198: /*
1.44 millert 199: * Check how much RAM is available for the sort.
1.1 millert 200: */
1.44 millert 201: static void
202: set_hw_params(void)
203: {
1.73 tobias 204: unsigned long long free_memory;
1.48 millert 205: long long user_memory;
206: struct rlimit rl;
207: size_t len;
208: int mib[] = { CTL_HW, HW_USERMEM64 };
209:
210: /* Get total user (non-kernel) memory. */
211: len = sizeof(user_memory);
212: if (sysctl(mib, 2, &user_memory, &len, NULL, 0) == -1)
213: user_memory = -1;
214:
215: /* Increase our data size to the max */
216: if (getrlimit(RLIMIT_DATA, &rl) == 0) {
217: free_memory = (unsigned long long)rl.rlim_cur;
218: rl.rlim_cur = rl.rlim_max;
219: if (setrlimit(RLIMIT_DATA, &rl) == 0) {
220: free_memory = (unsigned long long)rl.rlim_max;
221: } else {
222: warn("Can't set resource limit to max data size");
223: }
1.73 tobias 224: } else {
225: free_memory = 1000000;
1.48 millert 226: warn("Can't get resource limit for data size");
1.73 tobias 227: }
1.1 millert 228:
1.48 millert 229: /* We prefer to use temp files rather than swap space. */
230: if (user_memory != -1 && free_memory > user_memory)
231: free_memory = user_memory;
1.44 millert 232:
233: available_free_memory = free_memory / 2;
234: }
235:
236: /*
237: * Set directory temporary files.
238: */
239: static void
240: set_tmpdir(void)
241: {
1.53 millert 242: if (!issetugid()) {
243: char *td;
1.44 millert 244:
1.53 millert 245: td = getenv("TMPDIR");
246: if (td != NULL)
1.71 tobias 247: tmpdir = td;
1.53 millert 248: }
1.44 millert 249: }
250:
251: /*
252: * Parse -S option.
253: */
254: static unsigned long long
255: parse_memory_buffer_value(const char *value)
256: {
1.66 millert 257: char *endptr;
258: unsigned long long membuf;
259:
260: membuf = strtoll(value, &endptr, 10);
261: if (endptr == value || (long long)membuf < 0 ||
262: (errno == ERANGE && membuf == LLONG_MAX))
1.68 millert 263: goto invalid;
1.66 millert 264:
265: switch (*endptr) {
266: case 'Y':
1.68 millert 267: if (membuf > ULLONG_MAX / 1024)
268: goto invalid;
1.66 millert 269: membuf *= 1024;
270: /* FALLTHROUGH */
271: case 'Z':
1.68 millert 272: if (membuf > ULLONG_MAX / 1024)
273: goto invalid;
1.66 millert 274: membuf *= 1024;
275: /* FALLTHROUGH */
276: case 'E':
1.68 millert 277: if (membuf > ULLONG_MAX / 1024)
278: goto invalid;
1.66 millert 279: membuf *= 1024;
280: /* FALLTHROUGH */
281: case 'P':
1.68 millert 282: if (membuf > ULLONG_MAX / 1024)
283: goto invalid;
1.66 millert 284: membuf *= 1024;
285: /* FALLTHROUGH */
286: case 'T':
1.68 millert 287: if (membuf > ULLONG_MAX / 1024)
288: goto invalid;
1.66 millert 289: membuf *= 1024;
290: /* FALLTHROUGH */
291: case 'G':
1.68 millert 292: if (membuf > ULLONG_MAX / 1024)
293: goto invalid;
1.66 millert 294: membuf *= 1024;
295: /* FALLTHROUGH */
296: case 'M':
1.68 millert 297: if (membuf > ULLONG_MAX / 1024)
298: goto invalid;
1.66 millert 299: membuf *= 1024;
300: /* FALLTHROUGH */
301: case '\0':
302: case 'K':
1.68 millert 303: if (membuf > ULLONG_MAX / 1024)
304: goto invalid;
1.66 millert 305: membuf *= 1024;
306: /* FALLTHROUGH */
307: case 'b':
308: break;
309: case '%':
1.74 tobias 310: if (available_free_memory != 0 &&
311: membuf > ULLONG_MAX / available_free_memory)
312: goto invalid;
1.66 millert 313: membuf = (available_free_memory * membuf) /
314: 100;
315: break;
316: default:
317: warnc(EINVAL, "%s", optarg);
318: membuf = available_free_memory;
1.44 millert 319: }
1.74 tobias 320: if (membuf > SIZE_MAX)
321: goto invalid;
1.66 millert 322: return membuf;
1.68 millert 323: invalid:
324: errx(2, "invalid memory buffer size: %s", value);
1.44 millert 325: }
326:
327: /*
328: * Signal handler that clears the temporary files.
329: */
330: static void
1.49 millert 331: sig_handler(int sig __unused)
1.44 millert 332: {
333: clear_tmp_files();
1.50 millert 334: _exit(2);
1.44 millert 335: }
336:
337: /*
338: * Set signal handler on panic signals.
339: */
340: static void
341: set_signal_handler(void)
342: {
343: struct sigaction sa;
1.69 millert 344: int i, signals[] = {SIGTERM, SIGHUP, SIGINT, SIGUSR1, SIGUSR2,
345: SIGPIPE, SIGXCPU, SIGXFSZ, 0};
1.44 millert 346:
347: memset(&sa, 0, sizeof(sa));
1.49 millert 348: sigfillset(&sa.sa_mask);
349: sa.sa_flags = SA_RESTART;
350: sa.sa_handler = sig_handler;
351:
352: for (i = 0; signals[i] != 0; i++) {
1.90 ! deraadt 353: if (sigaction(signals[i], &sa, NULL) == -1) {
1.70 tobias 354: warn("sigaction(%s)", strsignal(signals[i]));
1.49 millert 355: continue;
356: }
1.44 millert 357: }
358: }
359:
/*
 * Complain about an unrecognised option argument and exit with
 * status 2.  what is the offending text, quoted in the message.
 */
static void
unknown(const char *what)
{
	errx(2, "Unknown feature: %s", what);
}
368:
369: /*
370: * Check whether contradictory input options are used.
371: */
372: static void
373: check_mutually_exclusive_flags(char c, bool *mef_flags)
374: {
375: int i, fo_index, mec;
376: bool found_others, found_this;
377:
1.72 tobias 378: found_others = found_this = false;
1.44 millert 379: fo_index = 0;
380:
381: for (i = 0; i < NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS; i++) {
382: mec = mutually_exclusive_flags[i];
383:
384: if (mec != c) {
385: if (mef_flags[i]) {
1.65 millert 386: if (found_this) {
387: errx(2,
388: "%c:%c: mutually exclusive flags",
389: c, mec);
390: }
1.44 millert 391: found_others = true;
392: fo_index = i;
393: }
394: } else {
1.65 millert 395: if (found_others) {
396: errx(2,
397: "%c:%c: mutually exclusive flags",
398: c, mutually_exclusive_flags[fo_index]);
399: }
1.44 millert 400: mef_flags[i] = true;
401: found_this = true;
402: }
403: }
404: }
405:
406: /*
407: * Initialise sort opts data.
408: */
409: static void
410: set_sort_opts(void)
411: {
412: memset(&default_sort_mods_object, 0,
413: sizeof(default_sort_mods_object));
414: memset(&sort_opts_vals, 0, sizeof(sort_opts_vals));
415: default_sort_mods_object.func =
416: get_sort_func(&default_sort_mods_object);
417: }
418:
419: /*
420: * Set a sort modifier on a sort modifiers object.
421: */
422: static bool
423: set_sort_modifier(struct sort_mods *sm, int c)
424: {
1.66 millert 425: switch (c) {
426: case 'b':
427: sm->bflag = true;
428: break;
429: case 'd':
430: sm->dflag = true;
431: break;
432: case 'f':
433: sm->fflag = true;
434: break;
435: case 'g':
436: sm->gflag = true;
437: need_hint = true;
438: break;
439: case 'i':
440: sm->iflag = true;
441: break;
442: case 'R':
443: sm->Rflag = true;
444: need_random = true;
445: break;
446: case 'M':
447: initialise_months();
448: sm->Mflag = true;
449: need_hint = true;
450: break;
451: case 'n':
452: sm->nflag = true;
453: need_hint = true;
454: break;
455: case 'r':
456: sm->rflag = true;
457: break;
458: case 'V':
459: sm->Vflag = true;
460: break;
461: case 'h':
462: sm->hflag = true;
463: need_hint = true;
464: break;
465: default:
466: return false;
1.44 millert 467: }
1.66 millert 468: sort_opts_vals.complex_sort = true;
469: sm->func = get_sort_func(sm);
470:
1.44 millert 471: return true;
472: }
473:
474: /*
475: * Parse POS in -k option.
476: */
477: static int
478: parse_pos(const char *s, struct key_specs *ks, bool *mef_flags, bool second)
479: {
480: regmatch_t pmatch[4];
481: regex_t re;
482: char *c, *f;
483: const char *sregexp = "^([0-9]+)(\\.[0-9]+)?([bdfirMngRhV]+)?$";
484: size_t len, nmatch;
485: int ret;
486:
487: ret = -1;
488: nmatch = 4;
489: c = f = NULL;
490:
491: if (regcomp(&re, sregexp, REG_EXTENDED) != 0)
492: return -1;
493:
494: if (regexec(&re, s, nmatch, pmatch, 0) != 0)
495: goto end;
496:
497: if (pmatch[0].rm_eo <= pmatch[0].rm_so)
498: goto end;
499:
500: if (pmatch[1].rm_eo <= pmatch[1].rm_so)
501: goto end;
502:
503: len = pmatch[1].rm_eo - pmatch[1].rm_so;
504:
1.57 millert 505: f = sort_malloc(len + 1);
506: memcpy(f, s + pmatch[1].rm_so, len);
1.44 millert 507: f[len] = '\0';
508:
509: if (second) {
510: errno = 0;
1.58 millert 511: ks->f2 = (size_t)strtoul(f, NULL, 10);
1.44 millert 512: if (errno != 0)
1.58 millert 513: goto end;
1.44 millert 514: if (ks->f2 == 0) {
515: warn("0 field in key specs");
516: goto end;
517: }
518: } else {
519: errno = 0;
1.58 millert 520: ks->f1 = (size_t)strtoul(f, NULL, 10);
1.44 millert 521: if (errno != 0)
1.58 millert 522: goto end;
1.44 millert 523: if (ks->f1 == 0) {
524: warn("0 field in key specs");
525: goto end;
526: }
527: }
528:
529: if (pmatch[2].rm_eo > pmatch[2].rm_so) {
530: len = pmatch[2].rm_eo - pmatch[2].rm_so - 1;
531:
1.57 millert 532: c = sort_malloc(len + 1);
533: memcpy(c, s + pmatch[2].rm_so + 1, len);
1.44 millert 534: c[len] = '\0';
535:
536: if (second) {
537: errno = 0;
1.58 millert 538: ks->c2 = (size_t)strtoul(c, NULL, 10);
1.44 millert 539: if (errno != 0)
1.58 millert 540: goto end;
1.44 millert 541: } else {
542: errno = 0;
1.58 millert 543: ks->c1 = (size_t)strtoul(c, NULL, 10);
1.44 millert 544: if (errno != 0)
1.58 millert 545: goto end;
1.44 millert 546: if (ks->c1 == 0) {
547: warn("0 column in key specs");
548: goto end;
549: }
550: }
551: } else {
552: if (second)
553: ks->c2 = 0;
554: else
555: ks->c1 = 1;
556: }
557:
558: if (pmatch[3].rm_eo > pmatch[3].rm_so) {
559: regoff_t i = 0;
560:
561: for (i = pmatch[3].rm_so; i < pmatch[3].rm_eo; i++) {
562: check_mutually_exclusive_flags(s[i], mef_flags);
563: if (s[i] == 'b') {
564: if (second)
565: ks->pos2b = true;
566: else
567: ks->pos1b = true;
568: } else if (!set_sort_modifier(&(ks->sm), s[i]))
569: goto end;
570: }
571: }
572:
573: ret = 0;
574:
575: end:
1.61 millert 576: sort_free(c);
577: sort_free(f);
1.44 millert 578: regfree(&re);
579:
580: return ret;
581: }
582:
583: /*
584: * Parse -k option value.
585: */
586: static int
587: parse_k(const char *s, struct key_specs *ks)
588: {
589: int ret = -1;
590: bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] =
591: { false, false, false, false, false, false };
592:
1.66 millert 593: if (*s != '\0') {
1.44 millert 594: char *sptr;
595:
596: sptr = strchr(s, ',');
597: if (sptr) {
598: size_t size1;
599: char *pos1, *pos2;
600:
601: size1 = sptr - s;
602:
603: if (size1 < 1)
604: return -1;
605:
1.57 millert 606: pos1 = sort_malloc(size1 + 1);
607: memcpy(pos1, s, size1);
1.44 millert 608: pos1[size1] = '\0';
609:
610: ret = parse_pos(pos1, ks, mef_flags, false);
611:
612: sort_free(pos1);
613: if (ret < 0)
614: return ret;
615:
616: pos2 = sort_strdup(sptr + 1);
617: ret = parse_pos(pos2, ks, mef_flags, true);
618: sort_free(pos2);
619: } else
620: ret = parse_pos(s, ks, mef_flags, false);
1.1 millert 621: }
1.4 millert 622:
1.44 millert 623: return ret;
624: }
625:
/*
 * Parse one position of the obsolete "+POS -POS" key syntax:
 * FIELD[.COLUMN][LETTERS], without the leading '+' or '-'.
 *
 * On success *nf and *nc receive the field and column numbers (0 when
 * the column is absent) and the trailing letters, if any, are copied to
 * sopts as a NUL-terminated string; sopts is left alone when there are
 * none.  Returns 0 on success and -1 when s does not have this form.
 * A number that strtoul() cannot represent, or letters that do not fit
 * in sopts_size bytes, are fatal (exit status 2).
 */
static int
parse_pos_obs(const char *s, size_t *nf, size_t *nc, char *sopts, size_t sopts_size)
{
	const char *pattern = "^([0-9]+)(\\.[0-9]+)?([A-Za-z]+)?$";
	regmatch_t m[4];
	regex_t re;
	char *field_str = NULL, *col_str = NULL;
	size_t n;
	int rc = -1;

	*nc = *nf = 0;

	if (regcomp(&re, pattern, REG_EXTENDED) != 0)
		return -1;

	if (regexec(&re, s, 4, m, 0) != 0)
		goto done;
	if (m[0].rm_eo <= m[0].rm_so || m[1].rm_eo <= m[1].rm_so)
		goto done;

	/* Field number. */
	n = m[1].rm_eo - m[1].rm_so;
	field_str = sort_malloc(n + 1);
	memcpy(field_str, s + m[1].rm_so, n);
	field_str[n] = '\0';

	errno = 0;
	*nf = (size_t)strtoul(field_str, NULL, 10);
	if (errno != 0)
		errx(2, "Invalid key position");

	/* Optional column number; the match includes the leading dot. */
	if (m[2].rm_eo > m[2].rm_so) {
		n = m[2].rm_eo - m[2].rm_so - 1;
		col_str = sort_malloc(n + 1);
		memcpy(col_str, s + m[2].rm_so + 1, n);
		col_str[n] = '\0';

		errno = 0;
		*nc = (size_t)strtoul(col_str, NULL, 10);
		if (errno != 0)
			errx(2, "Invalid key position");
	}

	/* Optional trailing letters, handed back verbatim. */
	if (m[3].rm_eo > m[3].rm_so) {
		n = m[3].rm_eo - m[3].rm_so;
		if (n >= sopts_size)
			errx(2, "Invalid key position");
		memcpy(sopts, s + m[3].rm_so, n);
		sopts[n] = '\0';
	}

	rc = 0;

done:
	sort_free(col_str);
	sort_free(field_str);
	regfree(&re);

	return rc;
}
699:
/*
 * "Translate" obsolete +POS1 [-POS2] syntax into new -kPOS1[,POS2] syntax.
 *
 * Every argv entry starting with '+' that parse_pos_obs() accepts is
 * replaced by a newly allocated "-k..." string.  If the following entry
 * starts with '-' and is accepted as well, it is folded into the same
 * key, removed from argv, and *argc is decremented.  The obsolete
 * positions count from 0, so field and column of POS1 are incremented;
 * the field of POS2 is incremented only when a column was given.
 *
 * NOTE(review): every entry from argv[1] on is examined, so option
 * arguments and operands that happen to look like +POS are rewritten
 * too -- confirm this is intended.
 */
static void
fix_obsolete_keys(int *argc, char **argv)
{
	/*
	 * Room for "-k", four numbers, two modifier strings of up to 127
	 * characters each, and the separators.  The previous 129-byte buffer
	 * could silently truncate the generated option.
	 */
	char sopt[512];
	int i, len;

	for (i = 1; i < *argc; i++) {
		const char *arg1 = argv[i];
		const char *arg2;
		char sopts1[128], sopts2[128];
		size_t c1, f1, c2, f2;
		int j;

		if (arg1[0] != '+')
			continue;

		sopts1[0] = '\0';
		c1 = f1 = 0;
		if (parse_pos_obs(arg1 + 1, &f1, &c1, sopts1,
		    sizeof(sopts1)) < 0)
			continue;

		f1 += 1;
		c1 += 1;

		arg2 = (i + 1 < *argc) ? argv[i + 1] : NULL;
		sopts2[0] = '\0';
		c2 = f2 = 0;

		if (arg2 != NULL && arg2[0] == '-' &&
		    parse_pos_obs(arg2 + 1, &f2, &c2, sopts2,
		    sizeof(sopts2)) >= 0) {
			if (c2 > 0)
				f2 += 1;
			len = snprintf(sopt, sizeof(sopt),
			    "-k%zu.%zu%s,%zu.%zu%s",
			    f1, c1, sopts1, f2, c2, sopts2);
			if (len < 0 || (size_t)len >= sizeof(sopt))
				errx(2, "Invalid key position");
			argv[i] = sort_strdup(sopt);

			/* Drop the consumed -POS2 entry and close the gap. */
			for (j = i + 1; j + 1 < *argc; j++)
				argv[j] = argv[j + 1];
			*argc -= 1;
			/* Keep argv NULL-terminated at its new length. */
			argv[*argc] = NULL;
			continue;
		}

		len = snprintf(sopt, sizeof(sopt), "-k%zu.%zu%s",
		    f1, c1, sopts1);
		if (len < 0 || (size_t)len >= sizeof(sopt))
			errx(2, "Invalid key position");
		argv[i] = sort_strdup(sopt);
	}
}
758:
759: /*
760: * Set random seed
761: */
762: static void
763: set_random_seed(void)
764: {
765: if (!need_random)
766: return;
1.4 millert 767:
1.44 millert 768: MD5Init(&md5_ctx);
769: if (random_source != NULL) {
770: unsigned char buf[BUFSIZ];
771: size_t nr;
772: FILE *fp;
773:
774: if ((fp = fopen(random_source, "r")) == NULL)
775: err(2, "%s", random_source);
776: while ((nr = fread(buf, 1, sizeof(buf), fp)) != 0)
777: MD5Update(&md5_ctx, buf, nr);
778: if (ferror(fp))
779: err(2, "%s", random_source);
780: fclose(fp);
1.1 millert 781: } else {
1.44 millert 782: unsigned char rsd[1024];
783:
784: arc4random_buf(rsd, sizeof(rsd));
785: MD5Update(&md5_ctx, rsd, sizeof(rsd));
786: }
787: }
788:
789: /*
790: * Main function.
791: */
/*
 * Program entry point.
 *
 * Order of work, as written below: probe memory limits, pledge, set up
 * temporary-file cleanup, rewrite obsolete +POS/-POS keys, parse the
 * options, narrow the pledge according to the selected mode, build the
 * key list, then either check (-c/-C), sort, or merge (-m) the input.
 * Returns the result of check() in check mode and 0 otherwise; the
 * fatal errors reported here exit with status 2.
 */
792: int
793: main(int argc, char *argv[])
794: {
1.75 tobias 795: char *outfile, *real_outfile, *sflag;
1.76 tobias 796: int c;
1.44 millert 797: size_t i;
1.78 millert 798: struct sort_mods *sm = &default_sort_mods_object;
1.44 millert 799: bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] =
800: { false, false, false, false, false, false };
801:
/*
 * Runs before the first pledge(); presumably because the sysctl() and
 * setrlimit() calls it makes would not be allowed afterwards -- TODO confirm.
 */
1.83 tim 802: set_hw_params();
803:
1.86 semarie 804: if (pledge("stdio rpath wpath cpath fattr chown proc exec", NULL) == -1)
1.83 tim 805: err(2, "pledge");
806:
/* "-" selects standard output until -o says otherwise. */
1.51 millert 807: outfile = "-";
1.44 millert 808: real_outfile = NULL;
1.75 tobias 809: sflag = NULL;
1.44 millert 810:
811: init_tmp_files();
812:
813: set_signal_handler();
814:
/* Remove temporary files on every normal exit as well. */
1.51 millert 815: atexit(clear_tmp_files);
816:
1.44 millert 817: set_tmpdir();
818: set_sort_opts();
819:
/* Must precede getopt_long(): turns +POS [-POS] arguments into -k options. */
820: fix_obsolete_keys(&argc, argv);
821:
822: while (((c = getopt_long(argc, argv, OPTIONS, long_options, NULL))
823: != -1)) {
824:
/* Every option is checked; only the mutually exclusive letters have an effect. */
825: check_mutually_exclusive_flags(c, mef_flags);
826:
/* Modifier letters are consumed here; everything else falls into the switch. */
827: if (!set_sort_modifier(sm, c)) {
828: switch (c) {
829: case 'c':
830: sort_opts_vals.cflag = true;
/* Optional argument: "diagnose-first" is accepted and changes nothing. */
831: if (optarg) {
832: if (!strcmp(optarg, "diagnose-first"))
833: ;
834: else if (!strcmp(optarg, "silent") ||
835: !strcmp(optarg, "quiet"))
836: sort_opts_vals.csilentflag = true;
837: else if (*optarg)
838: unknown(optarg);
839: }
840: break;
841: case 'C':
842: sort_opts_vals.cflag = true;
843: sort_opts_vals.csilentflag = true;
844: break;
845: case 'k':
846: {
847: sort_opts_vals.complex_sort = true;
848: sort_opts_vals.kflag = true;
849:
/* Grow the key array by one zeroed entry and parse into it. */
1.79 millert 850: keys = sort_reallocarray(keys, keys_num + 1,
1.44 millert 851: sizeof(struct key_specs));
1.79 millert 852: memset(&(keys[keys_num]), 0,
1.44 millert 853: sizeof(struct key_specs));
1.79 millert 854: #ifndef GNUSORT_COMPATIBILITY
/* A -b seen so far applies to both ends of this key. */
855: keys[keys_num].pos1b = default_sort_mods->bflag;
856: keys[keys_num].pos2b = default_sort_mods->bflag;
857: #endif
1.44 millert 858:
1.79 millert 859: if (parse_k(optarg, &(keys[keys_num++])) < 0)
1.44 millert 860: errc(2, EINVAL, "-k %s", optarg);
861:
862: break;
863: }
864: case 'm':
865: sort_opts_vals.mflag = true;
866: break;
867: case 'o':
1.51 millert 868: outfile = optarg;
1.44 millert 869: break;
870: case 's':
871: sort_opts_vals.sflag = true;
872: break;
873: case 'S':
/* Only remembered here; parsed after option processing (see below). */
1.75 tobias 874: sflag = optarg;
1.44 millert 875: break;
876: case 'T':
1.71 tobias 877: tmpdir = optarg;
1.44 millert 878: break;
879: case 't':
/*
 * A longer argument must consist of leading backslashes before the
 * separator character; a backslash followed by '0' selects the NUL
 * character.  Anything else is rejected with EINVAL.
 */
880: while (strlen(optarg) > 1) {
881: if (optarg[0] != '\\') {
882: errc(2, EINVAL, "%s", optarg);
883: }
884: optarg += 1;
885: if (*optarg == '0') {
886: *optarg = 0;
887: break;
888: }
889: }
890: sort_opts_vals.tflag = true;
891: sort_opts_vals.field_sep = btowc(optarg[0]);
892: if (sort_opts_vals.field_sep == WEOF) {
893: errno = EINVAL;
894: err(2, NULL);
895: }
896: break;
897: case 'u':
898: sort_opts_vals.uflag = true;
899: /* stable sort for the correct unique val */
900: sort_opts_vals.sflag = true;
901: break;
902: case 'z':
903: sort_opts_vals.zflag = true;
904: break;
905: case SORT_OPT:
/*
 * NOTE(review): these calls do not go through
 * check_mutually_exclusive_flags() with the modifier letter, so
 * --sort=TYPE is not recorded in mef_flags -- confirm whether intended.
 */
1.62 millert 906: if (!strcmp(optarg, "general-numeric"))
907: set_sort_modifier(sm, 'g');
908: else if (!strcmp(optarg, "human-numeric"))
909: set_sort_modifier(sm, 'h');
910: else if (!strcmp(optarg, "numeric"))
911: set_sort_modifier(sm, 'n');
912: else if (!strcmp(optarg, "month"))
913: set_sort_modifier(sm, 'M');
914: else if (!strcmp(optarg, "random"))
915: set_sort_modifier(sm, 'R');
916: else
917: unknown(optarg);
1.44 millert 918: break;
919: case QSORT_OPT:
920: sort_opts_vals.sort_method = SORT_QSORT;
921: break;
922: case 'H':
923: sort_opts_vals.sort_method = SORT_MERGESORT;
924: break;
925: case MMAP_OPT:
926: use_mmap = true;
927: break;
928: case HEAPSORT_OPT:
929: sort_opts_vals.sort_method = SORT_HEAPSORT;
930: break;
931: case RADIXSORT_OPT:
932: sort_opts_vals.sort_method = SORT_RADIXSORT;
933: break;
934: case RANDOMSOURCE_OPT:
1.71 tobias 935: random_source = optarg;
1.44 millert 936: break;
937: case COMPRESSPROGRAM_OPT:
1.71 tobias 938: compress_program = optarg;
1.44 millert 939: break;
940: case FF_OPT:
941: read_fns_from_file0(optarg);
942: break;
943: case BS_OPT:
944: {
1.54 millert 945: const char *errstr;
946:
/* The accepted value is 2..UINT_MAX-1; one is added to it before storing. */
947: max_open_files = strtonum(optarg, 2,
948: UINT_MAX - 1, &errstr) + 1;
949: if (errstr != NULL)
950: errx(2, "--batch-size argument is %s",
951: errstr);
952: break;
1.44 millert 953: }
954: case VERSION_OPT:
955: printf("%s\n", VERSION);
956: exit(EXIT_SUCCESS);
957: /* NOTREACHED */
958: break;
959: case DEBUG_OPT:
960: debug_sort = true;
961: break;
962: case HELP_OPT:
963: usage(0);
964: /* NOTREACHED */
965: break;
966: default:
967: usage(2);
968: /* NOTREACHED */
969: }
970: }
971: }
/* From here on argv holds only the operands. */
972: argc -= optind;
973: argv += optind;
1.78 millert 974:
/* Without a compression program the "proc exec" promises are dropped. */
1.83 tim 975: if (compress_program == NULL) {
1.86 semarie 976: if (pledge("stdio rpath wpath cpath fattr chown", NULL) == -1)
1.83 tim 977: err(2, "pledge");
978: }
979:
1.78 millert 980: #ifndef GNUSORT_COMPATIBILITY
/* Accept a trailing "-o file" after the operands. */
981: if (argc > 2 && strcmp(argv[argc - 2], "-o") == 0) {
982: outfile = argv[argc - 1];
983: argc -= 2;
984: }
985: #endif
1.77 millert 986:
/* Names read via --files0-from replace the command-line operands. */
1.80 tim 987: if (argv_from_file0) {
988: argc = argc_from_file0;
989: argv = argv_from_file0;
990: }
991:
1.83 tim 992: if (sort_opts_vals.cflag) {
993: if (argc > 1)
994: errx(2, "only one input file is allowed with the -%c flag",
995: sort_opts_vals.csilentflag ? 'C' : 'c');
996:
/* Check mode: the pledge keeps rpath only when a named file is checked. */
997: if (argc == 0 || strcmp(argv[0], "-") == 0) {
998: if (compress_program) {
999: if (pledge("stdio proc exec", NULL) == -1)
1000: err(2, "pledge");
1001: } else {
1002: if (pledge("stdio", NULL) == -1)
1003: err(2, "pledge");
1004: }
1005: } else {
1006: if (compress_program) {
1007: if (pledge("stdio rpath proc exec", NULL) == -1)
1008: err(2, "pledge");
1009: } else {
1010: if (pledge("stdio rpath", NULL) == -1)
1011: err(2, "pledge");
1012: }
1013: }
1014: } else {
1015: /* Case when the outfile equals one of the input files: */
1016: if (strcmp(outfile, "-") != 0) {
1017: struct stat sb;
/* Note: this i shadows the size_t i declared at the top of main(). */
1018: int fd, i;
1019:
1020: for (i = 0; i < argc; ++i) {
1021: if (strcmp(argv[i], outfile) == 0) {
1022: if (stat(outfile, &sb) == -1)
1023: err(2, "%s", outfile);
1024: if (access(outfile, W_OK) == -1)
1025: err(2, "%s", outfile);
/*
 * Write to a freshly created "<outfile>.XXXXXXXXXX" file that gets the
 * owner and permission bits of the original; it is renamed over the
 * original at the end of main().
 */
1026: real_outfile = outfile;
1027: sort_asprintf(&outfile, "%s.XXXXXXXXXX",
1028: real_outfile);
1029: if ((fd = mkstemp(outfile)) == -1)
1.84 millert 1030: err(2, "%s", outfile);
/* The result of fchown() is deliberately ignored. */
1031: (void)fchown(fd, sb.st_uid, sb.st_gid);
1.83 tim 1032: if (fchmod(fd, sb.st_mode & ACCESSPERMS) == -1)
1.84 millert 1033: err(2, "%s", outfile);
1.83 tim 1034: close(fd);
1035: tmp_file_atexit(outfile);
1036: break;
1037: }
1038: }
1039: }
1040:
/* The fattr and chown promises are dropped from here on. */
1041: if (compress_program) {
1042: if (pledge("stdio rpath wpath cpath proc exec", NULL) == -1)
1043: err(2, "pledge");
1044: } else {
1045: if (pledge("stdio rpath wpath cpath", NULL) == -1)
1046: err(2, "pledge");
1047: }
1048: }
1.75 tobias 1049:
/* -S is parsed only now; its '%' form scales the current available_free_memory. */
1050: if (sflag != NULL)
1051: available_free_memory = parse_memory_buffer_value(sflag);
1.44 millert 1052:
/* No -k given: create a single key that carries the global modifiers. */
1053: if (keys_num == 0) {
1054: keys_num = 1;
1.67 millert 1055: keys = sort_reallocarray(keys, 1, sizeof(struct key_specs));
1.44 millert 1056: memset(&(keys[0]), 0, sizeof(struct key_specs));
1057: keys[0].c1 = 1;
1.79 millert 1058: #ifdef GNUSORT_COMPATIBILITY
1059: keys[0].pos1b = sm->bflag;
1060: keys[0].pos2b = sm->bflag;
1061: #endif
1062: memcpy(&(keys[0].sm), sm, sizeof(struct sort_mods));
1.44 millert 1063: }
1064:
/* Keys without modifiers of their own inherit the global ones. */
1065: for (i = 0; i < keys_num; i++) {
1066: struct key_specs *ks;
1067:
1068: ks = &(keys[i]);
1069:
1.85 millert 1070: if (sort_modifier_empty(&(ks->sm))) {
1.79 millert 1071: #ifdef GNUSORT_COMPATIBILITY
1.85 millert 1072: if (!(ks->pos1b) && !(ks->pos2b)) {
1073: ks->pos1b = sm->bflag;
1074: ks->pos2b = sm->bflag;
1075: }
1.79 millert 1076: #endif
1.44 millert 1077: memcpy(&(ks->sm), sm, sizeof(struct sort_mods));
1078: }
1079:
1080: ks->sm.func = get_sort_func(&(ks->sm));
1081: }
1082:
1.89 schwarze 1083: if (debug_sort)
1.44 millert 1084: printf("Memory to be used for sorting: %llu\n",
1085: available_free_memory);
1086:
/* Check mode ends here; with no operand, "-" is passed to check(). */
1.76 tobias 1087: if (sort_opts_vals.cflag)
1088: return check(argc ? *argv : "-");
1089:
1.44 millert 1090: set_random_seed();
1091:
1.76 tobias 1092: if (!sort_opts_vals.mflag) {
1.44 millert 1093: struct file_list fl;
1094: struct sort_list list;
1095:
1096: sort_list_init(&list);
1097: file_list_init(&fl, true);
1098:
/* Process each operand, or "-" when there are none. */
1099: if (argc < 1)
1100: procfile("-", &list, &fl);
1101: else {
1102: while (argc > 0) {
1103: procfile(*argv, &list, &fl);
1104: --argc;
1105: ++argv;
1106: }
1107: }
1108:
/*
 * With no files in fl the list is written straight to the output.
 * Otherwise any remaining list contents go to one more temporary file
 * and all files in fl are merged.
 */
1109: if (fl.count < 1)
1110: sort_list_to_file(&list, outfile);
1111: else {
1112: if (list.count > 0) {
1113: char *flast = new_tmp_file_name();
1114:
1115: sort_list_to_file(&list, flast);
1116: file_list_add(&fl, flast, false);
1117: }
1118: merge_files(&fl, outfile);
1119: }
1120:
1121: file_list_clean(&fl);
1122:
1123: /*
1124: * We are about to exit the program, so we can ignore
1125: * the clean-up for speed
1126: *
1127: * sort_list_clean(&list);
1128: */
1129:
1.76 tobias 1130: } else {
/* -m: the operands (or "-") are handed to merge_files() directly. */
1.44 millert 1131: struct file_list fl;
1132:
1133: file_list_init(&fl, false);
1.87 millert 1134: if (argc < 1)
1135: file_list_add(&fl, "-", true);
1136: else
1137: file_list_populate(&fl, argc, argv, true);
1.44 millert 1138: merge_files(&fl, outfile);
1139: file_list_clean(&fl);
1140: }
1141:
/* Output went to a temporary sibling of an input file: move it into place. */
1142: if (real_outfile) {
1.90 ! deraadt 1143: if (rename(outfile, real_outfile) == -1)
1.44 millert 1144: err(2, "%s", real_outfile);
1.51 millert 1145: sort_free(outfile);
1.44 millert 1146: }
1.4 millert 1147:
1.76 tobias 1148: return 0;
1.1 millert 1149: }