Annotation of src/usr.bin/sort/sort.c, Revision 1.87
1.87 ! millert 1: /* $OpenBSD: sort.c,v 1.86 2016/07/14 08:31:18 semarie Exp $ */
1.1 millert 2:
3: /*-
1.44 millert 4: * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
5: * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
6: * All rights reserved.
1.1 millert 7: *
8: * Redistribution and use in source and binary forms, with or without
9: * modification, are permitted provided that the following conditions
10: * are met:
11: * 1. Redistributions of source code must retain the above copyright
12: * notice, this list of conditions and the following disclaimer.
13: * 2. Redistributions in binary form must reproduce the above copyright
14: * notice, this list of conditions and the following disclaimer in the
15: * documentation and/or other materials provided with the distribution.
16: *
1.44 millert 17: * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
1.1 millert 18: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
1.44 millert 20: * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
1.1 millert 21: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27: * SUCH DAMAGE.
28: */
29:
1.48 millert 30: #include <sys/resource.h>
1.44 millert 31: #include <sys/stat.h>
1.48 millert 32: #include <sys/sysctl.h>
1.44 millert 33: #include <sys/types.h>
1.1 millert 34:
1.44 millert 35: #include <err.h>
36: #include <errno.h>
37: #include <getopt.h>
38: #include <limits.h>
1.16 ericj 39: #include <locale.h>
1.44 millert 40: #include <md5.h>
41: #include <regex.h>
1.1 millert 42: #include <signal.h>
1.44 millert 43: #include <stdbool.h>
1.74 tobias 44: #include <stdint.h>
1.44 millert 45: #include <stdio.h>
1.1 millert 46: #include <stdlib.h>
47: #include <string.h>
48: #include <unistd.h>
1.44 millert 49: #include <wchar.h>
50: #include <wctype.h>
51:
52: #include "coll.h"
53: #include "file.h"
54: #include "sort.h"
55:
/*
 * Short-option string passed to getopt_long().
 *
 * Unless the build defines GNUSORT_COMPATIBILITY, the string is prefixed
 * with "+", which asks getopt_long() to stop at the first non-option
 * argument instead of permuting argv.
 */
#if defined(GNUSORT_COMPATIBILITY)
# define PERMUTE ""
#else
# define PERMUTE "+"
#endif
#define OPTIONS PERMUTE "bCcdfgHhik:Mmno:RrS:st:T:uVz"
1.44 millert 62:
63: static bool need_random;
64: static const char *random_source;
65:
66: MD5_CTX md5_ctx;
67:
68: struct sort_opts sort_opts_vals;
69:
70: bool debug_sort;
71: bool need_hint;
72:
73: static bool gnusort_numeric_compatibility;
74:
75: static struct sort_mods default_sort_mods_object;
76: struct sort_mods * const default_sort_mods = &default_sort_mods_object;
77:
78: static bool print_symbols_on_debug;
79:
80: /*
81: * Arguments from file (when file0-from option is used:
82: */
83: static size_t argc_from_file0 = (size_t)-1;
84: static char **argv_from_file0;
85:
/*
 * Codes returned by getopt_long() for options that have no
 * single-character spelling.  They start just above CHAR_MAX so they can
 * never collide with a short option letter.
 */
enum {
	SORT_OPT = CHAR_MAX + 1,	/* --sort */
	HELP_OPT,			/* --help */
	FF_OPT,				/* --files0-from */
	BS_OPT,				/* --batch-size */
	VERSION_OPT,			/* --version */
	DEBUG_OPT,			/* --debug */
	RANDOMSOURCE_OPT,		/* --random-source */
	COMPRESSPROGRAM_OPT,		/* --compress-program */
	QSORT_OPT,			/* --qsort */
	HEAPSORT_OPT,			/* --heapsort */
	RADIXSORT_OPT,			/* --radixsort */
	MMAP_OPT			/* --mmap */
};
103:
/*
 * Sort-type option letters of which at most one may be given; consulted by
 * check_mutually_exclusive_flags().
 */
#define NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS 6
static const char mutually_exclusive_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] = {
	'M', 'n', 'g', 'R', 'h', 'V'
};
106:
107: static const struct option long_options[] = {
108: { "batch-size", required_argument, NULL, BS_OPT },
109: { "buffer-size", required_argument, NULL, 'S' },
110: { "check", optional_argument, NULL, 'c' },
111: { "check=silent|quiet", optional_argument, NULL, 'C' },
112: { "compress-program", required_argument, NULL, COMPRESSPROGRAM_OPT },
113: { "debug", no_argument, NULL, DEBUG_OPT },
114: { "dictionary-order", no_argument, NULL, 'd' },
115: { "field-separator", required_argument, NULL, 't' },
116: { "files0-from", required_argument, NULL, FF_OPT },
117: { "general-numeric-sort", no_argument, NULL, 'g' },
118: { "heapsort", no_argument, NULL, HEAPSORT_OPT },
119: { "help", no_argument, NULL, HELP_OPT },
120: { "human-numeric-sort", no_argument, NULL, 'h' },
121: { "ignore-leading-blanks", no_argument, NULL, 'b' },
122: { "ignore-case", no_argument, NULL, 'f' },
123: { "ignore-nonprinting", no_argument, NULL, 'i' },
124: { "key", required_argument, NULL, 'k' },
125: { "merge", no_argument, NULL, 'm' },
126: { "mergesort", no_argument, NULL, 'H' },
127: { "mmap", no_argument, NULL, MMAP_OPT },
128: { "month-sort", no_argument, NULL, 'M' },
129: { "numeric-sort", no_argument, NULL, 'n' },
130: { "output", required_argument, NULL, 'o' },
131: { "qsort", no_argument, NULL, QSORT_OPT },
132: { "radixsort", no_argument, NULL, RADIXSORT_OPT },
133: { "random-sort", no_argument, NULL, 'R' },
134: { "random-source", required_argument, NULL, RANDOMSOURCE_OPT },
135: { "reverse", no_argument, NULL, 'r' },
136: { "sort", required_argument, NULL, SORT_OPT },
137: { "stable", no_argument, NULL, 's' },
138: { "temporary-directory", required_argument, NULL, 'T' },
139: { "unique", no_argument, NULL, 'u' },
140: { "version", no_argument, NULL, VERSION_OPT },
141: { "version-sort", no_argument, NULL, 'V' },
142: { "zero-terminated", no_argument, NULL, 'z' },
143: { NULL, no_argument, NULL, 0 }
144: };
145:
146: /*
147: * Check where sort modifier is present
148: */
149: static bool
150: sort_modifier_empty(struct sort_mods *sm)
151: {
152: return !(sm->Mflag || sm->Vflag || sm->nflag || sm->gflag ||
153: sm->rflag || sm->Rflag || sm->hflag || sm->dflag || sm->fflag);
154: }
155:
156: /*
157: * Print out usage text.
158: */
159: static __dead void
160: usage(int exit_val)
161: {
162: fprintf(exit_val ? stderr : stdout,
1.46 jmc 163: "usage: %s [-bCcdfgHhiMmnRrsuVz] [-k field1[,field2]] [-o output] "
1.45 jmc 164: "[-S size]\n\t[-T dir] [-t char] [file ...]\n", getprogname());
1.44 millert 165: exit(exit_val);
166: }
1.4 millert 167:
1.1 millert 168: /*
1.44 millert 169: * Read input file names from a file (file0-from option).
1.1 millert 170: */
1.44 millert 171: static void
172: read_fns_from_file0(const char *fn)
173: {
1.47 millert 174: FILE *f;
175: char *line = NULL;
176: size_t linesize = 0;
177: ssize_t linelen;
178:
179: f = fopen(fn, "r");
180: if (f == NULL)
181: err(2, "%s", fn);
182:
183: while ((linelen = getdelim(&line, &linesize, '\0', f)) != -1) {
184: if (*line != '\0') {
185: if (argc_from_file0 == (size_t)-1)
186: argc_from_file0 = 0;
187: ++argc_from_file0;
188: argv_from_file0 = sort_reallocarray(argv_from_file0,
189: argc_from_file0, sizeof(char *));
190: argv_from_file0[argc_from_file0 - 1] = line;
191: } else {
192: free(line);
1.44 millert 193: }
1.47 millert 194: line = NULL;
195: linesize = 0;
1.44 millert 196: }
1.47 millert 197: if (ferror(f))
198: err(2, "%s: getdelim", fn);
199:
200: closefile(f, fn);
1.44 millert 201: }
1.4 millert 202:
1.1 millert 203: /*
1.44 millert 204: * Check how much RAM is available for the sort.
1.1 millert 205: */
1.44 millert 206: static void
207: set_hw_params(void)
208: {
1.73 tobias 209: unsigned long long free_memory;
1.48 millert 210: long long user_memory;
211: struct rlimit rl;
212: size_t len;
213: int mib[] = { CTL_HW, HW_USERMEM64 };
214:
215: /* Get total user (non-kernel) memory. */
216: len = sizeof(user_memory);
217: if (sysctl(mib, 2, &user_memory, &len, NULL, 0) == -1)
218: user_memory = -1;
219:
220: /* Increase our data size to the max */
221: if (getrlimit(RLIMIT_DATA, &rl) == 0) {
222: free_memory = (unsigned long long)rl.rlim_cur;
223: rl.rlim_cur = rl.rlim_max;
224: if (setrlimit(RLIMIT_DATA, &rl) == 0) {
225: free_memory = (unsigned long long)rl.rlim_max;
226: } else {
227: warn("Can't set resource limit to max data size");
228: }
1.73 tobias 229: } else {
230: free_memory = 1000000;
1.48 millert 231: warn("Can't get resource limit for data size");
1.73 tobias 232: }
1.1 millert 233:
1.48 millert 234: /* We prefer to use temp files rather than swap space. */
235: if (user_memory != -1 && free_memory > user_memory)
236: free_memory = user_memory;
1.44 millert 237:
238: available_free_memory = free_memory / 2;
239: }
240:
/*
 * Convert the first multibyte character of c into *wc.  When mbtowc() does
 * not report at least one consumed byte (error or empty string), *wc is set
 * to def instead.
 */
static void
conv_mbtowc(wchar_t *wc, const char *c, const wchar_t def)
{
	if (mbtowc(wc, c, MB_CUR_MAX) < 1)
		*wc = def;
}
1.12 millert 253:
1.44 millert 254: /*
255: * Set current locale symbols.
256: */
257: static void
258: set_locale(void)
1.1 millert 259: {
1.44 millert 260: struct lconv *lc;
261: const char *locale;
1.4 millert 262:
1.16 ericj 263: setlocale(LC_ALL, "");
264:
1.66 millert 265: /* Obtain LC_NUMERIC info */
1.44 millert 266: lc = localeconv();
267:
1.66 millert 268: /* Convert to wide char form */
269: conv_mbtowc(&symbol_decimal_point, lc->decimal_point,
270: symbol_decimal_point);
271: conv_mbtowc(&symbol_thousands_sep, lc->thousands_sep,
272: symbol_thousands_sep);
273: conv_mbtowc(&symbol_positive_sign, lc->positive_sign,
274: symbol_positive_sign);
275: conv_mbtowc(&symbol_negative_sign, lc->negative_sign,
276: symbol_negative_sign);
1.44 millert 277:
278: if (getenv("GNUSORT_NUMERIC_COMPATIBILITY"))
279: gnusort_numeric_compatibility = true;
280:
281: locale = setlocale(LC_COLLATE, NULL);
282: if (locale != NULL) {
283: char *tmpl;
284: const char *byteclocale;
285:
286: tmpl = sort_strdup(locale);
287: byteclocale = setlocale(LC_COLLATE, "C");
288: if (byteclocale && strcmp(byteclocale, tmpl) == 0) {
289: byte_sort = true;
290: } else {
291: byteclocale = setlocale(LC_COLLATE, "POSIX");
292: if (byteclocale && strcmp(byteclocale, tmpl) == 0)
293: byte_sort = true;
294: else
295: setlocale(LC_COLLATE, tmpl);
296: }
297: sort_free(tmpl);
298: }
299: if (!byte_sort)
300: sort_mb_cur_max = MB_CUR_MAX;
301: }
302:
303: /*
304: * Set directory temporary files.
305: */
306: static void
307: set_tmpdir(void)
308: {
1.53 millert 309: if (!issetugid()) {
310: char *td;
1.44 millert 311:
1.53 millert 312: td = getenv("TMPDIR");
313: if (td != NULL)
1.71 tobias 314: tmpdir = td;
1.53 millert 315: }
1.44 millert 316: }
317:
318: /*
319: * Parse -S option.
320: */
321: static unsigned long long
322: parse_memory_buffer_value(const char *value)
323: {
1.66 millert 324: char *endptr;
325: unsigned long long membuf;
326:
327: membuf = strtoll(value, &endptr, 10);
328: if (endptr == value || (long long)membuf < 0 ||
329: (errno == ERANGE && membuf == LLONG_MAX))
1.68 millert 330: goto invalid;
1.66 millert 331:
332: switch (*endptr) {
333: case 'Y':
1.68 millert 334: if (membuf > ULLONG_MAX / 1024)
335: goto invalid;
1.66 millert 336: membuf *= 1024;
337: /* FALLTHROUGH */
338: case 'Z':
1.68 millert 339: if (membuf > ULLONG_MAX / 1024)
340: goto invalid;
1.66 millert 341: membuf *= 1024;
342: /* FALLTHROUGH */
343: case 'E':
1.68 millert 344: if (membuf > ULLONG_MAX / 1024)
345: goto invalid;
1.66 millert 346: membuf *= 1024;
347: /* FALLTHROUGH */
348: case 'P':
1.68 millert 349: if (membuf > ULLONG_MAX / 1024)
350: goto invalid;
1.66 millert 351: membuf *= 1024;
352: /* FALLTHROUGH */
353: case 'T':
1.68 millert 354: if (membuf > ULLONG_MAX / 1024)
355: goto invalid;
1.66 millert 356: membuf *= 1024;
357: /* FALLTHROUGH */
358: case 'G':
1.68 millert 359: if (membuf > ULLONG_MAX / 1024)
360: goto invalid;
1.66 millert 361: membuf *= 1024;
362: /* FALLTHROUGH */
363: case 'M':
1.68 millert 364: if (membuf > ULLONG_MAX / 1024)
365: goto invalid;
1.66 millert 366: membuf *= 1024;
367: /* FALLTHROUGH */
368: case '\0':
369: case 'K':
1.68 millert 370: if (membuf > ULLONG_MAX / 1024)
371: goto invalid;
1.66 millert 372: membuf *= 1024;
373: /* FALLTHROUGH */
374: case 'b':
375: break;
376: case '%':
1.74 tobias 377: if (available_free_memory != 0 &&
378: membuf > ULLONG_MAX / available_free_memory)
379: goto invalid;
1.66 millert 380: membuf = (available_free_memory * membuf) /
381: 100;
382: break;
383: default:
384: warnc(EINVAL, "%s", optarg);
385: membuf = available_free_memory;
1.44 millert 386: }
1.74 tobias 387: if (membuf > SIZE_MAX)
388: goto invalid;
1.66 millert 389: return membuf;
1.68 millert 390: invalid:
391: errx(2, "invalid memory buffer size: %s", value);
1.44 millert 392: }
393:
394: /*
395: * Signal handler that clears the temporary files.
396: */
397: static void
1.49 millert 398: sig_handler(int sig __unused)
1.44 millert 399: {
400: clear_tmp_files();
1.50 millert 401: _exit(2);
1.44 millert 402: }
403:
404: /*
405: * Set signal handler on panic signals.
406: */
407: static void
408: set_signal_handler(void)
409: {
410: struct sigaction sa;
1.69 millert 411: int i, signals[] = {SIGTERM, SIGHUP, SIGINT, SIGUSR1, SIGUSR2,
412: SIGPIPE, SIGXCPU, SIGXFSZ, 0};
1.44 millert 413:
414: memset(&sa, 0, sizeof(sa));
1.49 millert 415: sigfillset(&sa.sa_mask);
416: sa.sa_flags = SA_RESTART;
417: sa.sa_handler = sig_handler;
418:
419: for (i = 0; signals[i] != 0; i++) {
420: if (sigaction(signals[i], &sa, NULL) < 0) {
1.70 tobias 421: warn("sigaction(%s)", strsignal(signals[i]));
1.49 millert 422: continue;
423: }
1.44 millert 424: }
425: }
426:
/*
 * Complain that "what" names an unknown feature and exit with status 2
 * (via errx(), so this function does not return).
 */
static void
unknown(const char *what)
{

	errx(2, "Unknown feature: %s", what);
}
435:
436: /*
437: * Check whether contradictory input options are used.
438: */
439: static void
440: check_mutually_exclusive_flags(char c, bool *mef_flags)
441: {
442: int i, fo_index, mec;
443: bool found_others, found_this;
444:
1.72 tobias 445: found_others = found_this = false;
1.44 millert 446: fo_index = 0;
447:
448: for (i = 0; i < NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS; i++) {
449: mec = mutually_exclusive_flags[i];
450:
451: if (mec != c) {
452: if (mef_flags[i]) {
1.65 millert 453: if (found_this) {
454: errx(2,
455: "%c:%c: mutually exclusive flags",
456: c, mec);
457: }
1.44 millert 458: found_others = true;
459: fo_index = i;
460: }
461: } else {
1.65 millert 462: if (found_others) {
463: errx(2,
464: "%c:%c: mutually exclusive flags",
465: c, mutually_exclusive_flags[fo_index]);
466: }
1.44 millert 467: mef_flags[i] = true;
468: found_this = true;
469: }
470: }
471: }
472:
473: /*
474: * Initialise sort opts data.
475: */
476: static void
477: set_sort_opts(void)
478: {
479: memset(&default_sort_mods_object, 0,
480: sizeof(default_sort_mods_object));
481: memset(&sort_opts_vals, 0, sizeof(sort_opts_vals));
482: default_sort_mods_object.func =
483: get_sort_func(&default_sort_mods_object);
484: }
485:
486: /*
487: * Set a sort modifier on a sort modifiers object.
488: */
489: static bool
490: set_sort_modifier(struct sort_mods *sm, int c)
491: {
1.66 millert 492: switch (c) {
493: case 'b':
494: sm->bflag = true;
495: break;
496: case 'd':
497: sm->dflag = true;
498: break;
499: case 'f':
500: sm->fflag = true;
501: break;
502: case 'g':
503: sm->gflag = true;
504: need_hint = true;
505: break;
506: case 'i':
507: sm->iflag = true;
508: break;
509: case 'R':
510: sm->Rflag = true;
511: need_random = true;
512: break;
513: case 'M':
514: initialise_months();
515: sm->Mflag = true;
516: need_hint = true;
517: break;
518: case 'n':
519: sm->nflag = true;
520: need_hint = true;
521: print_symbols_on_debug = true;
522: break;
523: case 'r':
524: sm->rflag = true;
525: break;
526: case 'V':
527: sm->Vflag = true;
528: break;
529: case 'h':
530: sm->hflag = true;
531: need_hint = true;
532: print_symbols_on_debug = true;
533: break;
534: default:
535: return false;
1.44 millert 536: }
1.66 millert 537: sort_opts_vals.complex_sort = true;
538: sm->func = get_sort_func(sm);
539:
1.44 millert 540: return true;
541: }
542:
543: /*
544: * Parse POS in -k option.
545: */
546: static int
547: parse_pos(const char *s, struct key_specs *ks, bool *mef_flags, bool second)
548: {
549: regmatch_t pmatch[4];
550: regex_t re;
551: char *c, *f;
552: const char *sregexp = "^([0-9]+)(\\.[0-9]+)?([bdfirMngRhV]+)?$";
553: size_t len, nmatch;
554: int ret;
555:
556: ret = -1;
557: nmatch = 4;
558: c = f = NULL;
559:
560: if (regcomp(&re, sregexp, REG_EXTENDED) != 0)
561: return -1;
562:
563: if (regexec(&re, s, nmatch, pmatch, 0) != 0)
564: goto end;
565:
566: if (pmatch[0].rm_eo <= pmatch[0].rm_so)
567: goto end;
568:
569: if (pmatch[1].rm_eo <= pmatch[1].rm_so)
570: goto end;
571:
572: len = pmatch[1].rm_eo - pmatch[1].rm_so;
573:
1.57 millert 574: f = sort_malloc(len + 1);
575: memcpy(f, s + pmatch[1].rm_so, len);
1.44 millert 576: f[len] = '\0';
577:
578: if (second) {
579: errno = 0;
1.58 millert 580: ks->f2 = (size_t)strtoul(f, NULL, 10);
1.44 millert 581: if (errno != 0)
1.58 millert 582: goto end;
1.44 millert 583: if (ks->f2 == 0) {
584: warn("0 field in key specs");
585: goto end;
586: }
587: } else {
588: errno = 0;
1.58 millert 589: ks->f1 = (size_t)strtoul(f, NULL, 10);
1.44 millert 590: if (errno != 0)
1.58 millert 591: goto end;
1.44 millert 592: if (ks->f1 == 0) {
593: warn("0 field in key specs");
594: goto end;
595: }
596: }
597:
598: if (pmatch[2].rm_eo > pmatch[2].rm_so) {
599: len = pmatch[2].rm_eo - pmatch[2].rm_so - 1;
600:
1.57 millert 601: c = sort_malloc(len + 1);
602: memcpy(c, s + pmatch[2].rm_so + 1, len);
1.44 millert 603: c[len] = '\0';
604:
605: if (second) {
606: errno = 0;
1.58 millert 607: ks->c2 = (size_t)strtoul(c, NULL, 10);
1.44 millert 608: if (errno != 0)
1.58 millert 609: goto end;
1.44 millert 610: } else {
611: errno = 0;
1.58 millert 612: ks->c1 = (size_t)strtoul(c, NULL, 10);
1.44 millert 613: if (errno != 0)
1.58 millert 614: goto end;
1.44 millert 615: if (ks->c1 == 0) {
616: warn("0 column in key specs");
617: goto end;
618: }
619: }
620: } else {
621: if (second)
622: ks->c2 = 0;
623: else
624: ks->c1 = 1;
625: }
626:
627: if (pmatch[3].rm_eo > pmatch[3].rm_so) {
628: regoff_t i = 0;
629:
630: for (i = pmatch[3].rm_so; i < pmatch[3].rm_eo; i++) {
631: check_mutually_exclusive_flags(s[i], mef_flags);
632: if (s[i] == 'b') {
633: if (second)
634: ks->pos2b = true;
635: else
636: ks->pos1b = true;
637: } else if (!set_sort_modifier(&(ks->sm), s[i]))
638: goto end;
639: }
640: }
641:
642: ret = 0;
643:
644: end:
1.61 millert 645: sort_free(c);
646: sort_free(f);
1.44 millert 647: regfree(&re);
648:
649: return ret;
650: }
651:
652: /*
653: * Parse -k option value.
654: */
655: static int
656: parse_k(const char *s, struct key_specs *ks)
657: {
658: int ret = -1;
659: bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] =
660: { false, false, false, false, false, false };
661:
1.66 millert 662: if (*s != '\0') {
1.44 millert 663: char *sptr;
664:
665: sptr = strchr(s, ',');
666: if (sptr) {
667: size_t size1;
668: char *pos1, *pos2;
669:
670: size1 = sptr - s;
671:
672: if (size1 < 1)
673: return -1;
674:
1.57 millert 675: pos1 = sort_malloc(size1 + 1);
676: memcpy(pos1, s, size1);
1.44 millert 677: pos1[size1] = '\0';
678:
679: ret = parse_pos(pos1, ks, mef_flags, false);
680:
681: sort_free(pos1);
682: if (ret < 0)
683: return ret;
684:
685: pos2 = sort_strdup(sptr + 1);
686: ret = parse_pos(pos2, ks, mef_flags, true);
687: sort_free(pos2);
688: } else
689: ret = parse_pos(s, ks, mef_flags, false);
1.1 millert 690: }
1.4 millert 691:
1.44 millert 692: return ret;
693: }
694:
/*
 * Parse one POS of the obsolete "+POS1 -POS2" syntax:
 * FIELD[.COLUMN][LETTERS].
 *
 * *nf and *nc are cleared first, then receive the field and column numbers
 * (the column stays 0 when absent).  Trailing letters are copied,
 * NUL-terminated, into sopts, which holds sopts_size bytes; sopts is left
 * untouched when there are none.
 *
 * Returns 0 on success, -1 if s does not match the syntax.  Exits with
 * status 2 on a number strtoul() rejects or letters that do not fit sopts.
 */
static int
parse_pos_obs(const char *s, size_t *nf, size_t *nc, char *sopts, size_t sopts_size)
{
	static const char pattern[] = "^([0-9]+)(\\.[0-9]+)?([A-Za-z]+)?$";
	regmatch_t m[4];
	regex_t re;
	char *fieldstr = NULL, *colstr = NULL;
	size_t n;
	int rc = -1;

	*nf = 0;
	*nc = 0;

	if (regcomp(&re, pattern, REG_EXTENDED) != 0)
		return -1;

	if (regexec(&re, s, 4, m, 0) != 0)
		goto done;
	if (m[0].rm_eo <= m[0].rm_so || m[1].rm_eo <= m[1].rm_so)
		goto done;

	/* Field number. */
	n = m[1].rm_eo - m[1].rm_so;
	fieldstr = sort_malloc(n + 1);
	memcpy(fieldstr, s + m[1].rm_so, n);
	fieldstr[n] = '\0';

	errno = 0;
	*nf = (size_t)strtoul(fieldstr, NULL, 10);
	if (errno != 0)
		errx(2, "Invalid key position");

	/* Optional ".COLUMN"; skip the dot. */
	if (m[2].rm_eo > m[2].rm_so) {
		n = m[2].rm_eo - m[2].rm_so - 1;
		colstr = sort_malloc(n + 1);
		memcpy(colstr, s + m[2].rm_so + 1, n);
		colstr[n] = '\0';

		errno = 0;
		*nc = (size_t)strtoul(colstr, NULL, 10);
		if (errno != 0)
			errx(2, "Invalid key position");
	}

	/* Optional trailing option letters. */
	if (m[3].rm_eo > m[3].rm_so) {
		n = m[3].rm_eo - m[3].rm_so;
		if (n >= sopts_size)
			errx(2, "Invalid key position");
		memcpy(sopts, s + m[3].rm_so, n);
		sopts[n] = '\0';
	}

	rc = 0;

done:
	sort_free(colstr);
	sort_free(fieldstr);
	regfree(&re);

	return rc;
}
768:
/*
 * "Translate" obsolete +POS1 [-POS2] arguments into the -kPOS1[,POS2]
 * syntax, rewriting argv in place before option parsing.
 *
 * The obsolete positions are zero-based, so the start field and column are
 * each incremented; the end field is incremented only when an end column
 * was given.  When a "-POS2" argument is merged into the preceding "+POS1",
 * the remaining arguments, including the terminating NULL pointer, are
 * shifted down and *argc is decremented.
 *
 * Arguments starting with '+' that do not parse as a position are left
 * alone.  Exits with status 2 if the translated key does not fit the
 * conversion buffer.
 *
 * NOTE: argv[*argc] must be readable (it is the NULL terminator when argv
 * comes from main()).
 */
static void
fix_obsolete_keys(int *argc, char **argv)
{
	char sopt[129];
	int i, n;

	for (i = 1; i < *argc; i++) {
		const char *arg1 = argv[i];
		size_t c1, f1;
		char sopts1[128];

		if (arg1[0] != '+')
			continue;

		sopts1[0] = 0;
		c1 = f1 = 0;

		if (parse_pos_obs(arg1 + 1, &f1, &c1, sopts1,
		    sizeof(sopts1)) < 0)
			continue;

		f1 += 1;
		c1 += 1;
		if (i + 1 < *argc) {
			const char *arg2 = argv[i + 1];

			if (arg2[0] == '-') {
				size_t c2, f2;
				char sopts2[128];

				sopts2[0] = 0;
				c2 = f2 = 0;

				if (parse_pos_obs(arg2 + 1, &f2, &c2,
				    sopts2, sizeof(sopts2)) >= 0) {
					int j;

					if (c2 > 0)
						f2 += 1;
					n = snprintf(sopt, sizeof(sopt),
					    "-k%zu.%zu%s,%zu.%zu%s",
					    f1, c1, sopts1, f2,
					    c2, sopts2);
					/* Refuse a silently truncated key. */
					if (n < 0 || (size_t)n >= sizeof(sopt))
						errx(2, "Invalid key position");
					argv[i] = sort_strdup(sopt);
					/*
					 * Drop argv[i + 1]; run up to and
					 * including the NULL terminator so
					 * argv[*argc] stays NULL.
					 */
					for (j = i + 1; j < *argc; j++)
						argv[j] = argv[j + 1];
					*argc -= 1;
					continue;
				}
			}
		}
		n = snprintf(sopt, sizeof(sopt), "-k%zu.%zu%s",
		    f1, c1, sopts1);
		if (n < 0 || (size_t)n >= sizeof(sopt))
			errx(2, "Invalid key position");
		argv[i] = sort_strdup(sopt);
	}
}
827:
828: /*
829: * Set random seed
830: */
831: static void
832: set_random_seed(void)
833: {
834: if (!need_random)
835: return;
1.4 millert 836:
1.44 millert 837: MD5Init(&md5_ctx);
838: if (random_source != NULL) {
839: unsigned char buf[BUFSIZ];
840: size_t nr;
841: FILE *fp;
842:
843: if ((fp = fopen(random_source, "r")) == NULL)
844: err(2, "%s", random_source);
845: while ((nr = fread(buf, 1, sizeof(buf), fp)) != 0)
846: MD5Update(&md5_ctx, buf, nr);
847: if (ferror(fp))
848: err(2, "%s", random_source);
849: fclose(fp);
1.1 millert 850: } else {
1.44 millert 851: unsigned char rsd[1024];
852:
853: arc4random_buf(rsd, sizeof(rsd));
854: MD5Update(&md5_ctx, rsd, sizeof(rsd));
855: }
856: }
857:
858: /*
859: * Main function.
860: */
861: int
862: main(int argc, char *argv[])
863: {
1.75 tobias 864: char *outfile, *real_outfile, *sflag;
1.76 tobias 865: int c;
1.44 millert 866: size_t i;
1.78 millert 867: struct sort_mods *sm = &default_sort_mods_object;
1.44 millert 868: bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] =
869: { false, false, false, false, false, false };
870:
1.83 tim 871: set_hw_params();
872:
1.86 semarie 873: if (pledge("stdio rpath wpath cpath fattr chown proc exec", NULL) == -1)
1.83 tim 874: err(2, "pledge");
875:
1.51 millert 876: outfile = "-";
1.44 millert 877: real_outfile = NULL;
1.75 tobias 878: sflag = NULL;
1.44 millert 879:
880: init_tmp_files();
881:
882: set_signal_handler();
883:
1.51 millert 884: atexit(clear_tmp_files);
885:
1.44 millert 886: set_locale();
887: set_tmpdir();
888: set_sort_opts();
889:
890: fix_obsolete_keys(&argc, argv);
891:
892: while (((c = getopt_long(argc, argv, OPTIONS, long_options, NULL))
893: != -1)) {
894:
895: check_mutually_exclusive_flags(c, mef_flags);
896:
897: if (!set_sort_modifier(sm, c)) {
898: switch (c) {
899: case 'c':
900: sort_opts_vals.cflag = true;
901: if (optarg) {
902: if (!strcmp(optarg, "diagnose-first"))
903: ;
904: else if (!strcmp(optarg, "silent") ||
905: !strcmp(optarg, "quiet"))
906: sort_opts_vals.csilentflag = true;
907: else if (*optarg)
908: unknown(optarg);
909: }
910: break;
911: case 'C':
912: sort_opts_vals.cflag = true;
913: sort_opts_vals.csilentflag = true;
914: break;
915: case 'k':
916: {
917: sort_opts_vals.complex_sort = true;
918: sort_opts_vals.kflag = true;
919:
1.79 millert 920: keys = sort_reallocarray(keys, keys_num + 1,
1.44 millert 921: sizeof(struct key_specs));
1.79 millert 922: memset(&(keys[keys_num]), 0,
1.44 millert 923: sizeof(struct key_specs));
1.79 millert 924: #ifndef GNUSORT_COMPATIBILITY
925: keys[keys_num].pos1b = default_sort_mods->bflag;
926: keys[keys_num].pos2b = default_sort_mods->bflag;
927: #endif
1.44 millert 928:
1.79 millert 929: if (parse_k(optarg, &(keys[keys_num++])) < 0)
1.44 millert 930: errc(2, EINVAL, "-k %s", optarg);
931:
932: break;
933: }
934: case 'm':
935: sort_opts_vals.mflag = true;
936: break;
937: case 'o':
1.51 millert 938: outfile = optarg;
1.44 millert 939: break;
940: case 's':
941: sort_opts_vals.sflag = true;
942: break;
943: case 'S':
1.75 tobias 944: sflag = optarg;
1.44 millert 945: break;
946: case 'T':
1.71 tobias 947: tmpdir = optarg;
1.44 millert 948: break;
949: case 't':
950: while (strlen(optarg) > 1) {
951: if (optarg[0] != '\\') {
952: errc(2, EINVAL, "%s", optarg);
953: }
954: optarg += 1;
955: if (*optarg == '0') {
956: *optarg = 0;
957: break;
958: }
959: }
960: sort_opts_vals.tflag = true;
961: sort_opts_vals.field_sep = btowc(optarg[0]);
962: if (sort_opts_vals.field_sep == WEOF) {
963: errno = EINVAL;
964: err(2, NULL);
965: }
966: if (!gnusort_numeric_compatibility) {
967: if (symbol_decimal_point == sort_opts_vals.field_sep)
968: symbol_decimal_point = WEOF;
969: if (symbol_thousands_sep == sort_opts_vals.field_sep)
970: symbol_thousands_sep = WEOF;
971: if (symbol_negative_sign == sort_opts_vals.field_sep)
972: symbol_negative_sign = WEOF;
973: if (symbol_positive_sign == sort_opts_vals.field_sep)
974: symbol_positive_sign = WEOF;
975: }
976: break;
977: case 'u':
978: sort_opts_vals.uflag = true;
979: /* stable sort for the correct unique val */
980: sort_opts_vals.sflag = true;
981: break;
982: case 'z':
983: sort_opts_vals.zflag = true;
984: break;
985: case SORT_OPT:
1.62 millert 986: if (!strcmp(optarg, "general-numeric"))
987: set_sort_modifier(sm, 'g');
988: else if (!strcmp(optarg, "human-numeric"))
989: set_sort_modifier(sm, 'h');
990: else if (!strcmp(optarg, "numeric"))
991: set_sort_modifier(sm, 'n');
992: else if (!strcmp(optarg, "month"))
993: set_sort_modifier(sm, 'M');
994: else if (!strcmp(optarg, "random"))
995: set_sort_modifier(sm, 'R');
996: else
997: unknown(optarg);
1.44 millert 998: break;
999: case QSORT_OPT:
1000: sort_opts_vals.sort_method = SORT_QSORT;
1001: break;
1002: case 'H':
1003: sort_opts_vals.sort_method = SORT_MERGESORT;
1004: break;
1005: case MMAP_OPT:
1006: use_mmap = true;
1007: break;
1008: case HEAPSORT_OPT:
1009: sort_opts_vals.sort_method = SORT_HEAPSORT;
1010: break;
1011: case RADIXSORT_OPT:
1012: sort_opts_vals.sort_method = SORT_RADIXSORT;
1013: break;
1014: case RANDOMSOURCE_OPT:
1.71 tobias 1015: random_source = optarg;
1.44 millert 1016: break;
1017: case COMPRESSPROGRAM_OPT:
1.71 tobias 1018: compress_program = optarg;
1.44 millert 1019: break;
1020: case FF_OPT:
1021: read_fns_from_file0(optarg);
1022: break;
1023: case BS_OPT:
1024: {
1.54 millert 1025: const char *errstr;
1026:
1027: max_open_files = strtonum(optarg, 2,
1028: UINT_MAX - 1, &errstr) + 1;
1029: if (errstr != NULL)
1030: errx(2, "--batch-size argument is %s",
1031: errstr);
1032: break;
1.44 millert 1033: }
1034: case VERSION_OPT:
1035: printf("%s\n", VERSION);
1036: exit(EXIT_SUCCESS);
1037: /* NOTREACHED */
1038: break;
1039: case DEBUG_OPT:
1040: debug_sort = true;
1041: break;
1042: case HELP_OPT:
1043: usage(0);
1044: /* NOTREACHED */
1045: break;
1046: default:
1047: usage(2);
1048: /* NOTREACHED */
1049: }
1050: }
1051: }
1052: argc -= optind;
1053: argv += optind;
1.78 millert 1054:
1.83 tim 1055: if (compress_program == NULL) {
1.86 semarie 1056: if (pledge("stdio rpath wpath cpath fattr chown", NULL) == -1)
1.83 tim 1057: err(2, "pledge");
1058: }
1059:
1.78 millert 1060: #ifndef GNUSORT_COMPATIBILITY
1061: if (argc > 2 && strcmp(argv[argc - 2], "-o") == 0) {
1062: outfile = argv[argc - 1];
1063: argc -= 2;
1064: }
1065: #endif
1.77 millert 1066:
1.80 tim 1067: if (argv_from_file0) {
1068: argc = argc_from_file0;
1069: argv = argv_from_file0;
1070: }
1071:
1.83 tim 1072: if (sort_opts_vals.cflag) {
1073: if (argc > 1)
1074: errx(2, "only one input file is allowed with the -%c flag",
1075: sort_opts_vals.csilentflag ? 'C' : 'c');
1076:
1077: if (argc == 0 || strcmp(argv[0], "-") == 0) {
1078: if (compress_program) {
1079: if (pledge("stdio proc exec", NULL) == -1)
1080: err(2, "pledge");
1081: } else {
1082: if (pledge("stdio", NULL) == -1)
1083: err(2, "pledge");
1084: }
1085: } else {
1086: if (compress_program) {
1087: if (pledge("stdio rpath proc exec", NULL) == -1)
1088: err(2, "pledge");
1089: } else {
1090: if (pledge("stdio rpath", NULL) == -1)
1091: err(2, "pledge");
1092: }
1093: }
1094: } else {
1095: /* Case when the outfile equals one of the input files: */
1096: if (strcmp(outfile, "-") != 0) {
1097: struct stat sb;
1098: int fd, i;
1099:
1100: for (i = 0; i < argc; ++i) {
1101: if (strcmp(argv[i], outfile) == 0) {
1102: if (stat(outfile, &sb) == -1)
1103: err(2, "%s", outfile);
1104: if (access(outfile, W_OK) == -1)
1105: err(2, "%s", outfile);
1106: real_outfile = outfile;
1107: sort_asprintf(&outfile, "%s.XXXXXXXXXX",
1108: real_outfile);
1109: if ((fd = mkstemp(outfile)) == -1)
1.84 millert 1110: err(2, "%s", outfile);
1111: (void)fchown(fd, sb.st_uid, sb.st_gid);
1.83 tim 1112: if (fchmod(fd, sb.st_mode & ACCESSPERMS) == -1)
1.84 millert 1113: err(2, "%s", outfile);
1.83 tim 1114: close(fd);
1115: tmp_file_atexit(outfile);
1116: break;
1117: }
1118: }
1119: }
1120:
1121: if (compress_program) {
1122: if (pledge("stdio rpath wpath cpath proc exec", NULL) == -1)
1123: err(2, "pledge");
1124: } else {
1125: if (pledge("stdio rpath wpath cpath", NULL) == -1)
1126: err(2, "pledge");
1127: }
1128: }
1.75 tobias 1129:
1130: if (sflag != NULL)
1131: available_free_memory = parse_memory_buffer_value(sflag);
1.44 millert 1132:
1133: if (keys_num == 0) {
1134: keys_num = 1;
1.67 millert 1135: keys = sort_reallocarray(keys, 1, sizeof(struct key_specs));
1.44 millert 1136: memset(&(keys[0]), 0, sizeof(struct key_specs));
1137: keys[0].c1 = 1;
1.79 millert 1138: #ifdef GNUSORT_COMPATIBILITY
1139: keys[0].pos1b = sm->bflag;
1140: keys[0].pos2b = sm->bflag;
1141: #endif
1142: memcpy(&(keys[0].sm), sm, sizeof(struct sort_mods));
1.44 millert 1143: }
1144:
1145: for (i = 0; i < keys_num; i++) {
1146: struct key_specs *ks;
1147:
1148: ks = &(keys[i]);
1149:
1.85 millert 1150: if (sort_modifier_empty(&(ks->sm))) {
1.79 millert 1151: #ifdef GNUSORT_COMPATIBILITY
1.85 millert 1152: if (!(ks->pos1b) && !(ks->pos2b)) {
1153: ks->pos1b = sm->bflag;
1154: ks->pos2b = sm->bflag;
1155: }
1.79 millert 1156: #endif
1.44 millert 1157: memcpy(&(ks->sm), sm, sizeof(struct sort_mods));
1158: }
1159:
1160: ks->sm.func = get_sort_func(&(ks->sm));
1161: }
1162:
1163: if (debug_sort) {
1164: printf("Memory to be used for sorting: %llu\n",
1165: available_free_memory);
1166: printf("Using collate rules of %s locale\n",
1167: setlocale(LC_COLLATE, NULL));
1168: if (byte_sort)
1169: printf("Byte sort is used\n");
1170: if (print_symbols_on_debug) {
1171: printf("Decimal Point: <%lc>\n", symbol_decimal_point);
1172: if (symbol_thousands_sep)
1173: printf("Thousands separator: <%lc>\n",
1174: symbol_thousands_sep);
1175: printf("Positive sign: <%lc>\n", symbol_positive_sign);
1176: printf("Negative sign: <%lc>\n", symbol_negative_sign);
1177: }
1178: }
1179:
1.76 tobias 1180: if (sort_opts_vals.cflag)
1181: return check(argc ? *argv : "-");
1182:
1.44 millert 1183: set_random_seed();
1184:
1.76 tobias 1185: if (!sort_opts_vals.mflag) {
1.44 millert 1186: struct file_list fl;
1187: struct sort_list list;
1188:
1189: sort_list_init(&list);
1190: file_list_init(&fl, true);
1191:
1192: if (argc < 1)
1193: procfile("-", &list, &fl);
1194: else {
1195: while (argc > 0) {
1196: procfile(*argv, &list, &fl);
1197: --argc;
1198: ++argv;
1199: }
1200: }
1201:
1202: if (fl.count < 1)
1203: sort_list_to_file(&list, outfile);
1204: else {
1205: if (list.count > 0) {
1206: char *flast = new_tmp_file_name();
1207:
1208: sort_list_to_file(&list, flast);
1209: file_list_add(&fl, flast, false);
1210: }
1211: merge_files(&fl, outfile);
1212: }
1213:
1214: file_list_clean(&fl);
1215:
1216: /*
1217: * We are about to exit the program, so we can ignore
1218: * the clean-up for speed
1219: *
1220: * sort_list_clean(&list);
1221: */
1222:
1.76 tobias 1223: } else {
1.44 millert 1224: struct file_list fl;
1225:
1226: file_list_init(&fl, false);
1.87 ! millert 1227: if (argc < 1)
! 1228: file_list_add(&fl, "-", true);
! 1229: else
! 1230: file_list_populate(&fl, argc, argv, true);
1.44 millert 1231: merge_files(&fl, outfile);
1232: file_list_clean(&fl);
1233: }
1234:
1235: if (real_outfile) {
1236: if (rename(outfile, real_outfile) < 0)
1237: err(2, "%s", real_outfile);
1.51 millert 1238: sort_free(outfile);
1.44 millert 1239: }
1.4 millert 1240:
1.76 tobias 1241: return 0;
1.1 millert 1242: }