Annotation of src/usr.bin/sort/sort.c, Revision 1.75
1.75 ! tobias 1: /* $OpenBSD: sort.c,v 1.74 2015/04/02 21:09:51 tobias Exp $ */
1.1 millert 2:
3: /*-
1.44 millert 4: * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
5: * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
6: * All rights reserved.
1.1 millert 7: *
8: * Redistribution and use in source and binary forms, with or without
9: * modification, are permitted provided that the following conditions
10: * are met:
11: * 1. Redistributions of source code must retain the above copyright
12: * notice, this list of conditions and the following disclaimer.
13: * 2. Redistributions in binary form must reproduce the above copyright
14: * notice, this list of conditions and the following disclaimer in the
15: * documentation and/or other materials provided with the distribution.
16: *
1.44 millert 17: * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
1.1 millert 18: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
1.44 millert 20: * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
1.1 millert 21: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27: * SUCH DAMAGE.
28: */
29:
1.48 millert 30: #include <sys/resource.h>
1.44 millert 31: #include <sys/stat.h>
1.48 millert 32: #include <sys/sysctl.h>
1.44 millert 33: #include <sys/types.h>
1.1 millert 34:
1.44 millert 35: #include <err.h>
36: #include <errno.h>
37: #include <getopt.h>
38: #include <limits.h>
1.16 ericj 39: #include <locale.h>
1.44 millert 40: #include <md5.h>
41: #include <regex.h>
1.1 millert 42: #include <signal.h>
1.44 millert 43: #include <stdbool.h>
1.74 tobias 44: #include <stdint.h>
1.44 millert 45: #include <stdio.h>
1.1 millert 46: #include <stdlib.h>
47: #include <string.h>
48: #include <unistd.h>
1.44 millert 49: #include <wchar.h>
50: #include <wctype.h>
51:
52: #include "coll.h"
53: #include "file.h"
54: #include "sort.h"
55:
/* Short-option string passed to getopt_long() in main(). */
#define OPTIONS "bCcdfgHhik:Mmno:RrS:st:T:uVz"

/* Set when the R modifier is requested; makes set_random_seed() do work. */
static bool need_random;
/* Argument of --random-source, or NULL when the option was not given. */
static const char *random_source;

/* MD5 context initialised and seeded by set_random_seed(). */
MD5_CTX md5_ctx;

/* Global (non per-key) sort options; zeroed by set_sort_opts(). */
struct sort_opts sort_opts_vals;

/* Set by --debug. */
bool debug_sort;
/* Set by set_sort_modifier() for the g, M, n and h modifiers. */
bool need_hint;

/* Set when GNUSORT_NUMERIC_COMPATIBILITY is present in the environment. */
static bool gnusort_numeric_compatibility;

/* Modifiers given as plain command line options (outside any -k). */
static struct sort_mods default_sort_mods_object;
struct sort_mods * const default_sort_mods = &default_sort_mods_object;

/* Set for the n and h modifiers; main() prints the numeric symbols in debug mode. */
static bool print_symbols_on_debug;

/*
 * Arguments read from a file (when the --files0-from option is used).
 * argc_from_file0 stays (size_t)-1 until the first name has been read.
 */
static size_t argc_from_file0 = (size_t)-1;
static char **argv_from_file0;

/*
 * Placeholder symbols for options which have no single-character equivalent
 */
enum {
	SORT_OPT = CHAR_MAX + 1,
	HELP_OPT,
	FF_OPT,
	BS_OPT,
	VERSION_OPT,
	DEBUG_OPT,
	RANDOMSOURCE_OPT,
	COMPRESSPROGRAM_OPT,
	QSORT_OPT,
	HEAPSORT_OPT,
	RADIXSORT_OPT,
	MMAP_OPT
};

/*
 * Modifiers of which at most one may be given; the table is consulted by
 * check_mutually_exclusive_flags(), which indexes its flag array in the
 * same order.
 */
#define NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS 6
static const char mutually_exclusive_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] = { 'M', 'n', 'g', 'R', 'h', 'V' };

/* Long options accepted by getopt_long(); terminated by an all-zero entry. */
static const struct option long_options[] = {
	{ "batch-size", required_argument, NULL, BS_OPT },
	{ "buffer-size", required_argument, NULL, 'S' },
	{ "check", optional_argument, NULL, 'c' },
	{ "check=silent|quiet", optional_argument, NULL, 'C' },
	{ "compress-program", required_argument, NULL, COMPRESSPROGRAM_OPT },
	{ "debug", no_argument, NULL, DEBUG_OPT },
	{ "dictionary-order", no_argument, NULL, 'd' },
	{ "field-separator", required_argument, NULL, 't' },
	{ "files0-from", required_argument, NULL, FF_OPT },
	{ "general-numeric-sort", no_argument, NULL, 'g' },
	{ "heapsort", no_argument, NULL, HEAPSORT_OPT },
	{ "help", no_argument, NULL, HELP_OPT },
	{ "human-numeric-sort", no_argument, NULL, 'h' },
	{ "ignore-leading-blanks", no_argument, NULL, 'b' },
	{ "ignore-case", no_argument, NULL, 'f' },
	{ "ignore-nonprinting", no_argument, NULL, 'i' },
	{ "key", required_argument, NULL, 'k' },
	{ "merge", no_argument, NULL, 'm' },
	{ "mergesort", no_argument, NULL, 'H' },
	{ "mmap", no_argument, NULL, MMAP_OPT },
	{ "month-sort", no_argument, NULL, 'M' },
	{ "numeric-sort", no_argument, NULL, 'n' },
	{ "output", required_argument, NULL, 'o' },
	{ "qsort", no_argument, NULL, QSORT_OPT },
	{ "radixsort", no_argument, NULL, RADIXSORT_OPT },
	{ "random-sort", no_argument, NULL, 'R' },
	{ "random-source", required_argument, NULL, RANDOMSOURCE_OPT },
	{ "reverse", no_argument, NULL, 'r' },
	{ "sort", required_argument, NULL, SORT_OPT },
	{ "stable", no_argument, NULL, 's' },
	{ "temporary-directory", required_argument, NULL, 'T' },
	{ "unique", no_argument, NULL, 'u' },
	{ "version", no_argument, NULL, VERSION_OPT },
	{ "version-sort", no_argument, NULL, 'V' },
	{ "zero-terminated", no_argument, NULL, 'z' },
	{ NULL, no_argument, NULL, 0 }
};
140:
141: /*
142: * Check where sort modifier is present
143: */
144: static bool
145: sort_modifier_empty(struct sort_mods *sm)
146: {
147: return !(sm->Mflag || sm->Vflag || sm->nflag || sm->gflag ||
148: sm->rflag || sm->Rflag || sm->hflag || sm->dflag || sm->fflag);
149: }
150:
151: /*
152: * Print out usage text.
153: */
154: static __dead void
155: usage(int exit_val)
156: {
157: fprintf(exit_val ? stderr : stdout,
1.46 jmc 158: "usage: %s [-bCcdfgHhiMmnRrsuVz] [-k field1[,field2]] [-o output] "
1.45 jmc 159: "[-S size]\n\t[-T dir] [-t char] [file ...]\n", getprogname());
1.44 millert 160: exit(exit_val);
161: }
1.4 millert 162:
1.1 millert 163: /*
1.44 millert 164: * Read input file names from a file (file0-from option).
1.1 millert 165: */
1.44 millert 166: static void
167: read_fns_from_file0(const char *fn)
168: {
1.47 millert 169: FILE *f;
170: char *line = NULL;
171: size_t linesize = 0;
172: ssize_t linelen;
173:
174: f = fopen(fn, "r");
175: if (f == NULL)
176: err(2, "%s", fn);
177:
178: while ((linelen = getdelim(&line, &linesize, '\0', f)) != -1) {
179: if (*line != '\0') {
180: if (argc_from_file0 == (size_t)-1)
181: argc_from_file0 = 0;
182: ++argc_from_file0;
183: argv_from_file0 = sort_reallocarray(argv_from_file0,
184: argc_from_file0, sizeof(char *));
185: argv_from_file0[argc_from_file0 - 1] = line;
186: } else {
187: free(line);
1.44 millert 188: }
1.47 millert 189: line = NULL;
190: linesize = 0;
1.44 millert 191: }
1.47 millert 192: if (ferror(f))
193: err(2, "%s: getdelim", fn);
194:
195: closefile(f, fn);
1.44 millert 196: }
1.4 millert 197:
1.1 millert 198: /*
1.44 millert 199: * Check how much RAM is available for the sort.
1.1 millert 200: */
1.44 millert 201: static void
202: set_hw_params(void)
203: {
1.73 tobias 204: unsigned long long free_memory;
1.48 millert 205: long long user_memory;
206: struct rlimit rl;
207: size_t len;
208: int mib[] = { CTL_HW, HW_USERMEM64 };
209:
210: /* Get total user (non-kernel) memory. */
211: len = sizeof(user_memory);
212: if (sysctl(mib, 2, &user_memory, &len, NULL, 0) == -1)
213: user_memory = -1;
214:
215: /* Increase our data size to the max */
216: if (getrlimit(RLIMIT_DATA, &rl) == 0) {
217: free_memory = (unsigned long long)rl.rlim_cur;
218: rl.rlim_cur = rl.rlim_max;
219: if (setrlimit(RLIMIT_DATA, &rl) == 0) {
220: free_memory = (unsigned long long)rl.rlim_max;
221: } else {
222: warn("Can't set resource limit to max data size");
223: }
1.73 tobias 224: } else {
225: free_memory = 1000000;
1.48 millert 226: warn("Can't get resource limit for data size");
1.73 tobias 227: }
1.1 millert 228:
1.48 millert 229: /* We prefer to use temp files rather than swap space. */
230: if (user_memory != -1 && free_memory > user_memory)
231: free_memory = user_memory;
1.44 millert 232:
233: available_free_memory = free_memory / 2;
234: }
235:
/*
 * Convert the first multibyte character of c to a wide character in *wc.
 * When mbtowc() reports an error or an empty string (result < 1),
 * *wc is set to def instead.
 */
static void
conv_mbtowc(wchar_t *wc, const char *c, const wchar_t def)
{
	if (mbtowc(wc, c, MB_CUR_MAX) <= 0)
		*wc = def;
}
1.12 millert 248:
1.44 millert 249: /*
250: * Set current locale symbols.
251: */
252: static void
253: set_locale(void)
1.1 millert 254: {
1.44 millert 255: struct lconv *lc;
256: const char *locale;
1.4 millert 257:
1.16 ericj 258: setlocale(LC_ALL, "");
259:
1.66 millert 260: /* Obtain LC_NUMERIC info */
1.44 millert 261: lc = localeconv();
262:
1.66 millert 263: /* Convert to wide char form */
264: conv_mbtowc(&symbol_decimal_point, lc->decimal_point,
265: symbol_decimal_point);
266: conv_mbtowc(&symbol_thousands_sep, lc->thousands_sep,
267: symbol_thousands_sep);
268: conv_mbtowc(&symbol_positive_sign, lc->positive_sign,
269: symbol_positive_sign);
270: conv_mbtowc(&symbol_negative_sign, lc->negative_sign,
271: symbol_negative_sign);
1.44 millert 272:
273: if (getenv("GNUSORT_NUMERIC_COMPATIBILITY"))
274: gnusort_numeric_compatibility = true;
275:
276: locale = setlocale(LC_COLLATE, NULL);
277: if (locale != NULL) {
278: char *tmpl;
279: const char *byteclocale;
280:
281: tmpl = sort_strdup(locale);
282: byteclocale = setlocale(LC_COLLATE, "C");
283: if (byteclocale && strcmp(byteclocale, tmpl) == 0) {
284: byte_sort = true;
285: } else {
286: byteclocale = setlocale(LC_COLLATE, "POSIX");
287: if (byteclocale && strcmp(byteclocale, tmpl) == 0)
288: byte_sort = true;
289: else
290: setlocale(LC_COLLATE, tmpl);
291: }
292: sort_free(tmpl);
293: }
294: if (!byte_sort)
295: sort_mb_cur_max = MB_CUR_MAX;
296: }
297:
298: /*
299: * Set directory temporary files.
300: */
301: static void
302: set_tmpdir(void)
303: {
1.53 millert 304: if (!issetugid()) {
305: char *td;
1.44 millert 306:
1.53 millert 307: td = getenv("TMPDIR");
308: if (td != NULL)
1.71 tobias 309: tmpdir = td;
1.53 millert 310: }
1.44 millert 311: }
312:
313: /*
314: * Parse -S option.
315: */
316: static unsigned long long
317: parse_memory_buffer_value(const char *value)
318: {
1.66 millert 319: char *endptr;
320: unsigned long long membuf;
321:
322: membuf = strtoll(value, &endptr, 10);
323: if (endptr == value || (long long)membuf < 0 ||
324: (errno == ERANGE && membuf == LLONG_MAX))
1.68 millert 325: goto invalid;
1.66 millert 326:
327: switch (*endptr) {
328: case 'Y':
1.68 millert 329: if (membuf > ULLONG_MAX / 1024)
330: goto invalid;
1.66 millert 331: membuf *= 1024;
332: /* FALLTHROUGH */
333: case 'Z':
1.68 millert 334: if (membuf > ULLONG_MAX / 1024)
335: goto invalid;
1.66 millert 336: membuf *= 1024;
337: /* FALLTHROUGH */
338: case 'E':
1.68 millert 339: if (membuf > ULLONG_MAX / 1024)
340: goto invalid;
1.66 millert 341: membuf *= 1024;
342: /* FALLTHROUGH */
343: case 'P':
1.68 millert 344: if (membuf > ULLONG_MAX / 1024)
345: goto invalid;
1.66 millert 346: membuf *= 1024;
347: /* FALLTHROUGH */
348: case 'T':
1.68 millert 349: if (membuf > ULLONG_MAX / 1024)
350: goto invalid;
1.66 millert 351: membuf *= 1024;
352: /* FALLTHROUGH */
353: case 'G':
1.68 millert 354: if (membuf > ULLONG_MAX / 1024)
355: goto invalid;
1.66 millert 356: membuf *= 1024;
357: /* FALLTHROUGH */
358: case 'M':
1.68 millert 359: if (membuf > ULLONG_MAX / 1024)
360: goto invalid;
1.66 millert 361: membuf *= 1024;
362: /* FALLTHROUGH */
363: case '\0':
364: case 'K':
1.68 millert 365: if (membuf > ULLONG_MAX / 1024)
366: goto invalid;
1.66 millert 367: membuf *= 1024;
368: /* FALLTHROUGH */
369: case 'b':
370: break;
371: case '%':
1.74 tobias 372: if (available_free_memory != 0 &&
373: membuf > ULLONG_MAX / available_free_memory)
374: goto invalid;
1.66 millert 375: membuf = (available_free_memory * membuf) /
376: 100;
377: break;
378: default:
379: warnc(EINVAL, "%s", optarg);
380: membuf = available_free_memory;
1.44 millert 381: }
1.74 tobias 382: if (membuf > SIZE_MAX)
383: goto invalid;
1.66 millert 384: return membuf;
1.68 millert 385: invalid:
386: errx(2, "invalid memory buffer size: %s", value);
1.44 millert 387: }
388:
/*
 * Signal handler: remove the temporary files via clear_tmp_files() and
 * terminate immediately with status 2.  _exit() is used, so atexit
 * handlers are not run.  The signal number is ignored.
 */
static void
sig_handler(int sig __unused)
{
	clear_tmp_files();
	_exit(2);
}
398:
399: /*
400: * Set signal handler on panic signals.
401: */
402: static void
403: set_signal_handler(void)
404: {
405: struct sigaction sa;
1.69 millert 406: int i, signals[] = {SIGTERM, SIGHUP, SIGINT, SIGUSR1, SIGUSR2,
407: SIGPIPE, SIGXCPU, SIGXFSZ, 0};
1.44 millert 408:
409: memset(&sa, 0, sizeof(sa));
1.49 millert 410: sigfillset(&sa.sa_mask);
411: sa.sa_flags = SA_RESTART;
412: sa.sa_handler = sig_handler;
413:
414: for (i = 0; signals[i] != 0; i++) {
415: if (sigaction(signals[i], &sa, NULL) < 0) {
1.70 tobias 416: warn("sigaction(%s)", strsignal(signals[i]));
1.49 millert 417: continue;
418: }
1.44 millert 419: }
420: }
421:
/*
 * Print an "Unknown feature" message naming `what' and exit with status 2.
 * The function does not return, because errx() terminates the process.
 */
static void
unknown(const char *what)
{
	errx(2, "Unknown feature: %s", what);
}
430:
431: /*
432: * Check whether contradictory input options are used.
433: */
434: static void
435: check_mutually_exclusive_flags(char c, bool *mef_flags)
436: {
437: int i, fo_index, mec;
438: bool found_others, found_this;
439:
1.72 tobias 440: found_others = found_this = false;
1.44 millert 441: fo_index = 0;
442:
443: for (i = 0; i < NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS; i++) {
444: mec = mutually_exclusive_flags[i];
445:
446: if (mec != c) {
447: if (mef_flags[i]) {
1.65 millert 448: if (found_this) {
449: errx(2,
450: "%c:%c: mutually exclusive flags",
451: c, mec);
452: }
1.44 millert 453: found_others = true;
454: fo_index = i;
455: }
456: } else {
1.65 millert 457: if (found_others) {
458: errx(2,
459: "%c:%c: mutually exclusive flags",
460: c, mutually_exclusive_flags[fo_index]);
461: }
1.44 millert 462: mef_flags[i] = true;
463: found_this = true;
464: }
465: }
466: }
467:
468: /*
469: * Initialise sort opts data.
470: */
471: static void
472: set_sort_opts(void)
473: {
474: memset(&default_sort_mods_object, 0,
475: sizeof(default_sort_mods_object));
476: memset(&sort_opts_vals, 0, sizeof(sort_opts_vals));
477: default_sort_mods_object.func =
478: get_sort_func(&default_sort_mods_object);
479: }
480:
481: /*
482: * Set a sort modifier on a sort modifiers object.
483: */
484: static bool
485: set_sort_modifier(struct sort_mods *sm, int c)
486: {
1.66 millert 487: switch (c) {
488: case 'b':
489: sm->bflag = true;
490: break;
491: case 'd':
492: sm->dflag = true;
493: break;
494: case 'f':
495: sm->fflag = true;
496: break;
497: case 'g':
498: sm->gflag = true;
499: need_hint = true;
500: break;
501: case 'i':
502: sm->iflag = true;
503: break;
504: case 'R':
505: sm->Rflag = true;
506: need_random = true;
507: break;
508: case 'M':
509: initialise_months();
510: sm->Mflag = true;
511: need_hint = true;
512: break;
513: case 'n':
514: sm->nflag = true;
515: need_hint = true;
516: print_symbols_on_debug = true;
517: break;
518: case 'r':
519: sm->rflag = true;
520: break;
521: case 'V':
522: sm->Vflag = true;
523: break;
524: case 'h':
525: sm->hflag = true;
526: need_hint = true;
527: print_symbols_on_debug = true;
528: break;
529: default:
530: return false;
1.44 millert 531: }
1.66 millert 532: sort_opts_vals.complex_sort = true;
533: sm->func = get_sort_func(sm);
534:
1.44 millert 535: return true;
536: }
537:
538: /*
539: * Parse POS in -k option.
540: */
541: static int
542: parse_pos(const char *s, struct key_specs *ks, bool *mef_flags, bool second)
543: {
544: regmatch_t pmatch[4];
545: regex_t re;
546: char *c, *f;
547: const char *sregexp = "^([0-9]+)(\\.[0-9]+)?([bdfirMngRhV]+)?$";
548: size_t len, nmatch;
549: int ret;
550:
551: ret = -1;
552: nmatch = 4;
553: c = f = NULL;
554:
555: if (regcomp(&re, sregexp, REG_EXTENDED) != 0)
556: return -1;
557:
558: if (regexec(&re, s, nmatch, pmatch, 0) != 0)
559: goto end;
560:
561: if (pmatch[0].rm_eo <= pmatch[0].rm_so)
562: goto end;
563:
564: if (pmatch[1].rm_eo <= pmatch[1].rm_so)
565: goto end;
566:
567: len = pmatch[1].rm_eo - pmatch[1].rm_so;
568:
1.57 millert 569: f = sort_malloc(len + 1);
570: memcpy(f, s + pmatch[1].rm_so, len);
1.44 millert 571: f[len] = '\0';
572:
573: if (second) {
574: errno = 0;
1.58 millert 575: ks->f2 = (size_t)strtoul(f, NULL, 10);
1.44 millert 576: if (errno != 0)
1.58 millert 577: goto end;
1.44 millert 578: if (ks->f2 == 0) {
579: warn("0 field in key specs");
580: goto end;
581: }
582: } else {
583: errno = 0;
1.58 millert 584: ks->f1 = (size_t)strtoul(f, NULL, 10);
1.44 millert 585: if (errno != 0)
1.58 millert 586: goto end;
1.44 millert 587: if (ks->f1 == 0) {
588: warn("0 field in key specs");
589: goto end;
590: }
591: }
592:
593: if (pmatch[2].rm_eo > pmatch[2].rm_so) {
594: len = pmatch[2].rm_eo - pmatch[2].rm_so - 1;
595:
1.57 millert 596: c = sort_malloc(len + 1);
597: memcpy(c, s + pmatch[2].rm_so + 1, len);
1.44 millert 598: c[len] = '\0';
599:
600: if (second) {
601: errno = 0;
1.58 millert 602: ks->c2 = (size_t)strtoul(c, NULL, 10);
1.44 millert 603: if (errno != 0)
1.58 millert 604: goto end;
1.44 millert 605: } else {
606: errno = 0;
1.58 millert 607: ks->c1 = (size_t)strtoul(c, NULL, 10);
1.44 millert 608: if (errno != 0)
1.58 millert 609: goto end;
1.44 millert 610: if (ks->c1 == 0) {
611: warn("0 column in key specs");
612: goto end;
613: }
614: }
615: } else {
616: if (second)
617: ks->c2 = 0;
618: else
619: ks->c1 = 1;
620: }
621:
622: if (pmatch[3].rm_eo > pmatch[3].rm_so) {
623: regoff_t i = 0;
624:
625: for (i = pmatch[3].rm_so; i < pmatch[3].rm_eo; i++) {
626: check_mutually_exclusive_flags(s[i], mef_flags);
627: if (s[i] == 'b') {
628: if (second)
629: ks->pos2b = true;
630: else
631: ks->pos1b = true;
632: } else if (!set_sort_modifier(&(ks->sm), s[i]))
633: goto end;
634: }
635: }
636:
637: ret = 0;
638:
639: end:
1.61 millert 640: sort_free(c);
641: sort_free(f);
1.44 millert 642: regfree(&re);
643:
644: return ret;
645: }
646:
647: /*
648: * Parse -k option value.
649: */
650: static int
651: parse_k(const char *s, struct key_specs *ks)
652: {
653: int ret = -1;
654: bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] =
655: { false, false, false, false, false, false };
656:
1.66 millert 657: if (*s != '\0') {
1.44 millert 658: char *sptr;
659:
660: sptr = strchr(s, ',');
661: if (sptr) {
662: size_t size1;
663: char *pos1, *pos2;
664:
665: size1 = sptr - s;
666:
667: if (size1 < 1)
668: return -1;
669:
1.57 millert 670: pos1 = sort_malloc(size1 + 1);
671: memcpy(pos1, s, size1);
1.44 millert 672: pos1[size1] = '\0';
673:
674: ret = parse_pos(pos1, ks, mef_flags, false);
675:
676: sort_free(pos1);
677: if (ret < 0)
678: return ret;
679:
680: pos2 = sort_strdup(sptr + 1);
681: ret = parse_pos(pos2, ks, mef_flags, true);
682: sort_free(pos2);
683: } else
684: ret = parse_pos(s, ks, mef_flags, false);
1.1 millert 685: }
1.4 millert 686:
1.44 millert 687: return ret;
688: }
689:
/*
 * Parse one POS of the obsolete +POS1 -POS2 syntax: FIELD[.COLUMN][LETTERS].
 *
 * On success stores the field in *nf, the column (0 when absent) in *nc
 * and, when letters are present, copies them to sopts; returns 0.
 * Returns -1 when s does not have this form.  Exits with status 2 on a
 * number that is out of range or when the letters do not fit in
 * sopts_size bytes.
 */
static int
parse_pos_obs(const char *s, size_t *nf, size_t *nc, char *sopts, size_t sopts_size)
{
	const char *pattern = "^([0-9]+)(\\.[0-9]+)?([A-Za-z]+)?$";
	regmatch_t m[4];
	regex_t re;
	char *field_str = NULL;
	char *col_str = NULL;
	size_t n;
	int rc = -1;

	*nc = *nf = 0;

	if (regcomp(&re, pattern, REG_EXTENDED) != 0)
		return -1;

	if (regexec(&re, s, 4, m, 0) != 0)
		goto end;

	if (m[0].rm_eo <= m[0].rm_so || m[1].rm_eo <= m[1].rm_so)
		goto end;

	/* Field number. */
	n = m[1].rm_eo - m[1].rm_so;
	field_str = sort_malloc(n + 1);
	memcpy(field_str, s + m[1].rm_so, n);
	field_str[n] = '\0';

	errno = 0;
	*nf = (size_t)strtoul(field_str, NULL, 10);
	if (errno != 0)
		errx(2, "Invalid key position");

	/* Optional column number; the leading '.' is skipped. */
	if (m[2].rm_eo > m[2].rm_so) {
		n = m[2].rm_eo - m[2].rm_so - 1;
		col_str = sort_malloc(n + 1);
		memcpy(col_str, s + m[2].rm_so + 1, n);
		col_str[n] = '\0';

		errno = 0;
		*nc = (size_t)strtoul(col_str, NULL, 10);
		if (errno != 0)
			errx(2, "Invalid key position");
	}

	/* Optional modifier letters, copied verbatim. */
	if (m[3].rm_eo > m[3].rm_so) {
		n = m[3].rm_eo - m[3].rm_so;
		if (n >= sopts_size)
			errx(2, "Invalid key position");
		memcpy(sopts, s + m[3].rm_so, n);
		sopts[n] = '\0';
	}

	rc = 0;

end:
	sort_free(col_str);
	sort_free(field_str);
	regfree(&re);

	return rc;
}
763:
/*
 * "Translate" obsolete +POS1 [-POS2] syntax into new -kPOS1[,POS2] syntax.
 *
 * argv is rewritten in place.  A "+POS1" argument is replaced by a newly
 * allocated "-k..." string; when it is directly followed by a valid
 * "-POS2", that argument is merged into the key, the remaining arguments
 * are shifted down and *argc is decremented.  Obsolete positions are
 * zero-based, hence the +1 adjustments.  Scanning stops at "--" so that
 * operands after it (for example a file named "+1") are left untouched.
 */
static void
fix_obsolete_keys(int *argc, char **argv)
{
	/*
	 * Large enough for "-k", four numbers of up to 20 digits, two
	 * modifier strings of up to 127 characters and the punctuation.
	 */
	char sopt[512];
	int i, len;

	for (i = 1; i < *argc; i++) {
		const char *arg1 = argv[i];
		size_t c1, f1, c2, f2;
		char sopts1[128], sopts2[128];
		bool have_pos2;

		if (strcmp(arg1, "--") == 0)
			break;
		if (arg1[0] != '+')
			continue;

		sopts1[0] = 0;
		c1 = f1 = 0;
		if (parse_pos_obs(arg1 + 1, &f1, &c1, sopts1,
		    sizeof(sopts1)) < 0)
			continue;
		f1 += 1;
		c1 += 1;

		sopts2[0] = 0;
		c2 = f2 = 0;
		have_pos2 = i + 1 < *argc && argv[i + 1][0] == '-' &&
		    parse_pos_obs(argv[i + 1] + 1, &f2, &c2, sopts2,
		    sizeof(sopts2)) >= 0;

		if (have_pos2) {
			int j;

			if (c2 > 0)
				f2 += 1;
			len = snprintf(sopt, sizeof(sopt),
			    "-k%zu.%zu%s,%zu.%zu%s",
			    f1, c1, sopts1, f2, c2, sopts2);
		} else {
			len = snprintf(sopt, sizeof(sopt), "-k%zu.%zu%s",
			    f1, c1, sopts1);
		}
		/* Never hand a truncated key specification to getopt. */
		if (len < 0 || (size_t)len >= sizeof(sopt))
			errx(2, "Invalid key position");

		argv[i] = sort_strdup(sopt);

		if (have_pos2) {
			int j;

			/* Drop the consumed -POS2 argument. */
			for (j = i + 1; j + 1 < *argc; j++)
				argv[j] = argv[j + 1];
			*argc -= 1;
		}
	}
}
822:
823: /*
824: * Set random seed
825: */
826: static void
827: set_random_seed(void)
828: {
829: if (!need_random)
830: return;
1.4 millert 831:
1.44 millert 832: MD5Init(&md5_ctx);
833: if (random_source != NULL) {
834: unsigned char buf[BUFSIZ];
835: size_t nr;
836: FILE *fp;
837:
838: if ((fp = fopen(random_source, "r")) == NULL)
839: err(2, "%s", random_source);
840: while ((nr = fread(buf, 1, sizeof(buf), fp)) != 0)
841: MD5Update(&md5_ctx, buf, nr);
842: if (ferror(fp))
843: err(2, "%s", random_source);
844: fclose(fp);
1.1 millert 845: } else {
1.44 millert 846: unsigned char rsd[1024];
847:
848: arc4random_buf(rsd, sizeof(rsd));
849: MD5Update(&md5_ctx, rsd, sizeof(rsd));
850: }
851: }
852:
853: /*
854: * Main function.
855: */
856: int
857: main(int argc, char *argv[])
858: {
1.75 ! tobias 859: char *outfile, *real_outfile, *sflag;
1.44 millert 860: int c, result;
861: size_t i;
862: bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] =
863: { false, false, false, false, false, false };
864:
865: result = 0;
1.51 millert 866: outfile = "-";
1.44 millert 867: real_outfile = NULL;
1.75 ! tobias 868: sflag = NULL;
1.44 millert 869:
870: struct sort_mods *sm = &default_sort_mods_object;
871:
872: init_tmp_files();
873:
874: set_signal_handler();
875:
1.51 millert 876: atexit(clear_tmp_files);
877:
1.44 millert 878: set_hw_params();
879: set_locale();
880: set_tmpdir();
881: set_sort_opts();
882:
883: fix_obsolete_keys(&argc, argv);
884:
885: while (((c = getopt_long(argc, argv, OPTIONS, long_options, NULL))
886: != -1)) {
887:
888: check_mutually_exclusive_flags(c, mef_flags);
889:
890: if (!set_sort_modifier(sm, c)) {
891: switch (c) {
892: case 'c':
893: sort_opts_vals.cflag = true;
894: if (optarg) {
895: if (!strcmp(optarg, "diagnose-first"))
896: ;
897: else if (!strcmp(optarg, "silent") ||
898: !strcmp(optarg, "quiet"))
899: sort_opts_vals.csilentflag = true;
900: else if (*optarg)
901: unknown(optarg);
902: }
903: break;
904: case 'C':
905: sort_opts_vals.cflag = true;
906: sort_opts_vals.csilentflag = true;
907: break;
908: case 'k':
909: {
910: sort_opts_vals.complex_sort = true;
911: sort_opts_vals.kflag = true;
912:
913: keys_num++;
914: keys = sort_reallocarray(keys, keys_num,
915: sizeof(struct key_specs));
916: memset(&(keys[keys_num - 1]), 0,
917: sizeof(struct key_specs));
918:
1.66 millert 919: if (parse_k(optarg, &(keys[keys_num - 1])) < 0)
1.44 millert 920: errc(2, EINVAL, "-k %s", optarg);
921:
922: break;
923: }
924: case 'm':
925: sort_opts_vals.mflag = true;
926: break;
927: case 'o':
1.51 millert 928: outfile = optarg;
1.44 millert 929: break;
930: case 's':
931: sort_opts_vals.sflag = true;
932: break;
933: case 'S':
1.75 ! tobias 934: sflag = optarg;
1.44 millert 935: break;
936: case 'T':
1.71 tobias 937: tmpdir = optarg;
1.44 millert 938: break;
939: case 't':
940: while (strlen(optarg) > 1) {
941: if (optarg[0] != '\\') {
942: errc(2, EINVAL, "%s", optarg);
943: }
944: optarg += 1;
945: if (*optarg == '0') {
946: *optarg = 0;
947: break;
948: }
949: }
950: sort_opts_vals.tflag = true;
951: sort_opts_vals.field_sep = btowc(optarg[0]);
952: if (sort_opts_vals.field_sep == WEOF) {
953: errno = EINVAL;
954: err(2, NULL);
955: }
956: if (!gnusort_numeric_compatibility) {
957: if (symbol_decimal_point == sort_opts_vals.field_sep)
958: symbol_decimal_point = WEOF;
959: if (symbol_thousands_sep == sort_opts_vals.field_sep)
960: symbol_thousands_sep = WEOF;
961: if (symbol_negative_sign == sort_opts_vals.field_sep)
962: symbol_negative_sign = WEOF;
963: if (symbol_positive_sign == sort_opts_vals.field_sep)
964: symbol_positive_sign = WEOF;
965: }
966: break;
967: case 'u':
968: sort_opts_vals.uflag = true;
969: /* stable sort for the correct unique val */
970: sort_opts_vals.sflag = true;
971: break;
972: case 'z':
973: sort_opts_vals.zflag = true;
974: break;
975: case SORT_OPT:
1.62 millert 976: if (!strcmp(optarg, "general-numeric"))
977: set_sort_modifier(sm, 'g');
978: else if (!strcmp(optarg, "human-numeric"))
979: set_sort_modifier(sm, 'h');
980: else if (!strcmp(optarg, "numeric"))
981: set_sort_modifier(sm, 'n');
982: else if (!strcmp(optarg, "month"))
983: set_sort_modifier(sm, 'M');
984: else if (!strcmp(optarg, "random"))
985: set_sort_modifier(sm, 'R');
986: else
987: unknown(optarg);
1.44 millert 988: break;
989: case QSORT_OPT:
990: sort_opts_vals.sort_method = SORT_QSORT;
991: break;
992: case 'H':
993: sort_opts_vals.sort_method = SORT_MERGESORT;
994: break;
995: case MMAP_OPT:
996: use_mmap = true;
997: break;
998: case HEAPSORT_OPT:
999: sort_opts_vals.sort_method = SORT_HEAPSORT;
1000: break;
1001: case RADIXSORT_OPT:
1002: sort_opts_vals.sort_method = SORT_RADIXSORT;
1003: break;
1004: case RANDOMSOURCE_OPT:
1.71 tobias 1005: random_source = optarg;
1.44 millert 1006: break;
1007: case COMPRESSPROGRAM_OPT:
1.71 tobias 1008: compress_program = optarg;
1.44 millert 1009: break;
1010: case FF_OPT:
1011: read_fns_from_file0(optarg);
1012: break;
1013: case BS_OPT:
1014: {
1.54 millert 1015: const char *errstr;
1016:
1017: max_open_files = strtonum(optarg, 2,
1018: UINT_MAX - 1, &errstr) + 1;
1019: if (errstr != NULL)
1020: errx(2, "--batch-size argument is %s",
1021: errstr);
1022: break;
1.44 millert 1023: }
1024: case VERSION_OPT:
1025: printf("%s\n", VERSION);
1026: exit(EXIT_SUCCESS);
1027: /* NOTREACHED */
1028: break;
1029: case DEBUG_OPT:
1030: debug_sort = true;
1031: break;
1032: case HELP_OPT:
1033: usage(0);
1034: /* NOTREACHED */
1035: break;
1036: default:
1037: usage(2);
1038: /* NOTREACHED */
1039: }
1040: }
1041: }
1042:
1043: argc -= optind;
1044: argv += optind;
1.75 ! tobias 1045:
! 1046: if (sflag != NULL)
! 1047: available_free_memory = parse_memory_buffer_value(sflag);
1.44 millert 1048:
1049: if (keys_num == 0) {
1050: keys_num = 1;
1.67 millert 1051: keys = sort_reallocarray(keys, 1, sizeof(struct key_specs));
1.44 millert 1052: memset(&(keys[0]), 0, sizeof(struct key_specs));
1053: keys[0].c1 = 1;
1054: keys[0].pos1b = default_sort_mods->bflag;
1055: keys[0].pos2b = default_sort_mods->bflag;
1056: memcpy(&(keys[0].sm), default_sort_mods,
1057: sizeof(struct sort_mods));
1058: }
1059:
1060: for (i = 0; i < keys_num; i++) {
1061: struct key_specs *ks;
1062:
1063: ks = &(keys[i]);
1064:
1065: if (sort_modifier_empty(&(ks->sm)) && !(ks->pos1b) &&
1066: !(ks->pos2b)) {
1067: ks->pos1b = sm->bflag;
1068: ks->pos2b = sm->bflag;
1069: memcpy(&(ks->sm), sm, sizeof(struct sort_mods));
1070: }
1071:
1072: ks->sm.func = get_sort_func(&(ks->sm));
1073: }
1074:
1075: if (argv_from_file0) {
1076: argc = argc_from_file0;
1077: argv = argv_from_file0;
1078: }
1079:
1080: if (debug_sort) {
1081: printf("Memory to be used for sorting: %llu\n",
1082: available_free_memory);
1083: printf("Using collate rules of %s locale\n",
1084: setlocale(LC_COLLATE, NULL));
1085: if (byte_sort)
1086: printf("Byte sort is used\n");
1087: if (print_symbols_on_debug) {
1088: printf("Decimal Point: <%lc>\n", symbol_decimal_point);
1089: if (symbol_thousands_sep)
1090: printf("Thousands separator: <%lc>\n",
1091: symbol_thousands_sep);
1092: printf("Positive sign: <%lc>\n", symbol_positive_sign);
1093: printf("Negative sign: <%lc>\n", symbol_negative_sign);
1094: }
1095: }
1096:
1097: set_random_seed();
1.4 millert 1098:
1.44 millert 1099: /* Case when the outfile equals one of the input files: */
1.51 millert 1100: if (strcmp(outfile, "-") != 0) {
1101: struct stat sb;
1102: int fd, i;
1.44 millert 1103:
1104: for (i = 0; i < argc; ++i) {
1105: if (strcmp(argv[i], outfile) == 0) {
1.51 millert 1106: if (stat(outfile, &sb) == -1)
1107: err(2, "%s", outfile);
1108: if (access(outfile, W_OK) == -1)
1109: err(2, "%s", outfile);
1110: real_outfile = outfile;
1111: sort_asprintf(&outfile, "%s.XXXXXXXXXX",
1112: real_outfile);
1113: if ((fd = mkstemp(outfile)) == -1 ||
1114: fchmod(fd, sb.st_mode & ALLPERMS) == -1)
1115: err(2, "%s", outfile);
1116: close(fd);
1.44 millert 1117: tmp_file_atexit(outfile);
1.51 millert 1118: break;
1.44 millert 1119: }
1120: }
1121: }
1122:
1123: if (!sort_opts_vals.cflag && !sort_opts_vals.mflag) {
1124: struct file_list fl;
1125: struct sort_list list;
1126:
1127: sort_list_init(&list);
1128: file_list_init(&fl, true);
1129:
1130: if (argc < 1)
1131: procfile("-", &list, &fl);
1132: else {
1133: while (argc > 0) {
1134: procfile(*argv, &list, &fl);
1135: --argc;
1136: ++argv;
1137: }
1138: }
1139:
1140: if (fl.count < 1)
1141: sort_list_to_file(&list, outfile);
1142: else {
1143: if (list.count > 0) {
1144: char *flast = new_tmp_file_name();
1145:
1146: sort_list_to_file(&list, flast);
1147: file_list_add(&fl, flast, false);
1148: }
1149: merge_files(&fl, outfile);
1150: }
1151:
1152: file_list_clean(&fl);
1153:
1154: /*
1155: * We are about to exit the program, so we can ignore
1156: * the clean-up for speed
1157: *
1158: * sort_list_clean(&list);
1159: */
1160:
1161: } else if (sort_opts_vals.cflag) {
1162: result = (argc == 0) ? (check("-")) : (check(*argv));
1163: } else if (sort_opts_vals.mflag) {
1164: struct file_list fl;
1165:
1166: file_list_init(&fl, false);
1167: file_list_populate(&fl, argc, argv, true);
1168: merge_files(&fl, outfile);
1169: file_list_clean(&fl);
1170: }
1171:
1172: if (real_outfile) {
1173: if (rename(outfile, real_outfile) < 0)
1174: err(2, "%s", real_outfile);
1.51 millert 1175: sort_free(outfile);
1.44 millert 1176: }
1.4 millert 1177:
1.44 millert 1178: return result;
1.1 millert 1179: }