Annotation of src/usr.bin/sort/sort.c, Revision 1.45
1.45 ! jmc 1: /* $OpenBSD: sort.c,v 1.44 2015/03/17 17:45:13 millert Exp $ */
1.1 millert 2:
3: /*-
1.44 millert 4: * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
5: * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
6: * All rights reserved.
1.1 millert 7: *
8: * Redistribution and use in source and binary forms, with or without
9: * modification, are permitted provided that the following conditions
10: * are met:
11: * 1. Redistributions of source code must retain the above copyright
12: * notice, this list of conditions and the following disclaimer.
13: * 2. Redistributions in binary form must reproduce the above copyright
14: * notice, this list of conditions and the following disclaimer in the
15: * documentation and/or other materials provided with the distribution.
16: *
1.44 millert 17: * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
1.1 millert 18: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
1.44 millert 20: * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
1.1 millert 21: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27: * SUCH DAMAGE.
28: */
29:
1.44 millert 30: #include <sys/stat.h>
31: #include <sys/types.h>
1.1 millert 32:
1.44 millert 33: #include <err.h>
34: #include <errno.h>
35: #include <getopt.h>
36: #include <limits.h>
1.16 ericj 37: #include <locale.h>
1.44 millert 38: #include <md5.h>
39: #include <regex.h>
1.1 millert 40: #include <signal.h>
1.44 millert 41: #include <stdbool.h>
42: #include <stdio.h>
1.1 millert 43: #include <stdlib.h>
44: #include <string.h>
45: #include <unistd.h>
1.44 millert 46: #include <wchar.h>
47: #include <wctype.h>
48:
49: #include "coll.h"
50: #include "file.h"
51: #include "sort.h"
52:
/* Short-option specification handed to getopt_long() in main(). */
53: #define OPTIONS "bCcdfgHhik:Mmno:RrS:st:T:uVz"
54:
/* Set when the R modifier is requested; set_random_seed() is a no-op otherwise. */
55: static bool need_random;
/* File named by --random-source, or NULL when the option was not given. */
56: static const char *random_source;
57:
/* MD5 context initialised and fed by set_random_seed(). */
58: MD5_CTX md5_ctx;
59:
60: struct sort_opts sort_opts_vals;
61:
62: bool debug_sort;
63: bool need_hint;
64:
/* Set in set_locale() when GNUSORT_NUMERIC_COMPATIBILITY is in the environment. */
65: static bool gnusort_numeric_compatibility;
66:
67: static struct sort_mods default_sort_mods_object;
68: struct sort_mods * const default_sort_mods = &default_sort_mods_object;
69:
/* Set by the n and h modifiers; makes --debug print the numeric symbols. */
70: static bool print_symbols_on_debug;
71:
72: /*
73: * Arguments read from a file (when the files0-from option is used).
    * argc_from_file0 stays (size_t)-1 until the first name has been stored.
74: */
75: static size_t argc_from_file0 = (size_t)-1;
76: static char **argv_from_file0;
77:
78: /*
79: * Placeholder symbols for options which have no single-character equivalent
80: */
81: enum {
82: SORT_OPT = CHAR_MAX + 1,
83: HELP_OPT,
84: FF_OPT,
85: BS_OPT,
86: VERSION_OPT,
87: DEBUG_OPT,
88: RANDOMSOURCE_OPT,
89: COMPRESSPROGRAM_OPT,
90: QSORT_OPT,
91: HEAPSORT_OPT,
92: RADIXSORT_OPT,
93: MMAP_OPT
94: };
95:
/*
 * Modifier letters of which at most one distinct value may be given;
 * enforced by check_mutually_exclusive_flags().
 */
96: #define NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS 6
97: static const char mutually_exclusive_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] = { 'M', 'n', 'g', 'R', 'h', 'V' };
98:
/*
 * Long options accepted by getopt_long() in main().  Entries whose value is
 * a character are handled exactly like the matching short option; the others
 * use the placeholder codes from the enum of *_OPT values.
 *
 * NOTE(review): the "check=silent|quiet" entry is registered under that
 * literal name; it looks like it is meant as documentation for the optional
 * argument of --check rather than as a matchable option -- confirm.
 */
99: static const struct option long_options[] = {
100: { "batch-size", required_argument, NULL, BS_OPT },
101: { "buffer-size", required_argument, NULL, 'S' },
102: { "check", optional_argument, NULL, 'c' },
103: { "check=silent|quiet", optional_argument, NULL, 'C' },
104: { "compress-program", required_argument, NULL, COMPRESSPROGRAM_OPT },
105: { "debug", no_argument, NULL, DEBUG_OPT },
106: { "dictionary-order", no_argument, NULL, 'd' },
107: { "field-separator", required_argument, NULL, 't' },
108: { "files0-from", required_argument, NULL, FF_OPT },
109: { "general-numeric-sort", no_argument, NULL, 'g' },
110: { "heapsort", no_argument, NULL, HEAPSORT_OPT },
111: { "help", no_argument, NULL, HELP_OPT },
112: { "human-numeric-sort", no_argument, NULL, 'h' },
113: { "ignore-leading-blanks", no_argument, NULL, 'b' },
114: { "ignore-case", no_argument, NULL, 'f' },
115: { "ignore-nonprinting", no_argument, NULL, 'i' },
116: { "key", required_argument, NULL, 'k' },
117: { "merge", no_argument, NULL, 'm' },
118: { "mergesort", no_argument, NULL, 'H' },
119: { "mmap", no_argument, NULL, MMAP_OPT },
120: { "month-sort", no_argument, NULL, 'M' },
121: { "numeric-sort", no_argument, NULL, 'n' },
122: { "output", required_argument, NULL, 'o' },
123: { "qsort", no_argument, NULL, QSORT_OPT },
124: { "radixsort", no_argument, NULL, RADIXSORT_OPT },
125: { "random-sort", no_argument, NULL, 'R' },
126: { "random-source", required_argument, NULL, RANDOMSOURCE_OPT },
127: { "reverse", no_argument, NULL, 'r' },
128: { "sort", required_argument, NULL, SORT_OPT },
129: { "stable", no_argument, NULL, 's' },
130: { "temporary-directory", required_argument, NULL, 'T' },
131: { "unique", no_argument, NULL, 'u' },
132: { "version", no_argument, NULL, VERSION_OPT },
133: { "version-sort", no_argument, NULL, 'V' },
134: { "zero-terminated", no_argument, NULL, 'z' },
/* Terminating all-zero entry required by getopt_long(). */
135: { NULL, no_argument, NULL, 0 }
136: };
137:
138: /*
139: * Check where sort modifier is present
140: */
141: static bool
142: sort_modifier_empty(struct sort_mods *sm)
143: {
144:
145: if (sm == NULL)
146: return true;
147: return !(sm->Mflag || sm->Vflag || sm->nflag || sm->gflag ||
148: sm->rflag || sm->Rflag || sm->hflag || sm->dflag || sm->fflag);
149: }
150:
151: /*
152: * Print out usage text.
153: */
154: static __dead void
155: usage(int exit_val)
156: {
1.1 millert 157:
1.44 millert 158: fprintf(exit_val ? stderr : stdout,
1.45 ! jmc 159: "usage: %s [-bCcdfgHhiMnRrsuVz] [-k field1[,field2]] [-o output] "
! 160: "[-S size]\n\t[-T dir] [-t char] [file ...]\n", getprogname());
1.44 millert 161: exit(exit_val);
162: }
1.4 millert 163:
1.1 millert 164: /*
1.44 millert 165: * Read input file names from a file (file0-from option).
1.1 millert 166: */
1.44 millert 167: static void
168: read_fns_from_file0(const char *fn)
169: {
170: if (fn) {
171: struct file0_reader f0r;
172: FILE *f;
173:
174: f = fopen(fn, "r");
175: if (f == NULL)
176: err(2, "%s", fn);
177:
178: memset(&f0r, 0, sizeof(f0r));
179: f0r.f = f;
180:
181: while (!feof(f)) {
182: char *line = read_file0_line(&f0r);
183:
184: if (line && *line) {
185: if (argc_from_file0 == (size_t)-1)
186: argc_from_file0 = 0;
187: ++argc_from_file0;
188: argv_from_file0 = sort_reallocarray(argv_from_file0,
189: argc_from_file0, sizeof(char *));
190: argv_from_file0[argc_from_file0 - 1] =
191: sort_strdup(line);
192: }
193: }
194: closefile(f, fn);
195: }
196: }
1.4 millert 197:
1.1 millert 198: /*
1.44 millert 199: * Check how much RAM is available for the sort.
1.1 millert 200: */
1.44 millert 201: static void
202: set_hw_params(void)
203: {
204: long pages, psize;
1.1 millert 205:
1.44 millert 206: pages = sysconf(_SC_PHYS_PAGES);
207: if (pages < 1) {
208: warn("sysconf pages");
209: pages = 1;
210: }
211: psize = sysconf(_SC_PAGESIZE);
212: if (psize < 1) {
213: warn("sysconf psize");
214: psize = 4096;
215: }
216:
217: free_memory = (unsigned long long) pages * (unsigned long long) psize;
218: available_free_memory = free_memory / 2;
219:
220: if (available_free_memory < 1024)
221: available_free_memory = 1024;
222: }
223:
/*
 * Convert the first multibyte character of c to a wide character.
 *
 * wc:  destination; left untouched when wc or c is NULL.
 * c:   multibyte string to convert.
 * def: value stored in *wc when c is empty or cannot be converted.
 *
 * mbtowc() keeps an internal shift state, so the state is reset before the
 * conversion and again after a failure.  This keeps each call independent
 * of whatever an earlier (possibly failed) conversion left behind.
 */
static void
conv_mbtowc(wchar_t *wc, const char *c, const wchar_t def)
{
	if (wc == NULL || c == NULL)
		return;

	/* Start from the initial shift state. */
	(void)mbtowc(NULL, NULL, 0);

	if (mbtowc(wc, c, MB_CUR_MAX) < 1) {
		/* Empty string (0) or invalid sequence (-1): use the default. */
		(void)mbtowc(NULL, NULL, 0);
		*wc = def;
	}
}
1.12 millert 239:
1.44 millert 240: /*
241: * Set current locale symbols.
242: */
243: static void
244: set_locale(void)
1.1 millert 245: {
1.44 millert 246: struct lconv *lc;
247: const char *locale;
1.4 millert 248:
1.16 ericj 249: setlocale(LC_ALL, "");
250:
1.44 millert 251: lc = localeconv();
252:
253: if (lc) {
254: /* obtain LC_NUMERIC info */
255: /* Convert to wide char form */
256: conv_mbtowc(&symbol_decimal_point, lc->decimal_point,
257: symbol_decimal_point);
258: conv_mbtowc(&symbol_thousands_sep, lc->thousands_sep,
259: symbol_thousands_sep);
260: conv_mbtowc(&symbol_positive_sign, lc->positive_sign,
261: symbol_positive_sign);
262: conv_mbtowc(&symbol_negative_sign, lc->negative_sign,
263: symbol_negative_sign);
264: }
265:
266: if (getenv("GNUSORT_NUMERIC_COMPATIBILITY"))
267: gnusort_numeric_compatibility = true;
268:
269: locale = setlocale(LC_COLLATE, NULL);
270: if (locale != NULL) {
271: char *tmpl;
272: const char *byteclocale;
273:
274: tmpl = sort_strdup(locale);
275: byteclocale = setlocale(LC_COLLATE, "C");
276: if (byteclocale && strcmp(byteclocale, tmpl) == 0) {
277: byte_sort = true;
278: } else {
279: byteclocale = setlocale(LC_COLLATE, "POSIX");
280: if (byteclocale && strcmp(byteclocale, tmpl) == 0)
281: byte_sort = true;
282: else
283: setlocale(LC_COLLATE, tmpl);
284: }
285: sort_free(tmpl);
286: }
287: if (!byte_sort)
288: sort_mb_cur_max = MB_CUR_MAX;
289: }
290:
291: /*
292: * Set directory temporary files.
293: */
294: static void
295: set_tmpdir(void)
296: {
297: char *td;
298:
299: td = getenv("TMPDIR");
300: if (td != NULL)
301: tmpdir = sort_strdup(td);
302: }
303:
304: /*
305: * Parse -S option.
306: */
307: static unsigned long long
308: parse_memory_buffer_value(const char *value)
309: {
310:
311: if (value == NULL)
312: return available_free_memory;
313: else {
314: char *endptr;
315: unsigned long long membuf;
316:
317: endptr = NULL;
318: errno = 0;
319: membuf = strtoll(value, &endptr, 10);
320:
321: if (errno != 0) {
322: warn("Wrong memory buffer specification");
323: membuf = available_free_memory;
324: } else {
325: switch (*endptr){
326: case 'Y':
327: membuf *= 1024;
328: /* FALLTHROUGH */
329: case 'Z':
330: membuf *= 1024;
331: /* FALLTHROUGH */
332: case 'E':
333: membuf *= 1024;
334: /* FALLTHROUGH */
335: case 'P':
336: membuf *= 1024;
337: /* FALLTHROUGH */
338: case 'T':
339: membuf *= 1024;
340: /* FALLTHROUGH */
341: case 'G':
342: membuf *= 1024;
343: /* FALLTHROUGH */
344: case 'M':
345: membuf *= 1024;
346: /* FALLTHROUGH */
347: case '\0':
348: case 'K':
349: membuf *= 1024;
350: /* FALLTHROUGH */
351: case 'b':
352: break;
353: case '%':
354: membuf = (available_free_memory * membuf) /
355: 100;
356: break;
357: default:
358: warnc(EINVAL, "%s", optarg);
359: membuf = available_free_memory;
360: }
361: }
362: return membuf;
363: }
364: }
365:
366: /*
367: * Signal handler that clears the temporary files.
368: */
369: static void
370: sig_handler(int sig __unused, siginfo_t *siginfo __unused,
371: void *context __unused)
372: {
373:
374: clear_tmp_files();
375: exit(2);
376: }
377:
378: /*
379: * Set signal handler on panic signals.
380: */
381: static void
382: set_signal_handler(void)
383: {
384: struct sigaction sa;
385:
386: memset(&sa, 0, sizeof(sa));
387: sa.sa_sigaction = &sig_handler;
388: sa.sa_flags = SA_SIGINFO;
389:
390: if (sigaction(SIGTERM, &sa, NULL) < 0) {
391: warn("sigaction(SIGTERM)");
392: return;
393: }
394: if (sigaction(SIGHUP, &sa, NULL) < 0) {
395: warn("sigaction(SIGHUP)");
396: return;
397: }
398: if (sigaction(SIGINT, &sa, NULL) < 0) {
399: warn("sigaction(SIGINT)");
400: return;
401: }
402: if (sigaction(SIGQUIT, &sa, NULL) < 0) {
403: warn("sigaction(SIGQUIT)");
404: return;
405: }
406: if (sigaction(SIGABRT, &sa, NULL) < 0) {
407: warn("sigaction(SIGABRT)");
408: return;
409: }
410: if (sigaction(SIGBUS, &sa, NULL) < 0) {
411: warn("sigaction(SIGBUS)");
412: return;
413: }
414: if (sigaction(SIGSEGV, &sa, NULL) < 0) {
415: warn("sigaction(SIGSEGV)");
416: return;
417: }
418: if (sigaction(SIGUSR1, &sa, NULL) < 0) {
419: warn("sigaction(SIGUSR1)");
420: return;
421: }
422: if (sigaction(SIGUSR2, &sa, NULL) < 0) {
423: warn("sigaction(SIGUSR2)");
424: return;
425: }
426: }
427:
/*
 * Report an unrecognised option argument and terminate with status 2.
 * what: the offending text, echoed in the message.
 */
static void
unknown(const char *what)
{
	errx(2, "Unknown feature: %s", what);
}
437:
438: /*
439: * Check whether contradictory input options are used.
440: */
441: static void
442: check_mutually_exclusive_flags(char c, bool *mef_flags)
443: {
444: int i, fo_index, mec;
445: bool found_others, found_this;
446:
447: found_others = found_this =false;
448: fo_index = 0;
449:
450: for (i = 0; i < NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS; i++) {
451: mec = mutually_exclusive_flags[i];
452:
453: if (mec != c) {
454: if (mef_flags[i]) {
455: if (found_this)
456: errx(1, "%c:%c: mutually exclusive flags", c, mec);
457: found_others = true;
458: fo_index = i;
459: }
460: } else {
461: if (found_others)
462: errx(1, "%c:%c: mutually exclusive flags", c, mutually_exclusive_flags[fo_index]);
463: mef_flags[i] = true;
464: found_this = true;
465: }
466: }
467: }
468:
469: /*
470: * Initialise sort opts data.
471: */
472: static void
473: set_sort_opts(void)
474: {
475:
476: memset(&default_sort_mods_object, 0,
477: sizeof(default_sort_mods_object));
478: memset(&sort_opts_vals, 0, sizeof(sort_opts_vals));
479: default_sort_mods_object.func =
480: get_sort_func(&default_sort_mods_object);
481: }
482:
483: /*
484: * Set a sort modifier on a sort modifiers object.
485: */
486: static bool
487: set_sort_modifier(struct sort_mods *sm, int c)
488: {
489:
490: if (sm) {
491: switch (c){
492: case 'b':
493: sm->bflag = true;
1.1 millert 494: break;
495: case 'd':
1.44 millert 496: sm->dflag = true;
497: break;
1.4 millert 498: case 'f':
1.44 millert 499: sm->fflag = true;
500: break;
501: case 'g':
502: sm->gflag = true;
503: need_hint = true;
504: break;
1.1 millert 505: case 'i':
1.44 millert 506: sm->iflag = true;
1.1 millert 507: break;
508: case 'R':
1.44 millert 509: sm->Rflag = true;
510: need_random = true;
1.1 millert 511: break;
1.44 millert 512: case 'M':
513: initialise_months();
514: sm->Mflag = true;
515: need_hint = true;
1.1 millert 516: break;
1.44 millert 517: case 'n':
518: sm->nflag = true;
519: need_hint = true;
520: print_symbols_on_debug = true;
1.1 millert 521: break;
1.44 millert 522: case 'r':
523: sm->rflag = true;
1.1 millert 524: break;
1.44 millert 525: case 'V':
526: sm->Vflag = true;
1.1 millert 527: break;
1.44 millert 528: case 'h':
529: sm->hflag = true;
530: need_hint = true;
531: print_symbols_on_debug = true;
1.1 millert 532: break;
1.8 deraadt 533: default:
1.44 millert 534: return false;
1.1 millert 535: }
1.44 millert 536: sort_opts_vals.complex_sort = true;
537: sm->func = get_sort_func(sm);
538: }
539: return true;
540: }
541:
542: /*
543: * Parse POS in -k option.
544: */
545: static int
546: parse_pos(const char *s, struct key_specs *ks, bool *mef_flags, bool second)
547: {
548: regmatch_t pmatch[4];
549: regex_t re;
550: char *c, *f;
551: const char *sregexp = "^([0-9]+)(\\.[0-9]+)?([bdfirMngRhV]+)?$";
552: size_t len, nmatch;
553: int ret;
554:
555: ret = -1;
556: nmatch = 4;
557: c = f = NULL;
558:
559: if (regcomp(&re, sregexp, REG_EXTENDED) != 0)
560: return -1;
561:
562: if (regexec(&re, s, nmatch, pmatch, 0) != 0)
563: goto end;
564:
565: if (pmatch[0].rm_eo <= pmatch[0].rm_so)
566: goto end;
567:
568: if (pmatch[1].rm_eo <= pmatch[1].rm_so)
569: goto end;
570:
571: len = pmatch[1].rm_eo - pmatch[1].rm_so;
572: f = sort_malloc((len + 1) * sizeof(char));
573:
574: strncpy(f, s + pmatch[1].rm_so, len);
575: f[len] = '\0';
576:
577: if (second) {
578: errno = 0;
579: ks->f2 = (size_t) strtoul(f, NULL, 10);
580: if (errno != 0)
581: err(2, "-k");
582: if (ks->f2 == 0) {
583: warn("0 field in key specs");
584: goto end;
585: }
586: } else {
587: errno = 0;
588: ks->f1 = (size_t) strtoul(f, NULL, 10);
589: if (errno != 0)
590: err(2, "-k");
591: if (ks->f1 == 0) {
592: warn("0 field in key specs");
593: goto end;
594: }
595: }
596:
597: if (pmatch[2].rm_eo > pmatch[2].rm_so) {
598: len = pmatch[2].rm_eo - pmatch[2].rm_so - 1;
599: c = sort_malloc((len + 1) * sizeof(char));
600:
601: strncpy(c, s + pmatch[2].rm_so + 1, len);
602: c[len] = '\0';
603:
604: if (second) {
605: errno = 0;
606: ks->c2 = (size_t) strtoul(c, NULL, 10);
607: if (errno != 0)
608: err(2, "-k");
609: } else {
610: errno = 0;
611: ks->c1 = (size_t) strtoul(c, NULL, 10);
612: if (errno != 0)
613: err(2, "-k");
614: if (ks->c1 == 0) {
615: warn("0 column in key specs");
616: goto end;
617: }
618: }
619: } else {
620: if (second)
621: ks->c2 = 0;
622: else
623: ks->c1 = 1;
624: }
625:
626: if (pmatch[3].rm_eo > pmatch[3].rm_so) {
627: regoff_t i = 0;
628:
629: for (i = pmatch[3].rm_so; i < pmatch[3].rm_eo; i++) {
630: check_mutually_exclusive_flags(s[i], mef_flags);
631: if (s[i] == 'b') {
632: if (second)
633: ks->pos2b = true;
634: else
635: ks->pos1b = true;
636: } else if (!set_sort_modifier(&(ks->sm), s[i]))
637: goto end;
638: }
639: }
640:
641: ret = 0;
642:
643: end:
644:
645: if (c)
646: sort_free(c);
647: if (f)
648: sort_free(f);
649: regfree(&re);
650:
651: return ret;
652: }
653:
654: /*
655: * Parse -k option value.
656: */
657: static int
658: parse_k(const char *s, struct key_specs *ks)
659: {
660: int ret = -1;
661: bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] =
662: { false, false, false, false, false, false };
663:
664: if (s && *s) {
665: char *sptr;
666:
667: sptr = strchr(s, ',');
668: if (sptr) {
669: size_t size1;
670: char *pos1, *pos2;
671:
672: size1 = sptr - s;
673:
674: if (size1 < 1)
675: return -1;
676: pos1 = sort_malloc((size1 + 1) * sizeof(char));
677:
678: strncpy(pos1, s, size1);
679: pos1[size1] = '\0';
680:
681: ret = parse_pos(pos1, ks, mef_flags, false);
682:
683: sort_free(pos1);
684: if (ret < 0)
685: return ret;
686:
687: pos2 = sort_strdup(sptr + 1);
688: ret = parse_pos(pos2, ks, mef_flags, true);
689: sort_free(pos2);
690: } else
691: ret = parse_pos(s, ks, mef_flags, false);
1.1 millert 692: }
1.4 millert 693:
1.44 millert 694: return ret;
695: }
696:
/*
 * Capacity, including the terminating NUL, of the buffers that receive the
 * modifier letters of an obsolete +POS / -POS argument.
 */
enum { OBS_SOPTS_SIZE = 128 };

/*
 * Convert the decimal digits at p to an int for an obsolete key position.
 * Values that strtoul() rejects, or that could not be incremented as an
 * int afterwards, are fatal (exit status 2).
 */
static int
parse_obs_number(const char *p)
{
	unsigned long v;

	errno = 0;
	v = strtoul(p, NULL, 10);
	if (errno != 0 || v > (unsigned long)(INT_MAX - 1))
		errx(2, "Invalid key position");

	return (int)v;
}

/*
 * Parse POS in +POS -POS option.
 *
 * s:          text after the leading '+' or '-'.
 * nf, nc:     receive the field and column numbers (0 when absent).
 * sopts:      receives the modifier letters, NUL-terminated; untouched when
 *             there are none.
 * sopts_size: capacity of sopts in bytes.
 *
 * Returns 0 on success, -1 when s is not of the form
 * FIELD[.COLUMN][LETTERS] or the letters do not fit into sopts.
 */
static int
parse_pos_obs(const char *s, int *nf, int *nc, char *sopts, size_t sopts_size)
{
	const char *sregexp = "^([0-9]+)(\\.[0-9]+)?([A-Za-z]+)?$";
	regmatch_t pmatch[4];
	regex_t re;
	int ret = -1;

	*nc = *nf = 0;

	if (regcomp(&re, sregexp, REG_EXTENDED) != 0)
		return -1;

	if (regexec(&re, s, sizeof(pmatch) / sizeof(pmatch[0]), pmatch,
	    0) != 0)
		goto end;

	if (pmatch[0].rm_eo <= pmatch[0].rm_so)
		goto end;

	if (pmatch[1].rm_eo <= pmatch[1].rm_so)
		goto end;

	/* strtoul() stops at the first non-digit; no copy is needed. */
	*nf = parse_obs_number(s + pmatch[1].rm_so);

	/* The second group starts at the dot, hence the + 1. */
	if (pmatch[2].rm_eo > pmatch[2].rm_so)
		*nc = parse_obs_number(s + pmatch[2].rm_so + 1);

	if (pmatch[3].rm_eo > pmatch[3].rm_so) {
		size_t len = (size_t)(pmatch[3].rm_eo - pmatch[3].rm_so);

		/* Never write past the caller's buffer. */
		if (len >= sopts_size)
			goto end;
		memcpy(sopts, s + pmatch[3].rm_so, len);
		sopts[len] = '\0';
	}

	ret = 0;

end:
	regfree(&re);

	return ret;
}

/*
 * "Translate" obsolete +POS1 -POS2 syntax into new -kPOS1,POS2 syntax
 *
 * Every argument of the form +POS1 is replaced by a freshly allocated
 * "-k..." string.  When the next argument is a valid -POS2 it is merged
 * into the same key, the following arguments are shifted down by one and
 * *argc is decremented.  Arguments that do not parse are left untouched.
 *
 * Field and column of POS1 are zero-based in the old syntax and so are
 * incremented; the field of POS2 is incremented only when a non-zero
 * column is given.
 */
static void
fix_obsolete_keys(int *argc, char **argv)
{
	/* Room for "-k", four ints, punctuation and both letter strings. */
	char sopt[2 * OBS_SOPTS_SIZE + 64];
	int i;

	for (i = 1; i < *argc; i++) {
		char sopts1[OBS_SOPTS_SIZE];
		const char *arg1 = argv[i];
		int c1 = 0, f1 = 0;

		if (strlen(arg1) <= 1 || arg1[0] != '+')
			continue;

		sopts1[0] = '\0';
		if (parse_pos_obs(arg1 + 1, &f1, &c1, sopts1,
		    sizeof(sopts1)) < 0)
			continue;

		f1 += 1;
		c1 += 1;

		if (i + 1 < *argc) {
			const char *arg2 = argv[i + 1];

			if (strlen(arg2) > 1 && arg2[0] == '-') {
				char sopts2[OBS_SOPTS_SIZE];
				int c2 = 0, f2 = 0;

				sopts2[0] = '\0';
				if (parse_pos_obs(arg2 + 1, &f2, &c2, sopts2,
				    sizeof(sopts2)) >= 0) {
					int j;

					if (c2 > 0)
						f2 += 1;
					snprintf(sopt, sizeof(sopt),
					    "-k%d.%d%s,%d.%d%s",
					    f1, c1, sopts1, f2, c2, sopts2);
					argv[i] = sort_strdup(sopt);
					/* Drop the consumed -POS2 argument. */
					for (j = i + 1; j + 1 < *argc; j++)
						argv[j] = argv[j + 1];
					*argc -= 1;
					continue;
				}
			}
		}

		snprintf(sopt, sizeof(sopt), "-k%d.%d%s", f1, c1, sopts1);
		argv[i] = sort_strdup(sopt);
	}
}
833:
834: /*
835: * Set random seed
836: */
837: static void
838: set_random_seed(void)
839: {
840: if (!need_random)
841: return;
1.4 millert 842:
1.44 millert 843: MD5Init(&md5_ctx);
844: if (random_source != NULL) {
845: unsigned char buf[BUFSIZ];
846: size_t nr;
847: FILE *fp;
848:
849: if ((fp = fopen(random_source, "r")) == NULL)
850: err(2, "%s", random_source);
851: while ((nr = fread(buf, 1, sizeof(buf), fp)) != 0)
852: MD5Update(&md5_ctx, buf, nr);
853: if (ferror(fp))
854: err(2, "%s", random_source);
855: fclose(fp);
1.1 millert 856: } else {
1.44 millert 857: unsigned char rsd[1024];
858:
859: arc4random_buf(rsd, sizeof(rsd));
860: MD5Update(&md5_ctx, rsd, sizeof(rsd));
861: }
862: }
863:
864: /*
865: * Main function.
866: */
867: int
868: main(int argc, char *argv[])
869: {
870: char *outfile, *real_outfile;
871: int c, result;
872: size_t i;
873: bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] =
874: { false, false, false, false, false, false };
875:
876: result = 0;
877: outfile = sort_strdup("-");
878: real_outfile = NULL;
879:
880: struct sort_mods *sm = &default_sort_mods_object;
881:
882: init_tmp_files();
883:
884: set_signal_handler();
885:
886: set_hw_params();
887: set_locale();
888: set_tmpdir();
889: set_sort_opts();
890:
891: fix_obsolete_keys(&argc, argv);
892:
893: while (((c = getopt_long(argc, argv, OPTIONS, long_options, NULL))
894: != -1)) {
895:
896: check_mutually_exclusive_flags(c, mef_flags);
897:
898: if (!set_sort_modifier(sm, c)) {
899:
900: switch (c) {
901: case 'c':
902: sort_opts_vals.cflag = true;
903: if (optarg) {
904: if (!strcmp(optarg, "diagnose-first"))
905: ;
906: else if (!strcmp(optarg, "silent") ||
907: !strcmp(optarg, "quiet"))
908: sort_opts_vals.csilentflag = true;
909: else if (*optarg)
910: unknown(optarg);
911: }
912: break;
913: case 'C':
914: sort_opts_vals.cflag = true;
915: sort_opts_vals.csilentflag = true;
916: break;
917: case 'k':
918: {
919: sort_opts_vals.complex_sort = true;
920: sort_opts_vals.kflag = true;
921:
922: keys_num++;
923: keys = sort_reallocarray(keys, keys_num,
924: sizeof(struct key_specs));
925: memset(&(keys[keys_num - 1]), 0,
926: sizeof(struct key_specs));
927:
928: if (parse_k(optarg, &(keys[keys_num - 1]))
929: < 0) {
930: errc(2, EINVAL, "-k %s", optarg);
931: }
932:
933: break;
934: }
935: case 'm':
936: sort_opts_vals.mflag = true;
937: break;
938: case 'o':
939: sort_free(outfile);
940: outfile = sort_strdup(optarg);
941: break;
942: case 's':
943: sort_opts_vals.sflag = true;
944: break;
945: case 'S':
946: available_free_memory =
947: parse_memory_buffer_value(optarg);
948: break;
949: case 'T':
950: tmpdir = sort_strdup(optarg);
951: break;
952: case 't':
953: while (strlen(optarg) > 1) {
954: if (optarg[0] != '\\') {
955: errc(2, EINVAL, "%s", optarg);
956: }
957: optarg += 1;
958: if (*optarg == '0') {
959: *optarg = 0;
960: break;
961: }
962: }
963: sort_opts_vals.tflag = true;
964: sort_opts_vals.field_sep = btowc(optarg[0]);
965: if (sort_opts_vals.field_sep == WEOF) {
966: errno = EINVAL;
967: err(2, NULL);
968: }
969: if (!gnusort_numeric_compatibility) {
970: if (symbol_decimal_point == sort_opts_vals.field_sep)
971: symbol_decimal_point = WEOF;
972: if (symbol_thousands_sep == sort_opts_vals.field_sep)
973: symbol_thousands_sep = WEOF;
974: if (symbol_negative_sign == sort_opts_vals.field_sep)
975: symbol_negative_sign = WEOF;
976: if (symbol_positive_sign == sort_opts_vals.field_sep)
977: symbol_positive_sign = WEOF;
978: }
979: break;
980: case 'u':
981: sort_opts_vals.uflag = true;
982: /* stable sort for the correct unique val */
983: sort_opts_vals.sflag = true;
984: break;
985: case 'z':
986: sort_opts_vals.zflag = true;
987: break;
988: case SORT_OPT:
989: if (optarg) {
990: if (!strcmp(optarg, "general-numeric"))
991: set_sort_modifier(sm, 'g');
992: else if (!strcmp(optarg, "human-numeric"))
993: set_sort_modifier(sm, 'h');
994: else if (!strcmp(optarg, "numeric"))
995: set_sort_modifier(sm, 'n');
996: else if (!strcmp(optarg, "month"))
997: set_sort_modifier(sm, 'M');
998: else if (!strcmp(optarg, "random"))
999: set_sort_modifier(sm, 'R');
1000: else
1001: unknown(optarg);
1002: }
1003: break;
1004: case QSORT_OPT:
1005: sort_opts_vals.sort_method = SORT_QSORT;
1006: break;
1007: case 'H':
1008: sort_opts_vals.sort_method = SORT_MERGESORT;
1009: break;
1010: case MMAP_OPT:
1011: use_mmap = true;
1012: break;
1013: case HEAPSORT_OPT:
1014: sort_opts_vals.sort_method = SORT_HEAPSORT;
1015: break;
1016: case RADIXSORT_OPT:
1017: sort_opts_vals.sort_method = SORT_RADIXSORT;
1018: break;
1019: case RANDOMSOURCE_OPT:
1020: random_source = strdup(optarg);
1021: break;
1022: case COMPRESSPROGRAM_OPT:
1023: compress_program = strdup(optarg);
1024: break;
1025: case FF_OPT:
1026: read_fns_from_file0(optarg);
1027: break;
1028: case BS_OPT:
1029: {
1030: errno = 0;
1031: long mof = strtol(optarg, NULL, 10);
1032: if (errno != 0)
1033: err(2, "--batch-size");
1034: if (mof >= 2)
1035: max_open_files = (size_t) mof + 1;
1036: }
1037: break;
1038: case VERSION_OPT:
1039: printf("%s\n", VERSION);
1040: exit(EXIT_SUCCESS);
1041: /* NOTREACHED */
1042: break;
1043: case DEBUG_OPT:
1044: debug_sort = true;
1045: break;
1046: case HELP_OPT:
1047: usage(0);
1048: /* NOTREACHED */
1049: break;
1050: default:
1051: usage(2);
1052: /* NOTREACHED */
1053: }
1054: }
1055: }
1056:
1057: argc -= optind;
1058: argv += optind;
1059:
1060: if (sort_opts_vals.cflag && sort_opts_vals.mflag)
1061: errx(1, "%c:%c: mutually exclusive flags", 'm', 'c');
1062:
1063: if (keys_num == 0) {
1064: keys_num = 1;
1065: keys = sort_realloc(keys, sizeof(struct key_specs));
1066: memset(&(keys[0]), 0, sizeof(struct key_specs));
1067: keys[0].c1 = 1;
1068: keys[0].pos1b = default_sort_mods->bflag;
1069: keys[0].pos2b = default_sort_mods->bflag;
1070: memcpy(&(keys[0].sm), default_sort_mods,
1071: sizeof(struct sort_mods));
1072: }
1073:
1074: for (i = 0; i < keys_num; i++) {
1075: struct key_specs *ks;
1076:
1077: ks = &(keys[i]);
1078:
1079: if (sort_modifier_empty(&(ks->sm)) && !(ks->pos1b) &&
1080: !(ks->pos2b)) {
1081: ks->pos1b = sm->bflag;
1082: ks->pos2b = sm->bflag;
1083: memcpy(&(ks->sm), sm, sizeof(struct sort_mods));
1084: }
1085:
1086: ks->sm.func = get_sort_func(&(ks->sm));
1087: }
1088:
1089: if (argv_from_file0) {
1090: argc = argc_from_file0;
1091: argv = argv_from_file0;
1092: }
1093:
1094: if (debug_sort) {
1095: printf("Memory to be used for sorting: %llu\n",
1096: available_free_memory);
1097: printf("Using collate rules of %s locale\n",
1098: setlocale(LC_COLLATE, NULL));
1099: if (byte_sort)
1100: printf("Byte sort is used\n");
1101: if (print_symbols_on_debug) {
1102: printf("Decimal Point: <%lc>\n", symbol_decimal_point);
1103: if (symbol_thousands_sep)
1104: printf("Thousands separator: <%lc>\n",
1105: symbol_thousands_sep);
1106: printf("Positive sign: <%lc>\n", symbol_positive_sign);
1107: printf("Negative sign: <%lc>\n", symbol_negative_sign);
1108: }
1109: }
1110:
1111: set_random_seed();
1.4 millert 1112:
1.44 millert 1113: /* Case when the outfile equals one of the input files: */
1114: if (strcmp(outfile, "-")) {
1115: int i;
1116:
1117: for (i = 0; i < argc; ++i) {
1118: if (strcmp(argv[i], outfile) == 0) {
1119: real_outfile = sort_strdup(outfile);
1120: for (;;) {
1121: const size_t size = strlen(outfile) + strlen(".tmp") + 1;
1122: outfile = sort_realloc(outfile, size);
1123: strlcat(outfile, ".tmp", size);
1124: if (access(outfile, F_OK) < 0)
1125: break;
1126: }
1127: tmp_file_atexit(outfile);
1128: }
1129: }
1130: }
1131:
1132: if (!sort_opts_vals.cflag && !sort_opts_vals.mflag) {
1133: struct file_list fl;
1134: struct sort_list list;
1135:
1136: sort_list_init(&list);
1137: file_list_init(&fl, true);
1138:
1139: if (argc < 1)
1140: procfile("-", &list, &fl);
1141: else {
1142: while (argc > 0) {
1143: procfile(*argv, &list, &fl);
1144: --argc;
1145: ++argv;
1146: }
1147: }
1148:
1149: if (fl.count < 1)
1150: sort_list_to_file(&list, outfile);
1151: else {
1152: if (list.count > 0) {
1153: char *flast = new_tmp_file_name();
1154:
1155: sort_list_to_file(&list, flast);
1156: file_list_add(&fl, flast, false);
1157: }
1158: merge_files(&fl, outfile);
1159: }
1160:
1161: file_list_clean(&fl);
1162:
1163: /*
1164: * We are about to exit the program, so we can ignore
1165: * the clean-up for speed
1166: *
1167: * sort_list_clean(&list);
1168: */
1169:
1170: } else if (sort_opts_vals.cflag) {
1171: result = (argc == 0) ? (check("-")) : (check(*argv));
1172: } else if (sort_opts_vals.mflag) {
1173: struct file_list fl;
1174:
1175: file_list_init(&fl, false);
1176: file_list_populate(&fl, argc, argv, true);
1177: merge_files(&fl, outfile);
1178: file_list_clean(&fl);
1179: }
1180:
1181: if (real_outfile) {
1182: unlink(real_outfile);
1183: if (rename(outfile, real_outfile) < 0)
1184: err(2, "%s", real_outfile);
1185: sort_free(real_outfile);
1186: }
1187:
1188: sort_free(outfile);
1.4 millert 1189:
1.44 millert 1190: return result;
1.1 millert 1191: }